Skip to content

Commit 91e8718

Browse files
committed
Update json_extract to produce canonicalized output
1 parent abb3399 commit 91e8718

File tree

13 files changed

+826
-164
lines changed

13 files changed

+826
-164
lines changed

presto-common/src/main/java/com/facebook/presto/common/function/SqlFunctionProperties.java

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ public class SqlFunctionProperties
3737
private final boolean legacyJsonCast;
3838
private final Map<String, String> extraCredentials;
3939
private final boolean warnOnCommonNanPatterns;
40+
private final boolean canonicalizedJsonExtract;
4041

4142
private SqlFunctionProperties(
4243
boolean parseDecimalLiteralAsDouble,
@@ -50,7 +51,8 @@ private SqlFunctionProperties(
5051
boolean fieldNamesInJsonCastEnabled,
5152
boolean legacyJsonCast,
5253
Map<String, String> extraCredentials,
53-
boolean warnOnCommonNanPatterns)
54+
boolean warnOnCommonNanPatterns,
55+
boolean canonicalizedJsonExtract)
5456
{
5557
this.parseDecimalLiteralAsDouble = parseDecimalLiteralAsDouble;
5658
this.legacyRowFieldOrdinalAccessEnabled = legacyRowFieldOrdinalAccessEnabled;
@@ -64,6 +66,7 @@ private SqlFunctionProperties(
6466
this.legacyJsonCast = legacyJsonCast;
6567
this.extraCredentials = requireNonNull(extraCredentials, "extraCredentials is null");
6668
this.warnOnCommonNanPatterns = warnOnCommonNanPatterns;
69+
this.canonicalizedJsonExtract = canonicalizedJsonExtract;
6770
}
6871

6972
public boolean isParseDecimalLiteralAsDouble()
@@ -127,6 +130,9 @@ public boolean shouldWarnOnCommonNanPatterns()
127130
return warnOnCommonNanPatterns;
128131
}
129132

133+
public boolean isCanonicalizedJsonExtract()
134+
{ return canonicalizedJsonExtract; }
135+
130136
@Override
131137
public boolean equals(Object o)
132138
{
@@ -146,15 +152,16 @@ public boolean equals(Object o)
146152
Objects.equals(sessionLocale, that.sessionLocale) &&
147153
Objects.equals(sessionUser, that.sessionUser) &&
148154
Objects.equals(extraCredentials, that.extraCredentials) &&
149-
Objects.equals(legacyJsonCast, that.legacyJsonCast);
155+
Objects.equals(legacyJsonCast, that.legacyJsonCast) &&
156+
Objects.equals(canonicalizedJsonExtract, that.legacyJsonCast);
150157
}
151158

152159
@Override
153160
public int hashCode()
154161
{
155162
return Objects.hash(parseDecimalLiteralAsDouble, legacyRowFieldOrdinalAccessEnabled, timeZoneKey,
156163
legacyTimestamp, legacyMapSubscript, sessionStartTime, sessionLocale, sessionUser,
157-
extraCredentials, legacyJsonCast);
164+
extraCredentials, legacyJsonCast, canonicalizedJsonExtract);
158165
}
159166

160167
public static Builder builder()
@@ -176,6 +183,7 @@ public static class Builder
176183
private boolean legacyJsonCast;
177184
private Map<String, String> extraCredentials = emptyMap();
178185
private boolean warnOnCommonNanPatterns;
186+
private boolean canonicalizedJsonExtract;
179187

180188
private Builder() {}
181189

@@ -251,6 +259,12 @@ public Builder setWarnOnCommonNanPatterns(boolean warnOnCommonNanPatterns)
251259
return this;
252260
}
253261

262+
public Builder setCanonicalizedJsonExtract(boolean canonicalizedJsonExtract)
263+
{
264+
this.canonicalizedJsonExtract = canonicalizedJsonExtract;
265+
return this;
266+
}
267+
254268
public SqlFunctionProperties build()
255269
{
256270
return new SqlFunctionProperties(
@@ -265,7 +279,8 @@ public SqlFunctionProperties build()
265279
fieldNamesInJsonCastEnabled,
266280
legacyJsonCast,
267281
extraCredentials,
268-
warnOnCommonNanPatterns);
282+
warnOnCommonNanPatterns,
283+
canonicalizedJsonExtract);
269284
}
270285
}
271286
}

presto-main/src/main/java/com/facebook/presto/Session.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
import java.util.stream.Collectors;
5858

5959
import static com.facebook.presto.SystemSessionProperties.LEGACY_JSON_CAST;
60+
import static com.facebook.presto.SystemSessionProperties.isCanonicalizedJsonExtract;
6061
import static com.facebook.presto.SystemSessionProperties.isFieldNameInJsonCastEnabled;
6162
import static com.facebook.presto.SystemSessionProperties.isLegacyMapSubscript;
6263
import static com.facebook.presto.SystemSessionProperties.isLegacyRowFieldOrdinalAccessEnabled;
@@ -481,6 +482,7 @@ public SqlFunctionProperties getSqlFunctionProperties()
481482
.setLegacyJsonCast(legacyJsonCast)
482483
.setExtraCredentials(identity.getExtraCredentials())
483484
.setWarnOnCommonNanPatterns(warnOnCommonNanPatterns(this))
485+
.setCanonicalizedJsonExtract(isCanonicalizedJsonExtract(this))
484486
.build();
485487
}
486488

presto-main/src/main/java/com/facebook/presto/SystemSessionProperties.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,7 @@ public final class SystemSessionProperties
305305
public static final String REWRITE_CASE_TO_MAP_ENABLED = "rewrite_case_to_map_enabled";
306306
public static final String FIELD_NAMES_IN_JSON_CAST_ENABLED = "field_names_in_json_cast_enabled";
307307
public static final String LEGACY_JSON_CAST = "legacy_json_cast";
308+
public static final String CANONICALIZED_JSON_EXTRACT = "canonicalized_json_extract";
308309
public static final String PULL_EXPRESSION_FROM_LAMBDA_ENABLED = "pull_expression_from_lambda_enabled";
309310
public static final String REWRITE_CONSTANT_ARRAY_CONTAINS_TO_IN_EXPRESSION = "rewrite_constant_array_contains_to_in_expression";
310311
public static final String INFER_INEQUALITY_PREDICATES = "infer_inequality_predicates";
@@ -1636,6 +1637,11 @@ public SystemSessionProperties(
16361637
"Keep the legacy json cast behavior, do not reserve the case for field names when casting to row type",
16371638
functionsConfig.isLegacyJsonCast(),
16381639
true),
1640+
booleanProperty(
1641+
CANONICALIZED_JSON_EXTRACT,
1642+
"Extracts json data in a canonicalized manner, and raises a PrestoException when encountering invalid json structures within the input json path",
1643+
functionsConfig.isCanonicalizedJsonExtract(),
1644+
true),
16391645
booleanProperty(
16401646
OPTIMIZE_JOIN_PROBE_FOR_EMPTY_BUILD_RUNTIME,
16411647
"Optimize join probe at runtime if build side is empty",
@@ -3174,4 +3180,9 @@ public static boolean isEnabledAddExchangeBelowGroupId(Session session)
31743180
{
31753181
return session.getSystemProperty(ADD_EXCHANGE_BELOW_PARTIAL_AGGREGATION_OVER_GROUP_ID, Boolean.class);
31763182
}
3183+
3184+
public static boolean isCanonicalizedJsonExtract(Session session)
3185+
{
3186+
return session.getSystemProperty(CANONICALIZED_JSON_EXTRACT, Boolean.class);
3187+
}
31773188
}

presto-main/src/main/java/com/facebook/presto/operator/scalar/JsonExtract.java

Lines changed: 41 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,19 +13,23 @@
1313
*/
1414
package com.facebook.presto.operator.scalar;
1515

16+
import com.facebook.airlift.json.JsonObjectMapperProvider;
17+
import com.facebook.presto.common.function.SqlFunctionProperties;
1618
import com.facebook.presto.spi.PrestoException;
1719
import com.fasterxml.jackson.core.JsonFactory;
1820
import com.fasterxml.jackson.core.JsonGenerator;
1921
import com.fasterxml.jackson.core.JsonParseException;
2022
import com.fasterxml.jackson.core.JsonParser;
2123
import com.fasterxml.jackson.core.JsonToken;
2224
import com.fasterxml.jackson.core.io.SerializedString;
25+
import com.fasterxml.jackson.databind.ObjectMapper;
2326
import com.google.common.collect.ImmutableList;
2427
import io.airlift.slice.DynamicSliceOutput;
2528
import io.airlift.slice.Slice;
2629

2730
import java.io.IOException;
2831
import java.io.InputStream;
32+
import java.io.OutputStream;
2933
import java.io.UncheckedIOException;
3034

3135
import static com.facebook.presto.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT;
@@ -38,6 +42,7 @@
3842
import static com.fasterxml.jackson.core.JsonToken.START_ARRAY;
3943
import static com.fasterxml.jackson.core.JsonToken.START_OBJECT;
4044
import static com.fasterxml.jackson.core.JsonToken.VALUE_NULL;
45+
import static com.fasterxml.jackson.databind.SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS;
4146
import static io.airlift.slice.Slices.utf8Slice;
4247
import static java.util.Objects.requireNonNull;
4348

@@ -121,13 +126,15 @@ public final class JsonExtract
121126
private static final JsonFactory JSON_FACTORY = new JsonFactory()
122127
.disable(CANONICALIZE_FIELD_NAMES);
123128

129+
private static final ObjectMapper SORTED_MAPPER = new JsonObjectMapperProvider().get().configure(ORDER_MAP_ENTRIES_BY_KEYS, true);
130+
124131
private JsonExtract() {}
125132

126-
public static <T> T extract(Slice jsonInput, JsonExtractor<T> jsonExtractor)
133+
public static <T> T extract(Slice jsonInput, JsonExtractor<T> jsonExtractor, SqlFunctionProperties properties)
127134
{
128135
requireNonNull(jsonInput, "jsonInput is null");
129136
try {
130-
return jsonExtractor.extract(jsonInput.getInput());
137+
return jsonExtractor.extract(jsonInput.getInput(), properties);
131138
}
132139
catch (JsonParseException e) {
133140
// Return null if we failed to parse something
@@ -156,7 +163,7 @@ public static <T> PrestoJsonExtractor<T> generateExtractor(String path, PrestoJs
156163

157164
public interface JsonExtractor<T>
158165
{
159-
T extract(InputStream inputStream)
166+
T extract(InputStream inputStream, SqlFunctionProperties properties)
160167
throws IOException;
161168
}
162169

@@ -174,11 +181,11 @@ public abstract static class PrestoJsonExtractor<T>
174181
*
175182
* @return the value, or null if not applicable
176183
*/
177-
abstract T extract(JsonParser jsonParser)
184+
abstract T extract(JsonParser jsonParser, SqlFunctionProperties properties)
178185
throws IOException;
179186

180187
@Override
181-
public T extract(InputStream inputStream)
188+
public T extract(InputStream inputStream, SqlFunctionProperties properties)
182189
throws IOException
183190
{
184191
try (JsonParser jsonParser = createJsonParser(JSON_FACTORY, inputStream)) {
@@ -187,7 +194,7 @@ public T extract(InputStream inputStream)
187194
return null;
188195
}
189196

190-
return extract(jsonParser);
197+
return extract(jsonParser, properties);
191198
}
192199
}
193200
}
@@ -214,21 +221,21 @@ public ObjectFieldJsonExtractor(String fieldName, PrestoJsonExtractor<? extends
214221
}
215222

216223
@Override
217-
public T extract(JsonParser jsonParser)
224+
public T extract(JsonParser jsonParser, SqlFunctionProperties properties)
218225
throws IOException
219226
{
220227
if (jsonParser.getCurrentToken() == START_OBJECT) {
221-
return processJsonObject(jsonParser);
228+
return processJsonObject(jsonParser, properties);
222229
}
223230

224231
if (jsonParser.getCurrentToken() == START_ARRAY) {
225-
return processJsonArray(jsonParser);
232+
return processJsonArray(jsonParser, properties);
226233
}
227234

228235
throw new JsonParseException(jsonParser, "Expected a JSON object or array");
229236
}
230237

231-
public T processJsonObject(JsonParser jsonParser)
238+
public T processJsonObject(JsonParser jsonParser, SqlFunctionProperties properties)
232239
throws IOException
233240
{
234241
while (!jsonParser.nextFieldName(fieldName)) {
@@ -244,10 +251,10 @@ public T processJsonObject(JsonParser jsonParser)
244251

245252
jsonParser.nextToken(); // Shift to first token of the value
246253

247-
return delegate.extract(jsonParser);
254+
return delegate.extract(jsonParser, properties);
248255
}
249256

250-
public T processJsonArray(JsonParser jsonParser)
257+
public T processJsonArray(JsonParser jsonParser, SqlFunctionProperties properties)
251258
throws IOException
252259
{
253260
int currentIndex = 0;
@@ -270,15 +277,15 @@ public T processJsonArray(JsonParser jsonParser)
270277
jsonParser.skipChildren(); // Skip nested structure if currently at the start of one
271278
}
272279

273-
return delegate.extract(jsonParser);
280+
return delegate.extract(jsonParser, properties);
274281
}
275282
}
276283

277284
public static class ScalarValueJsonExtractor
278285
extends PrestoJsonExtractor<Slice>
279286
{
280287
@Override
281-
public Slice extract(JsonParser jsonParser)
288+
public Slice extract(JsonParser jsonParser, SqlFunctionProperties properties)
282289
throws IOException
283290
{
284291
JsonToken token = jsonParser.getCurrentToken();
@@ -296,13 +303,31 @@ public static class JsonValueJsonExtractor
296303
extends PrestoJsonExtractor<Slice>
297304
{
298305
@Override
299-
public Slice extract(JsonParser jsonParser)
306+
public Slice extract(JsonParser jsonParser, SqlFunctionProperties properties)
300307
throws IOException
301308
{
302309
if (!jsonParser.hasCurrentToken()) {
303310
throw new JsonParseException(jsonParser, "Unexpected end of value");
304311
}
312+
if (!properties.isCanonicalizedJsonExtract()) {
313+
return legacyExtract(jsonParser);
314+
}
315+
DynamicSliceOutput dynamicSliceOutput = new DynamicSliceOutput(ESTIMATED_JSON_OUTPUT_SIZE);
316+
// Write the JSON to output stream with sorted keys
317+
SORTED_MAPPER.writeValue((OutputStream) dynamicSliceOutput, SORTED_MAPPER.readValue(jsonParser, Object.class));
318+
// nextToken will throw an exception if there are trailing characters.
319+
try {
320+
jsonParser.nextToken();
321+
}
322+
catch (JsonParseException e) {
323+
throw new PrestoException(INVALID_FUNCTION_ARGUMENT, e.getMessage());
324+
}
325+
return dynamicSliceOutput.slice();
326+
}
305327

328+
public Slice legacyExtract(JsonParser jsonParser)
329+
throws IOException
330+
{
306331
DynamicSliceOutput dynamicSliceOutput = new DynamicSliceOutput(ESTIMATED_JSON_OUTPUT_SIZE);
307332
try (JsonGenerator jsonGenerator = createJsonGenerator(JSON_FACTORY, dynamicSliceOutput)) {
308333
jsonGenerator.copyCurrentStructure(jsonParser);
@@ -315,7 +340,7 @@ public static class JsonSizeExtractor
315340
extends PrestoJsonExtractor<Long>
316341
{
317342
@Override
318-
public Long extract(JsonParser jsonParser)
343+
public Long extract(JsonParser jsonParser, SqlFunctionProperties properties)
319344
throws IOException
320345
{
321346
if (!jsonParser.hasCurrentToken()) {

presto-main/src/main/java/com/facebook/presto/operator/scalar/JsonFunctions.java

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -435,51 +435,51 @@ public static Slice jsonArrayGet(@SqlType(StandardTypes.JSON) Slice json, @SqlTy
435435
@SqlNullable
436436
@LiteralParameters("x")
437437
@SqlType("varchar(x)")
438-
public static Slice varcharJsonExtractScalar(@SqlType("varchar(x)") Slice json, @SqlType(JsonPathType.NAME) JsonPath jsonPath)
438+
public static Slice varcharJsonExtractScalar(SqlFunctionProperties properties, @SqlType("varchar(x)") Slice json, @SqlType(JsonPathType.NAME) JsonPath jsonPath)
439439
{
440-
return JsonExtract.extract(json, jsonPath.getScalarExtractor());
440+
return JsonExtract.extract(json, jsonPath.getScalarExtractor(), properties);
441441
}
442442

443443
@ScalarFunction
444444
@SqlNullable
445445
@SqlType(StandardTypes.VARCHAR)
446-
public static Slice jsonExtractScalar(@SqlType(StandardTypes.JSON) Slice json, @SqlType(JsonPathType.NAME) JsonPath jsonPath)
446+
public static Slice jsonExtractScalar(SqlFunctionProperties properties, @SqlType(StandardTypes.JSON) Slice json, @SqlType(JsonPathType.NAME) JsonPath jsonPath)
447447
{
448-
return JsonExtract.extract(json, jsonPath.getScalarExtractor());
448+
return JsonExtract.extract(json, jsonPath.getScalarExtractor(), properties);
449449
}
450450

451451
@ScalarFunction("json_extract")
452452
@LiteralParameters("x")
453453
@SqlNullable
454454
@SqlType(StandardTypes.JSON)
455-
public static Slice varcharJsonExtract(@SqlType("varchar(x)") Slice json, @SqlType(JsonPathType.NAME) JsonPath jsonPath)
455+
public static Slice varcharJsonExtract(SqlFunctionProperties properties, @SqlType("varchar(x)") Slice json, @SqlType(JsonPathType.NAME) JsonPath jsonPath)
456456
{
457-
return JsonExtract.extract(json, jsonPath.getObjectExtractor());
457+
return JsonExtract.extract(json, jsonPath.getObjectExtractor(), properties);
458458
}
459459

460460
@ScalarFunction
461461
@SqlNullable
462462
@SqlType(StandardTypes.JSON)
463-
public static Slice jsonExtract(@SqlType(StandardTypes.JSON) Slice json, @SqlType(JsonPathType.NAME) JsonPath jsonPath)
463+
public static Slice jsonExtract(SqlFunctionProperties properties, @SqlType(StandardTypes.JSON) Slice json, @SqlType(JsonPathType.NAME) JsonPath jsonPath)
464464
{
465-
return JsonExtract.extract(json, jsonPath.getObjectExtractor());
465+
return JsonExtract.extract(json, jsonPath.getObjectExtractor(), properties);
466466
}
467467

468468
@ScalarFunction("json_size")
469469
@LiteralParameters("x")
470470
@SqlNullable
471471
@SqlType(StandardTypes.BIGINT)
472-
public static Long varcharJsonSize(@SqlType("varchar(x)") Slice json, @SqlType(JsonPathType.NAME) JsonPath jsonPath)
472+
public static Long varcharJsonSize(SqlFunctionProperties properties, @SqlType("varchar(x)") Slice json, @SqlType(JsonPathType.NAME) JsonPath jsonPath)
473473
{
474-
return JsonExtract.extract(json, jsonPath.getSizeExtractor());
474+
return JsonExtract.extract(json, jsonPath.getSizeExtractor(), properties);
475475
}
476476

477477
@ScalarFunction
478478
@SqlNullable
479479
@SqlType(StandardTypes.BIGINT)
480-
public static Long jsonSize(@SqlType(StandardTypes.JSON) Slice json, @SqlType(JsonPathType.NAME) JsonPath jsonPath)
480+
public static Long jsonSize(SqlFunctionProperties properties, @SqlType(StandardTypes.JSON) Slice json, @SqlType(JsonPathType.NAME) JsonPath jsonPath)
481481
{
482-
return JsonExtract.extract(json, jsonPath.getSizeExtractor());
482+
return JsonExtract.extract(json, jsonPath.getSizeExtractor(), properties);
483483
}
484484

485485
public static Object getJsonObjectValue(Type valueType, SqlFunctionProperties properties, Block block, int position)

0 commit comments

Comments
 (0)