diff options
author | Abhishek Girish <agirish@apache.org> | 2019-03-04 00:18:52 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-03-04 00:18:52 -0800 |
commit | 8c7de7838124a00e1b6b786fde2ad8dfd1b0ba9d (patch) | |
tree | ff2e769959629860d73e9dfecbb208b4350a13da | |
parent | 5e5b9a415d40899987bbc8fb7778e41779f599ad (diff) |
DRILL-7060: Support JsonParser Feature 'ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER' (#1663)
8 files changed, 125 insertions, 10 deletions
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java index ac3252ad7..6fce8b707 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java @@ -442,6 +442,14 @@ public final class ExecConstants { public static final String JSON_READER_NAN_INF_NUMBERS = "store.json.reader.allow_nan_inf"; public static final BooleanValidator JSON_READER_NAN_INF_NUMBERS_VALIDATOR = new BooleanValidator(JSON_READER_NAN_INF_NUMBERS, new OptionDescription("Enables the JSON record reader in Drill to read `NaN` and `Infinity` tokens in JSON data as numbers. Default is true. (Drill 1.13+)")); + + /** + * Json reader option that enables parser to escape any characters + */ + public static final String JSON_READER_ESCAPE_ANY_CHAR = "store.json.reader.allow_escape_any_char"; + public static final BooleanValidator JSON_READER_ESCAPE_ANY_CHAR_VALIDATOR = new BooleanValidator(JSON_READER_ESCAPE_ANY_CHAR, + new OptionDescription("Enables the JSON record reader in Drill to escape any character. Default is false. (Drill 1.16+)")); + /** * The column label (for directory levels) in results when querying files in a directory * E.g. 
labels: dir0 dir1<pre> diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java index 1a9eff3f9..030e1a346 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java @@ -182,6 +182,7 @@ public class SystemOptionManager extends BaseOptionManager implements AutoClosea new OptionDefinition(ExecConstants.JSON_READER_ALL_TEXT_MODE_VALIDATOR), new OptionDefinition(ExecConstants.JSON_WRITER_NAN_INF_NUMBERS_VALIDATOR), new OptionDefinition(ExecConstants.JSON_READER_NAN_INF_NUMBERS_VALIDATOR), + new OptionDefinition(ExecConstants.JSON_READER_ESCAPE_ANY_CHAR_VALIDATOR), new OptionDefinition(ExecConstants.ENABLE_UNION_TYPE), new OptionDefinition(ExecConstants.TEXT_ESTIMATED_ROW_SIZE), new OptionDefinition(ExecConstants.JSON_EXTENDED_TYPES), diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java index 62ace663b..428a4e1bd 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java @@ -61,6 +61,7 @@ public class JSONRecordReader extends AbstractRecordReader { private final FragmentContext fragmentContext; private final boolean enableAllTextMode; private final boolean enableNanInf; + private final boolean enableEscapeAnyChar; private final boolean readNumbersAsDouble; private final boolean unionEnabled; private long parseErrorCount; @@ -115,6 +116,7 @@ public class JSONRecordReader extends AbstractRecordReader { // only enable all text mode if we aren't using embedded content mode. 
this.enableAllTextMode = embeddedContent == null && fragmentContext.getOptions().getOption(ExecConstants.JSON_READER_ALL_TEXT_MODE_VALIDATOR); this.enableNanInf = fragmentContext.getOptions().getOption(ExecConstants.JSON_READER_NAN_INF_NUMBERS_VALIDATOR); + this.enableEscapeAnyChar = fragmentContext.getOptions().getOption(ExecConstants.JSON_READER_ESCAPE_ANY_CHAR_VALIDATOR); this.readNumbersAsDouble = embeddedContent == null && fragmentContext.getOptions().getOption(ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE_VALIDATOR); this.unionEnabled = embeddedContent == null && fragmentContext.getOptions().getBoolean(ExecConstants.ENABLE_UNION_TYPE_KEY); this.skipMalformedJSONRecords = fragmentContext.getOptions().getOption(ExecConstants.JSON_SKIP_MALFORMED_RECORDS_VALIDATOR); @@ -142,7 +144,7 @@ public class JSONRecordReader extends AbstractRecordReader { this.writer = new VectorContainerWriter(output, unionEnabled); if (isSkipQuery()) { - this.jsonReader = new CountingJsonReader(fragmentContext.getManagedBuffer(), enableNanInf); + this.jsonReader = new CountingJsonReader(fragmentContext.getManagedBuffer(), enableNanInf, enableEscapeAnyChar); } else { this.jsonReader = new JsonReader.Builder(fragmentContext.getManagedBuffer()) .schemaPathColumns(ImmutableList.copyOf(getColumns())) @@ -150,6 +152,7 @@ public class JSONRecordReader extends AbstractRecordReader { .skipOuterList(true) .readNumbersAsDouble(readNumbersAsDouble) .enableNanInf(enableNanInf) + .enableEscapeAnyChar(enableEscapeAnyChar) .build(); } setupParser(); diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/BaseJsonProcessor.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/BaseJsonProcessor.java index 48a1464a9..4b61863ec 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/BaseJsonProcessor.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/BaseJsonProcessor.java @@ -39,10 +39,17 
@@ public abstract class BaseJsonProcessor implements JsonProcessor { private static final ObjectMapper DEFAULT_MAPPER = getDefaultMapper(); private static final ObjectMapper NAN_INF_MAPPER = getDefaultMapper() - .configure(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS, true); + .configure(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS, true); + + private static final ObjectMapper ESCAPE_ANY_MAPPER = getDefaultMapper() + .configure(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER, true); + + private static final ObjectMapper NAN_INF_AND_ESCAPE_ANY_MAPPER = getDefaultMapper() + .configure(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS, true).configure(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER, true); private static final String JACKSON_PARSER_EOF_FILE_MSG = "Unexpected end-of-input:"; private final boolean enableNanInf; + private final boolean enableEscapeAnyChar; public enum JsonExceptionProcessingState { END_OF_STREAM, PROC_SUCCEED @@ -75,15 +82,20 @@ public abstract class BaseJsonProcessor implements JsonProcessor { this.ignoreJSONParseErrors = ignoreJSONParseErrors; } - public BaseJsonProcessor(DrillBuf workBuf, boolean enableNanInf) { + public BaseJsonProcessor(DrillBuf workBuf, boolean enableNanInf, boolean enableEscapeAnyChar) { this.enableNanInf = enableNanInf; + this.enableEscapeAnyChar = enableEscapeAnyChar; workBuf = Preconditions.checkNotNull(workBuf); } @Override public void setSource(InputStream is) throws IOException { - if (enableNanInf) { + if (enableNanInf && enableEscapeAnyChar) { + parser = NAN_INF_AND_ESCAPE_ANY_MAPPER.getFactory().createParser(is); + } else if (enableNanInf) { parser = NAN_INF_MAPPER.getFactory().createParser(is); + } else if (enableEscapeAnyChar){ + parser = ESCAPE_ANY_MAPPER.getFactory().createParser(is); } else { parser = DEFAULT_MAPPER.getFactory().createParser(is); } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/CountingJsonReader.java 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/CountingJsonReader.java index 0f92ec52a..73b93f4c8 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/CountingJsonReader.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/CountingJsonReader.java @@ -27,8 +27,8 @@ import org.apache.drill.exec.vector.complex.writer.BaseWriter; public class CountingJsonReader extends BaseJsonProcessor { - public CountingJsonReader(DrillBuf workBuf, boolean enableNanInf) { - super(workBuf, enableNanInf); + public CountingJsonReader(DrillBuf workBuf, boolean enableNanInf, boolean enableEscapeAnyChar) { + super(workBuf, enableNanInf, enableEscapeAnyChar); } @Override diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java index aaa9806a4..34b1f8047 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java @@ -74,7 +74,7 @@ public class JsonReader extends BaseJsonProcessor { private FieldSelection selection; private JsonReader(Builder builder) { - super(builder.managedBuf, builder.enableNanInf); + super(builder.managedBuf, builder.enableNanInf, builder.enableEscapeAnyChar); selection = FieldSelection.getFieldSelection(builder.columns); workingBuffer = builder.workingBuffer; skipOuterList = builder.skipOuterList; @@ -97,6 +97,7 @@ public class JsonReader extends BaseJsonProcessor { private boolean skipOuterList; private boolean allTextMode; private boolean enableNanInf; + private boolean enableEscapeAnyChar; public Builder(DrillBuf managedBuf) { @@ -104,9 +105,6 @@ public class JsonReader extends BaseJsonProcessor { this.workingBuffer = new WorkingBuffer(managedBuf); this.mapOutput = new MapVectorOutput(workingBuffer); this.listOutput = new 
ListVectorOutput(workingBuffer); - this.readNumbersAsDouble = false; - this.skipOuterList = false; - this.allTextMode = false; this.enableNanInf = true; } @@ -130,6 +128,11 @@ public class JsonReader extends BaseJsonProcessor { return this; } + public Builder enableEscapeAnyChar(boolean enableEscapeAnyChar) { + this.enableEscapeAnyChar = enableEscapeAnyChar; + return this; + } + public Builder defaultSchemaPathColumns() { this.columns = GroupScan.ALL_COLUMNS; return this; diff --git a/exec/java-exec/src/main/resources/drill-module.conf b/exec/java-exec/src/main/resources/drill-module.conf index 386b8c2fa..4f6fbb278 100644 --- a/exec/java-exec/src/main/resources/drill-module.conf +++ b/exec/java-exec/src/main/resources/drill-module.conf @@ -606,6 +606,7 @@ drill.exec.options: { store.json.all_text_mode: false, store.json.writer.allow_nan_inf: true, store.json.reader.allow_nan_inf: true, + store.json.reader.allow_escape_any_char: false, store.json.extended_types: false, store.json.read_numbers_as_double: false, store.json.reader.print_skipped_invalid_record_number: false, diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonEscapeAnyChar.java b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonEscapeAnyChar.java new file mode 100644 index 000000000..323da27cc --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonEscapeAnyChar.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.complex.writer; + +import org.apache.commons.io.FileUtils; +import org.apache.drill.common.exceptions.UserRemoteException; +import org.apache.drill.exec.ExecConstants; +import org.apache.drill.test.ClusterFixture; +import org.apache.drill.test.ClusterTest; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import java.io.File; + +import static org.hamcrest.CoreMatchers.containsString; +import static org.junit.Assert.assertThat; + +public class TestJsonEscapeAnyChar extends ClusterTest { + + private File testFile; + private static final String TABLE = "escape.json"; + private static final String JSON_DATA = "{\"name\": \"ABC\\S\"}"; + private static final String QUERY = String.format("select * from dfs.`%s`", TABLE); + + @Before + public void setup() throws Exception { + startCluster(ClusterFixture.builder(dirTestWatcher)); + testFile = new File(dirTestWatcher.getRootDir(), TABLE); + FileUtils.writeStringToFile(testFile, JSON_DATA); + } + + @Test + public void testwithOptionEnabled() throws Exception { + + try { + enableJsonReaderEscapeAnyChar(); + testBuilder() + .sqlQuery(QUERY) + .unOrdered() + .baselineColumns("name") + .baselineValues("ABCS") + .build() + .run(); + } finally { + resetJsonReaderEscapeAnyChar(); + } + } + + @Test + public void testwithOptionDisabled() throws Exception { + try { + queryBuilder().sql(QUERY) + .run(); + } catch (UserRemoteException e) { + assertThat(e.getMessage(), containsString("DATA_READ ERROR: Error parsing JSON - Unrecognized character escape")); 
+ } + } + + private void enableJsonReaderEscapeAnyChar() { + client.alterSession(ExecConstants.JSON_READER_ESCAPE_ANY_CHAR, true); + } + + private void resetJsonReaderEscapeAnyChar() { + client.alterSession(ExecConstants.JSON_READER_ESCAPE_ANY_CHAR, false); + } + + @After + public void teardown() throws Exception { + FileUtils.deleteQuietly(testFile); + } +}
\ No newline at end of file