aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAbhishek Girish <agirish@apache.org>2019-03-04 00:18:52 -0800
committerGitHub <noreply@github.com>2019-03-04 00:18:52 -0800
commit8c7de7838124a00e1b6b786fde2ad8dfd1b0ba9d (patch)
treeff2e769959629860d73e9dfecbb208b4350a13da
parent5e5b9a415d40899987bbc8fb7778e41779f599ad (diff)
DRILL-7060: Support JsonParser Feature 'ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER' (#1663)
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java8
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java1
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java5
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/BaseJsonProcessor.java18
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/CountingJsonReader.java4
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java11
-rw-r--r--exec/java-exec/src/main/resources/drill-module.conf1
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonEscapeAnyChar.java87
8 files changed, 125 insertions, 10 deletions
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
index ac3252ad7..6fce8b707 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
@@ -442,6 +442,14 @@ public final class ExecConstants {
public static final String JSON_READER_NAN_INF_NUMBERS = "store.json.reader.allow_nan_inf";
public static final BooleanValidator JSON_READER_NAN_INF_NUMBERS_VALIDATOR = new BooleanValidator(JSON_READER_NAN_INF_NUMBERS,
new OptionDescription("Enables the JSON record reader in Drill to read `NaN` and `Infinity` tokens in JSON data as numbers. Default is true. (Drill 1.13+)"));
+
+ /**
+ * Json reader option that enables parser to escape any characters
+ */
+ public static final String JSON_READER_ESCAPE_ANY_CHAR = "store.json.reader.allow_escape_any_char";
+ public static final BooleanValidator JSON_READER_ESCAPE_ANY_CHAR_VALIDATOR = new BooleanValidator(JSON_READER_ESCAPE_ANY_CHAR,
+ new OptionDescription("Enables the JSON record reader in Drill to escape any character. Default is false. (Drill 1.16+)"));
+
/**
* The column label (for directory levels) in results when querying files in a directory
* E.g. labels: dir0 dir1<pre>
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
index 1a9eff3f9..030e1a346 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
@@ -182,6 +182,7 @@ public class SystemOptionManager extends BaseOptionManager implements AutoClosea
new OptionDefinition(ExecConstants.JSON_READER_ALL_TEXT_MODE_VALIDATOR),
new OptionDefinition(ExecConstants.JSON_WRITER_NAN_INF_NUMBERS_VALIDATOR),
new OptionDefinition(ExecConstants.JSON_READER_NAN_INF_NUMBERS_VALIDATOR),
+ new OptionDefinition(ExecConstants.JSON_READER_ESCAPE_ANY_CHAR_VALIDATOR),
new OptionDefinition(ExecConstants.ENABLE_UNION_TYPE),
new OptionDefinition(ExecConstants.TEXT_ESTIMATED_ROW_SIZE),
new OptionDefinition(ExecConstants.JSON_EXTENDED_TYPES),
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java
index 62ace663b..428a4e1bd 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java
@@ -61,6 +61,7 @@ public class JSONRecordReader extends AbstractRecordReader {
private final FragmentContext fragmentContext;
private final boolean enableAllTextMode;
private final boolean enableNanInf;
+ private final boolean enableEscapeAnyChar;
private final boolean readNumbersAsDouble;
private final boolean unionEnabled;
private long parseErrorCount;
@@ -115,6 +116,7 @@ public class JSONRecordReader extends AbstractRecordReader {
// only enable all text mode if we aren't using embedded content mode.
this.enableAllTextMode = embeddedContent == null && fragmentContext.getOptions().getOption(ExecConstants.JSON_READER_ALL_TEXT_MODE_VALIDATOR);
this.enableNanInf = fragmentContext.getOptions().getOption(ExecConstants.JSON_READER_NAN_INF_NUMBERS_VALIDATOR);
+ this.enableEscapeAnyChar = fragmentContext.getOptions().getOption(ExecConstants.JSON_READER_ESCAPE_ANY_CHAR_VALIDATOR);
this.readNumbersAsDouble = embeddedContent == null && fragmentContext.getOptions().getOption(ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE_VALIDATOR);
this.unionEnabled = embeddedContent == null && fragmentContext.getOptions().getBoolean(ExecConstants.ENABLE_UNION_TYPE_KEY);
this.skipMalformedJSONRecords = fragmentContext.getOptions().getOption(ExecConstants.JSON_SKIP_MALFORMED_RECORDS_VALIDATOR);
@@ -142,7 +144,7 @@ public class JSONRecordReader extends AbstractRecordReader {
this.writer = new VectorContainerWriter(output, unionEnabled);
if (isSkipQuery()) {
- this.jsonReader = new CountingJsonReader(fragmentContext.getManagedBuffer(), enableNanInf);
+ this.jsonReader = new CountingJsonReader(fragmentContext.getManagedBuffer(), enableNanInf, enableEscapeAnyChar);
} else {
this.jsonReader = new JsonReader.Builder(fragmentContext.getManagedBuffer())
.schemaPathColumns(ImmutableList.copyOf(getColumns()))
@@ -150,6 +152,7 @@ public class JSONRecordReader extends AbstractRecordReader {
.skipOuterList(true)
.readNumbersAsDouble(readNumbersAsDouble)
.enableNanInf(enableNanInf)
+ .enableEscapeAnyChar(enableEscapeAnyChar)
.build();
}
setupParser();
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/BaseJsonProcessor.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/BaseJsonProcessor.java
index 48a1464a9..4b61863ec 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/BaseJsonProcessor.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/BaseJsonProcessor.java
@@ -39,10 +39,17 @@ public abstract class BaseJsonProcessor implements JsonProcessor {
private static final ObjectMapper DEFAULT_MAPPER = getDefaultMapper();
private static final ObjectMapper NAN_INF_MAPPER = getDefaultMapper()
- .configure(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS, true);
+ .configure(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS, true);
+
+ private static final ObjectMapper ESCAPE_ANY_MAPPER = getDefaultMapper()
+ .configure(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER, true);
+
+ private static final ObjectMapper NAN_INF_AND_ESCAPE_ANY_MAPPER = getDefaultMapper()
+ .configure(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS, true).configure(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER, true);
private static final String JACKSON_PARSER_EOF_FILE_MSG = "Unexpected end-of-input:";
private final boolean enableNanInf;
+ private final boolean enableEscapeAnyChar;
public enum JsonExceptionProcessingState {
END_OF_STREAM, PROC_SUCCEED
@@ -75,15 +82,20 @@ public abstract class BaseJsonProcessor implements JsonProcessor {
this.ignoreJSONParseErrors = ignoreJSONParseErrors;
}
- public BaseJsonProcessor(DrillBuf workBuf, boolean enableNanInf) {
+ public BaseJsonProcessor(DrillBuf workBuf, boolean enableNanInf, boolean enableEscapeAnyChar) {
this.enableNanInf = enableNanInf;
+ this.enableEscapeAnyChar = enableEscapeAnyChar;
workBuf = Preconditions.checkNotNull(workBuf);
}
@Override
public void setSource(InputStream is) throws IOException {
- if (enableNanInf) {
+ if (enableNanInf && enableEscapeAnyChar) {
+ parser = NAN_INF_AND_ESCAPE_ANY_MAPPER.getFactory().createParser(is);
+ } else if (enableNanInf) {
parser = NAN_INF_MAPPER.getFactory().createParser(is);
+ } else if (enableEscapeAnyChar){
+ parser = ESCAPE_ANY_MAPPER.getFactory().createParser(is);
} else {
parser = DEFAULT_MAPPER.getFactory().createParser(is);
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/CountingJsonReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/CountingJsonReader.java
index 0f92ec52a..73b93f4c8 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/CountingJsonReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/CountingJsonReader.java
@@ -27,8 +27,8 @@ import org.apache.drill.exec.vector.complex.writer.BaseWriter;
public class CountingJsonReader extends BaseJsonProcessor {
- public CountingJsonReader(DrillBuf workBuf, boolean enableNanInf) {
- super(workBuf, enableNanInf);
+ public CountingJsonReader(DrillBuf workBuf, boolean enableNanInf, boolean enableEscapeAnyChar) {
+ super(workBuf, enableNanInf, enableEscapeAnyChar);
}
@Override
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java
index aaa9806a4..34b1f8047 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java
@@ -74,7 +74,7 @@ public class JsonReader extends BaseJsonProcessor {
private FieldSelection selection;
private JsonReader(Builder builder) {
- super(builder.managedBuf, builder.enableNanInf);
+ super(builder.managedBuf, builder.enableNanInf, builder.enableEscapeAnyChar);
selection = FieldSelection.getFieldSelection(builder.columns);
workingBuffer = builder.workingBuffer;
skipOuterList = builder.skipOuterList;
@@ -97,6 +97,7 @@ public class JsonReader extends BaseJsonProcessor {
private boolean skipOuterList;
private boolean allTextMode;
private boolean enableNanInf;
+ private boolean enableEscapeAnyChar;
public Builder(DrillBuf managedBuf) {
@@ -104,9 +105,6 @@ public class JsonReader extends BaseJsonProcessor {
this.workingBuffer = new WorkingBuffer(managedBuf);
this.mapOutput = new MapVectorOutput(workingBuffer);
this.listOutput = new ListVectorOutput(workingBuffer);
- this.readNumbersAsDouble = false;
- this.skipOuterList = false;
- this.allTextMode = false;
this.enableNanInf = true;
}
@@ -130,6 +128,11 @@ public class JsonReader extends BaseJsonProcessor {
return this;
}
+ public Builder enableEscapeAnyChar(boolean enableEscapeAnyChar) {
+ this.enableEscapeAnyChar = enableEscapeAnyChar;
+ return this;
+ }
+
public Builder defaultSchemaPathColumns() {
this.columns = GroupScan.ALL_COLUMNS;
return this;
diff --git a/exec/java-exec/src/main/resources/drill-module.conf b/exec/java-exec/src/main/resources/drill-module.conf
index 386b8c2fa..4f6fbb278 100644
--- a/exec/java-exec/src/main/resources/drill-module.conf
+++ b/exec/java-exec/src/main/resources/drill-module.conf
@@ -606,6 +606,7 @@ drill.exec.options: {
store.json.all_text_mode: false,
store.json.writer.allow_nan_inf: true,
store.json.reader.allow_nan_inf: true,
+ store.json.reader.allow_escape_any_char: false,
store.json.extended_types: false,
store.json.read_numbers_as_double: false,
store.json.reader.print_skipped_invalid_record_number: false,
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonEscapeAnyChar.java b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonEscapeAnyChar.java
new file mode 100644
index 000000000..323da27cc
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonEscapeAnyChar.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.complex.writer;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.drill.common.exceptions.UserRemoteException;
+import org.apache.drill.exec.ExecConstants;
+import org.apache.drill.test.ClusterFixture;
+import org.apache.drill.test.ClusterTest;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.File;
+
+import static org.hamcrest.CoreMatchers.containsString;
+import static org.junit.Assert.assertThat;
+
+public class TestJsonEscapeAnyChar extends ClusterTest {
+
+ private File testFile;
+ private static final String TABLE = "escape.json";
+ private static final String JSON_DATA = "{\"name\": \"ABC\\S\"}";
+ private static final String QUERY = String.format("select * from dfs.`%s`", TABLE);
+
+ @Before
+ public void setup() throws Exception {
+ startCluster(ClusterFixture.builder(dirTestWatcher));
+ testFile = new File(dirTestWatcher.getRootDir(), TABLE);
+ FileUtils.writeStringToFile(testFile, JSON_DATA);
+ }
+
+ @Test
+ public void testwithOptionEnabled() throws Exception {
+
+ try {
+ enableJsonReaderEscapeAnyChar();
+ testBuilder()
+ .sqlQuery(QUERY)
+ .unOrdered()
+ .baselineColumns("name")
+ .baselineValues("ABCS")
+ .build()
+ .run();
+ } finally {
+ resetJsonReaderEscapeAnyChar();
+ }
+ }
+
+ @Test
+ public void testwithOptionDisabled() throws Exception {
+ try {
+ queryBuilder().sql(QUERY)
+ .run();
+ } catch (UserRemoteException e) {
+ assertThat(e.getMessage(), containsString("DATA_READ ERROR: Error parsing JSON - Unrecognized character escape"));
+ }
+ }
+
+ private void enableJsonReaderEscapeAnyChar() {
+ client.alterSession(ExecConstants.JSON_READER_ESCAPE_ANY_CHAR, true);
+ }
+
+ private void resetJsonReaderEscapeAnyChar() {
+ client.alterSession(ExecConstants.JSON_READER_ESCAPE_ANY_CHAR, false);
+ }
+
+ @After
+ public void teardown() throws Exception {
+ FileUtils.deleteQuietly(testFile);
+ }
+} \ No newline at end of file