aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorParth Chandra <parthc@apache.org>2016-06-01 17:19:03 -0700
committerParth Chandra <parthc@apache.org>2016-06-07 10:06:53 -0700
commit6286c0a4b8e39524fe00d623152d1d38db15774f (patch)
tree1a41cee33dc7d4668e682369361fc7866e1a7791
parentd1ac6fbae025723d54eeeb7d15b89a0df5912a3c (diff)
DRILL-4694: CTAS in JSON format produces extraneous NULL fields Changed behavior of JSON CTAS to skip fields if the value is null. Added an option "store.json.writer.skip_null_fields" to enable old behavior.
-rw-r--r--exec/java-exec/src/main/codegen/templates/JsonOutputRecordWriter.java13
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java1
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java1
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONFormatPlugin.java1
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JsonRecordWriter.java1
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/TestCTASJson.java150
-rw-r--r--exec/java-exec/src/test/resources/json/ctas_alltypes_map.json28
-rw-r--r--exec/java-exec/src/test/resources/json/ctas_alltypes_map_out.json41
-rw-r--r--exec/java-exec/src/test/resources/json/ctas_alltypes_repeated_map.json28
-rw-r--r--exec/java-exec/src/test/resources/json/ctas_alltypes_repeated_map_out.json41
10 files changed, 305 insertions, 0 deletions
diff --git a/exec/java-exec/src/main/codegen/templates/JsonOutputRecordWriter.java b/exec/java-exec/src/main/codegen/templates/JsonOutputRecordWriter.java
index b6b0e9c14..651f5a860 100644
--- a/exec/java-exec/src/main/codegen/templates/JsonOutputRecordWriter.java
+++ b/exec/java-exec/src/main/codegen/templates/JsonOutputRecordWriter.java
@@ -43,6 +43,7 @@ import java.util.List;
public abstract class JSONOutputRecordWriter extends AbstractRecordWriter implements RecordWriter {
protected JsonOutput gen;
+ protected boolean skipNullFields = true;
<#list vv.types as type>
<#list type.minor as minor>
@@ -61,7 +62,13 @@ public abstract class JSONOutputRecordWriter extends AbstractRecordWriter implem
@Override
public void startField() throws IOException {
+ <#if mode.prefix == "Nullable" >
+ if (!skipNullFields || this.reader.isSet()) {
+ gen.writeFieldName(fieldName);
+ }
+ <#else>
gen.writeFieldName(fieldName);
+ </#if>
}
@Override
@@ -120,7 +127,13 @@ public abstract class JSONOutputRecordWriter extends AbstractRecordWriter implem
<#elseif mode.prefix == "Repeated" >
gen.write${typeName}(i, reader);
<#else>
+ <#if mode.prefix == "Nullable" >
+ if (!skipNullFields || this.reader.isSet()) {
+ gen.write${typeName}(reader);
+ }
+ <#else>
gen.write${typeName}(reader);
+ </#if>
</#if>
<#if mode.prefix == "Repeated">
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
index 17fbb7bb8..6eb8a3a14 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
@@ -134,6 +134,7 @@ public interface ExecConstants {
BooleanValidator JSON_READER_ALL_TEXT_MODE_VALIDATOR = new BooleanValidator(JSON_ALL_TEXT_MODE, false);
BooleanValidator JSON_EXTENDED_TYPES = new BooleanValidator("store.json.extended_types", false);
BooleanValidator JSON_WRITER_UGLIFY = new BooleanValidator("store.json.writer.uglify", false);
+ BooleanValidator JSON_WRITER_SKIPNULLFIELDS = new BooleanValidator("store.json.writer.skip_null_fields", true);
DoubleValidator TEXT_ESTIMATED_ROW_SIZE = new RangeDoubleValidator(
"store.text.estimated_row_size_bytes", 1, Long.MAX_VALUE, 100.0);
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
index c35ed0e11..579276e71 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
@@ -99,6 +99,7 @@ public class SystemOptionManager extends BaseOptionManager implements AutoClosea
ExecConstants.TEXT_ESTIMATED_ROW_SIZE,
ExecConstants.JSON_EXTENDED_TYPES,
ExecConstants.JSON_WRITER_UGLIFY,
+ ExecConstants.JSON_WRITER_SKIPNULLFIELDS,
ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE_VALIDATOR,
ExecConstants.FILESYSTEM_PARTITION_COLUMN_LABEL_VALIDATOR,
ExecConstants.MONGO_READER_ALL_TEXT_MODE_VALIDATOR,
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONFormatPlugin.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONFormatPlugin.java
index 3f093bed7..30c248e43 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONFormatPlugin.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONFormatPlugin.java
@@ -81,6 +81,7 @@ public class JSONFormatPlugin extends EasyFormatPlugin<JSONFormatConfig> {
options.put("extension", "json");
options.put("extended", Boolean.toString(context.getOptions().getOption(ExecConstants.JSON_EXTENDED_TYPES)));
options.put("uglify", Boolean.toString(context.getOptions().getOption(ExecConstants.JSON_WRITER_UGLIFY)));
+ options.put("skipnulls", Boolean.toString(context.getOptions().getOption(ExecConstants.JSON_WRITER_SKIPNULLFIELDS)));
RecordWriter recordWriter = new JsonRecordWriter();
recordWriter.init(options);
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JsonRecordWriter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JsonRecordWriter.java
index 848da7720..f27e04ce4 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JsonRecordWriter.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JsonRecordWriter.java
@@ -72,6 +72,7 @@ public class JsonRecordWriter extends JSONOutputRecordWriter implements RecordWr
this.fieldDelimiter = writerOptions.get("separator");
this.extension = writerOptions.get("extension");
this.useExtendedOutput = Boolean.parseBoolean(writerOptions.get("extended"));
+ this.skipNullFields = Boolean.parseBoolean(writerOptions.get("skipnulls"));
final boolean uglify = Boolean.parseBoolean(writerOptions.get("uglify"));
Configuration conf = new Configuration();
diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestCTASJson.java b/exec/java-exec/src/test/java/org/apache/drill/TestCTASJson.java
new file mode 100644
index 000000000..c76892d4b
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/TestCTASJson.java
@@ -0,0 +1,150 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill;
+
+
+import org.apache.drill.common.util.TestTools;
+import org.apache.drill.exec.ExecConstants;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestCTASJson extends PlanTestBase {
+ static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestCTASJson.class);
+
+ static final String WORKING_PATH = TestTools.getWorkingPath();
+ static final String TEST_RES_PATH = WORKING_PATH + "/src/test/resources";
+
+ @Test
+ /**
+ * Test a source json file that contains records that are maps with fields of all types.
+ * Some records have missing fields. CTAS should skip the missing fields
+ */
+ public void testctas_alltypes_map() throws Exception {
+ String testName = "ctas_alltypes_map";
+ test("use dfs_test.tmp");
+ test("create table " + testName + "_json as select * from cp.`json/" + testName + ".json`");
+
+ final String query = "select * from `" + testName + "_json` t1 ";
+
+ try {
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .jsonBaselineFile("json/" + testName + ".json")
+ .optionSettingQueriesForTestQuery("alter session set store.format = 'json' ")
+ .optionSettingQueriesForTestQuery("alter session set store.json.writer.skip_null_fields = true") // DEFAULT
+ .build()
+ .run();
+ } finally {
+ test("drop table " + testName + "_json");
+ test("alter session reset store.format ");
+ test("alter session reset store.json.writer.skip_null_fields ");
+ }
+ }
+
+ @Test
+ /**
+ * Test a source json file that contains records that are maps with fields of all types.
+ * Some records have missing fields. CTAS should NOT skip the missing fields
+ */
+ public void testctas_alltypes_map_noskip() throws Exception {
+ String testName = "ctas_alltypes_map";
+ test("use dfs_test.tmp");
+ test("create table " + testName + "_json as select * from cp.`json/" + testName + ".json`");
+
+ final String query = "select * from `" + testName + "_json` t1 ";
+
+ try {
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .jsonBaselineFile("json/" + testName + "_out.json")
+ .optionSettingQueriesForTestQuery("alter session set store.format = 'json' ")
+ .optionSettingQueriesForTestQuery("alter session set store.json.writer.skip_null_fields = false") // change from DEFAULT
+ .build()
+ .run();
+ } finally{
+ test("drop table " + testName + "_json" );
+ test("alter session reset store.format ");
+ test("alter session reset store.json.writer.skip_null_fields ");
+ }
+
+ }
+
+ @Test
+ /**
+ * Test a source json file that contains records that are maps with fields of all types.
+ * Some records have missing fields. CTAS should skip the missing fields
+ */
+ public void testctas_alltypes_repeatedmap() throws Exception {
+ String testName = "ctas_alltypes_repeated_map";
+ test("use dfs_test.tmp");
+ test("create table " + testName + "_json as select * from cp.`json/" + testName + ".json`");
+
+ final String query = "select * from `" + testName + "_json` t1 ";
+
+ try {
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .jsonBaselineFile("json/" + testName + ".json")
+ .optionSettingQueriesForTestQuery("alter session set store.format = 'json' ")
+ .optionSettingQueriesForTestQuery(
+ "alter session set store.json.writer.skip_null_fields = true") // DEFAULT
+ .build()
+ .run();
+ }finally{
+ test("drop table " + testName + "_json" );
+ test("alter session reset store.format ");
+ test("alter session reset store.json.writer.skip_null_fields ");
+ }
+
+ }
+
+ @Test
+ /**
+ * Test a source json file that contains records that are maps with fields of all types.
+ * Some records have missing fields. CTAS should NOT skip the missing fields
+ */
+ public void testctas_alltypes_repeated_map_noskip() throws Exception {
+ String testName = "ctas_alltypes_repeated_map";
+ test("use dfs_test.tmp");
+ test("create table " + testName + "_json as select * from cp.`json/" + testName + ".json`");
+
+ final String query = "select * from `" + testName + "_json` t1 ";
+
+ try {
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .jsonBaselineFile("json/" + testName + "_out.json")
+ .optionSettingQueriesForTestQuery("alter session set store.format = 'json' ")
+ .optionSettingQueriesForTestQuery(
+ "alter session set store.json.writer.skip_null_fields = false") // change from DEFAULT
+ .build()
+ .run();
+ } finally {
+ test("drop table " + testName + "_json" );
+ test("alter session reset store.format ");
+ test("alter session reset store.json.writer.skip_null_fields ");
+ }
+
+ }
+
+}
diff --git a/exec/java-exec/src/test/resources/json/ctas_alltypes_map.json b/exec/java-exec/src/test/resources/json/ctas_alltypes_map.json
new file mode 100644
index 000000000..ca7bda0ea
--- /dev/null
+++ b/exec/java-exec/src/test/resources/json/ctas_alltypes_map.json
@@ -0,0 +1,28 @@
+{
+ "X" : {
+ "key1" : "value1",
+ "key2" : 1,
+ "key3" : 10.1,
+ "key4" : true,
+ "key5" : 1000000000,
+ "key6" : 1000000000.1
+ }
+}
+{
+ "X" : {
+ "key1" : "value2",
+ "key2" : 2
+ }
+}
+{
+ "X" : {
+ "key3" : 10.3,
+ "key4" : false
+ }
+}
+{
+ "X" : {
+ "key5" : 1000000005,
+ "key6" : 1000000000.6
+ }
+}
diff --git a/exec/java-exec/src/test/resources/json/ctas_alltypes_map_out.json b/exec/java-exec/src/test/resources/json/ctas_alltypes_map_out.json
new file mode 100644
index 000000000..51eb90dfd
--- /dev/null
+++ b/exec/java-exec/src/test/resources/json/ctas_alltypes_map_out.json
@@ -0,0 +1,41 @@
+{
+ "X" : {
+ "key1" : "value1",
+ "key2" : 1,
+ "key3" : 10.1,
+ "key4" : true,
+ "key5" : 1000000000,
+ "key6" : 1000000000.1
+ }
+}
+{
+ "X" : {
+ "key1" : "value2",
+ "key2" : 2,
+ "key3" : null,
+ "key4" : null,
+ "key5" : null,
+ "key6" : null
+ }
+}
+{
+ "X" : {
+ "key1" : null,
+ "key2" : null,
+ "key3" : 10.3,
+ "key4" : false,
+ "key5" : null,
+ "key6" : null
+ }
+}
+{
+ "X" : {
+ "key1" : null,
+ "key2" : null,
+ "key3" : null,
+ "key4" : null,
+ "key5" : 1000000005,
+ "key6" : 1000000000.6
+ }
+}
+
diff --git a/exec/java-exec/src/test/resources/json/ctas_alltypes_repeated_map.json b/exec/java-exec/src/test/resources/json/ctas_alltypes_repeated_map.json
new file mode 100644
index 000000000..d19e8c559
--- /dev/null
+++ b/exec/java-exec/src/test/resources/json/ctas_alltypes_repeated_map.json
@@ -0,0 +1,28 @@
+{
+ "X" : [ {
+ "key1" : "value1",
+ "key2" : 1,
+ "key3" : 10.1,
+ "key4" : true,
+ "key5" : 1000000000,
+ "key6" : 1000000000.1
+ } ]
+}
+{
+ "X" : [ {
+ "key1" : "value2",
+ "key2" : 2
+ } ]
+}
+{
+ "X" : [ {
+ "key3" : 10.3,
+ "key4" : false
+ } ]
+}
+{
+ "X" : [ {
+ "key5" : 1000000005,
+ "key6" : 1000000000.6
+ } ]
+}
diff --git a/exec/java-exec/src/test/resources/json/ctas_alltypes_repeated_map_out.json b/exec/java-exec/src/test/resources/json/ctas_alltypes_repeated_map_out.json
new file mode 100644
index 000000000..dc3aabf1d
--- /dev/null
+++ b/exec/java-exec/src/test/resources/json/ctas_alltypes_repeated_map_out.json
@@ -0,0 +1,41 @@
+{
+ "X" : [ {
+ "key1" : "value1",
+ "key2" : 1,
+ "key3" : 10.1,
+ "key4" : true,
+ "key5" : 1000000000,
+ "key6" : 1000000000.1
+ } ]
+}
+{
+ "X" : [ {
+ "key1" : "value2",
+ "key2" : 2,
+ "key3" : null,
+ "key4" : null,
+ "key5" : null,
+ "key6" : null
+ } ]
+}
+{
+ "X" : [ {
+ "key1" : null,
+ "key2" : null,
+ "key3" : 10.3,
+ "key4" : false,
+ "key5" : null,
+ "key6" : null
+ } ]
+}
+{
+ "X" : [ {
+ "key1" : null,
+ "key2" : null,
+ "key3" : null,
+ "key4" : null,
+ "key5" : 1000000005,
+ "key6" : 1000000000.6
+ } ]
+}
+