diff options
author | Parth Chandra <parthc@apache.org> | 2016-06-01 17:19:03 -0700 |
---|---|---|
committer | Parth Chandra <parthc@apache.org> | 2016-06-07 10:06:53 -0700 |
commit | 6286c0a4b8e39524fe00d623152d1d38db15774f (patch) | |
tree | 1a41cee33dc7d4668e682369361fc7866e1a7791 | |
parent | d1ac6fbae025723d54eeeb7d15b89a0df5912a3c (diff) |
DRILL-4694: CTAS in JSON format produces extraneous NULL fields Changed behavior of JSON CTAS to skip fields if the value is null. Added an option "store.json.writer.skip_null_fields" to enable old behavior.
10 files changed, 305 insertions, 0 deletions
diff --git a/exec/java-exec/src/main/codegen/templates/JsonOutputRecordWriter.java b/exec/java-exec/src/main/codegen/templates/JsonOutputRecordWriter.java index b6b0e9c14..651f5a860 100644 --- a/exec/java-exec/src/main/codegen/templates/JsonOutputRecordWriter.java +++ b/exec/java-exec/src/main/codegen/templates/JsonOutputRecordWriter.java @@ -43,6 +43,7 @@ import java.util.List; public abstract class JSONOutputRecordWriter extends AbstractRecordWriter implements RecordWriter { protected JsonOutput gen; + protected boolean skipNullFields = true; <#list vv.types as type> <#list type.minor as minor> @@ -61,7 +62,13 @@ public abstract class JSONOutputRecordWriter extends AbstractRecordWriter implem @Override public void startField() throws IOException { + <#if mode.prefix == "Nullable" > + if (!skipNullFields || this.reader.isSet()) { + gen.writeFieldName(fieldName); + } + <#else> gen.writeFieldName(fieldName); + </#if> } @Override @@ -120,7 +127,13 @@ public abstract class JSONOutputRecordWriter extends AbstractRecordWriter implem <#elseif mode.prefix == "Repeated" > gen.write${typeName}(i, reader); <#else> + <#if mode.prefix == "Nullable" > + if (!skipNullFields || this.reader.isSet()) { + gen.write${typeName}(reader); + } + <#else> gen.write${typeName}(reader); + </#if> </#if> <#if mode.prefix == "Repeated"> diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java index 17fbb7bb8..6eb8a3a14 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java @@ -134,6 +134,7 @@ public interface ExecConstants { BooleanValidator JSON_READER_ALL_TEXT_MODE_VALIDATOR = new BooleanValidator(JSON_ALL_TEXT_MODE, false); BooleanValidator JSON_EXTENDED_TYPES = new BooleanValidator("store.json.extended_types", false); BooleanValidator JSON_WRITER_UGLIFY = new BooleanValidator("store.json.writer.uglify", false); + BooleanValidator JSON_WRITER_SKIPNULLFIELDS = new BooleanValidator("store.json.writer.skip_null_fields", true); DoubleValidator TEXT_ESTIMATED_ROW_SIZE = new RangeDoubleValidator( "store.text.estimated_row_size_bytes", 1, Long.MAX_VALUE, 100.0); diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java index c35ed0e11..579276e71 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java @@ -99,6 +99,7 @@ public class SystemOptionManager extends BaseOptionManager implements AutoClosea ExecConstants.TEXT_ESTIMATED_ROW_SIZE, ExecConstants.JSON_EXTENDED_TYPES, ExecConstants.JSON_WRITER_UGLIFY, + ExecConstants.JSON_WRITER_SKIPNULLFIELDS, ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE_VALIDATOR, ExecConstants.FILESYSTEM_PARTITION_COLUMN_LABEL_VALIDATOR, ExecConstants.MONGO_READER_ALL_TEXT_MODE_VALIDATOR, diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONFormatPlugin.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONFormatPlugin.java index 3f093bed7..30c248e43 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONFormatPlugin.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONFormatPlugin.java @@ -81,6 +81,7 @@ public class JSONFormatPlugin extends EasyFormatPlugin<JSONFormatConfig> { options.put("extension", "json"); options.put("extended", Boolean.toString(context.getOptions().getOption(ExecConstants.JSON_EXTENDED_TYPES))); options.put("uglify", Boolean.toString(context.getOptions().getOption(ExecConstants.JSON_WRITER_UGLIFY))); + options.put("skipnulls", Boolean.toString(context.getOptions().getOption(ExecConstants.JSON_WRITER_SKIPNULLFIELDS))); RecordWriter recordWriter = new JsonRecordWriter(); recordWriter.init(options); diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JsonRecordWriter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JsonRecordWriter.java index 848da7720..f27e04ce4 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JsonRecordWriter.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JsonRecordWriter.java @@ -72,6 +72,7 @@ public class JsonRecordWriter extends JSONOutputRecordWriter implements RecordWr this.fieldDelimiter = writerOptions.get("separator"); this.extension = writerOptions.get("extension"); this.useExtendedOutput = Boolean.parseBoolean(writerOptions.get("extended")); + this.skipNullFields = Boolean.parseBoolean(writerOptions.get("skipnulls")); final boolean uglify = Boolean.parseBoolean(writerOptions.get("uglify")); Configuration conf = new Configuration(); diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestCTASJson.java b/exec/java-exec/src/test/java/org/apache/drill/TestCTASJson.java new file mode 100644 index 000000000..c76892d4b --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/TestCTASJson.java @@ -0,0 +1,150 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill; + + +import org.apache.drill.common.util.TestTools; +import org.apache.drill.exec.ExecConstants; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class TestCTASJson extends PlanTestBase { + static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestCTASJson.class); + + static final String WORKING_PATH = TestTools.getWorkingPath(); + static final String TEST_RES_PATH = WORKING_PATH + "/src/test/resources"; + + @Test + /** + * Test a source json file that contains records that are maps with fields of all types. + * Some records have missing fields. CTAS should skip the missing fields + */ + public void testctas_alltypes_map() throws Exception { + String testName = "ctas_alltypes_map"; + test("use dfs_test.tmp"); + test("create table " + testName + "_json as select * from cp.`json/" + testName + ".json`"); + + final String query = "select * from `" + testName + "_json` t1 "; + + try { + testBuilder() + .sqlQuery(query) + .ordered() + .jsonBaselineFile("json/" + testName + ".json") + .optionSettingQueriesForTestQuery("alter session set store.format = 'json' ") + .optionSettingQueriesForTestQuery("alter session set store.json.writer.skip_null_fields = true") // DEFAULT + .build() + .run(); + } finally { + test("drop table " + testName + "_json"); + test("alter session reset store.format "); + test("alter session reset store.json.writer.skip_null_fields "); + } + } + + @Test + /** + * Test a source json file that contains records that are maps with fields of all types. + * Some records have missing fields. CTAS should NOT skip the missing fields + */ + public void testctas_alltypes_map_noskip() throws Exception { + String testName = "ctas_alltypes_map"; + test("use dfs_test.tmp"); + test("create table " + testName + "_json as select * from cp.`json/" + testName + ".json`"); + + final String query = "select * from `" + testName + "_json` t1 "; + + try { + testBuilder() + .sqlQuery(query) + .ordered() + .jsonBaselineFile("json/" + testName + "_out.json") + .optionSettingQueriesForTestQuery("alter session set store.format = 'json' ") + .optionSettingQueriesForTestQuery("alter session set store.json.writer.skip_null_fields = false") // change from DEFAULT + .build() + .run(); + } finally{ + test("drop table " + testName + "_json" ); + test("alter session reset store.format "); + test("alter session reset store.json.writer.skip_null_fields "); + } + + } + + @Test + /** + * Test a source json file that contains records that are maps with fields of all types. + * Some records have missing fields. CTAS should skip the missing fields + */ + public void testctas_alltypes_repeatedmap() throws Exception { + String testName = "ctas_alltypes_repeated_map"; + test("use dfs_test.tmp"); + test("create table " + testName + "_json as select * from cp.`json/" + testName + ".json`"); + + final String query = "select * from `" + testName + "_json` t1 "; + + try { + testBuilder() + .sqlQuery(query) + .ordered() + .jsonBaselineFile("json/" + testName + ".json") + .optionSettingQueriesForTestQuery("alter session set store.format = 'json' ") + .optionSettingQueriesForTestQuery( + "alter session set store.json.writer.skip_null_fields = true") // DEFAULT + .build() + .run(); + }finally{ + test("drop table " + testName + "_json" ); + test("alter session reset store.format "); + test("alter session reset store.json.writer.skip_null_fields "); + } + + } + + @Test + /** + * Test a source json file that contains records that are maps with fields of all types. + * Some records have missing fields. CTAS should NOT skip the missing fields + */ + public void testctas_alltypes_repeated_map_noskip() throws Exception { + String testName = "ctas_alltypes_repeated_map"; + test("use dfs_test.tmp"); + test("create table " + testName + "_json as select * from cp.`json/" + testName + ".json`"); + + final String query = "select * from `" + testName + "_json` t1 "; + + try { + testBuilder() + .sqlQuery(query) + .ordered() + .jsonBaselineFile("json/" + testName + "_out.json") + .optionSettingQueriesForTestQuery("alter session set store.format = 'json' ") + .optionSettingQueriesForTestQuery( + "alter session set store.json.writer.skip_null_fields = false") // change from DEFAULT + .build() + .run(); + } finally { + test("drop table " + testName + "_json" ); + test("alter session reset store.format "); + test("alter session reset store.json.writer.skip_null_fields "); + } + + } + +} diff --git a/exec/java-exec/src/test/resources/json/ctas_alltypes_map.json b/exec/java-exec/src/test/resources/json/ctas_alltypes_map.json new file mode 100644 index 000000000..ca7bda0ea --- /dev/null +++ b/exec/java-exec/src/test/resources/json/ctas_alltypes_map.json @@ -0,0 +1,28 @@ +{ + "X" : { + "key1" : "value1", + "key2" : 1, + "key3" : 10.1, + "key4" : true, + "key5" : 1000000000, + "key6" : 1000000000.1 + } +} +{ + "X" : { + "key1" : "value2", + "key2" : 2 + } +} +{ + "X" : { + "key3" : 10.3, + "key4" : false + } +} +{ + "X" : { + "key5" : 1000000005, + "key6" : 1000000000.6 + } +} diff --git a/exec/java-exec/src/test/resources/json/ctas_alltypes_map_out.json b/exec/java-exec/src/test/resources/json/ctas_alltypes_map_out.json new file mode 100644 index 000000000..51eb90dfd --- /dev/null +++ b/exec/java-exec/src/test/resources/json/ctas_alltypes_map_out.json @@ -0,0 +1,41 @@ +{ + "X" : { + "key1" : "value1", + "key2" : 1, + "key3" : 10.1, + "key4" : true, + "key5" : 1000000000, + "key6" : 1000000000.1 + } +} +{ + "X" : { + "key1" : "value2", + "key2" : 2, + "key3" : null, + "key4" : null, + "key5" : null, + "key6" : null + } +} +{ + "X" : { + "key1" : null, + "key2" : null, + "key3" : 10.3, + "key4" : false, + "key5" : null, + "key6" : null + } +} +{ + "X" : { + "key1" : null, + "key2" : null, + "key3" : null, + "key4" : null, + "key5" : 1000000005, + "key6" : 1000000000.6 + } +} + diff --git a/exec/java-exec/src/test/resources/json/ctas_alltypes_repeated_map.json b/exec/java-exec/src/test/resources/json/ctas_alltypes_repeated_map.json new file mode 100644 index 000000000..d19e8c559 --- /dev/null +++ b/exec/java-exec/src/test/resources/json/ctas_alltypes_repeated_map.json @@ -0,0 +1,28 @@ +{ + "X" : [ { + "key1" : "value1", + "key2" : 1, + "key3" : 10.1, + "key4" : true, + "key5" : 1000000000, + "key6" : 1000000000.1 + } ] +} +{ + "X" : [ { + "key1" : "value2", + "key2" : 2 + } ] +} +{ + "X" : [ { + "key3" : 10.3, + "key4" : false + } ] +} +{ + "X" : [ { + "key5" : 1000000005, + "key6" : 1000000000.6 + } ] +} diff --git a/exec/java-exec/src/test/resources/json/ctas_alltypes_repeated_map_out.json b/exec/java-exec/src/test/resources/json/ctas_alltypes_repeated_map_out.json new file mode 100644 index 000000000..dc3aabf1d --- /dev/null +++ b/exec/java-exec/src/test/resources/json/ctas_alltypes_repeated_map_out.json @@ -0,0 +1,41 @@ +{ + "X" : [ { + "key1" : "value1", + "key2" : 1, + "key3" : 10.1, + "key4" : true, + "key5" : 1000000000, + "key6" : 1000000000.1 + } ] +} +{ + "X" : [ { + "key1" : "value2", + "key2" : 2, + "key3" : null, + "key4" : null, + "key5" : null, + "key6" : null + } ] +} +{ + "X" : [ { + "key1" : null, + "key2" : null, + "key3" : 10.3, + "key4" : false, + "key5" : null, + "key6" : null + } ] +} +{ + "X" : [ { + "key1" : null, + "key2" : null, + "key3" : null, + "key4" : null, + "key5" : 1000000005, + "key6" : 1000000000.6 + } ] +} + |