diff options
author | Volodymyr Vysotskyi <vvovyk@gmail.com> | 2019-02-24 23:04:46 +0200 |
---|---|---|
committer | Hanumath Maduri <hmaduri@apache.org> | 2019-03-01 17:21:23 -0800 |
commit | ee6504799f248e7d8702002ab208636a91042a34 (patch) | |
tree | cf705d9fe1a5648da830b8aee1555ffc7d8b77ef | |
parent | 95bca65c45b989117b8e28bf610d7972a137ab36 (diff) |
DRILL-6927: Avoid double conversion from impala timestamp when hive native parquet reader is used
closes #1655
2 files changed, 37 insertions, 13 deletions
diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/planner/sql/logical/ConvertHiveParquetScanToDrillParquetScan.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/planner/sql/logical/ConvertHiveParquetScanToDrillParquetScan.java index 7286d7a24..0963d2a60 100644 --- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/planner/sql/logical/ConvertHiveParquetScanToDrillParquetScan.java +++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/planner/sql/logical/ConvertHiveParquetScanToDrillParquetScan.java @@ -26,6 +26,7 @@ import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.exec.ExecConstants; import org.apache.drill.exec.planner.logical.DrillProjectRel; import org.apache.drill.exec.planner.logical.DrillScanRel; import org.apache.drill.exec.planner.logical.RelOptHelper; @@ -43,6 +44,7 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat; +import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -138,10 +140,10 @@ public class ConvertHiveParquetScanToDrillParquetScan extends StoragePluginOptim /** * Helper method which creates a DrillScalRel with native HiveScan. */ - private DrillScanRel createNativeScanRel(final Map<String, String> partitionColMapping, - final DrillScanRel hiveScanRel, - final List<HiveMetadataProvider.LogicalInputSplit> logicalInputSplits, - final OptionManager options) throws Exception { + private DrillScanRel createNativeScanRel(Map<String, String> partitionColMapping, + DrillScanRel hiveScanRel, + List<HiveMetadataProvider.LogicalInputSplit> logicalInputSplits, + OptionManager options) throws IOException { final RelDataTypeFactory typeFactory = hiveScanRel.getCluster().getTypeFactory(); final RelDataType varCharType = typeFactory.createSqlType(SqlTypeName.VARCHAR); @@ -220,17 +222,21 @@ public class ConvertHiveParquetScanToDrillParquetScan extends StoragePluginOptim /** * Apply any data format conversion expressions. */ - private RexNode createColumnFormatConversion(final DrillScanRel hiveScanRel, final DrillScanRel nativeScanRel, - final String colName, final RexBuilder rb) { + private RexNode createColumnFormatConversion(DrillScanRel hiveScanRel, DrillScanRel nativeScanRel, + String colName, RexBuilder rb) { - final RelDataType outputType = hiveScanRel.getRowType().getField(colName, false, false).getType(); - final RelDataTypeField inputField = nativeScanRel.getRowType().getField(colName, false, false); - final RexInputRef inputRef = rb.makeInputRef(inputField.getType(), inputField.getIndex()); + RelDataType outputType = hiveScanRel.getRowType().getField(colName, false, false).getType(); + RelDataTypeField inputField = nativeScanRel.getRowType().getField(colName, false, false); + RexInputRef inputRef = rb.makeInputRef(inputField.getType(), inputField.getIndex()); - if (outputType.getSqlTypeName() == SqlTypeName.TIMESTAMP) { - // TIMESTAMP is stored as INT96 by Hive in ParquetFormat. Use convert_fromTIMESTAMP_IMPALA UDF to convert - // INT96 format data to TIMESTAMP - // TODO: Remove this conversion once "store.parquet.reader.int96_as_timestamp" will be true by default + PlannerSettings settings = PrelUtil.getPlannerSettings(hiveScanRel.getCluster().getPlanner()); + boolean conversionToTimestampEnabled = settings.getOptions().getBoolean(ExecConstants.PARQUET_READER_INT96_AS_TIMESTAMP); + + if (outputType.getSqlTypeName() == SqlTypeName.TIMESTAMP && !conversionToTimestampEnabled) { + // TIMESTAMP is stored as INT96 by Hive in ParquetFormat. + // Used convert_fromTIMESTAMP_IMPALA UDF to convert INT96 format data to TIMESTAMP + // only for the case when `store.parquet.reader.int96_as_timestamp` is + // disabled to avoid double conversion after reading value from parquet and here. return rb.makeCall(INT96_TO_TIMESTAMP, inputRef); } diff --git a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHiveDrillNativeParquetReader.java b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHiveDrillNativeParquetReader.java index 1a02eb48f..c11a6caa7 100644 --- a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHiveDrillNativeParquetReader.java +++ b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHiveDrillNativeParquetReader.java @@ -301,4 +301,22 @@ public class TestHiveDrillNativeParquetReader extends HiveTestBase { .go(); } + @Test + public void testInt96TimestampConversionWithNativeReader() throws Exception { + String query = "select timestamp_field from hive.readtest_parquet"; + + try { + setSessionOption(ExecConstants.PARQUET_READER_INT96_AS_TIMESTAMP, true); + + testBuilder() + .sqlQuery(query) + .unOrdered() + .baselineColumns("timestamp_field") + .baselineValues(DateUtility.parseBest("2013-07-05 17:01:00")) + .baselineValues(new Object[]{null}) + .go(); + } finally { + resetSessionOption(ExecConstants.PARQUET_READER_INT96_AS_TIMESTAMP); + } + } } |