about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
authorVolodymyr Vysotskyi <vvovyk@gmail.com>2019-02-24 23:04:46 +0200
committerHanumath Maduri <hmaduri@apache.org>2019-03-01 17:21:23 -0800
commitee6504799f248e7d8702002ab208636a91042a34 (patch)
treecf705d9fe1a5648da830b8aee1555ffc7d8b77ef
parent95bca65c45b989117b8e28bf610d7972a137ab36 (diff)
DRILL-6927: Avoid double conversion from impala timestamp when hive native parquet reader is used
closes #1655
-rw-r--r-- contrib/storage-hive/core/src/main/java/org/apache/drill/exec/planner/sql/logical/ConvertHiveParquetScanToDrillParquetScan.java | 32
-rw-r--r-- contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHiveDrillNativeParquetReader.java | 18
2 files changed, 37 insertions, 13 deletions
diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/planner/sql/logical/ConvertHiveParquetScanToDrillParquetScan.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/planner/sql/logical/ConvertHiveParquetScanToDrillParquetScan.java
index 7286d7a24..0963d2a60 100644
--- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/planner/sql/logical/ConvertHiveParquetScanToDrillParquetScan.java
+++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/planner/sql/logical/ConvertHiveParquetScanToDrillParquetScan.java
@@ -26,6 +26,7 @@ import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.exec.ExecConstants;
import org.apache.drill.exec.planner.logical.DrillProjectRel;
import org.apache.drill.exec.planner.logical.DrillScanRel;
import org.apache.drill.exec.planner.logical.RelOptHelper;
@@ -43,6 +44,7 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -138,10 +140,10 @@ public class ConvertHiveParquetScanToDrillParquetScan extends StoragePluginOptim
/**
* Helper method which creates a DrillScalRel with native HiveScan.
*/
- private DrillScanRel createNativeScanRel(final Map<String, String> partitionColMapping,
- final DrillScanRel hiveScanRel,
- final List<HiveMetadataProvider.LogicalInputSplit> logicalInputSplits,
- final OptionManager options) throws Exception {
+ private DrillScanRel createNativeScanRel(Map<String, String> partitionColMapping,
+ DrillScanRel hiveScanRel,
+ List<HiveMetadataProvider.LogicalInputSplit> logicalInputSplits,
+ OptionManager options) throws IOException {
final RelDataTypeFactory typeFactory = hiveScanRel.getCluster().getTypeFactory();
final RelDataType varCharType = typeFactory.createSqlType(SqlTypeName.VARCHAR);
@@ -220,17 +222,21 @@ public class ConvertHiveParquetScanToDrillParquetScan extends StoragePluginOptim
/**
* Apply any data format conversion expressions.
*/
- private RexNode createColumnFormatConversion(final DrillScanRel hiveScanRel, final DrillScanRel nativeScanRel,
- final String colName, final RexBuilder rb) {
+ private RexNode createColumnFormatConversion(DrillScanRel hiveScanRel, DrillScanRel nativeScanRel,
+ String colName, RexBuilder rb) {
- final RelDataType outputType = hiveScanRel.getRowType().getField(colName, false, false).getType();
- final RelDataTypeField inputField = nativeScanRel.getRowType().getField(colName, false, false);
- final RexInputRef inputRef = rb.makeInputRef(inputField.getType(), inputField.getIndex());
+ RelDataType outputType = hiveScanRel.getRowType().getField(colName, false, false).getType();
+ RelDataTypeField inputField = nativeScanRel.getRowType().getField(colName, false, false);
+ RexInputRef inputRef = rb.makeInputRef(inputField.getType(), inputField.getIndex());
- if (outputType.getSqlTypeName() == SqlTypeName.TIMESTAMP) {
- // TIMESTAMP is stored as INT96 by Hive in ParquetFormat. Use convert_fromTIMESTAMP_IMPALA UDF to convert
- // INT96 format data to TIMESTAMP
- // TODO: Remove this conversion once "store.parquet.reader.int96_as_timestamp" will be true by default
+ PlannerSettings settings = PrelUtil.getPlannerSettings(hiveScanRel.getCluster().getPlanner());
+ boolean conversionToTimestampEnabled = settings.getOptions().getBoolean(ExecConstants.PARQUET_READER_INT96_AS_TIMESTAMP);
+
+ if (outputType.getSqlTypeName() == SqlTypeName.TIMESTAMP && !conversionToTimestampEnabled) {
+ // TIMESTAMP is stored as INT96 by Hive in ParquetFormat.
+ // Used convert_fromTIMESTAMP_IMPALA UDF to convert INT96 format data to TIMESTAMP
+ // only for the case when `store.parquet.reader.int96_as_timestamp` is
+ // disabled to avoid double conversion after reading value from parquet and here.
return rb.makeCall(INT96_TO_TIMESTAMP, inputRef);
}
diff --git a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHiveDrillNativeParquetReader.java b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHiveDrillNativeParquetReader.java
index 1a02eb48f..c11a6caa7 100644
--- a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHiveDrillNativeParquetReader.java
+++ b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHiveDrillNativeParquetReader.java
@@ -301,4 +301,22 @@ public class TestHiveDrillNativeParquetReader extends HiveTestBase {
.go();
}
+ @Test
+ public void testInt96TimestampConversionWithNativeReader() throws Exception {
+ String query = "select timestamp_field from hive.readtest_parquet";
+
+ try {
+ setSessionOption(ExecConstants.PARQUET_READER_INT96_AS_TIMESTAMP, true);
+
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("timestamp_field")
+ .baselineValues(DateUtility.parseBest("2013-07-05 17:01:00"))
+ .baselineValues(new Object[]{null})
+ .go();
+ } finally {
+ resetSessionOption(ExecConstants.PARQUET_READER_INT96_AS_TIMESTAMP);
+ }
+ }
}