about summary refs log tree commit diff
path: root/contrib/storage-hive/core/src/test/java/org/apache/drill
diff options
context:
space:
mode:
authorArina Ielchiieva <arina.yelchiyeva@gmail.com>2018-10-31 20:24:03 +0200
committerVitalii Diravka <vitalii@apache.org>2018-11-15 13:25:24 -0800
commit527f1fd2452fdb3fd30c0cc154222a099e352f93 (patch)
treeac5704c29e33a31b0b8bc06c3d91dfef79ad1c78 /contrib/storage-hive/core/src/test/java/org/apache/drill
parent904bb71f3b8984148d07a0c8a731ef4aeafa541e (diff)
DRILL-6744: Support varchar and decimal push down
1. Added enableStringsSignedMinMax parquet format plugin config and store.parquet.reader.strings_signed_min_max session option to control reading binary statistics for files generated by prior versions of Parquet 1.10.0. 2. Added ParquetReaderConfig to store configuration needed during reading parquet statistics or files. 3. Provided mechanism to enable varchar / decimal filter push down. 4. Added VersionUtil to compare Drill versions in string representation. 5. Added appropriate unit tests. closes #1537
Diffstat (limited to 'contrib/storage-hive/core/src/test/java/org/apache/drill')
-rw-r--r--contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHiveDrillNativeParquetReader.java | 41
-rw-r--r--contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java | 10
2 files changed, 46 insertions(+), 5 deletions(-)
diff --git a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHiveDrillNativeParquetReader.java b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHiveDrillNativeParquetReader.java
index ea8d5df84..1a02eb48f 100644
--- a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHiveDrillNativeParquetReader.java
+++ b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHiveDrillNativeParquetReader.java
@@ -21,6 +21,8 @@ import static org.hamcrest.CoreMatchers.containsString;
import static org.junit.Assert.assertEquals;
import java.math.BigDecimal;
+import java.util.HashMap;
+import java.util.Map;
import org.apache.drill.PlanTestBase;
import org.apache.drill.categories.HiveStorageTest;
@@ -260,4 +262,43 @@ public class TestHiveDrillNativeParquetReader extends HiveTestBase {
}
}
+ @Test
+ public void testHiveVarcharPushDown() throws Exception {
+ String query = "select int_key from hive.kv_native where var_key = 'var_1'";
+
+ Map<String, String> properties = new HashMap<>();
+ properties.put("true", "numRowGroups=1");
+ properties.put("false", "numRowGroups=4"); // Hive creates parquet files using Parquet lib older than 1.10.0
+ try {
+ for (Map.Entry<String, String> property : properties.entrySet()) {
+ alterSession(ExecConstants.PARQUET_READER_STRINGS_SIGNED_MIN_MAX, property.getKey());
+ testPlanMatchingPatterns(query, new String[]{"HiveDrillNativeParquetScan", property.getValue()});
+
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("int_key")
+ .baselineValues(1)
+ .go();
+ }
+ } finally {
+ resetSessionOption(ExecConstants.PARQUET_READER_STRINGS_SIGNED_MIN_MAX);
+ }
+ }
+
+ @Test
+ public void testHiveDecimalPushDown() throws Exception {
+ String query = "select int_key from hive.kv_native where dec_key = cast(1.11 as decimal(5, 2))";
+ // Hive generates parquet files using parquet lib older than 1.10.0
+ // thus statistics for decimal is not available
+ testPlanMatchingPatterns(query, new String[]{"HiveDrillNativeParquetScan", "numRowGroups=4"});
+
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("int_key")
+ .baselineValues(1)
+ .go();
+ }
+
}
diff --git a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java
index 65d1700fb..84fa368c8 100644
--- a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java
+++ b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java
@@ -552,12 +552,12 @@ public class HiveTestDataGenerator {
private void createTestDataForDrillNativeParquetReaderTests(Driver hiveDriver) {
// Hive managed table that has data qualified for Drill native filter push down
- executeQuery(hiveDriver, "create table kv_native(key int, sub_key int) stored as parquet");
+ executeQuery(hiveDriver, "create table kv_native(key int, int_key int, var_key varchar(10), dec_key decimal(5, 2)) stored as parquet");
// each insert is created in separate file
- executeQuery(hiveDriver, "insert into table kv_native values (1, 1), (1, 2)");
- executeQuery(hiveDriver, "insert into table kv_native values (1, 3), (1, 4)");
- executeQuery(hiveDriver, "insert into table kv_native values (2, 5), (2, 6)");
- executeQuery(hiveDriver, "insert into table kv_native values (null, 9), (null, 10)");
+ executeQuery(hiveDriver, "insert into table kv_native values (1, 1, 'var_1', 1.11), (1, 2, 'var_2', 2.22)");
+ executeQuery(hiveDriver, "insert into table kv_native values (1, 3, 'var_3', 3.33), (1, 4, 'var_4', 4.44)");
+ executeQuery(hiveDriver, "insert into table kv_native values (2, 5, 'var_5', 5.55), (2, 6, 'var_6', 6.66)");
+ executeQuery(hiveDriver, "insert into table kv_native values (null, 7, 'var_7', 7.77), (null, 8, 'var_8', 8.88)");
// Hive external table which has three partitions