From ad63b70478674185c438682fc0ccd7ca191da1d5 Mon Sep 17 00:00:00 2001 From: Volodymyr Vysotskyi Date: Sun, 10 Feb 2019 19:26:08 +0200 Subject: DRILL-7022: Partition pruning is not happening the first time after the metadata auto-refresh closes #1638 --- .../exec/store/parquet/ParquetReaderUtility.java | 5 +++++ .../store/parquet/TestParquetMetadataCache.java | 22 ++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java index 733915b61..14a222c37 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java @@ -389,6 +389,11 @@ public class ParquetReaderUtility { minBytes = Base64.decodeBase64(minBytes); maxBytes = hasSingleValue ? minBytes : Base64.decodeBase64(maxBytes); } + } else if (minValue instanceof Binary && maxValue instanceof Binary) { + // for the case when cache file was auto-refreshed, values from parquet footers are used, + // so there is no need to convert values, but they should be set in ColumnMetadata + minBytes = ((Binary) minValue).getBytes(); + maxBytes = ((Binary) maxValue).getBytes(); } } diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java index f152078bf..f85ef2b71 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java @@ -902,6 +902,28 @@ public class TestParquetMetadataCache extends PlanTestBase { .run(); } + @Test + public void testAutoRefreshPartitionPruning() throws Exception { + test("create table 
dfs.tmp.`orders` partition by (o_orderstatus) as\n" + + "select * from cp.`tpch/orders.parquet`"); + + test("refresh table metadata dfs.tmp.`orders`"); + + File ordersTable = new File(dirTestWatcher.getDfsTestTmpDir(), "orders"); + + // sets the last-modified time of the directory later than that of the cache file to force metadata cache file auto-refresh + assertTrue("Unable to change the last-modified time of table directory", + ordersTable.setLastModified(new File(ordersTable, Metadata.METADATA_FILENAME).lastModified() + 100500)); + + String query = "select * from dfs.tmp.`orders`\n" + + "where o_orderstatus = 'O' and o_orderdate < '1995-03-10'"; + PlanTestBase.testPlanOneExpectedPattern(query, "numRowGroups=1"); + + int actualRowCount = testSql(query); + assertEquals("Row count does not match the expected value", 1, actualRowCount); + // TODO: Check that the metadata cache file is actually regenerated once Drill uses a JDK version in which JDK-8177809 is resolved. + } + /** * Helper method for checking the metadata file existence * -- cgit v1.2.3