aboutsummaryrefslogtreecommitdiff
path: root/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java
diff options
context:
space:
mode:
author Jason Altekruse <altekrusejason@gmail.com> 2014-11-12 18:10:32 -0800
committer Jacques Nadeau <jacques@apache.org> 2014-11-13 09:17:36 -0800
commit b37dc08a46dea1b1c0bfeba21ea73dd6ac0116bd (patch)
tree e0ea6d45fd6ceeedfc189024383f6f9cde17ad89 /exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java
parent 116f6d167f77289f1b03e8e0895851a0f1f411eb (diff)
DRILL-1704: Use complex reader for dictionary encoded files, as original reader seems to be broken
Diffstat (limited to 'exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java')
-rw-r--r-- exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java 12
1 files changed, 10 insertions, 2 deletions
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java
index 53a6ffcfe..4467825f9 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetScanBatchCreator.java
@@ -41,7 +41,9 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import parquet.column.ColumnDescriptor;
+import parquet.column.Encoding;
import parquet.hadoop.ParquetFileReader;
+import parquet.hadoop.metadata.ColumnChunkMetaData;
import parquet.hadoop.metadata.ParquetMetadata;
import parquet.schema.MessageType;
import parquet.schema.Type;
@@ -107,7 +109,7 @@ public class ParquetScanBatchCreator implements BatchCreator<ParquetRowGroupScan
footers.put(e.getPath(),
ParquetFileReader.readFooter( fs.getConf(), new Path(e.getPath())));
}
- if (!context.getOptions().getOption(ExecConstants.PARQUET_NEW_RECORD_READER).bool_val && !isComplex(footers.get(e.getPath()))) {
+ if (!context.getOptions().getOption(ExecConstants.PARQUET_NEW_RECORD_READER).bool_val && !isComplex(footers.get(e.getPath()), e.getRowGroupIndex())) {
readers.add(
new ParquetRecordReader(
context, e.getPath(), e.getRowGroupIndex(), fs,
@@ -153,7 +155,12 @@ public class ParquetScanBatchCreator implements BatchCreator<ParquetRowGroupScan
return s;
}
- private static boolean isComplex(ParquetMetadata footer) {
+ private static boolean isComplex(ParquetMetadata footer, int rowGroupIndex) {
+ for (ColumnChunkMetaData md : footer.getBlocks().get(rowGroupIndex).getColumns()) {
+ if (md.getEncodings().contains(Encoding.PLAIN_DICTIONARY)) {
+ return true; // for now, use Complex reader for Dictionary encoded
+ }
+ }
MessageType schema = footer.getFileMetaData().getSchema();
for (Type type : schema.getFields()) {
@@ -161,6 +168,7 @@ public class ParquetScanBatchCreator implements BatchCreator<ParquetRowGroupScan
return true;
}
}
+ ColumnDescriptor desc;
for (ColumnDescriptor col : schema.getColumns()) {
if (col.getMaxRepetitionLevel() > 0) {
return true;