aboutsummaryrefslogtreecommitdiff
path: root/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders
diff options
context:
space:
mode:
authorParth Chandra <parthc@apache.org>2017-08-29 11:20:47 -0700
committerParth Chandra <parthc@apache.org>2018-01-11 17:13:47 -0800
commit90eb23baa6a1a205fd0821c7af708969fcb98c5c (patch)
tree669d97a91febd9f2e33bc958118c0509476eb819 /exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders
parentc5af3aefe79c34d5b76bec8ce55875decca9e617 (diff)
DRILL-5971: Fix INT64, INT32 logical types in complex parquet reader
Added the following types : ENUM (Binary annotated as ENUM) INT96 (Dictionary encoded) Fixed issue with reading Dictionary encoded fixed width reader Added test file generator This closes #1049
Diffstat (limited to 'exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders')
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ColumnReaderFactory.java11
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java2
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetToDrillTypeConverter.java4
3 files changed, 14 insertions, 3 deletions
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ColumnReaderFactory.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ColumnReaderFactory.java
index 495f70bc5..09cdc5d5a 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ColumnReaderFactory.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ColumnReaderFactory.java
@@ -82,8 +82,9 @@ public class ColumnReaderFactory {
if (columnChunkMetaData.getType() == PrimitiveType.PrimitiveTypeName.BOOLEAN){
return new BitReader(recordReader, allocateSize, descriptor, columnChunkMetaData,
fixedLength, (BitVector) v, schemaElement);
- } else if (columnChunkMetaData.getType() == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY ||
- columnChunkMetaData.getType() == PrimitiveType.PrimitiveTypeName.INT96) {
+ } else if (!columnChunkMetaData.getEncodings().contains(Encoding.PLAIN_DICTIONARY) && (
+ columnChunkMetaData.getType() == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY
+ || columnChunkMetaData.getType() == PrimitiveType.PrimitiveTypeName.INT96)) {
if (convertedType == ConvertedType.DECIMAL){
int length = schemaElement.type_length;
if (length <= 12) {
@@ -125,6 +126,7 @@ public class ColumnReaderFactory {
return new ParquetFixedWidthDictionaryReaders.DictionaryTimeReader(recordReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (TimeVector) v, schemaElement);
case INT_8:
case INT_16:
+ case INT_32:
return new ParquetFixedWidthDictionaryReaders.DictionaryIntReader(recordReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (IntVector) v, schemaElement);
case UINT_8:
case UINT_16:
@@ -138,6 +140,8 @@ public class ColumnReaderFactory {
return new ParquetFixedWidthDictionaryReaders.DictionaryBigIntReader(recordReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (BigIntVector) v, schemaElement);
}
switch (convertedType) {
+ case INT_64:
+ return new ParquetFixedWidthDictionaryReaders.DictionaryBigIntReader(recordReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (BigIntVector) v, schemaElement);
case UINT_64:
return new ParquetFixedWidthDictionaryReaders.DictionaryUInt8Reader(recordReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (UInt8Vector) v, schemaElement);
case DECIMAL:
@@ -152,6 +156,7 @@ public class ColumnReaderFactory {
case DOUBLE:
return new ParquetFixedWidthDictionaryReaders.DictionaryFloat8Reader(recordReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (Float8Vector) v, schemaElement);
case FIXED_LEN_BYTE_ARRAY:
+ case INT96:
return new ParquetFixedWidthDictionaryReaders.DictionaryFixedBinaryReader(recordReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (VarBinaryVector) v, schemaElement);
default:
throw new ExecutionSetupException("Unsupported dictionary column type " + descriptor.getType().name() );
@@ -213,6 +218,7 @@ public class ColumnReaderFactory {
}
switch (convertedType) {
case UTF8:
+ case ENUM:
return new VarLengthColumnReaders.VarCharColumn(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (VarCharVector) v, schemaElement);
case DECIMAL:
if (v instanceof Decimal28SparseVector) {
@@ -230,6 +236,7 @@ public class ColumnReaderFactory {
switch (convertedType) {
case UTF8:
+ case ENUM:
return new VarLengthColumnReaders.NullableVarCharColumn(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (NullableVarCharVector) v, schemaElement);
case DECIMAL:
if (v instanceof NullableDecimal28SparseVector) {
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java
index 53a68ab1d..5fbac204e 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java
@@ -114,7 +114,7 @@ public class ParquetFixedWidthDictionaryReaders {
Binary currDictValToWrite = null;
for (int i = 0; i < recordsReadInThisIteration; i++){
currDictValToWrite = pageReader.dictionaryValueReader.readBytes();
- mutator.setSafe(valuesReadInCurrentPass + i, currDictValToWrite.toByteBuffer(), 0,
+ mutator.setSafe(valuesReadInCurrentPass + i, currDictValToWrite.toByteBuffer().slice(), 0,
currDictValToWrite.length());
}
// Set the write Index. The next page that gets read might be a page that does not use dictionary encoding
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetToDrillTypeConverter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetToDrillTypeConverter.java
index 3f5f3b275..ad1c4bfb0 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetToDrillTypeConverter.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetToDrillTypeConverter.java
@@ -49,6 +49,7 @@ public class ParquetToDrillTypeConverter {
}
switch (convertedType) {
case UTF8:
+ case ENUM:
return (TypeProtos.MinorType.VARCHAR);
case DECIMAL:
ParquetReaderUtility.checkDecimalTypeEnabled(options);
@@ -61,6 +62,8 @@ public class ParquetToDrillTypeConverter {
return (TypeProtos.MinorType.BIGINT);
}
switch(convertedType) {
+ case INT_64:
+ return TypeProtos.MinorType.BIGINT;
case UINT_64:
return TypeProtos.MinorType.UINT8;
case DECIMAL:
@@ -85,6 +88,7 @@ public class ParquetToDrillTypeConverter {
return TypeProtos.MinorType.UINT4;
case INT_8:
case INT_16:
+ case INT_32:
return TypeProtos.MinorType.INT;
case DECIMAL:
ParquetReaderUtility.checkDecimalTypeEnabled(options);