diff options
author | Parth Chandra <parthc@apache.org> | 2017-08-29 11:20:47 -0700 |
---|---|---|
committer | Parth Chandra <parthc@apache.org> | 2018-01-11 17:13:47 -0800 |
commit | 90eb23baa6a1a205fd0821c7af708969fcb98c5c (patch) | |
tree | 669d97a91febd9f2e33bc958118c0509476eb819 /exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders | |
parent | c5af3aefe79c34d5b76bec8ce55875decca9e617 (diff) |
DRILL-5971: Fix INT64, INT32 logical types in complex parquet reader
Added the following types : ENUM (Binary annotated as ENUM) INT96 (Dictionary encoded)
Fixed issue with reading Dictionary encoded fixed width reader
Added test file generator
This closes #1049
Diffstat (limited to 'exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders')
3 files changed, 14 insertions, 3 deletions
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ColumnReaderFactory.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ColumnReaderFactory.java index 495f70bc5..09cdc5d5a 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ColumnReaderFactory.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ColumnReaderFactory.java @@ -82,8 +82,9 @@ public class ColumnReaderFactory { if (columnChunkMetaData.getType() == PrimitiveType.PrimitiveTypeName.BOOLEAN){ return new BitReader(recordReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (BitVector) v, schemaElement); - } else if (columnChunkMetaData.getType() == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY || - columnChunkMetaData.getType() == PrimitiveType.PrimitiveTypeName.INT96) { + } else if (!columnChunkMetaData.getEncodings().contains(Encoding.PLAIN_DICTIONARY) && ( + columnChunkMetaData.getType() == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY + || columnChunkMetaData.getType() == PrimitiveType.PrimitiveTypeName.INT96)) { if (convertedType == ConvertedType.DECIMAL){ int length = schemaElement.type_length; if (length <= 12) { @@ -125,6 +126,7 @@ public class ColumnReaderFactory { return new ParquetFixedWidthDictionaryReaders.DictionaryTimeReader(recordReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (TimeVector) v, schemaElement); case INT_8: case INT_16: + case INT_32: return new ParquetFixedWidthDictionaryReaders.DictionaryIntReader(recordReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (IntVector) v, schemaElement); case UINT_8: case UINT_16: @@ -138,6 +140,8 @@ public class ColumnReaderFactory { return new ParquetFixedWidthDictionaryReaders.DictionaryBigIntReader(recordReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (BigIntVector) v, schemaElement); } switch (convertedType) { + case INT_64: + return new ParquetFixedWidthDictionaryReaders.DictionaryBigIntReader(recordReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (BigIntVector) v, schemaElement); case UINT_64: return new ParquetFixedWidthDictionaryReaders.DictionaryUInt8Reader(recordReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (UInt8Vector) v, schemaElement); case DECIMAL: @@ -152,6 +156,7 @@ public class ColumnReaderFactory { case DOUBLE: return new ParquetFixedWidthDictionaryReaders.DictionaryFloat8Reader(recordReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (Float8Vector) v, schemaElement); case FIXED_LEN_BYTE_ARRAY: + case INT96: return new ParquetFixedWidthDictionaryReaders.DictionaryFixedBinaryReader(recordReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (VarBinaryVector) v, schemaElement); default: throw new ExecutionSetupException("Unsupported dictionary column type " + descriptor.getType().name() ); @@ -213,6 +218,7 @@ public class ColumnReaderFactory { } switch (convertedType) { case UTF8: + case ENUM: return new VarLengthColumnReaders.VarCharColumn(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (VarCharVector) v, schemaElement); case DECIMAL: if (v instanceof Decimal28SparseVector) { @@ -230,6 +236,7 @@ public class ColumnReaderFactory { switch (convertedType) { case UTF8: + case ENUM: return new VarLengthColumnReaders.NullableVarCharColumn(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (NullableVarCharVector) v, schemaElement); case DECIMAL: if (v instanceof NullableDecimal28SparseVector) { diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java index 53a68ab1d..5fbac204e 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetFixedWidthDictionaryReaders.java @@ -114,7 +114,7 @@ public class ParquetFixedWidthDictionaryReaders { Binary currDictValToWrite = null; for (int i = 0; i < recordsReadInThisIteration; i++){ currDictValToWrite = pageReader.dictionaryValueReader.readBytes(); - mutator.setSafe(valuesReadInCurrentPass + i, currDictValToWrite.toByteBuffer(), 0, + mutator.setSafe(valuesReadInCurrentPass + i, currDictValToWrite.toByteBuffer().slice(), 0, currDictValToWrite.length()); } // Set the write Index. The next page that gets read might be a page that does not use dictionary encoding diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetToDrillTypeConverter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetToDrillTypeConverter.java index 3f5f3b275..ad1c4bfb0 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetToDrillTypeConverter.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/ParquetToDrillTypeConverter.java @@ -49,6 +49,7 @@ public class ParquetToDrillTypeConverter { } switch (convertedType) { case UTF8: + case ENUM: return (TypeProtos.MinorType.VARCHAR); case DECIMAL: ParquetReaderUtility.checkDecimalTypeEnabled(options); @@ -61,6 +62,8 @@ public class ParquetToDrillTypeConverter { return (TypeProtos.MinorType.BIGINT); } switch(convertedType) { + case INT_64: + return TypeProtos.MinorType.BIGINT; case UINT_64: return TypeProtos.MinorType.UINT8; case DECIMAL: @@ -85,6 +88,7 @@ public class ParquetToDrillTypeConverter { return TypeProtos.MinorType.UINT4; case INT_8: case INT_16: + case INT_32: return TypeProtos.MinorType.INT; case DECIMAL: ParquetReaderUtility.checkDecimalTypeEnabled(options); |