aboutsummaryrefslogtreecommitdiff
path: root/exec
diff options
context:
space:
mode:
authorPaul Rogers <progers@cloudera.com>2019-01-27 10:32:46 -0800
committerArina Ielchiieva <arina.yelchiyeva@gmail.com>2019-01-30 13:55:30 +0200
commit8fb85cd4370e6143641cda1ad5b998caca0b6bf7 (patch)
tree8fb3376925c24f367d0ca98f938806f8c62b4516 /exec
parent85c656e12bb109147576055c1bed0e1463c32b96 (diff)
DRILL-7006: Add type conversion to row writers
Modifies the column metadata and writer abstractions to allow a type conversion "shim" to be specified as part of the schema, then inserted as part of the row set writer. Allows, say, setting an Int or Date from a string, parsing the string to obtain the proper data type to store in the vector. Type conversion not yet supported in the result set loader: some additional complexity needs to be resolved. Adds unit tests for this functionality. Refactors some existing tests to remove rough edges. closes #1623
Diffstat (limited to 'exec')
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/AbstractColumnMetadata.java16
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/PrimitiveColumnMetadata.java56
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderProtocol.java54
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestColumnConvertor.java145
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestRowSet.java (renamed from exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/RowSetTest.java)45
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestRowSetComparison.java (renamed from exec/java-exec/src/test/java/org/apache/drill/test/rowSet/TestRowSetComparison.java)5
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/record/metadata/ColumnMetadata.java40
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnConversionFactory.java40
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/UnsupportedConversionError.java2
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractScalarWriter.java52
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractWriteConvertor.java186
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ConcreteWriter.java69
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ScalarArrayWriter.java24
13 files changed, 654 insertions, 80 deletions
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/AbstractColumnMetadata.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/AbstractColumnMetadata.java
index 595c5c49d..63dda0777 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/AbstractColumnMetadata.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/AbstractColumnMetadata.java
@@ -21,6 +21,8 @@ import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.vector.accessor.ColumnConversionFactory;
+import org.apache.drill.exec.vector.accessor.UnsupportedConversionError;
/**
* Abstract definition of column metadata. Allows applications to create
@@ -179,6 +181,20 @@ public abstract class AbstractColumnMetadata implements ColumnMetadata {
public boolean isProjected() { return projected; }
@Override
+ public void setDefaultValue(Object value) { }
+
+ @Override
+ public Object defaultValue() { return null; }
+
+ @Override
+ public void setTypeConverter(ColumnConversionFactory factory) {
+ throw new UnsupportedConversionError("Type conversion not supported for non-scalar writers");
+ }
+
+ @Override
+ public ColumnConversionFactory typeConverter() { return null; }
+
+ @Override
public String toString() {
final StringBuilder buf = new StringBuilder()
.append("[")
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/PrimitiveColumnMetadata.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/PrimitiveColumnMetadata.java
index dfbd4a9ee..ead6134cb 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/PrimitiveColumnMetadata.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/metadata/PrimitiveColumnMetadata.java
@@ -23,16 +23,52 @@ import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.expr.TypeHelper;
import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.vector.accessor.ColumnConversionFactory;
/**
- * Primitive (non-map) column. Describes non-nullable, nullable and
- * array types (which differ only in mode, but not in metadata structure.)
+ * Primitive (non-map) column. Describes non-nullable, nullable and array types
+ * (which differ only in mode, but not in metadata structure.)
+ * <p>
+ * Metadata is of two types:
+ * <ul>
+ * <li>Storage metadata that describes how the column is materialized in a
+ * vector. Storage metadata is immutable because revising an existing vector is
+ * a complex operation.</li>
+ * <li>Supplemental metadata used when reading or writing the column.
+ * Supplemental metadata can be changed after the column is created, though it
+ * should generally be set before invoking code that uses the metadata.</li>
+ * </ul>
*/
public class PrimitiveColumnMetadata extends AbstractColumnMetadata {
+ /**
+ * Expected (average) width for variable-width columns.
+ */
+
private int expectedWidth;
+ /**
+ * Default value to use for filling a vector when no real data is
+ * available, such as for columns added in new files but which do not
+ * exist in existing files. The ultimate default value is the SQL null
+ * value, which works only for nullable columns.
+ */
+
+ private Object defaultValue;
+
+ /**
+ * Factory for an optional shim writer that translates from the type of
+ * data available to the code that creates the vectors on the one hand,
+ * and the actual type of the column on the other. For example, a shim
+ * might parse a string form of a date into the form stored in vectors.
+ * <p>
+ * The default is to use the "natural" type: that is, to insert no
+ * conversion shim.
+ */
+
+ private ColumnConversionFactory shimFactory;
+
public PrimitiveColumnMetadata(MaterializedField schema) {
super(schema);
expectedWidth = estimateWidth(schema.getType());
@@ -99,6 +135,22 @@ public class PrimitiveColumnMetadata extends AbstractColumnMetadata {
}
@Override
+ public void setDefaultValue(Object value) {
+ defaultValue = value;
+ }
+
+ @Override
+ public Object defaultValue() { return defaultValue; }
+
+ @Override
+ public void setTypeConverter(ColumnConversionFactory factory) {
+ shimFactory = factory;
+ }
+
+ @Override
+ public ColumnConversionFactory typeConverter() { return shimFactory; }
+
+ @Override
public ColumnMetadata cloneEmpty() {
return new PrimitiveColumnMetadata(this);
}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderProtocol.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderProtocol.java
index 088e8f4f7..8fb600d34 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderProtocol.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderProtocol.java
@@ -17,6 +17,7 @@
*/
package org.apache.drill.exec.physical.rowSet.impl;
+import static org.apache.drill.test.rowSet.RowSetUtilities.intArray;
import static org.apache.drill.test.rowSet.RowSetUtilities.strArray;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
@@ -43,8 +44,10 @@ import org.apache.drill.exec.vector.accessor.TupleWriter.UndefinedColumnExceptio
import org.apache.drill.test.SubOperatorTest;
import org.apache.drill.test.rowSet.RowSet;
import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
+import org.apache.drill.test.rowSet.test.TestColumnConvertor.TestConvertor;
import org.apache.drill.test.rowSet.RowSetReader;
import org.apache.drill.test.rowSet.RowSetUtilities;
+import org.junit.Ignore;
import org.junit.Test;
/**
@@ -602,4 +605,55 @@ public class TestResultSetLoaderProtocol extends SubOperatorTest {
rsLoader.close();
}
+
+ /**
+ * Test the use of a column type converter in the result set loader for
+ * required, nullable and repeated columns.
+ */
+
+ @Ignore("Not yet")
+ @Test
+ public void testTypeConversion() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("n1", MinorType.INT)
+ .addNullable("n2", MinorType.INT)
+ .addArray("n3", MinorType.INT)
+ .buildSchema();
+
+ // Add a type convertor. Passed in as a factory
+ // since we must create a new one for each row set writer.
+
+ schema.metadata("n1").setTypeConverter(TestConvertor.factory());
+ schema.metadata("n2").setTypeConverter(TestConvertor.factory());
+ schema.metadata("n3").setTypeConverter(TestConvertor.factory());
+
+ ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
+ .setSchema(schema)
+ .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+
+ // Write data as both a string and as an integer
+
+ RowSetLoader rootWriter = rsLoader.writer();
+ rootWriter.addRow("123", "12", strArray("123", "124"));
+ rootWriter.addRow(234, 23, intArray(234, 235));
+ RowSet actual = fixture.wrap(rsLoader.harvest());
+
+ // Build the expected vector without a type convertor.
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("n1", MinorType.INT)
+ .addNullable("n2", MinorType.INT)
+ .addArray("n3", MinorType.INT)
+ .buildSchema();
+ final SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .addRow(123, 12, intArray(123, 124))
+ .addRow(234, 23, intArray(234, 235))
+ .build();
+
+ // Compare
+
+ RowSetUtilities.verify(expected, actual);
+ }
}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestColumnConvertor.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestColumnConvertor.java
new file mode 100644
index 000000000..b7d865de1
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestColumnConvertor.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.test.rowSet.test;
+
+import static org.apache.drill.test.rowSet.RowSetUtilities.strArray;
+import static org.apache.drill.test.rowSet.RowSetUtilities.intArray;
+
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.record.metadata.ColumnMetadata;
+import org.apache.drill.exec.record.metadata.SchemaBuilder;
+import org.apache.drill.exec.record.metadata.TupleMetadata;
+import org.apache.drill.exec.vector.accessor.ColumnConversionFactory;
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
+import org.apache.drill.exec.vector.accessor.writer.AbstractWriteConvertor;
+import org.apache.drill.exec.vector.accessor.writer.ConcreteWriter;
+import org.apache.drill.test.SubOperatorTest;
+import org.apache.drill.test.rowSet.RowSet;
+import org.apache.drill.test.rowSet.RowSetBuilder;
+import org.apache.drill.test.rowSet.RowSetUtilities;
+import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
+import org.junit.Test;
+
+/**
+ * Tests the column type convertor feature of the column metadata
+ * and of the RowSetWriter.
+ */
+
+public class TestColumnConvertor extends SubOperatorTest {
+
+ /**
+ * Simple type converter that allows string-to-int conversions.
+ * Inherits usual int value support from the base writer.
+ */
+ public static class TestConvertor extends AbstractWriteConvertor {
+
+ public TestConvertor(ScalarWriter baseWriter) {
+ super(baseWriter);
+ }
+
+ @Override
+ public void setString(String value) {
+ setInt(Integer.parseInt(value));
+ }
+
+ public static ColumnConversionFactory factory() {
+ return new ColumnConversionFactory() {
+ @Override
+ public ConcreteWriter newWriter(ColumnMetadata colDefn,
+ ConcreteWriter baseWriter) {
+ return new TestConvertor(baseWriter);
+ }
+ };
+ }
+ }
+
+ @Test
+ public void testScalarConvertor() {
+
+ // Create the schema
+
+ TupleMetadata schema = new SchemaBuilder()
+ .add("n1", MinorType.INT)
+ .addNullable("n2", MinorType.INT)
+ .buildSchema();
+
+ // Add a type convertor. Passed in as a factory
+ // since we must create a new one for each row set writer.
+
+ schema.metadata("n1").setTypeConverter(TestConvertor.factory());
+ schema.metadata("n2").setTypeConverter(TestConvertor.factory());
+
+ // Write data as both a string and as an integer
+
+ RowSet actual = new RowSetBuilder(fixture.allocator(), schema)
+ .addRow("123", "12")
+ .addRow(234, 23)
+ .build();
+
+ // Build the expected vector without a type convertor.
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("n1", MinorType.INT)
+ .addNullable("n2", MinorType.INT)
+ .buildSchema();
+ final SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .addRow(123, 12)
+ .addRow(234, 23)
+ .build();
+
+ // Compare
+
+ RowSetUtilities.verify(expected, actual);
+ }
+
+ @Test
+ public void testArrayConvertor() {
+
+ // Create the schema
+
+ TupleMetadata schema = new SchemaBuilder()
+ .addArray("n", MinorType.INT)
+ .buildSchema();
+
+ // Add a type convertor. Passed in as a factory
+ // since we must create a new one for each row set writer.
+
+ schema.metadata("n").setTypeConverter(TestConvertor.factory());
+
+ // Write data as both a string and as an integer
+
+ RowSet actual = new RowSetBuilder(fixture.allocator(), schema)
+ .addSingleCol(strArray("123", "124"))
+ .addSingleCol(intArray(234, 235))
+ .build();
+
+ // Build the expected vector without a type convertor.
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addArray("n", MinorType.INT)
+ .buildSchema();
+ final SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .addSingleCol(intArray(123, 124))
+ .addSingleCol(intArray(234, 235))
+ .build();
+
+ // Compare
+
+ RowSetUtilities.verify(expected, actual);
+ }
+}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/RowSetTest.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestRowSet.java
index ee5c59983..b660672e8 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/RowSetTest.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestRowSet.java
@@ -29,7 +29,6 @@ import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import org.apache.drill.common.types.TypeProtos.MinorType;
-import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.vector.ValueVector;
@@ -47,8 +46,8 @@ import org.apache.drill.test.SubOperatorTest;
import org.apache.drill.test.rowSet.DirectRowSet;
import org.apache.drill.test.rowSet.RowSet.ExtendableRowSet;
import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
-import org.apache.drill.test.rowSet.RowSetComparison;
import org.apache.drill.test.rowSet.RowSetReader;
+import org.apache.drill.test.rowSet.RowSetUtilities;
import org.apache.drill.test.rowSet.RowSetWriter;
import org.junit.Test;
@@ -72,8 +71,8 @@ import org.junit.Test;
* A list is an array of variants. Variants are tested elsewhere.
*/
-public class RowSetTest extends SubOperatorTest {
- private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(RowSetTest.class);
+public class TestRowSet extends SubOperatorTest {
+ private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestRowSet.class);
/**
* Test the simplest constructs: a row with top-level scalar
@@ -169,7 +168,7 @@ public class RowSetTest extends SubOperatorTest {
.addRow(30)
.addRow(40)
.build();
- new RowSetComparison(expected).verifyAndClearAll(actual);
+ RowSetUtilities.verify(expected, actual);
}
/**
@@ -303,8 +302,7 @@ public class RowSetTest extends SubOperatorTest {
.addSingleCol(intArray(30))
.addSingleCol(intArray(40, 41))
.build();
- new RowSetComparison(expected)
- .verifyAndClearAll(actual);
+ RowSetUtilities.verify(expected, actual);
}
/**
@@ -438,8 +436,7 @@ public class RowSetTest extends SubOperatorTest {
.addRow(20, objArray(intArray(21, 22)))
.addRow(30, objArray(intArray(31, 32)))
.build();
- new RowSetComparison(expected)
- .verifyAndClearAll(actual);
+ RowSetUtilities.verify(expected, actual);
}
@Test
@@ -583,8 +580,7 @@ public class RowSetTest extends SubOperatorTest {
.addRow(20, objArray(objArray(201, 202), objArray(211, 212)))
.addRow(30, objArray(objArray(301, 302), objArray(311, 312)))
.build();
- new RowSetComparison(expected)
- .verifyAndClearAll(actual);
+ RowSetUtilities.verify(expected, actual);
}
/**
@@ -594,12 +590,12 @@ public class RowSetTest extends SubOperatorTest {
@Test
public void testTopFixedWidthArray() {
- final BatchSchema batchSchema = new SchemaBuilder()
+ final TupleMetadata schema = new SchemaBuilder()
.add("c", MinorType.INT)
.addArray("a", MinorType.INT)
- .build();
+ .buildSchema();
- final ExtendableRowSet rs1 = fixture.rowSet(batchSchema);
+ final ExtendableRowSet rs1 = fixture.rowSet(schema);
final RowSetWriter writer = rs1.writer();
writer.scalar(0).setInt(10);
final ScalarWriter array = writer.array(1).scalar();
@@ -644,14 +640,13 @@ public class RowSetTest extends SubOperatorTest {
assertEquals(0, arrayReader.size());
assertFalse(reader.next());
- final SingleRowSet rs2 = fixture.rowSetBuilder(batchSchema)
+ final SingleRowSet rs2 = fixture.rowSetBuilder(schema)
.addRow(10, intArray(100, 110))
.addRow(20, intArray(200, 120, 220))
.addRow(30, null)
.build();
- new RowSetComparison(rs1)
- .verifyAndClearAll(rs2);
+ RowSetUtilities.verify(rs1, rs2);
}
/**
* Test filling a row set up to the maximum number of rows.
@@ -661,11 +656,11 @@ public class RowSetTest extends SubOperatorTest {
@Test
public void testRowBounds() {
- final BatchSchema batchSchema = new SchemaBuilder()
+ final TupleMetadata schema = new SchemaBuilder()
.add("a", MinorType.INT)
- .build();
+ .buildSchema();
- final ExtendableRowSet rs = fixture.rowSet(batchSchema);
+ final ExtendableRowSet rs = fixture.rowSet(schema);
final RowSetWriter writer = rs.writer();
int count = 0;
while (! writer.isFull()) {
@@ -695,10 +690,10 @@ public class RowSetTest extends SubOperatorTest {
@Test
public void testBufferBounds() {
- final BatchSchema batchSchema = new SchemaBuilder()
+ final TupleMetadata schema = new SchemaBuilder()
.add("a", MinorType.INT)
.add("b", MinorType.VARCHAR)
- .build();
+ .buildSchema();
String varCharValue;
try {
@@ -709,7 +704,7 @@ public class RowSetTest extends SubOperatorTest {
throw new IllegalStateException(e);
}
- final ExtendableRowSet rs = fixture.rowSet(batchSchema);
+ final ExtendableRowSet rs = fixture.rowSet(schema);
final RowSetWriter writer = rs.writer();
int count = 0;
try {
@@ -751,14 +746,14 @@ public class RowSetTest extends SubOperatorTest {
// will be provided by a reader, by an incoming batch,
// etc.
- final BatchSchema schema = new SchemaBuilder()
+ final TupleMetadata schema = new SchemaBuilder()
.add("a", MinorType.VARCHAR)
.addArray("b", MinorType.INT)
.addMap("c")
.add("c1", MinorType.INT)
.add("c2", MinorType.VARCHAR)
.resumeSchema()
- .build();
+ .buildSchema();
// Step 2: Create a batch. Done here because this is
// a batch-oriented test. Done automatically in the
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/TestRowSetComparison.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestRowSetComparison.java
index 062dd56c5..867f61f5b 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/TestRowSetComparison.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestRowSetComparison.java
@@ -15,13 +15,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.drill.test.rowSet;
+package org.apache.drill.test.rowSet.test;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.memory.RootAllocator;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.record.metadata.TupleMetadata;
+import org.apache.drill.test.rowSet.RowSet;
+import org.apache.drill.test.rowSet.RowSetBuilder;
+import org.apache.drill.test.rowSet.RowSetComparison;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/ColumnMetadata.java b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/ColumnMetadata.java
index 0e0fb4920..85d7d256f 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/ColumnMetadata.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/record/metadata/ColumnMetadata.java
@@ -21,6 +21,7 @@ import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.vector.accessor.ColumnConversionFactory;
/**
* Metadata description of a column including names, types and structure
@@ -182,6 +183,45 @@ public interface ColumnMetadata {
int expectedElementCount();
/**
+ * Set the default value to use for filling a vector when no real data is
+ * available, such as for columns added in new files but which do not
+ * exist in existing files. The "default default" is null, which works
+ * only for nullable columns.
+ *
+ * @param value column value, represented as a Java object, acceptable
+ * to the {@link ColumnWriter#setObject()} method for this column's writer.
+ */
+ void setDefaultValue(Object value);
+
+ /**
+ * Returns the default value for this column.
+ *
+ * @return the default value, or null if no default value has been set
+ */
+ Object defaultValue();
+
+ /**
+ * Set the factory for an optional shim writer that translates from the type of
+ * data available to the code that creates the vectors on the one hand,
+ * and the actual type of the column on the other. For example, a shim
+ * might parse a string form of a date into the form stored in vectors.
+ * <p>
+ * The shim must write to the base vector for this column using one of
+ * the supported base writer "set" methods.
+ * <p>
+ * The default is to use the "natural" type: that is, to insert no
+ * conversion shim.
+ */
+ void setTypeConverter(ColumnConversionFactory factory);
+
+ /**
+ * Returns the type conversion shim for this column.
+ *
+ * @return the type conversion factory, or null if none is set
+ */
+ ColumnConversionFactory typeConverter();
+
+ /**
* Create an empty version of this column. If the column is a scalar,
* produces a simple copy. If a map, produces a clone without child
* columns.
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnConversionFactory.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnConversionFactory.java
new file mode 100644
index 000000000..02efd6def
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnConversionFactory.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor;
+
+import org.apache.drill.exec.record.metadata.ColumnMetadata;
+import org.apache.drill.exec.vector.accessor.writer.ConcreteWriter;
+
+/**
+ * Create a column type converter for the given column and base writer.
+ * The new writer is expected to be a "shim" writer that implements
+ * additional "set" methods to convert data from the type that the
+ * client requires to the type required by the underlying vector as
+ * represented by the base writer.
+ */
+public interface ColumnConversionFactory {
+ /**
+ * Create a type conversion writer for the given column, converting data
+ * to the type needed by the base writer.
+ * @param colDefn column metadata definition
+ * @param baseWriter base column writer for the column's vector
+ * @return a new scalar writer to insert between the client and
+ * the base vector
+ */
+ ConcreteWriter newWriter(ColumnMetadata colDefn, ConcreteWriter baseWriter);
+}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/UnsupportedConversionError.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/UnsupportedConversionError.java
index dee2612f0..68ed5e0a1 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/UnsupportedConversionError.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/UnsupportedConversionError.java
@@ -28,7 +28,7 @@ public class UnsupportedConversionError extends UnsupportedOperationException {
private static final long serialVersionUID = 1L;
- private UnsupportedConversionError(String message) {
+ public UnsupportedConversionError(String message) {
super(message);
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractScalarWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractScalarWriter.java
index 08e4ac390..9c2e9861d 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractScalarWriter.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractScalarWriter.java
@@ -17,16 +17,13 @@
*/
package org.apache.drill.exec.vector.accessor.writer;
-import java.math.BigDecimal;
-
import org.apache.drill.exec.record.metadata.ColumnMetadata;
import org.apache.drill.exec.vector.BaseDataValueVector;
+import org.apache.drill.exec.vector.accessor.ColumnConversionFactory;
import org.apache.drill.exec.vector.accessor.ColumnWriterIndex;
import org.apache.drill.exec.vector.accessor.ObjectType;
import org.apache.drill.exec.vector.accessor.ScalarWriter;
-import org.apache.drill.exec.vector.accessor.UnsupportedConversionError;
import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter;
-import org.joda.time.Period;
/**
* Column writer implementation that acts as the basis for the
@@ -35,14 +32,20 @@ import org.joda.time.Period;
* method(s).
*/
-public abstract class AbstractScalarWriter implements ScalarWriter, WriterEvents {
+public abstract class AbstractScalarWriter extends ConcreteWriter {
public static class ScalarObjectWriter extends AbstractObjectWriter {
- private AbstractScalarWriter scalarWriter;
+ private ConcreteWriter scalarWriter;
- public ScalarObjectWriter(AbstractScalarWriter scalarWriter) {
- this.scalarWriter = scalarWriter;
+ public ScalarObjectWriter(ConcreteWriter scalarWriter) {
+ final ColumnMetadata metadata = scalarWriter.schema();
+ final ColumnConversionFactory factory = metadata.typeConverter();
+ if (factory == null) {
+ this.scalarWriter = scalarWriter;
+ } else {
+ this.scalarWriter = factory.newWriter(metadata, scalarWriter);
+ }
}
@Override
@@ -111,40 +114,7 @@ public abstract class AbstractScalarWriter implements ScalarWriter, WriterEvents
@Override
public void saveRow() { }
- protected UnsupportedConversionError conversionError(String javaType) {
- return UnsupportedConversionError.writeError(schema(), javaType);
- }
-
@Override
- public void setObject(Object value) {
- if (value == null) {
- setNull();
- } else if (value instanceof Integer) {
- setInt((Integer) value);
- } else if (value instanceof Long) {
- setLong((Long) value);
- } else if (value instanceof String) {
- setString((String) value);
- } else if (value instanceof BigDecimal) {
- setDecimal((BigDecimal) value);
- } else if (value instanceof Period) {
- setPeriod((Period) value);
- } else if (value instanceof byte[]) {
- byte[] bytes = (byte[]) value;
- setBytes(bytes, bytes.length);
- } else if (value instanceof Byte) {
- setInt((Byte) value);
- } else if (value instanceof Short) {
- setInt((Short) value);
- } else if (value instanceof Double) {
- setDouble((Double) value);
- } else if (value instanceof Float) {
- setDouble((Float) value);
- } else {
- throw conversionError(value.getClass().getSimpleName());
- }
- }
-
public void dump(HierarchicalFormatter format) {
format
.startObject(this)
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractWriteConvertor.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractWriteConvertor.java
new file mode 100644
index 000000000..0d0bc8803
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractWriteConvertor.java
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.writer;
+
+import java.math.BigDecimal;
+
+import org.apache.drill.exec.record.metadata.ColumnMetadata;
+import org.apache.drill.exec.vector.accessor.ColumnWriterIndex;
+import org.apache.drill.exec.vector.accessor.ObjectType;
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
+import org.apache.drill.exec.vector.accessor.ValueType;
+import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter;
+import org.joda.time.Period;
+
+/**
+ * Base class for type converting scalar column writers. All methods
+ * pass through to the base writer. Override selected "set" methods to
+ * perform the type conversion, such as overriding "setString" to convert
+ * from a string representation of a value to the actual format.
+ * <p>
+ * The {@link #setObject(Object)} method works here: the object is passed
+ * to this class's set methods, allowing, say, setting a string object
+ * for an int column in the case above.
+ */
+
+// TODO: This organization works fine, but is a bit heavy-weight.
+// It may be time to think about separating the pure writer aspect of
+// a column writer from its plumbing aspects. That is, the base
+// ConcreteWriter class combines the public API (ScalarWriter) with
+// the internal implementation (WriterEvents) into a single class.
+// Might be worth using composition rather than inheritance to keep
+// these aspects distinct.
+
+public class AbstractWriteConvertor extends ConcreteWriter {
+
+ private final ConcreteWriter baseWriter;
+
+ public AbstractWriteConvertor(ScalarWriter baseWriter) {
+ this.baseWriter = (ConcreteWriter) baseWriter;
+ }
+
+ @Override
+ public ValueType valueType() {
+ return baseWriter.valueType();
+ }
+
+ @Override
+ public int lastWriteIndex() {
+ return baseWriter.lastWriteIndex();
+ }
+
+ @Override
+ public void restartRow() {
+ baseWriter.restartRow();
+ }
+
+ @Override
+ public void endWrite() {
+ baseWriter.endWrite();
+ }
+
+ @Override
+ public void preRollover() {
+ baseWriter.preRollover();
+ }
+
+ @Override
+ public void postRollover() {
+ baseWriter.postRollover();
+ }
+
+ @Override
+ public ObjectType type() {
+ return baseWriter.type();
+ }
+
+ @Override
+ public boolean nullable() {
+ return baseWriter.nullable();
+ }
+
+ @Override
+ public ColumnMetadata schema() {
+ return baseWriter.schema();
+ }
+
+ @Override
+ public void setNull() {
+ baseWriter.setNull();
+ }
+
+ @Override
+ public int rowStartIndex() {
+ return baseWriter.rowStartIndex();
+ }
+
+ @Override
+ public int writeIndex() {
+ return baseWriter.writeIndex();
+ }
+
+ @Override
+ public void bindListener(ColumnWriterListener listener) {
+ baseWriter.bindListener(listener);
+ }
+
+ @Override
+ public void bindIndex(ColumnWriterIndex index) {
+ baseWriter.bindIndex(index);
+ }
+
+ @Override
+ public void startWrite() {
+ baseWriter.startWrite();
+ }
+
+ @Override
+ public void startRow() {
+ baseWriter.startRow();
+ }
+
+ @Override
+ public void endArrayValue() {
+ baseWriter.endArrayValue();
+ }
+
+ @Override
+ public void saveRow() {
+ baseWriter.saveRow();
+ }
+
+ @Override
+ public void setInt(int value) {
+ baseWriter.setInt(value);
+ }
+
+ @Override
+ public void setLong(long value) {
+ baseWriter.setLong(value);
+ }
+
+ @Override
+ public void setDouble(double value) {
+ baseWriter.setDouble(value);
+ }
+
+ @Override
+ public void setString(String value) {
+ baseWriter.setString(value);
+ }
+
+ @Override
+ public void setBytes(byte[] value, int len) {
+ baseWriter.setBytes(value, len);
+ }
+
+ @Override
+ public void setDecimal(BigDecimal value) {
+ baseWriter.setDecimal(value);
+ }
+
+ @Override
+ public void setPeriod(Period value) {
+ baseWriter.setPeriod(value);
+ }
+
+ @Override
+ public void dump(HierarchicalFormatter format) {
+ baseWriter.dump(format);
+ }
+}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ConcreteWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ConcreteWriter.java
new file mode 100644
index 000000000..549431f76
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ConcreteWriter.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.writer;
+
+import java.math.BigDecimal;
+
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
+import org.apache.drill.exec.vector.accessor.UnsupportedConversionError;
+import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter;
+import org.joda.time.Period;
+
+/**
+ * Base class for concrete scalar column writers including actual vector
+ * writers, wrappers for nullable types, and shims used to convert types.
+ */
+
+public abstract class ConcreteWriter implements ScalarWriter, WriterEvents {
+
+ @Override
+ public void setObject(Object value) {
+ if (value == null) {
+ setNull();
+ } else if (value instanceof Integer) {
+ setInt((Integer) value);
+ } else if (value instanceof Long) {
+ setLong((Long) value);
+ } else if (value instanceof String) {
+ setString((String) value);
+ } else if (value instanceof BigDecimal) {
+ setDecimal((BigDecimal) value);
+ } else if (value instanceof Period) {
+ setPeriod((Period) value);
+ } else if (value instanceof byte[]) {
+ final byte[] bytes = (byte[]) value;
+ setBytes(bytes, bytes.length);
+ } else if (value instanceof Byte) {
+ setInt((Byte) value);
+ } else if (value instanceof Short) {
+ setInt((Short) value);
+ } else if (value instanceof Double) {
+ setDouble((Double) value);
+ } else if (value instanceof Float) {
+ setDouble((Float) value);
+ } else {
+ throw conversionError(value.getClass().getSimpleName());
+ }
+ }
+
+ protected UnsupportedConversionError conversionError(String javaType) {
+ return UnsupportedConversionError.writeError(schema(), javaType);
+ }
+
+ abstract void dump(HierarchicalFormatter format);
+}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ScalarArrayWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ScalarArrayWriter.java
index 2ac7d45b5..f271bfab0 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ScalarArrayWriter.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ScalarArrayWriter.java
@@ -60,13 +60,17 @@ public class ScalarArrayWriter extends BaseArrayWriter {
public final void nextElement() { next(); }
}
- private final BaseScalarWriter elementWriter;
+ private final ConcreteWriter elementWriter;
public ScalarArrayWriter(ColumnMetadata schema,
RepeatedValueVector vector, BaseScalarWriter elementWriter) {
super(schema, vector.getOffsetVector(),
new ScalarObjectWriter(elementWriter));
- this.elementWriter = elementWriter;
+
+ // Save the writer from the scalar object writer created above
+ // which may have wrapped the element writer in a type converter.
+
+ this.elementWriter = (ConcreteWriter) elementObjWriter.scalar();
}
public static ArrayObjectWriter build(ColumnMetadata schema,
@@ -110,7 +114,7 @@ public class ScalarArrayWriter extends BaseArrayWriter {
return;
}
- String objClass = array.getClass().getName();
+ final String objClass = array.getClass().getName();
if (! objClass.startsWith("[")) {
throw new IllegalArgumentException(
String.format("Argument must be an array. Column `%s`, value = %s",
@@ -119,12 +123,12 @@ public class ScalarArrayWriter extends BaseArrayWriter {
// Figure out type
- char second = objClass.charAt(1);
+ final char second = objClass.charAt(1);
switch ( second ) {
case '[':
// bytes is represented as an array of byte arrays.
- char third = objClass.charAt(2);
+ final char third = objClass.charAt(2);
switch (third) {
case 'B':
setBytesArray((byte[][]) array);
@@ -157,11 +161,11 @@ public class ScalarArrayWriter extends BaseArrayWriter {
setBooleanArray((boolean[]) array);
break;
case 'L':
- int posn = objClass.indexOf(';');
+ final int posn = objClass.indexOf(';');
// If the array is of type Object, then we have no type info.
- String memberClassName = objClass.substring(2, posn);
+ final String memberClassName = objClass.substring(2, posn);
if (memberClassName.equals(String.class.getName())) {
setStringArray((String[]) array);
} else if (memberClassName.equals(Period.class.getName())) {
@@ -215,7 +219,7 @@ public class ScalarArrayWriter extends BaseArrayWriter {
public void setIntObjectArray(Integer[] value) {
for (int i = 0; i < value.length; i++) {
- Integer element = value[i];
+ final Integer element = value[i];
if (element == null) {
elementWriter.setNull();
} else {
@@ -232,7 +236,7 @@ public class ScalarArrayWriter extends BaseArrayWriter {
public void setLongObjectArray(Long[] value) {
for (int i = 0; i < value.length; i++) {
- Long element = value[i];
+ final Long element = value[i];
if (element == null) {
elementWriter.setNull();
} else {
@@ -255,7 +259,7 @@ public class ScalarArrayWriter extends BaseArrayWriter {
public void setDoubleObjectArray(Double[] value) {
for (int i = 0; i < value.length; i++) {
- Double element = value[i];
+ final Double element = value[i];
if (element == null) {
elementWriter.setNull();
} else {