Diffstat (limited to 'exec/java-exec/src/test/java/org/apache')
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArray.java  172
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArrayFramework.java  19
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArrayParser.java  46
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestConstantColumnLoader.java  153
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataColumnParser.java  170
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataProjection.java  4
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileScanFramework.java  11
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestNullColumnLoader.java  281
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestRowBatchMerger.java  459
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanBatchWriters.java  3
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanLevelProjection.java  223
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOperatorExec.java  11
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorEarlySchema.java  255
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorLateSchema.java  6
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorMetadata.java  77
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestSchemaLevelProjection.java  557
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestSchemaSmoothing.java  946
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestConstantColumnLoader.java  45
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestScanLevelProjection.java  113
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestSchemaSmoothing.java  250
20 files changed, 895 insertions, 2906 deletions
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArray.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArray.java
index 3587e2711..88ccd3c41 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArray.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArray.java
@@ -22,33 +22,42 @@ import static org.junit.Assert.fail;
import java.util.List;
+import org.apache.drill.categories.RowSetTests;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.physical.impl.scan.columns.ColumnsArrayManager;
import org.apache.drill.exec.physical.impl.scan.file.FileMetadataManager;
+import org.apache.drill.exec.physical.impl.scan.project.ReaderSchemaOrchestrator;
import org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator;
-import org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ReaderSchemaOrchestrator;
import org.apache.drill.exec.physical.rowSet.ResultSetLoader;
import org.apache.drill.exec.physical.rowSet.impl.RowSetTestUtils;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.test.SubOperatorTest;
import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
-import org.apache.drill.test.rowSet.RowSetComparison;
+import org.apache.drill.test.rowSet.RowSetUtilities;
import org.apache.hadoop.fs.Path;
import org.junit.Test;
-
+import org.junit.experimental.categories.Category;
import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
+/**
+ * Test the "columns" array mechanism integrated with the scan schema
+ * orchestrator, including simulated reads of data.
+ */
+
+@Category(RowSetTests.class)
public class TestColumnsArray extends SubOperatorTest {
- /**
- * Test columns array. The table must be able to support it by having a
- * matching column.
- */
+ private static class MockScanner {
+ ScanSchemaOrchestrator scanner;
+ ReaderSchemaOrchestrator reader;
+ ResultSetLoader loader;
+ }
- @Test
- public void testColumnsArray() {
+ private MockScanner buildScanner(List<SchemaPath> projList) {
+
+ MockScanner mock = new MockScanner();
// Set up the file metadata manager
@@ -64,21 +73,19 @@ public class TestColumnsArray extends SubOperatorTest {
// Configure the schema orchestrator
- ScanSchemaOrchestrator scanner = new ScanSchemaOrchestrator(fixture.allocator());
- scanner.withMetadata(metadataManager);
- scanner.addParser(colsManager.projectionParser());
- scanner.addResolver(colsManager.resolver());
+ mock.scanner = new ScanSchemaOrchestrator(fixture.allocator());
+ mock.scanner.withMetadata(metadataManager);
+ mock.scanner.addParser(colsManager.projectionParser());
+ mock.scanner.addResolver(colsManager.resolver());
- // SELECT filename, columns, dir0 ...
+ // SELECT <proj list> ...
- scanner.build(RowSetTestUtils.projectList(ScanTestUtils.FILE_NAME_COL,
- ColumnsArrayManager.COLUMNS_COL,
- ScanTestUtils.partitionColName(0)));
+ mock.scanner.build(projList);
// FROM z.csv
metadataManager.startFile(filePath);
- ReaderSchemaOrchestrator reader = scanner.startReader();
+ mock.reader = mock.scanner.startReader();
// Table schema (columns: VARCHAR[])
@@ -86,7 +93,25 @@ public class TestColumnsArray extends SubOperatorTest {
.addArray(ColumnsArrayManager.COLUMNS_COL, MinorType.VARCHAR)
.buildSchema();
- ResultSetLoader loader = reader.makeTableLoader(tableSchema);
+ mock.loader = mock.reader.makeTableLoader(tableSchema);
+
+ // First empty batch
+
+ mock.reader.defineSchema();
+ return mock;
+ }
+
+ /**
+ * Test columns array. The table must be able to support it by having a
+ * matching column.
+ */
+
+ @Test
+ public void testColumnsArray() {
+
+ MockScanner mock = buildScanner(RowSetTestUtils.projectList(ScanTestUtils.FILE_NAME_COL,
+ ColumnsArrayManager.COLUMNS_COL,
+ ScanTestUtils.partitionColName(0)));
// Verify empty batch.
@@ -99,18 +124,18 @@ public class TestColumnsArray extends SubOperatorTest {
SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
.build();
- assertNotNull(scanner.output());
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
+ assertNotNull(mock.scanner.output());
+ RowSetUtilities.verify(expected,
+ fixture.wrap(mock.scanner.output()));
}
// Create a batch of data.
- reader.startBatch();
- loader.writer()
+ mock.reader.startBatch();
+ mock.loader.writer()
.addRow(new Object[] {new String[] {"fred", "flintstone"}})
.addRow(new Object[] {new String[] {"barney", "rubble"}});
- reader.endBatch();
+ mock.reader.endBatch();
// Verify
@@ -120,11 +145,100 @@ public class TestColumnsArray extends SubOperatorTest {
.addRow("z.csv", new String[] {"barney", "rubble"}, "x")
.build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
+ RowSetUtilities.verify(expected,
+ fixture.wrap(mock.scanner.output()));
}
- scanner.close();
+ mock.scanner.close();
+ }
+
+ @Test
+ public void testWildcard() {
+
+ MockScanner mock = buildScanner(RowSetTestUtils.projectAll());
+
+ // Verify empty batch.
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addArray("columns", MinorType.VARCHAR)
+ .buildSchema();
+ {
+ SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .build();
+
+ assertNotNull(mock.scanner.output());
+ RowSetUtilities.verify(expected,
+ fixture.wrap(mock.scanner.output()));
+ }
+
+ // Create a batch of data.
+
+ mock.reader.startBatch();
+ mock.loader.writer()
+ .addRow(new Object[] {new String[] {"fred", "flintstone"}})
+ .addRow(new Object[] {new String[] {"barney", "rubble"}});
+ mock.reader.endBatch();
+
+ // Verify
+
+ {
+ SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .addSingleCol(new String[] {"fred", "flintstone"})
+ .addSingleCol(new String[] {"barney", "rubble"})
+ .build();
+
+ RowSetUtilities.verify(expected,
+ fixture.wrap(mock.scanner.output()));
+ }
+
+ mock.scanner.close();
+ }
+
+ @Test
+ public void testWildcardAndFileMetadata() {
+
+ MockScanner mock = buildScanner(RowSetTestUtils.projectList(
+ ScanTestUtils.FILE_NAME_COL,
+ SchemaPath.DYNAMIC_STAR,
+ ScanTestUtils.partitionColName(0)));
+
+ // Verify empty batch.
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("filename", MinorType.VARCHAR)
+ .addArray("columns", MinorType.VARCHAR)
+ .addNullable("dir0", MinorType.VARCHAR)
+ .buildSchema();
+ {
+ SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .build();
+
+ assertNotNull(mock.scanner.output());
+ RowSetUtilities.verify(expected,
+ fixture.wrap(mock.scanner.output()));
+ }
+
+ // Create a batch of data.
+
+ mock.reader.startBatch();
+ mock.loader.writer()
+ .addRow(new Object[] {new String[] {"fred", "flintstone"}})
+ .addRow(new Object[] {new String[] {"barney", "rubble"}});
+ mock.reader.endBatch();
+
+ // Verify
+
+ {
+ SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .addRow("z.csv", new String[] {"fred", "flintstone"}, "x")
+ .addRow("z.csv", new String[] {"barney", "rubble"}, "x")
+ .build();
+
+ RowSetUtilities.verify(expected,
+ fixture.wrap(mock.scanner.output()));
+ }
+
+ mock.scanner.close();
}
private ScanSchemaOrchestrator buildScan(List<SchemaPath> cols) {
@@ -160,6 +274,7 @@ public class TestColumnsArray extends SubOperatorTest {
try {
ReaderSchemaOrchestrator reader = scanner.startReader();
reader.makeTableLoader(tableSchema);
+ reader.defineSchema();
fail();
} catch (IllegalStateException e) {
// Expected
@@ -180,6 +295,7 @@ public class TestColumnsArray extends SubOperatorTest {
try {
ReaderSchemaOrchestrator reader = scanner.startReader();
reader.makeTableLoader(tableSchema);
+ reader.defineSchema();
fail();
} catch (IllegalStateException e) {
// Expected
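The rewritten TestColumnsArray above factors the scanner setup into a buildScanner() helper and switches verification from RowSetComparison to RowSetUtilities.verify(). What follows is a condensed, illustrative sketch of that wiring, assuming the metadataManager, colsManager, and filePath objects built in the unchanged setup code of the test and the imports shown in the diff:

  ScanSchemaOrchestrator scanner = new ScanSchemaOrchestrator(fixture.allocator());
  scanner.withMetadata(metadataManager);                 // implicit file/partition columns
  scanner.addParser(colsManager.projectionParser());     // recognize the `columns` column
  scanner.addResolver(colsManager.resolver());
  scanner.build(RowSetTestUtils.projectAll());           // SELECT * ...

  metadataManager.startFile(filePath);                   // FROM z.csv
  ReaderSchemaOrchestrator reader = scanner.startReader();

  TupleMetadata tableSchema = new SchemaBuilder()
      .addArray(ColumnsArrayManager.COLUMNS_COL, MinorType.VARCHAR)
      .buildSchema();
  ResultSetLoader loader = reader.makeTableLoader(tableSchema);
  reader.defineSchema();                                 // emits the initial empty-schema batch

  reader.startBatch();
  loader.writer().addRow(new Object[] {new String[] {"fred", "flintstone"}});
  reader.endBatch();

  SingleRowSet expected = fixture.rowSetBuilder(tableSchema)
      .addSingleCol(new String[] {"fred", "flintstone"})
      .build();
  RowSetUtilities.verify(expected, fixture.wrap(scanner.output()));
  scanner.close();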
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArrayFramework.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArrayFramework.java
index 4f32b56e0..e7a0188cd 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArrayFramework.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArrayFramework.java
@@ -27,6 +27,7 @@ import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
+import org.apache.drill.categories.RowSetTests;
import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.types.TypeProtos.MinorType;
@@ -46,7 +47,7 @@ import org.apache.drill.test.SubOperatorTest;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;
import org.junit.Test;
-
+import org.junit.experimental.categories.Category;
import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
@@ -54,6 +55,7 @@ import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
* Test the columns-array specific behavior in the columns scan framework.
*/
+@Category(RowSetTests.class)
public class TestColumnsArrayFramework extends SubOperatorTest {
private static final Path MOCK_FILE_PATH = new Path("file:/w/x/y/z.csv");
@@ -101,7 +103,7 @@ public class TestColumnsArrayFramework extends SubOperatorTest {
@Override
public boolean open(ColumnsSchemaNegotiator negotiator) {
this.negotiator = negotiator;
- negotiator.setTableSchema(schema);
+ negotiator.setTableSchema(schema, true);
negotiator.build();
return true;
}
@@ -115,6 +117,11 @@ public class TestColumnsArrayFramework extends SubOperatorTest {
public void close() { }
}
+ /**
+ * Test including a column other than "columns". Occurs when
+ * using implicit columns.
+ */
+
@Test
public void testNonColumnsProjection() {
@@ -143,6 +150,10 @@ public class TestColumnsArrayFramework extends SubOperatorTest {
scanFixture.close();
}
+ /**
+ * Test projecting just the `columns` column.
+ */
+
@Test
public void testColumnsProjection() {
@@ -171,6 +182,10 @@ public class TestColumnsArrayFramework extends SubOperatorTest {
scanFixture.close();
}
+ /**
+ * Test including a specific index of `columns` such as
+ * `columns`[1].
+ */
@Test
public void testColumnsIndexProjection() {
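The mock reader in TestColumnsArrayFramework now passes a second argument to setTableSchema(). A minimal sketch of the reader-open contract it exercises, assuming (as the call site suggests, but the diff does not state) that the boolean declares the supplied schema to be complete:

  // Fragment of a columns-framework reader, mirroring the mock reader above.
  private final TupleMetadata schema = new SchemaBuilder()
      .addArray(ColumnsArrayManager.COLUMNS_COL, MinorType.VARCHAR)
      .buildSchema();

  @Override
  public boolean open(ColumnsSchemaNegotiator negotiator) {
    negotiator.setTableSchema(schema, true);   // assumption: true means "schema is complete"
    negotiator.build();                        // creates the loader used to write batches
    return true;
  }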
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArrayParser.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArrayParser.java
index e2c8a21d3..d1e91a283 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArrayParser.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArrayParser.java
@@ -23,6 +23,7 @@ import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
+import org.apache.drill.categories.RowSetTests;
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.exec.physical.impl.scan.columns.ColumnsArrayManager;
@@ -35,9 +36,10 @@ import org.apache.drill.exec.physical.rowSet.impl.RowSetTestUtils;
import org.apache.drill.test.SubOperatorTest;
import org.apache.hadoop.fs.Path;
import org.junit.Test;
-
+import org.junit.experimental.categories.Category;
import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
+@Category(RowSetTests.class)
public class TestColumnsArrayParser extends SubOperatorTest {
/**
@@ -255,5 +257,45 @@ public class TestColumnsArrayParser extends SubOperatorTest {
assertEquals(FileMetadataColumn.ID, scanProj.columns().get(2).nodeType());
}
- // TODO: Test Columns element projection
+ /**
+ * If a query is of the form:
+ * <pre><code>
+ * select * from dfs.`multilevel/csv` where columns[1] < 1000
+ * </code></pre>
+ * Then the projection list passed to the scan operator
+ * includes both the wildcard and the `columns` array.
+ * We can ignore one of them.
+ */
+
+ @Test
+ public void testWildcardAndColumns() {
+ ScanLevelProjection scanProj = new ScanLevelProjection(
+ RowSetTestUtils.projectList(
+ SchemaPath.DYNAMIC_STAR,
+ ColumnsArrayManager.COLUMNS_COL),
+ ScanTestUtils.parsers(new ColumnsArrayParser(true)));
+
+ assertFalse(scanProj.projectAll());
+ assertEquals(2, scanProj.requestedCols().size());
+
+ assertEquals(1, scanProj.columns().size());
+ assertEquals(ColumnsArrayManager.COLUMNS_COL, scanProj.columns().get(0).name());
+
+ // Verify column type
+
+ assertEquals(UnresolvedColumnsArrayColumn.ID, scanProj.columns().get(0).nodeType());
+ }
+
+ @Test
+ public void testColumnsAsMap() {
+ try {
+ new ScanLevelProjection(
+ RowSetTestUtils.projectList("columns.x"),
+ ScanTestUtils.parsers(new ColumnsArrayParser(true)));
+ fail();
+ }
+ catch (UserException e) {
+ // Expected
+ }
+ }
}
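The new testWildcardAndColumns() case above documents how a query such as select * from dfs.`multilevel/csv` where columns[1] < 1000 reaches the scan operator with both the wildcard and the `columns` array in its projection list. A short, illustrative sketch of how that projection is parsed, using only names that appear in the diff:

  ScanLevelProjection scanProj = new ScanLevelProjection(
      RowSetTestUtils.projectList(
          SchemaPath.DYNAMIC_STAR,             // the `*`
          ColumnsArrayManager.COLUMNS_COL),    // the `columns` array from the WHERE clause
      ScanTestUtils.parsers(new ColumnsArrayParser(true)));

  // The parser collapses the pair into a single unresolved `columns` column.
  assertEquals(1, scanProj.columns().size());
  assertEquals(UnresolvedColumnsArrayColumn.ID, scanProj.columns().get(0).nodeType());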
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestConstantColumnLoader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestConstantColumnLoader.java
deleted file mode 100644
index 330a66115..000000000
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestConstantColumnLoader.java
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.physical.impl.scan;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.drill.common.types.TypeProtos.DataMode;
-import org.apache.drill.common.types.TypeProtos.MajorType;
-import org.apache.drill.common.types.TypeProtos.MinorType;
-import org.apache.drill.exec.physical.impl.scan.file.FileMetadata;
-import org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn;
-import org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumnDefn;
-import org.apache.drill.exec.physical.impl.scan.file.PartitionColumn;
-import org.apache.drill.exec.physical.impl.scan.project.ConstantColumnLoader;
-import org.apache.drill.exec.physical.impl.scan.project.ConstantColumnLoader.ConstantColumnSpec;
-import org.apache.drill.exec.physical.rowSet.impl.ResultVectorCacheImpl;
-import org.apache.drill.exec.record.BatchSchema;
-import org.apache.drill.exec.record.MaterializedField;
-import org.apache.drill.exec.record.metadata.SchemaBuilder;
-import org.apache.drill.exec.store.ColumnExplorer.ImplicitFileColumns;
-import org.apache.drill.test.SubOperatorTest;
-import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
-import org.apache.drill.test.rowSet.RowSetComparison;
-import org.apache.hadoop.fs.Path;
-import org.junit.Test;
-
-public class TestConstantColumnLoader extends SubOperatorTest {
-
- private static class DummyColumn implements ConstantColumnSpec {
-
- private String name;
- private MaterializedField schema;
- private String value;
-
- public DummyColumn(String name, MajorType type, String value) {
- this.name = name;
- this.schema = MaterializedField.create(name, type);
- this.value = value;
- }
-
- @Override
- public String name() { return name; }
-
- @Override
- public MaterializedField schema() { return schema; }
-
- @Override
- public String value() { return value; }
- }
-
- /**
- * Test the static column loader using one column of each type.
- * The null column is of type int, but the associated value is of
- * type string. This is a bit odd, but works out because we detect that
- * the string value is null and call setNull on the writer, and avoid
- * using the actual data.
- */
-
- @Test
- public void testConstantColumnLoader() {
-
- MajorType aType = MajorType.newBuilder()
- .setMinorType(MinorType.VARCHAR)
- .setMode(DataMode.REQUIRED)
- .build();
- MajorType bType = MajorType.newBuilder()
- .setMinorType(MinorType.VARCHAR)
- .setMode(DataMode.OPTIONAL)
- .build();
-
- List<ConstantColumnSpec> defns = new ArrayList<>();
- defns.add(
- new DummyColumn("a", aType, "a-value" ));
- defns.add(
- new DummyColumn("b", bType, "b-value" ));
-
- ResultVectorCacheImpl cache = new ResultVectorCacheImpl(fixture.allocator());
- ConstantColumnLoader staticLoader = new ConstantColumnLoader(cache, defns);
-
- // Create a batch
-
- staticLoader.load(2);
-
- // Verify
-
- BatchSchema expectedSchema = new SchemaBuilder()
- .add("a", aType)
- .add("b", bType)
- .build();
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .addRow("a-value", "b-value")
- .addRow("a-value", "b-value")
- .build();
-
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(staticLoader.load(2)));
- staticLoader.close();
- }
-
- @Test
- public void testFileMetadata() {
-
- FileMetadata fileInfo = new FileMetadata(new Path("hdfs:///w/x/y/z.csv"), new Path("hdfs:///w"));
- List<ConstantColumnSpec> defns = new ArrayList<>();
- FileMetadataColumnDefn iDefn = new FileMetadataColumnDefn(
- ScanTestUtils.SUFFIX_COL, ImplicitFileColumns.SUFFIX);
- FileMetadataColumn iCol = new FileMetadataColumn(ScanTestUtils.SUFFIX_COL,
- iDefn, fileInfo, null, 0);
- defns.add(iCol);
-
- String partColName = ScanTestUtils.partitionColName(1);
- PartitionColumn pCol = new PartitionColumn(partColName, 1, fileInfo, null, 0);
- defns.add(pCol);
-
- ResultVectorCacheImpl cache = new ResultVectorCacheImpl(fixture.allocator());
- ConstantColumnLoader staticLoader = new ConstantColumnLoader(cache, defns);
-
- // Create a batch
-
- staticLoader.load(2);
-
- // Verify
-
- BatchSchema expectedSchema = new SchemaBuilder()
- .add(ScanTestUtils.SUFFIX_COL, MinorType.VARCHAR)
- .addNullable(partColName, MinorType.VARCHAR)
- .build();
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .addRow("csv", "y")
- .addRow("csv", "y")
- .build();
-
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(staticLoader.load(2)));
- staticLoader.close();
- }
-}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataColumnParser.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataColumnParser.java
index 08aeed53f..a6de5e6f3 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataColumnParser.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataColumnParser.java
@@ -22,6 +22,7 @@ import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.util.List;
+import org.apache.drill.categories.RowSetTests;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn;
import org.apache.drill.exec.physical.impl.scan.file.FileMetadataManager;
@@ -33,9 +34,10 @@ import org.apache.drill.exec.physical.rowSet.impl.RowSetTestUtils;
import org.apache.drill.test.SubOperatorTest;
import org.apache.hadoop.fs.Path;
import org.junit.Test;
-
+import org.junit.experimental.categories.Category;
import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
+@Category(RowSetTests.class)
public class TestFileMetadataColumnParser extends SubOperatorTest {
@Test
@@ -135,8 +137,12 @@ public class TestFileMetadataColumnParser extends SubOperatorTest {
assertEquals(PartitionColumn.ID, scanProj.columns().get(0).nodeType());
}
+ /**
+ * Test wildcard expansion.
+ */
+
@Test
- public void testWildcard() {
+ public void testRevisedWildcard() {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
FileMetadataManager metadataManager = new FileMetadataManager(
fixture.getOptionManager(),
@@ -153,15 +159,45 @@ public class TestFileMetadataColumnParser extends SubOperatorTest {
}
/**
+ * Legacy (prior version) wildcard expansion always expands partition
+ * columns.
+ */
+
+ @Test
+ public void testLegacyWildcard() {
+ Path filePath = new Path("hdfs:///w/x/y/z.csv");
+ FileMetadataManager metadataManager = new FileMetadataManager(
+ fixture.getOptionManager(),
+ true, // Use legacy wildcard expansion
+ true, // Put partitions at end
+ new Path("hdfs:///w"),
+ Lists.newArrayList(filePath));
+
+ ScanLevelProjection scanProj = new ScanLevelProjection(
+ RowSetTestUtils.projectAll(),
+ Lists.newArrayList(metadataManager.projectionParser()));
+
+ List<ColumnProjection> cols = scanProj.columns();
+ assertEquals(3, cols.size());
+ assertEquals(UnresolvedColumn.WILDCARD, cols.get(0).nodeType());
+ assertEquals(PartitionColumn.ID, cols.get(1).nodeType());
+ assertEquals(0, ((PartitionColumn) cols.get(1)).partition());
+ assertEquals(PartitionColumn.ID, cols.get(2).nodeType());
+ assertEquals(1, ((PartitionColumn) cols.get(2)).partition());
+ }
+
+ /**
* Combine wildcard and file metadata columns. The wildcard expands
* table columns but not metadata columns.
*/
@Test
- public void testWildcardAndFileMetadata() {
+ public void testLegacyWildcardAndFileMetadata() {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
FileMetadataManager metadataManager = new FileMetadataManager(
fixture.getOptionManager(),
+ true, // Use legacy wildcard expansion
+ false, // Put partitions at end
new Path("hdfs:///w"),
Lists.newArrayList(filePath));
@@ -173,10 +209,12 @@ public class TestFileMetadataColumnParser extends SubOperatorTest {
Lists.newArrayList(metadataManager.projectionParser()));
List<ColumnProjection> cols = scanProj.columns();
- assertEquals(3, cols.size());
+ assertEquals(5, cols.size());
assertEquals(UnresolvedColumn.WILDCARD, cols.get(0).nodeType());
assertEquals(FileMetadataColumn.ID, cols.get(1).nodeType());
assertEquals(FileMetadataColumn.ID, cols.get(2).nodeType());
+ assertEquals(PartitionColumn.ID, cols.get(3).nodeType());
+ assertEquals(PartitionColumn.ID, cols.get(4).nodeType());
}
/**
@@ -185,10 +223,12 @@ public class TestFileMetadataColumnParser extends SubOperatorTest {
*/
@Test
- public void testWildcardAndFileMetadataMixed() {
+ public void testLegacyWildcardAndFileMetadataMixed() {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
FileMetadataManager metadataManager = new FileMetadataManager(
fixture.getOptionManager(),
+ true, // Use legacy wildcard expansion
+ false, // Put partitions at end
new Path("hdfs:///w"),
Lists.newArrayList(filePath));
@@ -200,18 +240,25 @@ public class TestFileMetadataColumnParser extends SubOperatorTest {
Lists.newArrayList(metadataManager.projectionParser()));
List<ColumnProjection> cols = scanProj.columns();
- assertEquals(3, cols.size());
+ assertEquals(5, cols.size());
assertEquals(FileMetadataColumn.ID, cols.get(0).nodeType());
assertEquals(UnresolvedColumn.WILDCARD, cols.get(1).nodeType());
assertEquals(FileMetadataColumn.ID, cols.get(2).nodeType());
+ assertEquals(PartitionColumn.ID, cols.get(3).nodeType());
+ assertEquals(PartitionColumn.ID, cols.get(4).nodeType());
}
/**
- * Include both a wildcard and a partition column.
+ * Include both a wildcard and a partition column. The wildcard, in
+ * legacy mode, will create partition columns for any partitions not
+ * mentioned in the project list.
+ * <p>
+ * Tests proposed functionality: include only requested partition
+ * columns.
*/
@Test
- public void testWildcardAndPartition() {
+ public void testRevisedWildcardAndPartition() {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
FileMetadataManager metadataManager = new FileMetadataManager(
fixture.getOptionManager(),
@@ -229,6 +276,113 @@ public class TestFileMetadataColumnParser extends SubOperatorTest {
assertEquals(PartitionColumn.ID, cols.get(1).nodeType());
}
+ @Test
+ public void testLegacyWildcardAndPartition() {
+ Path filePath = new Path("hdfs:///w/x/y/z.csv");
+ FileMetadataManager metadataManager = new FileMetadataManager(
+ fixture.getOptionManager(),
+ true, // Use legacy wildcard expansion
+ true, // Put partitions at end
+ new Path("hdfs:///w"),
+ Lists.newArrayList(filePath));
+
+ ScanLevelProjection scanProj = new ScanLevelProjection(
+ RowSetTestUtils.projectList(SchemaPath.DYNAMIC_STAR,
+ ScanTestUtils.partitionColName(8)),
+ Lists.newArrayList(metadataManager.projectionParser()));
+
+ List<ColumnProjection> cols = scanProj.columns();
+ assertEquals(4, cols.size());
+ assertEquals(UnresolvedColumn.WILDCARD, cols.get(0).nodeType());
+ assertEquals(PartitionColumn.ID, cols.get(1).nodeType());
+ assertEquals(0, ((PartitionColumn) cols.get(1)).partition());
+ assertEquals(PartitionColumn.ID, cols.get(2).nodeType());
+ assertEquals(1, ((PartitionColumn) cols.get(2)).partition());
+ assertEquals(PartitionColumn.ID, cols.get(3).nodeType());
+ assertEquals(8, ((PartitionColumn) cols.get(3)).partition());
+ }
+
+ @Test
+ public void testPreferredPartitionExpansion() {
+ Path filePath = new Path("hdfs:///w/x/y/z.csv");
+ FileMetadataManager metadataManager = new FileMetadataManager(
+ fixture.getOptionManager(),
+ true, // Use legacy wildcard expansion
+ false, // Put partitions at end
+ new Path("hdfs:///w"),
+ Lists.newArrayList(filePath));
+
+ ScanLevelProjection scanProj = new ScanLevelProjection(
+ RowSetTestUtils.projectList(SchemaPath.DYNAMIC_STAR,
+ ScanTestUtils.partitionColName(8)),
+ Lists.newArrayList(metadataManager.projectionParser()));
+
+ List<ColumnProjection> cols = scanProj.columns();
+ assertEquals(4, cols.size());
+ assertEquals(UnresolvedColumn.WILDCARD, cols.get(0).nodeType());
+ assertEquals(PartitionColumn.ID, cols.get(1).nodeType());
+ assertEquals(8, ((PartitionColumn) cols.get(1)).partition());
+ assertEquals(PartitionColumn.ID, cols.get(2).nodeType());
+ assertEquals(0, ((PartitionColumn) cols.get(2)).partition());
+ assertEquals(PartitionColumn.ID, cols.get(3).nodeType());
+ assertEquals(1, ((PartitionColumn) cols.get(3)).partition());
+ }
+
+ /**
+ * Test a case like:<br>
+ * <code>SELECT *, dir1 FROM ...</code><br>
+ * The projection list includes "dir1". The wildcard will
+ * fill in "dir0".
+ */
+
+ @Test
+ public void testLegacyWildcardAndPartitionWithOverlap() {
+ Path filePath = new Path("hdfs:///w/x/y/z.csv");
+ FileMetadataManager metadataManager = new FileMetadataManager(
+ fixture.getOptionManager(),
+ true, // Use legacy wildcard expansion
+ true, // Put partitions at end
+ new Path("hdfs:///w"),
+ Lists.newArrayList(filePath));
+
+ ScanLevelProjection scanProj = new ScanLevelProjection(
+ RowSetTestUtils.projectList(SchemaPath.DYNAMIC_STAR,
+ ScanTestUtils.partitionColName(1)),
+ Lists.newArrayList(metadataManager.projectionParser()));
+
+ List<ColumnProjection> cols = scanProj.columns();
+ assertEquals(3, cols.size());
+ assertEquals(UnresolvedColumn.WILDCARD, cols.get(0).nodeType());
+ assertEquals(PartitionColumn.ID, cols.get(1).nodeType());
+ assertEquals(0, ((PartitionColumn) cols.get(1)).partition());
+ assertEquals(PartitionColumn.ID, cols.get(2).nodeType());
+ assertEquals(1, ((PartitionColumn) cols.get(2)).partition());
+ }
+
+ @Test
+ public void testPreferredWildcardExpansionWithOverlap() {
+ Path filePath = new Path("hdfs:///w/x/y/z.csv");
+ FileMetadataManager metadataManager = new FileMetadataManager(
+ fixture.getOptionManager(),
+ true, // Use legacy wildcard expansion
+ false, // Put partitions at end
+ new Path("hdfs:///w"),
+ Lists.newArrayList(filePath));
+
+ ScanLevelProjection scanProj = new ScanLevelProjection(
+ RowSetTestUtils.projectList(SchemaPath.DYNAMIC_STAR,
+ ScanTestUtils.partitionColName(1)),
+ Lists.newArrayList(metadataManager.projectionParser()));
+
+ List<ColumnProjection> cols = scanProj.columns();
+ assertEquals(3, cols.size());
+ assertEquals(UnresolvedColumn.WILDCARD, cols.get(0).nodeType());
+ assertEquals(PartitionColumn.ID, cols.get(1).nodeType());
+ assertEquals(1, ((PartitionColumn) cols.get(1)).partition());
+ assertEquals(PartitionColumn.ID, cols.get(2).nodeType());
+ assertEquals(0, ((PartitionColumn) cols.get(2)).partition());
+ }
+
/**
* Verify that names that look like metadata columns, but appear
* to be maps or arrays, are not interpreted as metadata. That is,
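The legacy-wildcard tests above rely on two new boolean arguments to the FileMetadataManager constructor; the inline comments in the diff describe them as "use legacy wildcard expansion" and "put partitions at end". A sketch of the legacy configuration, with the flag meanings taken from those comments (the formal parameter names are not shown in the diff and are assumptions):

  Path filePath = new Path("hdfs:///w/x/y/z.csv");
  FileMetadataManager metadataManager = new FileMetadataManager(
      fixture.getOptionManager(),
      true,    // legacy behavior: the wildcard expands partition columns
      true,    // place the expanded partition columns at the end of the row
      new Path("hdfs:///w"),           // scan root
      Lists.newArrayList(filePath));

  ScanLevelProjection scanProj = new ScanLevelProjection(
      RowSetTestUtils.projectAll(),
      Lists.newArrayList(metadataManager.projectionParser()));

  // With the file two directories below the root, legacy expansion yields the
  // wildcard plus dir0 and dir1, as testLegacyWildcard() asserts.
  assertEquals(3, scanProj.columns().size());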
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataProjection.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataProjection.java
index 9161932d4..314bc2a13 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataProjection.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataProjection.java
@@ -26,6 +26,7 @@ import static org.junit.Assert.fail;
import java.util.List;
+import org.apache.drill.categories.RowSetTests;
import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.physical.impl.scan.file.FileMetadata;
@@ -46,9 +47,10 @@ import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.test.SubOperatorTest;
import org.apache.hadoop.fs.Path;
import org.junit.Test;
-
+import org.junit.experimental.categories.Category;
import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
+@Category(RowSetTests.class)
public class TestFileMetadataProjection extends SubOperatorTest {
@Test
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileScanFramework.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileScanFramework.java
index a10e7665a..2fdf3e637 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileScanFramework.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileScanFramework.java
@@ -25,6 +25,7 @@ import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
+import org.apache.drill.categories.RowSetTests;
import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MinorType;
@@ -32,7 +33,7 @@ import org.apache.drill.exec.physical.impl.scan.TestScanOperatorExec.AbstractSca
import org.apache.drill.exec.physical.impl.scan.file.BaseFileScanFramework;
import org.apache.drill.exec.physical.impl.scan.file.BaseFileScanFramework.FileSchemaNegotiator;
import org.apache.drill.exec.physical.impl.scan.file.FileScanFramework;
-import org.apache.drill.exec.physical.impl.scan.file.FileScanFramework.FileReaderCreator;
+import org.apache.drill.exec.physical.impl.scan.file.FileScanFramework.FileReaderFactory;
import org.apache.drill.exec.physical.impl.scan.framework.ManagedReader;
import org.apache.drill.exec.physical.rowSet.ResultSetLoader;
import org.apache.drill.exec.physical.rowSet.RowSetLoader;
@@ -50,6 +51,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;
import org.junit.Test;
+import org.junit.experimental.categories.Category;
/**
* Tests the file metadata extensions to the file operator framework.
@@ -57,6 +59,7 @@ import org.junit.Test;
* verified the underlying mechanisms.
*/
+@Category(RowSetTests.class)
public class TestFileScanFramework extends SubOperatorTest {
private static final String MOCK_FILE_NAME = "foo.csv";
@@ -117,7 +120,7 @@ public class TestFileScanFramework extends SubOperatorTest {
}
}
- public static class FileScanOpFixture extends BaseFileScanOpFixture implements FileReaderCreator {
+ public static class FileScanOpFixture extends BaseFileScanOpFixture implements FileReaderFactory {
protected final List<MockFileReader> readers = new ArrayList<>();
protected Iterator<MockFileReader> readerIter;
@@ -252,7 +255,7 @@ public class TestFileScanFramework extends SubOperatorTest {
.add("a", MinorType.INT)
.addNullable("b", MinorType.VARCHAR, 10)
.buildSchema();
- schemaNegotiator.setTableSchema(schema);
+ schemaNegotiator.setTableSchema(schema, true);
tableLoader = schemaNegotiator.build();
return true;
}
@@ -486,7 +489,7 @@ public class TestFileScanFramework extends SubOperatorTest {
.add("b", MinorType.INT)
.resumeSchema()
.buildSchema();
- schemaNegotiator.setTableSchema(schema);
+ schemaNegotiator.setTableSchema(schema, true);
tableLoader = schemaNegotiator.build();
return true;
}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestNullColumnLoader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestNullColumnLoader.java
deleted file mode 100644
index a41305221..000000000
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestNullColumnLoader.java
+++ /dev/null
@@ -1,281 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.physical.impl.scan;
-
-import static org.junit.Assert.assertSame;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.drill.common.types.TypeProtos.DataMode;
-import org.apache.drill.common.types.TypeProtos.MajorType;
-import org.apache.drill.common.types.TypeProtos.MinorType;
-import org.apache.drill.common.types.Types;
-import org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder;
-import org.apache.drill.exec.physical.impl.scan.project.NullColumnLoader;
-import org.apache.drill.exec.physical.impl.scan.project.ResolvedNullColumn;
-import org.apache.drill.exec.physical.rowSet.ResultVectorCache;
-import org.apache.drill.exec.physical.rowSet.impl.NullResultVectorCacheImpl;
-import org.apache.drill.exec.physical.rowSet.impl.ResultVectorCacheImpl;
-import org.apache.drill.exec.record.BatchSchema;
-import org.apache.drill.exec.record.VectorContainer;
-import org.apache.drill.exec.record.metadata.SchemaBuilder;
-import org.apache.drill.exec.vector.ValueVector;
-import org.apache.drill.test.SubOperatorTest;
-import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
-import org.apache.drill.test.rowSet.RowSetComparison;
-import org.junit.Test;
-
-public class TestNullColumnLoader extends SubOperatorTest {
-
- private ResolvedNullColumn makeNullCol(String name, MajorType nullType) {
-
- // For this test, we don't need the projection, so just
- // set it to null.
-
- return new ResolvedNullColumn(name, nullType, null, 0);
- }
-
- private ResolvedNullColumn makeNullCol(String name) {
- return makeNullCol(name, null);
- }
-
- /**
- * Test the simplest case: default null type, nothing in the vector
- * cache. Specify no column type, the special NULL type, or a
- * predefined type. Output types should be set accordingly.
- */
-
- @Test
- public void testBasics() {
-
- List<ResolvedNullColumn> defns = new ArrayList<>();
- defns.add(makeNullCol("unspecified", null));
- defns.add(makeNullCol("nullType", Types.optional(MinorType.NULL)));
- defns.add(makeNullCol("specifiedOpt", Types.optional(MinorType.VARCHAR)));
- defns.add(makeNullCol("specifiedReq", Types.required(MinorType.VARCHAR)));
- defns.add(makeNullCol("specifiedArray", Types.repeated(MinorType.VARCHAR)));
-
- ResultVectorCache cache = new NullResultVectorCacheImpl(fixture.allocator());
- NullColumnLoader staticLoader = new NullColumnLoader(cache, defns, null, false);
-
- // Create a batch
-
- VectorContainer output = staticLoader.load(2);
-
- // Verify values and types
-
- BatchSchema expectedSchema = new SchemaBuilder()
- .add("unspecified", NullColumnLoader.DEFAULT_NULL_TYPE)
- .add("nullType", NullColumnLoader.DEFAULT_NULL_TYPE)
- .addNullable("specifiedOpt", MinorType.VARCHAR)
- .addNullable("specifiedReq", MinorType.VARCHAR)
- .addArray("specifiedArray", MinorType.VARCHAR)
- .build();
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .addRow(null, null, null, null, new String[] {})
- .addRow(null, null, null, null, new String[] {})
- .build();
-
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(output));
- staticLoader.close();
- }
-
- @Test
- public void testCustomNullType() {
-
- List<ResolvedNullColumn> defns = new ArrayList<>();
- defns.add(makeNullCol("unspecified", null));
- defns.add(makeNullCol("nullType", MajorType.newBuilder()
- .setMinorType(MinorType.NULL)
- .setMode(DataMode.OPTIONAL)
- .build()));
- defns.add(makeNullCol("nullTypeReq", MajorType.newBuilder()
- .setMinorType(MinorType.NULL)
- .setMode(DataMode.REQUIRED)
- .build()));
-
- // Null type array does not make sense, so is not tested.
-
- ResultVectorCache cache = new NullResultVectorCacheImpl(fixture.allocator());
- MajorType nullType = MajorType.newBuilder()
- .setMinorType(MinorType.VARCHAR)
- .setMode(DataMode.OPTIONAL)
- .build();
- NullColumnLoader staticLoader = new NullColumnLoader(cache, defns, nullType, false);
-
- // Create a batch
-
- VectorContainer output = staticLoader.load(2);
-
- // Verify values and types
-
- BatchSchema expectedSchema = new SchemaBuilder()
- .add("unspecified", nullType)
- .add("nullType", nullType)
- .add("nullTypeReq", nullType)
- .build();
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .addRow(null, null, null)
- .addRow(null, null, null)
- .build();
-
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(output));
- staticLoader.close();
- }
-
- @Test
- public void testCachedTypesMapToNullable() {
-
- List<ResolvedNullColumn> defns = new ArrayList<>();
- defns.add(makeNullCol("req"));
- defns.add(makeNullCol("opt"));
- defns.add(makeNullCol("rep"));
- defns.add(makeNullCol("unk"));
-
- // Populate the cache with a column of each mode.
-
- ResultVectorCacheImpl cache = new ResultVectorCacheImpl(fixture.allocator());
- cache.addOrGet(SchemaBuilder.columnSchema("req", MinorType.FLOAT8, DataMode.REQUIRED));
- ValueVector opt = cache.addOrGet(SchemaBuilder.columnSchema("opt", MinorType.FLOAT8, DataMode.OPTIONAL));
- ValueVector rep = cache.addOrGet(SchemaBuilder.columnSchema("rep", MinorType.FLOAT8, DataMode.REPEATED));
-
- // Use nullable Varchar for unknown null columns.
-
- MajorType nullType = MajorType.newBuilder()
- .setMinorType(MinorType.VARCHAR)
- .setMode(DataMode.OPTIONAL)
- .build();
- NullColumnLoader staticLoader = new NullColumnLoader(cache, defns, nullType, false);
-
- // Create a batch
-
- VectorContainer output = staticLoader.load(2);
-
- // Verify vectors are reused
-
- assertSame(opt, output.getValueVector(1).getValueVector());
- assertSame(rep, output.getValueVector(2).getValueVector());
-
- // Verify values and types
-
- BatchSchema expectedSchema = new SchemaBuilder()
- .addNullable("req", MinorType.FLOAT8)
- .addNullable("opt", MinorType.FLOAT8)
- .addArray("rep", MinorType.FLOAT8)
- .addNullable("unk", MinorType.VARCHAR)
- .build();
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .addRow(null, null, new int[] { }, null)
- .addRow(null, null, new int[] { }, null)
- .build();
-
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(output));
- staticLoader.close();
- }
-
- @Test
- public void testCachedTypesAllowRequired() {
-
- List<ResolvedNullColumn> defns = new ArrayList<>();
- defns.add(makeNullCol("req"));
- defns.add(makeNullCol("opt"));
- defns.add(makeNullCol("rep"));
- defns.add(makeNullCol("unk"));
-
- // Populate the cache with a column of each mode.
-
- ResultVectorCacheImpl cache = new ResultVectorCacheImpl(fixture.allocator());
- cache.addOrGet(SchemaBuilder.columnSchema("req", MinorType.FLOAT8, DataMode.REQUIRED));
- ValueVector opt = cache.addOrGet(SchemaBuilder.columnSchema("opt", MinorType.FLOAT8, DataMode.OPTIONAL));
- ValueVector rep = cache.addOrGet(SchemaBuilder.columnSchema("rep", MinorType.FLOAT8, DataMode.REPEATED));
-
- // Use nullable Varchar for unknown null columns.
-
- MajorType nullType = MajorType.newBuilder()
- .setMinorType(MinorType.VARCHAR)
- .setMode(DataMode.OPTIONAL)
- .build();
- NullColumnLoader staticLoader = new NullColumnLoader(cache, defns, nullType, true);
-
- // Create a batch
-
- VectorContainer output = staticLoader.load(2);
-
- // Verify vectors are reused
-
- assertSame(opt, output.getValueVector(1).getValueVector());
- assertSame(rep, output.getValueVector(2).getValueVector());
-
- // Verify values and types
-
- BatchSchema expectedSchema = new SchemaBuilder()
- .add("req", MinorType.FLOAT8)
- .addNullable("opt", MinorType.FLOAT8)
- .addArray("rep", MinorType.FLOAT8)
- .addNullable("unk", MinorType.VARCHAR)
- .build();
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .addRow(0.0, null, new int[] { }, null)
- .addRow(0.0, null, new int[] { }, null)
- .build();
-
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(output));
- staticLoader.close();
- }
-
- @Test
- public void testNullColumnBuilder() {
-
- ResultVectorCache cache = new NullResultVectorCacheImpl(fixture.allocator());
- NullColumnBuilder builder = new NullColumnBuilder(null, false);
-
- builder.add("unspecified");
- builder.add("nullType", Types.optional(MinorType.NULL));
- builder.add("specifiedOpt", Types.optional(MinorType.VARCHAR));
- builder.add("specifiedReq", Types.required(MinorType.VARCHAR));
- builder.add("specifiedArray", Types.repeated(MinorType.VARCHAR));
- builder.build(cache);
-
- // Create a batch
-
- builder.load(2);
-
- // Verify values and types
-
- BatchSchema expectedSchema = new SchemaBuilder()
- .add("unspecified", NullColumnLoader.DEFAULT_NULL_TYPE)
- .add("nullType", NullColumnLoader.DEFAULT_NULL_TYPE)
- .addNullable("specifiedOpt", MinorType.VARCHAR)
- .addNullable("specifiedReq", MinorType.VARCHAR)
- .addArray("specifiedArray", MinorType.VARCHAR)
- .build();
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .addRow(null, null, null, null, new String[] {})
- .addRow(null, null, null, null, new String[] {})
- .build();
-
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(builder.output()));
- builder.close();
- }
-}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestRowBatchMerger.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestRowBatchMerger.java
deleted file mode 100644
index 079455789..000000000
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestRowBatchMerger.java
+++ /dev/null
@@ -1,459 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.physical.impl.scan;
-
-import org.apache.drill.common.types.TypeProtos.MinorType;
-import org.apache.drill.common.types.Types;
-import org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder;
-import org.apache.drill.exec.physical.impl.scan.project.ResolvedMapColumn;
-import org.apache.drill.exec.physical.impl.scan.project.ResolvedColumn;
-import org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple;
-import org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow;
-import org.apache.drill.exec.physical.impl.scan.project.VectorSource;
-import org.apache.drill.exec.physical.rowSet.ResultVectorCache;
-import org.apache.drill.exec.physical.rowSet.impl.NullResultVectorCacheImpl;
-import org.apache.drill.exec.record.BatchSchema;
-import org.apache.drill.exec.record.MaterializedField;
-import org.apache.drill.exec.record.VectorContainer;
-import org.apache.drill.exec.record.metadata.SchemaBuilder;
-import org.apache.drill.exec.vector.ValueVector;
-import org.apache.drill.test.SubOperatorTest;
-import org.apache.drill.test.rowSet.RowSet;
-import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
-import org.apache.drill.test.rowSet.RowSetComparison;
-import org.junit.Test;
-
-import static org.apache.drill.test.rowSet.RowSetUtilities.mapValue;
-import static org.apache.drill.test.rowSet.RowSetUtilities.singleMap;
-import static org.apache.drill.test.rowSet.RowSetUtilities.mapArray;
-
-
-/**
- * Test the row batch merger by merging two batches. Tests both the
- * "direct" and "exchange" cases. Direct means that the output container
- * contains the source vector directly: they are the same vectors.
- * Exchange means we have two vectors, but we swap the underlying
- * Drillbufs to effectively shift data from source to destination
- * vector.
- */
-
-public class TestRowBatchMerger extends SubOperatorTest {
-
- public static class RowSetSource implements VectorSource {
-
- private SingleRowSet rowSet;
-
- public RowSetSource(SingleRowSet rowSet) {
- this.rowSet = rowSet;
- }
-
- public RowSet rowSet() { return rowSet; }
-
- public void clear() {
- rowSet.clear();
- }
-
- @Override
- public ValueVector vector(int index) {
- return rowSet.container().getValueVector(index).getValueVector();
- }
- }
-
- private RowSetSource makeFirst() {
- BatchSchema firstSchema = new SchemaBuilder()
- .add("d", MinorType.VARCHAR)
- .add("a", MinorType.INT)
- .build();
- return new RowSetSource(
- fixture.rowSetBuilder(firstSchema)
- .addRow("barney", 10)
- .addRow("wilma", 20)
- .build());
- }
-
- private RowSetSource makeSecond() {
- BatchSchema secondSchema = new SchemaBuilder()
- .add("b", MinorType.INT)
- .add("c", MinorType.VARCHAR)
- .build();
- return new RowSetSource(
- fixture.rowSetBuilder(secondSchema)
- .addRow(1, "foo.csv")
- .addRow(2, "foo.csv")
- .build());
- }
-
- public static class TestProjection extends ResolvedColumn {
-
- public TestProjection(VectorSource source, int sourceIndex) {
- super(source, sourceIndex);
- }
-
- @Override
- public String name() { return null; }
-
- @Override
- public int nodeType() { return -1; }
-
- @Override
- public MaterializedField schema() { return null; }
- }
-
- @Test
- public void testSimpleFlat() {
-
- // Create the first batch
-
- RowSetSource first = makeFirst();
-
- // Create the second batch
-
- RowSetSource second = makeSecond();
-
- ResolvedRow resolvedTuple = new ResolvedRow(null);
- resolvedTuple.add(new TestProjection(first, 1));
- resolvedTuple.add(new TestProjection(second, 0));
- resolvedTuple.add(new TestProjection(second, 1));
- resolvedTuple.add(new TestProjection(first, 0));
-
- // Do the merge
-
- VectorContainer output = new VectorContainer(fixture.allocator());
- resolvedTuple.project(null, output);
- output.setRecordCount(first.rowSet().rowCount());
- RowSet result = fixture.wrap(output);
-
- // Verify
-
- BatchSchema expectedSchema = new SchemaBuilder()
- .add("a", MinorType.INT)
- .add("b", MinorType.INT)
- .add("c", MinorType.VARCHAR)
- .add("d", MinorType.VARCHAR)
- .build();
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .addRow(10, 1, "foo.csv", "barney")
- .addRow(20, 2, "foo.csv", "wilma")
- .build();
-
- new RowSetComparison(expected)
- .verifyAndClearAll(result);
- }
-
- @Test
- public void testImplicitFlat() {
-
- // Create the first batch
-
- RowSetSource first = makeFirst();
-
- // Create the second batch
-
- RowSetSource second = makeSecond();
-
- ResolvedRow resolvedTuple = new ResolvedRow(null);
- resolvedTuple.add(new TestProjection(resolvedTuple, 1));
- resolvedTuple.add(new TestProjection(second, 0));
- resolvedTuple.add(new TestProjection(second, 1));
- resolvedTuple.add(new TestProjection(resolvedTuple, 0));
-
- // Do the merge
-
- VectorContainer output = new VectorContainer(fixture.allocator());
- resolvedTuple.project(first.rowSet().container(), output);
- output.setRecordCount(first.rowSet().rowCount());
- RowSet result = fixture.wrap(output);
-
- // Verify
-
- BatchSchema expectedSchema = new SchemaBuilder()
- .add("a", MinorType.INT)
- .add("b", MinorType.INT)
- .add("c", MinorType.VARCHAR)
- .add("d", MinorType.VARCHAR)
- .build();
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .addRow(10, 1, "foo.csv", "barney")
- .addRow(20, 2, "foo.csv", "wilma")
- .build();
-
- new RowSetComparison(expected)
- .verifyAndClearAll(result);
- }
-
- @Test
- public void testFlatWithNulls() {
-
- // Create the first batch
-
- RowSetSource first = makeFirst();
-
- // Create null columns
-
- NullColumnBuilder builder = new NullColumnBuilder(null, false);
-
- ResolvedRow resolvedTuple = new ResolvedRow(builder);
- resolvedTuple.add(new TestProjection(resolvedTuple, 1));
- resolvedTuple.add(resolvedTuple.nullBuilder().add("null1"));
- resolvedTuple.add(resolvedTuple.nullBuilder().add("null2", Types.optional(MinorType.VARCHAR)));
- resolvedTuple.add(new TestProjection(resolvedTuple, 0));
-
- // Build the null values
-
- ResultVectorCache cache = new NullResultVectorCacheImpl(fixture.allocator());
- builder.build(cache);
- builder.load(first.rowSet().rowCount());
-
- // Do the merge
-
- VectorContainer output = new VectorContainer(fixture.allocator());
- resolvedTuple.project(first.rowSet().container(), output);
- output.setRecordCount(first.rowSet().rowCount());
- RowSet result = fixture.wrap(output);
-
- // Verify
-
- BatchSchema expectedSchema = new SchemaBuilder()
- .add("a", MinorType.INT)
- .addNullable("null1", MinorType.INT)
- .addNullable("null2", MinorType.VARCHAR)
- .add("d", MinorType.VARCHAR)
- .build();
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .addRow(10, null, null, "barney")
- .addRow(20, null, null, "wilma")
- .build();
-
- new RowSetComparison(expected)
- .verifyAndClearAll(result);
- builder.close();
- }
-
- /**
- * Test the ability to create maps from whole cloth if requested in
- * the projection list, and the map is not available from the data
- * source.
- */
-
- @Test
- public void testNullMaps() {
-
- // Create the first batch
-
- RowSetSource first = makeFirst();
-
- // Create null columns
-
- NullColumnBuilder builder = new NullColumnBuilder(null, false);
- ResolvedRow resolvedTuple = new ResolvedRow(builder);
-
- resolvedTuple.add(new TestProjection(resolvedTuple, 1));
-
- ResolvedMapColumn nullMapCol = new ResolvedMapColumn(resolvedTuple, "map1");
- ResolvedTuple nullMap = nullMapCol.members();
- nullMap.add(nullMap.nullBuilder().add("null1"));
- nullMap.add(nullMap.nullBuilder().add("null2", Types.optional(MinorType.VARCHAR)));
-
- ResolvedMapColumn nullMapCol2 = new ResolvedMapColumn(nullMap, "map2");
- ResolvedTuple nullMap2 = nullMapCol2.members();
- nullMap2.add(nullMap2.nullBuilder().add("null3"));
- nullMap.add(nullMapCol2);
-
- resolvedTuple.add(nullMapCol);
- resolvedTuple.add(new TestProjection(resolvedTuple, 0));
-
- // Build the null values
-
- ResultVectorCache cache = new NullResultVectorCacheImpl(fixture.allocator());
- resolvedTuple.buildNulls(cache);
-
- // LoadNulls
-
- resolvedTuple.loadNulls(first.rowSet().rowCount());
-
- // Do the merge
-
- VectorContainer output = new VectorContainer(fixture.allocator());
- resolvedTuple.project(first.rowSet().container(), output);
- resolvedTuple.setRowCount(first.rowSet().rowCount());
- RowSet result = fixture.wrap(output);
-
- // Verify
-
- BatchSchema expectedSchema = new SchemaBuilder()
- .add("a", MinorType.INT)
- .addMap("map1")
- .addNullable("null1", MinorType.INT)
- .addNullable("null2", MinorType.VARCHAR)
- .addMap("map2")
- .addNullable("null3", MinorType.INT)
- .resumeMap()
- .resumeSchema()
- .add("d", MinorType.VARCHAR)
- .build();
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .addRow(10, mapValue(null, null, singleMap(null)), "barney")
- .addRow(20, mapValue(null, null, singleMap(null)), "wilma")
- .build();
-
- new RowSetComparison(expected)
- .verifyAndClearAll(result);
- resolvedTuple.close();
- }
-
- /**
- * Test that the merger mechanism can rewrite a map to include
- * projected null columns.
- */
-
- @Test
- public void testMapRevision() {
-
- // Create the first batch
-
- BatchSchema inputSchema = new SchemaBuilder()
- .add("b", MinorType.VARCHAR)
- .addMap("a")
- .add("c", MinorType.INT)
- .resumeSchema()
- .build();
- RowSetSource input = new RowSetSource(
- fixture.rowSetBuilder(inputSchema)
- .addRow("barney", singleMap(10))
- .addRow("wilma", singleMap(20))
- .build());
-
- // Create mappings
-
- NullColumnBuilder builder = new NullColumnBuilder(null, false);
- ResolvedRow resolvedTuple = new ResolvedRow(builder);
-
- resolvedTuple.add(new TestProjection(resolvedTuple, 0));
- ResolvedMapColumn mapCol = new ResolvedMapColumn(resolvedTuple,
- inputSchema.getColumn(1), 1);
- resolvedTuple.add(mapCol);
- ResolvedTuple map = mapCol.members();
- map.add(new TestProjection(map, 0));
- map.add(map.nullBuilder().add("null1"));
-
- // Build the null values
-
- ResultVectorCache cache = new NullResultVectorCacheImpl(fixture.allocator());
- resolvedTuple.buildNulls(cache);
-
- // LoadNulls
-
- resolvedTuple.loadNulls(input.rowSet().rowCount());
-
- // Do the merge
-
- VectorContainer output = new VectorContainer(fixture.allocator());
- resolvedTuple.project(input.rowSet().container(), output);
- output.setRecordCount(input.rowSet().rowCount());
- RowSet result = fixture.wrap(output);
-
- // Verify
-
- BatchSchema expectedSchema = new SchemaBuilder()
- .add("b", MinorType.VARCHAR)
- .addMap("a")
- .add("c", MinorType.INT)
- .addNullable("null1", MinorType.INT)
- .resumeSchema()
- .build();
- RowSet expected = fixture.rowSetBuilder(expectedSchema)
- .addRow("barney", mapValue(10, null))
- .addRow("wilma", mapValue(20, null))
- .build();
-
- new RowSetComparison(expected)
- .verifyAndClearAll(result);
- }
-
- /**
- * Test that the merger mechanism can rewrite a map array to include
- * projected null columns.
- */
-
- @Test
- public void testMapArrayRevision() {
-
- // Create the first batch
-
- BatchSchema inputSchema = new SchemaBuilder()
- .add("b", MinorType.VARCHAR)
- .addMapArray("a")
- .add("c", MinorType.INT)
- .resumeSchema()
- .build();
- RowSetSource input = new RowSetSource(
- fixture.rowSetBuilder(inputSchema)
- .addRow("barney", mapArray(singleMap(10), singleMap(11), singleMap(12)))
- .addRow("wilma", mapArray(singleMap(20), singleMap(21)))
- .build());
-
- // Create mappings
-
- NullColumnBuilder builder = new NullColumnBuilder(null, false);
- ResolvedRow resolvedTuple = new ResolvedRow(builder);
-
- resolvedTuple.add(new TestProjection(resolvedTuple, 0));
- ResolvedMapColumn mapCol = new ResolvedMapColumn(resolvedTuple,
- inputSchema.getColumn(1), 1);
- resolvedTuple.add(mapCol);
- ResolvedTuple map = mapCol.members();
- map.add(new TestProjection(map, 0));
- map.add(map.nullBuilder().add("null1"));
-
- // Build the null values
-
- ResultVectorCache cache = new NullResultVectorCacheImpl(fixture.allocator());
- resolvedTuple.buildNulls(cache);
-
- // LoadNulls
-
- resolvedTuple.loadNulls(input.rowSet().rowCount());
-
- // Do the merge
-
- VectorContainer output = new VectorContainer(fixture.allocator());
- resolvedTuple.project(input.rowSet().container(), output);
- output.setRecordCount(input.rowSet().rowCount());
- RowSet result = fixture.wrap(output);
-
- // Verify
-
- BatchSchema expectedSchema = new SchemaBuilder()
- .add("b", MinorType.VARCHAR)
- .addMapArray("a")
- .add("c", MinorType.INT)
- .addNullable("null1", MinorType.INT)
- .resumeSchema()
- .build();
- RowSet expected = fixture.rowSetBuilder(expectedSchema)
- .addRow("barney", mapArray(
- mapValue(10, null), mapValue(11, null), mapValue(12, null)))
- .addRow("wilma", mapArray(
- mapValue(20, null), mapValue(21, null)))
- .build();
-
- new RowSetComparison(expected)
- .verifyAndClearAll(result);
- }
-
-}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanBatchWriters.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanBatchWriters.java
index 912a7172e..03587cb08 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanBatchWriters.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanBatchWriters.java
@@ -17,6 +17,7 @@
*/
package org.apache.drill.exec.physical.impl.scan;
+import org.apache.drill.categories.RowSetTests;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.ops.OperatorContext;
import org.apache.drill.exec.physical.base.AbstractSubScan;
@@ -34,6 +35,7 @@ import org.apache.drill.test.SubOperatorTest;
import org.apache.drill.test.rowSet.RowSet;
import org.apache.drill.test.rowSet.RowSetComparison;
import org.junit.Test;
+import org.junit.experimental.categories.Category;
import io.netty.buffer.DrillBuf;
@@ -42,6 +44,7 @@ import io.netty.buffer.DrillBuf;
* set follows the same semantics as the original set.
*/
+@Category(RowSetTests.class)
public class TestScanBatchWriters extends SubOperatorTest {
@Test
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanLevelProjection.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanLevelProjection.java
deleted file mode 100644
index 558f761c9..000000000
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanLevelProjection.java
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.physical.impl.scan;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import org.apache.drill.common.exceptions.UserException;
-import org.apache.drill.common.expression.SchemaPath;
-import org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection;
-import org.apache.drill.exec.physical.impl.scan.project.UnresolvedColumn;
-import org.apache.drill.exec.physical.rowSet.impl.RowSetTestUtils;
-import org.apache.drill.exec.physical.rowSet.project.RequestedTuple.RequestedColumn;
-import org.apache.drill.exec.record.metadata.ProjectionType;
-import org.apache.drill.test.SubOperatorTest;
-import org.junit.Test;
-
-/**
- * Test the projection done at the level of the scan as a whole,
- * before knowledge of table "implicit" columns or the specific table schema.
- */
-
-public class TestScanLevelProjection extends SubOperatorTest {
-
- /**
- * Basic test: select a set of columns (a, b, c) when the
-   * data source has an early schema of (a, c, d). (a, c) are
-   * projected, (b) is null, and (d) is unprojected.
- */
-
- @Test
- public void testBasics() {
-
- // Simulate SELECT a, b, c ...
- // Build the projection plan and verify
-
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectList("a", "b", "c"),
- ScanTestUtils.parsers());
- assertFalse(scanProj.projectAll());
- assertFalse(scanProj.projectNone());
-
- assertEquals(3, scanProj.requestedCols().size());
- assertEquals("a", scanProj.requestedCols().get(0).rootName());
- assertEquals("b", scanProj.requestedCols().get(1).rootName());
- assertEquals("c", scanProj.requestedCols().get(2).rootName());
-
- assertEquals(3, scanProj.columns().size());
- assertEquals("a", scanProj.columns().get(0).name());
- assertEquals("b", scanProj.columns().get(1).name());
- assertEquals("c", scanProj.columns().get(2).name());
-
- // Verify column type
-
- assertEquals(UnresolvedColumn.UNRESOLVED, scanProj.columns().get(0).nodeType());
- }
-
- @Test
- public void testMap() {
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectList("a.x", "b.x", "a.y", "b.y", "c"),
- ScanTestUtils.parsers());
- assertFalse(scanProj.projectAll());
- assertFalse(scanProj.projectNone());
-
- assertEquals(3, scanProj.columns().size());
- assertEquals("a", scanProj.columns().get(0).name());
- assertEquals("b", scanProj.columns().get(1).name());
- assertEquals("c", scanProj.columns().get(2).name());
-
- // Verify column type
-
- assertEquals(UnresolvedColumn.UNRESOLVED, scanProj.columns().get(0).nodeType());
-
- // Map structure
-
- RequestedColumn a = ((UnresolvedColumn) scanProj.columns().get(0)).element();
- assertTrue(a.isTuple());
- assertEquals(ProjectionType.UNSPECIFIED, a.mapProjection().projectionType("x"));
- assertEquals(ProjectionType.UNSPECIFIED, a.mapProjection().projectionType("y"));
- assertEquals(ProjectionType.UNPROJECTED, a.mapProjection().projectionType("z"));
-
- RequestedColumn c = ((UnresolvedColumn) scanProj.columns().get(2)).element();
- assertTrue(c.isSimple());
- }
-
- @Test
- public void testArray() {
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectList("a[1]", "a[3]"),
- ScanTestUtils.parsers());
- assertFalse(scanProj.projectAll());
- assertFalse(scanProj.projectNone());
-
- assertEquals(1, scanProj.columns().size());
- assertEquals("a", scanProj.columns().get(0).name());
-
- // Verify column type
-
- assertEquals(UnresolvedColumn.UNRESOLVED, scanProj.columns().get(0).nodeType());
-
- // Map structure
-
- RequestedColumn a = ((UnresolvedColumn) scanProj.columns().get(0)).element();
- assertTrue(a.isArray());
- assertFalse(a.hasIndex(0));
- assertTrue(a.hasIndex(1));
- assertFalse(a.hasIndex(2));
- assertTrue(a.hasIndex(3));
- }
-
- /**
- * Simulate a SELECT * query by passing "**" (Drill's internal version
- * of the wildcard) as a column name.
- */
-
- @Test
- public void testWildcard() {
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectAll(),
- ScanTestUtils.parsers());
-
- assertTrue(scanProj.projectAll());
- assertFalse(scanProj.projectNone());
- assertEquals(1, scanProj.requestedCols().size());
- assertTrue(scanProj.requestedCols().get(0).isDynamicStar());
-
- assertEquals(1, scanProj.columns().size());
- assertEquals(SchemaPath.DYNAMIC_STAR, scanProj.columns().get(0).name());
-
- // Verify bindings
-
- assertEquals(scanProj.columns().get(0).name(), scanProj.requestedCols().get(0).rootName());
-
- // Verify column type
-
- assertEquals(UnresolvedColumn.WILDCARD, scanProj.columns().get(0).nodeType());
- }
-
- /**
- * Test an empty projection which occurs in a
- * SELECT COUNT(*) query.
- */
-
- @Test
- public void testEmptyProjection() {
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectList(),
- ScanTestUtils.parsers());
-
- assertFalse(scanProj.projectAll());
- assertTrue(scanProj.projectNone());
- assertEquals(0, scanProj.requestedCols().size());
- }
-
- /**
- * Can't include both a wildcard and a column name.
- */
-
- @Test
- public void testErrorWildcardAndColumns() {
- try {
- new ScanLevelProjection(
- RowSetTestUtils.projectList(SchemaPath.DYNAMIC_STAR, "a"),
- ScanTestUtils.parsers());
- fail();
- } catch (IllegalArgumentException e) {
- // Expected
- }
- }
-
- /**
- * Can't include both a column name and a wildcard.
- */
-
- @Test
- public void testErrorColumnAndWildcard() {
- try {
- new ScanLevelProjection(
- RowSetTestUtils.projectList("a", SchemaPath.DYNAMIC_STAR),
- ScanTestUtils.parsers());
- fail();
- } catch (IllegalArgumentException e) {
- // Expected
- }
- }
-
- /**
- * Can't include a wildcard twice.
- * <p>
- * Note: Drill actually allows this, but the work should be done
- * in the project operator; scan should see at most one wildcard.
- */
-
- @Test
- public void testErrorTwoWildcards() {
- try {
- new ScanLevelProjection(
- RowSetTestUtils.projectList(SchemaPath.DYNAMIC_STAR, SchemaPath.DYNAMIC_STAR),
- ScanTestUtils.parsers());
- fail();
- } catch (UserException e) {
- // Expected
- }
- }
-}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOperatorExec.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOperatorExec.java
index 33752a746..386849bb6 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOperatorExec.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOperatorExec.java
@@ -27,6 +27,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
+import org.apache.drill.categories.RowSetTests;
import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.expression.SchemaPath;
@@ -54,6 +55,7 @@ import org.apache.drill.test.SubOperatorTest;
import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
import org.apache.drill.test.rowSet.RowSetComparison;
import org.junit.Test;
+import org.junit.experimental.categories.Category;
/**
* Test of the scan operator framework. Here the focus is on the
@@ -66,6 +68,7 @@ import org.junit.Test;
* appear elsewhere.
*/
+@Category(RowSetTests.class)
public class TestScanOperatorExec extends SubOperatorTest {
private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestScanOperatorExec.class);
@@ -187,7 +190,7 @@ public class TestScanOperatorExec extends SubOperatorTest {
.add("a", MinorType.INT)
.addNullable("b", MinorType.VARCHAR, 10)
.buildSchema();
- schemaNegotiator.setTableSchema(schema);
+ schemaNegotiator.setTableSchema(schema, true);
tableLoader = schemaNegotiator.build();
return true;
}
@@ -213,7 +216,7 @@ public class TestScanOperatorExec extends SubOperatorTest {
.add("a", MinorType.VARCHAR)
.addNullable("b", MinorType.VARCHAR, 10)
.buildSchema();
- schemaNegotiator.setTableSchema(schema);
+ schemaNegotiator.setTableSchema(schema, true);
schemaNegotiator.build();
tableLoader = schemaNegotiator.build();
return true;
@@ -1367,7 +1370,7 @@ public class TestScanOperatorExec extends SubOperatorTest {
TupleMetadata schema = new SchemaBuilder()
.add("a", MinorType.VARCHAR)
.buildSchema();
- schemaNegotiator.setTableSchema(schema);
+ schemaNegotiator.setTableSchema(schema, true);
tableLoader = schemaNegotiator.build();
return true;
}
@@ -1493,7 +1496,7 @@ public class TestScanOperatorExec extends SubOperatorTest {
TupleMetadata schema = new SchemaBuilder()
.add("a", MinorType.INT)
.buildSchema();
- schemaNegotiator.setTableSchema(schema);
+ schemaNegotiator.setTableSchema(schema, true);
tableLoader = schemaNegotiator.build();
return true;
}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorEarlySchema.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorEarlySchema.java
index d8c9a65ef..ef79b0ee6 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorEarlySchema.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorEarlySchema.java
@@ -22,12 +22,13 @@ import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
+import org.apache.drill.categories.RowSetTests;
import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.physical.impl.protocol.SchemaTracker;
+import org.apache.drill.exec.physical.impl.scan.project.ReaderSchemaOrchestrator;
import org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator;
-import org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ReaderSchemaOrchestrator;
import org.apache.drill.exec.physical.rowSet.ResultSetLoader;
import org.apache.drill.exec.physical.rowSet.impl.RowSetTestUtils;
import org.apache.drill.exec.record.BatchSchema;
@@ -37,8 +38,9 @@ import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.test.SubOperatorTest;
import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
-import org.apache.drill.test.rowSet.RowSetComparison;
+import org.apache.drill.test.rowSet.RowSetUtilities;
import org.junit.Test;
+import org.junit.experimental.categories.Category;
/**
* Test the early-schema support of the scan orchestrator. "Early schema"
@@ -49,6 +51,7 @@ import org.junit.Test;
* that tests for lower-level components have already passed.
*/
+@Category(RowSetTests.class)
public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
/**
@@ -78,16 +81,17 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
ResultSetLoader loader = reader.makeTableLoader(tableSchema);
- // Schema provided, so an empty batch is available to
- // send downstream.
+ // Simulate a first reader in a scan that can provide an
+ // empty batch to define schema.
{
+ reader.defineSchema();
SingleRowSet expected = fixture.rowSetBuilder(tableSchema)
.build();
assertNotNull(scanner.output());
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
+ RowSetUtilities.verify(expected,
+ fixture.wrap(scanner.output()));
}
// Create a batch of data.
@@ -107,8 +111,8 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
.addRow(2, "wilma")
.build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
+ RowSetUtilities.verify(expected,
+ fixture.wrap(scanner.output()));
}
// Second batch.
@@ -128,8 +132,8 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
.addRow(4, "betty")
.build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
+ RowSetUtilities.verify(expected,
+ fixture.wrap(scanner.output()));
}
// Explicit reader close. (All other tests are lazy, they
@@ -167,16 +171,8 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
ResultSetLoader loader = reader.makeTableLoader(tableSchema);
- // Verify empty batch.
-
- {
- SingleRowSet expected = fixture.rowSetBuilder(tableSchema)
- .build();
-
- assertNotNull(scanner.output());
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
- }
+ // Don't bother with an empty batch here or in other tests.
+ // Simulates the second reader in a scan.
// Create a batch of data.
@@ -188,15 +184,13 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
// Verify
- {
- SingleRowSet expected = fixture.rowSetBuilder(tableSchema)
- .addRow(1, "fred")
- .addRow(2, "wilma")
- .build();
+ SingleRowSet expected = fixture.rowSetBuilder(tableSchema)
+ .addRow(1, "fred")
+ .addRow(2, "wilma")
+ .build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
- }
+ RowSetUtilities.verify(expected,
+ fixture.wrap(scanner.output()));
scanner.close();
}
@@ -228,20 +222,10 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
ResultSetLoader loader = reader.makeTableLoader(tableSchema);
- // Verify empty batch.
-
BatchSchema expectedSchema = new SchemaBuilder()
.add("b", MinorType.VARCHAR)
.add("a", MinorType.INT)
.build();
- {
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .build();
-
- assertNotNull(scanner.output());
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
- }
// Create a batch of data.
@@ -253,17 +237,15 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
// Verify
- {
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .addRow("fred", 1)
- .addRow("wilma", 2)
- .build();
+ SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .addRow("fred", 1)
+ .addRow("wilma", 2)
+ .build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
- }
+ RowSetUtilities.verify(expected,
+ fixture.wrap(scanner.output()));
- scanner.close();
+ scanner.close();
}
/**
@@ -294,21 +276,11 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
ResultSetLoader loader = reader.makeTableLoader(tableSchema);
- // Verify empty batch
-
BatchSchema expectedSchema = new SchemaBuilder()
.add("a", MinorType.INT)
.add("b", MinorType.VARCHAR)
.addNullable("c", MinorType.INT)
.build();
- {
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .build();
-
- assertNotNull(scanner.output());
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
- }
// Create a batch of data.
@@ -320,15 +292,13 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
// Verify
- {
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .addRow(1, "fred", null)
- .addRow(2, "wilma", null)
- .build();
+ SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .addRow(1, "fred", null)
+ .addRow(2, "wilma", null)
+ .build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
- }
+ RowSetUtilities.verify(expected,
+ fixture.wrap(scanner.output()));
scanner.close();
}
@@ -369,21 +339,11 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
ResultSetLoader loader = reader.makeTableLoader(tableSchema);
- // Verify empty batch
-
BatchSchema expectedSchema = new SchemaBuilder()
.add("a", MinorType.INT)
.add("b", MinorType.VARCHAR)
.addNullable("c", MinorType.VARCHAR)
.build();
- {
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .build();
-
- assertNotNull(scanner.output());
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
- }
// Create a batch of data.
@@ -395,15 +355,13 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
// Verify
- {
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .addRow(1, "fred", null)
- .addRow(2, "wilma", null)
- .build();
+ SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .addRow(1, "fred", null)
+ .addRow(2, "wilma", null)
+ .build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
- }
+ RowSetUtilities.verify(expected,
+ fixture.wrap(scanner.output()));
scanner.close();
}
@@ -440,19 +398,9 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
assertFalse(loader.writer().column("b").schema().isProjected());
- // Verify empty batch.
-
BatchSchema expectedSchema = new SchemaBuilder()
.add("a", MinorType.INT)
.build();
- {
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .build();
-
- assertNotNull(scanner.output());
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
- }
// Create a batch of data.
@@ -464,15 +412,13 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
// Verify
- {
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .addRow(1)
- .addRow(2)
- .build();
+ SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .addRow(1)
+ .addRow(2)
+ .build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
- }
+ RowSetUtilities.verify(expected,
+ fixture.wrap(scanner.output()));
scanner.close();
}
@@ -516,16 +462,6 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
BatchSchema expectedSchema = new SchemaBuilder()
.build();
- {
- // Expect an empty schema
-
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .build();
-
- assertNotNull(scanner.output());
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
- }
// Create a batch of data.
@@ -545,8 +481,8 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
.addRow()
.build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
+ RowSetUtilities.verify(expected,
+ fixture.wrap(scanner.output()));
}
// Fast path to fill in empty rows
@@ -592,18 +528,6 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
reader.makeTableLoader(tableSchema);
- // Schema provided, so an empty batch is available to
- // send downstream.
-
- {
- SingleRowSet expected = fixture.rowSetBuilder(tableSchema)
- .build();
-
- assertNotNull(scanner.output());
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
- }
-
// Create a batch of data. Because there are no columns, it does
// not make sense to ready any rows.
@@ -616,8 +540,8 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
SingleRowSet expected = fixture.rowSetBuilder(tableSchema)
.build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
+ RowSetUtilities.verify(expected,
+ fixture.wrap(scanner.output()));
}
scanner.close();
@@ -650,19 +574,9 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
reader.makeTableLoader(tableSchema);
- // Verify initial empty batch.
-
BatchSchema expectedSchema = new SchemaBuilder()
.addNullable("a", MinorType.INT)
.build();
- {
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .build();
-
- assertNotNull(scanner.output());
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
- }
// Create a batch of data. Because there are no columns, it does
// not make sense to ready any rows.
@@ -672,16 +586,14 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
// Verify
- {
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .build();
-
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
- }
+ SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .build();
+ RowSetUtilities.verify(expected,
+ fixture.wrap(scanner.output()));
scanner.close();
}
+
/**
* The projection mechanism provides "type smoothing": null
* columns prefer the type of previously-seen non-null columns.
@@ -718,6 +630,7 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
ReaderSchemaOrchestrator reader = scanner.startReader();
reader.makeTableLoader(table1Schema);
+ reader.defineSchema();
VectorContainer output = scanner.output();
tracker.trackSchema(output);
schemaVersion = tracker.schemaVersion();
@@ -737,6 +650,7 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
.buildSchema();
ReaderSchemaOrchestrator reader = scanner.startReader();
reader.makeTableLoader(table2Schema);
+ reader.defineSchema();
VectorContainer output = scanner.output();
tracker.trackSchema(output);
assertEquals(schemaVersion, tracker.schemaVersion());
@@ -756,6 +670,7 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
.buildSchema();
ReaderSchemaOrchestrator reader = scanner.startReader();
reader.makeTableLoader(table3Schema);
+ reader.defineSchema();
VectorContainer output = scanner.output();
tracker.trackSchema(output);
assertEquals(schemaVersion, tracker.schemaVersion());
@@ -776,6 +691,7 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
.buildSchema();
ReaderSchemaOrchestrator reader = scanner.startReader();
reader.makeTableLoader(table2Schema);
+ reader.defineSchema();
VectorContainer output = scanner.output();
tracker.trackSchema(output);
assertEquals(MinorType.BIGINT, output.getSchema().getColumn(0).getType().getMinorType());
@@ -843,8 +759,8 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
.addRow(10, "fred")
.addRow(20, "wilma")
.build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(projector.output()));
+ RowSetUtilities.verify(expected,
+ fixture.wrap(projector.output()));
}
{
// ... FROM table 2
@@ -874,8 +790,8 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
.addRow(30, null)
.addRow(40, null)
.build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(projector.output()));
+ RowSetUtilities.verify(expected,
+ fixture.wrap(projector.output()));
}
{
// ... FROM table 3
@@ -899,8 +815,8 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
.addRow(50, "dino")
.addRow(60, "barney")
.build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(projector.output()));
+ RowSetUtilities.verify(expected,
+ fixture.wrap(projector.output()));
}
projector.close();
@@ -926,9 +842,6 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
ReaderSchemaOrchestrator reader = scanner.startReader();
ResultSetLoader loader = reader.makeTableLoader(schema1);
- tracker.trackSchema(scanner.output());
- schemaVersion = tracker.schemaVersion();
-
// Create a batch
reader.startBatch();
@@ -936,6 +849,8 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
.addRow("fred")
.addRow("wilma");
reader.endBatch();
+ tracker.trackSchema(scanner.output());
+ schemaVersion = tracker.schemaVersion();
// Verify
@@ -944,8 +859,8 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
.addRow("wilma")
.build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
+ RowSetUtilities.verify(expected,
+ fixture.wrap(scanner.output()));
scanner.closeReader();
}
{
@@ -960,8 +875,6 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
ReaderSchemaOrchestrator reader = scanner.startReader();
ResultSetLoader loader = reader.makeTableLoader(schema2);
- tracker.trackSchema(scanner.output());
- assertEquals(schemaVersion, tracker.schemaVersion());
// Create a batch
@@ -973,13 +886,15 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
// Verify, using persistent schema
+ tracker.trackSchema(scanner.output());
+ assertEquals(schemaVersion, tracker.schemaVersion());
SingleRowSet expected = fixture.rowSetBuilder(schema1)
.addRow("barney")
.addRow("betty")
.build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
+ RowSetUtilities.verify(expected,
+ fixture.wrap(scanner.output()));
scanner.closeReader();
}
{
@@ -994,9 +909,6 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
ReaderSchemaOrchestrator reader = scanner.startReader();
ResultSetLoader loader = reader.makeTableLoader(schema3);
- tracker.trackSchema(scanner.output());
- assertEquals(schemaVersion, tracker.schemaVersion());
-
// Create a batch
reader.startBatch();
@@ -1007,13 +919,16 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
// Verify, using persistent schema
+ tracker.trackSchema(scanner.output());
+ assertEquals(schemaVersion, tracker.schemaVersion());
+
SingleRowSet expected = fixture.rowSetBuilder(schema1)
.addRow("bam-bam")
.addRow("pebbles")
.build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
+ RowSetUtilities.verify(expected,
+ fixture.wrap(scanner.output()));
scanner.closeReader();
}
@@ -1073,8 +988,8 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
.addRow(10, "fred", 110L)
.addRow(20, "wilma", 110L)
.build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
+ RowSetUtilities.verify(expected,
+ fixture.wrap(scanner.output()));
scanner.closeReader();
}
@@ -1100,8 +1015,8 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
.addRow(30, "bambam", 330L)
.addRow(40, "betty", 440L)
.build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
+ RowSetUtilities.verify(expected,
+ fixture.wrap(scanner.output()));
}
{
// ... FROM table 3
@@ -1125,8 +1040,8 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest {
.addRow(50, "dino", 550L)
.addRow(60, "barney", 660L)
.build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
+ RowSetUtilities.verify(expected,
+ fixture.wrap(scanner.output()));
}
scanner.close();
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorLateSchema.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorLateSchema.java
index 0cf2cba31..84ffc4e32 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorLateSchema.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorLateSchema.java
@@ -18,10 +18,12 @@
package org.apache.drill.exec.physical.impl.scan;
import static org.junit.Assert.assertFalse;
+
+import org.apache.drill.categories.RowSetTests;
import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.physical.impl.scan.project.ReaderSchemaOrchestrator;
import org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator;
-import org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ReaderSchemaOrchestrator;
import org.apache.drill.exec.physical.rowSet.ResultSetLoader;
import org.apache.drill.exec.physical.rowSet.RowSetLoader;
import org.apache.drill.exec.physical.rowSet.impl.RowSetTestUtils;
@@ -32,6 +34,7 @@ import org.apache.drill.test.SubOperatorTest;
import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
import org.apache.drill.test.rowSet.RowSetComparison;
import org.junit.Test;
+import org.junit.experimental.categories.Category;
/**
* Test the late-schema support in the scan orchestrator. "Late schema" is the case
@@ -43,6 +46,7 @@ import org.junit.Test;
* that tests for lower-level components have already passed.
*/
+@Category(RowSetTests.class)
public class TestScanOrchestratorLateSchema extends SubOperatorTest {
/**
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorMetadata.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorMetadata.java
index e10055114..c7b52e2da 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorMetadata.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorMetadata.java
@@ -20,14 +20,15 @@ package org.apache.drill.exec.physical.impl.scan;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
+import org.apache.drill.categories.RowSetTests;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.physical.impl.protocol.SchemaTracker;
import org.apache.drill.exec.physical.impl.scan.file.FileMetadataManager;
+import org.apache.drill.exec.physical.impl.scan.project.ReaderSchemaOrchestrator;
import org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator;
-import org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ReaderSchemaOrchestrator;
import org.apache.drill.exec.physical.rowSet.ResultSetLoader;
import org.apache.drill.exec.physical.rowSet.impl.RowSetTestUtils;
import org.apache.drill.exec.record.BatchSchema;
@@ -35,10 +36,10 @@ import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.test.SubOperatorTest;
import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
-import org.apache.drill.test.rowSet.RowSetComparison;
+import org.apache.drill.test.rowSet.RowSetUtilities;
import org.apache.hadoop.fs.Path;
import org.junit.Test;
-
+import org.junit.experimental.categories.Category;
import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
/**
@@ -46,6 +47,7 @@ import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
* with implicit file columns provided by the file metadata manager.
*/
+@Category(RowSetTests.class)
public class TestScanOrchestratorMetadata extends SubOperatorTest {
/**
@@ -104,8 +106,8 @@ public class TestScanOrchestratorMetadata extends SubOperatorTest {
.addRow(2, "wilma", "/w/x/y/z.csv", "/w/x/y", "z.csv", "csv", "x", "y")
.build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
+ RowSetUtilities.verify(expected,
+ fixture.wrap(scanner.output()));
scanner.close();
}
@@ -146,19 +148,9 @@ public class TestScanOrchestratorMetadata extends SubOperatorTest {
ResultSetLoader loader = reader.makeTableLoader(tableSchema);
- // Verify empty batch.
-
BatchSchema expectedSchema = new SchemaBuilder()
.addNullable("c", MinorType.INT)
.build();
- {
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .build();
-
- assertNotNull(scanner.output());
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
- }
// Create a batch of data.
@@ -170,15 +162,13 @@ public class TestScanOrchestratorMetadata extends SubOperatorTest {
// Verify
- {
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .addSingleCol(null)
- .addSingleCol(null)
- .build();
+ SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .addSingleCol(null)
+ .addSingleCol(null)
+ .build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
- }
+ RowSetUtilities.verify(expected,
+ fixture.wrap(scanner.output()));
scanner.close();
}
@@ -229,6 +219,7 @@ public class TestScanOrchestratorMetadata extends SubOperatorTest {
// Verify empty batch.
+ reader.defineSchema();
BatchSchema expectedSchema = new SchemaBuilder()
.add("a", MinorType.INT)
.add("b", MinorType.VARCHAR)
@@ -240,8 +231,8 @@ public class TestScanOrchestratorMetadata extends SubOperatorTest {
.build();
assertNotNull(scanner.output());
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
+ RowSetUtilities.verify(expected,
+ fixture.wrap(scanner.output()));
}
// Create a batch of data.
@@ -260,8 +251,8 @@ public class TestScanOrchestratorMetadata extends SubOperatorTest {
.addRow(2, "wilma", "x", "csv")
.build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
+ RowSetUtilities.verify(expected,
+ fixture.wrap(scanner.output()));
}
scanner.close();
@@ -302,22 +293,12 @@ public class TestScanOrchestratorMetadata extends SubOperatorTest {
ResultSetLoader loader = reader.makeTableLoader(tableSchema);
- // Verify empty batch.
-
BatchSchema expectedSchema = new SchemaBuilder()
.addNullable("dir0", MinorType.VARCHAR)
.add("b", MinorType.VARCHAR)
.add("suffix", MinorType.VARCHAR)
.addNullable("c", MinorType.INT)
.build();
- {
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .build();
-
- assertNotNull(scanner.output());
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
- }
// Create a batch of data.
@@ -329,15 +310,13 @@ public class TestScanOrchestratorMetadata extends SubOperatorTest {
// Verify
- {
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .addRow("x", "fred", "csv", null)
- .addRow("x", "wilma", "csv", null)
- .build();
+ SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .addRow("x", "fred", "csv", null)
+ .addRow("x", "wilma", "csv", null)
+ .build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
- }
+ RowSetUtilities.verify(expected,
+ fixture.wrap(scanner.output()));
scanner.close();
}
@@ -403,8 +382,8 @@ public class TestScanOrchestratorMetadata extends SubOperatorTest {
.addRow("x", "y", "a.csv", "fred")
.addRow("x", "y", "a.csv", "wilma")
.build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
+ RowSetUtilities.verify(expected,
+ fixture.wrap(scanner.output()));
// Do explicit close (as in real code) to avoid an implicit
// close which will blow away the current file info...
@@ -431,8 +410,8 @@ public class TestScanOrchestratorMetadata extends SubOperatorTest {
.addRow("x", null, "b.csv", "bambam")
.addRow("x", null, "b.csv", "betty")
.build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(scanner.output()));
+ RowSetUtilities.verify(expected,
+ fixture.wrap(scanner.output()));
scanner.closeReader();
}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestSchemaLevelProjection.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestSchemaLevelProjection.java
deleted file mode 100644
index 021d7e381..000000000
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestSchemaLevelProjection.java
+++ /dev/null
@@ -1,557 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.physical.impl.scan;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertSame;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.util.List;
-
-import org.apache.drill.common.exceptions.UserException;
-import org.apache.drill.common.types.TypeProtos.MinorType;
-import org.apache.drill.exec.physical.impl.scan.project.ResolvedMapColumn;
-import org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow;
-import org.apache.drill.exec.physical.impl.scan.project.ExplicitSchemaProjection;
-import org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder;
-import org.apache.drill.exec.physical.impl.scan.project.ResolvedColumn;
-import org.apache.drill.exec.physical.impl.scan.project.ResolvedNullColumn;
-import org.apache.drill.exec.physical.impl.scan.project.ResolvedTableColumn;
-import org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple;
-import org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection;
-import org.apache.drill.exec.physical.impl.scan.project.UnresolvedColumn;
-import org.apache.drill.exec.physical.impl.scan.project.VectorSource;
-import org.apache.drill.exec.physical.impl.scan.project.WildcardSchemaProjection;
-import org.apache.drill.exec.physical.rowSet.impl.RowSetTestUtils;
-import org.apache.drill.exec.record.metadata.SchemaBuilder;
-import org.apache.drill.exec.record.metadata.TupleMetadata;
-import org.apache.drill.test.SubOperatorTest;
-import org.junit.Test;
-
-public class TestSchemaLevelProjection extends SubOperatorTest {
-
- @Test
- public void testWildcard() {
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectAll(),
- ScanTestUtils.parsers());
- assertEquals(1, scanProj.columns().size());
-
- TupleMetadata tableSchema = new SchemaBuilder()
- .add("a", MinorType.VARCHAR)
- .addNullable("c", MinorType.INT)
- .addArray("d", MinorType.FLOAT8)
- .buildSchema();
-
- NullColumnBuilder builder = new NullColumnBuilder(null, false);
- ResolvedRow rootTuple = new ResolvedRow(builder);
- new WildcardSchemaProjection(
- scanProj, tableSchema, rootTuple,
- ScanTestUtils.resolvers());
-
- List<ResolvedColumn> columns = rootTuple.columns();
- assertEquals(3, columns.size());
-
- assertEquals("a", columns.get(0).name());
- assertEquals(0, columns.get(0).sourceIndex());
- assertSame(rootTuple, columns.get(0).source());
- assertEquals("c", columns.get(1).name());
- assertEquals(1, columns.get(1).sourceIndex());
- assertSame(rootTuple, columns.get(1).source());
- assertEquals("d", columns.get(2).name());
- assertEquals(2, columns.get(2).sourceIndex());
- assertSame(rootTuple, columns.get(2).source());
- }
-
- /**
-  * Test a SELECT list whose columns appear in a different order, and
-  * with different name case, than in the early-schema table.
- */
-
- @Test
- public void testFullList() {
-
- // Simulate SELECT c, b, a ...
-
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectList("c", "b", "a"),
- ScanTestUtils.parsers());
- assertEquals(3, scanProj.columns().size());
-
- // Simulate a data source, with early schema, of (a, b, c)
-
- TupleMetadata tableSchema = new SchemaBuilder()
- .add("A", MinorType.VARCHAR)
- .add("B", MinorType.VARCHAR)
- .add("C", MinorType.VARCHAR)
- .buildSchema();
-
- NullColumnBuilder builder = new NullColumnBuilder(null, false);
- ResolvedRow rootTuple = new ResolvedRow(builder);
- new ExplicitSchemaProjection(
- scanProj, tableSchema, rootTuple,
- ScanTestUtils.resolvers());
-
- List<ResolvedColumn> columns = rootTuple.columns();
- assertEquals(3, columns.size());
-
- assertEquals("c", columns.get(0).name());
- assertEquals(2, columns.get(0).sourceIndex());
- assertSame(rootTuple, columns.get(0).source());
-
- assertEquals("b", columns.get(1).name());
- assertEquals(1, columns.get(1).sourceIndex());
- assertSame(rootTuple, columns.get(1).source());
-
- assertEquals("a", columns.get(2).name());
- assertEquals(0, columns.get(2).sourceIndex());
- assertSame(rootTuple, columns.get(2).source());
- }
-
- /**
- * Test SELECT list with columns missing from the table schema.
- */
-
- @Test
- public void testMissing() {
-
- // Simulate SELECT c, v, b, w ...
-
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectList("c", "v", "b", "w"),
- ScanTestUtils.parsers());
- assertEquals(4, scanProj.columns().size());
-
- // Simulate a data source, with early schema, of (a, b, c)
-
- TupleMetadata tableSchema = new SchemaBuilder()
- .add("A", MinorType.VARCHAR)
- .add("B", MinorType.VARCHAR)
- .add("C", MinorType.VARCHAR)
- .buildSchema();
-
- NullColumnBuilder builder = new NullColumnBuilder(null, false);
- ResolvedRow rootTuple = new ResolvedRow(builder);
- new ExplicitSchemaProjection(
- scanProj, tableSchema, rootTuple,
- ScanTestUtils.resolvers());
-
- List<ResolvedColumn> columns = rootTuple.columns();
- assertEquals(4, columns.size());
- VectorSource nullBuilder = rootTuple.nullBuilder();
-
- assertEquals("c", columns.get(0).name());
- assertEquals(2, columns.get(0).sourceIndex());
- assertSame(rootTuple, columns.get(0).source());
-
- assertEquals("v", columns.get(1).name());
- assertEquals(0, columns.get(1).sourceIndex());
- assertSame(nullBuilder, columns.get(1).source());
-
- assertEquals("b", columns.get(2).name());
- assertEquals(1, columns.get(2).sourceIndex());
- assertSame(rootTuple, columns.get(2).source());
-
- assertEquals("w", columns.get(3).name());
- assertEquals(1, columns.get(3).sourceIndex());
- assertSame(nullBuilder, columns.get(3).source());
- }
-
- @Test
- public void testSubset() {
-
- // Simulate SELECT c, a ...
-
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectList("c", "a"),
- ScanTestUtils.parsers());
- assertEquals(2, scanProj.columns().size());
-
- // Simulate a data source, with early schema, of (a, b, c)
-
- TupleMetadata tableSchema = new SchemaBuilder()
- .add("A", MinorType.VARCHAR)
- .add("B", MinorType.VARCHAR)
- .add("C", MinorType.VARCHAR)
- .buildSchema();
-
- NullColumnBuilder builder = new NullColumnBuilder(null, false);
- ResolvedRow rootTuple = new ResolvedRow(builder);
- new ExplicitSchemaProjection(
- scanProj, tableSchema, rootTuple,
- ScanTestUtils.resolvers());
-
- List<ResolvedColumn> columns = rootTuple.columns();
- assertEquals(2, columns.size());
-
- assertEquals("c", columns.get(0).name());
- assertEquals(2, columns.get(0).sourceIndex());
- assertSame(rootTuple, columns.get(0).source());
-
- assertEquals("a", columns.get(1).name());
- assertEquals(0, columns.get(1).sourceIndex());
- assertSame(rootTuple, columns.get(1).source());
- }
-
- @Test
- public void testDisjoint() {
-
-    // Simulate SELECT b ...
-
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectList("b"),
- ScanTestUtils.parsers());
- assertEquals(1, scanProj.columns().size());
-
- // Simulate a data source, with early schema, of (a)
-
- TupleMetadata tableSchema = new SchemaBuilder()
- .add("A", MinorType.VARCHAR)
- .buildSchema();
-
- NullColumnBuilder builder = new NullColumnBuilder(null, false);
- ResolvedRow rootTuple = new ResolvedRow(builder);
- new ExplicitSchemaProjection(
- scanProj, tableSchema, rootTuple,
- ScanTestUtils.resolvers());
-
- List<ResolvedColumn> columns = rootTuple.columns();
- assertEquals(1, columns.size());
- VectorSource nullBuilder = rootTuple.nullBuilder();
-
- assertEquals("b", columns.get(0).name());
- assertEquals(0, columns.get(0).sourceIndex());
- assertSame(nullBuilder, columns.get(0).source());
- }
-
- @Test
- public void testOmittedMap() {
-
- // Simulate SELECT a, b.c.d ...
-
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectList("a", "b.c.d"),
- ScanTestUtils.parsers());
- assertEquals(2, scanProj.columns().size());
- {
- assertEquals(UnresolvedColumn.UNRESOLVED, scanProj.columns().get(1).nodeType());
- UnresolvedColumn bCol = (UnresolvedColumn) (scanProj.columns().get(1));
- assertTrue(bCol.element().isTuple());
- }
-
- // Simulate a data source, with early schema, of (a)
-
- TupleMetadata tableSchema = new SchemaBuilder()
- .add("a", MinorType.VARCHAR)
- .buildSchema();
-
- NullColumnBuilder builder = new NullColumnBuilder(null, false);
- ResolvedRow rootTuple = new ResolvedRow(builder);
- new ExplicitSchemaProjection(
- scanProj, tableSchema, rootTuple,
- ScanTestUtils.resolvers());
-
- List<ResolvedColumn> columns = rootTuple.columns();
- assertEquals(2, columns.size());
-
- // Should have resolved a to a table column, b to a missing map.
-
- // A is projected
-
- ResolvedColumn aCol = columns.get(0);
- assertEquals("a", aCol.name());
- assertEquals(ResolvedTableColumn.ID, aCol.nodeType());
-
- // B is not projected, is implicitly a map
-
- ResolvedColumn bCol = columns.get(1);
- assertEquals("b", bCol.name());
- assertEquals(ResolvedMapColumn.ID, bCol.nodeType());
-
- ResolvedMapColumn bMap = (ResolvedMapColumn) bCol;
- ResolvedTuple bMembers = bMap.members();
- assertNotNull(bMembers);
- assertEquals(1, bMembers.columns().size());
-
- // C is a map within b
-
- ResolvedColumn cCol = bMembers.columns().get(0);
- assertEquals(ResolvedMapColumn.ID, cCol.nodeType());
-
- ResolvedMapColumn cMap = (ResolvedMapColumn) cCol;
- ResolvedTuple cMembers = cMap.members();
- assertNotNull(cMembers);
- assertEquals(1, cMembers.columns().size());
-
- // D is an unknown column type (not a map)
-
- ResolvedColumn dCol = cMembers.columns().get(0);
- assertEquals(ResolvedNullColumn.ID, dCol.nodeType());
- }
-
- /**
-   * Test of a map with missing columns.
-   * Table of (x, y, a{b, c}); project x, a.c, a.d, a.e.f, y.
- */
-
- @Test
- public void testOmittedMapMembers() {
-
- // Simulate SELECT a.c, a.d, a.e.f ...
-
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectList("x", "a.c", "a.d", "a.e.f", "y"),
- ScanTestUtils.parsers());
- assertEquals(3, scanProj.columns().size());
-
- // Simulate a data source, with early schema, of (x, y, a{b, c})
-
- TupleMetadata tableSchema = new SchemaBuilder()
- .add("x", MinorType.VARCHAR)
- .add("y", MinorType.INT)
- .addMap("a")
- .add("b", MinorType.BIGINT)
- .add("c", MinorType.FLOAT8)
- .resumeSchema()
- .buildSchema();
-
- NullColumnBuilder builder = new NullColumnBuilder(null, false);
- ResolvedRow rootTuple = new ResolvedRow(builder);
- new ExplicitSchemaProjection(
- scanProj, tableSchema, rootTuple,
- ScanTestUtils.resolvers());
-
- List<ResolvedColumn> columns = rootTuple.columns();
- assertEquals(3, columns.size());
-
-    // Should have resolved a.c to a column within the a map,
-    // a.d to a null column, a.e to a missing nested map, and
-    // a.e.f to a null member of that missing map.
-
- // X is projected
-
- ResolvedColumn xCol = columns.get(0);
- assertEquals("x", xCol.name());
- assertEquals(ResolvedTableColumn.ID, xCol.nodeType());
- assertSame(rootTuple, ((ResolvedTableColumn) (xCol)).source());
- assertEquals(0, ((ResolvedTableColumn) (xCol)).sourceIndex());
-
- // Y is projected
-
- ResolvedColumn yCol = columns.get(2);
- assertEquals("y", yCol.name());
- assertEquals(ResolvedTableColumn.ID, yCol.nodeType());
- assertSame(rootTuple, ((ResolvedTableColumn) (yCol)).source());
- assertEquals(1, ((ResolvedTableColumn) (yCol)).sourceIndex());
-
- // A is projected
-
- ResolvedColumn aCol = columns.get(1);
- assertEquals("a", aCol.name());
- assertEquals(ResolvedMapColumn.ID, aCol.nodeType());
-
- ResolvedMapColumn aMap = (ResolvedMapColumn) aCol;
- ResolvedTuple aMembers = aMap.members();
- assertFalse(aMembers.isSimpleProjection());
- assertNotNull(aMembers);
- assertEquals(3, aMembers.columns().size());
-
- // a.c is projected
-
- ResolvedColumn acCol = aMembers.columns().get(0);
- assertEquals("c", acCol.name());
- assertEquals(ResolvedTableColumn.ID, acCol.nodeType());
- assertEquals(1, ((ResolvedTableColumn) (acCol)).sourceIndex());
-
- // a.d is not in the table, is null
-
- ResolvedColumn adCol = aMembers.columns().get(1);
- assertEquals("d", adCol.name());
- assertEquals(ResolvedNullColumn.ID, adCol.nodeType());
-
- // a.e is not in the table, is implicitly a map
-
- ResolvedColumn aeCol = aMembers.columns().get(2);
- assertEquals("e", aeCol.name());
- assertEquals(ResolvedMapColumn.ID, aeCol.nodeType());
-
- ResolvedMapColumn aeMap = (ResolvedMapColumn) aeCol;
- ResolvedTuple aeMembers = aeMap.members();
- assertFalse(aeMembers.isSimpleProjection());
- assertNotNull(aeMembers);
- assertEquals(1, aeMembers.columns().size());
-
-    // a.e.f is a null column
-
- ResolvedColumn aefCol = aeMembers.columns().get(0);
- assertEquals("f", aefCol.name());
- assertEquals(ResolvedNullColumn.ID, aefCol.nodeType());
- }
-
- /**
- * Simple map project. This is an internal case in which the
- * query asks for a set of columns inside a map, and the table
-   * loader produces exactly that set. No special projection is
-   * needed; the map is projected as a whole.
- */
-
- @Test
- public void testSimpleMapProject() {
-
- // Simulate SELECT a.b, a.c ...
-
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectList("a.b", "a.c"),
- ScanTestUtils.parsers());
- assertEquals(1, scanProj.columns().size());
-
- // Simulate a data source, with early schema, of (a{b, c})
-
- TupleMetadata tableSchema = new SchemaBuilder()
- .addMap("a")
- .add("b", MinorType.BIGINT)
- .add("c", MinorType.FLOAT8)
- .resumeSchema()
- .buildSchema();
-
- NullColumnBuilder builder = new NullColumnBuilder(null, false);
- ResolvedRow rootTuple = new ResolvedRow(builder);
- new ExplicitSchemaProjection(
- scanProj, tableSchema, rootTuple,
- ScanTestUtils.resolvers());
-
- List<ResolvedColumn> columns = rootTuple.columns();
- assertEquals(1, columns.size());
-
-    // The projection list matches the map schema exactly, so the
-    // map needs no rewriting.
-
- // a is projected as a vector, not as a structured map
-
- ResolvedColumn aCol = columns.get(0);
- assertEquals("a", aCol.name());
- assertEquals(ResolvedTableColumn.ID, aCol.nodeType());
- assertSame(rootTuple, ((ResolvedTableColumn) (aCol)).source());
- assertEquals(0, ((ResolvedTableColumn) (aCol)).sourceIndex());
- }
-
- /**
- * Project of a non-map as a map
- */
-
- @Test
- public void testMapMismatch() {
-
- // Simulate SELECT a.b ...
-
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectList("a.b"),
- ScanTestUtils.parsers());
-
- // Simulate a data source, with early schema, of (a)
- // where a is not a map.
-
- TupleMetadata tableSchema = new SchemaBuilder()
- .add("a", MinorType.VARCHAR)
- .buildSchema();
-
- NullColumnBuilder builder = new NullColumnBuilder(null, false);
- ResolvedRow rootTuple = new ResolvedRow(builder);
- try {
- new ExplicitSchemaProjection(
- scanProj, tableSchema, rootTuple,
- ScanTestUtils.resolvers());
- fail();
- } catch (UserException e) {
- // Expected
- }
- }
-
- /**
- * Test project of an array. At the scan level, we just verify
- * that the requested column is, indeed, an array.
- */
-
- @Test
- public void testArrayProject() {
-
- // Simulate SELECT a[0] ...
-
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectList("a[0]"),
- ScanTestUtils.parsers());
-
- // Simulate a data source, with early schema, of (a)
-    // where a is an array.
-
- TupleMetadata tableSchema = new SchemaBuilder()
- .addArray("a", MinorType.VARCHAR)
- .buildSchema();
-
- NullColumnBuilder builder = new NullColumnBuilder(null, false);
- ResolvedRow rootTuple = new ResolvedRow(builder);
- new ExplicitSchemaProjection(
- scanProj, tableSchema, rootTuple,
- ScanTestUtils.resolvers());
-
- List<ResolvedColumn> columns = rootTuple.columns();
- assertEquals(1, columns.size());
-
- ResolvedColumn aCol = columns.get(0);
- assertEquals("a", aCol.name());
- assertEquals(ResolvedTableColumn.ID, aCol.nodeType());
- assertSame(rootTuple, ((ResolvedTableColumn) (aCol)).source());
- assertEquals(0, ((ResolvedTableColumn) (aCol)).sourceIndex());
- }
-
- /**
- * Project of a non-array as an array
- */
-
- @Test
- public void testArrayMismatch() {
-
- // Simulate SELECT a[0] ...
-
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectList("a[0]"),
- ScanTestUtils.parsers());
-
- // Simulate a data source, with early schema, of (a)
- // where a is not an array.
-
- TupleMetadata tableSchema = new SchemaBuilder()
- .add("a", MinorType.VARCHAR)
- .buildSchema();
-
- NullColumnBuilder builder = new NullColumnBuilder(null, false);
- ResolvedRow rootTuple = new ResolvedRow(builder);
- try {
- new ExplicitSchemaProjection(
- scanProj, tableSchema, rootTuple,
- ScanTestUtils.resolvers());
- fail();
- } catch (UserException e) {
- // Expected
- }
- }
-}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestSchemaSmoothing.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestSchemaSmoothing.java
deleted file mode 100644
index a30321b0d..000000000
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestSchemaSmoothing.java
+++ /dev/null
@@ -1,946 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.physical.impl.scan;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotEquals;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.util.List;
-
-import org.apache.drill.common.types.TypeProtos.MinorType;
-import org.apache.drill.exec.physical.impl.protocol.SchemaTracker;
-import org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn;
-import org.apache.drill.exec.physical.impl.scan.file.FileMetadataManager;
-import org.apache.drill.exec.physical.impl.scan.project.ExplicitSchemaProjection;
-import org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder;
-import org.apache.drill.exec.physical.impl.scan.project.ResolvedColumn;
-import org.apache.drill.exec.physical.impl.scan.project.ResolvedNullColumn;
-import org.apache.drill.exec.physical.impl.scan.project.ResolvedTableColumn;
-import org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow;
-import org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection;
-import org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator;
-import org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ReaderSchemaOrchestrator;
-import org.apache.drill.exec.physical.impl.scan.project.SchemaSmoother;
-import org.apache.drill.exec.physical.impl.scan.project.SchemaSmoother.IncompatibleSchemaException;
-import org.apache.drill.exec.physical.impl.scan.project.SmoothingProjection;
-import org.apache.drill.exec.physical.impl.scan.project.WildcardSchemaProjection;
-import org.apache.drill.exec.physical.rowSet.ResultSetLoader;
-import org.apache.drill.exec.physical.rowSet.impl.RowSetTestUtils;
-import org.apache.drill.exec.record.metadata.SchemaBuilder;
-import org.apache.drill.exec.record.metadata.TupleMetadata;
-import org.apache.drill.test.SubOperatorTest;
-import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
-import org.apache.drill.test.rowSet.RowSetComparison;
-import org.apache.hadoop.fs.Path;
-import org.junit.Test;
-
-import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
-
-/**
- * Tests schema smoothing at the schema projection level.
- * This level handles reusing prior types when filling null
- * values. Because no actual vectors are involved, it does not
- * handle a schema chosen for the table ahead of time; it only
- * merges the table schema with the prior schema to detect
- * missing columns.
- * <p>
- * Focuses on the <tt>SmoothingProjection</tt> class itself.
- * <p>
- * Note that, at present, schema smoothing does not work for entire
- * maps. That is, if file 1 has, say <tt>{a: {b: 10, c: "foo"}}</tt>
- * and file 2 has, say, <tt>{a: null}</tt>, then schema smoothing does
- * not currently know how to recreate the map. The same is true of
- * lists and unions. Handling such cases is complex and is probably
- * better handled via a system that allows the user to specify their
- * intent by providing a schema to apply to the two files.
- */
-
-public class TestSchemaSmoothing extends SubOperatorTest {
-
- /**
- * Sanity test for the simple, discrete case. The purpose of
- * discrete is just to run the basic lifecycle in a way that
- * is compatible with the schema-persistence version.
- */
-
- @Test
- public void testDiscrete() {
-
- // Set up the file metadata manager
-
- Path filePathA = new Path("hdfs:///w/x/y/a.csv");
- Path filePathB = new Path("hdfs:///w/x/y/b.csv");
- FileMetadataManager metadataManager = new FileMetadataManager(
- fixture.getOptionManager(),
- new Path("hdfs:///w"),
- Lists.newArrayList(filePathA, filePathB));
-
- // Set up the scan level projection
-
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectList(ScanTestUtils.FILE_NAME_COL, "a", "b"),
- ScanTestUtils.parsers(metadataManager.projectionParser()));
-
- {
- // Define a file a.csv
-
- metadataManager.startFile(filePathA);
-
- // Build the output schema from the (a, b) table schema
-
- TupleMetadata twoColSchema = new SchemaBuilder()
- .add("a", MinorType.INT)
- .addNullable("b", MinorType.VARCHAR, 10)
- .buildSchema();
- NullColumnBuilder builder = new NullColumnBuilder(null, false);
- ResolvedRow rootTuple = new ResolvedRow(builder);
- new ExplicitSchemaProjection(
- scanProj, twoColSchema, rootTuple,
- ScanTestUtils.resolvers(metadataManager));
-
- // Verify the full output schema
-
- TupleMetadata expectedSchema = new SchemaBuilder()
- .add("filename", MinorType.VARCHAR)
- .add("a", MinorType.INT)
- .addNullable("b", MinorType.VARCHAR, 10)
- .buildSchema();
-
- // Verify
-
- List<ResolvedColumn> columns = rootTuple.columns();
- assertEquals(3, columns.size());
- assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
- assertEquals(ScanTestUtils.FILE_NAME_COL, columns.get(0).name());
- assertEquals("a.csv", ((FileMetadataColumn) columns.get(0)).value());
- assertEquals(ResolvedTableColumn.ID, columns.get(1).nodeType());
- }
- {
- // Define a file b.csv
-
- metadataManager.startFile(filePathB);
-
- // Build the output schema from the (a) table schema
-
- TupleMetadata oneColSchema = new SchemaBuilder()
- .add("a", MinorType.INT)
- .buildSchema();
- NullColumnBuilder builder = new NullColumnBuilder(null, false);
- ResolvedRow rootTuple = new ResolvedRow(builder);
- new ExplicitSchemaProjection(
- scanProj, oneColSchema, rootTuple,
- ScanTestUtils.resolvers(metadataManager));
-
- // Verify the full output schema
- // Since this mode is "discrete", we don't remember the type
- // of the missing column. (Instead, it is filled in at the
- // vector level as part of vector persistence.) During projection, it is
- // marked with type NULL so that the null column builder will fill in
- // the proper type.
-
- TupleMetadata expectedSchema = new SchemaBuilder()
- .add("filename", MinorType.VARCHAR)
- .add("a", MinorType.INT)
- .addNullable("b", MinorType.NULL)
- .buildSchema();
-
- // Verify
-
- List<ResolvedColumn> columns = rootTuple.columns();
- assertEquals(3, columns.size());
- assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
- assertEquals(ScanTestUtils.FILE_NAME_COL, columns.get(0).name());
- assertEquals("b.csv", ((FileMetadataColumn) columns.get(0)).value());
- assertEquals(ResolvedTableColumn.ID, columns.get(1).nodeType());
- assertEquals(ResolvedNullColumn.ID, columns.get(2).nodeType());
- }
- }
-
- /**
- * Low-level test of the smoothing projection, including the exceptions
- * it throws when things are not going its way.
- */
-
- @Test
- public void testSmoothingProjection() {
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectAll(),
- ScanTestUtils.parsers());
-
-    // Table 1: (a: nullable bigint, b: nullable varchar, c: float8)
-
- TupleMetadata schema1 = new SchemaBuilder()
- .addNullable("a", MinorType.BIGINT)
- .addNullable("b", MinorType.VARCHAR)
- .add("c", MinorType.FLOAT8)
- .buildSchema();
- ResolvedRow priorSchema;
- {
- NullColumnBuilder builder = new NullColumnBuilder(null, false);
- ResolvedRow rootTuple = new ResolvedRow(builder);
- new WildcardSchemaProjection(
- scanProj, schema1, rootTuple,
- ScanTestUtils.resolvers());
- priorSchema = rootTuple;
- }
-
- // Table 2: (a: nullable bigint, c), column omitted, original schema preserved
-
- TupleMetadata schema2 = new SchemaBuilder()
- .addNullable("a", MinorType.BIGINT)
- .add("c", MinorType.FLOAT8)
- .buildSchema();
- try {
- NullColumnBuilder builder = new NullColumnBuilder(null, false);
- ResolvedRow rootTuple = new ResolvedRow(builder);
- new SmoothingProjection(
- scanProj, schema2, priorSchema, rootTuple,
- ScanTestUtils.resolvers());
- assertTrue(schema1.isEquivalent(ScanTestUtils.schema(rootTuple)));
- priorSchema = rootTuple;
- } catch (IncompatibleSchemaException e) {
- fail();
- }
-
-    // Table 3: (a, b, c, d), column added, must replan schema
-
- TupleMetadata schema3 = new SchemaBuilder()
- .addNullable("a", MinorType.BIGINT)
- .addNullable("b", MinorType.VARCHAR)
- .add("c", MinorType.FLOAT8)
- .add("d", MinorType.INT)
- .buildSchema();
- try {
- NullColumnBuilder builder = new NullColumnBuilder(null, false);
- ResolvedRow rootTuple = new ResolvedRow(builder);
- new SmoothingProjection(
- scanProj, schema3, priorSchema, rootTuple,
- ScanTestUtils.resolvers());
- fail();
- } catch (IncompatibleSchemaException e) {
- // Expected
- }
-
-    // Table 4: (a: double), type changed, must replan schema
-
- TupleMetadata schema4 = new SchemaBuilder()
- .addNullable("a", MinorType.FLOAT8)
- .addNullable("b", MinorType.VARCHAR)
- .add("c", MinorType.FLOAT8)
- .buildSchema();
- try {
- NullColumnBuilder builder = new NullColumnBuilder(null, false);
- ResolvedRow rootTuple = new ResolvedRow(builder);
- new SmoothingProjection(
- scanProj, schema4, priorSchema, rootTuple,
- ScanTestUtils.resolvers());
- fail();
- } catch (IncompatibleSchemaException e) {
- // Expected
- }
-
-// // Table 5: (a: not-nullable bigint): convert to nullable for consistency
-//
-// TupleMetadata schema5 = new SchemaBuilder()
-// .addNullable("a", MinorType.BIGINT)
-// .add("c", MinorType.FLOAT8)
-// .buildSchema();
-// try {
-// SmoothingProjection schemaProj = new SmoothingProjection(
-// scanProj, schema5, dummySource, dummySource,
-// new ArrayList<>(), priorSchema);
-// assertTrue(schema1.isEquivalent(ScanTestUtils.schema(schemaProj.columns())));
-// } catch (IncompatibleSchemaException e) {
-// fail();
-// }
-
- // Table 6: Drop a non-nullable column, must replan
-
- TupleMetadata schema6 = new SchemaBuilder()
- .addNullable("a", MinorType.BIGINT)
- .addNullable("b", MinorType.VARCHAR)
- .buildSchema();
- try {
- NullColumnBuilder builder = new NullColumnBuilder(null, false);
- ResolvedRow rootTuple = new ResolvedRow(builder);
- new SmoothingProjection(
- scanProj, schema6, priorSchema, rootTuple,
- ScanTestUtils.resolvers());
- fail();
- } catch (IncompatibleSchemaException e) {
- // Expected
- }
- }
-
- /**
- * Case in which the table schema is a superset of the prior
- * schema. Discard prior schema. Turn off auto expansion of
- * metadata for a simpler test.
- */
-
- @Test
- public void testSmaller() {
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectAll(),
- ScanTestUtils.parsers());
-
- SchemaSmoother smoother = new SchemaSmoother(scanProj,
- ScanTestUtils.resolvers());
-
- TupleMetadata priorSchema = new SchemaBuilder()
- .add("a", MinorType.INT)
- .buildSchema();
- TupleMetadata tableSchema = new SchemaBuilder()
- .add("a", MinorType.INT)
- .add("b", MinorType.VARCHAR)
- .buildSchema();
-
- {
- NullColumnBuilder builder = new NullColumnBuilder(null, false);
- ResolvedRow rootTuple = new ResolvedRow(builder);
- smoother.resolve(priorSchema, rootTuple);
- assertEquals(1, smoother.schemaVersion());
- assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(priorSchema));
- }
- {
- NullColumnBuilder builder = new NullColumnBuilder(null, false);
- ResolvedRow rootTuple = new ResolvedRow(builder);
- smoother.resolve(tableSchema, rootTuple);
- assertEquals(2, smoother.schemaVersion());
- assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(tableSchema));
- }
- }
-
- /**
- * Case in which the table schema and prior are disjoint
- * sets. Discard the prior schema.
- */
-
- @Test
- public void testDisjoint() {
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectAll(),
- ScanTestUtils.parsers());
-
- SchemaSmoother smoother = new SchemaSmoother(scanProj,
- ScanTestUtils.resolvers());
-
- TupleMetadata priorSchema = new SchemaBuilder()
- .add("a", MinorType.INT)
- .buildSchema();
- TupleMetadata tableSchema = new SchemaBuilder()
- .add("b", MinorType.VARCHAR)
- .buildSchema();
-
- {
- doResolve(smoother, priorSchema);
- }
- {
- ResolvedRow rootTuple = doResolve(smoother, tableSchema);
- assertEquals(2, smoother.schemaVersion());
- assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(tableSchema));
- }
- }
-
- private ResolvedRow doResolve(SchemaSmoother smoother, TupleMetadata schema) {
- NullColumnBuilder builder = new NullColumnBuilder(null, false);
- ResolvedRow rootTuple = new ResolvedRow(builder);
- smoother.resolve(schema, rootTuple);
- return rootTuple;
- }
-
- /**
- * Column names match, but types differ. Discard the prior schema.
- */
-
- @Test
- public void testDifferentTypes() {
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectAll(),
- ScanTestUtils.parsers());
-
- SchemaSmoother smoother = new SchemaSmoother(scanProj,
- ScanTestUtils.resolvers());
-
- TupleMetadata priorSchema = new SchemaBuilder()
- .add("a", MinorType.INT)
- .add("b", MinorType.VARCHAR)
- .buildSchema();
- TupleMetadata tableSchema = new SchemaBuilder()
- .add("a", MinorType.INT)
- .addNullable("b", MinorType.VARCHAR)
- .buildSchema();
-
- {
- doResolve(smoother, priorSchema);
- }
- {
- ResolvedRow rootTuple = doResolve(smoother, tableSchema);
- assertEquals(2, smoother.schemaVersion());
- assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(tableSchema));
- }
- }
-
- /**
- * The prior and table schemas are identical. Preserve the prior
- * schema (though, the output is no different than if we discarded
- * the prior schema...)
- */
-
- @Test
- public void testSameSchemas() {
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectAll(),
- ScanTestUtils.parsers());
-
- SchemaSmoother smoother = new SchemaSmoother(scanProj,
- ScanTestUtils.resolvers());
-
- TupleMetadata priorSchema = new SchemaBuilder()
- .add("a", MinorType.INT)
- .add("b", MinorType.VARCHAR)
- .buildSchema();
- TupleMetadata tableSchema = new SchemaBuilder()
- .add("a", MinorType.INT)
- .add("b", MinorType.VARCHAR)
- .buildSchema();
-
- {
- doResolve(smoother, priorSchema);
- }
- {
- ResolvedRow rootTuple = doResolve(smoother, tableSchema);
- assertEquals(1, smoother.schemaVersion());
- TupleMetadata actualSchema = ScanTestUtils.schema(rootTuple);
- assertTrue(actualSchema.isEquivalent(tableSchema));
- assertTrue(actualSchema.isEquivalent(priorSchema));
- }
- }
-
- /**
- * The prior and table schemas are identical, but the cases of names differ.
- * Preserve the case of the first schema.
- */
-
- @Test
- public void testDifferentCase() {
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectAll(),
- ScanTestUtils.parsers());
-
- SchemaSmoother smoother = new SchemaSmoother(scanProj,
- ScanTestUtils.resolvers());
-
- TupleMetadata priorSchema = new SchemaBuilder()
- .add("a", MinorType.INT)
- .add("b", MinorType.VARCHAR)
- .buildSchema();
- TupleMetadata tableSchema = new SchemaBuilder()
- .add("A", MinorType.INT)
- .add("B", MinorType.VARCHAR)
- .buildSchema();
-
- {
- doResolve(smoother, priorSchema);
- }
- {
- ResolvedRow rootTuple = doResolve(smoother, tableSchema);
- assertEquals(1, smoother.schemaVersion());
- assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(priorSchema));
- List<ResolvedColumn> columns = rootTuple.columns();
- assertEquals("a", columns.get(0).name());
- }
- }
-
- /**
- * Can't preserve the prior schema if it had required columns
- * where the new schema has no columns.
- */
-
- @Test
- public void testRequired() {
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectAll(),
- ScanTestUtils.parsers());
-
- SchemaSmoother smoother = new SchemaSmoother(scanProj,
- ScanTestUtils.resolvers());
-
- TupleMetadata priorSchema = new SchemaBuilder()
- .add("a", MinorType.INT)
- .addNullable("b", MinorType.VARCHAR)
- .buildSchema();
- TupleMetadata tableSchema = new SchemaBuilder()
- .addNullable("b", MinorType.VARCHAR)
- .buildSchema();
-
- {
- doResolve(smoother, priorSchema);
- }
- {
- ResolvedRow rootTuple = doResolve(smoother, tableSchema);
- assertEquals(2, smoother.schemaVersion());
- assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(tableSchema));
- }
- }
-
- /**
- * Preserve the prior schema if table is a subset and missing columns
- * are nullable or repeated.
- */
-
- @Test
- public void testMissingNullableColumns() {
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectAll(),
- ScanTestUtils.parsers());
-
- SchemaSmoother smoother = new SchemaSmoother(scanProj,
- ScanTestUtils.resolvers());
-
- TupleMetadata priorSchema = new SchemaBuilder()
- .addNullable("a", MinorType.INT)
- .add("b", MinorType.VARCHAR)
- .addArray("c", MinorType.BIGINT)
- .buildSchema();
- TupleMetadata tableSchema = new SchemaBuilder()
- .add("b", MinorType.VARCHAR)
- .buildSchema();
-
- {
- doResolve(smoother, priorSchema);
- }
- {
- ResolvedRow rootTuple = doResolve(smoother, tableSchema);
- assertEquals(1, smoother.schemaVersion());
- assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(priorSchema));
- }
- }
-
- /**
- * Preserve the prior schema if table is a subset. Map the table
- * columns to the output using the prior schema ordering.
- */
-
- @Test
- public void testReordering() {
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectAll(),
- ScanTestUtils.parsers());
-
- SchemaSmoother smoother = new SchemaSmoother(scanProj,
- ScanTestUtils.resolvers());
-
- TupleMetadata priorSchema = new SchemaBuilder()
- .addNullable("a", MinorType.INT)
- .add("b", MinorType.VARCHAR)
- .addArray("c", MinorType.BIGINT)
- .buildSchema();
- TupleMetadata tableSchema = new SchemaBuilder()
- .add("b", MinorType.VARCHAR)
- .addNullable("a", MinorType.INT)
- .buildSchema();
-
- {
- doResolve(smoother, priorSchema);
- }
- {
- ResolvedRow rootTuple = doResolve(smoother, tableSchema);
- assertEquals(1, smoother.schemaVersion());
- assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(priorSchema));
- }
- }
-
- /**
- * If using the legacy wildcard expansion, reuse schema if partition paths
- * are the same length.
- */
-
- @Test
- public void testSamePartitionLength() {
-
- // Set up the file metadata manager
-
- Path filePathA = new Path("hdfs:///w/x/y/a.csv");
- Path filePathB = new Path("hdfs:///w/x/y/b.csv");
- FileMetadataManager metadataManager = new FileMetadataManager(
- fixture.getOptionManager(),
- new Path("hdfs:///w"),
- Lists.newArrayList(filePathA, filePathB));
-
- // Set up the scan level projection
-
- ScanLevelProjection scanProj = new ScanLevelProjection(
- ScanTestUtils.projectAllWithMetadata(2),
- ScanTestUtils.parsers(metadataManager.projectionParser()));
-
- // Define the schema smoother
-
- SchemaSmoother smoother = new SchemaSmoother(scanProj,
- ScanTestUtils.resolvers(metadataManager));
-
- TupleMetadata tableSchema = new SchemaBuilder()
- .add("a", MinorType.INT)
- .add("b", MinorType.VARCHAR)
- .buildSchema();
-
- TupleMetadata expectedSchema = ScanTestUtils.expandMetadata(tableSchema, metadataManager, 2);
- {
- metadataManager.startFile(filePathA);
- ResolvedRow rootTuple = doResolve(smoother, tableSchema);
- assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
- }
- {
- metadataManager.startFile(filePathB);
- ResolvedRow rootTuple = doResolve(smoother, tableSchema);
- assertEquals(1, smoother.schemaVersion());
- assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
- }
- }
-
- /**
- * If using the legacy wildcard expansion, reuse schema if the new partition path
- * is shorter than the previous. (Unneeded partitions will be set to null by the
- * scan projector.)
- */
-
- @Test
- public void testShorterPartitionLength() {
-
- // Set up the file metadata manager
-
- Path filePathA = new Path("hdfs:///w/x/y/a.csv");
- Path filePathB = new Path("hdfs:///w/x/b.csv");
- FileMetadataManager metadataManager = new FileMetadataManager(
- fixture.getOptionManager(),
- new Path("hdfs:///w"),
- Lists.newArrayList(filePathA, filePathB));
-
- // Set up the scan level projection
-
- ScanLevelProjection scanProj = new ScanLevelProjection(
- ScanTestUtils.projectAllWithMetadata(2),
- ScanTestUtils.parsers(metadataManager.projectionParser()));
-
- // Define the schema smoother
-
- SchemaSmoother smoother = new SchemaSmoother(scanProj,
- ScanTestUtils.resolvers(metadataManager));
-
- TupleMetadata tableSchema = new SchemaBuilder()
- .add("a", MinorType.INT)
- .add("b", MinorType.VARCHAR)
- .buildSchema();
-
- TupleMetadata expectedSchema = ScanTestUtils.expandMetadata(tableSchema, metadataManager, 2);
- {
- metadataManager.startFile(filePathA);
- ResolvedRow rootTuple = doResolve(smoother, tableSchema);
- assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
- }
- {
- metadataManager.startFile(filePathB);
- ResolvedRow rootTuple = doResolve(smoother, tableSchema);
- assertEquals(1, smoother.schemaVersion());
- assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
- }
- }
-
- /**
- * If using the legacy wildcard expansion, we are able to use the same
-   * schema even if the new partition path is longer than the previous,
-   * because all file names are provided up front.
- */
-
- @Test
- public void testLongerPartitionLength() {
-
- // Set up the file metadata manager
-
- Path filePathA = new Path("hdfs:///w/x/a.csv");
- Path filePathB = new Path("hdfs:///w/x/y/b.csv");
- FileMetadataManager metadataManager = new FileMetadataManager(
- fixture.getOptionManager(),
- new Path("hdfs:///w"),
- Lists.newArrayList(filePathA, filePathB));
-
- // Set up the scan level projection
-
- ScanLevelProjection scanProj = new ScanLevelProjection(
- ScanTestUtils.projectAllWithMetadata(2),
- ScanTestUtils.parsers(metadataManager.projectionParser()));
-
- // Define the schema smoother
-
- SchemaSmoother smoother = new SchemaSmoother(scanProj,
- ScanTestUtils.resolvers(metadataManager));
-
- TupleMetadata tableSchema = new SchemaBuilder()
- .add("a", MinorType.INT)
- .add("b", MinorType.VARCHAR)
- .buildSchema();
-
- TupleMetadata expectedSchema = ScanTestUtils.expandMetadata(tableSchema, metadataManager, 2);
- {
- metadataManager.startFile(filePathA);
- ResolvedRow rootTuple = doResolve(smoother, tableSchema);
- assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
- }
- {
- metadataManager.startFile(filePathB);
- ResolvedRow rootTuple = doResolve(smoother, tableSchema);
- assertEquals(1, smoother.schemaVersion());
- assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
- }
- }
-
- /**
- * Integrated test across multiple schemas at the batch level.
- */
-
- @Test
- public void testSmoothableSchemaBatches() {
- ScanLevelProjection scanProj = new ScanLevelProjection(
- RowSetTestUtils.projectAll(),
- ScanTestUtils.parsers());
-
- SchemaSmoother smoother = new SchemaSmoother(scanProj,
- ScanTestUtils.resolvers());
-
-    // Table 1: (a: nullable bigint, b: nullable varchar, c: float8)
-
- TupleMetadata schema1 = new SchemaBuilder()
- .addNullable("a", MinorType.BIGINT)
- .addNullable("b", MinorType.VARCHAR)
- .add("c", MinorType.FLOAT8)
- .buildSchema();
- {
- ResolvedRow rootTuple = doResolve(smoother, schema1);
-
- // Just use the original schema.
-
- assertTrue(schema1.isEquivalent(ScanTestUtils.schema(rootTuple)));
- assertEquals(1, smoother.schemaVersion());
- }
-
-    // Table 2: (a: nullable bigint, c), column omitted, original schema preserved
-
- TupleMetadata schema2 = new SchemaBuilder()
- .addNullable("a", MinorType.BIGINT)
- .add("c", MinorType.FLOAT8)
- .buildSchema();
- {
- ResolvedRow rootTuple = doResolve(smoother, schema2);
- assertTrue(schema1.isEquivalent(ScanTestUtils.schema(rootTuple)));
- assertEquals(1, smoother.schemaVersion());
- }
-
-    // Table 3: (a, b, c, d), column added, must replan schema
-
- TupleMetadata schema3 = new SchemaBuilder()
- .addNullable("a", MinorType.BIGINT)
- .addNullable("b", MinorType.VARCHAR)
- .add("c", MinorType.FLOAT8)
- .add("d", MinorType.INT)
- .buildSchema();
- {
- ResolvedRow rootTuple = doResolve(smoother, schema3);
- assertTrue(schema3.isEquivalent(ScanTestUtils.schema(rootTuple)));
- assertEquals(2, smoother.schemaVersion());
- }
-
- // Table 4: Drop a non-nullable column, must replan
-
- TupleMetadata schema4 = new SchemaBuilder()
- .addNullable("a", MinorType.BIGINT)
- .addNullable("b", MinorType.VARCHAR)
- .buildSchema();
- {
- ResolvedRow rootTuple = doResolve(smoother, schema4);
- assertTrue(schema4.isEquivalent(ScanTestUtils.schema(rootTuple)));
- assertEquals(3, smoother.schemaVersion());
- }
-
-    // Table 5: (a: double), type changed, must replan schema
-
- TupleMetadata schema5 = new SchemaBuilder()
- .addNullable("a", MinorType.FLOAT8)
- .addNullable("b", MinorType.VARCHAR)
- .buildSchema();
- {
- ResolvedRow rootTuple = doResolve(smoother, schema5);
- assertTrue(schema5.isEquivalent(ScanTestUtils.schema(rootTuple)));
- assertEquals(4, smoother.schemaVersion());
- }
-
-// // Table 6: (a: not-nullable bigint): convert to nullable for consistency
-//
-// TupleMetadata schema6 = new SchemaBuilder()
-// .add("a", MinorType.FLOAT8)
-// .add("b", MinorType.VARCHAR)
-// .buildSchema();
-// {
-// SchemaLevelProjection schemaProj = smoother.resolve(schema3, dummySource);
-// assertTrue(schema5.isEquivalent(ScanTestUtils.schema(schemaProj.columns())));
-// }
- }
-
- /**
- * A SELECT * query uses the schema of the table as the output schema.
- * This is trivial when the scanner has one table. But, if two or more
- * tables occur, then things get interesting. The first table sets the
- * schema. The second table then has:
- * <ul>
- * <li>The same schema, trivial case.</li>
- * <li>A subset of the first table. The type of the "missing" column
- * from the first table is used for a null column in the second table.</li>
- * <li>A superset or disjoint set of the first schema. This triggers a hard schema
- * change.</li>
- * </ul>
- * <p>
- * It is an open question whether previous columns should be preserved on
- * a hard reset. For now, the code implements, and this test verifies, that a
- * hard reset clears the "memory" of prior schemas.
- */
-
- @Test
- public void testWildcardSmoothing() {
- ScanSchemaOrchestrator projector = new ScanSchemaOrchestrator(fixture.allocator());
- projector.enableSchemaSmoothing(true);
- projector.build(RowSetTestUtils.projectAll());
-
- TupleMetadata firstSchema = new SchemaBuilder()
- .add("a", MinorType.INT)
- .addNullable("b", MinorType.VARCHAR, 10)
- .addNullable("c", MinorType.BIGINT)
- .buildSchema();
- TupleMetadata subsetSchema = new SchemaBuilder()
- .addNullable("b", MinorType.VARCHAR, 10)
- .add("a", MinorType.INT)
- .buildSchema();
- TupleMetadata disjointSchema = new SchemaBuilder()
- .add("a", MinorType.INT)
- .addNullable("b", MinorType.VARCHAR, 10)
- .add("d", MinorType.VARCHAR)
- .buildSchema();
-
- SchemaTracker tracker = new SchemaTracker();
- int schemaVersion;
- {
- // First table, establishes the baseline
- // ... FROM table 1
-
- ReaderSchemaOrchestrator reader = projector.startReader();
- ResultSetLoader loader = reader.makeTableLoader(firstSchema);
-
- reader.startBatch();
- loader.writer()
- .addRow(10, "fred", 110L)
- .addRow(20, "wilma", 110L);
- reader.endBatch();
-
- tracker.trackSchema(projector.output());
- schemaVersion = tracker.schemaVersion();
-
- SingleRowSet expected = fixture.rowSetBuilder(firstSchema)
- .addRow(10, "fred", 110L)
- .addRow(20, "wilma", 110L)
- .build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(projector.output()));
- }
- {
- // Second table, same schema, the trivial case
- // ... FROM table 2
-
- ReaderSchemaOrchestrator reader = projector.startReader();
- ResultSetLoader loader = reader.makeTableLoader(firstSchema);
-
- reader.startBatch();
- loader.writer()
- .addRow(70, "pebbles", 770L)
- .addRow(80, "hoppy", 880L);
- reader.endBatch();
-
- tracker.trackSchema(projector.output());
- assertEquals(schemaVersion, tracker.schemaVersion());
-
- SingleRowSet expected = fixture.rowSetBuilder(firstSchema)
- .addRow(70, "pebbles", 770L)
- .addRow(80, "hoppy", 880L)
- .build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(projector.output()));
- }
- {
- // Third table: subset schema of first two
- // ... FROM table 3
-
- ReaderSchemaOrchestrator reader = projector.startReader();
- ResultSetLoader loader = reader.makeTableLoader(subsetSchema);
-
- reader.startBatch();
- loader.writer()
- .addRow("bambam", 30)
- .addRow("betty", 40);
- reader.endBatch();
-
- tracker.trackSchema(projector.output());
- assertEquals(schemaVersion, tracker.schemaVersion());
-
- SingleRowSet expected = fixture.rowSetBuilder(firstSchema)
- .addRow(30, "bambam", null)
- .addRow(40, "betty", null)
- .build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(projector.output()));
- }
- {
-      // Fourth table: disjoint schema, causes a schema reset
- // ... FROM table 4
-
- ReaderSchemaOrchestrator reader = projector.startReader();
- ResultSetLoader loader = reader.makeTableLoader(disjointSchema);
-
- reader.startBatch();
- loader.writer()
- .addRow(50, "dino", "supporting")
- .addRow(60, "barney", "main");
- reader.endBatch();
-
- tracker.trackSchema(projector.output());
- assertNotEquals(schemaVersion, tracker.schemaVersion());
-
- SingleRowSet expected = fixture.rowSetBuilder(disjointSchema)
- .addRow(50, "dino", "supporting")
- .addRow(60, "barney", "main")
- .build();
- new RowSetComparison(expected)
- .verifyAndClearAll(fixture.wrap(projector.output()));
- }
-
- projector.close();
- }
-
- // TODO: Test schema smoothing with repeated
- // TODO: Test hard schema change
- // TODO: Typed null column tests (resurrect)
- // TODO: Test maps and arrays of maps
-}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestConstantColumnLoader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestConstantColumnLoader.java
index b45374be2..086da96b4 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestConstantColumnLoader.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestConstantColumnLoader.java
@@ -24,14 +24,21 @@ import org.apache.drill.categories.RowSetTests;
import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.physical.impl.scan.ScanTestUtils;
+import org.apache.drill.exec.physical.impl.scan.file.FileMetadata;
+import org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn;
+import org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumnDefn;
+import org.apache.drill.exec.physical.impl.scan.file.PartitionColumn;
import org.apache.drill.exec.physical.impl.scan.project.ConstantColumnLoader.ConstantColumnSpec;
import org.apache.drill.exec.physical.rowSet.impl.ResultVectorCacheImpl;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
+import org.apache.drill.exec.store.ColumnExplorer.ImplicitFileColumns;
import org.apache.drill.test.SubOperatorTest;
import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
import org.apache.drill.test.rowSet.RowSetComparison;
+import org.apache.hadoop.fs.Path;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@@ -114,4 +121,42 @@ public class TestConstantColumnLoader extends SubOperatorTest {
.verifyAndClearAll(fixture.wrap(staticLoader.load(2)));
staticLoader.close();
}
+
+ @Test
+ public void testFileMetadata() {
+
+ FileMetadata fileInfo = new FileMetadata(new Path("hdfs:///w/x/y/z.csv"), new Path("hdfs:///w"));
+ List<ConstantColumnSpec> defns = new ArrayList<>();
+ FileMetadataColumnDefn iDefn = new FileMetadataColumnDefn(
+ ScanTestUtils.SUFFIX_COL, ImplicitFileColumns.SUFFIX);
+ FileMetadataColumn iCol = new FileMetadataColumn(ScanTestUtils.SUFFIX_COL,
+ iDefn, fileInfo, null, 0);
+ defns.add(iCol);
+
+ String partColName = ScanTestUtils.partitionColName(1);
+ PartitionColumn pCol = new PartitionColumn(partColName, 1, fileInfo, null, 0);
+ defns.add(pCol);
+
+ ResultVectorCacheImpl cache = new ResultVectorCacheImpl(fixture.allocator());
+ ConstantColumnLoader staticLoader = new ConstantColumnLoader(cache, defns);
+
+ // Create a batch
+
+ staticLoader.load(2);
+
+ // Verify
+
+ BatchSchema expectedSchema = new SchemaBuilder()
+ .add(ScanTestUtils.SUFFIX_COL, MinorType.VARCHAR)
+ .addNullable(partColName, MinorType.VARCHAR)
+ .build();
+ SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .addRow("csv", "y")
+ .addRow("csv", "y")
+ .build();
+
+ new RowSetComparison(expected)
+ .verifyAndClearAll(fixture.wrap(staticLoader.load(2)));
+ staticLoader.close();
+ }
}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestScanLevelProjection.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestScanLevelProjection.java
index 5b49ab3c7..f40e84787 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestScanLevelProjection.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestScanLevelProjection.java
@@ -19,6 +19,7 @@ package org.apache.drill.exec.physical.impl.scan.project;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
@@ -27,6 +28,8 @@ import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.exec.physical.impl.scan.ScanTestUtils;
import org.apache.drill.exec.physical.rowSet.impl.RowSetTestUtils;
+import org.apache.drill.exec.physical.rowSet.project.ImpliedTupleRequest;
+import org.apache.drill.exec.physical.rowSet.project.RequestedTuple;
import org.apache.drill.exec.physical.rowSet.project.RequestedTuple.RequestedColumn;
import org.apache.drill.exec.record.metadata.ProjectionType;
import org.apache.drill.test.SubOperatorTest;
@@ -56,6 +59,7 @@ public class TestScanLevelProjection extends SubOperatorTest {
final ScanLevelProjection scanProj = new ScanLevelProjection(
RowSetTestUtils.projectList("a", "b", "c"),
ScanTestUtils.parsers());
+
assertFalse(scanProj.projectAll());
assertFalse(scanProj.projectNone());
@@ -72,6 +76,19 @@ public class TestScanLevelProjection extends SubOperatorTest {
// Verify column type
assertEquals(UnresolvedColumn.UNRESOLVED, scanProj.columns().get(0).nodeType());
+
+ // Verify tuple projection
+
+ RequestedTuple outputProj = scanProj.rootProjection();
+ assertEquals(3, outputProj.projections().size());
+ assertNotNull(outputProj.get("a"));
+ assertTrue(outputProj.get("a").isSimple());
+
+ RequestedTuple readerProj = scanProj.readerProjection();
+ assertEquals(3, readerProj.projections().size());
+ assertNotNull(readerProj.get("a"));
+ assertEquals(ProjectionType.UNSPECIFIED, readerProj.projectionType("a"));
+ assertEquals(ProjectionType.UNPROJECTED, readerProj.projectionType("d"));
}
/**
@@ -85,6 +102,7 @@ public class TestScanLevelProjection extends SubOperatorTest {
final ScanLevelProjection scanProj = new ScanLevelProjection(
RowSetTestUtils.projectList("a.x", "b.x", "a.y", "b.y", "c"),
ScanTestUtils.parsers());
+
assertFalse(scanProj.projectAll());
assertFalse(scanProj.projectNone());
@@ -107,6 +125,20 @@ public class TestScanLevelProjection extends SubOperatorTest {
final RequestedColumn c = ((UnresolvedColumn) scanProj.columns().get(2)).element();
assertTrue(c.isSimple());
+
+ // Verify tuple projection
+
+ RequestedTuple outputProj = scanProj.rootProjection();
+ assertEquals(3, outputProj.projections().size());
+ assertNotNull(outputProj.get("a"));
+ assertTrue(outputProj.get("a").isTuple());
+
+ RequestedTuple readerProj = scanProj.readerProjection();
+ assertEquals(3, readerProj.projections().size());
+ assertNotNull(readerProj.get("a"));
+ assertEquals(ProjectionType.TUPLE, readerProj.projectionType("a"));
+ assertEquals(ProjectionType.UNSPECIFIED, readerProj.projectionType("c"));
+ assertEquals(ProjectionType.UNPROJECTED, readerProj.projectionType("d"));
}
/**
@@ -119,6 +151,7 @@ public class TestScanLevelProjection extends SubOperatorTest {
final ScanLevelProjection scanProj = new ScanLevelProjection(
RowSetTestUtils.projectList("a[1]", "a[3]"),
ScanTestUtils.parsers());
+
assertFalse(scanProj.projectAll());
assertFalse(scanProj.projectNone());
@@ -137,6 +170,19 @@ public class TestScanLevelProjection extends SubOperatorTest {
assertTrue(a.hasIndex(1));
assertFalse(a.hasIndex(2));
assertTrue(a.hasIndex(3));
+
+ // Verify tuple projection
+
+ RequestedTuple outputProj = scanProj.rootProjection();
+ assertEquals(1, outputProj.projections().size());
+ assertNotNull(outputProj.get("a"));
+ assertTrue(outputProj.get("a").isArray());
+
+ RequestedTuple readerProj = scanProj.readerProjection();
+ assertEquals(1, readerProj.projections().size());
+ assertNotNull(readerProj.get("a"));
+ assertEquals(ProjectionType.ARRAY, readerProj.projectionType("a"));
+ assertEquals(ProjectionType.UNPROJECTED, readerProj.projectionType("c"));
}
/**
@@ -165,6 +211,17 @@ public class TestScanLevelProjection extends SubOperatorTest {
// Verify column type
assertEquals(UnresolvedColumn.WILDCARD, scanProj.columns().get(0).nodeType());
+
+ // Verify tuple projection
+
+ RequestedTuple outputProj = scanProj.rootProjection();
+ assertEquals(1, outputProj.projections().size());
+ assertNotNull(outputProj.get("**"));
+ assertTrue(outputProj.get("**").isWildcard());
+
+ RequestedTuple readerProj = scanProj.readerProjection();
+ assertTrue(readerProj instanceof ImpliedTupleRequest);
+ assertEquals(ProjectionType.UNSPECIFIED, readerProj.projectionType("a"));
}
/**
@@ -181,38 +238,62 @@ public class TestScanLevelProjection extends SubOperatorTest {
assertFalse(scanProj.projectAll());
assertTrue(scanProj.projectNone());
assertEquals(0, scanProj.requestedCols().size());
+
+ // Verify tuple projection
+
+ RequestedTuple outputProj = scanProj.rootProjection();
+ assertEquals(0, outputProj.projections().size());
+
+ RequestedTuple readerProj = scanProj.readerProjection();
+ assertTrue(readerProj instanceof ImpliedTupleRequest);
+ assertEquals(ProjectionType.UNPROJECTED, readerProj.projectionType("a"));
}
/**
- * Can't include both a wildcard and a column name.
+ * Can include both a wildcard and a column name. The Project
+   * operator will fill in the column; the scan framework just ignores
+ * the extra column.
*/
@Test
- public void testErrorWildcardAndColumns() {
- try {
- new ScanLevelProjection(
+ public void testWildcardAndColumns() {
+ ScanLevelProjection scanProj = new ScanLevelProjection(
RowSetTestUtils.projectList(SchemaPath.DYNAMIC_STAR, "a"),
ScanTestUtils.parsers());
- fail();
- } catch (final IllegalArgumentException e) {
- // Expected
- }
+
+ assertTrue(scanProj.projectAll());
+ assertFalse(scanProj.projectNone());
+ assertEquals(2, scanProj.requestedCols().size());
+ assertEquals(1, scanProj.columns().size());
+
+ // Verify tuple projection
+
+ RequestedTuple outputProj = scanProj.rootProjection();
+ assertEquals(2, outputProj.projections().size());
+ assertNotNull(outputProj.get("**"));
+ assertTrue(outputProj.get("**").isWildcard());
+ assertNotNull(outputProj.get("a"));
+
+ RequestedTuple readerProj = scanProj.readerProjection();
+ assertTrue(readerProj instanceof ImpliedTupleRequest);
+ assertEquals(ProjectionType.UNSPECIFIED, readerProj.projectionType("a"));
+ assertEquals(ProjectionType.UNSPECIFIED, readerProj.projectionType("c"));
}
/**
- * Can't include both a column name and a wildcard.
+ * Test a column name and a wildcard.
*/
@Test
- public void testErrorColumnAndWildcard() {
- try {
- new ScanLevelProjection(
+ public void testColumnAndWildcard() {
+ ScanLevelProjection scanProj = new ScanLevelProjection(
RowSetTestUtils.projectList("a", SchemaPath.DYNAMIC_STAR),
ScanTestUtils.parsers());
- fail();
- } catch (final IllegalArgumentException e) {
- // Expected
- }
+
+ assertTrue(scanProj.projectAll());
+ assertFalse(scanProj.projectNone());
+ assertEquals(2, scanProj.requestedCols().size());
+ assertEquals(1, scanProj.columns().size());
}
/**
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestSchemaSmoothing.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestSchemaSmoothing.java
index a21b1e472..8adc0372a 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestSchemaSmoothing.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestSchemaSmoothing.java
@@ -28,8 +28,9 @@ import org.apache.drill.categories.RowSetTests;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.physical.impl.protocol.SchemaTracker;
import org.apache.drill.exec.physical.impl.scan.ScanTestUtils;
+import org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn;
+import org.apache.drill.exec.physical.impl.scan.file.FileMetadataManager;
import org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow;
-import org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ReaderSchemaOrchestrator;
import org.apache.drill.exec.physical.impl.scan.project.SchemaSmoother.IncompatibleSchemaException;
import org.apache.drill.exec.physical.rowSet.ResultSetLoader;
import org.apache.drill.exec.physical.rowSet.impl.RowSetTestUtils;
@@ -38,8 +39,10 @@ import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.test.SubOperatorTest;
import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
import org.apache.drill.test.rowSet.RowSetComparison;
+import org.apache.hadoop.fs.Path;
import org.junit.Test;
import org.junit.experimental.categories.Category;
+import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
/**
* Tests schema smoothing at the schema projection level.
@@ -63,7 +66,7 @@ import org.junit.experimental.categories.Category;
* to a fundamental limitation in Drill:
* <ul>
* <li>Drill cannot predict the future: each file (or batch)
- * may have a different schema.</ul>
+ * may have a different schema.</li>
* <li>Drill does not know about these differences until they
* occur.</li>
* <li>The scan operator is obligated to return the same schema
@@ -85,6 +88,105 @@ import org.junit.experimental.categories.Category;
public class TestSchemaSmoothing extends SubOperatorTest {
/**
+ * Sanity test for the simple, discrete case. The purpose of
+ * discrete is just to run the basic lifecycle in a way that
+ * is compatible with the schema-persistence version.
+ */
+
+ @Test
+ public void testDiscrete() {
+
+ // Set up the file metadata manager
+
+ Path filePathA = new Path("hdfs:///w/x/y/a.csv");
+ Path filePathB = new Path("hdfs:///w/x/y/b.csv");
+ FileMetadataManager metadataManager = new FileMetadataManager(
+ fixture.getOptionManager(),
+ new Path("hdfs:///w"),
+ Lists.newArrayList(filePathA, filePathB));
+
+ // Set up the scan level projection
+
+ ScanLevelProjection scanProj = new ScanLevelProjection(
+ RowSetTestUtils.projectList(ScanTestUtils.FILE_NAME_COL, "a", "b"),
+ ScanTestUtils.parsers(metadataManager.projectionParser()));
+
+ {
+ // Define a file a.csv
+
+ metadataManager.startFile(filePathA);
+
+ // Build the output schema from the (a, b) table schema
+
+ TupleMetadata twoColSchema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addNullable("b", MinorType.VARCHAR, 10)
+ .buildSchema();
+ NullColumnBuilder builder = new NullColumnBuilder(null, false);
+ ResolvedRow rootTuple = new ResolvedRow(builder);
+ new ExplicitSchemaProjection(
+ scanProj, twoColSchema, rootTuple,
+ ScanTestUtils.resolvers(metadataManager));
+
+ // Verify the full output schema
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("filename", MinorType.VARCHAR)
+ .add("a", MinorType.INT)
+ .addNullable("b", MinorType.VARCHAR, 10)
+ .buildSchema();
+
+ // Verify
+
+ List<ResolvedColumn> columns = rootTuple.columns();
+ assertEquals(3, columns.size());
+ assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
+ assertEquals(ScanTestUtils.FILE_NAME_COL, columns.get(0).name());
+ assertEquals("a.csv", ((FileMetadataColumn) columns.get(0)).value());
+ assertEquals(ResolvedTableColumn.ID, columns.get(1).nodeType());
+ }
+ {
+ // Define a file b.csv
+
+ metadataManager.startFile(filePathB);
+
+ // Build the output schema from the (a) table schema
+
+ TupleMetadata oneColSchema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .buildSchema();
+ NullColumnBuilder builder = new NullColumnBuilder(null, false);
+ ResolvedRow rootTuple = new ResolvedRow(builder);
+ new ExplicitSchemaProjection(
+ scanProj, oneColSchema, rootTuple,
+ ScanTestUtils.resolvers(metadataManager));
+
+ // Verify the full output schema
+ // Since this mode is "discrete", we don't remember the type
+ // of the missing column. (Instead, it is filled in at the
+ // vector level as part of vector persistence.) During projection, it is
+ // marked with type NULL so that the null column builder will fill in
+ // the proper type.
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("filename", MinorType.VARCHAR)
+ .add("a", MinorType.INT)
+ .addNullable("b", MinorType.NULL)
+ .buildSchema();
+
+ // Verify
+
+ List<ResolvedColumn> columns = rootTuple.columns();
+ assertEquals(3, columns.size());
+ assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
+ assertEquals(ScanTestUtils.FILE_NAME_COL, columns.get(0).name());
+ assertEquals("b.csv", ((FileMetadataColumn) columns.get(0)).value());
+ assertEquals(ResolvedTableColumn.ID, columns.get(1).nodeType());
+ assertEquals(ResolvedNullColumn.ID, columns.get(2).nodeType());
+ }
+ }
+
+ /**
* Low-level test of the smoothing projection, including the exceptions
* it throws when things are not going its way.
*/
@@ -463,6 +565,150 @@ public class TestSchemaSmoothing extends SubOperatorTest {
assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(priorSchema));
}
}
+
+ /**
+ * If using the legacy wildcard expansion, reuse schema if partition paths
+ * are the same length.
+ */
+
+ @Test
+ public void testSamePartitionLength() {
+
+ // Set up the file metadata manager
+
+ Path filePathA = new Path("hdfs:///w/x/y/a.csv");
+ Path filePathB = new Path("hdfs:///w/x/y/b.csv");
+ FileMetadataManager metadataManager = new FileMetadataManager(
+ fixture.getOptionManager(),
+ new Path("hdfs:///w"),
+ Lists.newArrayList(filePathA, filePathB));
+
+ // Set up the scan level projection
+
+ ScanLevelProjection scanProj = new ScanLevelProjection(
+ ScanTestUtils.projectAllWithMetadata(2),
+ ScanTestUtils.parsers(metadataManager.projectionParser()));
+
+ // Define the schema smoother
+
+ SchemaSmoother smoother = new SchemaSmoother(scanProj,
+ ScanTestUtils.resolvers(metadataManager));
+
+ TupleMetadata tableSchema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .add("b", MinorType.VARCHAR)
+ .buildSchema();
+
+ TupleMetadata expectedSchema = ScanTestUtils.expandMetadata(tableSchema, metadataManager, 2);
+ {
+ metadataManager.startFile(filePathA);
+ ResolvedRow rootTuple = doResolve(smoother, tableSchema);
+ assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
+ }
+ {
+ metadataManager.startFile(filePathB);
+ ResolvedRow rootTuple = doResolve(smoother, tableSchema);
+ assertEquals(1, smoother.schemaVersion());
+ assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
+ }
+ }
+
+ /**
+ * If using the legacy wildcard expansion, reuse schema if the new partition path
+ * is shorter than the previous. (Unneeded partitions will be set to null by the
+ * scan projector.)
+ */
+
+ @Test
+ public void testShorterPartitionLength() {
+
+ // Set up the file metadata manager
+
+ Path filePathA = new Path("hdfs:///w/x/y/a.csv");
+ Path filePathB = new Path("hdfs:///w/x/b.csv");
+ FileMetadataManager metadataManager = new FileMetadataManager(
+ fixture.getOptionManager(),
+ new Path("hdfs:///w"),
+ Lists.newArrayList(filePathA, filePathB));
+
+ // Set up the scan level projection
+
+ ScanLevelProjection scanProj = new ScanLevelProjection(
+ ScanTestUtils.projectAllWithMetadata(2),
+ ScanTestUtils.parsers(metadataManager.projectionParser()));
+
+ // Define the schema smoother
+
+ SchemaSmoother smoother = new SchemaSmoother(scanProj,
+ ScanTestUtils.resolvers(metadataManager));
+
+ TupleMetadata tableSchema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .add("b", MinorType.VARCHAR)
+ .buildSchema();
+
+ TupleMetadata expectedSchema = ScanTestUtils.expandMetadata(tableSchema, metadataManager, 2);
+ {
+ metadataManager.startFile(filePathA);
+ ResolvedRow rootTuple = doResolve(smoother, tableSchema);
+ assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
+ }
+ {
+ metadataManager.startFile(filePathB);
+ ResolvedRow rootTuple = doResolve(smoother, tableSchema);
+ assertEquals(1, smoother.schemaVersion());
+ assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
+ }
+ }
+
+ /**
+ * If using the legacy wildcard expansion, we are able to use the same
+   * schema even if the new partition path is longer than the previous,
+   * because all file names are provided up front.
+ */
+
+ @Test
+ public void testLongerPartitionLength() {
+
+ // Set up the file metadata manager
+
+ Path filePathA = new Path("hdfs:///w/x/a.csv");
+ Path filePathB = new Path("hdfs:///w/x/y/b.csv");
+ FileMetadataManager metadataManager = new FileMetadataManager(
+ fixture.getOptionManager(),
+ new Path("hdfs:///w"),
+ Lists.newArrayList(filePathA, filePathB));
+
+ // Set up the scan level projection
+
+ ScanLevelProjection scanProj = new ScanLevelProjection(
+ ScanTestUtils.projectAllWithMetadata(2),
+ ScanTestUtils.parsers(metadataManager.projectionParser()));
+
+ // Define the schema smoother
+
+ SchemaSmoother smoother = new SchemaSmoother(scanProj,
+ ScanTestUtils.resolvers(metadataManager));
+
+ TupleMetadata tableSchema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .add("b", MinorType.VARCHAR)
+ .buildSchema();
+
+ TupleMetadata expectedSchema = ScanTestUtils.expandMetadata(tableSchema, metadataManager, 2);
+ {
+ metadataManager.startFile(filePathA);
+ ResolvedRow rootTuple = doResolve(smoother, tableSchema);
+ assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
+ }
+ {
+ metadataManager.startFile(filePathB);
+ ResolvedRow rootTuple = doResolve(smoother, tableSchema);
+ assertEquals(1, smoother.schemaVersion());
+ assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
+ }
+ }
+
/**
* Integrated test across multiple schemas at the batch level.
*/