diff options
Diffstat (limited to 'exec/java-exec/src/test/java/org/apache')
20 files changed, 895 insertions, 2906 deletions
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArray.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArray.java index 3587e2711..88ccd3c41 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArray.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArray.java @@ -22,33 +22,42 @@ import static org.junit.Assert.fail; import java.util.List; +import org.apache.drill.categories.RowSetTests; import org.apache.drill.common.expression.SchemaPath; import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.exec.physical.impl.scan.columns.ColumnsArrayManager; import org.apache.drill.exec.physical.impl.scan.file.FileMetadataManager; +import org.apache.drill.exec.physical.impl.scan.project.ReaderSchemaOrchestrator; import org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator; -import org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ReaderSchemaOrchestrator; import org.apache.drill.exec.physical.rowSet.ResultSetLoader; import org.apache.drill.exec.physical.rowSet.impl.RowSetTestUtils; import org.apache.drill.exec.record.metadata.SchemaBuilder; import org.apache.drill.exec.record.metadata.TupleMetadata; import org.apache.drill.test.SubOperatorTest; import org.apache.drill.test.rowSet.RowSet.SingleRowSet; -import org.apache.drill.test.rowSet.RowSetComparison; +import org.apache.drill.test.rowSet.RowSetUtilities; import org.apache.hadoop.fs.Path; import org.junit.Test; - +import org.junit.experimental.categories.Category; import org.apache.drill.shaded.guava.com.google.common.collect.Lists; +/** + * Test the "columns" array mechanism integrated with the scan schema + * orchestrator including simulating reading data. + */ + +@Category(RowSetTests.class) public class TestColumnsArray extends SubOperatorTest { - /** - * Test columns array. The table must be able to support it by having a - * matching column. - */ + private static class MockScanner { + ScanSchemaOrchestrator scanner; + ReaderSchemaOrchestrator reader; + ResultSetLoader loader; + } - @Test - public void testColumnsArray() { + private MockScanner buildScanner(List<SchemaPath> projList) { + + MockScanner mock = new MockScanner(); // Set up the file metadata manager @@ -64,21 +73,19 @@ public class TestColumnsArray extends SubOperatorTest { // Configure the schema orchestrator - ScanSchemaOrchestrator scanner = new ScanSchemaOrchestrator(fixture.allocator()); - scanner.withMetadata(metadataManager); - scanner.addParser(colsManager.projectionParser()); - scanner.addResolver(colsManager.resolver()); + mock.scanner = new ScanSchemaOrchestrator(fixture.allocator()); + mock.scanner.withMetadata(metadataManager); + mock.scanner.addParser(colsManager.projectionParser()); + mock.scanner.addResolver(colsManager.resolver()); - // SELECT filename, columns, dir0 ... + // SELECT <proj list> ... - scanner.build(RowSetTestUtils.projectList(ScanTestUtils.FILE_NAME_COL, - ColumnsArrayManager.COLUMNS_COL, - ScanTestUtils.partitionColName(0))); + mock.scanner.build(projList); // FROM z.csv metadataManager.startFile(filePath); - ReaderSchemaOrchestrator reader = scanner.startReader(); + mock.reader = mock.scanner.startReader(); // Table schema (columns: VARCHAR[]) @@ -86,7 +93,25 @@ public class TestColumnsArray extends SubOperatorTest { .addArray(ColumnsArrayManager.COLUMNS_COL, MinorType.VARCHAR) .buildSchema(); - ResultSetLoader loader = reader.makeTableLoader(tableSchema); + mock.loader = mock.reader.makeTableLoader(tableSchema); + + // First empty batch + + mock.reader.defineSchema(); + return mock; + } + + /** + * Test columns array. The table must be able to support it by having a + * matching column. + */ + + @Test + public void testColumnsArray() { + + MockScanner mock = buildScanner(RowSetTestUtils.projectList(ScanTestUtils.FILE_NAME_COL, + ColumnsArrayManager.COLUMNS_COL, + ScanTestUtils.partitionColName(0))); // Verify empty batch. @@ -99,18 +124,18 @@ public class TestColumnsArray extends SubOperatorTest { SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) .build(); - assertNotNull(scanner.output()); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); + assertNotNull(mock.scanner.output()); + RowSetUtilities.verify(expected, + fixture.wrap(mock.scanner.output())); } // Create a batch of data. - reader.startBatch(); - loader.writer() + mock.reader.startBatch(); + mock.loader.writer() .addRow(new Object[] {new String[] {"fred", "flintstone"}}) .addRow(new Object[] {new String[] {"barney", "rubble"}}); - reader.endBatch(); + mock. reader.endBatch(); // Verify @@ -120,11 +145,100 @@ public class TestColumnsArray extends SubOperatorTest { .addRow("z.csv", new String[] {"barney", "rubble"}, "x") .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); + RowSetUtilities.verify(expected, + fixture.wrap(mock.scanner.output())); } - scanner.close(); + mock.scanner.close(); + } + + @Test + public void testWildcard() { + + MockScanner mock = buildScanner(RowSetTestUtils.projectAll()); + + // Verify empty batch. + + TupleMetadata expectedSchema = new SchemaBuilder() + .addArray("columns", MinorType.VARCHAR) + .buildSchema(); + { + SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) + .build(); + + assertNotNull(mock.scanner.output()); + RowSetUtilities.verify(expected, + fixture.wrap(mock.scanner.output())); + } + + // Create a batch of data. + + mock.reader.startBatch(); + mock.loader.writer() + .addRow(new Object[] {new String[] {"fred", "flintstone"}}) + .addRow(new Object[] {new String[] {"barney", "rubble"}}); + mock. reader.endBatch(); + + // Verify + + { + SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) + .addSingleCol(new String[] {"fred", "flintstone"}) + .addSingleCol(new String[] {"barney", "rubble"}) + .build(); + + RowSetUtilities.verify(expected, + fixture.wrap(mock.scanner.output())); + } + + mock.scanner.close(); + } + + @Test + public void testWildcardAndFileMetadata() { + + MockScanner mock = buildScanner(RowSetTestUtils.projectList( + ScanTestUtils.FILE_NAME_COL, + SchemaPath.DYNAMIC_STAR, + ScanTestUtils.partitionColName(0))); + + // Verify empty batch. + + TupleMetadata expectedSchema = new SchemaBuilder() + .add("filename", MinorType.VARCHAR) + .addArray("columns", MinorType.VARCHAR) + .addNullable("dir0", MinorType.VARCHAR) + .buildSchema(); + { + SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) + .build(); + + assertNotNull(mock.scanner.output()); + RowSetUtilities.verify(expected, + fixture.wrap(mock.scanner.output())); + } + + // Create a batch of data. + + mock.reader.startBatch(); + mock.loader.writer() + .addRow(new Object[] {new String[] {"fred", "flintstone"}}) + .addRow(new Object[] {new String[] {"barney", "rubble"}}); + mock. reader.endBatch(); + + // Verify + + { + SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) + .addRow("z.csv", new String[] {"fred", "flintstone"}, "x") + .addRow("z.csv", new String[] {"barney", "rubble"}, "x") + .build(); + + RowSetUtilities.verify(expected, + fixture.wrap(mock.scanner.output())); + } + + mock.scanner.close(); } private ScanSchemaOrchestrator buildScan(List<SchemaPath> cols) { @@ -160,6 +274,7 @@ public class TestColumnsArray extends SubOperatorTest { try { ReaderSchemaOrchestrator reader = scanner.startReader(); reader.makeTableLoader(tableSchema); + reader.defineSchema(); fail(); } catch (IllegalStateException e) { // Expected @@ -180,6 +295,7 @@ public class TestColumnsArray extends SubOperatorTest { try { ReaderSchemaOrchestrator reader = scanner.startReader(); reader.makeTableLoader(tableSchema); + reader.defineSchema(); fail(); } catch (IllegalStateException e) { // Expected diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArrayFramework.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArrayFramework.java index 4f32b56e0..e7a0188cd 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArrayFramework.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArrayFramework.java @@ -27,6 +27,7 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import org.apache.drill.categories.RowSetTests; import org.apache.drill.common.exceptions.ExecutionSetupException; import org.apache.drill.common.expression.SchemaPath; import org.apache.drill.common.types.TypeProtos.MinorType; @@ -46,7 +47,7 @@ import org.apache.drill.test.SubOperatorTest; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.FileSplit; import org.junit.Test; - +import org.junit.experimental.categories.Category; import org.apache.drill.shaded.guava.com.google.common.collect.Lists; @@ -54,6 +55,7 @@ import org.apache.drill.shaded.guava.com.google.common.collect.Lists; * Test the columns-array specific behavior in the columns scan framework. */ +@Category(RowSetTests.class) public class TestColumnsArrayFramework extends SubOperatorTest { private static final Path MOCK_FILE_PATH = new Path("file:/w/x/y/z.csv"); @@ -101,7 +103,7 @@ public class TestColumnsArrayFramework extends SubOperatorTest { @Override public boolean open(ColumnsSchemaNegotiator negotiator) { this.negotiator = negotiator; - negotiator.setTableSchema(schema); + negotiator.setTableSchema(schema, true); negotiator.build(); return true; } @@ -115,6 +117,11 @@ public class TestColumnsArrayFramework extends SubOperatorTest { public void close() { } } + /** + * Test including a column other than "columns". Occurs when + * using implicit columns. + */ + @Test public void testNonColumnsProjection() { @@ -143,6 +150,10 @@ public class TestColumnsArrayFramework extends SubOperatorTest { scanFixture.close(); } + /** + * Test projecting just the `columns` column. + */ + @Test public void testColumnsProjection() { @@ -171,6 +182,10 @@ public class TestColumnsArrayFramework extends SubOperatorTest { scanFixture.close(); } + /** + * Test including a specific index of `columns` such as + * `columns`[1]. + */ @Test public void testColumnsIndexProjection() { diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArrayParser.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArrayParser.java index e2c8a21d3..d1e91a283 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArrayParser.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArrayParser.java @@ -23,6 +23,7 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import org.apache.drill.categories.RowSetTests; import org.apache.drill.common.exceptions.UserException; import org.apache.drill.common.expression.SchemaPath; import org.apache.drill.exec.physical.impl.scan.columns.ColumnsArrayManager; @@ -35,9 +36,10 @@ import org.apache.drill.exec.physical.rowSet.impl.RowSetTestUtils; import org.apache.drill.test.SubOperatorTest; import org.apache.hadoop.fs.Path; import org.junit.Test; - +import org.junit.experimental.categories.Category; import org.apache.drill.shaded.guava.com.google.common.collect.Lists; +@Category(RowSetTests.class) public class TestColumnsArrayParser extends SubOperatorTest { /** @@ -255,5 +257,45 @@ public class TestColumnsArrayParser extends SubOperatorTest { assertEquals(FileMetadataColumn.ID, scanProj.columns().get(2).nodeType()); } - // TODO: Test Columns element projection + /** + * If a query is of the form: + * <pre><code> + * select * from dfs.`multilevel/csv` where columns[1] < 1000 + * </code><pre> + * Then the projection list passed to the scan operator + * includes both the wildcard and the `columns` array. + * We can ignore one of them. + */ + + @Test + public void testWildcardAndColumns() { + ScanLevelProjection scanProj = new ScanLevelProjection( + RowSetTestUtils.projectList( + SchemaPath.DYNAMIC_STAR, + ColumnsArrayManager.COLUMNS_COL), + ScanTestUtils.parsers(new ColumnsArrayParser(true))); + + assertFalse(scanProj.projectAll()); + assertEquals(2, scanProj.requestedCols().size()); + + assertEquals(1, scanProj.columns().size()); + assertEquals(ColumnsArrayManager.COLUMNS_COL, scanProj.columns().get(0).name()); + + // Verify column type + + assertEquals(UnresolvedColumnsArrayColumn.ID, scanProj.columns().get(0).nodeType()); + } + + @Test + public void testColumnsAsMap() { + try { + new ScanLevelProjection( + RowSetTestUtils.projectList("columns.x"), + ScanTestUtils.parsers(new ColumnsArrayParser(true))); + fail(); + } + catch (UserException e) { + // Expected + } + } } diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestConstantColumnLoader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestConstantColumnLoader.java deleted file mode 100644 index 330a66115..000000000 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestConstantColumnLoader.java +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.drill.exec.physical.impl.scan; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.drill.common.types.TypeProtos.DataMode; -import org.apache.drill.common.types.TypeProtos.MajorType; -import org.apache.drill.common.types.TypeProtos.MinorType; -import org.apache.drill.exec.physical.impl.scan.file.FileMetadata; -import org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn; -import org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumnDefn; -import org.apache.drill.exec.physical.impl.scan.file.PartitionColumn; -import org.apache.drill.exec.physical.impl.scan.project.ConstantColumnLoader; -import org.apache.drill.exec.physical.impl.scan.project.ConstantColumnLoader.ConstantColumnSpec; -import org.apache.drill.exec.physical.rowSet.impl.ResultVectorCacheImpl; -import org.apache.drill.exec.record.BatchSchema; -import org.apache.drill.exec.record.MaterializedField; -import org.apache.drill.exec.record.metadata.SchemaBuilder; -import org.apache.drill.exec.store.ColumnExplorer.ImplicitFileColumns; -import org.apache.drill.test.SubOperatorTest; -import org.apache.drill.test.rowSet.RowSet.SingleRowSet; -import org.apache.drill.test.rowSet.RowSetComparison; -import org.apache.hadoop.fs.Path; -import org.junit.Test; - -public class TestConstantColumnLoader extends SubOperatorTest { - - private static class DummyColumn implements ConstantColumnSpec { - - private String name; - private MaterializedField schema; - private String value; - - public DummyColumn(String name, MajorType type, String value) { - this.name = name; - this.schema = MaterializedField.create(name, type); - this.value = value; - } - - @Override - public String name() { return name; } - - @Override - public MaterializedField schema() { return schema; } - - @Override - public String value() { return value; } - } - - /** - * Test the static column loader using one column of each type. - * The null column is of type int, but the associated value is of - * type string. This is a bit odd, but works out because we detect that - * the string value is null and call setNull on the writer, and avoid - * using the actual data. - */ - - @Test - public void testConstantColumnLoader() { - - MajorType aType = MajorType.newBuilder() - .setMinorType(MinorType.VARCHAR) - .setMode(DataMode.REQUIRED) - .build(); - MajorType bType = MajorType.newBuilder() - .setMinorType(MinorType.VARCHAR) - .setMode(DataMode.OPTIONAL) - .build(); - - List<ConstantColumnSpec> defns = new ArrayList<>(); - defns.add( - new DummyColumn("a", aType, "a-value" )); - defns.add( - new DummyColumn("b", bType, "b-value" )); - - ResultVectorCacheImpl cache = new ResultVectorCacheImpl(fixture.allocator()); - ConstantColumnLoader staticLoader = new ConstantColumnLoader(cache, defns); - - // Create a batch - - staticLoader.load(2); - - // Verify - - BatchSchema expectedSchema = new SchemaBuilder() - .add("a", aType) - .add("b", bType) - .build(); - SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) - .addRow("a-value", "b-value") - .addRow("a-value", "b-value") - .build(); - - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(staticLoader.load(2))); - staticLoader.close(); - } - - @Test - public void testFileMetadata() { - - FileMetadata fileInfo = new FileMetadata(new Path("hdfs:///w/x/y/z.csv"), new Path("hdfs:///w")); - List<ConstantColumnSpec> defns = new ArrayList<>(); - FileMetadataColumnDefn iDefn = new FileMetadataColumnDefn( - ScanTestUtils.SUFFIX_COL, ImplicitFileColumns.SUFFIX); - FileMetadataColumn iCol = new FileMetadataColumn(ScanTestUtils.SUFFIX_COL, - iDefn, fileInfo, null, 0); - defns.add(iCol); - - String partColName = ScanTestUtils.partitionColName(1); - PartitionColumn pCol = new PartitionColumn(partColName, 1, fileInfo, null, 0); - defns.add(pCol); - - ResultVectorCacheImpl cache = new ResultVectorCacheImpl(fixture.allocator()); - ConstantColumnLoader staticLoader = new ConstantColumnLoader(cache, defns); - - // Create a batch - - staticLoader.load(2); - - // Verify - - BatchSchema expectedSchema = new SchemaBuilder() - .add(ScanTestUtils.SUFFIX_COL, MinorType.VARCHAR) - .addNullable(partColName, MinorType.VARCHAR) - .build(); - SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) - .addRow("csv", "y") - .addRow("csv", "y") - .build(); - - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(staticLoader.load(2))); - staticLoader.close(); - } -} diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataColumnParser.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataColumnParser.java index 08aeed53f..a6de5e6f3 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataColumnParser.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataColumnParser.java @@ -22,6 +22,7 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import java.util.List; +import org.apache.drill.categories.RowSetTests; import org.apache.drill.common.expression.SchemaPath; import org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn; import org.apache.drill.exec.physical.impl.scan.file.FileMetadataManager; @@ -33,9 +34,10 @@ import org.apache.drill.exec.physical.rowSet.impl.RowSetTestUtils; import org.apache.drill.test.SubOperatorTest; import org.apache.hadoop.fs.Path; import org.junit.Test; - +import org.junit.experimental.categories.Category; import org.apache.drill.shaded.guava.com.google.common.collect.Lists; +@Category(RowSetTests.class) public class TestFileMetadataColumnParser extends SubOperatorTest { @Test @@ -135,8 +137,12 @@ public class TestFileMetadataColumnParser extends SubOperatorTest { assertEquals(PartitionColumn.ID, scanProj.columns().get(0).nodeType()); } + /** + * Test wildcard expansion. + */ + @Test - public void testWildcard() { + public void testRevisedWildcard() { Path filePath = new Path("hdfs:///w/x/y/z.csv"); FileMetadataManager metadataManager = new FileMetadataManager( fixture.getOptionManager(), @@ -153,15 +159,45 @@ public class TestFileMetadataColumnParser extends SubOperatorTest { } /** + * Legacy (prior version) wildcard expansion always expands partition + * columns. + */ + + @Test + public void testLegacyWildcard() { + Path filePath = new Path("hdfs:///w/x/y/z.csv"); + FileMetadataManager metadataManager = new FileMetadataManager( + fixture.getOptionManager(), + true, // Use legacy wildcard expansion + true, // Put partitions at end + new Path("hdfs:///w"), + Lists.newArrayList(filePath)); + + ScanLevelProjection scanProj = new ScanLevelProjection( + RowSetTestUtils.projectAll(), + Lists.newArrayList(metadataManager.projectionParser())); + + List<ColumnProjection> cols = scanProj.columns(); + assertEquals(3, cols.size()); + assertEquals(UnresolvedColumn.WILDCARD, cols.get(0).nodeType()); + assertEquals(PartitionColumn.ID, cols.get(1).nodeType()); + assertEquals(0, ((PartitionColumn) cols.get(1)).partition()); + assertEquals(PartitionColumn.ID, cols.get(2).nodeType()); + assertEquals(1, ((PartitionColumn) cols.get(2)).partition()); + } + + /** * Combine wildcard and file metadata columms. The wildcard expands * table columns but not metadata columns. */ @Test - public void testWildcardAndFileMetadata() { + public void testLegacyWildcardAndFileMetadata() { Path filePath = new Path("hdfs:///w/x/y/z.csv"); FileMetadataManager metadataManager = new FileMetadataManager( fixture.getOptionManager(), + true, // Use legacy wildcard expansion + false, // Put partitions at end new Path("hdfs:///w"), Lists.newArrayList(filePath)); @@ -173,10 +209,12 @@ public class TestFileMetadataColumnParser extends SubOperatorTest { Lists.newArrayList(metadataManager.projectionParser())); List<ColumnProjection> cols = scanProj.columns(); - assertEquals(3, cols.size()); + assertEquals(5, cols.size()); assertEquals(UnresolvedColumn.WILDCARD, cols.get(0).nodeType()); assertEquals(FileMetadataColumn.ID, cols.get(1).nodeType()); assertEquals(FileMetadataColumn.ID, cols.get(2).nodeType()); + assertEquals(PartitionColumn.ID, cols.get(3).nodeType()); + assertEquals(PartitionColumn.ID, cols.get(4).nodeType()); } /** @@ -185,10 +223,12 @@ public class TestFileMetadataColumnParser extends SubOperatorTest { */ @Test - public void testWildcardAndFileMetadataMixed() { + public void testLegacyWildcardAndFileMetadataMixed() { Path filePath = new Path("hdfs:///w/x/y/z.csv"); FileMetadataManager metadataManager = new FileMetadataManager( fixture.getOptionManager(), + true, // Use legacy wildcard expansion + false, // Put partitions at end new Path("hdfs:///w"), Lists.newArrayList(filePath)); @@ -200,18 +240,25 @@ public class TestFileMetadataColumnParser extends SubOperatorTest { Lists.newArrayList(metadataManager.projectionParser())); List<ColumnProjection> cols = scanProj.columns(); - assertEquals(3, cols.size()); + assertEquals(5, cols.size()); assertEquals(FileMetadataColumn.ID, cols.get(0).nodeType()); assertEquals(UnresolvedColumn.WILDCARD, cols.get(1).nodeType()); assertEquals(FileMetadataColumn.ID, cols.get(2).nodeType()); + assertEquals(PartitionColumn.ID, cols.get(3).nodeType()); + assertEquals(PartitionColumn.ID, cols.get(4).nodeType()); } /** - * Include both a wildcard and a partition column. + * Include both a wildcard and a partition column. The wildcard, in + * legacy mode, will create partition columns for any partitions not + * mentioned in the project list. + * <p> + * Tests proposed functionality: included only requested partition + * columns. */ @Test - public void testWildcardAndPartition() { + public void testRevisedWildcardAndPartition() { Path filePath = new Path("hdfs:///w/x/y/z.csv"); FileMetadataManager metadataManager = new FileMetadataManager( fixture.getOptionManager(), @@ -229,6 +276,113 @@ public class TestFileMetadataColumnParser extends SubOperatorTest { assertEquals(PartitionColumn.ID, cols.get(1).nodeType()); } + @Test + public void testLegacyWildcardAndPartition() { + Path filePath = new Path("hdfs:///w/x/y/z.csv"); + FileMetadataManager metadataManager = new FileMetadataManager( + fixture.getOptionManager(), + true, // Use legacy wildcard expansion + true, // Put partitions at end + new Path("hdfs:///w"), + Lists.newArrayList(filePath)); + + ScanLevelProjection scanProj = new ScanLevelProjection( + RowSetTestUtils.projectList(SchemaPath.DYNAMIC_STAR, + ScanTestUtils.partitionColName(8)), + Lists.newArrayList(metadataManager.projectionParser())); + + List<ColumnProjection> cols = scanProj.columns(); + assertEquals(4, cols.size()); + assertEquals(UnresolvedColumn.WILDCARD, cols.get(0).nodeType()); + assertEquals(PartitionColumn.ID, cols.get(1).nodeType()); + assertEquals(0, ((PartitionColumn) cols.get(1)).partition()); + assertEquals(PartitionColumn.ID, cols.get(2).nodeType()); + assertEquals(1, ((PartitionColumn) cols.get(2)).partition()); + assertEquals(PartitionColumn.ID, cols.get(3).nodeType()); + assertEquals(8, ((PartitionColumn) cols.get(3)).partition()); + } + + @Test + public void testPreferredPartitionExpansion() { + Path filePath = new Path("hdfs:///w/x/y/z.csv"); + FileMetadataManager metadataManager = new FileMetadataManager( + fixture.getOptionManager(), + true, // Use legacy wildcard expansion + false, // Put partitions at end + new Path("hdfs:///w"), + Lists.newArrayList(filePath)); + + ScanLevelProjection scanProj = new ScanLevelProjection( + RowSetTestUtils.projectList(SchemaPath.DYNAMIC_STAR, + ScanTestUtils.partitionColName(8)), + Lists.newArrayList(metadataManager.projectionParser())); + + List<ColumnProjection> cols = scanProj.columns(); + assertEquals(4, cols.size()); + assertEquals(UnresolvedColumn.WILDCARD, cols.get(0).nodeType()); + assertEquals(PartitionColumn.ID, cols.get(1).nodeType()); + assertEquals(8, ((PartitionColumn) cols.get(1)).partition()); + assertEquals(PartitionColumn.ID, cols.get(2).nodeType()); + assertEquals(0, ((PartitionColumn) cols.get(2)).partition()); + assertEquals(PartitionColumn.ID, cols.get(3).nodeType()); + assertEquals(1, ((PartitionColumn) cols.get(3)).partition()); + } + + /** + * Test a case like:<br> + * <code>SELECT *, dir1 FROM ...</code><br> + * The projection list includes "dir1". The wildcard will + * fill in "dir0". + */ + + @Test + public void testLegacyWildcardAndPartitionWithOverlap() { + Path filePath = new Path("hdfs:///w/x/y/z.csv"); + FileMetadataManager metadataManager = new FileMetadataManager( + fixture.getOptionManager(), + true, // Use legacy wildcard expansion + true, // Put partitions at end + new Path("hdfs:///w"), + Lists.newArrayList(filePath)); + + ScanLevelProjection scanProj = new ScanLevelProjection( + RowSetTestUtils.projectList(SchemaPath.DYNAMIC_STAR, + ScanTestUtils.partitionColName(1)), + Lists.newArrayList(metadataManager.projectionParser())); + + List<ColumnProjection> cols = scanProj.columns(); + assertEquals(3, cols.size()); + assertEquals(UnresolvedColumn.WILDCARD, cols.get(0).nodeType()); + assertEquals(PartitionColumn.ID, cols.get(1).nodeType()); + assertEquals(0, ((PartitionColumn) cols.get(1)).partition()); + assertEquals(PartitionColumn.ID, cols.get(2).nodeType()); + assertEquals(1, ((PartitionColumn) cols.get(2)).partition()); + } + + @Test + public void testPreferedWildcardExpansionWithOverlap() { + Path filePath = new Path("hdfs:///w/x/y/z.csv"); + FileMetadataManager metadataManager = new FileMetadataManager( + fixture.getOptionManager(), + true, // Use legacy wildcard expansion + false, // Put partitions at end + new Path("hdfs:///w"), + Lists.newArrayList(filePath)); + + ScanLevelProjection scanProj = new ScanLevelProjection( + RowSetTestUtils.projectList(SchemaPath.DYNAMIC_STAR, + ScanTestUtils.partitionColName(1)), + Lists.newArrayList(metadataManager.projectionParser())); + + List<ColumnProjection> cols = scanProj.columns(); + assertEquals(3, cols.size()); + assertEquals(UnresolvedColumn.WILDCARD, cols.get(0).nodeType()); + assertEquals(PartitionColumn.ID, cols.get(1).nodeType()); + assertEquals(1, ((PartitionColumn) cols.get(1)).partition()); + assertEquals(PartitionColumn.ID, cols.get(2).nodeType()); + assertEquals(0, ((PartitionColumn) cols.get(2)).partition()); + } + /** * Verify that names that look like metadata columns, but appear * to be maps or arrays, are not interpreted as metadata. That is, diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataProjection.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataProjection.java index 9161932d4..314bc2a13 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataProjection.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataProjection.java @@ -26,6 +26,7 @@ import static org.junit.Assert.fail; import java.util.List; +import org.apache.drill.categories.RowSetTests; import org.apache.drill.common.types.TypeProtos.DataMode; import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.exec.physical.impl.scan.file.FileMetadata; @@ -46,9 +47,10 @@ import org.apache.drill.exec.record.metadata.TupleMetadata; import org.apache.drill.test.SubOperatorTest; import org.apache.hadoop.fs.Path; import org.junit.Test; - +import org.junit.experimental.categories.Category; import org.apache.drill.shaded.guava.com.google.common.collect.Lists; +@Category(RowSetTests.class) public class TestFileMetadataProjection extends SubOperatorTest { @Test diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileScanFramework.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileScanFramework.java index a10e7665a..2fdf3e637 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileScanFramework.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileScanFramework.java @@ -25,6 +25,7 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import org.apache.drill.categories.RowSetTests; import org.apache.drill.common.exceptions.ExecutionSetupException; import org.apache.drill.common.types.TypeProtos.DataMode; import org.apache.drill.common.types.TypeProtos.MinorType; @@ -32,7 +33,7 @@ import org.apache.drill.exec.physical.impl.scan.TestScanOperatorExec.AbstractSca import org.apache.drill.exec.physical.impl.scan.file.BaseFileScanFramework; import org.apache.drill.exec.physical.impl.scan.file.BaseFileScanFramework.FileSchemaNegotiator; import org.apache.drill.exec.physical.impl.scan.file.FileScanFramework; -import org.apache.drill.exec.physical.impl.scan.file.FileScanFramework.FileReaderCreator; +import org.apache.drill.exec.physical.impl.scan.file.FileScanFramework.FileReaderFactory; import org.apache.drill.exec.physical.impl.scan.framework.ManagedReader; import org.apache.drill.exec.physical.rowSet.ResultSetLoader; import org.apache.drill.exec.physical.rowSet.RowSetLoader; @@ -50,6 +51,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.FileSplit; import org.junit.Test; +import org.junit.experimental.categories.Category; /** * Tests the file metadata extensions to the file operator framework. @@ -57,6 +59,7 @@ import org.junit.Test; * verified the underlying mechanisms. */ +@Category(RowSetTests.class) public class TestFileScanFramework extends SubOperatorTest { private static final String MOCK_FILE_NAME = "foo.csv"; @@ -117,7 +120,7 @@ public class TestFileScanFramework extends SubOperatorTest { } } - public static class FileScanOpFixture extends BaseFileScanOpFixture implements FileReaderCreator { + public static class FileScanOpFixture extends BaseFileScanOpFixture implements FileReaderFactory { protected final List<MockFileReader> readers = new ArrayList<>(); protected Iterator<MockFileReader> readerIter; @@ -252,7 +255,7 @@ public class TestFileScanFramework extends SubOperatorTest { .add("a", MinorType.INT) .addNullable("b", MinorType.VARCHAR, 10) .buildSchema(); - schemaNegotiator.setTableSchema(schema); + schemaNegotiator.setTableSchema(schema, true); tableLoader = schemaNegotiator.build(); return true; } @@ -486,7 +489,7 @@ public class TestFileScanFramework extends SubOperatorTest { .add("b", MinorType.INT) .resumeSchema() .buildSchema(); - schemaNegotiator.setTableSchema(schema); + schemaNegotiator.setTableSchema(schema, true); tableLoader = schemaNegotiator.build(); return true; } diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestNullColumnLoader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestNullColumnLoader.java deleted file mode 100644 index a41305221..000000000 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestNullColumnLoader.java +++ /dev/null @@ -1,281 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.drill.exec.physical.impl.scan; - -import static org.junit.Assert.assertSame; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.drill.common.types.TypeProtos.DataMode; -import org.apache.drill.common.types.TypeProtos.MajorType; -import org.apache.drill.common.types.TypeProtos.MinorType; -import org.apache.drill.common.types.Types; -import org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder; -import org.apache.drill.exec.physical.impl.scan.project.NullColumnLoader; -import org.apache.drill.exec.physical.impl.scan.project.ResolvedNullColumn; -import org.apache.drill.exec.physical.rowSet.ResultVectorCache; -import org.apache.drill.exec.physical.rowSet.impl.NullResultVectorCacheImpl; -import org.apache.drill.exec.physical.rowSet.impl.ResultVectorCacheImpl; -import org.apache.drill.exec.record.BatchSchema; -import org.apache.drill.exec.record.VectorContainer; -import org.apache.drill.exec.record.metadata.SchemaBuilder; -import org.apache.drill.exec.vector.ValueVector; -import org.apache.drill.test.SubOperatorTest; -import org.apache.drill.test.rowSet.RowSet.SingleRowSet; -import org.apache.drill.test.rowSet.RowSetComparison; -import org.junit.Test; - -public class TestNullColumnLoader extends SubOperatorTest { - - private ResolvedNullColumn makeNullCol(String name, MajorType nullType) { - - // For this test, we don't need the projection, so just - // set it to null. - - return new ResolvedNullColumn(name, nullType, null, 0); - } - - private ResolvedNullColumn makeNullCol(String name) { - return makeNullCol(name, null); - } - - /** - * Test the simplest case: default null type, nothing in the vector - * cache. Specify no column type, the special NULL type, or a - * predefined type. Output types should be set accordingly. - */ - - @Test - public void testBasics() { - - List<ResolvedNullColumn> defns = new ArrayList<>(); - defns.add(makeNullCol("unspecified", null)); - defns.add(makeNullCol("nullType", Types.optional(MinorType.NULL))); - defns.add(makeNullCol("specifiedOpt", Types.optional(MinorType.VARCHAR))); - defns.add(makeNullCol("specifiedReq", Types.required(MinorType.VARCHAR))); - defns.add(makeNullCol("specifiedArray", Types.repeated(MinorType.VARCHAR))); - - ResultVectorCache cache = new NullResultVectorCacheImpl(fixture.allocator()); - NullColumnLoader staticLoader = new NullColumnLoader(cache, defns, null, false); - - // Create a batch - - VectorContainer output = staticLoader.load(2); - - // Verify values and types - - BatchSchema expectedSchema = new SchemaBuilder() - .add("unspecified", NullColumnLoader.DEFAULT_NULL_TYPE) - .add("nullType", NullColumnLoader.DEFAULT_NULL_TYPE) - .addNullable("specifiedOpt", MinorType.VARCHAR) - .addNullable("specifiedReq", MinorType.VARCHAR) - .addArray("specifiedArray", MinorType.VARCHAR) - .build(); - SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) - .addRow(null, null, null, null, new String[] {}) - .addRow(null, null, null, null, new String[] {}) - .build(); - - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(output)); - staticLoader.close(); - } - - @Test - public void testCustomNullType() { - - List<ResolvedNullColumn> defns = new ArrayList<>(); - defns.add(makeNullCol("unspecified", null)); - defns.add(makeNullCol("nullType", MajorType.newBuilder() - .setMinorType(MinorType.NULL) - .setMode(DataMode.OPTIONAL) - .build())); - defns.add(makeNullCol("nullTypeReq", MajorType.newBuilder() - .setMinorType(MinorType.NULL) - .setMode(DataMode.REQUIRED) - .build())); - - // Null type array does not make sense, so is not tested. - - ResultVectorCache cache = new NullResultVectorCacheImpl(fixture.allocator()); - MajorType nullType = MajorType.newBuilder() - .setMinorType(MinorType.VARCHAR) - .setMode(DataMode.OPTIONAL) - .build(); - NullColumnLoader staticLoader = new NullColumnLoader(cache, defns, nullType, false); - - // Create a batch - - VectorContainer output = staticLoader.load(2); - - // Verify values and types - - BatchSchema expectedSchema = new SchemaBuilder() - .add("unspecified", nullType) - .add("nullType", nullType) - .add("nullTypeReq", nullType) - .build(); - SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) - .addRow(null, null, null) - .addRow(null, null, null) - .build(); - - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(output)); - staticLoader.close(); - } - - @Test - public void testCachedTypesMapToNullable() { - - List<ResolvedNullColumn> defns = new ArrayList<>(); - defns.add(makeNullCol("req")); - defns.add(makeNullCol("opt")); - defns.add(makeNullCol("rep")); - defns.add(makeNullCol("unk")); - - // Populate the cache with a column of each mode. - - ResultVectorCacheImpl cache = new ResultVectorCacheImpl(fixture.allocator()); - cache.addOrGet(SchemaBuilder.columnSchema("req", MinorType.FLOAT8, DataMode.REQUIRED)); - ValueVector opt = cache.addOrGet(SchemaBuilder.columnSchema("opt", MinorType.FLOAT8, DataMode.OPTIONAL)); - ValueVector rep = cache.addOrGet(SchemaBuilder.columnSchema("rep", MinorType.FLOAT8, DataMode.REPEATED)); - - // Use nullable Varchar for unknown null columns. - - MajorType nullType = MajorType.newBuilder() - .setMinorType(MinorType.VARCHAR) - .setMode(DataMode.OPTIONAL) - .build(); - NullColumnLoader staticLoader = new NullColumnLoader(cache, defns, nullType, false); - - // Create a batch - - VectorContainer output = staticLoader.load(2); - - // Verify vectors are reused - - assertSame(opt, output.getValueVector(1).getValueVector()); - assertSame(rep, output.getValueVector(2).getValueVector()); - - // Verify values and types - - BatchSchema expectedSchema = new SchemaBuilder() - .addNullable("req", MinorType.FLOAT8) - .addNullable("opt", MinorType.FLOAT8) - .addArray("rep", MinorType.FLOAT8) - .addNullable("unk", MinorType.VARCHAR) - .build(); - SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) - .addRow(null, null, new int[] { }, null) - .addRow(null, null, new int[] { }, null) - .build(); - - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(output)); - staticLoader.close(); - } - - @Test - public void testCachedTypesAllowRequired() { - - List<ResolvedNullColumn> defns = new ArrayList<>(); - defns.add(makeNullCol("req")); - defns.add(makeNullCol("opt")); - defns.add(makeNullCol("rep")); - defns.add(makeNullCol("unk")); - - // Populate the cache with a column of each mode. - - ResultVectorCacheImpl cache = new ResultVectorCacheImpl(fixture.allocator()); - cache.addOrGet(SchemaBuilder.columnSchema("req", MinorType.FLOAT8, DataMode.REQUIRED)); - ValueVector opt = cache.addOrGet(SchemaBuilder.columnSchema("opt", MinorType.FLOAT8, DataMode.OPTIONAL)); - ValueVector rep = cache.addOrGet(SchemaBuilder.columnSchema("rep", MinorType.FLOAT8, DataMode.REPEATED)); - - // Use nullable Varchar for unknown null columns. - - MajorType nullType = MajorType.newBuilder() - .setMinorType(MinorType.VARCHAR) - .setMode(DataMode.OPTIONAL) - .build(); - NullColumnLoader staticLoader = new NullColumnLoader(cache, defns, nullType, true); - - // Create a batch - - VectorContainer output = staticLoader.load(2); - - // Verify vectors are reused - - assertSame(opt, output.getValueVector(1).getValueVector()); - assertSame(rep, output.getValueVector(2).getValueVector()); - - // Verify values and types - - BatchSchema expectedSchema = new SchemaBuilder() - .add("req", MinorType.FLOAT8) - .addNullable("opt", MinorType.FLOAT8) - .addArray("rep", MinorType.FLOAT8) - .addNullable("unk", MinorType.VARCHAR) - .build(); - SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) - .addRow(0.0, null, new int[] { }, null) - .addRow(0.0, null, new int[] { }, null) - .build(); - - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(output)); - staticLoader.close(); - } - - @Test - public void testNullColumnBuilder() { - - ResultVectorCache cache = new NullResultVectorCacheImpl(fixture.allocator()); - NullColumnBuilder builder = new NullColumnBuilder(null, false); - - builder.add("unspecified"); - builder.add("nullType", Types.optional(MinorType.NULL)); - builder.add("specifiedOpt", Types.optional(MinorType.VARCHAR)); - builder.add("specifiedReq", Types.required(MinorType.VARCHAR)); - builder.add("specifiedArray", Types.repeated(MinorType.VARCHAR)); - builder.build(cache); - - // Create a batch - - builder.load(2); - - // Verify values and types - - BatchSchema expectedSchema = new SchemaBuilder() - .add("unspecified", NullColumnLoader.DEFAULT_NULL_TYPE) - .add("nullType", NullColumnLoader.DEFAULT_NULL_TYPE) - .addNullable("specifiedOpt", MinorType.VARCHAR) - .addNullable("specifiedReq", MinorType.VARCHAR) - .addArray("specifiedArray", MinorType.VARCHAR) - .build(); - SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) - .addRow(null, null, null, null, new String[] {}) - .addRow(null, null, null, null, new String[] {}) - .build(); - - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(builder.output())); - builder.close(); - } -} diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestRowBatchMerger.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestRowBatchMerger.java deleted file mode 100644 index 079455789..000000000 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestRowBatchMerger.java +++ /dev/null @@ -1,459 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.drill.exec.physical.impl.scan; - -import org.apache.drill.common.types.TypeProtos.MinorType; -import org.apache.drill.common.types.Types; -import org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder; -import org.apache.drill.exec.physical.impl.scan.project.ResolvedMapColumn; -import org.apache.drill.exec.physical.impl.scan.project.ResolvedColumn; -import org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple; -import org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow; -import org.apache.drill.exec.physical.impl.scan.project.VectorSource; -import org.apache.drill.exec.physical.rowSet.ResultVectorCache; -import org.apache.drill.exec.physical.rowSet.impl.NullResultVectorCacheImpl; -import org.apache.drill.exec.record.BatchSchema; -import org.apache.drill.exec.record.MaterializedField; -import org.apache.drill.exec.record.VectorContainer; -import org.apache.drill.exec.record.metadata.SchemaBuilder; -import org.apache.drill.exec.vector.ValueVector; -import org.apache.drill.test.SubOperatorTest; -import org.apache.drill.test.rowSet.RowSet; -import org.apache.drill.test.rowSet.RowSet.SingleRowSet; -import org.apache.drill.test.rowSet.RowSetComparison; -import org.junit.Test; - -import static org.apache.drill.test.rowSet.RowSetUtilities.mapValue; -import static org.apache.drill.test.rowSet.RowSetUtilities.singleMap; -import static org.apache.drill.test.rowSet.RowSetUtilities.mapArray; - - -/** - * Test the row batch merger by merging two batches. Tests both the - * "direct" and "exchange" cases. Direct means that the output container - * contains the source vector directly: they are the same vectors. - * Exchange means we have two vectors, but we swap the underlying - * Drillbufs to effectively shift data from source to destination - * vector. - */ - -public class TestRowBatchMerger extends SubOperatorTest { - - public static class RowSetSource implements VectorSource { - - private SingleRowSet rowSet; - - public RowSetSource(SingleRowSet rowSet) { - this.rowSet = rowSet; - } - - public RowSet rowSet() { return rowSet; } - - public void clear() { - rowSet.clear(); - } - - @Override - public ValueVector vector(int index) { - return rowSet.container().getValueVector(index).getValueVector(); - } - } - - private RowSetSource makeFirst() { - BatchSchema firstSchema = new SchemaBuilder() - .add("d", MinorType.VARCHAR) - .add("a", MinorType.INT) - .build(); - return new RowSetSource( - fixture.rowSetBuilder(firstSchema) - .addRow("barney", 10) - .addRow("wilma", 20) - .build()); - } - - private RowSetSource makeSecond() { - BatchSchema secondSchema = new SchemaBuilder() - .add("b", MinorType.INT) - .add("c", MinorType.VARCHAR) - .build(); - return new RowSetSource( - fixture.rowSetBuilder(secondSchema) - .addRow(1, "foo.csv") - .addRow(2, "foo.csv") - .build()); - } - - public static class TestProjection extends ResolvedColumn { - - public TestProjection(VectorSource source, int sourceIndex) { - super(source, sourceIndex); - } - - @Override - public String name() { return null; } - - @Override - public int nodeType() { return -1; } - - @Override - public MaterializedField schema() { return null; } - } - - @Test - public void testSimpleFlat() { - - // Create the first batch - - RowSetSource first = makeFirst(); - - // Create the second batch - - RowSetSource second = makeSecond(); - - ResolvedRow resolvedTuple = new ResolvedRow(null); - resolvedTuple.add(new TestProjection(first, 1)); - resolvedTuple.add(new TestProjection(second, 0)); - resolvedTuple.add(new TestProjection(second, 1)); - resolvedTuple.add(new TestProjection(first, 0)); - - // Do the merge - - VectorContainer output = new VectorContainer(fixture.allocator()); - resolvedTuple.project(null, output); - output.setRecordCount(first.rowSet().rowCount()); - RowSet result = fixture.wrap(output); - - // Verify - - BatchSchema expectedSchema = new SchemaBuilder() - .add("a", MinorType.INT) - .add("b", MinorType.INT) - .add("c", MinorType.VARCHAR) - .add("d", MinorType.VARCHAR) - .build(); - SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) - .addRow(10, 1, "foo.csv", "barney") - .addRow(20, 2, "foo.csv", "wilma") - .build(); - - new RowSetComparison(expected) - .verifyAndClearAll(result); - } - - @Test - public void testImplicitFlat() { - - // Create the first batch - - RowSetSource first = makeFirst(); - - // Create the second batch - - RowSetSource second = makeSecond(); - - ResolvedRow resolvedTuple = new ResolvedRow(null); - resolvedTuple.add(new TestProjection(resolvedTuple, 1)); - resolvedTuple.add(new TestProjection(second, 0)); - resolvedTuple.add(new TestProjection(second, 1)); - resolvedTuple.add(new TestProjection(resolvedTuple, 0)); - - // Do the merge - - VectorContainer output = new VectorContainer(fixture.allocator()); - resolvedTuple.project(first.rowSet().container(), output); - output.setRecordCount(first.rowSet().rowCount()); - RowSet result = fixture.wrap(output); - - // Verify - - BatchSchema expectedSchema = new SchemaBuilder() - .add("a", MinorType.INT) - .add("b", MinorType.INT) - .add("c", MinorType.VARCHAR) - .add("d", MinorType.VARCHAR) - .build(); - SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) - .addRow(10, 1, "foo.csv", "barney") - .addRow(20, 2, "foo.csv", "wilma") - .build(); - - new RowSetComparison(expected) - .verifyAndClearAll(result); - } - - @Test - public void testFlatWithNulls() { - - // Create the first batch - - RowSetSource first = makeFirst(); - - // Create null columns - - NullColumnBuilder builder = new NullColumnBuilder(null, false); - - ResolvedRow resolvedTuple = new ResolvedRow(builder); - resolvedTuple.add(new TestProjection(resolvedTuple, 1)); - resolvedTuple.add(resolvedTuple.nullBuilder().add("null1")); - resolvedTuple.add(resolvedTuple.nullBuilder().add("null2", Types.optional(MinorType.VARCHAR))); - resolvedTuple.add(new TestProjection(resolvedTuple, 0)); - - // Build the null values - - ResultVectorCache cache = new NullResultVectorCacheImpl(fixture.allocator()); - builder.build(cache); - builder.load(first.rowSet().rowCount()); - - // Do the merge - - VectorContainer output = new VectorContainer(fixture.allocator()); - resolvedTuple.project(first.rowSet().container(), output); - output.setRecordCount(first.rowSet().rowCount()); - RowSet result = fixture.wrap(output); - - // Verify - - BatchSchema expectedSchema = new SchemaBuilder() - .add("a", MinorType.INT) - .addNullable("null1", MinorType.INT) - .addNullable("null2", MinorType.VARCHAR) - .add("d", MinorType.VARCHAR) - .build(); - SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) - .addRow(10, null, null, "barney") - .addRow(20, null, null, "wilma") - .build(); - - new RowSetComparison(expected) - .verifyAndClearAll(result); - builder.close(); - } - - /** - * Test the ability to create maps from whole cloth if requested in - * the projection list, and the map is not available from the data - * source. - */ - - @Test - public void testNullMaps() { - - // Create the first batch - - RowSetSource first = makeFirst(); - - // Create null columns - - NullColumnBuilder builder = new NullColumnBuilder(null, false); - ResolvedRow resolvedTuple = new ResolvedRow(builder); - - resolvedTuple.add(new TestProjection(resolvedTuple, 1)); - - ResolvedMapColumn nullMapCol = new ResolvedMapColumn(resolvedTuple, "map1"); - ResolvedTuple nullMap = nullMapCol.members(); - nullMap.add(nullMap.nullBuilder().add("null1")); - nullMap.add(nullMap.nullBuilder().add("null2", Types.optional(MinorType.VARCHAR))); - - ResolvedMapColumn nullMapCol2 = new ResolvedMapColumn(nullMap, "map2"); - ResolvedTuple nullMap2 = nullMapCol2.members(); - nullMap2.add(nullMap2.nullBuilder().add("null3")); - nullMap.add(nullMapCol2); - - resolvedTuple.add(nullMapCol); - resolvedTuple.add(new TestProjection(resolvedTuple, 0)); - - // Build the null values - - ResultVectorCache cache = new NullResultVectorCacheImpl(fixture.allocator()); - resolvedTuple.buildNulls(cache); - - // LoadNulls - - resolvedTuple.loadNulls(first.rowSet().rowCount()); - - // Do the merge - - VectorContainer output = new VectorContainer(fixture.allocator()); - resolvedTuple.project(first.rowSet().container(), output); - resolvedTuple.setRowCount(first.rowSet().rowCount()); - RowSet result = fixture.wrap(output); - - // Verify - - BatchSchema expectedSchema = new SchemaBuilder() - .add("a", MinorType.INT) - .addMap("map1") - .addNullable("null1", MinorType.INT) - .addNullable("null2", MinorType.VARCHAR) - .addMap("map2") - .addNullable("null3", MinorType.INT) - .resumeMap() - .resumeSchema() - .add("d", MinorType.VARCHAR) - .build(); - SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) - .addRow(10, mapValue(null, null, singleMap(null)), "barney") - .addRow(20, mapValue(null, null, singleMap(null)), "wilma") - .build(); - - new RowSetComparison(expected) - .verifyAndClearAll(result); - resolvedTuple.close(); - } - - /** - * Test that the merger mechanism can rewrite a map to include - * projected null columns. - */ - - @Test - public void testMapRevision() { - - // Create the first batch - - BatchSchema inputSchema = new SchemaBuilder() - .add("b", MinorType.VARCHAR) - .addMap("a") - .add("c", MinorType.INT) - .resumeSchema() - .build(); - RowSetSource input = new RowSetSource( - fixture.rowSetBuilder(inputSchema) - .addRow("barney", singleMap(10)) - .addRow("wilma", singleMap(20)) - .build()); - - // Create mappings - - NullColumnBuilder builder = new NullColumnBuilder(null, false); - ResolvedRow resolvedTuple = new ResolvedRow(builder); - - resolvedTuple.add(new TestProjection(resolvedTuple, 0)); - ResolvedMapColumn mapCol = new ResolvedMapColumn(resolvedTuple, - inputSchema.getColumn(1), 1); - resolvedTuple.add(mapCol); - ResolvedTuple map = mapCol.members(); - map.add(new TestProjection(map, 0)); - map.add(map.nullBuilder().add("null1")); - - // Build the null values - - ResultVectorCache cache = new NullResultVectorCacheImpl(fixture.allocator()); - resolvedTuple.buildNulls(cache); - - // LoadNulls - - resolvedTuple.loadNulls(input.rowSet().rowCount()); - - // Do the merge - - VectorContainer output = new VectorContainer(fixture.allocator()); - resolvedTuple.project(input.rowSet().container(), output); - output.setRecordCount(input.rowSet().rowCount()); - RowSet result = fixture.wrap(output); - - // Verify - - BatchSchema expectedSchema = new SchemaBuilder() - .add("b", MinorType.VARCHAR) - .addMap("a") - .add("c", MinorType.INT) - .addNullable("null1", MinorType.INT) - .resumeSchema() - .build(); - RowSet expected = fixture.rowSetBuilder(expectedSchema) - .addRow("barney", mapValue(10, null)) - .addRow("wilma", mapValue(20, null)) - .build(); - - new RowSetComparison(expected) - .verifyAndClearAll(result); - } - - /** - * Test that the merger mechanism can rewrite a map array to include - * projected null columns. - */ - - @Test - public void testMapArrayRevision() { - - // Create the first batch - - BatchSchema inputSchema = new SchemaBuilder() - .add("b", MinorType.VARCHAR) - .addMapArray("a") - .add("c", MinorType.INT) - .resumeSchema() - .build(); - RowSetSource input = new RowSetSource( - fixture.rowSetBuilder(inputSchema) - .addRow("barney", mapArray(singleMap(10), singleMap(11), singleMap(12))) - .addRow("wilma", mapArray(singleMap(20), singleMap(21))) - .build()); - - // Create mappings - - NullColumnBuilder builder = new NullColumnBuilder(null, false); - ResolvedRow resolvedTuple = new ResolvedRow(builder); - - resolvedTuple.add(new TestProjection(resolvedTuple, 0)); - ResolvedMapColumn mapCol = new ResolvedMapColumn(resolvedTuple, - inputSchema.getColumn(1), 1); - resolvedTuple.add(mapCol); - ResolvedTuple map = mapCol.members(); - map.add(new TestProjection(map, 0)); - map.add(map.nullBuilder().add("null1")); - - // Build the null values - - ResultVectorCache cache = new NullResultVectorCacheImpl(fixture.allocator()); - resolvedTuple.buildNulls(cache); - - // LoadNulls - - resolvedTuple.loadNulls(input.rowSet().rowCount()); - - // Do the merge - - VectorContainer output = new VectorContainer(fixture.allocator()); - resolvedTuple.project(input.rowSet().container(), output); - output.setRecordCount(input.rowSet().rowCount()); - RowSet result = fixture.wrap(output); - - // Verify - - BatchSchema expectedSchema = new SchemaBuilder() - .add("b", MinorType.VARCHAR) - .addMapArray("a") - .add("c", MinorType.INT) - .addNullable("null1", MinorType.INT) - .resumeSchema() - .build(); - RowSet expected = fixture.rowSetBuilder(expectedSchema) - .addRow("barney", mapArray( - mapValue(10, null), mapValue(11, null), mapValue(12, null))) - .addRow("wilma", mapArray( - mapValue(20, null), mapValue(21, null))) - .build(); - - new RowSetComparison(expected) - .verifyAndClearAll(result); - } - -} diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanBatchWriters.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanBatchWriters.java index 912a7172e..03587cb08 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanBatchWriters.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanBatchWriters.java @@ -17,6 +17,7 @@ */ package org.apache.drill.exec.physical.impl.scan; +import org.apache.drill.categories.RowSetTests; import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.exec.ops.OperatorContext; import org.apache.drill.exec.physical.base.AbstractSubScan; @@ -34,6 +35,7 @@ import org.apache.drill.test.SubOperatorTest; import org.apache.drill.test.rowSet.RowSet; import org.apache.drill.test.rowSet.RowSetComparison; import org.junit.Test; +import org.junit.experimental.categories.Category; import io.netty.buffer.DrillBuf; @@ -42,6 +44,7 @@ import io.netty.buffer.DrillBuf; * set follows the same semantics as the original set. */ +@Category(RowSetTests.class) public class TestScanBatchWriters extends SubOperatorTest { @Test diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanLevelProjection.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanLevelProjection.java deleted file mode 100644 index 558f761c9..000000000 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanLevelProjection.java +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.drill.exec.physical.impl.scan; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - -import org.apache.drill.common.exceptions.UserException; -import org.apache.drill.common.expression.SchemaPath; -import org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection; -import org.apache.drill.exec.physical.impl.scan.project.UnresolvedColumn; -import org.apache.drill.exec.physical.rowSet.impl.RowSetTestUtils; -import org.apache.drill.exec.physical.rowSet.project.RequestedTuple.RequestedColumn; -import org.apache.drill.exec.record.metadata.ProjectionType; -import org.apache.drill.test.SubOperatorTest; -import org.junit.Test; - -/** - * Test the level of projection done at the level of the scan as a whole; - * before knowledge of table "implicit" columns or the specific table schema. - */ - -public class TestScanLevelProjection extends SubOperatorTest { - - /** - * Basic test: select a set of columns (a, b, c) when the - * data source has an early schema of (a, c, d). (a, c) are - * projected, (d) is null. - */ - - @Test - public void testBasics() { - - // Simulate SELECT a, b, c ... - // Build the projection plan and verify - - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectList("a", "b", "c"), - ScanTestUtils.parsers()); - assertFalse(scanProj.projectAll()); - assertFalse(scanProj.projectNone()); - - assertEquals(3, scanProj.requestedCols().size()); - assertEquals("a", scanProj.requestedCols().get(0).rootName()); - assertEquals("b", scanProj.requestedCols().get(1).rootName()); - assertEquals("c", scanProj.requestedCols().get(2).rootName()); - - assertEquals(3, scanProj.columns().size()); - assertEquals("a", scanProj.columns().get(0).name()); - assertEquals("b", scanProj.columns().get(1).name()); - assertEquals("c", scanProj.columns().get(2).name()); - - // Verify column type - - assertEquals(UnresolvedColumn.UNRESOLVED, scanProj.columns().get(0).nodeType()); - } - - @Test - public void testMap() { - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectList("a.x", "b.x", "a.y", "b.y", "c"), - ScanTestUtils.parsers()); - assertFalse(scanProj.projectAll()); - assertFalse(scanProj.projectNone()); - - assertEquals(3, scanProj.columns().size()); - assertEquals("a", scanProj.columns().get(0).name()); - assertEquals("b", scanProj.columns().get(1).name()); - assertEquals("c", scanProj.columns().get(2).name()); - - // Verify column type - - assertEquals(UnresolvedColumn.UNRESOLVED, scanProj.columns().get(0).nodeType()); - - // Map structure - - RequestedColumn a = ((UnresolvedColumn) scanProj.columns().get(0)).element(); - assertTrue(a.isTuple()); - assertEquals(ProjectionType.UNSPECIFIED, a.mapProjection().projectionType("x")); - assertEquals(ProjectionType.UNSPECIFIED, a.mapProjection().projectionType("y")); - assertEquals(ProjectionType.UNPROJECTED, a.mapProjection().projectionType("z")); - - RequestedColumn c = ((UnresolvedColumn) scanProj.columns().get(2)).element(); - assertTrue(c.isSimple()); - } - - @Test - public void testArray() { - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectList("a[1]", "a[3]"), - ScanTestUtils.parsers()); - assertFalse(scanProj.projectAll()); - assertFalse(scanProj.projectNone()); - - assertEquals(1, scanProj.columns().size()); - assertEquals("a", scanProj.columns().get(0).name()); - - // Verify column type - - assertEquals(UnresolvedColumn.UNRESOLVED, scanProj.columns().get(0).nodeType()); - - // Map structure - - RequestedColumn a = ((UnresolvedColumn) scanProj.columns().get(0)).element(); - assertTrue(a.isArray()); - assertFalse(a.hasIndex(0)); - assertTrue(a.hasIndex(1)); - assertFalse(a.hasIndex(2)); - assertTrue(a.hasIndex(3)); - } - - /** - * Simulate a SELECT * query by passing "**" (Drill's internal version - * of the wildcard) as a column name. - */ - - @Test - public void testWildcard() { - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectAll(), - ScanTestUtils.parsers()); - - assertTrue(scanProj.projectAll()); - assertFalse(scanProj.projectNone()); - assertEquals(1, scanProj.requestedCols().size()); - assertTrue(scanProj.requestedCols().get(0).isDynamicStar()); - - assertEquals(1, scanProj.columns().size()); - assertEquals(SchemaPath.DYNAMIC_STAR, scanProj.columns().get(0).name()); - - // Verify bindings - - assertEquals(scanProj.columns().get(0).name(), scanProj.requestedCols().get(0).rootName()); - - // Verify column type - - assertEquals(UnresolvedColumn.WILDCARD, scanProj.columns().get(0).nodeType()); - } - - /** - * Test an empty projection which occurs in a - * SELECT COUNT(*) query. - */ - - @Test - public void testEmptyProjection() { - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectList(), - ScanTestUtils.parsers()); - - assertFalse(scanProj.projectAll()); - assertTrue(scanProj.projectNone()); - assertEquals(0, scanProj.requestedCols().size()); - } - - /** - * Can't include both a wildcard and a column name. - */ - - @Test - public void testErrorWildcardAndColumns() { - try { - new ScanLevelProjection( - RowSetTestUtils.projectList(SchemaPath.DYNAMIC_STAR, "a"), - ScanTestUtils.parsers()); - fail(); - } catch (IllegalArgumentException e) { - // Expected - } - } - - /** - * Can't include both a column name and a wildcard. - */ - - @Test - public void testErrorColumnAndWildcard() { - try { - new ScanLevelProjection( - RowSetTestUtils.projectList("a", SchemaPath.DYNAMIC_STAR), - ScanTestUtils.parsers()); - fail(); - } catch (IllegalArgumentException e) { - // Expected - } - } - - /** - * Can't include a wildcard twice. - * <p> - * Note: Drill actually allows this, but the work should be done - * in the project operator; scan should see at most one wildcard. - */ - - @Test - public void testErrorTwoWildcards() { - try { - new ScanLevelProjection( - RowSetTestUtils.projectList(SchemaPath.DYNAMIC_STAR, SchemaPath.DYNAMIC_STAR), - ScanTestUtils.parsers()); - fail(); - } catch (UserException e) { - // Expected - } - } -} diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOperatorExec.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOperatorExec.java index 33752a746..386849bb6 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOperatorExec.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOperatorExec.java @@ -27,6 +27,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import org.apache.drill.categories.RowSetTests; import org.apache.drill.common.exceptions.ExecutionSetupException; import org.apache.drill.common.exceptions.UserException; import org.apache.drill.common.expression.SchemaPath; @@ -54,6 +55,7 @@ import org.apache.drill.test.SubOperatorTest; import org.apache.drill.test.rowSet.RowSet.SingleRowSet; import org.apache.drill.test.rowSet.RowSetComparison; import org.junit.Test; +import org.junit.experimental.categories.Category; /** * Test of the scan operator framework. Here the focus is on the @@ -66,6 +68,7 @@ import org.junit.Test; * appear elsewhere. */ +@Category(RowSetTests.class) public class TestScanOperatorExec extends SubOperatorTest { private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestScanOperatorExec.class); @@ -187,7 +190,7 @@ public class TestScanOperatorExec extends SubOperatorTest { .add("a", MinorType.INT) .addNullable("b", MinorType.VARCHAR, 10) .buildSchema(); - schemaNegotiator.setTableSchema(schema); + schemaNegotiator.setTableSchema(schema, true); tableLoader = schemaNegotiator.build(); return true; } @@ -213,7 +216,7 @@ public class TestScanOperatorExec extends SubOperatorTest { .add("a", MinorType.VARCHAR) .addNullable("b", MinorType.VARCHAR, 10) .buildSchema(); - schemaNegotiator.setTableSchema(schema); + schemaNegotiator.setTableSchema(schema, true); schemaNegotiator.build(); tableLoader = schemaNegotiator.build(); return true; @@ -1367,7 +1370,7 @@ public class TestScanOperatorExec extends SubOperatorTest { TupleMetadata schema = new SchemaBuilder() .add("a", MinorType.VARCHAR) .buildSchema(); - schemaNegotiator.setTableSchema(schema); + schemaNegotiator.setTableSchema(schema, true); tableLoader = schemaNegotiator.build(); return true; } @@ -1493,7 +1496,7 @@ public class TestScanOperatorExec extends SubOperatorTest { TupleMetadata schema = new SchemaBuilder() .add("a", MinorType.INT) .buildSchema(); - schemaNegotiator.setTableSchema(schema); + schemaNegotiator.setTableSchema(schema, true); tableLoader = schemaNegotiator.build(); return true; } diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorEarlySchema.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorEarlySchema.java index d8c9a65ef..ef79b0ee6 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorEarlySchema.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorEarlySchema.java @@ -22,12 +22,13 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import org.apache.drill.categories.RowSetTests; import org.apache.drill.common.types.TypeProtos.DataMode; import org.apache.drill.common.types.TypeProtos.MajorType; import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.exec.physical.impl.protocol.SchemaTracker; +import org.apache.drill.exec.physical.impl.scan.project.ReaderSchemaOrchestrator; import org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator; -import org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ReaderSchemaOrchestrator; import org.apache.drill.exec.physical.rowSet.ResultSetLoader; import org.apache.drill.exec.physical.rowSet.impl.RowSetTestUtils; import org.apache.drill.exec.record.BatchSchema; @@ -37,8 +38,9 @@ import org.apache.drill.exec.record.metadata.TupleMetadata; import org.apache.drill.exec.record.VectorContainer; import org.apache.drill.test.SubOperatorTest; import org.apache.drill.test.rowSet.RowSet.SingleRowSet; -import org.apache.drill.test.rowSet.RowSetComparison; +import org.apache.drill.test.rowSet.RowSetUtilities; import org.junit.Test; +import org.junit.experimental.categories.Category; /** * Test the early-schema support of the scan orchestrator. "Early schema" @@ -49,6 +51,7 @@ import org.junit.Test; * that tests for lower-level components have already passed. */ +@Category(RowSetTests.class) public class TestScanOrchestratorEarlySchema extends SubOperatorTest { /** @@ -78,16 +81,17 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { ResultSetLoader loader = reader.makeTableLoader(tableSchema); - // Schema provided, so an empty batch is available to - // send downstream. + // Simulate a first reader in a scan that can provide an + // empty batch to define schema. { + reader.defineSchema(); SingleRowSet expected = fixture.rowSetBuilder(tableSchema) .build(); assertNotNull(scanner.output()); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); + RowSetUtilities.verify(expected, + fixture.wrap(scanner.output())); } // Create a batch of data. @@ -107,8 +111,8 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { .addRow(2, "wilma") .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); + RowSetUtilities.verify(expected, + fixture.wrap(scanner.output())); } // Second batch. @@ -128,8 +132,8 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { .addRow(4, "betty") .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); + RowSetUtilities.verify(expected, + fixture.wrap(scanner.output())); } // Explicit reader close. (All other tests are lazy, they @@ -167,16 +171,8 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { ResultSetLoader loader = reader.makeTableLoader(tableSchema); - // Verify empty batch. - - { - SingleRowSet expected = fixture.rowSetBuilder(tableSchema) - .build(); - - assertNotNull(scanner.output()); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); - } + // Don't bother with an empty batch here or in other tests. + // Simulates the second reader in a scan. // Create a batch of data. @@ -188,15 +184,13 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { // Verify - { - SingleRowSet expected = fixture.rowSetBuilder(tableSchema) - .addRow(1, "fred") - .addRow(2, "wilma") - .build(); + SingleRowSet expected = fixture.rowSetBuilder(tableSchema) + .addRow(1, "fred") + .addRow(2, "wilma") + .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); - } + RowSetUtilities.verify(expected, + fixture.wrap(scanner.output())); scanner.close(); } @@ -228,20 +222,10 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { ResultSetLoader loader = reader.makeTableLoader(tableSchema); - // Verify empty batch. - BatchSchema expectedSchema = new SchemaBuilder() .add("b", MinorType.VARCHAR) .add("a", MinorType.INT) .build(); - { - SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) - .build(); - - assertNotNull(scanner.output()); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); - } // Create a batch of data. @@ -253,17 +237,15 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { // Verify - { - SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) - .addRow("fred", 1) - .addRow("wilma", 2) - .build(); + SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) + .addRow("fred", 1) + .addRow("wilma", 2) + .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); - } + RowSetUtilities.verify(expected, + fixture.wrap(scanner.output())); - scanner.close(); + scanner.close(); } /** @@ -294,21 +276,11 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { ResultSetLoader loader = reader.makeTableLoader(tableSchema); - // Verify empty batch - BatchSchema expectedSchema = new SchemaBuilder() .add("a", MinorType.INT) .add("b", MinorType.VARCHAR) .addNullable("c", MinorType.INT) .build(); - { - SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) - .build(); - - assertNotNull(scanner.output()); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); - } // Create a batch of data. @@ -320,15 +292,13 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { // Verify - { - SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) - .addRow(1, "fred", null) - .addRow(2, "wilma", null) - .build(); + SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) + .addRow(1, "fred", null) + .addRow(2, "wilma", null) + .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); - } + RowSetUtilities.verify(expected, + fixture.wrap(scanner.output())); scanner.close(); } @@ -369,21 +339,11 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { ResultSetLoader loader = reader.makeTableLoader(tableSchema); - // Verify empty batch - BatchSchema expectedSchema = new SchemaBuilder() .add("a", MinorType.INT) .add("b", MinorType.VARCHAR) .addNullable("c", MinorType.VARCHAR) .build(); - { - SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) - .build(); - - assertNotNull(scanner.output()); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); - } // Create a batch of data. @@ -395,15 +355,13 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { // Verify - { - SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) - .addRow(1, "fred", null) - .addRow(2, "wilma", null) - .build(); + SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) + .addRow(1, "fred", null) + .addRow(2, "wilma", null) + .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); - } + RowSetUtilities.verify(expected, + fixture.wrap(scanner.output())); scanner.close(); } @@ -440,19 +398,9 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { assertFalse(loader.writer().column("b").schema().isProjected()); - // Verify empty batch. - BatchSchema expectedSchema = new SchemaBuilder() .add("a", MinorType.INT) .build(); - { - SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) - .build(); - - assertNotNull(scanner.output()); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); - } // Create a batch of data. @@ -464,15 +412,13 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { // Verify - { - SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) - .addRow(1) - .addRow(2) - .build(); + SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) + .addRow(1) + .addRow(2) + .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); - } + RowSetUtilities.verify(expected, + fixture.wrap(scanner.output())); scanner.close(); } @@ -516,16 +462,6 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { BatchSchema expectedSchema = new SchemaBuilder() .build(); - { - // Expect an empty schema - - SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) - .build(); - - assertNotNull(scanner.output()); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); - } // Create a batch of data. @@ -545,8 +481,8 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { .addRow() .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); + RowSetUtilities.verify(expected, + fixture.wrap(scanner.output())); } // Fast path to fill in empty rows @@ -592,18 +528,6 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { reader.makeTableLoader(tableSchema); - // Schema provided, so an empty batch is available to - // send downstream. - - { - SingleRowSet expected = fixture.rowSetBuilder(tableSchema) - .build(); - - assertNotNull(scanner.output()); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); - } - // Create a batch of data. Because there are no columns, it does // not make sense to ready any rows. @@ -616,8 +540,8 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { SingleRowSet expected = fixture.rowSetBuilder(tableSchema) .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); + RowSetUtilities.verify(expected, + fixture.wrap(scanner.output())); } scanner.close(); @@ -650,19 +574,9 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { reader.makeTableLoader(tableSchema); - // Verify initial empty batch. - BatchSchema expectedSchema = new SchemaBuilder() .addNullable("a", MinorType.INT) .build(); - { - SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) - .build(); - - assertNotNull(scanner.output()); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); - } // Create a batch of data. Because there are no columns, it does // not make sense to ready any rows. @@ -672,16 +586,14 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { // Verify - { - SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) - .build(); - - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); - } + SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) + .build(); + RowSetUtilities.verify(expected, + fixture.wrap(scanner.output())); scanner.close(); } + /** * The projection mechanism provides "type smoothing": null * columns prefer the type of previously-seen non-null columns. @@ -718,6 +630,7 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { ReaderSchemaOrchestrator reader = scanner.startReader(); reader.makeTableLoader(table1Schema); + reader.defineSchema(); VectorContainer output = scanner.output(); tracker.trackSchema(output); schemaVersion = tracker.schemaVersion(); @@ -737,6 +650,7 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { .buildSchema(); ReaderSchemaOrchestrator reader = scanner.startReader(); reader.makeTableLoader(table2Schema); + reader.defineSchema(); VectorContainer output = scanner.output(); tracker.trackSchema(output); assertEquals(schemaVersion, tracker.schemaVersion()); @@ -756,6 +670,7 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { .buildSchema(); ReaderSchemaOrchestrator reader = scanner.startReader(); reader.makeTableLoader(table3Schema); + reader.defineSchema(); VectorContainer output = scanner.output(); tracker.trackSchema(output); assertEquals(schemaVersion, tracker.schemaVersion()); @@ -776,6 +691,7 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { .buildSchema(); ReaderSchemaOrchestrator reader = scanner.startReader(); reader.makeTableLoader(table2Schema); + reader.defineSchema(); VectorContainer output = scanner.output(); tracker.trackSchema(output); assertEquals(MinorType.BIGINT, output.getSchema().getColumn(0).getType().getMinorType()); @@ -843,8 +759,8 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { .addRow(10, "fred") .addRow(20, "wilma") .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(projector.output())); + RowSetUtilities.verify(expected, + fixture.wrap(projector.output())); } { // ... FROM table 2 @@ -874,8 +790,8 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { .addRow(30, null) .addRow(40, null) .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(projector.output())); + RowSetUtilities.verify(expected, + fixture.wrap(projector.output())); } { // ... FROM table 3 @@ -899,8 +815,8 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { .addRow(50, "dino") .addRow(60, "barney") .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(projector.output())); + RowSetUtilities.verify(expected, + fixture.wrap(projector.output())); } projector.close(); @@ -926,9 +842,6 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { ReaderSchemaOrchestrator reader = scanner.startReader(); ResultSetLoader loader = reader.makeTableLoader(schema1); - tracker.trackSchema(scanner.output()); - schemaVersion = tracker.schemaVersion(); - // Create a batch reader.startBatch(); @@ -936,6 +849,8 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { .addRow("fred") .addRow("wilma"); reader.endBatch(); + tracker.trackSchema(scanner.output()); + schemaVersion = tracker.schemaVersion(); // Verify @@ -944,8 +859,8 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { .addRow("wilma") .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); + RowSetUtilities.verify(expected, + fixture.wrap(scanner.output())); scanner.closeReader(); } { @@ -960,8 +875,6 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { ReaderSchemaOrchestrator reader = scanner.startReader(); ResultSetLoader loader = reader.makeTableLoader(schema2); - tracker.trackSchema(scanner.output()); - assertEquals(schemaVersion, tracker.schemaVersion()); // Create a batch @@ -973,13 +886,15 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { // Verify, using persistent schema + tracker.trackSchema(scanner.output()); + assertEquals(schemaVersion, tracker.schemaVersion()); SingleRowSet expected = fixture.rowSetBuilder(schema1) .addRow("barney") .addRow("betty") .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); + RowSetUtilities.verify(expected, + fixture.wrap(scanner.output())); scanner.closeReader(); } { @@ -994,9 +909,6 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { ReaderSchemaOrchestrator reader = scanner.startReader(); ResultSetLoader loader = reader.makeTableLoader(schema3); - tracker.trackSchema(scanner.output()); - assertEquals(schemaVersion, tracker.schemaVersion()); - // Create a batch reader.startBatch(); @@ -1007,13 +919,16 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { // Verify, using persistent schema + tracker.trackSchema(scanner.output()); + assertEquals(schemaVersion, tracker.schemaVersion()); + SingleRowSet expected = fixture.rowSetBuilder(schema1) .addRow("bam-bam") .addRow("pebbles") .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); + RowSetUtilities.verify(expected, + fixture.wrap(scanner.output())); scanner.closeReader(); } @@ -1073,8 +988,8 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { .addRow(10, "fred", 110L) .addRow(20, "wilma", 110L) .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); + RowSetUtilities.verify(expected, + fixture.wrap(scanner.output())); scanner.closeReader(); } @@ -1100,8 +1015,8 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { .addRow(30, "bambam", 330L) .addRow(40, "betty", 440L) .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); + RowSetUtilities.verify(expected, + fixture.wrap(scanner.output())); } { // ... FROM table 3 @@ -1125,8 +1040,8 @@ public class TestScanOrchestratorEarlySchema extends SubOperatorTest { .addRow(50, "dino", 550L) .addRow(60, "barney", 660L) .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); + RowSetUtilities.verify(expected, + fixture.wrap(scanner.output())); } scanner.close(); diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorLateSchema.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorLateSchema.java index 0cf2cba31..84ffc4e32 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorLateSchema.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorLateSchema.java @@ -18,10 +18,12 @@ package org.apache.drill.exec.physical.impl.scan; import static org.junit.Assert.assertFalse; + +import org.apache.drill.categories.RowSetTests; import org.apache.drill.common.types.TypeProtos.DataMode; import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.physical.impl.scan.project.ReaderSchemaOrchestrator; import org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator; -import org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ReaderSchemaOrchestrator; import org.apache.drill.exec.physical.rowSet.ResultSetLoader; import org.apache.drill.exec.physical.rowSet.RowSetLoader; import org.apache.drill.exec.physical.rowSet.impl.RowSetTestUtils; @@ -32,6 +34,7 @@ import org.apache.drill.test.SubOperatorTest; import org.apache.drill.test.rowSet.RowSet.SingleRowSet; import org.apache.drill.test.rowSet.RowSetComparison; import org.junit.Test; +import org.junit.experimental.categories.Category; /** * Test the late-schema support in the scan orchestrator. "Late schema" is the case @@ -43,6 +46,7 @@ import org.junit.Test; * that tests for lower-level components have already passed. */ +@Category(RowSetTests.class) public class TestScanOrchestratorLateSchema extends SubOperatorTest { /** diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorMetadata.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorMetadata.java index e10055114..c7b52e2da 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorMetadata.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorMetadata.java @@ -20,14 +20,15 @@ package org.apache.drill.exec.physical.impl.scan; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; +import org.apache.drill.categories.RowSetTests; import org.apache.drill.common.expression.SchemaPath; import org.apache.drill.common.types.TypeProtos.DataMode; import org.apache.drill.common.types.TypeProtos.MajorType; import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.exec.physical.impl.protocol.SchemaTracker; import org.apache.drill.exec.physical.impl.scan.file.FileMetadataManager; +import org.apache.drill.exec.physical.impl.scan.project.ReaderSchemaOrchestrator; import org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator; -import org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ReaderSchemaOrchestrator; import org.apache.drill.exec.physical.rowSet.ResultSetLoader; import org.apache.drill.exec.physical.rowSet.impl.RowSetTestUtils; import org.apache.drill.exec.record.BatchSchema; @@ -35,10 +36,10 @@ import org.apache.drill.exec.record.metadata.SchemaBuilder; import org.apache.drill.exec.record.metadata.TupleMetadata; import org.apache.drill.test.SubOperatorTest; import org.apache.drill.test.rowSet.RowSet.SingleRowSet; -import org.apache.drill.test.rowSet.RowSetComparison; +import org.apache.drill.test.rowSet.RowSetUtilities; import org.apache.hadoop.fs.Path; import org.junit.Test; - +import org.junit.experimental.categories.Category; import org.apache.drill.shaded.guava.com.google.common.collect.Lists; /** @@ -46,6 +47,7 @@ import org.apache.drill.shaded.guava.com.google.common.collect.Lists; * with implicit file columns provided by the file metadata manager. */ +@Category(RowSetTests.class) public class TestScanOrchestratorMetadata extends SubOperatorTest { /** @@ -104,8 +106,8 @@ public class TestScanOrchestratorMetadata extends SubOperatorTest { .addRow(2, "wilma", "/w/x/y/z.csv", "/w/x/y", "z.csv", "csv", "x", "y") .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); + RowSetUtilities.verify(expected, + fixture.wrap(scanner.output())); scanner.close(); } @@ -146,19 +148,9 @@ public class TestScanOrchestratorMetadata extends SubOperatorTest { ResultSetLoader loader = reader.makeTableLoader(tableSchema); - // Verify empty batch. - BatchSchema expectedSchema = new SchemaBuilder() .addNullable("c", MinorType.INT) .build(); - { - SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) - .build(); - - assertNotNull(scanner.output()); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); - } // Create a batch of data. @@ -170,15 +162,13 @@ public class TestScanOrchestratorMetadata extends SubOperatorTest { // Verify - { - SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) - .addSingleCol(null) - .addSingleCol(null) - .build(); + SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) + .addSingleCol(null) + .addSingleCol(null) + .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); - } + RowSetUtilities.verify(expected, + fixture.wrap(scanner.output())); scanner.close(); } @@ -229,6 +219,7 @@ public class TestScanOrchestratorMetadata extends SubOperatorTest { // Verify empty batch. + reader.defineSchema(); BatchSchema expectedSchema = new SchemaBuilder() .add("a", MinorType.INT) .add("b", MinorType.VARCHAR) @@ -240,8 +231,8 @@ public class TestScanOrchestratorMetadata extends SubOperatorTest { .build(); assertNotNull(scanner.output()); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); + RowSetUtilities.verify(expected, + fixture.wrap(scanner.output())); } // Create a batch of data. @@ -260,8 +251,8 @@ public class TestScanOrchestratorMetadata extends SubOperatorTest { .addRow(2, "wilma", "x", "csv") .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); + RowSetUtilities.verify(expected, + fixture.wrap(scanner.output())); } scanner.close(); @@ -302,22 +293,12 @@ public class TestScanOrchestratorMetadata extends SubOperatorTest { ResultSetLoader loader = reader.makeTableLoader(tableSchema); - // Verify empty batch. - BatchSchema expectedSchema = new SchemaBuilder() .addNullable("dir0", MinorType.VARCHAR) .add("b", MinorType.VARCHAR) .add("suffix", MinorType.VARCHAR) .addNullable("c", MinorType.INT) .build(); - { - SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) - .build(); - - assertNotNull(scanner.output()); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); - } // Create a batch of data. @@ -329,15 +310,13 @@ public class TestScanOrchestratorMetadata extends SubOperatorTest { // Verify - { - SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) - .addRow("x", "fred", "csv", null) - .addRow("x", "wilma", "csv", null) - .build(); + SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) + .addRow("x", "fred", "csv", null) + .addRow("x", "wilma", "csv", null) + .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); - } + RowSetUtilities.verify(expected, + fixture.wrap(scanner.output())); scanner.close(); } @@ -403,8 +382,8 @@ public class TestScanOrchestratorMetadata extends SubOperatorTest { .addRow("x", "y", "a.csv", "fred") .addRow("x", "y", "a.csv", "wilma") .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); + RowSetUtilities.verify(expected, + fixture.wrap(scanner.output())); // Do explicit close (as in real code) to avoid an implicit // close which will blow away the current file info... @@ -431,8 +410,8 @@ public class TestScanOrchestratorMetadata extends SubOperatorTest { .addRow("x", null, "b.csv", "bambam") .addRow("x", null, "b.csv", "betty") .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(scanner.output())); + RowSetUtilities.verify(expected, + fixture.wrap(scanner.output())); scanner.closeReader(); } diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestSchemaLevelProjection.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestSchemaLevelProjection.java deleted file mode 100644 index 021d7e381..000000000 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestSchemaLevelProjection.java +++ /dev/null @@ -1,557 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.drill.exec.physical.impl.scan; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - -import java.util.List; - -import org.apache.drill.common.exceptions.UserException; -import org.apache.drill.common.types.TypeProtos.MinorType; -import org.apache.drill.exec.physical.impl.scan.project.ResolvedMapColumn; -import org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow; -import org.apache.drill.exec.physical.impl.scan.project.ExplicitSchemaProjection; -import org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder; -import org.apache.drill.exec.physical.impl.scan.project.ResolvedColumn; -import org.apache.drill.exec.physical.impl.scan.project.ResolvedNullColumn; -import org.apache.drill.exec.physical.impl.scan.project.ResolvedTableColumn; -import org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple; -import org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection; -import org.apache.drill.exec.physical.impl.scan.project.UnresolvedColumn; -import org.apache.drill.exec.physical.impl.scan.project.VectorSource; -import org.apache.drill.exec.physical.impl.scan.project.WildcardSchemaProjection; -import org.apache.drill.exec.physical.rowSet.impl.RowSetTestUtils; -import org.apache.drill.exec.record.metadata.SchemaBuilder; -import org.apache.drill.exec.record.metadata.TupleMetadata; -import org.apache.drill.test.SubOperatorTest; -import org.junit.Test; - -public class TestSchemaLevelProjection extends SubOperatorTest { - - @Test - public void testWildcard() { - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectAll(), - ScanTestUtils.parsers()); - assertEquals(1, scanProj.columns().size()); - - TupleMetadata tableSchema = new SchemaBuilder() - .add("a", MinorType.VARCHAR) - .addNullable("c", MinorType.INT) - .addArray("d", MinorType.FLOAT8) - .buildSchema(); - - NullColumnBuilder builder = new NullColumnBuilder(null, false); - ResolvedRow rootTuple = new ResolvedRow(builder); - new WildcardSchemaProjection( - scanProj, tableSchema, rootTuple, - ScanTestUtils.resolvers()); - - List<ResolvedColumn> columns = rootTuple.columns(); - assertEquals(3, columns.size()); - - assertEquals("a", columns.get(0).name()); - assertEquals(0, columns.get(0).sourceIndex()); - assertSame(rootTuple, columns.get(0).source()); - assertEquals("c", columns.get(1).name()); - assertEquals(1, columns.get(1).sourceIndex()); - assertSame(rootTuple, columns.get(1).source()); - assertEquals("d", columns.get(2).name()); - assertEquals(2, columns.get(2).sourceIndex()); - assertSame(rootTuple, columns.get(2).source()); - } - - /** - * Test SELECT list with columns defined in a order and with - * name case different than the early-schema table. - */ - - @Test - public void testFullList() { - - // Simulate SELECT c, b, a ... - - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectList("c", "b", "a"), - ScanTestUtils.parsers()); - assertEquals(3, scanProj.columns().size()); - - // Simulate a data source, with early schema, of (a, b, c) - - TupleMetadata tableSchema = new SchemaBuilder() - .add("A", MinorType.VARCHAR) - .add("B", MinorType.VARCHAR) - .add("C", MinorType.VARCHAR) - .buildSchema(); - - NullColumnBuilder builder = new NullColumnBuilder(null, false); - ResolvedRow rootTuple = new ResolvedRow(builder); - new ExplicitSchemaProjection( - scanProj, tableSchema, rootTuple, - ScanTestUtils.resolvers()); - - List<ResolvedColumn> columns = rootTuple.columns(); - assertEquals(3, columns.size()); - - assertEquals("c", columns.get(0).name()); - assertEquals(2, columns.get(0).sourceIndex()); - assertSame(rootTuple, columns.get(0).source()); - - assertEquals("b", columns.get(1).name()); - assertEquals(1, columns.get(1).sourceIndex()); - assertSame(rootTuple, columns.get(1).source()); - - assertEquals("a", columns.get(2).name()); - assertEquals(0, columns.get(2).sourceIndex()); - assertSame(rootTuple, columns.get(2).source()); - } - - /** - * Test SELECT list with columns missing from the table schema. - */ - - @Test - public void testMissing() { - - // Simulate SELECT c, v, b, w ... - - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectList("c", "v", "b", "w"), - ScanTestUtils.parsers()); - assertEquals(4, scanProj.columns().size()); - - // Simulate a data source, with early schema, of (a, b, c) - - TupleMetadata tableSchema = new SchemaBuilder() - .add("A", MinorType.VARCHAR) - .add("B", MinorType.VARCHAR) - .add("C", MinorType.VARCHAR) - .buildSchema(); - - NullColumnBuilder builder = new NullColumnBuilder(null, false); - ResolvedRow rootTuple = new ResolvedRow(builder); - new ExplicitSchemaProjection( - scanProj, tableSchema, rootTuple, - ScanTestUtils.resolvers()); - - List<ResolvedColumn> columns = rootTuple.columns(); - assertEquals(4, columns.size()); - VectorSource nullBuilder = rootTuple.nullBuilder(); - - assertEquals("c", columns.get(0).name()); - assertEquals(2, columns.get(0).sourceIndex()); - assertSame(rootTuple, columns.get(0).source()); - - assertEquals("v", columns.get(1).name()); - assertEquals(0, columns.get(1).sourceIndex()); - assertSame(nullBuilder, columns.get(1).source()); - - assertEquals("b", columns.get(2).name()); - assertEquals(1, columns.get(2).sourceIndex()); - assertSame(rootTuple, columns.get(2).source()); - - assertEquals("w", columns.get(3).name()); - assertEquals(1, columns.get(3).sourceIndex()); - assertSame(nullBuilder, columns.get(3).source()); - } - - @Test - public void testSubset() { - - // Simulate SELECT c, a ... - - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectList("c", "a"), - ScanTestUtils.parsers()); - assertEquals(2, scanProj.columns().size()); - - // Simulate a data source, with early schema, of (a, b, c) - - TupleMetadata tableSchema = new SchemaBuilder() - .add("A", MinorType.VARCHAR) - .add("B", MinorType.VARCHAR) - .add("C", MinorType.VARCHAR) - .buildSchema(); - - NullColumnBuilder builder = new NullColumnBuilder(null, false); - ResolvedRow rootTuple = new ResolvedRow(builder); - new ExplicitSchemaProjection( - scanProj, tableSchema, rootTuple, - ScanTestUtils.resolvers()); - - List<ResolvedColumn> columns = rootTuple.columns(); - assertEquals(2, columns.size()); - - assertEquals("c", columns.get(0).name()); - assertEquals(2, columns.get(0).sourceIndex()); - assertSame(rootTuple, columns.get(0).source()); - - assertEquals("a", columns.get(1).name()); - assertEquals(0, columns.get(1).sourceIndex()); - assertSame(rootTuple, columns.get(1).source()); - } - - @Test - public void testDisjoint() { - - // Simulate SELECT c, a ... - - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectList("b"), - ScanTestUtils.parsers()); - assertEquals(1, scanProj.columns().size()); - - // Simulate a data source, with early schema, of (a) - - TupleMetadata tableSchema = new SchemaBuilder() - .add("A", MinorType.VARCHAR) - .buildSchema(); - - NullColumnBuilder builder = new NullColumnBuilder(null, false); - ResolvedRow rootTuple = new ResolvedRow(builder); - new ExplicitSchemaProjection( - scanProj, tableSchema, rootTuple, - ScanTestUtils.resolvers()); - - List<ResolvedColumn> columns = rootTuple.columns(); - assertEquals(1, columns.size()); - VectorSource nullBuilder = rootTuple.nullBuilder(); - - assertEquals("b", columns.get(0).name()); - assertEquals(0, columns.get(0).sourceIndex()); - assertSame(nullBuilder, columns.get(0).source()); - } - - @Test - public void testOmittedMap() { - - // Simulate SELECT a, b.c.d ... - - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectList("a", "b.c.d"), - ScanTestUtils.parsers()); - assertEquals(2, scanProj.columns().size()); - { - assertEquals(UnresolvedColumn.UNRESOLVED, scanProj.columns().get(1).nodeType()); - UnresolvedColumn bCol = (UnresolvedColumn) (scanProj.columns().get(1)); - assertTrue(bCol.element().isTuple()); - } - - // Simulate a data source, with early schema, of (a) - - TupleMetadata tableSchema = new SchemaBuilder() - .add("a", MinorType.VARCHAR) - .buildSchema(); - - NullColumnBuilder builder = new NullColumnBuilder(null, false); - ResolvedRow rootTuple = new ResolvedRow(builder); - new ExplicitSchemaProjection( - scanProj, tableSchema, rootTuple, - ScanTestUtils.resolvers()); - - List<ResolvedColumn> columns = rootTuple.columns(); - assertEquals(2, columns.size()); - - // Should have resolved a to a table column, b to a missing map. - - // A is projected - - ResolvedColumn aCol = columns.get(0); - assertEquals("a", aCol.name()); - assertEquals(ResolvedTableColumn.ID, aCol.nodeType()); - - // B is not projected, is implicitly a map - - ResolvedColumn bCol = columns.get(1); - assertEquals("b", bCol.name()); - assertEquals(ResolvedMapColumn.ID, bCol.nodeType()); - - ResolvedMapColumn bMap = (ResolvedMapColumn) bCol; - ResolvedTuple bMembers = bMap.members(); - assertNotNull(bMembers); - assertEquals(1, bMembers.columns().size()); - - // C is a map within b - - ResolvedColumn cCol = bMembers.columns().get(0); - assertEquals(ResolvedMapColumn.ID, cCol.nodeType()); - - ResolvedMapColumn cMap = (ResolvedMapColumn) cCol; - ResolvedTuple cMembers = cMap.members(); - assertNotNull(cMembers); - assertEquals(1, cMembers.columns().size()); - - // D is an unknown column type (not a map) - - ResolvedColumn dCol = cMembers.columns().get(0); - assertEquals(ResolvedNullColumn.ID, dCol.nodeType()); - } - - /** - * Test of a map with missing columns. - * table of (a{b, c}), project a.c, a.d, a.e.f - */ - - @Test - public void testOmittedMapMembers() { - - // Simulate SELECT a.c, a.d, a.e.f ... - - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectList("x", "a.c", "a.d", "a.e.f", "y"), - ScanTestUtils.parsers()); - assertEquals(3, scanProj.columns().size()); - - // Simulate a data source, with early schema, of (x, y, a{b, c}) - - TupleMetadata tableSchema = new SchemaBuilder() - .add("x", MinorType.VARCHAR) - .add("y", MinorType.INT) - .addMap("a") - .add("b", MinorType.BIGINT) - .add("c", MinorType.FLOAT8) - .resumeSchema() - .buildSchema(); - - NullColumnBuilder builder = new NullColumnBuilder(null, false); - ResolvedRow rootTuple = new ResolvedRow(builder); - new ExplicitSchemaProjection( - scanProj, tableSchema, rootTuple, - ScanTestUtils.resolvers()); - - List<ResolvedColumn> columns = rootTuple.columns(); - assertEquals(3, columns.size()); - - // Should have resolved a.b to a map column, - // a.d to a missing nested map, and a.e.f to a missing - // nested map member - - // X is projected - - ResolvedColumn xCol = columns.get(0); - assertEquals("x", xCol.name()); - assertEquals(ResolvedTableColumn.ID, xCol.nodeType()); - assertSame(rootTuple, ((ResolvedTableColumn) (xCol)).source()); - assertEquals(0, ((ResolvedTableColumn) (xCol)).sourceIndex()); - - // Y is projected - - ResolvedColumn yCol = columns.get(2); - assertEquals("y", yCol.name()); - assertEquals(ResolvedTableColumn.ID, yCol.nodeType()); - assertSame(rootTuple, ((ResolvedTableColumn) (yCol)).source()); - assertEquals(1, ((ResolvedTableColumn) (yCol)).sourceIndex()); - - // A is projected - - ResolvedColumn aCol = columns.get(1); - assertEquals("a", aCol.name()); - assertEquals(ResolvedMapColumn.ID, aCol.nodeType()); - - ResolvedMapColumn aMap = (ResolvedMapColumn) aCol; - ResolvedTuple aMembers = aMap.members(); - assertFalse(aMembers.isSimpleProjection()); - assertNotNull(aMembers); - assertEquals(3, aMembers.columns().size()); - - // a.c is projected - - ResolvedColumn acCol = aMembers.columns().get(0); - assertEquals("c", acCol.name()); - assertEquals(ResolvedTableColumn.ID, acCol.nodeType()); - assertEquals(1, ((ResolvedTableColumn) (acCol)).sourceIndex()); - - // a.d is not in the table, is null - - ResolvedColumn adCol = aMembers.columns().get(1); - assertEquals("d", adCol.name()); - assertEquals(ResolvedNullColumn.ID, adCol.nodeType()); - - // a.e is not in the table, is implicitly a map - - ResolvedColumn aeCol = aMembers.columns().get(2); - assertEquals("e", aeCol.name()); - assertEquals(ResolvedMapColumn.ID, aeCol.nodeType()); - - ResolvedMapColumn aeMap = (ResolvedMapColumn) aeCol; - ResolvedTuple aeMembers = aeMap.members(); - assertFalse(aeMembers.isSimpleProjection()); - assertNotNull(aeMembers); - assertEquals(1, aeMembers.columns().size()); - - // a.d.f is a null column - - ResolvedColumn aefCol = aeMembers.columns().get(0); - assertEquals("f", aefCol.name()); - assertEquals(ResolvedNullColumn.ID, aefCol.nodeType()); - } - - /** - * Simple map project. This is an internal case in which the - * query asks for a set of columns inside a map, and the table - * loader produces exactly that set. No special projection is - * needed, the map is projected as a whole. - */ - - @Test - public void testSimpleMapProject() { - - // Simulate SELECT a.b, a.c ... - - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectList("a.b", "a.c"), - ScanTestUtils.parsers()); - assertEquals(1, scanProj.columns().size()); - - // Simulate a data source, with early schema, of (a{b, c}) - - TupleMetadata tableSchema = new SchemaBuilder() - .addMap("a") - .add("b", MinorType.BIGINT) - .add("c", MinorType.FLOAT8) - .resumeSchema() - .buildSchema(); - - NullColumnBuilder builder = new NullColumnBuilder(null, false); - ResolvedRow rootTuple = new ResolvedRow(builder); - new ExplicitSchemaProjection( - scanProj, tableSchema, rootTuple, - ScanTestUtils.resolvers()); - - List<ResolvedColumn> columns = rootTuple.columns(); - assertEquals(1, columns.size()); - - // Should have resolved a.b to a map column, - // a.d to a missing nested map, and a.e.f to a missing - // nested map member - - // a is projected as a vector, not as a structured map - - ResolvedColumn aCol = columns.get(0); - assertEquals("a", aCol.name()); - assertEquals(ResolvedTableColumn.ID, aCol.nodeType()); - assertSame(rootTuple, ((ResolvedTableColumn) (aCol)).source()); - assertEquals(0, ((ResolvedTableColumn) (aCol)).sourceIndex()); - } - - /** - * Project of a non-map as a map - */ - - @Test - public void testMapMismatch() { - - // Simulate SELECT a.b ... - - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectList("a.b"), - ScanTestUtils.parsers()); - - // Simulate a data source, with early schema, of (a) - // where a is not a map. - - TupleMetadata tableSchema = new SchemaBuilder() - .add("a", MinorType.VARCHAR) - .buildSchema(); - - NullColumnBuilder builder = new NullColumnBuilder(null, false); - ResolvedRow rootTuple = new ResolvedRow(builder); - try { - new ExplicitSchemaProjection( - scanProj, tableSchema, rootTuple, - ScanTestUtils.resolvers()); - fail(); - } catch (UserException e) { - // Expected - } - } - - /** - * Test project of an array. At the scan level, we just verify - * that the requested column is, indeed, an array. - */ - - @Test - public void testArrayProject() { - - // Simulate SELECT a[0] ... - - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectList("a[0]"), - ScanTestUtils.parsers()); - - // Simulate a data source, with early schema, of (a) - // where a is not an array. - - TupleMetadata tableSchema = new SchemaBuilder() - .addArray("a", MinorType.VARCHAR) - .buildSchema(); - - NullColumnBuilder builder = new NullColumnBuilder(null, false); - ResolvedRow rootTuple = new ResolvedRow(builder); - new ExplicitSchemaProjection( - scanProj, tableSchema, rootTuple, - ScanTestUtils.resolvers()); - - List<ResolvedColumn> columns = rootTuple.columns(); - assertEquals(1, columns.size()); - - ResolvedColumn aCol = columns.get(0); - assertEquals("a", aCol.name()); - assertEquals(ResolvedTableColumn.ID, aCol.nodeType()); - assertSame(rootTuple, ((ResolvedTableColumn) (aCol)).source()); - assertEquals(0, ((ResolvedTableColumn) (aCol)).sourceIndex()); - } - - /** - * Project of a non-array as an array - */ - - @Test - public void testArrayMismatch() { - - // Simulate SELECT a[0] ... - - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectList("a[0]"), - ScanTestUtils.parsers()); - - // Simulate a data source, with early schema, of (a) - // where a is not an array. - - TupleMetadata tableSchema = new SchemaBuilder() - .add("a", MinorType.VARCHAR) - .buildSchema(); - - NullColumnBuilder builder = new NullColumnBuilder(null, false); - ResolvedRow rootTuple = new ResolvedRow(builder); - try { - new ExplicitSchemaProjection( - scanProj, tableSchema, rootTuple, - ScanTestUtils.resolvers()); - fail(); - } catch (UserException e) { - // Expected - } - } -} diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestSchemaSmoothing.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestSchemaSmoothing.java deleted file mode 100644 index a30321b0d..000000000 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestSchemaSmoothing.java +++ /dev/null @@ -1,946 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.drill.exec.physical.impl.scan; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotEquals; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - -import java.util.List; - -import org.apache.drill.common.types.TypeProtos.MinorType; -import org.apache.drill.exec.physical.impl.protocol.SchemaTracker; -import org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn; -import org.apache.drill.exec.physical.impl.scan.file.FileMetadataManager; -import org.apache.drill.exec.physical.impl.scan.project.ExplicitSchemaProjection; -import org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder; -import org.apache.drill.exec.physical.impl.scan.project.ResolvedColumn; -import org.apache.drill.exec.physical.impl.scan.project.ResolvedNullColumn; -import org.apache.drill.exec.physical.impl.scan.project.ResolvedTableColumn; -import org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow; -import org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection; -import org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator; -import org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ReaderSchemaOrchestrator; -import org.apache.drill.exec.physical.impl.scan.project.SchemaSmoother; -import org.apache.drill.exec.physical.impl.scan.project.SchemaSmoother.IncompatibleSchemaException; -import org.apache.drill.exec.physical.impl.scan.project.SmoothingProjection; -import org.apache.drill.exec.physical.impl.scan.project.WildcardSchemaProjection; -import org.apache.drill.exec.physical.rowSet.ResultSetLoader; -import org.apache.drill.exec.physical.rowSet.impl.RowSetTestUtils; -import org.apache.drill.exec.record.metadata.SchemaBuilder; -import org.apache.drill.exec.record.metadata.TupleMetadata; -import org.apache.drill.test.SubOperatorTest; -import org.apache.drill.test.rowSet.RowSet.SingleRowSet; -import org.apache.drill.test.rowSet.RowSetComparison; -import org.apache.hadoop.fs.Path; -import org.junit.Test; - -import org.apache.drill.shaded.guava.com.google.common.collect.Lists; - -/** - * Tests schema smoothing at the schema projection level. - * This level handles reusing prior types when filling null - * values. But, because no actual vectors are involved, it - * does not handle the schema chosen for a table ahead of - * time, only the schema as it is merged with prior schema to - * detect missing columns. - * <p> - * Focuses on the <tt>SmoothingProjection</tt> class itself. - * <p> - * Note that, at present, schema smoothing does not work for entire - * maps. That is, if file 1 has, say <tt>{a: {b: 10, c: "foo"}}</tt> - * and file 2 has, say, <tt>{a: null}</tt>, then schema smoothing does - * not currently know how to recreate the map. The same is true of - * lists and unions. Handling such cases is complex and is probably - * better handled via a system that allows the user to specify their - * intent by providing a schema to apply to the two files. - */ - -public class TestSchemaSmoothing extends SubOperatorTest { - - /** - * Sanity test for the simple, discrete case. The purpose of - * discrete is just to run the basic lifecycle in a way that - * is compatible with the schema-persistence version. - */ - - @Test - public void testDiscrete() { - - // Set up the file metadata manager - - Path filePathA = new Path("hdfs:///w/x/y/a.csv"); - Path filePathB = new Path("hdfs:///w/x/y/b.csv"); - FileMetadataManager metadataManager = new FileMetadataManager( - fixture.getOptionManager(), - new Path("hdfs:///w"), - Lists.newArrayList(filePathA, filePathB)); - - // Set up the scan level projection - - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectList(ScanTestUtils.FILE_NAME_COL, "a", "b"), - ScanTestUtils.parsers(metadataManager.projectionParser())); - - { - // Define a file a.csv - - metadataManager.startFile(filePathA); - - // Build the output schema from the (a, b) table schema - - TupleMetadata twoColSchema = new SchemaBuilder() - .add("a", MinorType.INT) - .addNullable("b", MinorType.VARCHAR, 10) - .buildSchema(); - NullColumnBuilder builder = new NullColumnBuilder(null, false); - ResolvedRow rootTuple = new ResolvedRow(builder); - new ExplicitSchemaProjection( - scanProj, twoColSchema, rootTuple, - ScanTestUtils.resolvers(metadataManager)); - - // Verify the full output schema - - TupleMetadata expectedSchema = new SchemaBuilder() - .add("filename", MinorType.VARCHAR) - .add("a", MinorType.INT) - .addNullable("b", MinorType.VARCHAR, 10) - .buildSchema(); - - // Verify - - List<ResolvedColumn> columns = rootTuple.columns(); - assertEquals(3, columns.size()); - assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema)); - assertEquals(ScanTestUtils.FILE_NAME_COL, columns.get(0).name()); - assertEquals("a.csv", ((FileMetadataColumn) columns.get(0)).value()); - assertEquals(ResolvedTableColumn.ID, columns.get(1).nodeType()); - } - { - // Define a file b.csv - - metadataManager.startFile(filePathB); - - // Build the output schema from the (a) table schema - - TupleMetadata oneColSchema = new SchemaBuilder() - .add("a", MinorType.INT) - .buildSchema(); - NullColumnBuilder builder = new NullColumnBuilder(null, false); - ResolvedRow rootTuple = new ResolvedRow(builder); - new ExplicitSchemaProjection( - scanProj, oneColSchema, rootTuple, - ScanTestUtils.resolvers(metadataManager)); - - // Verify the full output schema - // Since this mode is "discrete", we don't remember the type - // of the missing column. (Instead, it is filled in at the - // vector level as part of vector persistence.) During projection, it is - // marked with type NULL so that the null column builder will fill in - // the proper type. - - TupleMetadata expectedSchema = new SchemaBuilder() - .add("filename", MinorType.VARCHAR) - .add("a", MinorType.INT) - .addNullable("b", MinorType.NULL) - .buildSchema(); - - // Verify - - List<ResolvedColumn> columns = rootTuple.columns(); - assertEquals(3, columns.size()); - assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema)); - assertEquals(ScanTestUtils.FILE_NAME_COL, columns.get(0).name()); - assertEquals("b.csv", ((FileMetadataColumn) columns.get(0)).value()); - assertEquals(ResolvedTableColumn.ID, columns.get(1).nodeType()); - assertEquals(ResolvedNullColumn.ID, columns.get(2).nodeType()); - } - } - - /** - * Low-level test of the smoothing projection, including the exceptions - * it throws when things are not going its way. - */ - - @Test - public void testSmoothingProjection() { - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectAll(), - ScanTestUtils.parsers()); - - // Table 1: (a: nullable bigint, b) - - TupleMetadata schema1 = new SchemaBuilder() - .addNullable("a", MinorType.BIGINT) - .addNullable("b", MinorType.VARCHAR) - .add("c", MinorType.FLOAT8) - .buildSchema(); - ResolvedRow priorSchema; - { - NullColumnBuilder builder = new NullColumnBuilder(null, false); - ResolvedRow rootTuple = new ResolvedRow(builder); - new WildcardSchemaProjection( - scanProj, schema1, rootTuple, - ScanTestUtils.resolvers()); - priorSchema = rootTuple; - } - - // Table 2: (a: nullable bigint, c), column omitted, original schema preserved - - TupleMetadata schema2 = new SchemaBuilder() - .addNullable("a", MinorType.BIGINT) - .add("c", MinorType.FLOAT8) - .buildSchema(); - try { - NullColumnBuilder builder = new NullColumnBuilder(null, false); - ResolvedRow rootTuple = new ResolvedRow(builder); - new SmoothingProjection( - scanProj, schema2, priorSchema, rootTuple, - ScanTestUtils.resolvers()); - assertTrue(schema1.isEquivalent(ScanTestUtils.schema(rootTuple))); - priorSchema = rootTuple; - } catch (IncompatibleSchemaException e) { - fail(); - } - - // Table 3: (a, c, d), column added, must replan schema - - TupleMetadata schema3 = new SchemaBuilder() - .addNullable("a", MinorType.BIGINT) - .addNullable("b", MinorType.VARCHAR) - .add("c", MinorType.FLOAT8) - .add("d", MinorType.INT) - .buildSchema(); - try { - NullColumnBuilder builder = new NullColumnBuilder(null, false); - ResolvedRow rootTuple = new ResolvedRow(builder); - new SmoothingProjection( - scanProj, schema3, priorSchema, rootTuple, - ScanTestUtils.resolvers()); - fail(); - } catch (IncompatibleSchemaException e) { - // Expected - } - - // Table 4: (a: double), change type must replan schema - - TupleMetadata schema4 = new SchemaBuilder() - .addNullable("a", MinorType.FLOAT8) - .addNullable("b", MinorType.VARCHAR) - .add("c", MinorType.FLOAT8) - .buildSchema(); - try { - NullColumnBuilder builder = new NullColumnBuilder(null, false); - ResolvedRow rootTuple = new ResolvedRow(builder); - new SmoothingProjection( - scanProj, schema4, priorSchema, rootTuple, - ScanTestUtils.resolvers()); - fail(); - } catch (IncompatibleSchemaException e) { - // Expected - } - -// // Table 5: (a: not-nullable bigint): convert to nullable for consistency -// -// TupleMetadata schema5 = new SchemaBuilder() -// .addNullable("a", MinorType.BIGINT) -// .add("c", MinorType.FLOAT8) -// .buildSchema(); -// try { -// SmoothingProjection schemaProj = new SmoothingProjection( -// scanProj, schema5, dummySource, dummySource, -// new ArrayList<>(), priorSchema); -// assertTrue(schema1.isEquivalent(ScanTestUtils.schema(schemaProj.columns()))); -// } catch (IncompatibleSchemaException e) { -// fail(); -// } - - // Table 6: Drop a non-nullable column, must replan - - TupleMetadata schema6 = new SchemaBuilder() - .addNullable("a", MinorType.BIGINT) - .addNullable("b", MinorType.VARCHAR) - .buildSchema(); - try { - NullColumnBuilder builder = new NullColumnBuilder(null, false); - ResolvedRow rootTuple = new ResolvedRow(builder); - new SmoothingProjection( - scanProj, schema6, priorSchema, rootTuple, - ScanTestUtils.resolvers()); - fail(); - } catch (IncompatibleSchemaException e) { - // Expected - } - } - - /** - * Case in which the table schema is a superset of the prior - * schema. Discard prior schema. Turn off auto expansion of - * metadata for a simpler test. - */ - - @Test - public void testSmaller() { - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectAll(), - ScanTestUtils.parsers()); - - SchemaSmoother smoother = new SchemaSmoother(scanProj, - ScanTestUtils.resolvers()); - - TupleMetadata priorSchema = new SchemaBuilder() - .add("a", MinorType.INT) - .buildSchema(); - TupleMetadata tableSchema = new SchemaBuilder() - .add("a", MinorType.INT) - .add("b", MinorType.VARCHAR) - .buildSchema(); - - { - NullColumnBuilder builder = new NullColumnBuilder(null, false); - ResolvedRow rootTuple = new ResolvedRow(builder); - smoother.resolve(priorSchema, rootTuple); - assertEquals(1, smoother.schemaVersion()); - assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(priorSchema)); - } - { - NullColumnBuilder builder = new NullColumnBuilder(null, false); - ResolvedRow rootTuple = new ResolvedRow(builder); - smoother.resolve(tableSchema, rootTuple); - assertEquals(2, smoother.schemaVersion()); - assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(tableSchema)); - } - } - - /** - * Case in which the table schema and prior are disjoint - * sets. Discard the prior schema. - */ - - @Test - public void testDisjoint() { - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectAll(), - ScanTestUtils.parsers()); - - SchemaSmoother smoother = new SchemaSmoother(scanProj, - ScanTestUtils.resolvers()); - - TupleMetadata priorSchema = new SchemaBuilder() - .add("a", MinorType.INT) - .buildSchema(); - TupleMetadata tableSchema = new SchemaBuilder() - .add("b", MinorType.VARCHAR) - .buildSchema(); - - { - doResolve(smoother, priorSchema); - } - { - ResolvedRow rootTuple = doResolve(smoother, tableSchema); - assertEquals(2, smoother.schemaVersion()); - assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(tableSchema)); - } - } - - private ResolvedRow doResolve(SchemaSmoother smoother, TupleMetadata schema) { - NullColumnBuilder builder = new NullColumnBuilder(null, false); - ResolvedRow rootTuple = new ResolvedRow(builder); - smoother.resolve(schema, rootTuple); - return rootTuple; - } - - /** - * Column names match, but types differ. Discard the prior schema. - */ - - @Test - public void testDifferentTypes() { - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectAll(), - ScanTestUtils.parsers()); - - SchemaSmoother smoother = new SchemaSmoother(scanProj, - ScanTestUtils.resolvers()); - - TupleMetadata priorSchema = new SchemaBuilder() - .add("a", MinorType.INT) - .add("b", MinorType.VARCHAR) - .buildSchema(); - TupleMetadata tableSchema = new SchemaBuilder() - .add("a", MinorType.INT) - .addNullable("b", MinorType.VARCHAR) - .buildSchema(); - - { - doResolve(smoother, priorSchema); - } - { - ResolvedRow rootTuple = doResolve(smoother, tableSchema); - assertEquals(2, smoother.schemaVersion()); - assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(tableSchema)); - } - } - - /** - * The prior and table schemas are identical. Preserve the prior - * schema (though, the output is no different than if we discarded - * the prior schema...) - */ - - @Test - public void testSameSchemas() { - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectAll(), - ScanTestUtils.parsers()); - - SchemaSmoother smoother = new SchemaSmoother(scanProj, - ScanTestUtils.resolvers()); - - TupleMetadata priorSchema = new SchemaBuilder() - .add("a", MinorType.INT) - .add("b", MinorType.VARCHAR) - .buildSchema(); - TupleMetadata tableSchema = new SchemaBuilder() - .add("a", MinorType.INT) - .add("b", MinorType.VARCHAR) - .buildSchema(); - - { - doResolve(smoother, priorSchema); - } - { - ResolvedRow rootTuple = doResolve(smoother, tableSchema); - assertEquals(1, smoother.schemaVersion()); - TupleMetadata actualSchema = ScanTestUtils.schema(rootTuple); - assertTrue(actualSchema.isEquivalent(tableSchema)); - assertTrue(actualSchema.isEquivalent(priorSchema)); - } - } - - /** - * The prior and table schemas are identical, but the cases of names differ. - * Preserve the case of the first schema. - */ - - @Test - public void testDifferentCase() { - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectAll(), - ScanTestUtils.parsers()); - - SchemaSmoother smoother = new SchemaSmoother(scanProj, - ScanTestUtils.resolvers()); - - TupleMetadata priorSchema = new SchemaBuilder() - .add("a", MinorType.INT) - .add("b", MinorType.VARCHAR) - .buildSchema(); - TupleMetadata tableSchema = new SchemaBuilder() - .add("A", MinorType.INT) - .add("B", MinorType.VARCHAR) - .buildSchema(); - - { - doResolve(smoother, priorSchema); - } - { - ResolvedRow rootTuple = doResolve(smoother, tableSchema); - assertEquals(1, smoother.schemaVersion()); - assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(priorSchema)); - List<ResolvedColumn> columns = rootTuple.columns(); - assertEquals("a", columns.get(0).name()); - } - } - - /** - * Can't preserve the prior schema if it had required columns - * where the new schema has no columns. - */ - - @Test - public void testRequired() { - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectAll(), - ScanTestUtils.parsers()); - - SchemaSmoother smoother = new SchemaSmoother(scanProj, - ScanTestUtils.resolvers()); - - TupleMetadata priorSchema = new SchemaBuilder() - .add("a", MinorType.INT) - .addNullable("b", MinorType.VARCHAR) - .buildSchema(); - TupleMetadata tableSchema = new SchemaBuilder() - .addNullable("b", MinorType.VARCHAR) - .buildSchema(); - - { - doResolve(smoother, priorSchema); - } - { - ResolvedRow rootTuple = doResolve(smoother, tableSchema); - assertEquals(2, smoother.schemaVersion()); - assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(tableSchema)); - } - } - - /** - * Preserve the prior schema if table is a subset and missing columns - * are nullable or repeated. - */ - - @Test - public void testMissingNullableColumns() { - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectAll(), - ScanTestUtils.parsers()); - - SchemaSmoother smoother = new SchemaSmoother(scanProj, - ScanTestUtils.resolvers()); - - TupleMetadata priorSchema = new SchemaBuilder() - .addNullable("a", MinorType.INT) - .add("b", MinorType.VARCHAR) - .addArray("c", MinorType.BIGINT) - .buildSchema(); - TupleMetadata tableSchema = new SchemaBuilder() - .add("b", MinorType.VARCHAR) - .buildSchema(); - - { - doResolve(smoother, priorSchema); - } - { - ResolvedRow rootTuple = doResolve(smoother, tableSchema); - assertEquals(1, smoother.schemaVersion()); - assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(priorSchema)); - } - } - - /** - * Preserve the prior schema if table is a subset. Map the table - * columns to the output using the prior schema ordering. - */ - - @Test - public void testReordering() { - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectAll(), - ScanTestUtils.parsers()); - - SchemaSmoother smoother = new SchemaSmoother(scanProj, - ScanTestUtils.resolvers()); - - TupleMetadata priorSchema = new SchemaBuilder() - .addNullable("a", MinorType.INT) - .add("b", MinorType.VARCHAR) - .addArray("c", MinorType.BIGINT) - .buildSchema(); - TupleMetadata tableSchema = new SchemaBuilder() - .add("b", MinorType.VARCHAR) - .addNullable("a", MinorType.INT) - .buildSchema(); - - { - doResolve(smoother, priorSchema); - } - { - ResolvedRow rootTuple = doResolve(smoother, tableSchema); - assertEquals(1, smoother.schemaVersion()); - assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(priorSchema)); - } - } - - /** - * If using the legacy wildcard expansion, reuse schema if partition paths - * are the same length. - */ - - @Test - public void testSamePartitionLength() { - - // Set up the file metadata manager - - Path filePathA = new Path("hdfs:///w/x/y/a.csv"); - Path filePathB = new Path("hdfs:///w/x/y/b.csv"); - FileMetadataManager metadataManager = new FileMetadataManager( - fixture.getOptionManager(), - new Path("hdfs:///w"), - Lists.newArrayList(filePathA, filePathB)); - - // Set up the scan level projection - - ScanLevelProjection scanProj = new ScanLevelProjection( - ScanTestUtils.projectAllWithMetadata(2), - ScanTestUtils.parsers(metadataManager.projectionParser())); - - // Define the schema smoother - - SchemaSmoother smoother = new SchemaSmoother(scanProj, - ScanTestUtils.resolvers(metadataManager)); - - TupleMetadata tableSchema = new SchemaBuilder() - .add("a", MinorType.INT) - .add("b", MinorType.VARCHAR) - .buildSchema(); - - TupleMetadata expectedSchema = ScanTestUtils.expandMetadata(tableSchema, metadataManager, 2); - { - metadataManager.startFile(filePathA); - ResolvedRow rootTuple = doResolve(smoother, tableSchema); - assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema)); - } - { - metadataManager.startFile(filePathB); - ResolvedRow rootTuple = doResolve(smoother, tableSchema); - assertEquals(1, smoother.schemaVersion()); - assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema)); - } - } - - /** - * If using the legacy wildcard expansion, reuse schema if the new partition path - * is shorter than the previous. (Unneeded partitions will be set to null by the - * scan projector.) - */ - - @Test - public void testShorterPartitionLength() { - - // Set up the file metadata manager - - Path filePathA = new Path("hdfs:///w/x/y/a.csv"); - Path filePathB = new Path("hdfs:///w/x/b.csv"); - FileMetadataManager metadataManager = new FileMetadataManager( - fixture.getOptionManager(), - new Path("hdfs:///w"), - Lists.newArrayList(filePathA, filePathB)); - - // Set up the scan level projection - - ScanLevelProjection scanProj = new ScanLevelProjection( - ScanTestUtils.projectAllWithMetadata(2), - ScanTestUtils.parsers(metadataManager.projectionParser())); - - // Define the schema smoother - - SchemaSmoother smoother = new SchemaSmoother(scanProj, - ScanTestUtils.resolvers(metadataManager)); - - TupleMetadata tableSchema = new SchemaBuilder() - .add("a", MinorType.INT) - .add("b", MinorType.VARCHAR) - .buildSchema(); - - TupleMetadata expectedSchema = ScanTestUtils.expandMetadata(tableSchema, metadataManager, 2); - { - metadataManager.startFile(filePathA); - ResolvedRow rootTuple = doResolve(smoother, tableSchema); - assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema)); - } - { - metadataManager.startFile(filePathB); - ResolvedRow rootTuple = doResolve(smoother, tableSchema); - assertEquals(1, smoother.schemaVersion()); - assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema)); - } - } - - /** - * If using the legacy wildcard expansion, we are able to use the same - * schema even if the new partition path is longer than the previous. - * Because all file names are provided up front. - */ - - @Test - public void testLongerPartitionLength() { - - // Set up the file metadata manager - - Path filePathA = new Path("hdfs:///w/x/a.csv"); - Path filePathB = new Path("hdfs:///w/x/y/b.csv"); - FileMetadataManager metadataManager = new FileMetadataManager( - fixture.getOptionManager(), - new Path("hdfs:///w"), - Lists.newArrayList(filePathA, filePathB)); - - // Set up the scan level projection - - ScanLevelProjection scanProj = new ScanLevelProjection( - ScanTestUtils.projectAllWithMetadata(2), - ScanTestUtils.parsers(metadataManager.projectionParser())); - - // Define the schema smoother - - SchemaSmoother smoother = new SchemaSmoother(scanProj, - ScanTestUtils.resolvers(metadataManager)); - - TupleMetadata tableSchema = new SchemaBuilder() - .add("a", MinorType.INT) - .add("b", MinorType.VARCHAR) - .buildSchema(); - - TupleMetadata expectedSchema = ScanTestUtils.expandMetadata(tableSchema, metadataManager, 2); - { - metadataManager.startFile(filePathA); - ResolvedRow rootTuple = doResolve(smoother, tableSchema); - assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema)); - } - { - metadataManager.startFile(filePathB); - ResolvedRow rootTuple = doResolve(smoother, tableSchema); - assertEquals(1, smoother.schemaVersion()); - assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema)); - } - } - - /** - * Integrated test across multiple schemas at the batch level. - */ - - @Test - public void testSmoothableSchemaBatches() { - ScanLevelProjection scanProj = new ScanLevelProjection( - RowSetTestUtils.projectAll(), - ScanTestUtils.parsers()); - - SchemaSmoother smoother = new SchemaSmoother(scanProj, - ScanTestUtils.resolvers()); - - // Table 1: (a: bigint, b) - - TupleMetadata schema1 = new SchemaBuilder() - .addNullable("a", MinorType.BIGINT) - .addNullable("b", MinorType.VARCHAR) - .add("c", MinorType.FLOAT8) - .buildSchema(); - { - ResolvedRow rootTuple = doResolve(smoother, schema1); - - // Just use the original schema. - - assertTrue(schema1.isEquivalent(ScanTestUtils.schema(rootTuple))); - assertEquals(1, smoother.schemaVersion()); - } - - // Table 2: (a: nullable bigint, c), column ommitted, original schema preserved - - TupleMetadata schema2 = new SchemaBuilder() - .addNullable("a", MinorType.BIGINT) - .add("c", MinorType.FLOAT8) - .buildSchema(); - { - ResolvedRow rootTuple = doResolve(smoother, schema2); - assertTrue(schema1.isEquivalent(ScanTestUtils.schema(rootTuple))); - assertEquals(1, smoother.schemaVersion()); - } - - // Table 3: (a, c, d), column added, must replan schema - - TupleMetadata schema3 = new SchemaBuilder() - .addNullable("a", MinorType.BIGINT) - .addNullable("b", MinorType.VARCHAR) - .add("c", MinorType.FLOAT8) - .add("d", MinorType.INT) - .buildSchema(); - { - ResolvedRow rootTuple = doResolve(smoother, schema3); - assertTrue(schema3.isEquivalent(ScanTestUtils.schema(rootTuple))); - assertEquals(2, smoother.schemaVersion()); - } - - // Table 4: Drop a non-nullable column, must replan - - TupleMetadata schema4 = new SchemaBuilder() - .addNullable("a", MinorType.BIGINT) - .addNullable("b", MinorType.VARCHAR) - .buildSchema(); - { - ResolvedRow rootTuple = doResolve(smoother, schema4); - assertTrue(schema4.isEquivalent(ScanTestUtils.schema(rootTuple))); - assertEquals(3, smoother.schemaVersion()); - } - - // Table 5: (a: double), change type must replan schema - - TupleMetadata schema5 = new SchemaBuilder() - .addNullable("a", MinorType.FLOAT8) - .addNullable("b", MinorType.VARCHAR) - .buildSchema(); - { - ResolvedRow rootTuple = doResolve(smoother, schema5); - assertTrue(schema5.isEquivalent(ScanTestUtils.schema(rootTuple))); - assertEquals(4, smoother.schemaVersion()); - } - -// // Table 6: (a: not-nullable bigint): convert to nullable for consistency -// -// TupleMetadata schema6 = new SchemaBuilder() -// .add("a", MinorType.FLOAT8) -// .add("b", MinorType.VARCHAR) -// .buildSchema(); -// { -// SchemaLevelProjection schemaProj = smoother.resolve(schema3, dummySource); -// assertTrue(schema5.isEquivalent(ScanTestUtils.schema(schemaProj.columns()))); -// } - } - - /** - * A SELECT * query uses the schema of the table as the output schema. - * This is trivial when the scanner has one table. But, if two or more - * tables occur, then things get interesting. The first table sets the - * schema. The second table then has: - * <ul> - * <li>The same schema, trivial case.</li> - * <li>A subset of the first table. The type of the "missing" column - * from the first table is used for a null column in the second table.</li> - * <li>A superset or disjoint set of the first schema. This triggers a hard schema - * change.</li> - * </ul> - * <p> - * It is an open question whether previous columns should be preserved on - * a hard reset. For now, the code implements, and this test verifies, that a - * hard reset clears the "memory" of prior schemas. - */ - - @Test - public void testWildcardSmoothing() { - ScanSchemaOrchestrator projector = new ScanSchemaOrchestrator(fixture.allocator()); - projector.enableSchemaSmoothing(true); - projector.build(RowSetTestUtils.projectAll()); - - TupleMetadata firstSchema = new SchemaBuilder() - .add("a", MinorType.INT) - .addNullable("b", MinorType.VARCHAR, 10) - .addNullable("c", MinorType.BIGINT) - .buildSchema(); - TupleMetadata subsetSchema = new SchemaBuilder() - .addNullable("b", MinorType.VARCHAR, 10) - .add("a", MinorType.INT) - .buildSchema(); - TupleMetadata disjointSchema = new SchemaBuilder() - .add("a", MinorType.INT) - .addNullable("b", MinorType.VARCHAR, 10) - .add("d", MinorType.VARCHAR) - .buildSchema(); - - SchemaTracker tracker = new SchemaTracker(); - int schemaVersion; - { - // First table, establishes the baseline - // ... FROM table 1 - - ReaderSchemaOrchestrator reader = projector.startReader(); - ResultSetLoader loader = reader.makeTableLoader(firstSchema); - - reader.startBatch(); - loader.writer() - .addRow(10, "fred", 110L) - .addRow(20, "wilma", 110L); - reader.endBatch(); - - tracker.trackSchema(projector.output()); - schemaVersion = tracker.schemaVersion(); - - SingleRowSet expected = fixture.rowSetBuilder(firstSchema) - .addRow(10, "fred", 110L) - .addRow(20, "wilma", 110L) - .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(projector.output())); - } - { - // Second table, same schema, the trivial case - // ... FROM table 2 - - ReaderSchemaOrchestrator reader = projector.startReader(); - ResultSetLoader loader = reader.makeTableLoader(firstSchema); - - reader.startBatch(); - loader.writer() - .addRow(70, "pebbles", 770L) - .addRow(80, "hoppy", 880L); - reader.endBatch(); - - tracker.trackSchema(projector.output()); - assertEquals(schemaVersion, tracker.schemaVersion()); - - SingleRowSet expected = fixture.rowSetBuilder(firstSchema) - .addRow(70, "pebbles", 770L) - .addRow(80, "hoppy", 880L) - .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(projector.output())); - } - { - // Third table: subset schema of first two - // ... FROM table 3 - - ReaderSchemaOrchestrator reader = projector.startReader(); - ResultSetLoader loader = reader.makeTableLoader(subsetSchema); - - reader.startBatch(); - loader.writer() - .addRow("bambam", 30) - .addRow("betty", 40); - reader.endBatch(); - - tracker.trackSchema(projector.output()); - assertEquals(schemaVersion, tracker.schemaVersion()); - - SingleRowSet expected = fixture.rowSetBuilder(firstSchema) - .addRow(30, "bambam", null) - .addRow(40, "betty", null) - .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(projector.output())); - } - { - // Fourth table: disjoint schema, cases a schema reset - // ... FROM table 4 - - ReaderSchemaOrchestrator reader = projector.startReader(); - ResultSetLoader loader = reader.makeTableLoader(disjointSchema); - - reader.startBatch(); - loader.writer() - .addRow(50, "dino", "supporting") - .addRow(60, "barney", "main"); - reader.endBatch(); - - tracker.trackSchema(projector.output()); - assertNotEquals(schemaVersion, tracker.schemaVersion()); - - SingleRowSet expected = fixture.rowSetBuilder(disjointSchema) - .addRow(50, "dino", "supporting") - .addRow(60, "barney", "main") - .build(); - new RowSetComparison(expected) - .verifyAndClearAll(fixture.wrap(projector.output())); - } - - projector.close(); - } - - // TODO: Test schema smoothing with repeated - // TODO: Test hard schema change - // TODO: Typed null column tests (resurrect) - // TODO: Test maps and arrays of maps -} diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestConstantColumnLoader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestConstantColumnLoader.java index b45374be2..086da96b4 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestConstantColumnLoader.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestConstantColumnLoader.java @@ -24,14 +24,21 @@ import org.apache.drill.categories.RowSetTests; import org.apache.drill.common.types.TypeProtos.DataMode; import org.apache.drill.common.types.TypeProtos.MajorType; import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.physical.impl.scan.ScanTestUtils; +import org.apache.drill.exec.physical.impl.scan.file.FileMetadata; +import org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn; +import org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumnDefn; +import org.apache.drill.exec.physical.impl.scan.file.PartitionColumn; import org.apache.drill.exec.physical.impl.scan.project.ConstantColumnLoader.ConstantColumnSpec; import org.apache.drill.exec.physical.rowSet.impl.ResultVectorCacheImpl; import org.apache.drill.exec.record.BatchSchema; import org.apache.drill.exec.record.MaterializedField; import org.apache.drill.exec.record.metadata.SchemaBuilder; +import org.apache.drill.exec.store.ColumnExplorer.ImplicitFileColumns; import org.apache.drill.test.SubOperatorTest; import org.apache.drill.test.rowSet.RowSet.SingleRowSet; import org.apache.drill.test.rowSet.RowSetComparison; +import org.apache.hadoop.fs.Path; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -114,4 +121,42 @@ public class TestConstantColumnLoader extends SubOperatorTest { .verifyAndClearAll(fixture.wrap(staticLoader.load(2))); staticLoader.close(); } + + @Test + public void testFileMetadata() { + + FileMetadata fileInfo = new FileMetadata(new Path("hdfs:///w/x/y/z.csv"), new Path("hdfs:///w")); + List<ConstantColumnSpec> defns = new ArrayList<>(); + FileMetadataColumnDefn iDefn = new FileMetadataColumnDefn( + ScanTestUtils.SUFFIX_COL, ImplicitFileColumns.SUFFIX); + FileMetadataColumn iCol = new FileMetadataColumn(ScanTestUtils.SUFFIX_COL, + iDefn, fileInfo, null, 0); + defns.add(iCol); + + String partColName = ScanTestUtils.partitionColName(1); + PartitionColumn pCol = new PartitionColumn(partColName, 1, fileInfo, null, 0); + defns.add(pCol); + + ResultVectorCacheImpl cache = new ResultVectorCacheImpl(fixture.allocator()); + ConstantColumnLoader staticLoader = new ConstantColumnLoader(cache, defns); + + // Create a batch + + staticLoader.load(2); + + // Verify + + BatchSchema expectedSchema = new SchemaBuilder() + .add(ScanTestUtils.SUFFIX_COL, MinorType.VARCHAR) + .addNullable(partColName, MinorType.VARCHAR) + .build(); + SingleRowSet expected = fixture.rowSetBuilder(expectedSchema) + .addRow("csv", "y") + .addRow("csv", "y") + .build(); + + new RowSetComparison(expected) + .verifyAndClearAll(fixture.wrap(staticLoader.load(2))); + staticLoader.close(); + } } diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestScanLevelProjection.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestScanLevelProjection.java index 5b49ab3c7..f40e84787 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestScanLevelProjection.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestScanLevelProjection.java @@ -19,6 +19,7 @@ package org.apache.drill.exec.physical.impl.scan.project; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -27,6 +28,8 @@ import org.apache.drill.common.exceptions.UserException; import org.apache.drill.common.expression.SchemaPath; import org.apache.drill.exec.physical.impl.scan.ScanTestUtils; import org.apache.drill.exec.physical.rowSet.impl.RowSetTestUtils; +import org.apache.drill.exec.physical.rowSet.project.ImpliedTupleRequest; +import org.apache.drill.exec.physical.rowSet.project.RequestedTuple; import org.apache.drill.exec.physical.rowSet.project.RequestedTuple.RequestedColumn; import org.apache.drill.exec.record.metadata.ProjectionType; import org.apache.drill.test.SubOperatorTest; @@ -56,6 +59,7 @@ public class TestScanLevelProjection extends SubOperatorTest { final ScanLevelProjection scanProj = new ScanLevelProjection( RowSetTestUtils.projectList("a", "b", "c"), ScanTestUtils.parsers()); + assertFalse(scanProj.projectAll()); assertFalse(scanProj.projectNone()); @@ -72,6 +76,19 @@ public class TestScanLevelProjection extends SubOperatorTest { // Verify column type assertEquals(UnresolvedColumn.UNRESOLVED, scanProj.columns().get(0).nodeType()); + + // Verify tuple projection + + RequestedTuple outputProj = scanProj.rootProjection(); + assertEquals(3, outputProj.projections().size()); + assertNotNull(outputProj.get("a")); + assertTrue(outputProj.get("a").isSimple()); + + RequestedTuple readerProj = scanProj.readerProjection(); + assertEquals(3, readerProj.projections().size()); + assertNotNull(readerProj.get("a")); + assertEquals(ProjectionType.UNSPECIFIED, readerProj.projectionType("a")); + assertEquals(ProjectionType.UNPROJECTED, readerProj.projectionType("d")); } /** @@ -85,6 +102,7 @@ public class TestScanLevelProjection extends SubOperatorTest { final ScanLevelProjection scanProj = new ScanLevelProjection( RowSetTestUtils.projectList("a.x", "b.x", "a.y", "b.y", "c"), ScanTestUtils.parsers()); + assertFalse(scanProj.projectAll()); assertFalse(scanProj.projectNone()); @@ -107,6 +125,20 @@ public class TestScanLevelProjection extends SubOperatorTest { final RequestedColumn c = ((UnresolvedColumn) scanProj.columns().get(2)).element(); assertTrue(c.isSimple()); + + // Verify tuple projection + + RequestedTuple outputProj = scanProj.rootProjection(); + assertEquals(3, outputProj.projections().size()); + assertNotNull(outputProj.get("a")); + assertTrue(outputProj.get("a").isTuple()); + + RequestedTuple readerProj = scanProj.readerProjection(); + assertEquals(3, readerProj.projections().size()); + assertNotNull(readerProj.get("a")); + assertEquals(ProjectionType.TUPLE, readerProj.projectionType("a")); + assertEquals(ProjectionType.UNSPECIFIED, readerProj.projectionType("c")); + assertEquals(ProjectionType.UNPROJECTED, readerProj.projectionType("d")); } /** @@ -119,6 +151,7 @@ public class TestScanLevelProjection extends SubOperatorTest { final ScanLevelProjection scanProj = new ScanLevelProjection( RowSetTestUtils.projectList("a[1]", "a[3]"), ScanTestUtils.parsers()); + assertFalse(scanProj.projectAll()); assertFalse(scanProj.projectNone()); @@ -137,6 +170,19 @@ public class TestScanLevelProjection extends SubOperatorTest { assertTrue(a.hasIndex(1)); assertFalse(a.hasIndex(2)); assertTrue(a.hasIndex(3)); + + // Verify tuple projection + + RequestedTuple outputProj = scanProj.rootProjection(); + assertEquals(1, outputProj.projections().size()); + assertNotNull(outputProj.get("a")); + assertTrue(outputProj.get("a").isArray()); + + RequestedTuple readerProj = scanProj.readerProjection(); + assertEquals(1, readerProj.projections().size()); + assertNotNull(readerProj.get("a")); + assertEquals(ProjectionType.ARRAY, readerProj.projectionType("a")); + assertEquals(ProjectionType.UNPROJECTED, readerProj.projectionType("c")); } /** @@ -165,6 +211,17 @@ public class TestScanLevelProjection extends SubOperatorTest { // Verify column type assertEquals(UnresolvedColumn.WILDCARD, scanProj.columns().get(0).nodeType()); + + // Verify tuple projection + + RequestedTuple outputProj = scanProj.rootProjection(); + assertEquals(1, outputProj.projections().size()); + assertNotNull(outputProj.get("**")); + assertTrue(outputProj.get("**").isWildcard()); + + RequestedTuple readerProj = scanProj.readerProjection(); + assertTrue(readerProj instanceof ImpliedTupleRequest); + assertEquals(ProjectionType.UNSPECIFIED, readerProj.projectionType("a")); } /** @@ -181,38 +238,62 @@ public class TestScanLevelProjection extends SubOperatorTest { assertFalse(scanProj.projectAll()); assertTrue(scanProj.projectNone()); assertEquals(0, scanProj.requestedCols().size()); + + // Verify tuple projection + + RequestedTuple outputProj = scanProj.rootProjection(); + assertEquals(0, outputProj.projections().size()); + + RequestedTuple readerProj = scanProj.readerProjection(); + assertTrue(readerProj instanceof ImpliedTupleRequest); + assertEquals(ProjectionType.UNPROJECTED, readerProj.projectionType("a")); } /** - * Can't include both a wildcard and a column name. + * Can include both a wildcard and a column name. The Project + * operator will fill in the column, the scan framework just ignores + * the extra column. */ @Test - public void testErrorWildcardAndColumns() { - try { - new ScanLevelProjection( + public void testWildcardAndColumns() { + ScanLevelProjection scanProj = new ScanLevelProjection( RowSetTestUtils.projectList(SchemaPath.DYNAMIC_STAR, "a"), ScanTestUtils.parsers()); - fail(); - } catch (final IllegalArgumentException e) { - // Expected - } + + assertTrue(scanProj.projectAll()); + assertFalse(scanProj.projectNone()); + assertEquals(2, scanProj.requestedCols().size()); + assertEquals(1, scanProj.columns().size()); + + // Verify tuple projection + + RequestedTuple outputProj = scanProj.rootProjection(); + assertEquals(2, outputProj.projections().size()); + assertNotNull(outputProj.get("**")); + assertTrue(outputProj.get("**").isWildcard()); + assertNotNull(outputProj.get("a")); + + RequestedTuple readerProj = scanProj.readerProjection(); + assertTrue(readerProj instanceof ImpliedTupleRequest); + assertEquals(ProjectionType.UNSPECIFIED, readerProj.projectionType("a")); + assertEquals(ProjectionType.UNSPECIFIED, readerProj.projectionType("c")); } /** - * Can't include both a column name and a wildcard. + * Test a column name and a wildcard. */ @Test - public void testErrorColumnAndWildcard() { - try { - new ScanLevelProjection( + public void testColumnAndWildcard() { + ScanLevelProjection scanProj = new ScanLevelProjection( RowSetTestUtils.projectList("a", SchemaPath.DYNAMIC_STAR), ScanTestUtils.parsers()); - fail(); - } catch (final IllegalArgumentException e) { - // Expected - } + + assertTrue(scanProj.projectAll()); + assertFalse(scanProj.projectNone()); + assertEquals(2, scanProj.requestedCols().size()); + assertEquals(1, scanProj.columns().size()); } /** diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestSchemaSmoothing.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestSchemaSmoothing.java index a21b1e472..8adc0372a 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestSchemaSmoothing.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestSchemaSmoothing.java @@ -28,8 +28,9 @@ import org.apache.drill.categories.RowSetTests; import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.exec.physical.impl.protocol.SchemaTracker; import org.apache.drill.exec.physical.impl.scan.ScanTestUtils; +import org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn; +import org.apache.drill.exec.physical.impl.scan.file.FileMetadataManager; import org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow; -import org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ReaderSchemaOrchestrator; import org.apache.drill.exec.physical.impl.scan.project.SchemaSmoother.IncompatibleSchemaException; import org.apache.drill.exec.physical.rowSet.ResultSetLoader; import org.apache.drill.exec.physical.rowSet.impl.RowSetTestUtils; @@ -38,8 +39,10 @@ import org.apache.drill.exec.record.metadata.TupleMetadata; import org.apache.drill.test.SubOperatorTest; import org.apache.drill.test.rowSet.RowSet.SingleRowSet; import org.apache.drill.test.rowSet.RowSetComparison; +import org.apache.hadoop.fs.Path; import org.junit.Test; import org.junit.experimental.categories.Category; +import org.apache.drill.shaded.guava.com.google.common.collect.Lists; /** * Tests schema smoothing at the schema projection level. @@ -63,7 +66,7 @@ import org.junit.experimental.categories.Category; * to a fundamental limitation in Drill: * <ul> * <li>Drill cannot predict the future: each file (or batch) - * may have a different schema.</ul> + * may have a different schema.</li> * <li>Drill does not know about these differences until they * occur.</li> * <li>The scan operator is obligated to return the same schema @@ -85,6 +88,105 @@ import org.junit.experimental.categories.Category; public class TestSchemaSmoothing extends SubOperatorTest { /** + * Sanity test for the simple, discrete case. The purpose of + * discrete is just to run the basic lifecycle in a way that + * is compatible with the schema-persistence version. + */ + + @Test + public void testDiscrete() { + + // Set up the file metadata manager + + Path filePathA = new Path("hdfs:///w/x/y/a.csv"); + Path filePathB = new Path("hdfs:///w/x/y/b.csv"); + FileMetadataManager metadataManager = new FileMetadataManager( + fixture.getOptionManager(), + new Path("hdfs:///w"), + Lists.newArrayList(filePathA, filePathB)); + + // Set up the scan level projection + + ScanLevelProjection scanProj = new ScanLevelProjection( + RowSetTestUtils.projectList(ScanTestUtils.FILE_NAME_COL, "a", "b"), + ScanTestUtils.parsers(metadataManager.projectionParser())); + + { + // Define a file a.csv + + metadataManager.startFile(filePathA); + + // Build the output schema from the (a, b) table schema + + TupleMetadata twoColSchema = new SchemaBuilder() + .add("a", MinorType.INT) + .addNullable("b", MinorType.VARCHAR, 10) + .buildSchema(); + NullColumnBuilder builder = new NullColumnBuilder(null, false); + ResolvedRow rootTuple = new ResolvedRow(builder); + new ExplicitSchemaProjection( + scanProj, twoColSchema, rootTuple, + ScanTestUtils.resolvers(metadataManager)); + + // Verify the full output schema + + TupleMetadata expectedSchema = new SchemaBuilder() + .add("filename", MinorType.VARCHAR) + .add("a", MinorType.INT) + .addNullable("b", MinorType.VARCHAR, 10) + .buildSchema(); + + // Verify + + List<ResolvedColumn> columns = rootTuple.columns(); + assertEquals(3, columns.size()); + assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema)); + assertEquals(ScanTestUtils.FILE_NAME_COL, columns.get(0).name()); + assertEquals("a.csv", ((FileMetadataColumn) columns.get(0)).value()); + assertEquals(ResolvedTableColumn.ID, columns.get(1).nodeType()); + } + { + // Define a file b.csv + + metadataManager.startFile(filePathB); + + // Build the output schema from the (a) table schema + + TupleMetadata oneColSchema = new SchemaBuilder() + .add("a", MinorType.INT) + .buildSchema(); + NullColumnBuilder builder = new NullColumnBuilder(null, false); + ResolvedRow rootTuple = new ResolvedRow(builder); + new ExplicitSchemaProjection( + scanProj, oneColSchema, rootTuple, + ScanTestUtils.resolvers(metadataManager)); + + // Verify the full output schema + // Since this mode is "discrete", we don't remember the type + // of the missing column. (Instead, it is filled in at the + // vector level as part of vector persistence.) During projection, it is + // marked with type NULL so that the null column builder will fill in + // the proper type. + + TupleMetadata expectedSchema = new SchemaBuilder() + .add("filename", MinorType.VARCHAR) + .add("a", MinorType.INT) + .addNullable("b", MinorType.NULL) + .buildSchema(); + + // Verify + + List<ResolvedColumn> columns = rootTuple.columns(); + assertEquals(3, columns.size()); + assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema)); + assertEquals(ScanTestUtils.FILE_NAME_COL, columns.get(0).name()); + assertEquals("b.csv", ((FileMetadataColumn) columns.get(0)).value()); + assertEquals(ResolvedTableColumn.ID, columns.get(1).nodeType()); + assertEquals(ResolvedNullColumn.ID, columns.get(2).nodeType()); + } + } + + /** * Low-level test of the smoothing projection, including the exceptions * it throws when things are not going its way. */ @@ -463,6 +565,150 @@ public class TestSchemaSmoothing extends SubOperatorTest { assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(priorSchema)); } } + + /** + * If using the legacy wildcard expansion, reuse schema if partition paths + * are the same length. + */ + + @Test + public void testSamePartitionLength() { + + // Set up the file metadata manager + + Path filePathA = new Path("hdfs:///w/x/y/a.csv"); + Path filePathB = new Path("hdfs:///w/x/y/b.csv"); + FileMetadataManager metadataManager = new FileMetadataManager( + fixture.getOptionManager(), + new Path("hdfs:///w"), + Lists.newArrayList(filePathA, filePathB)); + + // Set up the scan level projection + + ScanLevelProjection scanProj = new ScanLevelProjection( + ScanTestUtils.projectAllWithMetadata(2), + ScanTestUtils.parsers(metadataManager.projectionParser())); + + // Define the schema smoother + + SchemaSmoother smoother = new SchemaSmoother(scanProj, + ScanTestUtils.resolvers(metadataManager)); + + TupleMetadata tableSchema = new SchemaBuilder() + .add("a", MinorType.INT) + .add("b", MinorType.VARCHAR) + .buildSchema(); + + TupleMetadata expectedSchema = ScanTestUtils.expandMetadata(tableSchema, metadataManager, 2); + { + metadataManager.startFile(filePathA); + ResolvedRow rootTuple = doResolve(smoother, tableSchema); + assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema)); + } + { + metadataManager.startFile(filePathB); + ResolvedRow rootTuple = doResolve(smoother, tableSchema); + assertEquals(1, smoother.schemaVersion()); + assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema)); + } + } + + /** + * If using the legacy wildcard expansion, reuse schema if the new partition path + * is shorter than the previous. (Unneeded partitions will be set to null by the + * scan projector.) + */ + + @Test + public void testShorterPartitionLength() { + + // Set up the file metadata manager + + Path filePathA = new Path("hdfs:///w/x/y/a.csv"); + Path filePathB = new Path("hdfs:///w/x/b.csv"); + FileMetadataManager metadataManager = new FileMetadataManager( + fixture.getOptionManager(), + new Path("hdfs:///w"), + Lists.newArrayList(filePathA, filePathB)); + + // Set up the scan level projection + + ScanLevelProjection scanProj = new ScanLevelProjection( + ScanTestUtils.projectAllWithMetadata(2), + ScanTestUtils.parsers(metadataManager.projectionParser())); + + // Define the schema smoother + + SchemaSmoother smoother = new SchemaSmoother(scanProj, + ScanTestUtils.resolvers(metadataManager)); + + TupleMetadata tableSchema = new SchemaBuilder() + .add("a", MinorType.INT) + .add("b", MinorType.VARCHAR) + .buildSchema(); + + TupleMetadata expectedSchema = ScanTestUtils.expandMetadata(tableSchema, metadataManager, 2); + { + metadataManager.startFile(filePathA); + ResolvedRow rootTuple = doResolve(smoother, tableSchema); + assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema)); + } + { + metadataManager.startFile(filePathB); + ResolvedRow rootTuple = doResolve(smoother, tableSchema); + assertEquals(1, smoother.schemaVersion()); + assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema)); + } + } + + /** + * If using the legacy wildcard expansion, we are able to use the same + * schema even if the new partition path is longer than the previous. + * Because all file names are provided up front. + */ + + @Test + public void testLongerPartitionLength() { + + // Set up the file metadata manager + + Path filePathA = new Path("hdfs:///w/x/a.csv"); + Path filePathB = new Path("hdfs:///w/x/y/b.csv"); + FileMetadataManager metadataManager = new FileMetadataManager( + fixture.getOptionManager(), + new Path("hdfs:///w"), + Lists.newArrayList(filePathA, filePathB)); + + // Set up the scan level projection + + ScanLevelProjection scanProj = new ScanLevelProjection( + ScanTestUtils.projectAllWithMetadata(2), + ScanTestUtils.parsers(metadataManager.projectionParser())); + + // Define the schema smoother + + SchemaSmoother smoother = new SchemaSmoother(scanProj, + ScanTestUtils.resolvers(metadataManager)); + + TupleMetadata tableSchema = new SchemaBuilder() + .add("a", MinorType.INT) + .add("b", MinorType.VARCHAR) + .buildSchema(); + + TupleMetadata expectedSchema = ScanTestUtils.expandMetadata(tableSchema, metadataManager, 2); + { + metadataManager.startFile(filePathA); + ResolvedRow rootTuple = doResolve(smoother, tableSchema); + assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema)); + } + { + metadataManager.startFile(filePathB); + ResolvedRow rootTuple = doResolve(smoother, tableSchema); + assertEquals(1, smoother.schemaVersion()); + assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema)); + } + } + /** * Integrated test across multiple schemas at the batch level. */ |