path: root/exec/java-exec/src/test/java/org
authorPaul Rogers <progers@cloudera.com>2019-02-23 17:48:21 -0800
committerArina Ielchiieva <arina.yelchiyeva@gmail.com>2019-03-11 11:48:37 +0200
commitd585452b52e94a91ae76a24550c5c476847a9cba (patch)
tree4e91dc0d55bfb5efb93348a323645d5d4ebced40 /exec/java-exec/src/test/java/org
parent4a79e2a52ad2dac64ba001645da1442f0b06fd62 (diff)
DRILL-6952: Host compliant text reader on the row set framework
The result set loader allows controlling batch sizes. The new scan framework, built on top of the result set loader, handles projection, implicit columns, null columns and more. This commit converts the "new" ("compliant") text reader to use that framework. An option selects either the V2 ("new") or the V3 (row-set based) version. Unit tests demonstrate V3 functionality. closes #1683
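For context, the pattern the new unit tests use to exercise both readers is a toggle of the text-reader option around each test body, as seen in the BaseCsvTest helpers added below. A minimal sketch of that pattern, assuming the ClusterTest/ClientFixture fixtures used elsewhere in this patch (the class and wrapper method names here are illustrative only, not part of the commit):

import org.apache.drill.exec.ExecConstants;
import org.apache.drill.test.ClusterTest;

// Hypothetical helper showing how the tests run the same body against V2 and V3.
public class ExampleV3Toggle extends ClusterTest {

  // Runs the test body once with the V2 reader and once with V3,
  // then restores the option to its default.
  protected void runWithBothReaders(Runnable testBody) {
    try {
      client.alterSession(ExecConstants.ENABLE_V3_TEXT_READER_KEY, false); // V2 ("new") reader
      testBody.run();
      client.alterSession(ExecConstants.ENABLE_V3_TEXT_READER_KEY, true);  // V3 (row-set based) reader
      testBody.run();
    } finally {
      client.resetSession(ExecConstants.ENABLE_V3_TEXT_READER_KEY);
    }
  }
}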
Diffstat (limited to 'exec/java-exec/src/test/java/org')
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/TestPartitionFilter.java1
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/protocol/TestOperatorRecordBatch.java3
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArray.java3
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArrayParser.java3
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataColumnParser.java29
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataProjection.java9
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileScanFramework.java8
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorMetadata.java15
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestSchemaSmoothing.java12
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/validate/TestValidationOptions.java2
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestProjectedTuple.java8
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/BaseCsvTest.java101
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestCsv.java271
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestCsvIgnoreHeaders.java114
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestCsvWithHeaders.java873
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestCsvWithoutHeaders.java397
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestHeaderBuilder.java91
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestPartitionRace.java375
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestNewTextReader.java2
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/test/ClusterFixture.java2
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/test/QueryBuilder.java2
21 files changed, 1986 insertions, 335 deletions
diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestPartitionFilter.java b/exec/java-exec/src/test/java/org/apache/drill/TestPartitionFilter.java
index e00e5dc60..87757782b 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/TestPartitionFilter.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/TestPartitionFilter.java
@@ -128,6 +128,7 @@ public class TestPartitionFilter extends PlanTestBase {
String query = "select * from dfs.`multilevel/parquet` where (dir0=1994 and dir1='Q1' and o_custkey < 500) or (dir0=1995 and dir1='Q2' and o_custkey > 500)";
testIncludeFilter(query, 2, "Filter\\(", 8);
}
+
@Test //Parquet: partition filters are ANDed and belong to a top-level OR
public void testPartitionFilter3_Parquet_from_CTAS() throws Exception {
String query = "select * from dfs.tmp.parquet where (yr=1994 and qrtr='Q1' and o_custkey < 500) or (yr=1995 and qrtr='Q2' and o_custkey > 500)";
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/protocol/TestOperatorRecordBatch.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/protocol/TestOperatorRecordBatch.java
index e5ac6d873..89df5986f 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/protocol/TestOperatorRecordBatch.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/protocol/TestOperatorRecordBatch.java
@@ -27,6 +27,7 @@ import static org.junit.Assert.fail;
import java.util.Iterator;
+import org.apache.drill.categories.RowSetTests;
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.types.TypeProtos.DataMode;
@@ -50,12 +51,14 @@ import org.apache.drill.exec.vector.VarCharVector;
import org.apache.drill.test.SubOperatorTest;
import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
import org.junit.Test;
+import org.junit.experimental.categories.Category;
/**
* Test the implementation of the Drill Volcano iterator protocol that
* wraps the modular operator implementation.
*/
+@Category(RowSetTests.class)
public class TestOperatorRecordBatch extends SubOperatorTest {
private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(SubOperatorTest.class);
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArray.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArray.java
index 88ccd3c41..2d066dee4 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArray.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArray.java
@@ -64,7 +64,10 @@ public class TestColumnsArray extends SubOperatorTest {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
FileMetadataManager metadataManager = new FileMetadataManager(
fixture.getOptionManager(),
+ false, // Don't expand partition columns for wildcard
+ false, // N/A
new Path("hdfs:///w"),
+ FileMetadataManager.AUTO_PARTITION_DEPTH,
Lists.newArrayList(filePath));
// ...and the columns array manager
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArrayParser.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArrayParser.java
index d1e91a283..2a5b00e9f 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArrayParser.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArrayParser.java
@@ -232,7 +232,10 @@ public class TestColumnsArrayParser extends SubOperatorTest {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
FileMetadataManager metadataManager = new FileMetadataManager(
fixture.getOptionManager(),
+ false, // Don't expand partition columns for wildcard
+ false, // N/A
new Path("hdfs:///w"),
+ FileMetadataManager.AUTO_PARTITION_DEPTH,
Lists.newArrayList(filePath));
ScanLevelProjection scanProj = new ScanLevelProjection(
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataColumnParser.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataColumnParser.java
index a6de5e6f3..bbc5e19a3 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataColumnParser.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataColumnParser.java
@@ -45,7 +45,10 @@ public class TestFileMetadataColumnParser extends SubOperatorTest {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
FileMetadataManager metadataManager = new FileMetadataManager(
fixture.getOptionManager(),
+ false, // Don't expand partition columns for wildcard
+ false, // N/A
new Path("hdfs:///w"),
+ FileMetadataManager.AUTO_PARTITION_DEPTH,
Lists.newArrayList(filePath));
// Simulate SELECT a, b, c ...
@@ -70,7 +73,10 @@ public class TestFileMetadataColumnParser extends SubOperatorTest {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
FileMetadataManager metadataManager = new FileMetadataManager(
fixture.getOptionManager(),
+ false, // Don't expand partition columns for wildcard
+ false, // N/A
new Path("hdfs:///w"),
+ FileMetadataManager.AUTO_PARTITION_DEPTH,
Lists.newArrayList(filePath));
// Simulate SELECT a, fqn, filEPath, filename, suffix ...
@@ -114,7 +120,10 @@ public class TestFileMetadataColumnParser extends SubOperatorTest {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
FileMetadataManager metadataManager = new FileMetadataManager(
fixture.getOptionManager(),
+ false, // Don't expand partition columns for wildcard
+ false, // N/A
new Path("hdfs:///w"),
+ FileMetadataManager.AUTO_PARTITION_DEPTH,
Lists.newArrayList(filePath));
String dir0 = ScanTestUtils.partitionColName(0);
@@ -146,7 +155,10 @@ public class TestFileMetadataColumnParser extends SubOperatorTest {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
FileMetadataManager metadataManager = new FileMetadataManager(
fixture.getOptionManager(),
+ false, // Don't expand partition columns for wildcard
+ false, // N/A
new Path("hdfs:///w"),
+ FileMetadataManager.AUTO_PARTITION_DEPTH,
Lists.newArrayList(filePath));
ScanLevelProjection scanProj = new ScanLevelProjection(
@@ -171,6 +183,7 @@ public class TestFileMetadataColumnParser extends SubOperatorTest {
true, // Use legacy wildcard expansion
true, // Put partitions at end
new Path("hdfs:///w"),
+ 3, // Max partition depth is 3, though this "scan" sees only 2
Lists.newArrayList(filePath));
ScanLevelProjection scanProj = new ScanLevelProjection(
@@ -178,12 +191,14 @@ public class TestFileMetadataColumnParser extends SubOperatorTest {
Lists.newArrayList(metadataManager.projectionParser()));
List<ColumnProjection> cols = scanProj.columns();
- assertEquals(3, cols.size());
+ assertEquals(4, cols.size());
assertEquals(UnresolvedColumn.WILDCARD, cols.get(0).nodeType());
assertEquals(PartitionColumn.ID, cols.get(1).nodeType());
assertEquals(0, ((PartitionColumn) cols.get(1)).partition());
assertEquals(PartitionColumn.ID, cols.get(2).nodeType());
assertEquals(1, ((PartitionColumn) cols.get(2)).partition());
+ assertEquals(PartitionColumn.ID, cols.get(3).nodeType());
+ assertEquals(2, ((PartitionColumn) cols.get(3)).partition());
}
/**
@@ -199,6 +214,7 @@ public class TestFileMetadataColumnParser extends SubOperatorTest {
true, // Use legacy wildcard expansion
false, // Put partitions at end
new Path("hdfs:///w"),
+ FileMetadataManager.AUTO_PARTITION_DEPTH,
Lists.newArrayList(filePath));
ScanLevelProjection scanProj = new ScanLevelProjection(
@@ -230,6 +246,7 @@ public class TestFileMetadataColumnParser extends SubOperatorTest {
true, // Use legacy wildcard expansion
false, // Put partitions at end
new Path("hdfs:///w"),
+ FileMetadataManager.AUTO_PARTITION_DEPTH,
Lists.newArrayList(filePath));
ScanLevelProjection scanProj = new ScanLevelProjection(
@@ -262,7 +279,10 @@ public class TestFileMetadataColumnParser extends SubOperatorTest {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
FileMetadataManager metadataManager = new FileMetadataManager(
fixture.getOptionManager(),
+ false, // Don't expand partition columns for wildcard
+ false, // N/A
new Path("hdfs:///w"),
+ FileMetadataManager.AUTO_PARTITION_DEPTH,
Lists.newArrayList(filePath));
ScanLevelProjection scanProj = new ScanLevelProjection(
@@ -284,6 +304,7 @@ public class TestFileMetadataColumnParser extends SubOperatorTest {
true, // Use legacy wildcard expansion
true, // Put partitions at end
new Path("hdfs:///w"),
+ FileMetadataManager.AUTO_PARTITION_DEPTH,
Lists.newArrayList(filePath));
ScanLevelProjection scanProj = new ScanLevelProjection(
@@ -310,6 +331,7 @@ public class TestFileMetadataColumnParser extends SubOperatorTest {
true, // Use legacy wildcard expansion
false, // Put partitions at end
new Path("hdfs:///w"),
+ FileMetadataManager.AUTO_PARTITION_DEPTH,
Lists.newArrayList(filePath));
ScanLevelProjection scanProj = new ScanLevelProjection(
@@ -343,6 +365,7 @@ public class TestFileMetadataColumnParser extends SubOperatorTest {
true, // Use legacy wildcard expansion
true, // Put partitions at end
new Path("hdfs:///w"),
+ FileMetadataManager.AUTO_PARTITION_DEPTH,
Lists.newArrayList(filePath));
ScanLevelProjection scanProj = new ScanLevelProjection(
@@ -367,6 +390,7 @@ public class TestFileMetadataColumnParser extends SubOperatorTest {
true, // Use legacy wildcard expansion
false, // Put partitions at end
new Path("hdfs:///w"),
+ FileMetadataManager.AUTO_PARTITION_DEPTH,
Lists.newArrayList(filePath));
ScanLevelProjection scanProj = new ScanLevelProjection(
@@ -394,7 +418,10 @@ public class TestFileMetadataColumnParser extends SubOperatorTest {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
FileMetadataManager metadataManager = new FileMetadataManager(
fixture.getOptionManager(),
+ false, // Don't expand partition columns for wildcard
+ false, // N/A
new Path("hdfs:///w"),
+ FileMetadataManager.AUTO_PARTITION_DEPTH,
Lists.newArrayList(filePath));
ScanLevelProjection scanProj = new ScanLevelProjection(
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataProjection.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataProjection.java
index 314bc2a13..dae89ab0a 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataProjection.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileMetadataProjection.java
@@ -163,7 +163,10 @@ public class TestFileMetadataProjection extends SubOperatorTest {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
FileMetadataManager metadataManager = new FileMetadataManager(
fixture.getOptionManager(),
+ false, // Don't expand partition columns for wildcard
+ false, // N/A
new Path("hdfs:///w"),
+ FileMetadataManager.AUTO_PARTITION_DEPTH,
Lists.newArrayList(filePath));
ScanLevelProjection scanProj = new ScanLevelProjection(
@@ -249,7 +252,10 @@ public class TestFileMetadataProjection extends SubOperatorTest {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
FileMetadataManager metadataManager = new FileMetadataManager(
fixture.getOptionManager(),
+ false, // Don't expand partition columns for wildcard
+ false, // N/A
new Path("hdfs:///w"),
+ FileMetadataManager.AUTO_PARTITION_DEPTH,
Lists.newArrayList(filePath));
ScanLevelProjection scanProj = new ScanLevelProjection(
@@ -300,7 +306,10 @@ public class TestFileMetadataProjection extends SubOperatorTest {
Path filePath = new Path("hdfs:///x/0/1/2/3/4/5/6/7/8/9/10/d11/z.csv");
FileMetadataManager metadataManager = new FileMetadataManager(
fixture.getOptionManager(),
+ false, // Don't expand partition columns for wildcard
+ false, // N/A
new Path("hdfs:///x"),
+ FileMetadataManager.AUTO_PARTITION_DEPTH,
Lists.newArrayList(filePath));
ScanLevelProjection scanProj = new ScanLevelProjection(
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileScanFramework.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileScanFramework.java
index b05bb28f3..7b9fbfbe4 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileScanFramework.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestFileScanFramework.java
@@ -103,6 +103,7 @@ public class TestFileScanFramework extends SubOperatorTest {
public abstract static class BaseFileScanOpFixture extends AbstractScanOpFixture {
protected Path selectionRoot = MOCK_ROOT_PATH;
+ protected int partitionDepth = 3;
protected List<FileWork> files = new ArrayList<>();
protected Configuration fsConfig = new Configuration();
@@ -116,7 +117,7 @@ public class TestFileScanFramework extends SubOperatorTest {
protected abstract BaseFileScanFramework<?> buildFramework();
private void configureFileScan(BaseFileScanFramework<?> framework) {
- framework.setSelectionRoot(selectionRoot);
+ framework.setSelectionRoot(selectionRoot, partitionDepth);
}
}
@@ -311,10 +312,11 @@ public class TestFileScanFramework extends SubOperatorTest {
.add(ScanTestUtils.SUFFIX_COL, MinorType.VARCHAR)
.addNullable(ScanTestUtils.partitionColName(0), MinorType.VARCHAR)
.addNullable(ScanTestUtils.partitionColName(1), MinorType.VARCHAR)
+ .addNullable(ScanTestUtils.partitionColName(2), MinorType.VARCHAR)
.buildSchema();
SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .addRow(30, "fred", MOCK_FILE_FQN, MOCK_FILE_PATH, MOCK_FILE_NAME, MOCK_SUFFIX, MOCK_DIR0, MOCK_DIR1)
- .addRow(40, "wilma", MOCK_FILE_FQN, MOCK_FILE_PATH, MOCK_FILE_NAME, MOCK_SUFFIX, MOCK_DIR0, MOCK_DIR1)
+ .addRow(30, "fred", MOCK_FILE_FQN, MOCK_FILE_PATH, MOCK_FILE_NAME, MOCK_SUFFIX, MOCK_DIR0, MOCK_DIR1, null)
+ .addRow(40, "wilma", MOCK_FILE_FQN, MOCK_FILE_PATH, MOCK_FILE_NAME, MOCK_SUFFIX, MOCK_DIR0, MOCK_DIR1, null)
.build();
RowSetComparison verifier = new RowSetComparison(expected);
assertEquals(expected.batchSchema(), scan.batchAccessor().getSchema());
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorMetadata.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorMetadata.java
index c7b52e2da..7ee91a504 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorMetadata.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestScanOrchestratorMetadata.java
@@ -59,7 +59,10 @@ public class TestScanOrchestratorMetadata extends SubOperatorTest {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
FileMetadataManager metadataManager = new FileMetadataManager(
fixture.getOptionManager(),
+ false, // Don't expand partition columns for wildcard
+ false, // N/A
new Path("hdfs:///w"),
+ FileMetadataManager.AUTO_PARTITION_DEPTH,
Lists.newArrayList(filePath));
ScanSchemaOrchestrator scanner = new ScanSchemaOrchestrator(fixture.allocator());
@@ -124,7 +127,10 @@ public class TestScanOrchestratorMetadata extends SubOperatorTest {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
FileMetadataManager metadataManager = new FileMetadataManager(
fixture.getOptionManager(),
+ false, // Don't expand partition columns for wildcard
+ false, // N/A
new Path("hdfs:///w"),
+ FileMetadataManager.AUTO_PARTITION_DEPTH,
Lists.newArrayList(filePath));
scanner.withMetadata(metadataManager);
@@ -193,7 +199,10 @@ public class TestScanOrchestratorMetadata extends SubOperatorTest {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
FileMetadataManager metadataManager = new FileMetadataManager(
fixture.getOptionManager(),
+ false, // Don't expand partition columns for wildcard
+ false, // N/A
new Path("hdfs:///w"),
+ FileMetadataManager.AUTO_PARTITION_DEPTH,
Lists.newArrayList(filePath));
scanner.withMetadata(metadataManager);
@@ -269,7 +278,10 @@ public class TestScanOrchestratorMetadata extends SubOperatorTest {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
FileMetadataManager metadataManager = new FileMetadataManager(
fixture.getOptionManager(),
+ false, // Don't expand partition columns for wildcard
+ false, // N/A
new Path("hdfs:///w"),
+ FileMetadataManager.AUTO_PARTITION_DEPTH,
Lists.newArrayList(filePath));
scanner.withMetadata(metadataManager);
@@ -334,7 +346,10 @@ public class TestScanOrchestratorMetadata extends SubOperatorTest {
Path filePathB = new Path("hdfs:///w/x/b.csv");
FileMetadataManager metadataManager = new FileMetadataManager(
fixture.getOptionManager(),
+ false, // Don't expand partition columns for wildcard
+ false, // N/A
new Path("hdfs:///w"),
+ FileMetadataManager.AUTO_PARTITION_DEPTH,
Lists.newArrayList(filePathA, filePathB));
scanner.withMetadata(metadataManager);
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestSchemaSmoothing.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestSchemaSmoothing.java
index 8adc0372a..cdfabc444 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestSchemaSmoothing.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/project/TestSchemaSmoothing.java
@@ -102,7 +102,10 @@ public class TestSchemaSmoothing extends SubOperatorTest {
Path filePathB = new Path("hdfs:///w/x/y/b.csv");
FileMetadataManager metadataManager = new FileMetadataManager(
fixture.getOptionManager(),
+ false, // Don't expand partition columns for wildcard
+ false, // N/A
new Path("hdfs:///w"),
+ FileMetadataManager.AUTO_PARTITION_DEPTH,
Lists.newArrayList(filePathA, filePathB));
// Set up the scan level projection
@@ -580,7 +583,10 @@ public class TestSchemaSmoothing extends SubOperatorTest {
Path filePathB = new Path("hdfs:///w/x/y/b.csv");
FileMetadataManager metadataManager = new FileMetadataManager(
fixture.getOptionManager(),
+ false, // Don't expand partition columns for wildcard
+ false, // N/A
new Path("hdfs:///w"),
+ FileMetadataManager.AUTO_PARTITION_DEPTH,
Lists.newArrayList(filePathA, filePathB));
// Set up the scan level projection
@@ -628,7 +634,10 @@ public class TestSchemaSmoothing extends SubOperatorTest {
Path filePathB = new Path("hdfs:///w/x/b.csv");
FileMetadataManager metadataManager = new FileMetadataManager(
fixture.getOptionManager(),
+ false, // Don't expand partition columns for wildcard
+ false, // N/A
new Path("hdfs:///w"),
+ FileMetadataManager.AUTO_PARTITION_DEPTH,
Lists.newArrayList(filePathA, filePathB));
// Set up the scan level projection
@@ -676,7 +685,10 @@ public class TestSchemaSmoothing extends SubOperatorTest {
Path filePathB = new Path("hdfs:///w/x/y/b.csv");
FileMetadataManager metadataManager = new FileMetadataManager(
fixture.getOptionManager(),
+ false, // Don't expand partition columns for wildcard
+ false, // N/A
new Path("hdfs:///w"),
+ FileMetadataManager.AUTO_PARTITION_DEPTH,
Lists.newArrayList(filePathA, filePathB));
// Set up the scan level projection
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/validate/TestValidationOptions.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/validate/TestValidationOptions.java
index 838c889f8..9c8bc9dd3 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/validate/TestValidationOptions.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/validate/TestValidationOptions.java
@@ -20,7 +20,6 @@ package org.apache.drill.exec.physical.impl.validate;
import static org.junit.Assert.assertFalse;
import org.apache.drill.exec.ExecConstants;
-import org.apache.drill.exec.store.easy.text.compliant.CompliantTextRecordReader;
import org.apache.drill.test.BaseDirTestWatcher;
import org.apache.drill.test.ClientFixture;
import org.apache.drill.test.ClusterFixture;
@@ -49,7 +48,6 @@ public class TestValidationOptions extends DrillTest {
.toConsole()
.logger(BatchValidator.class, Level.TRACE)
.logger(IteratorValidatorCreator.class, Level.TRACE)
- .logger(CompliantTextRecordReader.class, Level.TRACE)
.build();
}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestProjectedTuple.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestProjectedTuple.java
index 2966bd561..5fb046e32 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestProjectedTuple.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestProjectedTuple.java
@@ -35,7 +35,6 @@ import org.apache.drill.exec.physical.rowSet.project.RequestedTuple;
import org.apache.drill.exec.physical.rowSet.project.RequestedTuple.RequestedColumn;
import org.apache.drill.exec.physical.rowSet.project.RequestedTupleImpl;
import org.apache.drill.exec.record.metadata.ProjectionType;
-import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@@ -353,9 +352,10 @@ public class TestProjectedTuple {
assertEquals(ProjectionType.UNPROJECTED, projSet.projectionType("foo"));
}
- @Test
- @Ignore("Drill syntax does not support map arrays")
- public void testMapArray() {
+ //@Test
+ //@Ignore("Drill syntax does not support map arrays")
+ @SuppressWarnings("unused")
+ private void testMapArray() {
RequestedTuple projSet = RequestedTupleImpl.parse(
RowSetTestUtils.projectList("a[1].x"));
List<RequestedColumn> cols = projSet.projections();
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/BaseCsvTest.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/BaseCsvTest.java
new file mode 100644
index 000000000..056b8e45a
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/BaseCsvTest.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.easy.text.compliant;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.PrintWriter;
+
+import org.apache.drill.exec.ExecConstants;
+import org.apache.drill.exec.store.easy.text.TextFormatPlugin.TextFormatConfig;
+import org.apache.drill.test.ClusterFixture;
+import org.apache.drill.test.ClusterTest;
+
+public class BaseCsvTest extends ClusterTest {
+
+ protected static final String PART_DIR = "root";
+ protected static final String NESTED_DIR = "nested";
+ protected static final String ROOT_FILE = "first.csv";
+ protected static final String NESTED_FILE = "second.csv";
+
+ protected static String validHeaders[] = {
+ "a,b,c",
+ "10,foo,bar"
+ };
+
+ protected static String secondFile[] = {
+ "a,b,c",
+ "20,fred,wilma"
+ };
+
+ protected static File testDir;
+
+ protected static void setup(boolean skipFirstLine, boolean extractHeader) throws Exception {
+ setup(skipFirstLine, extractHeader, 1);
+ }
+
+ protected static void setup(boolean skipFirstLine, boolean extractHeader,
+ int maxParallelization) throws Exception {
+ startCluster(
+ ClusterFixture.builder(dirTestWatcher)
+ .maxParallelization(maxParallelization));
+
+ // Set up CSV storage plugin using headers.
+
+ TextFormatConfig csvFormat = new TextFormatConfig();
+ csvFormat.fieldDelimiter = ',';
+ csvFormat.skipFirstLine = skipFirstLine;
+ csvFormat.extractHeader = extractHeader;
+
+ testDir = cluster.makeDataDir("data", "csv", csvFormat);
+ }
+
+ protected static void buildNestedTable() throws IOException {
+
+ // Two-level partitioned table
+
+ File rootDir = new File(testDir, PART_DIR);
+ rootDir.mkdir();
+ buildFile(new File(rootDir, ROOT_FILE), validHeaders);
+ File nestedDir = new File(rootDir, NESTED_DIR);
+ nestedDir.mkdir();
+ buildFile(new File(nestedDir, NESTED_FILE), secondFile);
+ }
+
+ protected void enableV3(boolean enable) {
+ client.alterSession(ExecConstants.ENABLE_V3_TEXT_READER_KEY, enable);
+ }
+
+ protected void resetV3() {
+ client.resetSession(ExecConstants.ENABLE_V3_TEXT_READER_KEY);
+ }
+
+ protected static void buildFile(String fileName, String[] data) throws IOException {
+ buildFile(new File(testDir, fileName), data);
+ }
+
+ protected static void buildFile(File file, String[] data) throws IOException {
+ try(PrintWriter out = new PrintWriter(new FileWriter(file))) {
+ for (String line : data) {
+ out.println(line);
+ }
+ }
+ }
+
+}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestCsv.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestCsv.java
deleted file mode 100644
index 25aa738eb..000000000
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestCsv.java
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.store.easy.text.compliant;
-
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-
-import org.apache.drill.common.types.TypeProtos.MinorType;
-import org.apache.drill.exec.record.metadata.SchemaBuilder;
-import org.apache.drill.exec.record.metadata.TupleMetadata;
-import org.apache.drill.exec.store.easy.text.TextFormatPlugin.TextFormatConfig;
-import org.apache.drill.test.ClusterFixture;
-import org.apache.drill.test.ClusterTest;
-import org.apache.drill.test.rowSet.RowSet;
-import org.apache.drill.test.rowSet.RowSetBuilder;
-import org.apache.drill.test.rowSet.RowSetUtilities;
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-/**
- * SQL-level tests for CSV headers. See
- * {@link TestHeaderBuilder} for detailed unit tests.
- * This test does not attempt to duplicate all the cases
- * from the unit tests; instead it just does a sanity check.
- */
-
-public class TestCsv extends ClusterTest {
-
- private static final String CASE2_FILE_NAME = "case2.csv";
-
- private static File testDir;
-
- @BeforeClass
- public static void setup() throws Exception {
- startCluster(ClusterFixture.builder(dirTestWatcher).maxParallelization(1));
-
- // Set up CSV storage plugin using headers.
-
- TextFormatConfig csvFormat = new TextFormatConfig();
- csvFormat.fieldDelimiter = ',';
- csvFormat.skipFirstLine = false;
- csvFormat.extractHeader = true;
-
- testDir = cluster.makeDataDir("data", "csv", csvFormat);
- buildFile(CASE2_FILE_NAME, validHeaders);
- }
-
- private static String emptyHeaders[] = {
- "",
- "10,foo,bar"
- };
-
- @Test
- public void testEmptyCsvHeaders() throws IOException {
- String fileName = "case1.csv";
- buildFile(fileName, emptyHeaders);
- try {
- client.queryBuilder().sql(makeStatement(fileName)).run();
- fail();
- } catch (Exception e) {
- assertTrue(e.getMessage().contains("must define at least one header"));
- }
- }
-
- private static String validHeaders[] = {
- "a,b,c",
- "10,foo,bar"
- };
-
- @Test
- public void testValidCsvHeaders() throws IOException {
- RowSet actual = client.queryBuilder().sql(makeStatement(CASE2_FILE_NAME)).rowSet();
-
- TupleMetadata expectedSchema = new SchemaBuilder()
- .add("a", MinorType.VARCHAR)
- .add("b", MinorType.VARCHAR)
- .add("c", MinorType.VARCHAR)
- .buildSchema();
- RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
- .addRow("10", "foo", "bar")
- .build();
- RowSetUtilities.verify(expected, actual);
- }
-
- private static String invalidHeaders[] = {
- "$,,9b,c,c,c_2",
- "10,foo,bar,fourth,fifth,sixth"
- };
-
- @Test
- public void testInvalidCsvHeaders() throws IOException {
- String fileName = "case3.csv";
- buildFile(fileName, invalidHeaders);
- RowSet actual = client.queryBuilder().sql(makeStatement(fileName)).rowSet();
-
- TupleMetadata expectedSchema = new SchemaBuilder()
- .add("column_1", MinorType.VARCHAR)
- .add("column_2", MinorType.VARCHAR)
- .add("col_9b", MinorType.VARCHAR)
- .add("c", MinorType.VARCHAR)
- .add("c_2", MinorType.VARCHAR)
- .add("c_2_2", MinorType.VARCHAR)
- .buildSchema();
- RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
- .addRow("10", "foo", "bar", "fourth", "fifth", "sixth")
- .build();
- RowSetUtilities.verify(expected, actual);
- }
-
- // Test fix for DRILL-5590
- @Test
- public void testCsvHeadersCaseInsensitive() throws IOException {
- String sql = "SELECT A, b, C FROM `dfs.data`.`%s`";
- RowSet actual = client.queryBuilder().sql(sql, CASE2_FILE_NAME).rowSet();
-
- TupleMetadata expectedSchema = new SchemaBuilder()
- .add("A", MinorType.VARCHAR)
- .add("b", MinorType.VARCHAR)
- .add("C", MinorType.VARCHAR)
- .buildSchema();
-
- RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
- .addRow("10", "foo", "bar")
- .build();
- RowSetUtilities.verify(expected, actual);
- }
-
- private String makeStatement(String fileName) {
- return "SELECT * FROM `dfs.data`.`" + fileName + "`";
- }
-
- private static void buildFile(String fileName, String[] data) throws IOException {
- try(PrintWriter out = new PrintWriter(new FileWriter(new File(testDir, fileName)))) {
- for (String line : data) {
- out.println(line);
- }
- }
- }
-
- /**
- * Verify that the wildcard expands columns to the header names, including
- * case
- */
- @Test
- public void testWildcard() throws IOException {
- String sql = "SELECT * FROM `dfs.data`.`%s`";
- RowSet actual = client.queryBuilder().sql(sql, CASE2_FILE_NAME).rowSet();
-
- TupleMetadata expectedSchema = new SchemaBuilder()
- .add("a", MinorType.VARCHAR)
- .add("b", MinorType.VARCHAR)
- .add("c", MinorType.VARCHAR)
- .buildSchema();
-
- RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
- .addRow("10", "foo", "bar")
- .build();
- RowSetUtilities.verify(expected, actual);
- }
-
- /**
- * Verify that implicit columns are recognized and populated. Sanity test
- * of just one implicit column.
- */
- @Test
- public void testImplicitColsExplicitSelect() throws IOException {
- String sql = "SELECT A, filename FROM `dfs.data`.`%s`";
- RowSet actual = client.queryBuilder().sql(sql, CASE2_FILE_NAME).rowSet();
-
- TupleMetadata expectedSchema = new SchemaBuilder()
- .add("A", MinorType.VARCHAR)
- .addNullable("filename", MinorType.VARCHAR)
- .buildSchema();
-
- RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
- .addRow("10", CASE2_FILE_NAME)
- .build();
- RowSetUtilities.verify(expected, actual);
- }
-
- /**
- * Verify that implicit columns are recognized and populated. Sanity test
- * of just one implicit column.
- */
- @Test
- public void testImplicitColsWildcard() throws IOException {
- String sql = "SELECT *, filename FROM `dfs.data`.`%s`";
- RowSet actual = client.queryBuilder().sql(sql, CASE2_FILE_NAME).rowSet();
-
- TupleMetadata expectedSchema = new SchemaBuilder()
- .add("a", MinorType.VARCHAR)
- .add("b", MinorType.VARCHAR)
- .add("c", MinorType.VARCHAR)
- .addNullable("filename", MinorType.VARCHAR)
- .buildSchema();
-
- RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
- .addRow("10", "foo", "bar", CASE2_FILE_NAME)
- .build();
- RowSetUtilities.verify(expected, actual);
- }
-
- /**
- * CSV does not allow explicit use of dir0, dir1, etc. columns. Treated
- * as undefined nullable int columns.
- * <p>
- * Note that the class path storage plugin does not support directories
- * (partitions). It is unclear if that should show up here as the
- * partition column names being undefined (hence Nullable INT) or should
- * they still be defined, but set to a null Nullable VARCHAR?
- */
- @Test
- public void testPartitionColsWildcard() throws IOException {
- String sql = "SELECT *, dir0, dir5 FROM `dfs.data`.`%s`";
- RowSet actual = client.queryBuilder().sql(sql, CASE2_FILE_NAME).rowSet();
-
- TupleMetadata expectedSchema = new SchemaBuilder()
- .add("a", MinorType.VARCHAR)
- .add("b", MinorType.VARCHAR)
- .add("c", MinorType.VARCHAR)
- .addNullable("dir0", MinorType.INT)
- .addNullable("dir5", MinorType.INT)
- .buildSchema();
-
- RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
- .addRow("10", "foo", "bar", null, null)
- .build();
- RowSetUtilities.verify(expected, actual);
- }
-
- /**
- * CSV does not allow explicit use of dir0, dir1, etc. columns. Treated
- * as undefined nullable int columns.
- */
- @Test
- public void testPartitionColsExplicit() throws IOException {
- String sql = "SELECT a, dir0, dir5 FROM `dfs.data`.`%s`";
- RowSet actual = client.queryBuilder().sql(sql, CASE2_FILE_NAME).rowSet();
-
- TupleMetadata expectedSchema = new SchemaBuilder()
- .add("a", MinorType.VARCHAR)
- .addNullable("dir0", MinorType.INT)
- .addNullable("dir5", MinorType.INT)
- .buildSchema();
-
- RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
- .addRow("10", null, null)
- .build();
- RowSetUtilities.verify(expected, actual);
- }
-}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestCsvIgnoreHeaders.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestCsvIgnoreHeaders.java
new file mode 100644
index 000000000..d983f87e3
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestCsvIgnoreHeaders.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.easy.text.compliant;
+
+import static org.apache.drill.test.rowSet.RowSetUtilities.strArray;
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.drill.categories.RowSetTests;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.record.metadata.SchemaBuilder;
+import org.apache.drill.exec.record.metadata.TupleMetadata;
+import org.apache.drill.test.rowSet.RowSet;
+import org.apache.drill.test.rowSet.RowSetBuilder;
+import org.apache.drill.test.rowSet.RowSetUtilities;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+// CSV reader now hosted on the row set framework
+@Category(RowSetTests.class)
+public class TestCsvIgnoreHeaders extends BaseCsvTest{
+
+ private static String withHeaders[] = {
+ "a,b,c",
+ "10,foo,bar",
+ "20,fred,wilma"
+ };
+
+ private static String raggedRows[] = {
+ "a,b,c",
+ "10,dino",
+ "20,foo,bar",
+ "30"
+ };
+
+ @BeforeClass
+ public static void setup() throws Exception {
+ BaseCsvTest.setup(true, false);
+ }
+
+ @Test
+ public void testColumns() throws IOException {
+ try {
+ enableV3(false);
+ doTestColumns();
+ enableV3(true);
+ doTestColumns();
+ } finally {
+ resetV3();
+ }
+ }
+
+ private void doTestColumns() throws IOException {
+ String fileName = "simple.csv";
+ buildFile(fileName, withHeaders);
+ String sql = "SELECT columns FROM `dfs.data`.`%s`";
+ RowSet actual = client.queryBuilder().sql(sql, fileName).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addArray("columns", MinorType.VARCHAR)
+ .buildSchema();
+
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addSingleCol(strArray("10", "foo", "bar"))
+ .addSingleCol(strArray("20", "fred", "wilma"))
+ .build();
+ RowSetUtilities.verify(expected, actual);
+ }
+
+ @Test
+ public void testRaggedRows() throws IOException {
+ try {
+ enableV3(false);
+ doTestRaggedRows();
+ enableV3(true);
+ doTestRaggedRows();
+ } finally {
+ resetV3();
+ }
+ }
+
+ private void doTestRaggedRows() throws IOException {
+ String fileName = "ragged.csv";
+ TestCsvWithHeaders.buildFile(new File(testDir, fileName), raggedRows);
+ String sql = "SELECT columns FROM `dfs.data`.`%s`";
+ RowSet actual = client.queryBuilder().sql(sql, fileName).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addArray("columns", MinorType.VARCHAR)
+ .buildSchema();
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addSingleCol(strArray("10", "dino"))
+ .addSingleCol(strArray("20", "foo", "bar"))
+ .addSingleCol(strArray("30"))
+ .build();
+ RowSetUtilities.verify(expected, actual);
+ }
+}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestCsvWithHeaders.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestCsvWithHeaders.java
new file mode 100644
index 000000000..655d04db6
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestCsvWithHeaders.java
@@ -0,0 +1,873 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.easy.text.compliant;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.drill.categories.RowSetTests;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.record.metadata.SchemaBuilder;
+import org.apache.drill.exec.record.metadata.TupleMetadata;
+import org.apache.drill.test.rowSet.DirectRowSet;
+import org.apache.drill.test.rowSet.RowSet;
+import org.apache.drill.test.rowSet.RowSetBuilder;
+import org.apache.drill.test.rowSet.RowSetReader;
+import org.apache.drill.test.rowSet.RowSetUtilities;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Sanity test of CSV files with headers. Tests both the original
+ * "compliant" version and the V3 version based on the row set
+ * framework.
+ * <p>
+ * The CSV reader is a "canary in the coal mine" for many scan features.
+ * It turns out that there are several bugs in "V2" (AKA "new text reader")
+ * that are fixed in "V3" (the one based on the row set framework), and one
+ * that is not yet fixed.
+ *
+ * <ul>
+ * <li>Ragged rows will crash the V2 text reader when headers are used.
+ * No V2 test exists as a result. Fixed in V3.</li>
+ * <li>DRILL-7083: in V2, if files are nested to 2 levels, but we ask
+ * for dir2 (the non-existent third level), the type of dir2 will be
+ * nullable INT. In V3, the type is Nullable VARCHAR (just like for the
+ * existing partition levels.)</li>
+ * <li>DRILL-7080: A query like SELECT *, dir0 produces the result schema
+ * of (dir0, a, b, ...) in V2 and (a, b, ... dir0, dir00) in V3. This
+ * seems to be a bug in the Project operator.</li>
+ * </ul>
+ *
+ * The V3 tests all demonstrate that the row set scan framework
+ * delivers a first empty batch from each scan. I (Paul) had understood
+ * that we had a "fast schema" path as the result of the "empty batch"
+ * project. However, the V2 reader does not provide the schema-only
+ * first batch, so it is unclear whether providing one is a feature, or a
+ * bug due to the change. Easy enough to change if we choose to. If so, the
+ * tests here would remove the test for that schema-only batch.
+ * <p>
+ * Tests are run for both V2 and V3. When the results are the same,
+ * the test occurs once, wrapped in a "driver" to select V2 or V3 mode.
+ * When behavior differs, there are separate tests for V2 and V3.
+ * <p>
+ * The V2 tests are temporary. Once we accept that V3 is stable, we
+ * can remove V2 (and the "old text reader.") The behavior in V3 is
+ * more correct, no reason to keep the old, broken behavior.
+ *
+ * @see {@link TestHeaderBuilder}
+ */
+
+// CSV reader now hosted on the row set framework
+@Category(RowSetTests.class)
+public class TestCsvWithHeaders extends BaseCsvTest {
+
+ private static final String TEST_FILE_NAME = "case2.csv";
+
+ private static String invalidHeaders[] = {
+ "$,,9b,c,c,c_2",
+ "10,foo,bar,fourth,fifth,sixth"
+ };
+
+ private static String emptyHeaders[] = {
+ "",
+ "10,foo,bar"
+ };
+
+ private static String raggedRows[] = {
+ "a,b,c",
+ "10,dino",
+ "20,foo,bar",
+ "30"
+ };
+
+ @BeforeClass
+ public static void setup() throws Exception {
+ BaseCsvTest.setup(false, true);
+ buildFile(TEST_FILE_NAME, validHeaders);
+ buildNestedTable();
+ }
+
+ private static final String EMPTY_FILE = "empty.csv";
+
+ @Test
+ public void testEmptyFile() throws IOException {
+ buildFile(EMPTY_FILE, new String[] {});
+ try {
+ enableV3(false);
+ doTestEmptyFile();
+ enableV3(true);
+ doTestEmptyFile();
+ } finally {
+ resetV3();
+ }
+ }
+
+ private void doTestEmptyFile() throws IOException {
+ RowSet rowSet = client.queryBuilder().sql(makeStatement(EMPTY_FILE)).rowSet();
+ assertNull(rowSet);
+ }
+
+ private static final String EMPTY_HEADERS_FILE = "noheaders.csv";
+
+ /**
+ * Trivial case: empty header. This case should fail.
+ */
+
+ @Test
+ public void testEmptyCsvHeaders() throws IOException {
+ buildFile(EMPTY_HEADERS_FILE, emptyHeaders);
+ try {
+ enableV3(false);
+ doTestEmptyCsvHeaders();
+ enableV3(true);
+ doTestEmptyCsvHeaders();
+ } finally {
+ resetV3();
+ }
+ }
+
+ private void doTestEmptyCsvHeaders() throws IOException {
+ try {
+ client.queryBuilder().sql(makeStatement(EMPTY_HEADERS_FILE)).run();
+ fail();
+ } catch (Exception e) {
+ assertTrue(e.getMessage().contains("must define at least one header"));
+ }
+ }
+
+ @Test
+ public void testValidCsvHeaders() throws IOException {
+ try {
+ enableV3(false);
+ doTestValidCsvHeaders();
+ enableV3(true);
+ doTestValidCsvHeaders();
+ } finally {
+ resetV3();
+ }
+ }
+
+ private void doTestValidCsvHeaders() throws IOException {
+ RowSet actual = client.queryBuilder().sql(makeStatement(TEST_FILE_NAME)).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("a", MinorType.VARCHAR)
+ .add("b", MinorType.VARCHAR)
+ .add("c", MinorType.VARCHAR)
+ .buildSchema();
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("10", "foo", "bar")
+ .build();
+ RowSetUtilities.verify(expected, actual);
+ }
+
+ @Test
+ public void testInvalidCsvHeaders() throws IOException {
+ try {
+ enableV3(false);
+ doTestInvalidCsvHeaders();
+ enableV3(true);
+ doTestInvalidCsvHeaders();
+ } finally {
+ resetV3();
+ }
+ }
+
+ private void doTestInvalidCsvHeaders() throws IOException {
+ String fileName = "case3.csv";
+ buildFile(fileName, invalidHeaders);
+ RowSet actual = client.queryBuilder().sql(makeStatement(fileName)).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("column_1", MinorType.VARCHAR)
+ .add("column_2", MinorType.VARCHAR)
+ .add("col_9b", MinorType.VARCHAR)
+ .add("c", MinorType.VARCHAR)
+ .add("c_2", MinorType.VARCHAR)
+ .add("c_2_2", MinorType.VARCHAR)
+ .buildSchema();
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("10", "foo", "bar", "fourth", "fifth", "sixth")
+ .build();
+ RowSetUtilities.verify(expected, actual);
+ }
+
+ @Test
+ public void testCsvHeadersCaseInsensitive() throws IOException {
+ try {
+ enableV3(false);
+ doTestCsvHeadersCaseInsensitive();
+ enableV3(true);
+ doTestCsvHeadersCaseInsensitive();
+ } finally {
+ resetV3();
+ }
+ }
+
+ // Test fix for DRILL-5590
+ private void doTestCsvHeadersCaseInsensitive() throws IOException {
+ String sql = "SELECT A, b, C FROM `dfs.data`.`%s`";
+ RowSet actual = client.queryBuilder().sql(sql, TEST_FILE_NAME).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("A", MinorType.VARCHAR)
+ .add("b", MinorType.VARCHAR)
+ .add("C", MinorType.VARCHAR)
+ .buildSchema();
+
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("10", "foo", "bar")
+ .build();
+ RowSetUtilities.verify(expected, actual);
+ }
+
+ private String makeStatement(String fileName) {
+ return "SELECT * FROM `dfs.data`.`" + fileName + "`";
+ }
+
+ @Test
+ public void testWildcard() throws IOException {
+ try {
+ enableV3(false);
+ doTestWildcard();
+ enableV3(true);
+ doTestWildcard();
+ } finally {
+ resetV3();
+ }
+ }
+
+ /**
+ * Verify that the wildcard expands columns to the header names, including
+ * case
+ */
+ private void doTestWildcard() throws IOException {
+ String sql = "SELECT * FROM `dfs.data`.`%s`";
+ RowSet actual = client.queryBuilder().sql(sql, TEST_FILE_NAME).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("a", MinorType.VARCHAR)
+ .add("b", MinorType.VARCHAR)
+ .add("c", MinorType.VARCHAR)
+ .buildSchema();
+
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("10", "foo", "bar")
+ .build();
+ RowSetUtilities.verify(expected, actual);
+ }
+
+ /**
+ * Verify that implicit columns are recognized and populated. Sanity test
+ * of just one implicit column. V2 uses nullable VARCHAR for file
+ * metadata columns.
+ */
+
+ @Test
+ public void testImplicitColsExplicitSelectV2() throws IOException {
+ try {
+ enableV3(false);
+ String sql = "SELECT A, filename FROM `dfs.data`.`%s`";
+ RowSet actual = client.queryBuilder().sql(sql, TEST_FILE_NAME).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("A", MinorType.VARCHAR)
+ .addNullable("filename", MinorType.VARCHAR)
+ .buildSchema();
+
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("10", TEST_FILE_NAME)
+ .build();
+ RowSetUtilities.verify(expected, actual);
+ } finally {
+ resetV3();
+ }
+ }
+
+ /**
+ * Verify that implicit columns are recognized and populated. Sanity test
+ * of just one implicit column. V3 uses non-nullable VARCHAR for file
+ * metadata columns.
+ */
+
+ @Test
+ public void testImplicitColsExplicitSelectV3() throws IOException {
+ try {
+ enableV3(true);
+ String sql = "SELECT A, filename FROM `dfs.data`.`%s`";
+ RowSet actual = client.queryBuilder().sql(sql, TEST_FILE_NAME).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("A", MinorType.VARCHAR)
+ .add("filename", MinorType.VARCHAR)
+ .buildSchema();
+
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("10", TEST_FILE_NAME)
+ .build();
+ RowSetUtilities.verify(expected, actual);
+ } finally {
+ resetV3();
+ }
+ }
+
+ /**
+ * Verify that implicit columns are recognized and populated. Sanity test
+ * of just one implicit column. V2 uses nullable VARCHAR for file
+ * metadata columns.
+ */
+
+ @Test
+ public void testImplicitColWildcardV2() throws IOException {
+ try {
+ enableV3(false);
+ String sql = "SELECT *, filename FROM `dfs.data`.`%s`";
+ RowSet actual = client.queryBuilder().sql(sql, TEST_FILE_NAME).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("a", MinorType.VARCHAR)
+ .add("b", MinorType.VARCHAR)
+ .add("c", MinorType.VARCHAR)
+ .addNullable("filename", MinorType.VARCHAR)
+ .buildSchema();
+
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("10", "foo", "bar", TEST_FILE_NAME)
+ .build();
+ RowSetUtilities.verify(expected, actual);
+ } finally {
+ resetV3();
+ }
+ }
+
+ /**
+ * Verify that implicit columns are recognized and populated. Sanity test
+ * of just one implicit column. V3 uses non-nullable VARCHAR for file
+ * metadata columns.
+ */
+
+ @Test
+ public void testImplicitColWildcardV3() throws IOException {
+ try {
+ enableV3(true);
+ String sql = "SELECT *, filename FROM `dfs.data`.`%s`";
+ RowSet actual = client.queryBuilder().sql(sql, TEST_FILE_NAME).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("a", MinorType.VARCHAR)
+ .add("b", MinorType.VARCHAR)
+ .add("c", MinorType.VARCHAR)
+ .add("filename", MinorType.VARCHAR)
+ .buildSchema();
+
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("10", "foo", "bar", TEST_FILE_NAME)
+ .build();
+ RowSetUtilities.verify(expected, actual);
+ } finally {
+ resetV3();
+ }
+ }
+
+ @Test
+ public void testColsWithWildcard() throws IOException {
+ try {
+ enableV3(false);
+ doTestColsWithWildcard();
+ enableV3(true);
+ doTestColsWithWildcard();
+ } finally {
+ resetV3();
+ }
+ }
+
+ private void doTestColsWithWildcard() throws IOException {
+ String sql = "SELECT *, a as d FROM `dfs.data`.`%s`";
+ RowSet actual = client.queryBuilder().sql(sql, TEST_FILE_NAME).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("a", MinorType.VARCHAR)
+ .add("b", MinorType.VARCHAR)
+ .add("c", MinorType.VARCHAR)
+ .add("d", MinorType.VARCHAR)
+ .buildSchema();
+
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("10", "foo", "bar", "10")
+ .build();
+ RowSetUtilities.verify(expected, actual);
+ }
+
+ /**
+ * V2 does not allow explicit use of dir0, dir1, etc. columns for a non-partitioned
+ * file. Treated as undefined nullable int columns.
+ */
+
+ @Test
+ public void testPartitionColsExplicitV2() throws IOException {
+ try {
+ enableV3(false);
+ String sql = "SELECT a, dir0, dir5 FROM `dfs.data`.`%s`";
+ RowSet actual = client.queryBuilder().sql(sql, TEST_FILE_NAME).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("a", MinorType.VARCHAR)
+ .addNullable("dir0", MinorType.INT)
+ .addNullable("dir5", MinorType.INT)
+ .buildSchema();
+
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("10", null, null)
+ .build();
+ RowSetUtilities.verify(expected, actual);
+ } finally {
+ resetV3();
+ }
+ }
+
+ /**
+ * V3 allows the use of partition columns, even for a non-partitioned file.
+ * The columns are null of type Nullable VARCHAR. This area of Drill
+ * is a bit murky: it seems reasonable to support partition columns consistently
+ * rather than conditionally based on the structure of the input.
+ */
+ @Test
+ public void testPartitionColsExplicitV3() throws IOException {
+ try {
+ enableV3(true);
+ String sql = "SELECT a, dir0, dir5 FROM `dfs.data`.`%s`";
+ RowSet actual = client.queryBuilder().sql(sql, TEST_FILE_NAME).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("a", MinorType.VARCHAR)
+ .addNullable("dir0", MinorType.VARCHAR)
+ .addNullable("dir5", MinorType.VARCHAR)
+ .buildSchema();
+
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("10", null, null)
+ .build();
+ RowSetUtilities.verify(expected, actual);
+ } finally {
+ resetV3();
+ }
+ }
+
+ @Test
+ public void testDupColumn() throws IOException {
+ try {
+ enableV3(false);
+ doTestDupColumn();
+ enableV3(true);
+ doTestDupColumn();
+ } finally {
+ resetV3();
+ }
+ }
+
+ private void doTestDupColumn() throws IOException {
+ String sql = "SELECT a, b, a FROM `dfs.data`.`%s`";
+ RowSet actual = client.queryBuilder().sql(sql, TEST_FILE_NAME).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("a", MinorType.VARCHAR)
+ .add("b", MinorType.VARCHAR)
+ .add("a0", MinorType.VARCHAR)
+ .buildSchema();
+
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("10", "foo", "10")
+ .build();
+ RowSetUtilities.verify(expected, actual);
+ }
+
+ // This test cannot be run for V2. The data gets corrupted and we get
+ // internal errors.
+
+ /**
+ * Test that ragged rows result in the "missing" columns being filled
+ * in with the moral equivalent of a null column for CSV: a blank string.
+ */
+ @Test
+ public void testRaggedRowsV3() throws IOException {
+ try {
+ enableV3(true);
+ String fileName = "case4.csv";
+ buildFile(fileName, raggedRows);
+ RowSet actual = client.queryBuilder().sql(makeStatement(fileName)).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("a", MinorType.VARCHAR)
+ .add("b", MinorType.VARCHAR)
+ .add("c", MinorType.VARCHAR)
+ .buildSchema();
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("10", "dino", "")
+ .addRow("20", "foo", "bar")
+ .addRow("30", "", "")
+ .build();
+ RowSetUtilities.verify(expected, actual);
+ } finally {
+ resetV3();
+ }
+ }
+
+ /**
+ * Test partition expansion. Because the two files are read in the
+ * same scan operator, the schema is consistent. See
+ * {@link TestPartitionRace} for the multi-threaded race in which the
+ * schema becomes unstable.
+ * <p>
+ * V2, since Drill 1.12, puts partition columns ahead of data columns.
+ */
+ @Test
+ public void testPartitionExpansionV2() throws IOException {
+ try {
+ enableV3(false);
+
+ String sql = "SELECT * FROM `dfs.data`.`%s`";
+ Iterator<DirectRowSet> iter = client.queryBuilder().sql(sql, PART_DIR).rowSetIterator();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addNullable("dir0", MinorType.VARCHAR)
+ .add("a", MinorType.VARCHAR)
+ .add("b", MinorType.VARCHAR)
+ .add("c", MinorType.VARCHAR)
+ .buildSchema();
+
+ // Read the two batches.
+
+ for (int i = 0; i < 2; i++) {
+ assertTrue(iter.hasNext());
+ RowSet rowSet = iter.next();
+
+ // Figure out which record this is and test accordingly.
+
+ RowSetReader reader = rowSet.reader();
+ assertTrue(reader.next());
+ String col2 = reader.scalar(1).getString();
+ if (col2.equals("10")) {
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow(null, "10", "foo", "bar")
+ .build();
+ RowSetUtilities.verify(expected, rowSet);
+ } else {
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow(NESTED_DIR, "20", "fred", "wilma")
+ .build();
+ RowSetUtilities.verify(expected, rowSet);
+ }
+ }
+ assertFalse(iter.hasNext());
+ } finally {
+ resetV3();
+ }
+ }
+
+ /**
+ * Test partition expansion in V3.
+ * <p>
+ * This test is tricky because it will return two data batches
+ * (preceded by an empty schema batch). File read order is random
+ * so we have to expect the files in either order.
+ * <p>
+ * V3, as in V2 before Drill 1.12, puts partition columns after
+ * data columns (so that data columns don't shift positions if
+ * files are nested to another level).
+ */
+ @Test
+ public void testPartitionExpansionV3() throws IOException {
+ try {
+ enableV3(true);
+
+ String sql = "SELECT * FROM `dfs.data`.`%s`";
+ Iterator<DirectRowSet> iter = client.queryBuilder().sql(sql, PART_DIR).rowSetIterator();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("a", MinorType.VARCHAR)
+ .add("b", MinorType.VARCHAR)
+ .add("c", MinorType.VARCHAR)
+ .addNullable("dir0", MinorType.VARCHAR)
+ .buildSchema();
+
+ // First batch is empty; just carries the schema.
+
+ assertTrue(iter.hasNext());
+ RowSet rowSet = iter.next();
+ assertEquals(0, rowSet.rowCount());
+ rowSet.clear();
+
+ // Read the other two batches.
+
+ for (int i = 0; i < 2; i++) {
+ assertTrue(iter.hasNext());
+ rowSet = iter.next();
+
+ // Figure out which record this is and test accordingly.
+
+ RowSetReader reader = rowSet.reader();
+ assertTrue(reader.next());
+ String col1 = reader.scalar(0).getString();
+ if (col1.equals("10")) {
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("10", "foo", "bar", null)
+ .build();
+ RowSetUtilities.verify(expected, rowSet);
+ } else {
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("20", "fred", "wilma", NESTED_DIR)
+ .build();
+ RowSetUtilities.verify(expected, rowSet);
+ }
+ }
+ assertFalse(iter.hasNext());
+ } finally {
+ resetV3();
+ }
+ }
+
+ /**
+ * Test the use of partition columns with the wildcard. This works for file
+ * metadata columns, but confuses the project operator when used for
+ * partition columns. DRILL-7080.
+ */
+ @Test
+ public void testWilcardAndPartitionsMultiFilesV2() throws IOException {
+ try {
+ enableV3(false);
+
+ String sql = "SELECT *, dir0, dir1 FROM `dfs.data`.`%s`";
+ Iterator<DirectRowSet> iter = client.queryBuilder().sql(sql, PART_DIR).rowSetIterator();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addNullable("dir0", MinorType.VARCHAR)
+ .add("a", MinorType.VARCHAR)
+ .add("b", MinorType.VARCHAR)
+ .add("c", MinorType.VARCHAR)
+ .addNullable("dir00", MinorType.VARCHAR)
+ .addNullable("dir1", MinorType.INT)
+ .buildSchema();
+
+ // Read the two batches.
+
+ for (int i = 0; i < 2; i++) {
+ assertTrue(iter.hasNext());
+ RowSet rowSet = iter.next();
+
+ // Figure out which record this is and test accordingly.
+
+ RowSetReader reader = rowSet.reader();
+ assertTrue(reader.next());
+ String aCol = reader.scalar("a").getString();
+ if (aCol.equals("10")) {
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow(null, "10", "foo", "bar", null, null)
+ .build();
+ RowSetUtilities.verify(expected, rowSet);
+ } else {
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow(NESTED_DIR, "20", "fred", "wilma", NESTED_DIR, null)
+ .build();
+ RowSetUtilities.verify(expected, rowSet);
+ }
+ }
+ assertFalse(iter.hasNext());
+ } finally {
+ resetV3();
+ }
+ }
+
+ /**
+ * Test the use of partition columns with the wildcard. This works for file
+ * metadata columns, but confuses the project operator when used for
+ * partition columns. DRILL-7080. Still broken in V3 because this appears
+ * to be a Project operator issue, not a reader issue. Note that the
+ * partition column moves after the data columns.
+ */
+ @Test
+ public void testWilcardAndPartitionsMultiFilesV3() throws IOException {
+ try {
+ enableV3(true);
+
+ String sql = "SELECT *, dir0, dir1 FROM `dfs.data`.`%s`";
+ Iterator<DirectRowSet> iter = client.queryBuilder().sql(sql, PART_DIR).rowSetIterator();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("a", MinorType.VARCHAR)
+ .add("b", MinorType.VARCHAR)
+ .add("c", MinorType.VARCHAR)
+ .addNullable("dir0", MinorType.VARCHAR)
+ .addNullable("dir1", MinorType.VARCHAR)
+ .addNullable("dir00", MinorType.VARCHAR)
+ .addNullable("dir10", MinorType.VARCHAR)
+ .buildSchema();
+
+ // First batch is empty; just carries the schema.
+
+ assertTrue(iter.hasNext());
+ RowSet rowSet = iter.next();
+ RowSetUtilities.verify(new RowSetBuilder(client.allocator(), expectedSchema).build(),
+ rowSet);
+
+ // Read the two batches.
+
+ for (int i = 0; i < 2; i++) {
+ assertTrue(iter.hasNext());
+ rowSet = iter.next();
+
+ // Figure out which record this is and test accordingly.
+
+ RowSetReader reader = rowSet.reader();
+ assertTrue(reader.next());
+ String aCol = reader.scalar("a").getString();
+ if (aCol.equals("10")) {
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("10", "foo", "bar", null, null, null, null)
+ .build();
+ RowSetUtilities.verify(expected, rowSet);
+ } else {
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("20", "fred", "wilma", NESTED_DIR, null, NESTED_DIR, null)
+ .build();
+ RowSetUtilities.verify(expected, rowSet);
+ }
+ }
+ assertFalse(iter.hasNext());
+ } finally {
+ resetV3();
+ }
+ }
+
+ /**
+ * Test using partition columns with partitioned files in V2. Since the
+ * file is nested to one level, dir0 is a nullable VARCHAR, but dir1 is
+ * a nullable INT. Since both files are read in a single scan operator,
+ * the schema is consistent.
+ */
+ @Test
+ public void doTestExplicitPartitionsMultiFilesV2() throws IOException {
+ try {
+ enableV3(false);
+
+ String sql = "SELECT a, b, c, dir0, dir1 FROM `dfs.data`.`%s`";
+ Iterator<DirectRowSet> iter = client.queryBuilder().sql(sql, PART_DIR).rowSetIterator();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("a", MinorType.VARCHAR)
+ .add("b", MinorType.VARCHAR)
+ .add("c", MinorType.VARCHAR)
+ .addNullable("dir0", MinorType.VARCHAR)
+ .addNullable("dir1", MinorType.INT)
+ .buildSchema();
+
+ // Read the two batches.
+
+ for (int i = 0; i < 2; i++) {
+ assertTrue(iter.hasNext());
+ RowSet rowSet = iter.next();
+
+ // Figure out which record this is and test accordingly.
+
+ RowSetReader reader = rowSet.reader();
+ assertTrue(reader.next());
+ String aCol = reader.scalar("a").getString();
+ if (aCol.equals("10")) {
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("10", "foo", "bar", null, null)
+ .build();
+ RowSetUtilities.verify(expected, rowSet);
+ } else {
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("20", "fred", "wilma", NESTED_DIR, null)
+ .build();
+ RowSetUtilities.verify(expected, rowSet);
+ }
+ }
+ assertFalse(iter.hasNext());
+ } finally {
+ resetV3();
+ }
+ }
+
+ /**
+ * Test using partition columns with partitioned files in V3. Although the
+ * file is nested to one level, both dir0 and dir1 are nullable VARCHAR.
+ * See {@link TestPartitionRace} to show that the types and schemas
+ * are consistent even when used across multiple scans.
+ */
+ @Test
+ public void doTestExplicitPartitionsMultiFilesV3() throws IOException {
+ try {
+ enableV3(true);
+
+ String sql = "SELECT a, b, c, dir0, dir1 FROM `dfs.data`.`%s`";
+ Iterator<DirectRowSet> iter = client.queryBuilder().sql(sql, PART_DIR).rowSetIterator();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("a", MinorType.VARCHAR)
+ .add("b", MinorType.VARCHAR)
+ .add("c", MinorType.VARCHAR)
+ .addNullable("dir0", MinorType.VARCHAR)
+ .addNullable("dir1", MinorType.VARCHAR)
+ .buildSchema();
+
+ // First batch is empty; just carries the schema.
+
+ assertTrue(iter.hasNext());
+ RowSet rowSet = iter.next();
+ RowSetUtilities.verify(new RowSetBuilder(client.allocator(), expectedSchema).build(),
+ rowSet);
+
+ // Read the two batches.
+
+ for (int i = 0; i < 2; i++) {
+ assertTrue(iter.hasNext());
+ rowSet = iter.next();
+
+ // Figure out which record this is and test accordingly.
+
+ RowSetReader reader = rowSet.reader();
+ assertTrue(reader.next());
+ String aCol = reader.scalar("a").getString();
+ if (aCol.equals("10")) {
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("10", "foo", "bar", null, null)
+ .build();
+ RowSetUtilities.verify(expected, rowSet);
+ } else {
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("20", "fred", "wilma", NESTED_DIR, null)
+ .build();
+ RowSetUtilities.verify(expected, rowSet);
+ }
+ }
+ assertFalse(iter.hasNext());
+ } finally {
+ resetV3();
+ }
+ }
+}
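The enableV3()/resetV3() helpers called throughout this class come from BaseCsvTest, which this commit also reworks but which is not reproduced in this section. A minimal sketch of how such a toggle can be written against the cluster fixture, assuming the V3 switch is a boolean session option (the key name below is an assumption, not something this hunk shows):

    // BaseCsvTest-style helpers (sketch only; the option key is assumed).
    protected static void enableV3(boolean enable) {
      client.alterSession(ExecConstants.ENABLE_V3_TEXT_READER_KEY, enable);
    }

    protected static void resetV3() {
      client.resetSession(ExecConstants.ENABLE_V3_TEXT_READER_KEY);
    }

Each test wraps the toggle in try/finally so the session option is always restored, and most run the same body once under V2 and once under V3.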
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestCsvWithoutHeaders.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestCsvWithoutHeaders.java
new file mode 100644
index 000000000..6051875f2
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestCsvWithoutHeaders.java
@@ -0,0 +1,397 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.easy.text.compliant;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.drill.categories.RowSetTests;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.record.metadata.SchemaBuilder;
+import org.apache.drill.exec.record.metadata.TupleMetadata;
+import org.apache.drill.exec.vector.accessor.ArrayReader;
+import org.apache.drill.test.rowSet.DirectRowSet;
+import org.apache.drill.test.rowSet.RowSet;
+import org.apache.drill.test.rowSet.RowSetBuilder;
+import org.apache.drill.test.rowSet.RowSetReader;
+import org.apache.drill.test.rowSet.RowSetUtilities;
+
+import static org.apache.drill.test.rowSet.RowSetUtilities.strArray;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+// CSV reader now hosted on the row set framework
+@Category(RowSetTests.class)
+public class TestCsvWithoutHeaders extends BaseCsvTest {
+
+ private static final String TEST_FILE_NAME = "simple.csv";
+
+ private static String sampleData[] = {
+ "10,foo,bar",
+ "20,fred,wilma"
+ };
+
+ private static String raggedRows[] = {
+ "10,dino",
+ "20,foo,bar",
+ "30"
+ };
+
+ private static String secondSet[] = {
+ "30,barney,betty"
+ };
+
+ @BeforeClass
+ public static void setup() throws Exception {
+ BaseCsvTest.setup(false, false);
+
+ buildFile(TEST_FILE_NAME, sampleData);
+ buildNestedTableWithoutHeaders();
+ }
+
+ protected static void buildNestedTableWithoutHeaders() throws IOException {
+
+ // Two-level partitioned table
+
+ File rootDir = new File(testDir, PART_DIR);
+ rootDir.mkdir();
+ buildFile(new File(rootDir, ROOT_FILE), sampleData);
+ File nestedDir = new File(rootDir, NESTED_DIR);
+ nestedDir.mkdir();
+ buildFile(new File(nestedDir, NESTED_FILE), secondSet);
+ }
+
+ @Test
+ public void testWildcard() throws IOException {
+ try {
+ enableV3(false);
+ doTestWildcard();
+ enableV3(true);
+ doTestWildcard();
+ } finally {
+ resetV3();
+ }
+ }
+
+ /**
+ * Verify that the wildcard expands to the `columns` array.
+ */
+
+ private void doTestWildcard() throws IOException {
+ String sql = "SELECT * FROM `dfs.data`.`%s`";
+ RowSet actual = client.queryBuilder().sql(sql, TEST_FILE_NAME).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addArray("columns", MinorType.VARCHAR)
+ .buildSchema();
+
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addSingleCol(strArray("10", "foo", "bar"))
+ .addSingleCol(strArray("20", "fred", "wilma"))
+ .build();
+ RowSetUtilities.verify(expected, actual);
+ }
+
+ @Test
+ public void testColumns() throws IOException {
+ try {
+ enableV3(false);
+ doTestColumns();
+ enableV3(true);
+ doTestColumns();
+ } finally {
+ resetV3();
+ }
+ }
+
+ private void doTestColumns() throws IOException {
+ String sql = "SELECT columns FROM `dfs.data`.`%s`";
+ RowSet actual = client.queryBuilder().sql(sql, TEST_FILE_NAME).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addArray("columns", MinorType.VARCHAR)
+ .buildSchema();
+
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addSingleCol(strArray("10", "foo", "bar"))
+ .addSingleCol(strArray("20", "fred", "wilma"))
+ .build();
+ RowSetUtilities.verify(expected, actual);
+ }
+
+ @Test
+ public void doTestWildcardAndMetadataV2() throws IOException {
+ try {
+ enableV3(false);
+ String sql = "SELECT *, filename FROM `dfs.data`.`%s`";
+ RowSet actual = client.queryBuilder().sql(sql, TEST_FILE_NAME).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addArray("columns", MinorType.VARCHAR)
+ .addNullable("filename", MinorType.VARCHAR)
+ .buildSchema();
+
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow(strArray("10", "foo", "bar"), TEST_FILE_NAME)
+ .addRow(strArray("20", "fred", "wilma"), TEST_FILE_NAME)
+ .build();
+ RowSetUtilities.verify(expected, actual);
+ } finally {
+ resetV3();
+ }
+ }
+
+ @Test
+ public void doTestWildcardAndMetadataV3() throws IOException {
+ try {
+ enableV3(true);
+ String sql = "SELECT *, filename FROM `dfs.data`.`%s`";
+ RowSet actual = client.queryBuilder().sql(sql, TEST_FILE_NAME).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addArray("columns", MinorType.VARCHAR)
+ .add("filename", MinorType.VARCHAR)
+ .buildSchema();
+
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow(strArray("10", "foo", "bar"), TEST_FILE_NAME)
+ .addRow(strArray("20", "fred", "wilma"), TEST_FILE_NAME)
+ .build();
+ RowSetUtilities.verify(expected, actual);
+ } finally {
+ resetV3();
+ }
+ }
+
+ @Test
+ public void testColumnsAndMetadataV2() throws IOException {
+ try {
+ enableV3(false);
+ String sql = "SELECT columns, filename FROM `dfs.data`.`%s`";
+ RowSet actual = client.queryBuilder().sql(sql, TEST_FILE_NAME).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addArray("columns", MinorType.VARCHAR)
+ .addNullable("filename", MinorType.VARCHAR)
+ .buildSchema();
+
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow(strArray("10", "foo", "bar"), TEST_FILE_NAME)
+ .addRow(strArray("20", "fred", "wilma"), TEST_FILE_NAME)
+ .build();
+ RowSetUtilities.verify(expected, actual);
+ } finally {
+ resetV3();
+ }
+ }
+
+ @Test
+ public void testColumnsAndMetadataV3() throws IOException {
+ try {
+ enableV3(true);
+ String sql = "SELECT columns, filename FROM `dfs.data`.`%s`";
+ RowSet actual = client.queryBuilder().sql(sql, TEST_FILE_NAME).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addArray("columns", MinorType.VARCHAR)
+ .add("filename", MinorType.VARCHAR)
+ .buildSchema();
+
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow(strArray("10", "foo", "bar"), TEST_FILE_NAME)
+ .addRow(strArray("20", "fred", "wilma"), TEST_FILE_NAME)
+ .build();
+ RowSetUtilities.verify(expected, actual);
+ } finally {
+ resetV3();
+ }
+ }
+
+ @Test
+ public void testSpecificColumns() throws IOException {
+ try {
+ enableV3(false);
+ doTestSpecificColumns();
+ enableV3(true);
+ doTestSpecificColumns();
+ } finally {
+ resetV3();
+ }
+ }
+
+ private void doTestSpecificColumns() throws IOException {
+ String sql = "SELECT columns[0], columns[2] FROM `dfs.data`.`%s`";
+ RowSet actual = client.queryBuilder().sql(sql, TEST_FILE_NAME).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addNullable("EXPR$0", MinorType.VARCHAR)
+ .addNullable("EXPR$1", MinorType.VARCHAR)
+ .buildSchema();
+
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("10", "bar")
+ .addRow("20", "wilma")
+ .build();
+ RowSetUtilities.verify(expected, actual);
+ }
+
+ @Test
+ public void testRaggedRows() throws IOException {
+ try {
+ enableV3(false);
+ doTestRaggedRows();
+ enableV3(true);
+ doTestRaggedRows();
+ } finally {
+ resetV3();
+ }
+ }
+
+ private void doTestRaggedRows() throws IOException {
+ String fileName = "ragged.csv";
+ buildFile(fileName, raggedRows);
+ String sql = "SELECT columns FROM `dfs.data`.`%s`";
+ RowSet actual = client.queryBuilder().sql(sql, fileName).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addArray("columns", MinorType.VARCHAR)
+ .buildSchema();
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addSingleCol(strArray("10", "dino"))
+ .addSingleCol(strArray("20", "foo", "bar"))
+ .addSingleCol(strArray("30"))
+ .build();
+ RowSetUtilities.verify(expected, actual);
+ }
+
+ /**
+ * Test partition expansion. Because the two files are read in the
+ * same scan operator, the schema is consistent.
+ * <p>
+ * V2, since Drill 1.12, puts partition columns ahead of data columns.
+ */
+ @Test
+ public void testPartitionExpansionV2() throws IOException {
+ try {
+ enableV3(false);
+
+ String sql = "SELECT * FROM `dfs.data`.`%s`";
+ Iterator<DirectRowSet> iter = client.queryBuilder().sql(sql, PART_DIR).rowSetIterator();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addNullable("dir0", MinorType.VARCHAR)
+ .addArray("columns", MinorType.VARCHAR)
+ .buildSchema();
+
+ // Read the two batches.
+
+ for (int i = 0; i < 2; i++) {
+ assertTrue(iter.hasNext());
+ RowSet rowSet = iter.next();
+
+ // Figure out which record this is and test accordingly.
+
+ RowSetReader reader = rowSet.reader();
+ assertTrue(reader.next());
+ ArrayReader ar = reader.array(1);
+ assertTrue(ar.next());
+ String col1 = ar.scalar().getString();
+ if (col1.equals("10")) {
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow(null, strArray("10", "foo", "bar"))
+ .addRow(null, strArray("20", "fred", "wilma"))
+ .build();
+ RowSetUtilities.verify(expected, rowSet);
+ } else {
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow(NESTED_DIR, strArray("30", "barney", "betty"))
+ .build();
+ RowSetUtilities.verify(expected, rowSet);
+ }
+ }
+ assertFalse(iter.hasNext());
+ } finally {
+ resetV3();
+ }
+ }
+
+ /**
+ * Test partition expansion in V3.
+ * <p>
+ * V3, as in V2 before Drill 1.12, puts partition columns after
+ * data columns (so that data columns don't shift positions if
+ * files are nested to another level).
+ */
+ @Test
+ public void testPartitionExpansionV3() throws IOException {
+ try {
+ enableV3(true);
+
+ String sql = "SELECT * FROM `dfs.data`.`%s`";
+ Iterator<DirectRowSet> iter = client.queryBuilder().sql(sql, PART_DIR).rowSetIterator();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addArray("columns", MinorType.VARCHAR)
+ .addNullable("dir0", MinorType.VARCHAR)
+ .buildSchema();
+
+ // First batch is empty; just carries the schema.
+
+ assertTrue(iter.hasNext());
+ RowSet rowSet = iter.next();
+ assertEquals(0, rowSet.rowCount());
+ rowSet.clear();
+
+ // Read the other two batches.
+
+ for (int i = 0; i < 2; i++) {
+ assertTrue(iter.hasNext());
+ rowSet = iter.next();
+
+ // Figure out which record this is and test accordingly.
+
+ RowSetReader reader = rowSet.reader();
+ assertTrue(reader.next());
+ ArrayReader ar = reader.array(0);
+ assertTrue(ar.next());
+ String col1 = ar.scalar().getString();
+ if (col1.equals("10")) {
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow(strArray("10", "foo", "bar"), null)
+ .addRow(strArray("20", "fred", "wilma"), null)
+ .build();
+ RowSetUtilities.verify(expected, rowSet);
+ } else {
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow(strArray("30", "barney", "betty"), NESTED_DIR)
+ .build();
+ RowSetUtilities.verify(expected, rowSet);
+ }
+ }
+ assertFalse(iter.hasNext());
+ } finally {
+ resetV3();
+ }
+ }
+}
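buildFile(), used above to generate both the flat and the partitioned inputs, also lives in BaseCsvTest and is outside this section. A plausible sketch, assuming it simply writes one CSV line per array entry into the test data directory (java.io.FileWriter/PrintWriter imports assumed):

    // Sketch of the assumed helper: one array entry becomes one line of the file.
    protected static void buildFile(String fileName, String[] data) throws IOException {
      buildFile(new File(testDir, fileName), data);
    }

    protected static void buildFile(File file, String[] data) throws IOException {
      try (PrintWriter out = new PrintWriter(new FileWriter(file))) {
        for (String line : data) {
          out.println(line);
        }
      }
    }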
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestHeaderBuilder.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestHeaderBuilder.java
index 20bf79652..67429fbd7 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestHeaderBuilder.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestHeaderBuilder.java
@@ -21,25 +21,32 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import org.apache.drill.common.exceptions.UserException;
+import org.apache.drill.exec.store.easy.text.compliant.v3.HeaderBuilder;
import org.apache.drill.test.DrillTest;
+import org.apache.hadoop.fs.Path;
import org.junit.Test;
import org.apache.drill.shaded.guava.com.google.common.base.Charsets;
+/**
+ * Test the mechanism that builds column names from a set of CSV
+ * headers. The mechanism provides reasonable defaults for missing
+ * or invalid headers.
+ */
+
public class TestHeaderBuilder extends DrillTest {
@Test
public void testEmptyHeader() {
- HeaderBuilder hb = new HeaderBuilder();
- hb.startBatch();
+ Path dummyPath = new Path("file:/dummy.csv");
+ HeaderBuilder hb = new HeaderBuilder(dummyPath);
try {
hb.finishRecord();
} catch (UserException e) {
assertTrue(e.getMessage().contains("must define at least one header"));
}
- hb = new HeaderBuilder();
- hb.startBatch();
+ hb = new HeaderBuilder(dummyPath);
parse(hb,"");
try {
hb.finishRecord();
@@ -47,127 +54,107 @@ public class TestHeaderBuilder extends DrillTest {
assertTrue(e.getMessage().contains("must define at least one header"));
}
- hb = new HeaderBuilder();
- hb.startBatch();
+ hb = new HeaderBuilder(dummyPath);
parse(hb," ");
validateHeader(hb, new String[] {"column_1"});
- hb = new HeaderBuilder();
- hb.startBatch();
+ hb = new HeaderBuilder(dummyPath);
parse(hb,",");
validateHeader(hb, new String[] {"column_1", "column_2"});
- hb = new HeaderBuilder();
- hb.startBatch();
+ hb = new HeaderBuilder(dummyPath);
parse(hb," , ");
validateHeader(hb, new String[] {"column_1", "column_2"});
- hb = new HeaderBuilder();
- hb.startBatch();
+ hb = new HeaderBuilder(dummyPath);
parse(hb,"a, ");
validateHeader(hb, new String[] {"a", "column_2"});
}
@Test
public void testWhiteSpace() {
- HeaderBuilder hb = new HeaderBuilder();
- hb.startBatch();
+ Path dummyPath = new Path("file:/dummy.csv");
+ HeaderBuilder hb = new HeaderBuilder(dummyPath);
parse(hb,"a");
validateHeader(hb, new String[] {"a"});
- hb = new HeaderBuilder();
- hb.startBatch();
+ hb = new HeaderBuilder(dummyPath);
parse(hb," a ");
validateHeader(hb, new String[] {"a"});
- hb = new HeaderBuilder();
- hb.startBatch();
+ hb = new HeaderBuilder(dummyPath);
parse(hb," a ");
validateHeader(hb, new String[] {"a"});
- hb = new HeaderBuilder();
- hb.startBatch();
+ hb = new HeaderBuilder(dummyPath);
parse(hb,"a,b,c");
validateHeader(hb, new String[] {"a","b","c"});
- hb = new HeaderBuilder();
- hb.startBatch();
+ hb = new HeaderBuilder(dummyPath);
parse(hb," a , b , c ");
validateHeader(hb, new String[] {"a","b","c"});
}
@Test
public void testSyntax() {
- HeaderBuilder hb = new HeaderBuilder();
- hb.startBatch();
+ Path dummyPath = new Path("file:/dummy.csv");
+ HeaderBuilder hb = new HeaderBuilder(dummyPath);
parse(hb,"a_123");
validateHeader(hb, new String[] {"a_123"});
- hb = new HeaderBuilder();
- hb.startBatch();
+ hb = new HeaderBuilder(dummyPath);
parse(hb,"a_123_");
validateHeader(hb, new String[] {"a_123_"});
- hb = new HeaderBuilder();
- hb.startBatch();
+ hb = new HeaderBuilder(dummyPath);
parse(hb,"az09_");
validateHeader(hb, new String[] {"az09_"});
- hb = new HeaderBuilder();
- hb.startBatch();
+ hb = new HeaderBuilder(dummyPath);
parse(hb,"+");
validateHeader(hb, new String[] {"column_1"});
- hb = new HeaderBuilder();
- hb.startBatch();
+ hb = new HeaderBuilder(dummyPath);
parse(hb,"+,-");
validateHeader(hb, new String[] {"column_1", "column_2"});
- hb = new HeaderBuilder();
- hb.startBatch();
+ hb = new HeaderBuilder(dummyPath);
parse(hb,"+9a");
validateHeader(hb, new String[] {"col_9a"});
- hb = new HeaderBuilder();
- hb.startBatch();
+ hb = new HeaderBuilder(dummyPath);
parse(hb,"9a");
validateHeader(hb, new String[] {"col_9a"});
- hb = new HeaderBuilder();
- hb.startBatch();
+ hb = new HeaderBuilder(dummyPath);
parse(hb,"a+b");
validateHeader(hb, new String[] {"a_b"});
- hb = new HeaderBuilder();
- hb.startBatch();
+ hb = new HeaderBuilder(dummyPath);
parse(hb,"a_b");
validateHeader(hb, new String[] {"a_b"});
- hb = new HeaderBuilder();
- hb.startBatch();
+ hb = new HeaderBuilder(dummyPath);
parse(hb,"EXPR$0");
validateHeader(hb, new String[] {"EXPR_0"});
- hb = new HeaderBuilder();
- hb.startBatch();
+ hb = new HeaderBuilder(dummyPath);
parse(hb,"(_-^-_)");
validateHeader(hb, new String[] {"col_______"});
}
@Test
public void testUnicode() {
- HeaderBuilder hb = new HeaderBuilder();
- hb.startBatch();
+ Path dummyPath = new Path("file:/dummy.csv");
+ HeaderBuilder hb = new HeaderBuilder(dummyPath);
parse(hb,"Αθήνα");
validateHeader(hb, new String[] {"Αθήνα"});
- hb = new HeaderBuilder();
- hb.startBatch();
+ hb = new HeaderBuilder(dummyPath);
parse(hb,"Москва");
validateHeader(hb, new String[] {"Москва"});
- hb = new HeaderBuilder();
- hb.startBatch();
+ hb = new HeaderBuilder(dummyPath);
parse(hb,"Paris,Αθήνα,Москва");
validateHeader(hb, new String[] {"Paris","Αθήνα","Москва"});
}
@@ -183,8 +170,8 @@ public class TestHeaderBuilder extends DrillTest {
}
private void testParser(String input, String[] expected) {
- HeaderBuilder hb = new HeaderBuilder();
- hb.startBatch();
+ Path dummyPath = new Path("file:/dummy.csv");
+ HeaderBuilder hb = new HeaderBuilder(dummyPath);
parse(hb,input);
hb.finishRecord();
validateHeader(hb, expected);
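The parse() helper used above sits outside this hunk. With the v3 HeaderBuilder constructed from a file Path, it presumably pushes the header text into the builder byte by byte through TextOutput-style callbacks; the startField/append/endField names below are assumptions based on that interface, not confirmed by this diff:

    // Assumed shape of the helper: split on commas and feed each byte to the
    // builder, mimicking what the compliant text reader does with a header row.
    private void parse(HeaderBuilder hb, String input) {
      if (input == null || input.isEmpty()) {
        return;
      }
      byte[] bytes = input.getBytes(Charsets.UTF_8);
      int fieldIndex = 0;
      hb.startField(fieldIndex);
      for (byte b : bytes) {
        if (b == ',') {
          hb.endField();
          hb.startField(++fieldIndex);
        } else {
          hb.append(b);
        }
      }
      hb.endField();
    }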
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestPartitionRace.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestPartitionRace.java
new file mode 100644
index 000000000..6e98339ee
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestPartitionRace.java
@@ -0,0 +1,375 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.easy.text.compliant;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.ExecConstants;
+import org.apache.drill.exec.record.metadata.SchemaBuilder;
+import org.apache.drill.exec.record.metadata.TupleMetadata;
+import org.apache.drill.test.rowSet.DirectRowSet;
+import org.apache.drill.test.rowSet.RowSet;
+import org.apache.drill.test.rowSet.RowSetBuilder;
+import org.apache.drill.test.rowSet.RowSetReader;
+import org.apache.drill.test.rowSet.RowSetUtilities;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Demonstrates a race condition inherent in the way that partition
+ * columns are currently implemented. Two files: one at the root directory,
+ * one down one level. Parallelization is forced to two. (Most tests use
+ * small files and both files end up being read in the same scanner, which
+ * masks the problem shown here.)
+ * <p>
+ * Depending on which file is read first, the output row may start with
+ * or without the partition column. Once the column occurs, it will
+ * persist.
+ * <p>
+ * The solution is to figure out the max partition depth in the
+ * EasySubScan rather than in each scan operator.
+ * <p>
+ * The tests here exercise both the "V2" reader (AKA the "new text reader"), which
+ * has many issues, and the "V3" (row-set-based) reader, which fixes them.
+ * <p>
+ * See DRILL-7082 for the multi-scan race (fixed in V3), and
+ * DRILL-7083 for the problem with partition columns returning nullable INT
+ * (also fixed in V3).
+ */
+
+public class TestPartitionRace extends BaseCsvTest {
+
+ @BeforeClass
+ public static void setup() throws Exception {
+ BaseCsvTest.setup(false, true, 2);
+
+ // Two-level partitioned table
+
+ File rootDir = new File(testDir, PART_DIR);
+ rootDir.mkdir();
+ buildFile(new File(rootDir, "first.csv"), validHeaders);
+ File nestedDir = new File(rootDir, NESTED_DIR);
+ nestedDir.mkdir();
+ buildFile(new File(nestedDir, "second.csv"), secondFile);
+ }
+
+ /**
+ * Oddly, when run in a single fragment, the files occur in a
+ * stable order and the partition column always appears, in
+ * the first column position.
+ */
+ @Test
+ public void testSingleScanV2() throws IOException {
+ String sql = "SELECT * FROM `dfs.data`.`%s`";
+
+ try {
+ enableV3(false);
+
+ // Single-fragment run: the file order is stable, so one pass is enough.
+
+ boolean sawMissingPartition = false;
+ boolean sawPartitionFirst = false;
+ boolean sawPartitionLast = false;
+
+ // Read the two batches.
+
+ Iterator<DirectRowSet> iter = client.queryBuilder().sql(sql, PART_DIR).rowSetIterator();
+ for (int j = 0; j < 2; j++) {
+ assertTrue(iter.hasNext());
+ RowSet rowSet = iter.next();
+
+ // Check location of partition column
+
+ int posn = rowSet.schema().index("dir0");
+ if (posn == -1) {
+ sawMissingPartition = true;
+ } else if (posn == 0) {
+ sawPartitionFirst = true;
+ } else {
+ sawPartitionLast = true;
+ }
+ rowSet.clear();
+ }
+ assertFalse(iter.hasNext());
+
+ // When run in a single fragment, the partition column appears
+ // all the time, and is in the first column position.
+
+ assertFalse(sawMissingPartition);
+ assertTrue(sawPartitionFirst);
+ assertFalse(sawPartitionLast);
+ } finally {
+ resetV3();
+ client.resetSession(ExecConstants.MIN_READER_WIDTH_KEY);
+ }
+ }
+
+ /**
+ * V3 provides the same schema for the single- and multi-scan
+ * cases.
+ */
+ @Test
+ public void testSingleScanV3() throws IOException {
+ String sql = "SELECT * FROM `dfs.data`.`%s`";
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("a", MinorType.VARCHAR)
+ .add("b", MinorType.VARCHAR)
+ .add("c", MinorType.VARCHAR)
+ .addNullable("dir0", MinorType.VARCHAR)
+ .buildSchema();
+
+ try {
+ enableV3(true);
+
+ // Single-fragment run: no race, so one pass is enough.
+
+ // First batch is empty; just carries the schema.
+
+ Iterator<DirectRowSet> iter = client.queryBuilder().sql(sql, PART_DIR).rowSetIterator();
+ assertTrue(iter.hasNext());
+ RowSet rowSet = iter.next();
+ assertEquals(0, rowSet.rowCount());
+ rowSet.clear();
+
+ // Read the two batches.
+
+ for (int j = 0; j < 2; j++) {
+ assertTrue(iter.hasNext());
+ rowSet = iter.next();
+
+ // Figure out which record this is and test accordingly.
+
+ RowSetReader reader = rowSet.reader();
+ assertTrue(reader.next());
+ String col1 = reader.scalar("a").getString();
+ if (col1.equals("10")) {
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("10", "foo", "bar", null)
+ .build();
+ RowSetUtilities.verify(expected, rowSet);
+ } else {
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("20", "fred", "wilma", NESTED_DIR)
+ .build();
+ RowSetUtilities.verify(expected, rowSet);
+ }
+ }
+ assertFalse(iter.hasNext());
+ } finally {
+ resetV3();
+ }
+ }
+
+ /**
+ * When forced to run in two fragments, the fun really starts. The
+ * partition column (usually) appears in the last column position instead
+ * of the first. The partition may or may not occur in the first row
+ * depending on which file is read first. The result is that the
+ * other columns will jump around. If we tried to create an expected
+ * result set, we'd be frustrated because the schema randomly changes.
+ * <p>
+ * Just to be clear: this behavior is a bug, not a feature. But, it is
+ * an established baseline for the "V2" reader.
+ * <p>
+ * This is really a test (demonstration) of the wrong behavior. This test
+ * is pretty unreliable. In particular, the position of the partition column
+ * seems to randomly shift from first to last position across runs.
+ */
+ @Test
+ public void testRaceV2() throws IOException {
+ String sql = "SELECT * FROM `dfs.data`.`%s`";
+
+ try {
+ enableV3(false);
+
+ // Special test-only feature to force even small scans
+ // to use more than one thread. Requires that the max
+ // parallelization option be set when starting the cluster.
+
+ client.alterSession(ExecConstants.MIN_READER_WIDTH_KEY, 2);
+
+ // Loop to run the query 10 times, or until we see the race
+
+ boolean sawRootFirst = false;
+ boolean sawNestedFirst = false;
+ boolean sawMissingPartition = false;
+ boolean sawPartitionFirst = false;
+ boolean sawPartitionLast = false;
+ for (int i = 0; i < 10; i++) {
+
+ // Read the two batches.
+
+ Iterator<DirectRowSet> iter = client.queryBuilder().sql(sql, PART_DIR).rowSetIterator();
+ for (int j = 0; j < 2; j++) {
+ assertTrue(iter.hasNext());
+ RowSet rowSet = iter.next();
+
+ // Check location of partition column
+
+ int posn = rowSet.schema().index("dir0");
+ if (posn == -1) {
+ sawMissingPartition = true;
+ } else if (posn == 0) {
+ sawPartitionFirst = true;
+ } else {
+ sawPartitionLast = true;
+ }
+
+ // Figure out which record this is and test accordingly.
+
+ RowSetReader reader = rowSet.reader();
+ assertTrue(reader.next());
+ String col1 = reader.scalar("a").getString();
+ if (col1.equals("10")) {
+ if (i == 0) {
+ sawRootFirst = true;
+ }
+ } else {
+ if (i == 0) {
+ sawNestedFirst = true;
+ }
+ }
+ rowSet.clear();
+ }
+ assertFalse(iter.hasNext());
+ if (sawMissingPartition &&
+ sawPartitionFirst &&
+ sawPartitionLast &&
+ sawRootFirst &&
+ sawNestedFirst) {
+ // The following should appear most of the time.
+ System.out.println("All variations occurred");
+ return;
+ }
+ }
+
+ // If you see this, maybe something got fixed. Or, maybe the
+ // min parallelization hack above stopped working.
+ // Or, you were just unlucky and can try the test again.
+ // We print messages, rather than using assertTrue, to avoid
+ // introducing a flaky test.
+
+ System.out.println("Some variations did not occur");
+ System.out.println(String.format("Missing partition: %s", sawMissingPartition));
+ System.out.println(String.format("Partition first: %s", sawPartitionFirst));
+ System.out.println(String.format("Partition last: %s", sawPartitionLast));
+ System.out.println(String.format("Outer first: %s", sawRootFirst));
+ System.out.println(String.format("Nested first: %s", sawNestedFirst));
+ } finally {
+ resetV3();
+ client.resetSession(ExecConstants.MIN_READER_WIDTH_KEY);
+ }
+ }
+
+ /**
+ * V3 computes partition depth in the group scan (which sees all files), and
+ * so the partition column count does not vary across scans. Also, V3 puts
+ * partition columns at the end of the row so that data columns don't
+ * "jump around" when files are shifted to a new partition depth.
+ */
+ @Test
+ public void testNoRaceV3() throws IOException {
+ String sql = "SELECT * FROM `dfs.data`.`%s`";
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("a", MinorType.VARCHAR)
+ .add("b", MinorType.VARCHAR)
+ .add("c", MinorType.VARCHAR)
+ .addNullable("dir0", MinorType.VARCHAR)
+ .buildSchema();
+
+ try {
+ enableV3(true);
+ client.alterSession(ExecConstants.MIN_READER_WIDTH_KEY, 2);
+
+ // Loop to run the query 10 times or until we see both files
+ // in the first position.
+
+ boolean sawRootFirst = false;
+ boolean sawNestedFirst = false;
+ for (int i = 0; i < 10; i++) {
+
+ // First batch is empty; just carries the schema.
+
+ Iterator<DirectRowSet> iter = client.queryBuilder().sql(sql, PART_DIR).rowSetIterator();
+ assertTrue(iter.hasNext());
+ RowSet rowSet = iter.next();
+ assertEquals(0, rowSet.rowCount());
+ rowSet.clear();
+
+ // Read the two batches.
+
+ for (int j = 0; j < 2; j++) {
+ assertTrue(iter.hasNext());
+ rowSet = iter.next();
+
+ // Figure out which record this is and test accordingly.
+
+ RowSetReader reader = rowSet.reader();
+ assertTrue(reader.next());
+ String col1 = reader.scalar("a").getString();
+ if (col1.equals("10")) {
+ if (i == 0) {
+ sawRootFirst = true;
+ }
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("10", "foo", "bar", null)
+ .build();
+ RowSetUtilities.verify(expected, rowSet);
+ } else {
+ if (i == 0) {
+ sawNestedFirst = true;
+ }
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("20", "fred", "wilma", NESTED_DIR)
+ .build();
+ RowSetUtilities.verify(expected, rowSet);
+ }
+ }
+ assertFalse(iter.hasNext());
+ if (sawRootFirst &&
+ sawNestedFirst) {
+ // The following should appear most of the time.
+ System.out.println("Both variations occurred");
+ return;
+ }
+ }
+
+ // If you see this, maybe something got fixed. Or, maybe the
+ // min parallelization hack above stopped working.
+ // Or, you were just unlucky and can try the test again.
+ // We print messages, rather than using assertTrue, to avoid
+ // introducing a flaky test.
+
+ System.out.println("Some variations did not occur");
+ System.out.println(String.format("Outer first: %s", sawRootFirst));
+ System.out.println(String.format("Nested first: %s", sawNestedFirst));
+ } finally {
+ resetV3();
+ client.resetSession(ExecConstants.MIN_READER_WIDTH_KEY);
+ }
+ }
+}
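Both "race" tests above rely on a test-only knob to force a second scan fragment even for these tiny files; without it, both files land in the same scanner and the race never appears. Distilled from the code above (the knob is whatever ExecConstants.MIN_READER_WIDTH_KEY names, and the cluster must have been started with enough max parallelization):

    // Force at least two reader fragments, run the query, then restore the default.
    client.alterSession(ExecConstants.MIN_READER_WIDTH_KEY, 2);
    try {
      // ... run the query and inspect each batch as in testRaceV2/testNoRaceV3 ...
    } finally {
      client.resetSession(ExecConstants.MIN_READER_WIDTH_KEY);
    }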
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestNewTextReader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestNewTextReader.java
index 6eb9bbfbe..43ad0d86d 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestNewTextReader.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestNewTextReader.java
@@ -61,6 +61,8 @@ public class TestNewTextReader extends BaseTestQuery {
fail("Query should have failed");
} catch(UserRemoteException ex) {
assertEquals(ErrorType.DATA_READ, ex.getErrorType());
+ // Change to the following if V3 is enabled
+ // assertEquals(ErrorType.VALIDATION, ex.getErrorType());
assertTrue("Error message should contain " + COL_NAME, ex.getMessage().contains(COL_NAME));
}
}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/ClusterFixture.java b/exec/java-exec/src/test/java/org/apache/drill/test/ClusterFixture.java
index a9d2977b4..601356e28 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/ClusterFixture.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/ClusterFixture.java
@@ -525,7 +525,7 @@ public class ClusterFixture extends BaseFixture implements AutoCloseable {
public static final String EXPLAIN_PLAN_JSON = "json";
public static ClusterFixtureBuilder builder(BaseDirTestWatcher dirTestWatcher) {
- ClusterFixtureBuilder builder = new ClusterFixtureBuilder(dirTestWatcher)
+ ClusterFixtureBuilder builder = new ClusterFixtureBuilder(dirTestWatcher)
.sessionOption(ExecConstants.MAX_WIDTH_PER_NODE_KEY, MAX_WIDTH_PER_NODE);
Properties props = new Properties();
props.putAll(ClusterFixture.TEST_CONFIGURATIONS);
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/QueryBuilder.java b/exec/java-exec/src/test/java/org/apache/drill/test/QueryBuilder.java
index 629714b36..63b818e2e 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/QueryBuilder.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/QueryBuilder.java
@@ -366,7 +366,7 @@ public class QueryBuilder {
}
}
- public QueryRowSetIterator rowSetIterator( ) {
+ public QueryRowSetIterator rowSetIterator() {
return new QueryRowSetIterator(client.allocator(), withEventListener());
}