aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Rogers <progers@maprtech.com>2017-05-11 12:46:15 -0700
committerJinfeng Ni <jni@apache.org>2017-06-02 21:43:14 -0700
commitd7bc213ba7cf9a49657cb0699540ca375014a828 (patch)
treea435d5c540795e66fe72dd1c0f48eb00fa9ca7f7
parent78739889164c8df84fee249310f6d72d1199ea04 (diff)
DRILL-5504: Add vector validator to diagnose offset vector issues
Validates offset vectors in VarChar and repeated vectors. Validates the special case of repeated VarChar vectors (two layers of offsets.) Provides two new session variables to turn on validation. One enables the existing operator (iterator) validation, the other adds vector validation. This allows validation to occur in a “production” Drill (without restarting Drill with assertions, as previously required.) Unit tests validate the validator. Another test validates the integration, but requires manual steps, so is ignored by default. This version is first-cut: all work is done within a single class. Allows back-porting to an earlier version to solve a specific issues. A revision should move some of the work into generated code (or refactor vectors to allow outside access), since offset vectors appear for each subclass; not on a base class that would allow generic operations. * Added boot-time options to allow enabling vector validation in Maven unit tests. * Code cleanup per suggestions. * Additional (manual) tests for boot-time options and default options. closes #832
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java31
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ImplCreator.java12
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/BatchValidator.java208
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/IteratorValidatorBatchIterator.java20
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/IteratorValidatorCreator.java12
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java4
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java1
-rw-r--r--exec/java-exec/src/main/resources/drill-module.conf10
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/validate/TestBatchValidator.java323
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/validate/TestValidationOptions.java135
10 files changed, 749 insertions, 7 deletions
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
index 007e39a1b..83ffb201f 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
@@ -447,11 +447,40 @@ public interface ExecConstants {
String USE_DYNAMIC_UDFS_KEY = "exec.udf.use_dynamic";
BooleanValidator USE_DYNAMIC_UDFS = new BooleanValidator(USE_DYNAMIC_UDFS_KEY, true);
-
String QUERY_TRANSIENT_STATE_UPDATE_KEY = "exec.query.progress.update";
BooleanValidator QUERY_TRANSIENT_STATE_UPDATE = new BooleanValidator(QUERY_TRANSIENT_STATE_UPDATE_KEY, true);
String PERSISTENT_TABLE_UMASK = "exec.persistent_table.umask";
StringValidator PERSISTENT_TABLE_UMASK_VALIDATOR = new StringValidator(PERSISTENT_TABLE_UMASK, "002");
+ /**
+ * Enables batch iterator (operator) validation. Validation is normally enabled
+ * only when assertions are enabled. This option enables iterator validation even
+ * if assertions are not enabled. That is, it allows iterator validation even on
+ * a "production" Drill instance.
+ */
+ String ENABLE_ITERATOR_VALIDATION_OPTION = "debug.validate_iterators";
+ BooleanValidator ENABLE_ITERATOR_VALIDATOR = new BooleanValidator(ENABLE_ITERATOR_VALIDATION_OPTION, false);
+
+ /**
+ * Boot-time config option to enable validation. Primarily used for tests.
+ * If true, overrrides the above. (That is validation is done if assertions are on,
+ * if the above session option is set to true, or if this config option is set to true.
+ */
+
+ String ENABLE_ITERATOR_VALIDATION = "drill.exec.debug.validate_iterators";
+
+ /**
+ * When iterator validation is enabled, additionally validates the vectors in
+ * each batch passed to each iterator.
+ */
+ String ENABLE_VECTOR_VALIDATION_OPTION = "debug.validate_vectors";
+ BooleanValidator ENABLE_VECTOR_VALIDATOR = new BooleanValidator(ENABLE_VECTOR_VALIDATION_OPTION, false);
+
+ /**
+ * Boot-time config option to enable vector validation. Primarily used for
+ * tests. Add the following to the command line to enable:<br>
+ * <tt>-ea -Ddrill.exec.debug.validate_vectors=true</tt>
+ */
+ String ENABLE_VECTOR_VALIDATION = "drill.exec.debug.validate_vectors";
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ImplCreator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ImplCreator.java
index 5872ef171..58bf383af 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ImplCreator.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ImplCreator.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -26,6 +26,7 @@ import java.util.concurrent.TimeUnit;
import com.google.common.annotations.VisibleForTesting;
import org.apache.drill.common.AutoCloseables;
import org.apache.drill.common.exceptions.ExecutionSetupException;
+import org.apache.drill.exec.ExecConstants;
import org.apache.drill.exec.ops.FragmentContext;
import org.apache.drill.exec.physical.base.FragmentRoot;
import org.apache.drill.exec.physical.base.PhysicalOperator;
@@ -69,9 +70,16 @@ public class ImplCreator {
Preconditions.checkNotNull(root);
Preconditions.checkNotNull(context);
- if (AssertionUtil.isAssertionsEnabled()) {
+ // Enable iterator (operator) validation if assertions are enabled (debug mode)
+ // or if in production mode and the ENABLE_ITERATOR_VALIDATION option is set
+ // to true.
+
+ if (AssertionUtil.isAssertionsEnabled() ||
+ context.getOptionSet().getOption(ExecConstants.ENABLE_ITERATOR_VALIDATOR) ||
+ context.getConfig().getBoolean(ExecConstants.ENABLE_ITERATOR_VALIDATION)) {
root = IteratorValidatorInjector.rewritePlanWithIteratorValidator(context, root);
}
+
final ImplCreator creator = new ImplCreator();
Stopwatch watch = Stopwatch.createStarted();
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/BatchValidator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/BatchValidator.java
new file mode 100644
index 000000000..e0f3ff2b9
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/BatchValidator.java
@@ -0,0 +1,208 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.apache.drill.exec.physical.impl.validate;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.drill.exec.record.SimpleVectorWrapper;
+import org.apache.drill.exec.record.VectorAccessible;
+import org.apache.drill.exec.record.VectorWrapper;
+import org.apache.drill.exec.vector.BaseDataValueVector;
+import org.apache.drill.exec.vector.FixedWidthVector;
+import org.apache.drill.exec.vector.NullableVarCharVector;
+import org.apache.drill.exec.vector.NullableVector;
+import org.apache.drill.exec.vector.RepeatedVarCharVector;
+import org.apache.drill.exec.vector.UInt4Vector;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.VarCharVector;
+import org.apache.drill.exec.vector.VariableWidthVector;
+import org.apache.drill.exec.vector.complex.BaseRepeatedValueVector;
+import org.apache.drill.exec.vector.complex.RepeatedFixedWidthVectorLike;
+
+
+/**
+ * Validate a batch of value vectors. It is not possible to validate the
+ * data, but we can validate the structure, especially offset vectors.
+ * Only handles single (non-hyper) vectors at present. Current form is
+ * self-contained. Better checks can be done by moving checks inside
+ * vectors or by exposing more metadata from vectors.
+ */
+
+public class BatchValidator {
+ private static final org.slf4j.Logger logger =
+ org.slf4j.LoggerFactory.getLogger(BatchValidator.class);
+
+ public static final int MAX_ERRORS = 100;
+
+ private final int rowCount;
+ private final VectorAccessible batch;
+ private final List<String> errorList;
+ private int errorCount;
+
+ public BatchValidator(VectorAccessible batch) {
+ rowCount = batch.getRecordCount();
+ this.batch = batch;
+ errorList = null;
+ }
+
+ public BatchValidator(VectorAccessible batch, boolean captureErrors) {
+ rowCount = batch.getRecordCount();
+ this.batch = batch;
+ if (captureErrors) {
+ errorList = new ArrayList<>();
+ } else {
+ errorList = null;
+ }
+ }
+
+ public void validate() {
+ if (batch.getRecordCount() == 0) {
+ return;
+ }
+ for (VectorWrapper<? extends ValueVector> w : batch) {
+ validateWrapper(w);
+ }
+ }
+
+ private void validateWrapper(VectorWrapper<? extends ValueVector> w) {
+ if (w instanceof SimpleVectorWrapper) {
+ validateVector(w.getValueVector());
+ }
+ }
+
+ private void validateVector(ValueVector vector) {
+ String name = vector.getField().getName();
+ if (vector instanceof NullableVector) {
+ validateNullableVector(name, (NullableVector) vector);
+ } else if (vector instanceof VariableWidthVector) {
+ validateVariableWidthVector(name, (VariableWidthVector) vector, rowCount);
+ } else if (vector instanceof FixedWidthVector) {
+ validateFixedWidthVector(name, (FixedWidthVector) vector);
+ } else if (vector instanceof BaseRepeatedValueVector) {
+ validateRepeatedVector(name, (BaseRepeatedValueVector) vector);
+ } else {
+ logger.debug("Don't know how to validate vector: " + name + " of class " + vector.getClass().getSimpleName());
+ }
+ }
+
+ private void validateVariableWidthVector(String name, VariableWidthVector vector, int entryCount) {
+
+ // Offsets are in the derived classes. Handle only VarChar for now.
+
+ if (vector instanceof VarCharVector) {
+ validateVarCharVector(name, (VarCharVector) vector, entryCount);
+ } else {
+ logger.debug("Don't know how to validate vector: " + name + " of class " + vector.getClass().getSimpleName());
+ }
+ }
+
+ private void validateVarCharVector(String name, VarCharVector vector, int entryCount) {
+// int dataLength = vector.getAllocatedByteCount(); // Includes offsets and data.
+ int dataLength = vector.getBuffer().capacity();
+ validateOffsetVector(name + "-offsets", vector.getOffsetVector(), entryCount, dataLength);
+ }
+
+ private void validateRepeatedVector(String name, BaseRepeatedValueVector vector) {
+
+ int dataLength = Integer.MAX_VALUE;
+ if (vector instanceof RepeatedVarCharVector) {
+ dataLength = ((RepeatedVarCharVector) vector).getOffsetVector().getValueCapacity();
+ } else if (vector instanceof RepeatedFixedWidthVectorLike) {
+ dataLength = ((BaseDataValueVector) ((BaseRepeatedValueVector) vector).getDataVector()).getBuffer().capacity();
+ }
+ int itemCount = validateOffsetVector(name + "-offsets", vector.getOffsetVector(), rowCount, dataLength);
+
+ // Special handling of repeated VarChar vectors
+ // The nested data vectors are not quite exactly like top-level vectors.
+
+ @SuppressWarnings("resource")
+ ValueVector dataVector = vector.getDataVector();
+ if (dataVector instanceof VariableWidthVector) {
+ validateVariableWidthVector(name + "-data", (VariableWidthVector) dataVector, itemCount);
+ }
+ }
+
+ private int validateOffsetVector(String name, UInt4Vector offsetVector, int valueCount, int maxOffset) {
+ if (valueCount == 0) {
+ return 0;
+ }
+ UInt4Vector.Accessor accessor = offsetVector.getAccessor();
+
+ // First value must be zero in current version.
+
+ int prevOffset = accessor.get(0);
+ if (prevOffset != 0) {
+ error(name, offsetVector, "Offset (0) must be 0 but was " + prevOffset);
+ }
+
+ // Note <= comparison: offset vectors have (n+1) entries.
+
+ for (int i = 1; i <= valueCount; i++) {
+ int offset = accessor.get(i);
+ if (offset < prevOffset) {
+ error(name, offsetVector, "Decreasing offsets at (" + (i-1) + ", " + i + ") = (" + prevOffset + ", " + offset + ")");
+ } else if (offset > maxOffset) {
+ error(name, offsetVector, "Invalid offset at index " + i + " = " + offset + " exceeds maximum of " + maxOffset);
+ }
+ prevOffset = offset;
+ }
+ return prevOffset;
+ }
+
+ private void error(String name, ValueVector vector, String msg) {
+ if (errorCount == 0) {
+ logger.error("Found one or more vector errors from " + batch.getClass().getSimpleName());
+ }
+ errorCount++;
+ if (errorCount >= MAX_ERRORS) {
+ return;
+ }
+ String fullMsg = "Column " + name + " of type " + vector.getClass().getSimpleName( ) + ": " + msg;
+ logger.error(fullMsg);
+ if (errorList != null) {
+ errorList.add(fullMsg);
+ }
+ }
+
+ private void validateNullableVector(String name, NullableVector vector) {
+ // Can't validate at this time because the bits vector is in each
+ // generated subtype.
+
+ // Validate a VarChar vector because it is common.
+
+ if (vector instanceof NullableVarCharVector) {
+ @SuppressWarnings("resource")
+ VarCharVector values = ((NullableVarCharVector) vector).getValuesVector();
+ validateVarCharVector(name + "-values", values, rowCount);
+ }
+ }
+
+ private void validateFixedWidthVector(String name, FixedWidthVector vector) {
+ // TODO Auto-generated method stub
+
+ }
+
+ /**
+ * Obtain the list of errors. For use in unit-testing this class.
+ * @return the list of errors found, or null if error capture was
+ * not enabled
+ */
+
+ public List<String> errors() { return errorList; }
+}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/IteratorValidatorBatchIterator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/IteratorValidatorBatchIterator.java
index 01c3c92d1..0d7fccca0 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/IteratorValidatorBatchIterator.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/IteratorValidatorBatchIterator.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -94,6 +94,11 @@ public class IteratorValidatorBatchIterator implements CloseableRecordBatch {
/** High-level IterOutcome sequence state. */
private ValidationState validationState = ValidationState.INITIAL_NO_SCHEMA;
+ /**
+ * Enable/disable per-batch vector validation. Enable only to debug vector
+ * corruption issues.
+ */
+ private boolean validateBatches;
public IteratorValidatorBatchIterator(RecordBatch incoming) {
this.incoming = incoming;
@@ -103,6 +108,11 @@ public class IteratorValidatorBatchIterator implements CloseableRecordBatch {
logger.trace( "[#{}; on {}]: Being constructed.", instNum, batchTypeName);
}
+
+ public void enableBatchValidation(boolean option) {
+ validateBatches = option;
+ }
+
@Override
public String toString() {
return
@@ -224,6 +234,7 @@ public class IteratorValidatorBatchIterator implements CloseableRecordBatch {
// above).
// OK_NEW_SCHEMA moves to have-seen-schema state.
validationState = ValidationState.HAVE_SCHEMA;
+ validateBatch();
break;
case OK:
// OK is allowed as long as OK_NEW_SCHEMA was seen, except if terminated
@@ -234,6 +245,7 @@ public class IteratorValidatorBatchIterator implements CloseableRecordBatch {
"next() returned %s without first returning %s [#%d, %s]",
batchState, OK_NEW_SCHEMA, instNum, batchTypeName));
}
+ validateBatch();
// OK doesn't change high-level state.
break;
case NONE:
@@ -326,6 +338,12 @@ public class IteratorValidatorBatchIterator implements CloseableRecordBatch {
}
}
+ private void validateBatch() {
+ if (validateBatches) {
+ new BatchValidator(incoming).validate();
+ }
+ }
+
@Override
public WritableBatch getWritableBatch() {
validateReadState("getWritableBatch()");
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/IteratorValidatorCreator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/IteratorValidatorCreator.java
index cc30326fd..228841945 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/IteratorValidatorCreator.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/IteratorValidatorCreator.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -20,6 +20,7 @@ package org.apache.drill.exec.physical.impl.validate;
import java.util.List;
import org.apache.drill.common.exceptions.ExecutionSetupException;
+import org.apache.drill.exec.ExecConstants;
import org.apache.drill.exec.ops.FragmentContext;
import org.apache.drill.exec.physical.config.IteratorValidator;
import org.apache.drill.exec.physical.impl.BatchCreator;
@@ -35,6 +36,13 @@ public class IteratorValidatorCreator implements BatchCreator<IteratorValidator>
List<RecordBatch> children)
throws ExecutionSetupException {
Preconditions.checkArgument(children.size() == 1);
- return new IteratorValidatorBatchIterator(children.iterator().next());
+ RecordBatch child = children.iterator().next();
+ IteratorValidatorBatchIterator iter = new IteratorValidatorBatchIterator(child);
+ boolean validateBatches = context.getOptionSet().getOption(ExecConstants.ENABLE_VECTOR_VALIDATOR) ||
+ context.getConfig().getBoolean(ExecConstants.ENABLE_VECTOR_VALIDATION);
+ iter.enableBatchValidation(validateBatches);
+ logger.trace("Iterator validation enabled for " + child.getClass().getSimpleName() +
+ (validateBatches ? " with vector validation" : ""));
+ return iter;
}
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
index 8d0e96cfc..4f7ecc2cb 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
@@ -170,7 +170,9 @@ public class SystemOptionManager extends BaseOptionManager implements OptionMana
ExecConstants.QUERY_PROFILE_DEBUG_VALIDATOR,
ExecConstants.USE_DYNAMIC_UDFS,
ExecConstants.QUERY_TRANSIENT_STATE_UPDATE,
- ExecConstants.PERSISTENT_TABLE_UMASK_VALIDATOR
+ ExecConstants.PERSISTENT_TABLE_UMASK_VALIDATOR,
+ ExecConstants.ENABLE_ITERATOR_VALIDATOR,
+ ExecConstants.ENABLE_VECTOR_VALIDATOR
};
final Map<String, OptionValidator> tmp = new HashMap<>();
for (final OptionValidator validator : validators) {
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
index e2537308e..4a35c3beb 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
@@ -151,6 +151,7 @@ public class CompliantTextRecordReader extends AbstractRecordReader {
}
// setup Input using InputStream
+ logger.trace("Opening file {}", split.getPath());
stream = dfs.openPossiblyCompressedStream(split.getPath());
input = new TextInput(settings, stream, readBuffer, split.getStart(), split.getStart() + split.getLength());
diff --git a/exec/java-exec/src/main/resources/drill-module.conf b/exec/java-exec/src/main/resources/drill-module.conf
index 19e1b1f67..7c095ac1a 100644
--- a/exec/java-exec/src/main/resources/drill-module.conf
+++ b/exec/java-exec/src/main/resources/drill-module.conf
@@ -187,6 +187,16 @@ drill.exec: {
// Use plain Java compilation where available
prefer_plain_java: false
},
+ debug: {
+ // If true, inserts the iterator validator atop each operator.
+ // Primrily used for testing.
+ validate_iterators: false,
+ // If iterator validation is enabled, also validates the vectors
+ // in each batch. Primarily used for testing. To enable from
+ // the command line:
+ // java ... -ea -Ddrill.exec.debug.validate_vectors=true ...
+ validate_vectors: false
+ },
sort: {
purge.threshold : 1000,
external: {
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/validate/TestBatchValidator.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/validate/TestBatchValidator.java
new file mode 100644
index 000000000..eafb4c8eb
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/validate/TestBatchValidator.java
@@ -0,0 +1,323 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.apache.drill.exec.physical.impl.validate;
+
+import static org.junit.Assert.*;
+
+import java.util.List;
+
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.physical.impl.validate.BatchValidator;
+import org.apache.drill.exec.record.BatchSchema;
+import org.apache.drill.exec.record.VectorAccessible;
+import org.apache.drill.exec.vector.RepeatedVarCharVector;
+import org.apache.drill.exec.vector.UInt4Vector;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.VarCharVector;
+import org.apache.drill.test.LogFixture;
+import org.apache.drill.test.OperatorFixture;
+import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
+import org.apache.drill.test.rowSet.SchemaBuilder;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import ch.qos.logback.classic.Level;
+
+public class TestBatchValidator /* TODO: extends SubOperatorTest */ {
+
+ protected static OperatorFixture fixture;
+ protected static LogFixture logFixture;
+
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ logFixture = LogFixture.builder()
+ .toConsole()
+ .logger(BatchValidator.class, Level.TRACE)
+ .build();
+ fixture = OperatorFixture.standardFixture();
+ }
+
+ @AfterClass
+ public static void tearDownAfterClass() throws Exception {
+ fixture.close();
+ logFixture.close();
+ }
+
+ @Test
+ public void testValidFixed() {
+ BatchSchema schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addNullable("b", MinorType.INT)
+ .build();
+
+ SingleRowSet batch = fixture.rowSetBuilder(schema)
+ .add(10, 100)
+ .add(20, 120)
+ .add(30, null)
+ .add(40, 140)
+ .build();
+
+ BatchValidator validator = new BatchValidator(batch.vectorAccessible(), true);
+ validator.validate();
+ assertTrue(validator.errors().isEmpty());
+ batch.clear();
+ }
+
+ @Test
+ public void testValidVariable() {
+ BatchSchema schema = new SchemaBuilder()
+ .add("a", MinorType.VARCHAR)
+ .addNullable("b", MinorType.VARCHAR)
+ .build();
+
+ SingleRowSet batch = fixture.rowSetBuilder(schema)
+ .add("col1.1", "col1.2")
+ .add("col2.1", "col2.2")
+ .add("col3.1", null)
+ .add("col4.1", "col4.2")
+ .build();
+
+ BatchValidator validator = new BatchValidator(batch.vectorAccessible(), true);
+ validator.validate();
+ assertTrue(validator.errors().isEmpty());
+ batch.clear();
+ }
+
+ @Test
+ public void testValidRepeated() {
+ BatchSchema schema = new SchemaBuilder()
+ .add("a", MinorType.INT, DataMode.REPEATED)
+ .add("b", MinorType.VARCHAR, DataMode.REPEATED)
+ .build();
+
+ SingleRowSet batch = fixture.rowSetBuilder(schema)
+ .add(new int[] {}, new String[] {})
+ .add(new int[] {1, 2, 3}, new String[] {"fred", "barney", "wilma"})
+ .add(new int[] {4}, new String[] {"dino"})
+ .build();
+
+ BatchValidator validator = new BatchValidator(batch.vectorAccessible(), true);
+ validator.validate();
+ assertTrue(validator.errors().isEmpty());
+ batch.clear();
+ }
+
+ @Test
+ public void testVariableMissingLast() {
+ BatchSchema schema = new SchemaBuilder()
+ .add("a", MinorType.VARCHAR)
+ .build();
+
+ SingleRowSet batch = fixture.rowSetBuilder(schema)
+ .add("x")
+ .add("y")
+ .add("z")
+ .build();
+
+ // Here we are evil: stomp on the last offset to simulate corruption.
+ // Don't do this in real code!
+
+ VectorAccessible va = batch.vectorAccessible();
+ @SuppressWarnings("resource")
+ ValueVector v = va.iterator().next().getValueVector();
+ VarCharVector vc = (VarCharVector) v;
+ @SuppressWarnings("resource")
+ UInt4Vector ov = vc.getOffsetVector();
+ assertTrue(ov.getAccessor().get(3) > 0);
+ ov.getMutator().set(3, 0);
+
+ // Validator should catch the error.
+
+ BatchValidator validator = new BatchValidator(batch.vectorAccessible(), true);
+ validator.validate();
+ List<String> errors = validator.errors();
+ assertEquals(1, errors.size());
+ assertTrue(errors.get(0).contains("Decreasing offsets"));
+ batch.clear();
+ }
+
+ @Test
+ public void testVariableCorruptFirst() {
+ BatchSchema schema = new SchemaBuilder()
+ .add("a", MinorType.VARCHAR)
+ .build();
+
+ SingleRowSet batch = fixture.rowSetBuilder(schema)
+ .add("x")
+ .add("y")
+ .add("z")
+ .build();
+
+ zapOffset(batch, 0, 1);
+
+ // Validator should catch the error.
+
+ BatchValidator validator = new BatchValidator(batch.vectorAccessible(), true);
+ validator.validate();
+ List<String> errors = validator.errors();
+ assertEquals(1, errors.size());
+ assertTrue(errors.get(0).contains("Offset (0) must be 0"));
+ batch.clear();
+ }
+
+ public void zapOffset(SingleRowSet batch, int index, int bogusValue) {
+
+ // Here we are evil: stomp on an offset to simulate corruption.
+ // Don't do this in real code!
+
+ VectorAccessible va = batch.vectorAccessible();
+ @SuppressWarnings("resource")
+ ValueVector v = va.iterator().next().getValueVector();
+ VarCharVector vc = (VarCharVector) v;
+ @SuppressWarnings("resource")
+ UInt4Vector ov = vc.getOffsetVector();
+ ov.getMutator().set(index, bogusValue);
+ }
+
+ @Test
+ public void testVariableCorruptMiddleLow() {
+ BatchSchema schema = new SchemaBuilder()
+ .add("a", MinorType.VARCHAR)
+ .build();
+
+ SingleRowSet batch = fixture.rowSetBuilder(schema)
+ .add("xx")
+ .add("yy")
+ .add("zz")
+ .build();
+
+ zapOffset(batch, 2, 1);
+
+ // Validator should catch the error.
+
+ BatchValidator validator = new BatchValidator(batch.vectorAccessible(), true);
+ validator.validate();
+ List<String> errors = validator.errors();
+ assertEquals(1, errors.size());
+ assertTrue(errors.get(0).contains("Decreasing offsets"));
+ batch.clear();
+ }
+
+ @Test
+ public void testVariableCorruptMiddleHigh() {
+ BatchSchema schema = new SchemaBuilder()
+ .add("a", MinorType.VARCHAR)
+ .build();
+
+ SingleRowSet batch = fixture.rowSetBuilder(schema)
+ .add("xx")
+ .add("yy")
+ .add("zz")
+ .build();
+
+ zapOffset(batch, 1, 10);
+
+ // Validator should catch the error.
+
+ BatchValidator validator = new BatchValidator(batch.vectorAccessible(), true);
+ validator.validate();
+ List<String> errors = validator.errors();
+ assertEquals(1, errors.size());
+ assertTrue(errors.get(0).contains("Decreasing offsets"));
+ batch.clear();
+ }
+
+ @Test
+ public void testVariableCorruptLastOutOfRange() {
+ BatchSchema schema = new SchemaBuilder()
+ .add("a", MinorType.VARCHAR)
+ .build();
+
+ SingleRowSet batch = fixture.rowSetBuilder(schema)
+ .add("xx")
+ .add("yy")
+ .add("zz")
+ .build();
+
+ zapOffset(batch, 3, 100_000);
+
+ // Validator should catch the error.
+
+ BatchValidator validator = new BatchValidator(batch.vectorAccessible(), true);
+ validator.validate();
+ List<String> errors = validator.errors();
+ assertEquals(1, errors.size());
+ assertTrue(errors.get(0).contains("Invalid offset"));
+ batch.clear();
+ }
+
+ @Test
+ public void testRepeatedBadArrayOffset() {
+ BatchSchema schema = new SchemaBuilder()
+ .add("a", MinorType.VARCHAR, DataMode.REPEATED)
+ .build();
+
+ SingleRowSet batch = fixture.rowSetBuilder(schema)
+ .add((Object) new String[] {})
+ .add((Object) new String[] {"fred", "barney", "wilma"})
+ .add((Object) new String[] {"dino"})
+ .build();
+
+ VectorAccessible va = batch.vectorAccessible();
+ @SuppressWarnings("resource")
+ ValueVector v = va.iterator().next().getValueVector();
+ RepeatedVarCharVector vc = (RepeatedVarCharVector) v;
+ @SuppressWarnings("resource")
+ UInt4Vector ov = vc.getOffsetVector();
+ ov.getMutator().set(3, 1);
+
+ BatchValidator validator = new BatchValidator(batch.vectorAccessible(), true);
+ validator.validate();
+ List<String> errors = validator.errors();
+ assertEquals(1, errors.size());
+ assertTrue(errors.get(0).contains("Decreasing offsets"));
+ batch.clear();
+ }
+
+ @Test
+ public void testRepeatedBadValueOffset() {
+ BatchSchema schema = new SchemaBuilder()
+ .add("a", MinorType.VARCHAR, DataMode.REPEATED)
+ .build();
+
+ SingleRowSet batch = fixture.rowSetBuilder(schema)
+ .add((Object) new String[] {})
+ .add((Object) new String[] {"fred", "barney", "wilma"})
+ .add((Object) new String[] {"dino"})
+ .build();
+
+ VectorAccessible va = batch.vectorAccessible();
+ @SuppressWarnings("resource")
+ ValueVector v = va.iterator().next().getValueVector();
+ RepeatedVarCharVector rvc = (RepeatedVarCharVector) v;
+ @SuppressWarnings("resource")
+ VarCharVector vc = rvc.getDataVector();
+ @SuppressWarnings("resource")
+ UInt4Vector ov = vc.getOffsetVector();
+ ov.getMutator().set(4, 100_000);
+
+ BatchValidator validator = new BatchValidator(batch.vectorAccessible(), true);
+ validator.validate();
+ List<String> errors = validator.errors();
+ assertEquals(1, errors.size());
+ assertTrue(errors.get(0).contains("Invalid offset"));
+ batch.clear();
+ }
+}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/validate/TestValidationOptions.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/validate/TestValidationOptions.java
new file mode 100644
index 000000000..d4e33b097
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/validate/TestValidationOptions.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.validate;
+
+import static org.junit.Assert.assertFalse;
+
+import org.apache.drill.exec.ExecConstants;
+import org.apache.drill.exec.store.easy.text.compliant.CompliantTextRecordReader;
+import org.apache.drill.test.ClientFixture;
+import org.apache.drill.test.ClusterFixture;
+import org.apache.drill.test.DrillTest;
+import org.apache.drill.test.FixtureBuilder;
+import org.apache.drill.test.LogFixture;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import ch.qos.logback.classic.Level;
+
+@Ignore("requires manual verification")
+public class TestValidationOptions extends DrillTest {
+
+ protected static LogFixture logFixture;
+
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ logFixture = LogFixture.builder()
+ .toConsole()
+ .logger(BatchValidator.class, Level.TRACE)
+ .logger(IteratorValidatorCreator.class, Level.TRACE)
+ .logger(CompliantTextRecordReader.class, Level.TRACE)
+ .build();
+ }
+
+ @AfterClass
+ public static void tearDownAfterClass() throws Exception {
+ logFixture.close();
+ }
+
+ // To validate these tests, set breakpoints in ImplCreator
+ // and IteratorValidatorBatchIterator to see if the options
+ // work as expected.
+
+ @Test
+ public void testOptions() throws Exception {
+ FixtureBuilder builder = ClusterFixture.builder()
+ .maxParallelization(1)
+ .configProperty(ExecConstants.ENABLE_ITERATOR_VALIDATION, false)
+ .configProperty(ExecConstants.ENABLE_VECTOR_VALIDATION, false)
+ .sessionOption(ExecConstants.ENABLE_ITERATOR_VALIDATION_OPTION, true)
+ .sessionOption(ExecConstants.ENABLE_VECTOR_VALIDATION_OPTION, true)
+ ;
+ try (ClusterFixture cluster = builder.build();
+ ClientFixture client = cluster.clientFixture()) {
+
+ boolean hasAssertions = false;
+ assert hasAssertions = true;
+ assertFalse(hasAssertions);
+ String sql = "SELECT id_i, name_s10 FROM `mock`.`customers_10`";
+ client.queryBuilder().sql(sql).run();
+
+ client.alterSession(ExecConstants.ENABLE_VECTOR_VALIDATION, false);
+ client.queryBuilder().sql(sql).run();
+
+ client.alterSession(ExecConstants.ENABLE_ITERATOR_VALIDATION, false);
+ client.queryBuilder().sql(sql).run();
+ }
+ }
+
+ /**
+ * Config options override session options. Config options allow passing in
+ * the setting at run time on the command line. This is a work-around for the
+ * fact that the config system has no generic solution at present.
+ *
+ * @throws Exception if anything goes wrong
+ */
+
+ @Test
+ public void testConfig() throws Exception {
+ FixtureBuilder builder = ClusterFixture.builder()
+ .maxParallelization(1)
+ .configProperty(ExecConstants.ENABLE_ITERATOR_VALIDATION, true)
+ .configProperty(ExecConstants.ENABLE_VECTOR_VALIDATION, true)
+ .sessionOption(ExecConstants.ENABLE_ITERATOR_VALIDATION_OPTION, false)
+ .sessionOption(ExecConstants.ENABLE_VECTOR_VALIDATION_OPTION, false)
+ ;
+ try (ClusterFixture cluster = builder.build();
+ ClientFixture client = cluster.clientFixture()) {
+
+ boolean hasAssertions = false;
+ assert hasAssertions = true;
+ assertFalse(hasAssertions);
+ String sql = "SELECT id_i, name_s10 FROM `mock`.`customers_10`";
+ client.queryBuilder().sql(sql).run();
+ }
+ }
+
+ /**
+ * Should do no validation with all-default options.
+ *
+ * @throws Exception
+ */
+
+ @Test
+ public void testDefaults() throws Exception {
+ FixtureBuilder builder = ClusterFixture.builder()
+ .maxParallelization(1)
+ ;
+ try (ClusterFixture cluster = builder.build();
+ ClientFixture client = cluster.clientFixture()) {
+
+ boolean hasAssertions = false;
+ assert hasAssertions = true;
+ assertFalse(hasAssertions);
+ String sql = "SELECT id_i, name_s10 FROM `mock`.`customers_10`";
+ client.queryBuilder().sql(sql).run();
+ }
+ }
+}