/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.test;

import java.math.BigDecimal;
import java.math.BigInteger;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;

import java.io.UnsupportedEncodingException;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

import org.apache.drill.shaded.guava.com.google.common.base.Preconditions;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.HyperVectorValueIterator;
import org.apache.drill.exec.exception.SchemaChangeException;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.record.RecordBatchSizer;
import org.apache.drill.exec.proto.UserBitShared;
import org.apache.drill.exec.proto.UserBitShared.QueryType;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.HyperVectorWrapper;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.RecordBatchLoader;
import org.apache.drill.exec.record.VectorAccessible;
import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.record.selection.SelectionVector2;
import org.apache.drill.exec.record.selection.SelectionVector4;
import org.apache.drill.exec.rpc.user.QueryDataBatch;
import org.apache.drill.exec.util.Text;
import org.apache.drill.exec.vector.ValueVector;
import org.apache.drill.test.rowSet.RowSetComparison;
import org.junit.Assert;

/**
 * An object to encapsulate the options for a Drill unit test, as well as the execution methods to
 * perform the tests and validation of results.
 *
 * To construct an instance easily, look at the TestBuilder class. From an implementation of
 * the BaseTestQuery class, an instance of the builder is accessible through the testBuilder() method.
 */
public class DrillTestWrapper {
  static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(BaseTestQuery.class);

  public interface TestServices {
    BufferAllocator allocator();

    void test(String query) throws Exception;

    List<QueryDataBatch> testRunAndReturn(QueryType type, Object query) throws Exception;
  }
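  /*
   * A typical test does not construct this wrapper directly; it drives it through the TestBuilder
   * fluent API mentioned in the class javadoc. A minimal sketch of that usage (the query and the
   * baseline values below are illustrative only and are not part of this class):
   *
   *   testBuilder()
   *       .sqlQuery("select employee_id, full_name from cp.`employee.json` limit 2")
   *       .unOrdered()
   *       .baselineColumns("employee_id", "full_name")
   *       .baselineValues(1L, "Sheri Nowmer")
   *       .baselineValues(2L, "Derrick Whelply")
   *       .go();
   *
   * TestBuilder.go() assembles a DrillTestWrapper from these options and invokes run() below.
   */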
  // TODO - when in JSON, read baseline in all text mode to avoid precision loss for decimal values

  // This flag will enable all of the values that are validated to be logged. For large validations this is time
  // consuming, so this is not exposed in a way that it can be enabled for an individual test. It can be changed
  // here while debugging a test to see all of the output, but as this framework is doing full validation, there
  // is no reason to keep it on as it will only make the test slower.
  private static boolean VERBOSE_DEBUG = false;

  // Unit test doesn't expect any specific batch count
  public static final int EXPECTED_BATCH_COUNT_NOT_SET = -1;
  public static final int EXPECTED_NUM_RECORDS_NOT_SET = -1;

  // The motivation behind the TestBuilder was to provide a clean API for test writers. The model is mostly designed
  // to prepare all of the components necessary for running the tests before the TestWrapper is initialized. There is
  // however one case where the setup for the baseline is driven by the test query results, and this is implicit type
  // enforcement for the baseline data. In this case there needs to be a call back into the TestBuilder once we know
  // the type information from the test query.
  private TestBuilder testBuilder;
  /**
   * Test query to run. Type of object depends on the {@link #queryType}
   */
  private Object query;
  // The type of query provided
  private UserBitShared.QueryType queryType;
  // The type of query provided for the baseline
  private UserBitShared.QueryType baselineQueryType;
  // should ordering be enforced in the baseline check
  private boolean ordered;
  private TestServices services;
  // queries to run before the baseline or test queries, can be used to set options
  private String baselineOptionSettingQueries;
  private String testOptionSettingQueries;
  // allow approximate equality tests for number types
  private boolean approximateEquality;
  // tolerance for approximate equality tests, defined as |Expected - Actual| / |Expected| <= Tolerance
  private double tolerance;
  // Two different methods are available for comparing ordered results. The default reads all of the records into
  // giant lists of objects, like one giant on-heap batch of 'vectors'. This flag enables the other approach, which
  // iterates through a hyper batch for the test query results and baseline. While this does work faster and use
  // less memory, it can be harder to debug as all of the elements are not in a single list.
  private boolean highPerformanceComparison;
  // If the baseline is a single option, test writers can provide the baseline values and columns without creating
  // a file; these are provided to the builder in the baselineValues() and baselineColumns() methods and translated
  // into a map in the builder.
  private String[] baselineColumns;
  private List<Map<String, Object>> baselineRecords;

  private int expectedNumBatches;
  private int expectedNumRecords;

  public DrillTestWrapper(TestBuilder testBuilder, TestServices services, Object query, QueryType queryType,
      String baselineOptionSettingQueries, String testOptionSettingQueries, QueryType baselineQueryType,
      boolean ordered, boolean approximateEquality, double tolerance, boolean highPerformanceComparison,
      String[] baselineColumns, List<Map<String, Object>> baselineRecords, int expectedNumBatches,
      int expectedNumRecords) {
    this.testBuilder = testBuilder;
    this.services = services;
    this.query = query;
    this.queryType = queryType;
    this.baselineQueryType = baselineQueryType;
    this.ordered = ordered;
    this.approximateEquality = approximateEquality;
    this.tolerance = tolerance;
    this.baselineOptionSettingQueries = baselineOptionSettingQueries;
    this.testOptionSettingQueries = testOptionSettingQueries;
    this.highPerformanceComparison = highPerformanceComparison;
    this.baselineColumns = baselineColumns;
    this.baselineRecords = baselineRecords;
    this.expectedNumBatches = expectedNumBatches;
    this.expectedNumRecords = expectedNumRecords;

    Preconditions.checkArgument(!(baselineRecords != null && !ordered && highPerformanceComparison));
    Preconditions.checkArgument((baselineRecords != null
        && expectedNumRecords == DrillTestWrapper.EXPECTED_NUM_RECORDS_NOT_SET) || baselineRecords == null,
        "Cannot define both baselineRecords and the expectedNumRecords.");
    Preconditions.checkArgument((baselineQueryType != null
        && expectedNumRecords == DrillTestWrapper.EXPECTED_NUM_RECORDS_NOT_SET) || baselineQueryType == null,
        "Cannot define both a baselineQueryType and the expectedNumRecords.");
  }

  public void run() throws Exception {
    if (testBuilder.getExpectedSchema() != null) {
      compareSchemaOnly();
    } else {
      if (ordered) {
        compareOrderedResults();
      } else {
        compareUnorderedResults();
      }
    }
  }

  private BufferAllocator getAllocator() {
    return services.allocator();
  }

  private void compareHyperVectors(Map<String, HyperVectorValueIterator> expectedRecords,
      Map<String, HyperVectorValueIterator> actualRecords) throws Exception {
    for (String s : expectedRecords.keySet()) {
      assertNotNull("Expected column '" + s + "' not found.", actualRecords.get(s));
      assertEquals(expectedRecords.get(s).getTotalRecords(), actualRecords.get(s).getTotalRecords());
      HyperVectorValueIterator expectedValues = expectedRecords.get(s);
      HyperVectorValueIterator actualValues = actualRecords.get(s);
      int i = 0;
      while (expectedValues.hasNext()) {
        compareValuesErrorOnMismatch(expectedValues.next(), actualValues.next(), i, s);
        i++;
      }
    }
    cleanupHyperValueIterators(expectedRecords.values());
    cleanupHyperValueIterators(actualRecords.values());
  }

  private void cleanupHyperValueIterators(Collection<HyperVectorValueIterator> hyperBatches) {
    for (HyperVectorValueIterator hvi : hyperBatches) {
      for (ValueVector vv : hvi.getHyperVector().getValueVectors()) {
        vv.clear();
      }
    }
  }

  public static void compareMergedVectors(Map<String, List<Object>> expectedRecords,
      Map<String, List<Object>> actualRecords) throws Exception {
    for (String s : actualRecords.keySet()) {
      assertNotNull("Unexpected extra column " + s + " returned by query.", expectedRecords.get(s));
      assertEquals("Incorrect number of rows returned by query.", expectedRecords.get(s).size(), actualRecords.get(s).size());
      List<Object> expectedValues = expectedRecords.get(s);
      List<Object> actualValues = actualRecords.get(s);
      assertEquals("Different number of records returned", expectedValues.size(), actualValues.size());

      for (int i = 0; i < expectedValues.size(); i++) {
        try {
          compareValuesErrorOnMismatch(expectedValues.get(i), actualValues.get(i), i, s);
        } catch (Exception ex) {
          throw new Exception(ex.getMessage() + "\n\n" + printNearbyRecords(expectedRecords, actualRecords, i), ex);
        }
      }
    }
    if (actualRecords.size() < expectedRecords.size()) {
      throw new Exception(findMissingColumns(expectedRecords.keySet(), actualRecords.keySet()));
    }
  }

  private static String printNearbyRecords(Map<String, List<Object>> expectedRecords,
      Map<String, List<Object>> actualRecords, int offset) {
    StringBuilder expected = new StringBuilder();
    StringBuilder actual = new StringBuilder();
    expected.append("Expected Records near verification failure:\n");
    actual.append("Actual Records near verification failure:\n");
    int firstRecordToPrint = Math.max(0, offset - 5);
    List<Object> expectedValuesInFirstColumn = expectedRecords.get(expectedRecords.keySet().iterator().next());
    List<Object> actualValuesInFirstColumn = actualRecords.get(actualRecords.keySet().iterator().next());
    int numberOfRecordsToPrint = Math.min(Math.min(10, expectedValuesInFirstColumn.size()), actualValuesInFirstColumn.size());
    for (int i = firstRecordToPrint; i < numberOfRecordsToPrint; i++) {
      expected.append("Record Number: ").append(i).append(" { ");
      actual.append("Record Number: ").append(i).append(" { ");
Number: ").append(i).append(" { "); for (String s : actualRecords.keySet()) { List actualValues = actualRecords.get(s); actual.append(s).append(" : ").append(actualValues.get(i)).append(","); } for (String s : expectedRecords.keySet()) { List expectedValues = expectedRecords.get(s); expected.append(s).append(" : ").append(expectedValues.get(i)).append(","); } expected.append(" }\n"); actual.append(" }\n"); } return expected.append("\n\n").append(actual).toString(); } private Map addToHyperVectorMap(final List records, final RecordBatchLoader loader) throws SchemaChangeException, UnsupportedEncodingException { // TODO - this does not handle schema changes Map combinedVectors = new TreeMap<>(); long totalRecords = 0; QueryDataBatch batch; int size = records.size(); for (int i = 0; i < size; i++) { batch = records.get(i); loader.load(batch.getHeader().getDef(), batch.getData()); logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords); totalRecords += loader.getRecordCount(); for (VectorWrapper w : loader) { String field = SchemaPath.getSimplePath(w.getField().getName()).toExpr(); if (!combinedVectors.containsKey(field)) { MaterializedField mf = w.getField(); ValueVector[] vvList = (ValueVector[]) Array.newInstance(mf.getValueClass(), 1); vvList[0] = w.getValueVector(); combinedVectors.put(field, new HyperVectorValueIterator(mf, new HyperVectorWrapper<>(mf, vvList))); } else { combinedVectors.get(field).getHyperVector().addVector(w.getValueVector()); } } } for (HyperVectorValueIterator hvi : combinedVectors.values()) { hvi.determineTotalSize(); } return combinedVectors; } private static class BatchIterator implements Iterable, AutoCloseable { private final List dataBatches; private final RecordBatchLoader batchLoader; public BatchIterator(List dataBatches, RecordBatchLoader batchLoader) { this.dataBatches = dataBatches; this.batchLoader = batchLoader; } @Override public Iterator iterator() { return new Iterator() { int index = -1; @Override public boolean hasNext() { return index < dataBatches.size() - 1; } @Override public VectorAccessible next() { index++; if (index == dataBatches.size()) { throw new RuntimeException("Tried to call next when iterator had no more items."); } batchLoader.clear(); QueryDataBatch batch = dataBatches.get(index); try { batchLoader.load(batch.getHeader().getDef(), batch.getData()); } catch (SchemaChangeException e) { throw new RuntimeException(e); } return batchLoader; } @Override public void remove() { throw new UnsupportedOperationException("Removing is not supported"); } }; } @Override public void close() throws Exception { batchLoader.clear(); } } /** * Iterate over batches, and combine the batches into a map, where key is schema path, and value is * the list of column values across all the batches. * @param batches * @param expectedTotalRecords * @return * @throws SchemaChangeException * @throws UnsupportedEncodingException */ public static Map> addToCombinedVectorResults(Iterable batches, Long expectedBatchSize, Integer expectedNumBatches, Integer expectedTotalRecords) throws SchemaChangeException, UnsupportedEncodingException { Map> combinedVectors = new TreeMap<>(); addToCombinedVectorResults(batches, null, expectedBatchSize, expectedNumBatches, combinedVectors, expectedTotalRecords); return combinedVectors; } /** * Add to result vectors and compare batch schema against expected schema while iterating batches. * @param batches * @param expectedSchema: the expected schema the batches should contain. 
  /**
   * Add to result vectors and compare batch schema against expected schema while iterating batches.
   * @param batches
   * @param expectedSchema: the expected schema the batches should contain. Throws SchemaChangeException
   *                        if a batch with a different schema is encountered.
   * @param combinedVectors: the vectors to hold the values when iterating the batches.
   *
   * @return number of batches
   * @throws SchemaChangeException
   * @throws UnsupportedEncodingException
   */
  public static int addToCombinedVectorResults(Iterable<VectorAccessible> batches, BatchSchema expectedSchema,
      Long expectedBatchSize, Integer expectedNumBatches, Map<String, List<Object>> combinedVectors,
      Integer expectedTotalRecords) throws SchemaChangeException, UnsupportedEncodingException {
    // TODO - this does not handle schema changes
    int numBatch = 0;
    long totalRecords = 0;
    BatchSchema schema = null;
    for (VectorAccessible loader : batches) {
      numBatch++;
      if (expectedSchema != null) {
        if (! expectedSchema.isEquivalent(loader.getSchema())) {
          throw new SchemaChangeException(String.format("Batch schema does not match expected schema\n" +
              "Actual schema: %s. Expected schema : %s", loader.getSchema(), expectedSchema));
        }
      }

      if (expectedBatchSize != null) {
        RecordBatchSizer sizer = new RecordBatchSizer(loader);
        // Not checking actualSize as accounting is not correct when we do
        // split and transfer ownership across operators.
        Assert.assertTrue(sizer.getNetBatchSize() <= expectedBatchSize);
      }

      // TODO: Clean: DRILL-2933: That load(...) no longer throws
      // SchemaChangeException, so check/clean throws clause above.
      if (schema == null) {
        schema = loader.getSchema();
        for (MaterializedField mf : schema) {
          combinedVectors.put(SchemaPath.getSimplePath(mf.getName()).toExpr(), new ArrayList<>());
        }
      } else {
        // TODO - actually handle schema changes, this is just to get access to the SelectionVectorMode
        // of the current batch, the check for a null schema is used to only mutate the schema once
        // need to add new vectors and null fill for previous batches? distinction between null and non-existence important?
        schema = loader.getSchema();
      }
      logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
      totalRecords += loader.getRecordCount();
      for (VectorWrapper<?> w : loader) {
        String field = SchemaPath.getSimplePath(w.getField().getName()).toExpr();
        ValueVector[] vectors;
        if (w.isHyper()) {
          vectors = w.getValueVectors();
        } else {
          vectors = new ValueVector[] {w.getValueVector()};
        }
        SelectionVector2 sv2 = null;
        SelectionVector4 sv4 = null;
        switch(schema.getSelectionVectorMode()) {
          case TWO_BYTE:
            sv2 = loader.getSelectionVector2();
            break;
          case FOUR_BYTE:
            sv4 = loader.getSelectionVector4();
            break;
        }
        if (sv4 != null) {
          for (int j = 0; j < sv4.getCount(); j++) {
            // The four-byte selection vector encodes the batch index in the upper 16 bits and the
            // record index within that batch in the lower 16 bits.
            int complexIndex = sv4.get(j);
            int batchIndex = complexIndex >> 16;
            int recordIndexInBatch = complexIndex & 65535;
            Object obj = vectors[batchIndex].getAccessor().getObject(recordIndexInBatch);
            if (obj != null) {
              if (obj instanceof Text) {
                obj = obj.toString();
              }
            }
            combinedVectors.get(field).add(obj);
          }
        } else {
          for (ValueVector vv : vectors) {
            for (int j = 0; j < loader.getRecordCount(); j++) {
              int index;
              if (sv2 != null) {
                index = sv2.getIndex(j);
              } else {
                index = j;
              }
              Object obj = vv.getAccessor().getObject(index);
              if (obj != null) {
                if (obj instanceof Text) {
                  obj = obj.toString();
                }
              }
              combinedVectors.get(field).add(obj);
            }
          }
        }
      }
    }

    if (expectedNumBatches != null) {
      // Based on how much memory is actually taken by value vectors (because of doubling stuff),
      // we have to do complex math for predicting exact number of batches.
      // Instead, check that number of batches is at least the minimum that is expected
      // and no more than twice of that.
      Assert.assertTrue(numBatch >= expectedNumBatches);
      Assert.assertTrue(numBatch <= (2 * expectedNumBatches));
    }

    if (expectedTotalRecords != null) {
      Assert.assertEquals(expectedTotalRecords.longValue(), totalRecords);
    }
    return numBatch;
  }

  protected void compareSchemaOnly() throws Exception {
    RecordBatchLoader loader = new RecordBatchLoader(getAllocator());
    List<QueryDataBatch> actual = null;
    QueryDataBatch batch = null;
    try {
      test(testOptionSettingQueries);
      actual = testRunAndReturn(queryType, query);
      batch = actual.get(0);
      loader.load(batch.getHeader().getDef(), batch.getData());

      final BatchSchema schema = loader.getSchema();
      final List<Pair<SchemaPath, TypeProtos.MajorType>> expectedSchema = testBuilder.getExpectedSchema();
      if (schema.getFieldCount() != expectedSchema.size()) {
        throw new Exception("Expected and actual numbers of columns do not match.");
      }

      for (int i = 0; i < schema.getFieldCount(); ++i) {
        final String actualSchemaPath = schema.getColumn(i).getName();
        final TypeProtos.MajorType actualMajorType = schema.getColumn(i).getType();

        final String expectedSchemaPath = expectedSchema.get(i).getLeft().getRootSegmentPath();
        final TypeProtos.MajorType expectedMajorType = expectedSchema.get(i).getValue();

        if (! actualSchemaPath.equals(expectedSchemaPath) || ! Types.isEquivalent(actualMajorType, expectedMajorType)) {
          throw new Exception(String.format("Schema path or type mismatch for column #%d:\n" +
              "Expected schema path: %s\nActual schema path: %s\nExpected type: %s\nActual type: %s",
              i, expectedSchemaPath, actualSchemaPath,
              Types.toString(expectedMajorType), Types.toString(actualMajorType)));
        }
      }
    } finally {
      if (actual != null) {
        for (QueryDataBatch b : actual) {
          b.release();
        }
      }
      loader.clear();
    }
  }

  /**
   * Use this method only if necessary to validate one query against another. If you are just validating against a
   * baseline file use one of the simpler interfaces that will write the validation query for you.
   *
   * @throws Exception
   */
  protected void compareUnorderedResults() throws Exception {
    RecordBatchLoader loader = new RecordBatchLoader(getAllocator());

    List<QueryDataBatch> actual = Collections.emptyList();
    List<QueryDataBatch> expected = Collections.emptyList();
    List<Map<String, Object>> expectedRecords = new ArrayList<>();
    List<Map<String, Object>> actualRecords = new ArrayList<>();

    try {
      test(testOptionSettingQueries);
      actual = testRunAndReturn(queryType, query);

      checkNumBatches(actual);

      addTypeInfoIfMissing(actual.get(0), testBuilder);
      addToMaterializedResults(actualRecords, actual, loader);

      // If the actual result set is empty, the baseline records do not exist, and baselineColumns were provided,
      // compare the actual column count and names against the expected columns.
      if (actualRecords.size() == 0
          && (baselineRecords == null || baselineRecords.size() == 0)
          && baselineColumns != null) {
        checkColumnDef(loader.getSchema());
      }

      // If baseline data was not provided to the test builder directly, we must run a query for the baseline.
      // This includes the cases where the baseline is stored in a file.
      if (baselineRecords == null) {
        if (expectedNumRecords != DrillTestWrapper.EXPECTED_NUM_RECORDS_NOT_SET) {
          Assert.assertEquals(expectedNumRecords, actualRecords.size());
          return;
        } else {
          test(baselineOptionSettingQueries);
          expected = testRunAndReturn(baselineQueryType, testBuilder.getValidationQuery());
          addToMaterializedResults(expectedRecords, expected, loader);
        }
      } else {
        expectedRecords = baselineRecords;
      }

      compareResults(expectedRecords, actualRecords);
    } finally {
      cleanupBatches(actual, expected);
    }
  }

  public void checkColumnDef(BatchSchema batchSchema) throws Exception {
    assert (batchSchema != null && batchSchema.getFieldCount() == baselineColumns.length);
    for (int i = 0; i < baselineColumns.length; ++i) {
      if (!SchemaPath.parseFromString(baselineColumns[i])
          .equals(SchemaPath.parseFromString(batchSchema.getColumn(i).getName()))) {
        throw new Exception(String.format("Expected column name '%s' at index %d does not match actual column name '%s'.",
            baselineColumns[i], i, batchSchema.getColumn(i).getName()));
      }
    }
  }

  protected void compareOrderedResults() throws Exception {
    if (highPerformanceComparison) {
      if (baselineQueryType == null) {
        throw new Exception("Cannot do a high performance comparison without using a baseline query");
      }
      compareResultsHyperVector();
    } else {
      compareMergedOnHeapVectors();
    }
  }

  public void compareMergedOnHeapVectors() throws Exception {
    RecordBatchLoader loader = new RecordBatchLoader(getAllocator());

    List<QueryDataBatch> actual = Collections.emptyList();
    List<QueryDataBatch> expected = Collections.emptyList();
    Map<String, List<Object>> actualSuperVectors;
    Map<String, List<Object>> expectedSuperVectors = null;

    try {
      test(testOptionSettingQueries);
      actual = testRunAndReturn(queryType, query);

      checkNumBatches(actual);

      // To avoid extra work for test writers, types can optionally be inferred from the test query
      addTypeInfoIfMissing(actual.get(0), testBuilder);

      BatchIterator batchIter = new BatchIterator(actual, loader);
      actualSuperVectors = addToCombinedVectorResults(batchIter, null, null, null);
      batchIter.close();

      // If baseline data was not provided to the test builder directly, we must run a query for the baseline.
      // This includes the cases where the baseline is stored in a file.
      if (baselineRecords == null) {
        if (baselineQueryType == null && baselineColumns != null) {
          checkAscendingOrdering(actualSuperVectors);
          return;
        } else {
          test(baselineOptionSettingQueries);
          expected = testRunAndReturn(baselineQueryType, testBuilder.getValidationQuery());
          BatchIterator exBatchIter = new BatchIterator(expected, loader);
          expectedSuperVectors = addToCombinedVectorResults(exBatchIter, null, null, null);
          exBatchIter.close();
        }
      } else {
        // Data is built in the TestBuilder in a row-major format, as it is provided by the user.
        // Translate it here to the vectorized (column-major) representation expected by the ordered comparison.
        expectedSuperVectors = translateRecordListToHeapVectors(baselineRecords);
      }

      compareMergedVectors(expectedSuperVectors, actualSuperVectors);
    } catch (Exception e) {
      throw new Exception(e.getMessage() + "\nFor query: " + query, e);
    } finally {
      cleanupBatches(expected, actual);
    }
  }

  private void checkAscendingOrdering(Map<String, List<Object>> results) {
    int numRecords = results.get(baselineColumns[0]).size();

    for (int index = 1; index < numRecords; index++) {
      int prevIndex = index - 1;
      for (String column : baselineColumns) {
        List<Object> objects = results.get(column);
        Object prevObject = objects.get(prevIndex);
        Object currentObject = objects.get(index);
        Assert.assertTrue(RowSetComparison.ObjectComparator.INSTANCE.compare(prevObject, currentObject) <= 0);
      }
    }
  }

  public static Map<String, List<Object>> translateRecordListToHeapVectors(List<Map<String, Object>> records) {
    Map<String, List<Object>> ret = new TreeMap<>();
    if (records == null) {
      return ret;
    }
    for (String s : records.get(0).keySet()) {
      ret.put(s, new ArrayList<>());
    }
    for (Map<String, Object> m : records) {
      for (String s : m.keySet()) {
        ret.get(s).add(m.get(s));
      }
    }
    return ret;
  }

  public void compareResultsHyperVector() throws Exception {
    RecordBatchLoader loader = new RecordBatchLoader(getAllocator());

    test(testOptionSettingQueries);
    List<QueryDataBatch> results = testRunAndReturn(queryType, query);

    checkNumBatches(results);

    // To avoid extra work for test writers, types can optionally be inferred from the test query
    addTypeInfoIfMissing(results.get(0), testBuilder);

    Map<String, HyperVectorValueIterator> actualSuperVectors = addToHyperVectorMap(results, loader);

    test(baselineOptionSettingQueries);
    List<QueryDataBatch> expected = testRunAndReturn(baselineQueryType, testBuilder.getValidationQuery());
    Map<String, HyperVectorValueIterator> expectedSuperVectors = addToHyperVectorMap(expected, loader);

    compareHyperVectors(expectedSuperVectors, actualSuperVectors);
    cleanupBatches(results, expected);
  }

  private void checkNumBatches(final List<QueryDataBatch> results) {
    if (expectedNumBatches != EXPECTED_BATCH_COUNT_NOT_SET) {
      final int actualNumBatches = results.size();
      assertEquals(String.format("Expected %d batches but query returned %d non empty batch(es)%n", expectedNumBatches,
          actualNumBatches), expectedNumBatches, actualNumBatches);
    }
  }

  private void addTypeInfoIfMissing(QueryDataBatch batch, TestBuilder testBuilder) {
    if (! testBuilder.typeInfoSet()) {
      Map<SchemaPath, TypeProtos.MajorType> typeMap = getTypeMapFromBatch(batch);
      testBuilder.baselineTypes(typeMap);
    }
  }

  private Map<SchemaPath, TypeProtos.MajorType> getTypeMapFromBatch(QueryDataBatch batch) {
    Map<SchemaPath, TypeProtos.MajorType> typeMap = new HashMap<>();
    for (int i = 0; i < batch.getHeader().getDef().getFieldCount(); i++) {
      typeMap.put(SchemaPath.getSimplePath(MaterializedField.create(batch.getHeader().getDef().getField(i)).getName()),
          batch.getHeader().getDef().getField(i).getMajorType());
    }
    return typeMap;
  }

  @SafeVarargs
  private final void cleanupBatches(List<QueryDataBatch>... results) {
    for (List<QueryDataBatch> resultList : results) {
      for (QueryDataBatch result : resultList) {
        result.release();
      }
    }
  }

  public static void addToMaterializedResults(List<Map<String, Object>> materializedRecords,
      List<QueryDataBatch> records, RecordBatchLoader loader)
      throws SchemaChangeException, UnsupportedEncodingException {
    long totalRecords = 0;
    QueryDataBatch batch;
    int size = records.size();
    for (int i = 0; i < size; i++) {
      batch = records.get(0);
      loader.load(batch.getHeader().getDef(), batch.getData());
      // TODO: Clean: DRILL-2933: That load(...) no longer throws
      // SchemaChangeException, so check/clean throws clause above.
      logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
      totalRecords += loader.getRecordCount();
      for (int j = 0; j < loader.getRecordCount(); j++) {
        Map<String, Object> record = new TreeMap<>();
        for (VectorWrapper<?> w : loader) {
          Object obj = w.getValueVector().getAccessor().getObject(j);
          if (obj instanceof Text) {
            obj = obj.toString();
          }
          record.put(SchemaPath.getSimplePath(w.getField().getName()).toExpr(), obj);
        }
        materializedRecords.add(record);
      }
      records.remove(0);
      batch.release();
      loader.clear();
    }
  }

  public static boolean compareValuesErrorOnMismatch(Object expected, Object actual, int counter, String column) throws Exception {

    if (compareValues(expected, actual, counter, column)) {
      return true;
    }
    if (expected == null) {
      throw new Exception("at position " + counter + " column '" + column + "' mismatched values, expected: null " +
          "but received " + actual + "(" + actual.getClass().getSimpleName() + ")");
    }
    if (actual == null) {
      throw new Exception("unexpected null at position " + counter + " column '" + column + "' should have been: " + expected);
    }
    if (actual instanceof byte[]) {
      throw new Exception("at position " + counter + " column '" + column + "' mismatched values, expected: "
          + new String((byte[]) expected, "UTF-8") + " but received " + new String((byte[]) actual, "UTF-8"));
    }
    if (!expected.equals(actual)) {
      throw new Exception("at position " + counter + " column '" + column + "' mismatched values, expected: "
          + expected + "(" + expected.getClass().getSimpleName() + ") but received " + actual + "("
          + actual.getClass().getSimpleName() + ")");
    }
    return true;
  }
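  // Approximate equality below uses a relative tolerance: |expected - actual| / |expected| <= tolerance.
  // As a purely illustrative example, with tolerance = 0.005 an expected value of 100.0 accepts any actual
  // value in [99.5, 100.5]; an exact equals() match is always accepted first, regardless of tolerance.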
  public static boolean compareValues(Object expected, Object actual, int counter, String column) throws Exception {
    return compareValues(expected, actual, counter, column, false, 0);
  }

  public static boolean compareValues(Object expected, Object actual, int counter, String column,
      boolean approximateEquality, double tolerance) throws Exception {
    if (expected == null) {
      if (actual == null) {
        if (VERBOSE_DEBUG) {
          logger.debug("(1) at position " + counter + " column '" + column + "' matched value: " + expected);
        }
        return true;
      } else {
        return false;
      }
    }
    if (actual == null) {
      return false;
    }
    if (actual instanceof byte[]) {
      if (!Arrays.equals((byte[]) expected, (byte[]) actual)) {
        return false;
      } else {
        if (VERBOSE_DEBUG) {
          logger.debug("at position " + counter + " column '" + column + "' matched value "
              + new String((byte[]) expected, "UTF-8"));
        }
        return true;
      }
    }
    if (!expected.equals(actual)) {
      if (approximateEquality && expected instanceof Number && actual instanceof Number) {
        if (expected instanceof BigDecimal && actual instanceof BigDecimal) {
          if (((((BigDecimal) expected).subtract((BigDecimal) actual)).abs().divide((BigDecimal) expected).abs())
              .compareTo(BigDecimal.valueOf(tolerance)) <= 0) {
            return true;
          }
        } else if (expected instanceof BigInteger && actual instanceof BigInteger) {
          BigDecimal expBD = new BigDecimal((BigInteger) expected);
          BigDecimal actBD = new BigDecimal((BigInteger) actual);
          if ((expBD.subtract(actBD)).abs().divide(expBD.abs()).compareTo(BigDecimal.valueOf(tolerance)) <= 0) {
            return true;
          }
        } else if (!(expected instanceof BigDecimal || expected instanceof BigInteger)
            && !(actual instanceof BigDecimal || actual instanceof BigInteger)) {
          // For all other types cast to double and compare
          if (Math.abs((double) expected - (double) actual) / Math.abs((double) expected) <= tolerance) {
            return true;
          }
        }
      }
      return false;
    } else {
      if (VERBOSE_DEBUG) {
        logger.debug("at position " + counter + " column '" + column + "' matched value: " + expected);
      }
    }
    return true;
  }

  /**
   * Compare two result sets, ignoring ordering.
   *
   * @param expectedRecords - list of records from baseline
   * @param actualRecords - list of records from test query, WARNING - this list is destroyed in this method
   * @throws Exception
   */
  private void compareResults(List<Map<String, Object>> expectedRecords,
      List<Map<String, Object>> actualRecords) throws Exception {
    assertEquals("Different number of records returned", expectedRecords.size(), actualRecords.size());

    int i = 0;
    int counter = 0;
    boolean found;
    for (Map<String, Object> expectedRecord : expectedRecords) {
      i = 0;
      found = false;
      findMatch:
      for (Map<String, Object> actualRecord : actualRecords) {
        for (String s : actualRecord.keySet()) {
          if (!expectedRecord.containsKey(s)) {
            throw new Exception("Unexpected column '" + s + "' returned by query.");
          }
          if (!compareValues(expectedRecord.get(s), actualRecord.get(s), counter, s, approximateEquality, tolerance)) {
            i++;
            continue findMatch;
          }
        }
        if (actualRecord.size() < expectedRecord.size()) {
          throw new Exception(findMissingColumns(expectedRecord.keySet(), actualRecord.keySet()));
        }
        found = true;
        break;
      }
      if (!found) {
        StringBuilder sb = new StringBuilder();
        for (int expectedRecordDisplayCount = 0;
             expectedRecordDisplayCount < 10 && expectedRecordDisplayCount < expectedRecords.size();
             expectedRecordDisplayCount++) {
          sb.append(printRecord(expectedRecords.get(expectedRecordDisplayCount)));
        }
        String expectedRecordExamples = sb.toString();
        sb.setLength(0);
        for (int actualRecordDisplayCount = 0;
             actualRecordDisplayCount < 10 && actualRecordDisplayCount < actualRecords.size();
             actualRecordDisplayCount++) {
          sb.append(printRecord(actualRecords.get(actualRecordDisplayCount)));
        }
        String actualRecordExamples = sb.toString();
        throw new Exception(String.format("After matching %d records, did not find expected record in result set:\n %s\n\n" +
            "Some examples of expected records:\n%s\n\n Some examples of records returned by the test query:\n%s",
            counter, printRecord(expectedRecord), expectedRecordExamples, actualRecordExamples));
      } else {
        actualRecords.remove(i);
        counter++;
      }
    }
    assertEquals(0, actualRecords.size());
  }

  private static String findMissingColumns(Set<String> expected, Set<String> actual) {
    String missingCols = "";
    for (String colName : expected) {
      if (!actual.contains(colName)) {
        missingCols += colName + ", ";
      }
    }
    return "Expected column(s) " + missingCols + " not found in result set: " + actual + ".";
  }

  private String printRecord(Map<String, ?> record) {
    String ret = "";
    for (String s : record.keySet()) {
      ret += s + " : " + record.get(s) + ", ";
    }
    return ret + "\n";
  }

  private void test(String query) throws Exception {
    services.test(query);
  }

  private List<QueryDataBatch> testRunAndReturn(QueryType type, Object query) throws Exception {
    return services.testRunAndReturn(type, query);
  }
}