aboutsummaryrefslogtreecommitdiff
path: root/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatchCreator.java
diff options
context:
space:
mode:
authorGautam Parai <gparai@maprtech.com>2014-08-21 14:59:53 -0700
committerGautam Parai <gparai@maprtech.com>2019-02-28 12:01:24 -0800
commit469be17597e7b7c6bc1de9863dcb6c5604a55f0c (patch)
tree76a1c2572cfb19a75a0f82e6d165db333797fe3b /exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatchCreator.java
parent3233d8aaff57ac71bd3b726efcd5fdaa92aef861 (diff)
DRILL-1328: Support table statistics - Part 2
Add support for avg row-width and major type statistics. Parallelize the ANALYZE implementation and stats UDF implementation to improve stats collection performance. Update/fix rowcount, selectivity and ndv computations to improve plan costing. Add options for configuring collection/usage of statistics. Add new APIs and implementation for stats writer (as a precursor to Drill Metastore APIs). Fix several stats/costing related issues identified while running TPC-H nad TPC-DS queries. Add support for CPU sampling and nested scalar columns. Add more testcases for collection and usage of statistics and fix remaining unit/functional test failures. Thanks to Venki Korukanti (@vkorukanti) for the description below (modified to account for new changes). He graciously agreed to rebase the patch to latest master, fixed few issues and added few tests. FUNCS: Statistics functions as UDFs: Separate Currently using FieldReader to ensure consistent output type so that Unpivot doesn't get confused. All stats columns should be Nullable, so that stats functions can return NULL when N/A. * custom versions of "count" that always return BigInt * HyperLogLog based NDV that returns BigInt that works only on VarChars * HyperLogLog with binary output that only works on VarChars OPS: Updated protobufs for new ops OPS: Implemented StatisticsMerge OPS: Implemented StatisticsUnpivot ANALYZE: AnalyzeTable functionality * JavaCC syntax more-or-less copied from LucidDB. * (Basic) AnalyzePrule: DrillAnalyzeRel -> UnpivotPrel StatsMergePrel FilterPrel(for sampling) StatsAggPrel ScanPrel ANALYZE: Add getMetadataTable() to AbstractSchema USAGE: Change field access in QueryWrapper USAGE: Add getDrillTable() to DrillScanRelBase and ScanPrel * since ScanPrel does not inherit from DrillScanRelBase, this requires adding a DrillTable to the constructor * This is done so that a custom ReflectiveRelMetadataProvider can access the DrillTable associated with Logical/Physical scans. USAGE: Attach DrillStatsTable to DrillTable. * DrillStatsTable represents the data scanned from a corresponding ".stats.drill" table * In order to avoid doing query execution right after the ".stats.drill" table is found, metadata is not actually collected until the MaterializationVisitor is used. ** Currently, the metadata source must be a string (so that a SQL query can be created). Doing this with a table is probably more complicated. ** Query is set up to extract only the most recent statistics results for each column. closes #729
Diffstat (limited to 'exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatchCreator.java')
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatchCreator.java38
1 files changed, 38 insertions, 0 deletions
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatchCreator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatchCreator.java
new file mode 100644
index 000000000..7599ab0d9
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatchCreator.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.statistics;
+
+import java.util.List;
+import org.apache.drill.common.exceptions.ExecutionSetupException;
+import org.apache.drill.exec.ops.ExecutorFragmentContext;
+import org.apache.drill.exec.physical.config.StatisticsMerge;
+import org.apache.drill.exec.physical.impl.BatchCreator;
+import org.apache.drill.exec.record.CloseableRecordBatch;
+import org.apache.drill.exec.record.RecordBatch;
+import org.apache.drill.shaded.guava.com.google.common.base.Preconditions;
+
+@SuppressWarnings("unused")
+public class StatisticsMergeBatchCreator implements BatchCreator<StatisticsMerge>{
+
+ @Override
+ public CloseableRecordBatch getBatch(ExecutorFragmentContext context, StatisticsMerge config,
+ List<RecordBatch> children) throws ExecutionSetupException {
+ Preconditions.checkArgument(children.size() == 1);
+ return new StatisticsMergeBatch(config, children.iterator().next(), context);
+ }
+}