aboutsummaryrefslogtreecommitdiff
path: root/exec/java-exec/src/main
diff options
context:
space:
mode:
authorJinfeng Ni <jni@maprtech.com>2014-07-24 11:51:35 -0700
committerJacques Nadeau <jacques@apache.org>2014-07-29 08:36:15 -0700
commit91b33f98501cab8081f9cf2dce74418b4a018019 (patch)
tree0daa52876a7abe83e60b2334c5671d49bd738314 /exec/java-exec/src/main
parentdeaca5d8ec81ed03b0ad6a187c16723717ba2c51 (diff)
DRILL-931: Support select * query in Drill planner. (Part of change is in Optiq).
Diffstat (limited to 'exec/java-exec/src/main')
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/planner/StarColumnHelper.java72
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillProjectRelBase.java25
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/ProjectAllowDupPrel.java67
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/JoinPrelRenameVisitor.java1
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/StarColumnConverter.java182
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java10
6 files changed, 354 insertions, 3 deletions
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/StarColumnHelper.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/StarColumnHelper.java
new file mode 100644
index 000000000..4c04b22e3
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/StarColumnHelper.java
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.drill.exec.planner;
+
+import java.util.List;
+import java.util.Set;
+
+import org.eigenbase.reltype.RelDataType;
+
+public class StarColumnHelper {
+
+ public final static String PREFIX_DELIMITER = "\u00a6\u00a6";
+
+ public final static String STAR_COLUMN = "*";
+
+ public final static String PREFIXED_STAR_COLUMN = PREFIX_DELIMITER + STAR_COLUMN;
+
+ public static boolean containsStarColumn(RelDataType type) {
+ List<String> fieldNames = type.getFieldNames();
+
+ for (String s : fieldNames) {
+ if (s.startsWith(STAR_COLUMN))
+ return true;
+ }
+
+ return false;
+ }
+
+ public static boolean isPrefixedStarColumn(String fieldName) {
+ return fieldName.indexOf(PREFIXED_STAR_COLUMN) > 0 ; // the delimiter * starts at none-zero position.
+ }
+
+ public static String extractStarColumnPrefix(String fieldName) {
+
+ assert (isPrefixedStarColumn(fieldName));
+
+ return fieldName.substring(0, fieldName.indexOf(PREFIXED_STAR_COLUMN));
+ }
+
+ public static String extractColumnPrefix(String fieldName) {
+ if (fieldName.indexOf(PREFIX_DELIMITER) >=0) {
+ return fieldName.substring(0, fieldName.indexOf(PREFIX_DELIMITER));
+ } else {
+ return "";
+ }
+ }
+
+ // Given a set of prefixes, check if a regular column is subsumed by any of the prefixed star column in the set.
+ public static boolean subsumeRegColumn(Set<String> prefixes, String fieldName) {
+ if (isPrefixedStarColumn(fieldName))
+ return false; // only applies to regular column.
+
+ return prefixes.contains(extractColumnPrefix(fieldName));
+ }
+
+}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillProjectRelBase.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillProjectRelBase.java
index 14817be9a..7658bb0b0 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillProjectRelBase.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillProjectRelBase.java
@@ -17,11 +17,13 @@
*/
package org.apache.drill.exec.planner.common;
+import java.util.HashSet;
import java.util.List;
import org.apache.drill.common.expression.FieldReference;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.common.logical.data.NamedExpression;
+import org.apache.drill.exec.planner.StarColumnHelper;
import org.apache.drill.exec.planner.cost.DrillCostBase;
import org.apache.drill.exec.planner.cost.DrillCostBase.DrillCostFactory;
import org.apache.drill.exec.planner.logical.DrillOptiq;
@@ -69,11 +71,30 @@ public abstract class DrillProjectRelBase extends ProjectRelBase implements Dril
return Pair.zip(exps, getRowType().getFieldNames());
}
+ // By default, the project will not allow duplicate columns, caused by expanding from * column.
+ // For example, if we have T1_*, T1_Col1, T1_Col2, Col1 and Col2 will have two copies if we expand
+ // * into a list of regular columns. For the intermediate project, the duplicate columns are not
+ // necessary; it will impact performance.
protected List<NamedExpression> getProjectExpressions(DrillParseContext context) {
List<NamedExpression> expressions = Lists.newArrayList();
+
+ HashSet<String> starColPrefixes = new HashSet<String>();
+
+ // To remove duplicate columns caused by expanding from * column, we'll keep track of
+ // all the prefix in the project expressions. If a regular column C1 have the same prefix, that
+ // regular column is not included in the project expression, since at execution time, * will be
+ // expanded into a list of column, including column C1.
+ for (String fieldName : getRowType().getFieldNames()) {
+ if (StarColumnHelper.isPrefixedStarColumn(fieldName)) {
+ starColPrefixes.add(StarColumnHelper.extractStarColumnPrefix(fieldName));
+ }
+ }
+
for (Pair<RexNode, String> pair : projects()) {
- LogicalExpression expr = DrillOptiq.toDrill(context, getChild(), pair.left);
- expressions.add(new NamedExpression(expr, FieldReference.getWithQuotedRef(pair.right)));
+ if (! StarColumnHelper.subsumeRegColumn(starColPrefixes, pair.right)) {
+ LogicalExpression expr = DrillOptiq.toDrill(context, getChild(), pair.left);
+ expressions.add(new NamedExpression(expr, FieldReference.getWithQuotedRef(pair.right)));
+ }
}
return expressions;
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/ProjectAllowDupPrel.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/ProjectAllowDupPrel.java
new file mode 100644
index 000000000..a67dbf2a5
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/ProjectAllowDupPrel.java
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.drill.exec.planner.physical;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.drill.common.expression.FieldReference;
+import org.apache.drill.common.expression.LogicalExpression;
+import org.apache.drill.common.logical.data.NamedExpression;
+import org.apache.drill.exec.physical.base.PhysicalOperator;
+import org.apache.drill.exec.physical.config.Project;
+import org.apache.drill.exec.planner.logical.DrillOptiq;
+import org.apache.drill.exec.planner.logical.DrillParseContext;
+import org.apache.drill.exec.planner.physical.ProjectPrel;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.relopt.RelTraitSet;
+import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.util.Pair;
+
+import com.google.common.collect.Lists;
+
+public class ProjectAllowDupPrel extends ProjectPrel {
+
+ public ProjectAllowDupPrel(RelOptCluster cluster, RelTraitSet traits, RelNode child, List<RexNode> exps,
+ RelDataType rowType) {
+ super(cluster, traits, child, exps, rowType);
+ }
+
+ @Override
+ public PhysicalOperator getPhysicalOperator(PhysicalPlanCreator creator) throws IOException {
+ Prel child = (Prel) this.getChild();
+
+ PhysicalOperator childPOP = child.getPhysicalOperator(creator);
+
+ Project p = new Project(this.getProjectExpressions(new DrillParseContext()), childPOP);
+ return creator.addMetadata(this, p);
+ }
+
+ protected List<NamedExpression> getProjectExpressions(DrillParseContext context) {
+ List<NamedExpression> expressions = Lists.newArrayList();
+ for (Pair<RexNode, String> pair : Pair.zip(exps, getRowType().getFieldNames())) {
+ LogicalExpression expr = DrillOptiq.toDrill(context, getChild(), pair.left);
+ expressions.add(new NamedExpression(expr, FieldReference.getWithQuotedRef(pair.right)));
+ }
+ return expressions;
+ }
+
+}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/JoinPrelRenameVisitor.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/JoinPrelRenameVisitor.java
index 3d38484db..fa750c23a 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/JoinPrelRenameVisitor.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/JoinPrelRenameVisitor.java
@@ -20,7 +20,6 @@ package org.apache.drill.exec.planner.physical.visitor;
import java.util.List;
-import org.apache.drill.exec.planner.physical.ExchangePrel;
import org.apache.drill.exec.planner.physical.JoinPrel;
import org.apache.drill.exec.planner.physical.Prel;
import org.eigenbase.rel.RelNode;
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/StarColumnConverter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/StarColumnConverter.java
new file mode 100644
index 000000000..ac7fb8e91
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/StarColumnConverter.java
@@ -0,0 +1,182 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.drill.exec.planner.physical.visitor;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.drill.exec.planner.StarColumnHelper;
+import org.apache.drill.exec.planner.physical.JoinPrel;
+import org.apache.drill.exec.planner.physical.Prel;
+import org.apache.drill.exec.planner.physical.ProjectAllowDupPrel;
+import org.apache.drill.exec.planner.physical.ProjectPrel;
+import org.apache.drill.exec.planner.physical.ScanPrel;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.reltype.RelDataTypeField;
+import org.eigenbase.rex.RexInputRef;
+import org.eigenbase.rex.RexNode;
+import org.eigenbase.rex.RexUtil;
+import org.eigenbase.util.Pair;
+
+import com.google.common.collect.Lists;
+
+public class StarColumnConverter extends BasePrelVisitor<Prel, boolean[], RuntimeException>{
+
+ private static StarColumnConverter INSTANCE = new StarColumnConverter();
+
+ private static final AtomicLong tableNumber = new AtomicLong(0);
+
+ public static Prel insertRenameProject(Prel root, RelDataType origRowType){
+ // Insert top project to do rename only when : 1) there is a join
+ // 2) there is a SCAN with * column. We pass two boolean to keep track of
+ // these two conditions.
+ boolean [] renamedForStar = new boolean [2];
+ renamedForStar[0] = false;
+ renamedForStar[1] = false;
+
+ //root should be screen / writer : no need to rename for the root.
+
+ Prel child = ((Prel) root.getInput(0)).accept(INSTANCE, renamedForStar);
+
+ if (renamedForStar[0] && renamedForStar[1]) {
+ List<RexNode> exprs = Lists.newArrayList();
+ for (int i = 0; i < origRowType.getFieldCount(); i++) {
+ RexNode expr = child.getCluster().getRexBuilder().makeInputRef(origRowType.getFieldList().get(i).getType(), i);
+ exprs.add(expr);
+ }
+
+ RelDataType newRowType = RexUtil.createStructType(child.getCluster().getTypeFactory(), exprs, origRowType.getFieldNames());
+
+ // Insert a top project which allows duplicate columns.
+ child = new ProjectAllowDupPrel(child.getCluster(), child.getTraitSet(), child, exprs, newRowType);
+
+ }
+
+ List<RelNode> children = Lists.newArrayList();
+ children.add( child);
+ return (Prel) root.copy(root.getTraitSet(), children);
+ }
+
+
+ @Override
+ public Prel visitPrel(Prel prel, boolean [] renamedForStar) throws RuntimeException {
+ List<RelNode> children = Lists.newArrayList();
+ for(Prel child : prel){
+ child = child.accept(this, renamedForStar);
+ children.add(child);
+ }
+
+ // For project, we need make sure that the project's field name is same as the input,
+ // when the project expression is RexInPutRef. This is necessary since Optiq may use
+ // an arbitrary name for the project's field name.
+ if (prel instanceof ProjectPrel) {
+ RelNode child = children.get(0);
+
+ List<String> fieldNames = Lists.newArrayList();
+
+ for (Pair<String, RexNode> pair : Pair.zip(prel.getRowType().getFieldNames(), ((ProjectPrel) prel).getProjects())) {
+ if (pair.right instanceof RexInputRef) {
+ String name = child.getRowType().getFieldNames().get(((RexInputRef) pair.right).getIndex());
+ fieldNames.add(name);
+ } else {
+ fieldNames.add(pair.left);
+ }
+ }
+
+ // Make sure the field names are unique : Optiq does not allow duplicate field names in a rowType.
+ fieldNames = makeUniqueNames(fieldNames);
+
+ RelDataType rowType = RexUtil.createStructType(prel.getCluster().getTypeFactory(), ((ProjectPrel) prel).getProjects(), fieldNames);
+
+ return (Prel) new ProjectPrel(prel.getCluster(), prel.getTraitSet(), children.get(0), ((ProjectPrel) prel).getProjects(), rowType);
+ } else {
+ return (Prel) prel.copy(prel.getTraitSet(), children);
+ }
+ }
+
+
+ @Override
+ public Prel visitJoin(JoinPrel prel, boolean [] renamedForStar) throws RuntimeException {
+ renamedForStar[0] = true; // indicate there is a join, which may require top rename projet operator.
+ return visitPrel(prel, renamedForStar);
+ }
+
+
+ @Override
+ public Prel visitScan(ScanPrel scanPrel, boolean [] renamedForStar) throws RuntimeException {
+ if (StarColumnHelper.containsStarColumn(scanPrel.getRowType()) && renamedForStar[0] ) {
+
+ renamedForStar[1] = true; // indicate there is * for a SCAN operator.
+
+ List<RexNode> exprs = Lists.newArrayList();
+
+ for (RelDataTypeField field : scanPrel.getRowType().getFieldList()) {
+ RexNode expr = scanPrel.getCluster().getRexBuilder().makeInputRef(field.getType(), field.getIndex());
+ exprs.add(expr);
+ }
+
+ List<String> fieldNames = Lists.newArrayList();
+
+ long tableId = tableNumber.getAndIncrement();
+
+ for (String name : scanPrel.getRowType().getFieldNames()) {
+ fieldNames.add("T" + tableId + StarColumnHelper.PREFIX_DELIMITER + name);
+ }
+ RelDataType rowType = RexUtil.createStructType(scanPrel.getCluster().getTypeFactory(), exprs, fieldNames);
+
+ ProjectPrel proj = new ProjectPrel(scanPrel.getCluster(), scanPrel.getTraitSet(), scanPrel, exprs, rowType);
+
+ return proj;
+ } else {
+ return visitPrel(scanPrel, renamedForStar);
+ }
+ }
+
+
+ private List<String> makeUniqueNames(List<String> names) {
+
+ // We have to search the set of original names, plus the set of unique names that will be used finally .
+ // Eg : the original names : ( C1, C1, C10 )
+ // There are two C1, we may rename C1 to C10, however, this new name will conflict with the original C10.
+ // That means we should pick a different name that does not conflict with the original names, in additional
+ // to make sure it's unique in the set of unique names.
+
+ HashSet<String> uniqueNames = new HashSet<String>();
+ HashSet<String> origNames = new HashSet<String>(names);
+
+ List<String> newNames = Lists.newArrayList();
+
+ for (String s : names) {
+ if (uniqueNames.contains(s)) {
+ for (int i = 0; ; i++ ) {
+ s = s + i;
+ if (! origNames.contains(s) && ! uniqueNames.contains(s))
+ break;
+ }
+ }
+ uniqueNames.add(s);
+ newNames.add(s);
+ }
+
+ return newNames;
+ }
+
+}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java
index 8c9b499e5..0c7580985 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java
@@ -52,6 +52,7 @@ import org.apache.drill.exec.planner.physical.visitor.ProducerConsumerPrelVisito
import org.apache.drill.exec.planner.physical.visitor.RelUniqifier;
import org.apache.drill.exec.planner.physical.visitor.SelectionVectorPrelVisitor;
import org.apache.drill.exec.planner.sql.DrillOperatorTable;
+import org.apache.drill.exec.planner.physical.visitor.StarColumnConverter;
import org.apache.drill.exec.planner.sql.DrillSqlWorker;
import org.apache.drill.exec.util.Pointer;
import org.eigenbase.rel.RelNode;
@@ -158,6 +159,15 @@ public class DefaultSqlHandler extends AbstractSqlHandler {
/* The order of the following transformation is important */
/*
+ * 0.) For select * from join query, we need insert project on top of scan and a top project just
+ * under screen operator. The project on top of scan will rename from * to T1*, while the top project
+ * will rename T1* to *, before it output the final result. Only the top project will allow
+ * duplicate columns, since user could "explicitly" ask for duplicate columns ( select *, col, *).
+ * The rest of projects will remove the duplicate column when we generate POP in json format.
+ */
+ phyRelNode = StarColumnConverter.insertRenameProject(phyRelNode, phyRelNode.getRowType());
+
+ /*
* 1.)
* Join might cause naming conflicts from its left and right child.
* In such case, we have to insert Project to rename the conflicting names.