diff options
author | Jinfeng Ni <jni@maprtech.com> | 2014-07-24 11:51:35 -0700 |
---|---|---|
committer | Jacques Nadeau <jacques@apache.org> | 2014-07-29 08:36:15 -0700 |
commit | 91b33f98501cab8081f9cf2dce74418b4a018019 (patch) | |
tree | 0daa52876a7abe83e60b2334c5671d49bd738314 /exec/java-exec/src/main | |
parent | deaca5d8ec81ed03b0ad6a187c16723717ba2c51 (diff) |
DRILL-931: Support select * query in Drill planner. (Part of change is in Optiq).
Diffstat (limited to 'exec/java-exec/src/main')
6 files changed, 354 insertions, 3 deletions
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/StarColumnHelper.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/StarColumnHelper.java new file mode 100644 index 000000000..4c04b22e3 --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/StarColumnHelper.java @@ -0,0 +1,72 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.drill.exec.planner; + +import java.util.List; +import java.util.Set; + +import org.eigenbase.reltype.RelDataType; + +public class StarColumnHelper { + + public final static String PREFIX_DELIMITER = "\u00a6\u00a6"; + + public final static String STAR_COLUMN = "*"; + + public final static String PREFIXED_STAR_COLUMN = PREFIX_DELIMITER + STAR_COLUMN; + + public static boolean containsStarColumn(RelDataType type) { + List<String> fieldNames = type.getFieldNames(); + + for (String s : fieldNames) { + if (s.startsWith(STAR_COLUMN)) + return true; + } + + return false; + } + + public static boolean isPrefixedStarColumn(String fieldName) { + return fieldName.indexOf(PREFIXED_STAR_COLUMN) > 0 ; // the delimiter * starts at none-zero position. + } + + public static String extractStarColumnPrefix(String fieldName) { + + assert (isPrefixedStarColumn(fieldName)); + + return fieldName.substring(0, fieldName.indexOf(PREFIXED_STAR_COLUMN)); + } + + public static String extractColumnPrefix(String fieldName) { + if (fieldName.indexOf(PREFIX_DELIMITER) >=0) { + return fieldName.substring(0, fieldName.indexOf(PREFIX_DELIMITER)); + } else { + return ""; + } + } + + // Given a set of prefixes, check if a regular column is subsumed by any of the prefixed star column in the set. + public static boolean subsumeRegColumn(Set<String> prefixes, String fieldName) { + if (isPrefixedStarColumn(fieldName)) + return false; // only applies to regular column. + + return prefixes.contains(extractColumnPrefix(fieldName)); + } + +} diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillProjectRelBase.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillProjectRelBase.java index 14817be9a..7658bb0b0 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillProjectRelBase.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillProjectRelBase.java @@ -17,11 +17,13 @@ */ package org.apache.drill.exec.planner.common; +import java.util.HashSet; import java.util.List; import org.apache.drill.common.expression.FieldReference; import org.apache.drill.common.expression.LogicalExpression; import org.apache.drill.common.logical.data.NamedExpression; +import org.apache.drill.exec.planner.StarColumnHelper; import org.apache.drill.exec.planner.cost.DrillCostBase; import org.apache.drill.exec.planner.cost.DrillCostBase.DrillCostFactory; import org.apache.drill.exec.planner.logical.DrillOptiq; @@ -69,11 +71,30 @@ public abstract class DrillProjectRelBase extends ProjectRelBase implements Dril return Pair.zip(exps, getRowType().getFieldNames()); } + // By default, the project will not allow duplicate columns, caused by expanding from * column. + // For example, if we have T1_*, T1_Col1, T1_Col2, Col1 and Col2 will have two copies if we expand + // * into a list of regular columns. For the intermediate project, the duplicate columns are not + // necessary; it will impact performance. protected List<NamedExpression> getProjectExpressions(DrillParseContext context) { List<NamedExpression> expressions = Lists.newArrayList(); + + HashSet<String> starColPrefixes = new HashSet<String>(); + + // To remove duplicate columns caused by expanding from * column, we'll keep track of + // all the prefix in the project expressions. If a regular column C1 have the same prefix, that + // regular column is not included in the project expression, since at execution time, * will be + // expanded into a list of column, including column C1. + for (String fieldName : getRowType().getFieldNames()) { + if (StarColumnHelper.isPrefixedStarColumn(fieldName)) { + starColPrefixes.add(StarColumnHelper.extractStarColumnPrefix(fieldName)); + } + } + for (Pair<RexNode, String> pair : projects()) { - LogicalExpression expr = DrillOptiq.toDrill(context, getChild(), pair.left); - expressions.add(new NamedExpression(expr, FieldReference.getWithQuotedRef(pair.right))); + if (! StarColumnHelper.subsumeRegColumn(starColPrefixes, pair.right)) { + LogicalExpression expr = DrillOptiq.toDrill(context, getChild(), pair.left); + expressions.add(new NamedExpression(expr, FieldReference.getWithQuotedRef(pair.right))); + } } return expressions; } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/ProjectAllowDupPrel.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/ProjectAllowDupPrel.java new file mode 100644 index 000000000..a67dbf2a5 --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/ProjectAllowDupPrel.java @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.drill.exec.planner.physical; + +import java.io.IOException; +import java.util.List; + +import org.apache.drill.common.expression.FieldReference; +import org.apache.drill.common.expression.LogicalExpression; +import org.apache.drill.common.logical.data.NamedExpression; +import org.apache.drill.exec.physical.base.PhysicalOperator; +import org.apache.drill.exec.physical.config.Project; +import org.apache.drill.exec.planner.logical.DrillOptiq; +import org.apache.drill.exec.planner.logical.DrillParseContext; +import org.apache.drill.exec.planner.physical.ProjectPrel; +import org.eigenbase.rel.RelNode; +import org.eigenbase.relopt.RelOptCluster; +import org.eigenbase.relopt.RelTraitSet; +import org.eigenbase.reltype.RelDataType; +import org.eigenbase.rex.RexNode; +import org.eigenbase.util.Pair; + +import com.google.common.collect.Lists; + +public class ProjectAllowDupPrel extends ProjectPrel { + + public ProjectAllowDupPrel(RelOptCluster cluster, RelTraitSet traits, RelNode child, List<RexNode> exps, + RelDataType rowType) { + super(cluster, traits, child, exps, rowType); + } + + @Override + public PhysicalOperator getPhysicalOperator(PhysicalPlanCreator creator) throws IOException { + Prel child = (Prel) this.getChild(); + + PhysicalOperator childPOP = child.getPhysicalOperator(creator); + + Project p = new Project(this.getProjectExpressions(new DrillParseContext()), childPOP); + return creator.addMetadata(this, p); + } + + protected List<NamedExpression> getProjectExpressions(DrillParseContext context) { + List<NamedExpression> expressions = Lists.newArrayList(); + for (Pair<RexNode, String> pair : Pair.zip(exps, getRowType().getFieldNames())) { + LogicalExpression expr = DrillOptiq.toDrill(context, getChild(), pair.left); + expressions.add(new NamedExpression(expr, FieldReference.getWithQuotedRef(pair.right))); + } + return expressions; + } + +} diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/JoinPrelRenameVisitor.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/JoinPrelRenameVisitor.java index 3d38484db..fa750c23a 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/JoinPrelRenameVisitor.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/JoinPrelRenameVisitor.java @@ -20,7 +20,6 @@ package org.apache.drill.exec.planner.physical.visitor; import java.util.List; -import org.apache.drill.exec.planner.physical.ExchangePrel; import org.apache.drill.exec.planner.physical.JoinPrel; import org.apache.drill.exec.planner.physical.Prel; import org.eigenbase.rel.RelNode; diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/StarColumnConverter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/StarColumnConverter.java new file mode 100644 index 000000000..ac7fb8e91 --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/visitor/StarColumnConverter.java @@ -0,0 +1,182 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.drill.exec.planner.physical.visitor; + +import java.util.HashSet; +import java.util.List; +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.drill.exec.planner.StarColumnHelper; +import org.apache.drill.exec.planner.physical.JoinPrel; +import org.apache.drill.exec.planner.physical.Prel; +import org.apache.drill.exec.planner.physical.ProjectAllowDupPrel; +import org.apache.drill.exec.planner.physical.ProjectPrel; +import org.apache.drill.exec.planner.physical.ScanPrel; +import org.eigenbase.rel.RelNode; +import org.eigenbase.reltype.RelDataType; +import org.eigenbase.reltype.RelDataTypeField; +import org.eigenbase.rex.RexInputRef; +import org.eigenbase.rex.RexNode; +import org.eigenbase.rex.RexUtil; +import org.eigenbase.util.Pair; + +import com.google.common.collect.Lists; + +public class StarColumnConverter extends BasePrelVisitor<Prel, boolean[], RuntimeException>{ + + private static StarColumnConverter INSTANCE = new StarColumnConverter(); + + private static final AtomicLong tableNumber = new AtomicLong(0); + + public static Prel insertRenameProject(Prel root, RelDataType origRowType){ + // Insert top project to do rename only when : 1) there is a join + // 2) there is a SCAN with * column. We pass two boolean to keep track of + // these two conditions. + boolean [] renamedForStar = new boolean [2]; + renamedForStar[0] = false; + renamedForStar[1] = false; + + //root should be screen / writer : no need to rename for the root. + + Prel child = ((Prel) root.getInput(0)).accept(INSTANCE, renamedForStar); + + if (renamedForStar[0] && renamedForStar[1]) { + List<RexNode> exprs = Lists.newArrayList(); + for (int i = 0; i < origRowType.getFieldCount(); i++) { + RexNode expr = child.getCluster().getRexBuilder().makeInputRef(origRowType.getFieldList().get(i).getType(), i); + exprs.add(expr); + } + + RelDataType newRowType = RexUtil.createStructType(child.getCluster().getTypeFactory(), exprs, origRowType.getFieldNames()); + + // Insert a top project which allows duplicate columns. + child = new ProjectAllowDupPrel(child.getCluster(), child.getTraitSet(), child, exprs, newRowType); + + } + + List<RelNode> children = Lists.newArrayList(); + children.add( child); + return (Prel) root.copy(root.getTraitSet(), children); + } + + + @Override + public Prel visitPrel(Prel prel, boolean [] renamedForStar) throws RuntimeException { + List<RelNode> children = Lists.newArrayList(); + for(Prel child : prel){ + child = child.accept(this, renamedForStar); + children.add(child); + } + + // For project, we need make sure that the project's field name is same as the input, + // when the project expression is RexInPutRef. This is necessary since Optiq may use + // an arbitrary name for the project's field name. + if (prel instanceof ProjectPrel) { + RelNode child = children.get(0); + + List<String> fieldNames = Lists.newArrayList(); + + for (Pair<String, RexNode> pair : Pair.zip(prel.getRowType().getFieldNames(), ((ProjectPrel) prel).getProjects())) { + if (pair.right instanceof RexInputRef) { + String name = child.getRowType().getFieldNames().get(((RexInputRef) pair.right).getIndex()); + fieldNames.add(name); + } else { + fieldNames.add(pair.left); + } + } + + // Make sure the field names are unique : Optiq does not allow duplicate field names in a rowType. + fieldNames = makeUniqueNames(fieldNames); + + RelDataType rowType = RexUtil.createStructType(prel.getCluster().getTypeFactory(), ((ProjectPrel) prel).getProjects(), fieldNames); + + return (Prel) new ProjectPrel(prel.getCluster(), prel.getTraitSet(), children.get(0), ((ProjectPrel) prel).getProjects(), rowType); + } else { + return (Prel) prel.copy(prel.getTraitSet(), children); + } + } + + + @Override + public Prel visitJoin(JoinPrel prel, boolean [] renamedForStar) throws RuntimeException { + renamedForStar[0] = true; // indicate there is a join, which may require top rename projet operator. + return visitPrel(prel, renamedForStar); + } + + + @Override + public Prel visitScan(ScanPrel scanPrel, boolean [] renamedForStar) throws RuntimeException { + if (StarColumnHelper.containsStarColumn(scanPrel.getRowType()) && renamedForStar[0] ) { + + renamedForStar[1] = true; // indicate there is * for a SCAN operator. + + List<RexNode> exprs = Lists.newArrayList(); + + for (RelDataTypeField field : scanPrel.getRowType().getFieldList()) { + RexNode expr = scanPrel.getCluster().getRexBuilder().makeInputRef(field.getType(), field.getIndex()); + exprs.add(expr); + } + + List<String> fieldNames = Lists.newArrayList(); + + long tableId = tableNumber.getAndIncrement(); + + for (String name : scanPrel.getRowType().getFieldNames()) { + fieldNames.add("T" + tableId + StarColumnHelper.PREFIX_DELIMITER + name); + } + RelDataType rowType = RexUtil.createStructType(scanPrel.getCluster().getTypeFactory(), exprs, fieldNames); + + ProjectPrel proj = new ProjectPrel(scanPrel.getCluster(), scanPrel.getTraitSet(), scanPrel, exprs, rowType); + + return proj; + } else { + return visitPrel(scanPrel, renamedForStar); + } + } + + + private List<String> makeUniqueNames(List<String> names) { + + // We have to search the set of original names, plus the set of unique names that will be used finally . + // Eg : the original names : ( C1, C1, C10 ) + // There are two C1, we may rename C1 to C10, however, this new name will conflict with the original C10. + // That means we should pick a different name that does not conflict with the original names, in additional + // to make sure it's unique in the set of unique names. + + HashSet<String> uniqueNames = new HashSet<String>(); + HashSet<String> origNames = new HashSet<String>(names); + + List<String> newNames = Lists.newArrayList(); + + for (String s : names) { + if (uniqueNames.contains(s)) { + for (int i = 0; ; i++ ) { + s = s + i; + if (! origNames.contains(s) && ! uniqueNames.contains(s)) + break; + } + } + uniqueNames.add(s); + newNames.add(s); + } + + return newNames; + } + +} diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java index 8c9b499e5..0c7580985 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java @@ -52,6 +52,7 @@ import org.apache.drill.exec.planner.physical.visitor.ProducerConsumerPrelVisito import org.apache.drill.exec.planner.physical.visitor.RelUniqifier; import org.apache.drill.exec.planner.physical.visitor.SelectionVectorPrelVisitor; import org.apache.drill.exec.planner.sql.DrillOperatorTable; +import org.apache.drill.exec.planner.physical.visitor.StarColumnConverter; import org.apache.drill.exec.planner.sql.DrillSqlWorker; import org.apache.drill.exec.util.Pointer; import org.eigenbase.rel.RelNode; @@ -158,6 +159,15 @@ public class DefaultSqlHandler extends AbstractSqlHandler { /* The order of the following transformation is important */ /* + * 0.) For select * from join query, we need insert project on top of scan and a top project just + * under screen operator. The project on top of scan will rename from * to T1*, while the top project + * will rename T1* to *, before it output the final result. Only the top project will allow + * duplicate columns, since user could "explicitly" ask for duplicate columns ( select *, col, *). + * The rest of projects will remove the duplicate column when we generate POP in json format. + */ + phyRelNode = StarColumnConverter.insertRenameProject(phyRelNode, phyRelNode.getRowType()); + + /* * 1.) * Join might cause naming conflicts from its left and right child. * In such case, we have to insert Project to rename the conflicting names. |