diff options
author | Boaz Ben-Zvi <boaz@mapr.com> | 2018-11-13 12:58:31 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-11-13 12:58:31 -0800 |
commit | 6267185823c4c50ab31c029ee5b4d9df2fc94d03 (patch) | |
tree | 3539d736acb0920e675c4f190dc58bb7271e7bdd /exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/HashJoinBatch.java | |
parent | 7079b8cd8e2f7ee07dac2fca4f2459af17eb8d6d (diff) |
Drill 6735: Implement Semi-Join for the Hash-Join operator (#1522)
Diffstat (limited to 'exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/HashJoinBatch.java')
-rw-r--r-- | exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/HashJoinBatch.java | 13 |
1 files changed, 10 insertions, 3 deletions
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/HashJoinBatch.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/HashJoinBatch.java index 2f17ff2c1..f1c618165 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/HashJoinBatch.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/HashJoinBatch.java @@ -122,6 +122,7 @@ public class HashJoinBatch extends AbstractBinaryRecordBatch<HashJoinPOP> implem // Join type, INNER, LEFT, RIGHT or OUTER private final JoinRelType joinType; + private boolean semiJoin; private boolean joinIsLeftOrFull; private boolean joinIsRightOrFull; private boolean skipHashTableBuild; // when outer side is empty, and the join is inner or left (see DRILL-6755) @@ -486,7 +487,7 @@ public class HashJoinBatch extends AbstractBinaryRecordBatch<HashJoinPOP> implem final double hashTableDoublingFactor = context.getOptions().getDouble(ExecConstants.HASHJOIN_HASH_DOUBLE_FACTOR_KEY); final String hashTableCalculatorType = context.getOptions().getString(ExecConstants.HASHJOIN_HASHTABLE_CALC_TYPE_KEY); - return new HashJoinMemoryCalculatorImpl(safetyFactor, fragmentationFactor, hashTableDoublingFactor, hashTableCalculatorType); + return new HashJoinMemoryCalculatorImpl(safetyFactor, fragmentationFactor, hashTableDoublingFactor, hashTableCalculatorType, semiJoin); } else { return new HashJoinMechanicalMemoryCalculator(maxBatchesInMemory); } @@ -566,6 +567,7 @@ public class HashJoinBatch extends AbstractBinaryRecordBatch<HashJoinPOP> implem hashJoinProbe.setupHashJoinProbe(probeBatch, this, joinType, + semiJoin, leftUpstream, partitions, spilledState.getCycle(), @@ -777,7 +779,7 @@ public class HashJoinBatch extends AbstractBinaryRecordBatch<HashJoinPOP> implem baseHashTable.updateIncoming(buildBatch, probeBatch); // in case we process the spilled files // Recreate the partitions every time build is initialized for (int part = 0; part < numPartitions; part++ ) { - partitions[part] = new HashPartition(context, allocator, baseHashTable, buildBatch, probeBatch, + partitions[part] = new HashPartition(context, allocator, baseHashTable, buildBatch, probeBatch, semiJoin, RECORDS_PER_BATCH, spillSet, part, spilledState.getCycle(), numPartitions); } @@ -998,6 +1000,10 @@ public class HashJoinBatch extends AbstractBinaryRecordBatch<HashJoinPOP> implem : read_right_HV_vector.getAccessor().get(ind); // get the hash value from the HV column int currPart = hashCode & spilledState.getPartitionMask(); hashCode >>>= spilledState.getBitsInMask(); + // semi-join skips join-key-duplicate rows + if ( semiJoin ) { + + } // Append the new inner row to the appropriate partition; spill (that partition) if needed partitions[currPart].appendInnerRow(buildBatch.getContainer(), ind, hashCode, buildCalc); // may spill if needed } @@ -1093,7 +1099,7 @@ public class HashJoinBatch extends AbstractBinaryRecordBatch<HashJoinPOP> implem private void setupOutputContainerSchema() { - if (buildSchema != null) { + if (buildSchema != null && ! semiJoin ) { for (final MaterializedField field : buildSchema) { final MajorType inputType = field.getType(); final MajorType outputType; @@ -1160,6 +1166,7 @@ public class HashJoinBatch extends AbstractBinaryRecordBatch<HashJoinPOP> implem this.buildBatch = right; this.probeBatch = left; joinType = popConfig.getJoinType(); + semiJoin = popConfig.isSemiJoin(); joinIsLeftOrFull = joinType == JoinRelType.LEFT || joinType == JoinRelType.FULL; joinIsRightOrFull = joinType == JoinRelType.RIGHT || joinType == JoinRelType.FULL; conditions = popConfig.getConditions(); |