Skip to content

Commit 3be39b7

Browse files
committed
[CALCITE-6893] Remove agg from Union children in IntersectToDistinctRule
1 parent 9b51667 commit 3be39b7

File tree

2 files changed

+31
-37
lines changed

2 files changed

+31
-37
lines changed

core/src/main/java/org/apache/calcite/rel/rules/IntersectToDistinctRule.java

+22-28
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.apache.calcite.rel.core.Intersect;
2424
import org.apache.calcite.rel.logical.LogicalIntersect;
2525
import org.apache.calcite.rex.RexBuilder;
26+
import org.apache.calcite.rex.RexNode;
2627
import org.apache.calcite.tools.RelBuilder;
2728
import org.apache.calcite.tools.RelBuilderFactory;
2829
import org.apache.calcite.util.ImmutableBitSet;
@@ -31,6 +32,8 @@
3132
import org.immutables.value.Value;
3233

3334
import java.math.BigDecimal;
35+
import java.util.ArrayList;
36+
import java.util.List;
3437

3538
/**
3639
* Planner rule that translates a distinct
@@ -44,25 +47,16 @@
4447
*
4548
* <h2>Example</h2>
4649
*
47-
* <p>Query: <code>R1 Intersect All R2</code>
50+
* <p>Query: <code>R1 Intersect R2</code>
4851
*
49-
* <p><code>R3 = GB(R1 on all attributes, count(*) as c)<br>
52+
* <p><code>R3 = R1 on all attributes, R1Index as i<br>
5053
* union all<br>
51-
* GB(R2 on all attributes, count(*) as c)</code>
54+
* R2 on all attributes, R2Index as i</code>
5255
*
53-
* <p><code>R4 = GB(R3 on all attributes, count(c) as cnt, min(c) as m)</code>
54-
*
55-
* <p>Note that we do not need <code>min(c)</code> in intersect distinct.
56+
* <p><code>R4 = GB(R3 on all attributes, count(distinct i) as cnt)</code>
5657
*
5758
* <p><code>R5 = Filter(cnt == #branch)</code>
5859
*
59-
* <p>If it is intersect all then
60-
*
61-
* <p><code>R6 = UDTF (R5) which will explode the tuples based on min(c)<br>
62-
* R7 = Project(R6 on all attributes)</code>
63-
*
64-
* <p>Else
65-
*
6660
* <p><code>R6 = Proj(R5 on all attributes)</code>
6761
*
6862
* @see org.apache.calcite.rel.rules.UnionToDistinctRule
@@ -97,38 +91,38 @@ public IntersectToDistinctRule(Class<? extends Intersect> intersectClass,
9791
final RexBuilder rexBuilder = cluster.getRexBuilder();
9892
final RelBuilder relBuilder = call.builder();
9993

100-
// 1st level GB: create a GB (col0, col1, count() as c) for each branch
101-
for (RelNode input : intersect.getInputs()) {
102-
relBuilder.push(input);
103-
relBuilder.aggregate(relBuilder.groupKey(relBuilder.fields()),
104-
relBuilder.countStar(null));
94+
for (int i = 0; i < intersect.getInputs().size(); ++i) {
95+
relBuilder.push(intersect.getInputs().get(i));
96+
List<RexNode> fields = new ArrayList<>(relBuilder.fields());
97+
// Add a constant column "i" for each input
98+
fields.add(
99+
relBuilder.alias(
100+
rexBuilder.makeBigintLiteral(new BigDecimal(i)), "i"));
101+
relBuilder.project(fields);
105102
}
106103

107104
// create a union above all the branches
108105
final int branchCount = intersect.getInputs().size();
109106
relBuilder.union(true, branchCount);
110107
final RelNode union = relBuilder.peek();
111108

112-
// 2nd level GB: create a GB (col0, col1, count(c)) for each branch
113-
// the index of c is union.getRowType().getFieldList().size() - 1
114109
final int fieldCount = union.getRowType().getFieldCount();
115110

116111
final ImmutableBitSet groupSet =
117-
ImmutableBitSet.range(fieldCount - 1);
112+
ImmutableBitSet.range(fieldCount);
113+
114+
// Perform count(distinct) on column "i"
118115
relBuilder.aggregate(relBuilder.groupKey(groupSet),
119-
relBuilder.countStar(null));
116+
relBuilder.count(true, "i", relBuilder.field("i")));
120117

121118
// add a filter count(distinct i) = #branches
122119
relBuilder.filter(
123-
relBuilder.equals(relBuilder.field(fieldCount - 1),
120+
relBuilder.equals(relBuilder.field(fieldCount),
124121
rexBuilder.makeBigintLiteral(new BigDecimal(branchCount))));
125122

126-
// Project all but the last field
127-
relBuilder.project(Util.skipLast(relBuilder.fields()));
123+
// Project all but the last two fields (i and the distinct count of i)
124+
relBuilder.project(Util.skipLast(relBuilder.fields(), 2));
128125

129-
// the schema for intersect distinct is like this
130-
// R3 on all attributes + count(c) as cnt
131-
// finally add a project to project out the last column
132126
call.transformTo(relBuilder.build());
133127
}
134128

core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml

+9-9
Original file line numberDiff line numberDiff line change
@@ -5917,18 +5917,18 @@ LogicalIntersect(all=[false])
59175917
<Resource name="planAfter">
59185918
<![CDATA[
59195919
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
5920-
LogicalFilter(condition=[=($9, 3)])
5921-
LogicalAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8}], agg#0=[COUNT()])
5920+
LogicalFilter(condition=[=($10, 3)])
5921+
LogicalAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}], i=[COUNT(DISTINCT $9)])
59225922
LogicalUnion(all=[true])
5923-
LogicalAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8}], agg#0=[COUNT()])
5923+
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], i=[0:BIGINT])
59245924
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
59255925
LogicalFilter(condition=[=($7, 10)])
59265926
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
5927-
LogicalAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8}], agg#0=[COUNT()])
5927+
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], i=[1:BIGINT])
59285928
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
59295929
LogicalFilter(condition=[=($7, 20)])
59305930
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
5931-
LogicalAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8}], agg#0=[COUNT()])
5931+
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], i=[2:BIGINT])
59325932
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
59335933
LogicalFilter(condition=[=($7, 30)])
59345934
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
@@ -5963,14 +5963,14 @@ LogicalIntersect(all=[true])
59635963
<![CDATA[
59645964
LogicalIntersect(all=[true])
59655965
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
5966-
LogicalFilter(condition=[=($9, 2)])
5967-
LogicalAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8}], agg#0=[COUNT()])
5966+
LogicalFilter(condition=[=($10, 2)])
5967+
LogicalAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}], i=[COUNT(DISTINCT $9)])
59685968
LogicalUnion(all=[true])
5969-
LogicalAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8}], agg#0=[COUNT()])
5969+
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], i=[0:BIGINT])
59705970
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
59715971
LogicalFilter(condition=[=($7, 10)])
59725972
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
5973-
LogicalAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8}], agg#0=[COUNT()])
5973+
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], i=[1:BIGINT])
59745974
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
59755975
LogicalFilter(condition=[=($7, 20)])
59765976
LogicalTableScan(table=[[CATALOG, SALES, EMP]])

0 commit comments

Comments
 (0)