From af5d65e4163b5a7421255075462c5ca690c45ff2 Mon Sep 17 00:00:00 2001
From: airborne12
Date: Fri, 20 Mar 2026 23:20:56 +0800
Subject: [PATCH] [fix](nereids) Fix MATCH crash on alias slots and push down as virtual column

### What problem does this PR solve?

Issue Number: close #xxx

Problem Summary:

When MATCH expressions reference alias slots that have lost column metadata (e.g., `CAST(variant_col['subkey'] AS VARCHAR) AS fn`), and the MATCH is in a predicate that cannot be pushed below a join (due to OR with join-dependent conditions like EXISTS mark or LEFT JOIN null checks), ExpressionTranslator's visitMatch() crashes with "SlotReference in Match failed to get Column".

Root cause: `Alias.toSlot()` only preserves originalColumn/originalTable when its child is a direct SlotReference. When wrapped in Cast/ElementAt, all metadata is lost. Combined with OR preventing filter pushdown, the MATCH is stuck at the join layer referencing a metadata-less slot.

This PR fixes the issue with two changes:

1. **Graceful fallback in visitMatch()**: When the slot has lost column/table metadata, fall back to `invertedIndex = null` instead of throwing. The BE evaluates MATCH correctly via slow-path expression evaluation, or the virtual column mechanism (below) provides fast-path index evaluation.

2. **New rewrite rule PushDownMatchPredicateAsVirtualColumn**: Extracts MATCH from join/filter predicates, traces the alias slot back through the Project to find the original column expression, and creates a virtual column on OlapScan. The BE evaluates the virtual column via inverted index using fast_execute(), and the join layer references the boolean result.

### Release note

Fix MATCH expressions crashing when used with CTE aliases involving type casts combined with EXISTS/LEFT JOIN and OR conditions. Also enables inverted index evaluation for such MATCH expressions via virtual column pushdown.
### Check List (For Author) - Test: Manual test - Behavior changed: No - Does this need documentation: No --- .../glue/translator/ExpressionTranslator.java | 27 +- .../doris/nereids/jobs/executor/Rewriter.java | 2 + .../apache/doris/nereids/rules/RuleType.java | 1 + ...PushDownMatchPredicateAsVirtualColumn.java | 316 ++++++++++++++++++ 4 files changed, 333 insertions(+), 13 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownMatchPredicateAsVirtualColumn.java diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/ExpressionTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/ExpressionTranslator.java index f16384872586b6..57597125ef06ea 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/ExpressionTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/ExpressionTranslator.java @@ -219,20 +219,21 @@ public Expr visitMatch(Match match, PlanTranslatorContext context) { .orElseThrow(() -> new AnalysisException( "No SlotReference found in Match, SQL is " + match.toSql())); - Column column = slot.getOriginalColumn() - .orElseThrow(() -> new AnalysisException( - "SlotReference in Match failed to get Column, SQL is " + match.toSql())); - - OlapTable olapTbl = getOlapTableDirectly(slot); - if (olapTbl == null) { - throw new AnalysisException("SlotReference in Match failed to get OlapTable, SQL is " + match.toSql()); - } - + // Try to resolve inverted index metadata. When the slot has lost its original + // column/table reference (e.g., after CTE inlining or join projection remapping), + // we gracefully fall back to invertedIndex = null. The BE can still evaluate MATCH + // correctly without inverted index (slow path), or the PushDownMatchPredicateAsVirtualColumn rule + // may have already converted this to a virtual column on the OlapScan (fast path). 
+ Index invertedIndex = null; String analyzer = match.getAnalyzer().orElse(null); - Index invertedIndex = olapTbl.getInvertedIndex(column, slot.getSubPath(), analyzer); - if (analyzer != null && invertedIndex == null) { - throw new AnalysisException("No inverted index found for analyzer '" + analyzer - + "' on column " + column.getName()); + Column column = slot.getOriginalColumn().orElse(null); + OlapTable olapTbl = getOlapTableDirectly(slot); + if (column != null && olapTbl != null) { + invertedIndex = olapTbl.getInvertedIndex(column, slot.getSubPath(), analyzer); + if (analyzer != null && invertedIndex == null) { + throw new AnalysisException("No inverted index found for analyzer '" + analyzer + + "' on column " + column.getName()); + } } MatchPredicate.Operator op = match.op(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java index 7996c987c11d95..a49a3e3905118a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java @@ -139,6 +139,7 @@ import org.apache.doris.nereids.rules.rewrite.PushDownLimit; import org.apache.doris.nereids.rules.rewrite.PushDownLimitDistinctThroughJoin; import org.apache.doris.nereids.rules.rewrite.PushDownLimitDistinctThroughUnion; +import org.apache.doris.nereids.rules.rewrite.PushDownMatchPredicateAsVirtualColumn; import org.apache.doris.nereids.rules.rewrite.PushDownMatchProjectionAsVirtualColumn; import org.apache.doris.nereids.rules.rewrite.PushDownProjectThroughLimit; import org.apache.doris.nereids.rules.rewrite.PushDownScoreTopNIntoOlapScan; @@ -795,6 +796,7 @@ public class Rewriter extends AbstractBatchJobExecutor { topDown(new PushDownVectorTopNIntoOlapScan()), topDown(new PushDownVirtualColumnsIntoOlapScan()), topDown(new PushDownMatchProjectionAsVirtualColumn()), + topDown(new 
PushDownMatchPredicateAsVirtualColumn()), topic("score optimize", topDown(new PushDownScoreTopNIntoOlapScan(), new CheckScoreUsage()) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java index f98dd37b40f2e1..b35283a0e5ebaa 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java @@ -338,6 +338,7 @@ public enum RuleType { PUSH_CONJUNCTS_INTO_ES_SCAN(RuleTypeClass.REWRITE), PUSH_DOWN_VIRTUAL_COLUMNS_INTO_OLAP_SCAN(RuleTypeClass.REWRITE), PUSH_DOWN_MATCH_PROJECTION_AS_VIRTUAL_COLUMN(RuleTypeClass.REWRITE), + PUSH_DOWN_MATCH_PREDICATE_AS_VIRTUAL_COLUMN(RuleTypeClass.REWRITE), PUSH_DOWN_SCORE_TOPN_INTO_OLAP_SCAN(RuleTypeClass.REWRITE), PUSH_DOWN_VECTOR_TOPN_INTO_OLAP_SCAN(RuleTypeClass.REWRITE), CHECK_SCORE_USAGE(RuleTypeClass.REWRITE), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownMatchPredicateAsVirtualColumn.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownMatchPredicateAsVirtualColumn.java new file mode 100644 index 00000000000000..cc8eff6d522d4b --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownMatchPredicateAsVirtualColumn.java @@ -0,0 +1,316 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite; + +import org.apache.doris.catalog.KeysType; +import org.apache.doris.nereids.rules.Rule; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Match; +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Push down MATCH expressions from join/filter predicates as virtual columns on OlapScan. + * + * When MATCH appears in a predicate that cannot be pushed below a join (e.g., OR with + * join-dependent conditions like EXISTS mark or outer join null checks), this rule: + * 1. Extracts the MATCH expression from the predicate + * 2. Traces the alias slot back through the Project to find the original column expression + * 3. 
Creates a virtual column on the OlapScan with the MATCH on the original expression + * 4. Replaces the MATCH in the predicate with the virtual column's boolean slot + * + * Before: + * Filter(fn MATCH_ANY 'hello' OR l.col IS NOT NULL) + * └── Join[LEFT_OUTER] + * └── Project[objectId, CAST(col) as fn] + * └── OlapScan[table] + * └── ... + * + * After: + * Filter(__match_vc OR l.col IS NOT NULL) + * └── Join[LEFT_OUTER] + * └── Project[objectId, fn, __match_vc] + * └── OlapScan[table, virtualColumns=[(CAST(col) MATCH_ANY 'hello')]] + * └── ... + */ +public class PushDownMatchPredicateAsVirtualColumn implements RewriteRuleFactory { + + private boolean canPushDown(LogicalOlapScan scan) { + return scan.getTable().getKeysType() == KeysType.DUP_KEYS + || (scan.getTable().getTableProperty() != null + && scan.getTable().getTableProperty().getEnableUniqueKeyMergeOnWrite()); + } + + @Override + public List buildRules() { + return ImmutableList.of( + // Pattern 1: Filter -> Join -> Project -> OlapScan + logicalFilter(logicalJoin( + logicalProject(logicalOlapScan().when(this::canPushDown)), any())) + .when(filter -> hasMatchInSet(filter.getConjuncts())) + .then(this::handleFilterProjectScan) + .toRule(RuleType.PUSH_DOWN_MATCH_PREDICATE_AS_VIRTUAL_COLUMN), + + // Pattern 2: Filter -> Join -> Project -> Filter -> OlapScan + logicalFilter(logicalJoin( + logicalProject(logicalFilter(logicalOlapScan().when(this::canPushDown))), any())) + .when(filter -> hasMatchInSet(filter.getConjuncts())) + .then(this::handleFilterProjectFilterScan) + .toRule(RuleType.PUSH_DOWN_MATCH_PREDICATE_AS_VIRTUAL_COLUMN), + + // Pattern 3: Join(otherPredicates has MATCH) -> Project -> OlapScan + logicalJoin( + logicalProject(logicalOlapScan().when(this::canPushDown)), any()) + .when(join -> hasMatchInList(join.getOtherJoinConjuncts())) + .then(this::handleJoinProjectScan) + .toRule(RuleType.PUSH_DOWN_MATCH_PREDICATE_AS_VIRTUAL_COLUMN), + + // Pattern 4: Join(otherPredicates has MATCH) -> Project -> Filter 
-> OlapScan + logicalJoin( + logicalProject(logicalFilter(logicalOlapScan().when(this::canPushDown))), any()) + .when(join -> hasMatchInList(join.getOtherJoinConjuncts())) + .then(this::handleJoinProjectFilterScan) + .toRule(RuleType.PUSH_DOWN_MATCH_PREDICATE_AS_VIRTUAL_COLUMN) + ); + } + + private Plan handleFilterProjectScan(LogicalFilter, Plan>> filter) { + LogicalJoin, Plan> join = filter.child(); + LogicalProject project = join.left(); + LogicalOlapScan scan = project.child(); + return doHandleFilter(filter, join, project, scan, newScan -> newScan); + } + + private Plan handleFilterProjectFilterScan( + LogicalFilter>, Plan>> filter) { + LogicalJoin>, Plan> join = filter.child(); + LogicalProject> project = join.left(); + LogicalFilter scanFilter = project.child(); + LogicalOlapScan scan = scanFilter.child(); + return doHandleFilter(filter, join, project, scan, + newScan -> scanFilter.withChildren(ImmutableList.of(newScan))); + } + + private Plan handleJoinProjectScan(LogicalJoin, Plan> join) { + LogicalProject project = join.left(); + LogicalOlapScan scan = project.child(); + return doHandleJoin(join, project, scan, newScan -> newScan); + } + + private Plan handleJoinProjectFilterScan( + LogicalJoin>, Plan> join) { + LogicalProject> project = join.left(); + LogicalFilter scanFilter = project.child(); + LogicalOlapScan scan = scanFilter.child(); + return doHandleJoin(join, project, scan, + newScan -> scanFilter.withChildren(ImmutableList.of(newScan))); + } + + private interface ScanRebuilder { + Plan rebuild(LogicalOlapScan newScan); + } + + private Plan doHandleFilter(LogicalFilter filter, LogicalJoin join, + LogicalProject project, LogicalOlapScan scan, ScanRebuilder rebuilder) { + Set leftOutputSlots = ImmutableSet.copyOf(project.getOutput()); + List predicateList = new ArrayList<>(filter.getConjuncts()); + PushDownResult result = buildVirtualColumnsFromList(predicateList, project, scan, leftOutputSlots); + if (result == null) { + return null; + } + + 
LogicalProject newProject = (LogicalProject) project.withProjectsAndChild( + result.newProjections, rebuilder.rebuild(result.newScan)); + Plan newJoin = join.withChildren(newProject, join.right()); + return filter.withConjunctsAndChild(ImmutableSet.copyOf(result.newPredicateList), newJoin); + } + + private Plan doHandleJoin(LogicalJoin join, LogicalProject project, + LogicalOlapScan scan, ScanRebuilder rebuilder) { + Set leftOutputSlots = ImmutableSet.copyOf(project.getOutput()); + List otherConjuncts = join.getOtherJoinConjuncts(); + PushDownResult result = buildVirtualColumnsFromList(otherConjuncts, project, scan, leftOutputSlots); + if (result == null) { + return null; + } + + LogicalProject newProject = (LogicalProject) project.withProjectsAndChild( + result.newProjections, rebuilder.rebuild(result.newScan)); + return join.withJoinConjuncts(join.getHashJoinConjuncts(), + result.newPredicateList, join.getJoinReorderContext()) + .withChildren(newProject, join.right()); + } + + private boolean hasMatchInSet(Set conjuncts) { + return conjuncts.stream().anyMatch(this::containsMatch); + } + + private boolean hasMatchInList(List exprs) { + return exprs.stream().anyMatch(this::containsMatch); + } + + private boolean containsMatch(Expression expr) { + if (expr instanceof Match) { + return true; + } + for (Expression child : expr.children()) { + if (containsMatch(child)) { + return true; + } + } + return false; + } + + private PushDownResult buildVirtualColumnsFromList(List predicates, + LogicalProject project, LogicalOlapScan scan, Set leftOutputSlots) { + Map matchToVirtualColumn = new HashMap<>(); + Map matchToVirtualSlot = new HashMap<>(); + + for (Expression predicate : predicates) { + collectMatchesNeedingPushDown(predicate, project, leftOutputSlots, + matchToVirtualColumn, matchToVirtualSlot); + } + + if (matchToVirtualColumn.isEmpty()) { + return null; + } + + List virtualColumns = new ArrayList<>(scan.getVirtualColumns()); + 
virtualColumns.addAll(matchToVirtualColumn.values()); + LogicalOlapScan newScan = scan.withVirtualColumns(virtualColumns); + + List newProjections = new ArrayList<>(project.getProjects()); + for (Alias vcAlias : matchToVirtualColumn.values()) { + newProjections.add(vcAlias.toSlot()); + } + + List newPredicateList = new ArrayList<>(); + for (Expression predicate : predicates) { + newPredicateList.add(replaceMatch(predicate, matchToVirtualSlot)); + } + + PushDownResult result = new PushDownResult(); + result.newScan = newScan; + result.newProjections = newProjections; + result.newPredicateList = newPredicateList; + return result; + } + + private void collectMatchesNeedingPushDown(Expression expr, + LogicalProject project, Set leftOutputSlots, + Map matchToVirtualColumn, Map matchToVirtualSlot) { + if (expr instanceof Match) { + Match match = (Match) expr; + Set inputSlots = match.left().getInputSlots(); + SlotReference matchSlot = null; + for (Slot s : inputSlots) { + if (s instanceof SlotReference) { + matchSlot = (SlotReference) s; + break; + } + } + if (matchSlot == null) { + return; + } + + if (!leftOutputSlots.contains(matchSlot)) { + return; + } + + if (matchSlot.getOriginalColumn().isPresent() && matchSlot.getOriginalTable().isPresent()) { + return; + } + + Expression sourceExpr = findSourceExpression(matchSlot, project); + if (sourceExpr == null) { + return; + } + + Match newMatch = (Match) match.withChildren( + ImmutableList.of(sourceExpr, match.right())); + Alias vcAlias = new Alias(newMatch); + Slot vcSlot = vcAlias.toSlot(); + + matchToVirtualColumn.put(match, vcAlias); + matchToVirtualSlot.put(match, vcSlot); + return; + } + + for (Expression child : expr.children()) { + collectMatchesNeedingPushDown(child, project, leftOutputSlots, + matchToVirtualColumn, matchToVirtualSlot); + } + } + + private Expression findSourceExpression(SlotReference slot, LogicalProject project) { + for (NamedExpression ne : project.getProjects()) { + if 
(ne.getExprId().equals(slot.getExprId())) { + if (ne instanceof Alias) { + return ((Alias) ne).child(); + } else if (ne instanceof SlotReference) { + return ne; + } + } + } + return null; + } + + private Expression replaceMatch(Expression expr, Map matchToSlot) { + if (expr instanceof Match && matchToSlot.containsKey(expr)) { + return matchToSlot.get(expr); + } + + boolean changed = false; + List newChildren = new ArrayList<>(); + for (Expression child : expr.children()) { + Expression newChild = replaceMatch(child, matchToSlot); + if (newChild != child) { + changed = true; + } + newChildren.add(newChild); + } + + if (!changed) { + return expr; + } + return expr.withChildren(newChildren); + } + + private static class PushDownResult { + LogicalOlapScan newScan; + List newProjections; + List newPredicateList; + } +}