diff --git a/be/src/exec/common/variant_util.cpp b/be/src/exec/common/variant_util.cpp index 5a0a978ece882f..dd0c15a0025f82 100644 --- a/be/src/exec/common/variant_util.cpp +++ b/be/src/exec/common/variant_util.cpp @@ -988,11 +988,6 @@ Status VariantCompactionUtil::check_path_stats(const std::vectortablet_schema()->columns()) { - if (!column->is_variant_type()) { - continue; - } - } std::unordered_map original_uid_to_path_stats; for (const auto& rs : intputs) { RETURN_IF_ERROR(aggregate_path_to_stats(rs, &original_uid_to_path_stats)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchDslParser.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchDslParser.java index 832ec5a37f3594..d31c52c9155465 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchDslParser.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchDslParser.java @@ -277,6 +277,37 @@ private static void validateFieldsList(List fields) { } } + private static String buildFieldPath(SearchParser.FieldPathContext ctx) { + if (ctx == null) { + throw new RuntimeException("Invalid field query: missing field path"); + } + + StringBuilder fullPath = new StringBuilder(); + List segments = ctx.fieldSegment(); + for (int i = 0; i < segments.size(); i++) { + if (i > 0) { + fullPath.append('.'); + } + String segment = segments.get(i).getText(); + if (segment.startsWith("\"") && segment.endsWith("\"")) { + segment = segment.substring(1, segment.length() - 1); + } + fullPath.append(segment); + } + return fullPath.toString(); + } + + private static String normalizeNestedFieldPath(String fieldPath, @Nullable String nestedPath) { + if (nestedPath == null || nestedPath.isEmpty()) { + return fieldPath; + } + if (fieldPath.equals(nestedPath) || fieldPath.startsWith(nestedPath + ".")) { + throw new SearchDslSyntaxException("Fields in NESTED predicates must be relative to nested path: " + + nestedPath + ", but got: " + fieldPath); + } + return nestedPath + "." + fieldPath; + } + /** * Collect all field names from an AST node recursively. * @param node The AST node to collect from @@ -472,6 +503,7 @@ public void syntaxError(org.antlr.v4.runtime.Recognizer recognizer, // Build AST using first field as placeholder for bare queries, with default operator QsAstBuilder visitor = new QsAstBuilder(fields.get(0), defaultOperator); QsNode root = visitor.visit(tree); + validateNestedTopLevelOnly(root); // Apply multi-field expansion based on type QsNode expandedRoot; @@ -563,6 +595,7 @@ public void syntaxError(org.antlr.v4.runtime.Recognizer recognizer, // Use constructor with override to avoid mutating shared options object (thread-safety) QsLuceneModeAstBuilder visitor = new QsLuceneModeAstBuilder(effectiveOptions, fields.get(0)); QsNode root = visitor.visit(tree); + validateNestedTopLevelOnly(root); // In ES query_string, both best_fields and cross_fields use per-clause expansion // (each clause is independently expanded across fields). The difference is only @@ -646,6 +679,8 @@ private static class QsAstBuilder extends SearchParserBaseVisitor implem private final Set fieldNames = new LinkedHashSet<>(); // Context stack to track current field name during parsing private String currentFieldName = null; + // Current nested path when visiting NESTED(path, predicates) + private String currentNestedPath = null; // Default field for bare queries (without field: prefix) private final String defaultField; // Default operator for implicit conjunction (space-separated terms): "AND" or "OR" @@ -822,6 +857,9 @@ public QsNode visitAtomClause(SearchParser.AtomClauseContext ctx) { @Override public QsNode visitBareQuery(SearchParser.BareQueryContext ctx) { + if (currentNestedPath != null && (currentFieldName == null || currentFieldName.isEmpty())) { + throw new SearchDslSyntaxException("Bare queries are not supported inside NESTED predicates"); + } // Use currentFieldName if inside a field group context (set by visitFieldGroupQuery), // otherwise fall back to the configured defaultField. String effectiveField = (currentFieldName != null && !currentFieldName.isEmpty()) @@ -858,60 +896,29 @@ public QsNode visitNestedQuery(SearchParser.NestedQueryContext ctx) { if (ctx.NESTED_PATH() == null) { throw new RuntimeException("Invalid NESTED clause: missing path"); } - String nestedPath = ctx.NESTED_PATH().getText(); - QsNode innerQuery = visit(ctx.clause()); - if (innerQuery == null) { - throw new RuntimeException("Invalid NESTED clause: missing inner query"); + if (currentNestedPath != null) { + throw new SearchDslSyntaxException("Nested NESTED() is not supported"); } - - validateNestedFieldPaths(innerQuery, nestedPath); - - QsNode node = new QsNode(QsClauseType.NESTED, Collections.singletonList(innerQuery)); - node.nestedPath = nestedPath; - return node; - } - - private void validateNestedFieldPaths(QsNode node, String nestedPath) { - if (node == null) { - return; - } - if (node.type == QsClauseType.NESTED) { - throw new RuntimeException("Nested NESTED() is not supported: " + nestedPath); - } - if (node.field != null && !node.field.startsWith(nestedPath + ".")) { - throw new RuntimeException("Fields in NESTED query must start with nested path: " - + nestedPath + ", but got: " + node.field); - } - if (node.children != null) { - for (QsNode child : node.children) { - validateNestedFieldPaths(child, nestedPath); + String nestedPath = ctx.NESTED_PATH().getText(); + String previousNestedPath = currentNestedPath; + currentNestedPath = nestedPath; + try { + QsNode innerQuery = visit(ctx.clause()); + if (innerQuery == null) { + throw new RuntimeException("Invalid NESTED clause: missing inner query"); } + + QsNode node = new QsNode(QsClauseType.NESTED, Collections.singletonList(innerQuery)); + node.nestedPath = nestedPath; + return node; + } finally { + currentNestedPath = previousNestedPath; } } @Override public QsNode visitFieldQuery(SearchParser.FieldQueryContext ctx) { - if (ctx.fieldPath() == null) { - throw new RuntimeException("Invalid field query: missing field path"); - } - - // Build complete field path from segments (support field.subcolumn syntax) - StringBuilder fullPath = new StringBuilder(); - List segments = ctx.fieldPath().fieldSegment(); - - for (int i = 0; i < segments.size(); i++) { - if (i > 0) { - fullPath.append('.'); - } - String segment = segments.get(i).getText(); - // Remove quotes if present - if (segment.startsWith("\"") && segment.endsWith("\"")) { - segment = segment.substring(1, segment.length() - 1); - } - fullPath.append(segment); - } - - String fieldPath = fullPath.toString(); + String fieldPath = normalizeNestedFieldPath(buildFieldPath(ctx.fieldPath()), currentNestedPath); fieldNames.add(fieldPath); // Set current field context before visiting search value @@ -941,21 +948,7 @@ public QsNode visitFieldGroupQuery(SearchParser.FieldGroupQueryContext ctx) { throw new SearchDslSyntaxException("Invalid field group query: missing field path"); } - // Build complete field path from segments (support field.subcolumn syntax) - StringBuilder fullPath = new StringBuilder(); - List segments = ctx.fieldPath().fieldSegment(); - for (int i = 0; i < segments.size(); i++) { - if (i > 0) { - fullPath.append('.'); - } - String segment = segments.get(i).getText(); - if (segment.startsWith("\"") && segment.endsWith("\"")) { - segment = segment.substring(1, segment.length() - 1); - } - fullPath.append(segment); - } - - String fieldPath = fullPath.toString(); + String fieldPath = normalizeNestedFieldPath(buildFieldPath(ctx.fieldPath()), currentNestedPath); fieldNames.add(fieldPath); // Set field group context so bare terms inside use this field @@ -2075,6 +2068,7 @@ private static class QsLuceneModeAstBuilder extends SearchParserBaseVisitor fieldNames = new LinkedHashSet<>(); private final SearchOptions options; private String currentFieldName = null; + private String currentNestedPath = null; // Override for default field - used in multi-field mode to avoid mutating options private final String overrideDefaultField; private int nestingLevel = 0; @@ -2301,6 +2295,8 @@ private void collectTermsFromNotClause(SearchParser.NotClauseContext ctx, List segments = ctx.fieldPath().fieldSegment(); - - for (int i = 0; i < segments.size(); i++) { - if (i > 0) { - fullPath.append('.'); - } - String segment = segments.get(i).getText(); - if (segment.startsWith("\"") && segment.endsWith("\"")) { - segment = segment.substring(1, segment.length() - 1); - } - fullPath.append(segment); - } - - String fieldPath = fullPath.toString(); + String fieldPath = normalizeNestedFieldPath(buildFieldPath(ctx.fieldPath()), currentNestedPath); fieldNames.add(fieldPath); String previousFieldName = currentFieldName; @@ -2571,21 +2544,7 @@ public QsNode visitFieldGroupQuery(SearchParser.FieldGroupQueryContext ctx) { throw new SearchDslSyntaxException("Invalid field group query: missing field path"); } - // Build complete field path from segments (support field.subcolumn syntax) - StringBuilder fullPath = new StringBuilder(); - List segments = ctx.fieldPath().fieldSegment(); - for (int i = 0; i < segments.size(); i++) { - if (i > 0) { - fullPath.append('.'); - } - String segment = segments.get(i).getText(); - if (segment.startsWith("\"") && segment.endsWith("\"")) { - segment = segment.substring(1, segment.length() - 1); - } - fullPath.append(segment); - } - - String fieldPath = fullPath.toString(); + String fieldPath = normalizeNestedFieldPath(buildFieldPath(ctx.fieldPath()), currentNestedPath); fieldNames.add(fieldPath); // Set field group context so bare terms inside use this field @@ -2724,7 +2683,7 @@ private static void validateNestedTopLevelOnly(QsNode node, boolean isRoot) { return; } if (node.type == QsClauseType.NESTED && !isRoot) { - throw new RuntimeException("NESTED clause must be evaluated at top level"); + throw new SearchDslSyntaxException("NESTED clause must be evaluated at top level"); } if (node.children == null || node.children.isEmpty()) { return; diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/SearchPredicateTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/SearchPredicateTest.java index 8a5602c3317490..c1e82b894e66f4 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/SearchPredicateTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/SearchPredicateTest.java @@ -153,6 +153,35 @@ public void testBuildThriftParam() { Assertions.assertEquals("content", param.field_bindings.get(1).field_name); } + @Test + public void testNestedRelativeFieldsAreNormalizedBeforeThrift() { + String dsl = "NESTED(data.items, msg:hello AND meta.channel:action)"; + SearchDslParser.QsPlan plan = SearchDslParser.parseDsl(dsl, "{\"mode\":\"standard\"}"); + List children = Arrays.asList(createTestSlotRef("data"), createTestSlotRef("data")); + + SearchPredicate predicate = new SearchPredicate(dsl, plan, children, true); + + TExprNode thriftNode = new TExprNode(); + predicate.accept(ExprToThriftVisitor.INSTANCE, thriftNode); + + TSearchParam param = thriftNode.search_param; + Assertions.assertNotNull(param); + Assertions.assertEquals("NESTED", param.root.clause_type); + Assertions.assertEquals("data.items", param.root.nested_path); + Assertions.assertEquals(1, param.root.children.size()); + Assertions.assertEquals("AND", param.root.children.get(0).clause_type); + Assertions.assertEquals("data.items.msg", param.root.children.get(0).children.get(0).field_name); + Assertions.assertEquals("data.items.meta.channel", param.root.children.get(0).children.get(1).field_name); + + Assertions.assertEquals(2, param.field_bindings.size()); + Assertions.assertEquals("data.items.msg", param.field_bindings.get(0).field_name); + Assertions.assertEquals("data", param.field_bindings.get(0).parent_field_name); + Assertions.assertEquals("items.msg", param.field_bindings.get(0).subcolumn_path); + Assertions.assertEquals("data.items.meta.channel", param.field_bindings.get(1).field_name); + Assertions.assertEquals("data", param.field_bindings.get(1).parent_field_name); + Assertions.assertEquals("items.meta.channel", param.field_bindings.get(1).subcolumn_path); + } + @Test public void testClone() { String dsl = "title:hello"; diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java index c5f228cf1189c5..6dc16a1da7aa0f 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java @@ -2531,7 +2531,7 @@ public void testSingleFieldMatchAllDocsLuceneMode() { @Test public void testNestedQuerySimple() { - String dsl = "NESTED(data, data.msg:hello)"; + String dsl = "NESTED(data, msg:hello)"; QsPlan plan = SearchDslParser.parseDsl(dsl, "{\"mode\":\"standard\"}"); Assertions.assertNotNull(plan); @@ -2545,7 +2545,7 @@ public void testNestedQuerySimple() { @Test public void testNestedQueryAnd() { - String dsl = "NESTED(data, data.msg:hello AND data.title:news)"; + String dsl = "NESTED(data, msg:hello AND title:news)"; QsPlan plan = SearchDslParser.parseDsl(dsl, "{\"mode\":\"standard\"}"); Assertions.assertNotNull(plan); @@ -2558,29 +2558,109 @@ public void testNestedQueryAnd() { } @Test - public void testNestedQueryFieldValidation() { - String dsl = "NESTED(data, other.msg:hello)"; + public void testNestedQueryAbsolutePathRejected() { + String dsl = "NESTED(data, data.msg:hello)"; RuntimeException exception = Assertions.assertThrows(RuntimeException.class, () -> { SearchDslParser.parseDsl(dsl, "{\"mode\":\"standard\"}"); }); - Assertions.assertTrue(exception.getMessage().contains("Fields in NESTED query must start with nested path")); + Assertions.assertTrue(exception.getMessage().contains("Fields in NESTED predicates must be relative")); } @Test public void testNestedQueryPathWithDot() { - String dsl = "NESTED(data.items, data.items.msg:hello)"; + String dsl = "NESTED(data.items, meta.channel:action)"; QsPlan plan = SearchDslParser.parseDsl(dsl, "{\"mode\":\"standard\"}"); Assertions.assertNotNull(plan); Assertions.assertEquals(QsClauseType.NESTED, plan.getRoot().getType()); Assertions.assertEquals("data.items", plan.getRoot().getNestedPath()); Assertions.assertTrue(plan.getFieldBindings().stream() - .anyMatch(b -> "data.items.msg".equals(b.getFieldName()))); + .anyMatch(b -> "data.items.meta.channel".equals(b.getFieldName()))); + } + + @Test + public void testNestedQuerySimpleLuceneMode() { + String dsl = "NESTED(data, msg:hello)"; + QsPlan plan = SearchDslParser.parseDsl(dsl, + "{\"mode\":\"lucene\",\"default_operator\":\"AND\",\"minimum_should_match\":0}"); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.NESTED, plan.getRoot().getType()); + Assertions.assertEquals("data", plan.getRoot().getNestedPath()); + Assertions.assertEquals(1, plan.getRoot().getChildren().size()); + Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getChildren().get(0).getType()); + Assertions.assertEquals("data.msg", plan.getRoot().getChildren().get(0).getField()); + Assertions.assertTrue(plan.getFieldBindings().stream().anyMatch(b -> "data.msg".equals(b.getFieldName()))); + } + + @Test + public void testNestedQueryAndLuceneMode() { + String dsl = "NESTED(data, msg:hello AND title:news)"; + QsPlan plan = SearchDslParser.parseDsl(dsl, + "{\"mode\":\"lucene\",\"default_operator\":\"AND\",\"minimum_should_match\":0}"); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.NESTED, plan.getRoot().getType()); + Assertions.assertEquals("data", plan.getRoot().getNestedPath()); + Assertions.assertEquals(1, plan.getRoot().getChildren().size()); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getChildren().get(0).getType()); + Assertions.assertTrue(plan.getFieldBindings().stream().anyMatch(b -> "data.msg".equals(b.getFieldName()))); + Assertions.assertTrue(plan.getFieldBindings().stream().anyMatch(b -> "data.title".equals(b.getFieldName()))); + } + + @Test + public void testNestedQueryDescendantFieldLuceneMode() { + String dsl = "NESTED(data.items, input.display_text:selforigin)"; + QsPlan plan = SearchDslParser.parseDsl(dsl, + "{\"mode\":\"lucene\",\"default_operator\":\"AND\",\"minimum_should_match\":0}"); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.NESTED, plan.getRoot().getType()); + Assertions.assertEquals("data.items", plan.getRoot().getNestedPath()); + Assertions.assertTrue(plan.getFieldBindings().stream() + .anyMatch(b -> "data.items.input.display_text".equals(b.getFieldName()))); + } + + @Test + public void testNestedQueryMustBeTopLevelInAndLuceneMode() { + String dsl = "title:hello AND NESTED(data, msg:hello)"; + RuntimeException exception = Assertions.assertThrows(RuntimeException.class, () -> { + SearchDslParser.parseDsl(dsl, + "{\"mode\":\"lucene\",\"default_operator\":\"AND\",\"minimum_should_match\":0}"); + }); + Assertions.assertTrue(exception.getMessage().contains("NESTED clause must be evaluated at top level")); + } + + @Test + public void testNestedQueryMixedRelativeAndAbsoluteRejected() { + String dsl = "NESTED(data.items, msg:hello AND data.items.title:news)"; + RuntimeException exception = Assertions.assertThrows(RuntimeException.class, () -> { + SearchDslParser.parseDsl(dsl, "{\"mode\":\"standard\"}"); + }); + Assertions.assertTrue(exception.getMessage().contains("Fields in NESTED predicates must be relative")); + } + + @Test + public void testNestedQueryBareQueryRejected() { + String dsl = "NESTED(data.items, hello)"; + RuntimeException exception = Assertions.assertThrows(RuntimeException.class, () -> { + SearchDslParser.parseDsl(dsl, "{\"mode\":\"standard\"}"); + }); + Assertions.assertTrue(exception.getMessage().contains("Bare queries are not supported inside NESTED predicates")); + } + + @Test + public void testNestedQueryNestedNestedRejected() { + String dsl = "NESTED(data, NESTED(data.items, msg:hello))"; + RuntimeException exception = Assertions.assertThrows(RuntimeException.class, () -> { + SearchDslParser.parseDsl(dsl, "{\"mode\":\"standard\"}"); + }); + Assertions.assertTrue(exception.getMessage().contains("Nested NESTED() is not supported")); } @Test public void testNestedQueryMustBeTopLevelInAnd() { - String dsl = "title:hello AND NESTED(data, data.msg:hello)"; + String dsl = "title:hello AND NESTED(data, msg:hello)"; RuntimeException exception = Assertions.assertThrows(RuntimeException.class, () -> { SearchDslParser.parseDsl(dsl, "{\"mode\":\"standard\"}"); }); @@ -2589,7 +2669,7 @@ public void testNestedQueryMustBeTopLevelInAnd() { @Test public void testNestedQueryMustBeTopLevelInOr() { - String dsl = "NESTED(data, data.msg:hello) OR title:hello"; + String dsl = "NESTED(data, msg:hello) OR title:hello"; RuntimeException exception = Assertions.assertThrows(RuntimeException.class, () -> { SearchDslParser.parseDsl(dsl, "{\"mode\":\"standard\"}"); }); @@ -2598,7 +2678,7 @@ public void testNestedQueryMustBeTopLevelInOr() { @Test public void testNestedQueryMustBeTopLevelInNot() { - String dsl = "NOT NESTED(data, data.msg:hello)"; + String dsl = "NOT NESTED(data, msg:hello)"; RuntimeException exception = Assertions.assertThrows(RuntimeException.class, () -> { SearchDslParser.parseDsl(dsl, "{\"mode\":\"standard\"}"); });