From d11a6e34e46db307b2d2fc28c6decae6862218b5 Mon Sep 17 00:00:00 2001 From: Greg Felice Date: Wed, 25 Feb 2026 22:20:42 -0500 Subject: [PATCH] Fix MATCH after CREATE returning 0 rows (issue #2308) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a MATCH clause follows CREATE + WITH and re-uses bound variables (e.g. CREATE (a)-[e]->(b) WITH a,e,b MATCH p=(a)-[e]->(b)), the MATCH generates filter quals (age_start_id(e) = age_id(a), etc.) that reference only columns from the predecessor subquery. PostgreSQL's optimizer pushes these quals through the transparent subquery layers into the CREATE's child plan, where they evaluate on NULL values before CREATE has executed — always yielding 0 rows. Fix: mark the predecessor subquery RTE as security_barrier when the clause chain contains a data-modifying operation (CREATE, SET, DELETE, or MERGE). This prevents PostgreSQL from pushing filter quals into the subquery, ensuring they evaluate after the DML produces output values. Co-Authored-By: Claude Opus 4.6 --- regress/expected/cypher_match.out | 100 +++++++++++++++++++++++++++++ regress/sql/cypher_match.sql | 55 ++++++++++++++++ src/backend/parser/cypher_clause.c | 38 +++++++++++ 3 files changed, 193 insertions(+) diff --git a/regress/expected/cypher_match.out b/regress/expected/cypher_match.out index ea425e463..ff2825ae0 100644 --- a/regress/expected/cypher_match.out +++ b/regress/expected/cypher_match.out @@ -3533,6 +3533,106 @@ SELECT * FROM cypher('test_enable_containment', $$ EXPLAIN (costs off) MATCH (x: Filter: ((agtype_access_operator(VARIADIC ARRAY[properties, '"school"'::agtype]) = '{"name": "XYZ College", "program": {"major": "Psyc", "degree": "BSc"}}'::agtype) AND (agtype_access_operator(VARIADIC ARRAY[properties, '"phone"'::agtype]) = '[123456789, 987654321, 456987123]'::agtype)) (2 rows) +-- +-- issue 2308: MATCH after CREATE returns 0 rows +-- +-- When all MATCH variables are already bound from a preceding CREATE + WITH, +-- the MATCH filter quals must evaluate after CREATE, not before. +-- +SELECT create_graph('issue_2308'); +NOTICE: graph "issue_2308" has been created + create_graph +-------------- + +(1 row) + +-- Reporter's exact case: CREATE + WITH + MATCH + SET + RETURN +SELECT * FROM cypher('issue_2308', $$ + CREATE (a:TestB3)-[e:B3REL]->(b:TestB3) + WITH a, e, b + MATCH p = (a)-[e]->(b) + SET a.something = 'something' + RETURN a +$$) AS (a agtype); + a +---------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "TestB3", "properties": {"something": "something"}}::vertex +(1 row) + +-- Bound variables, no SET +SELECT * FROM cypher('issue_2308', $$ + CREATE (a:T2)-[e:R2]->(b:T2) + WITH a, e, b + MATCH (a)-[e]->(b) + RETURN a, e, b +$$) AS (a agtype, e agtype, b agtype); + a | e | b +-------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------- + {"id": 1407374883553281, "label": "T2", "properties": {}}::vertex | {"id": 1688849860263937, "label": "R2", "end_id": 1407374883553282, "start_id": 1407374883553281, "properties": {}}::edge | {"id": 1407374883553282, "label": "T2", "properties": {}}::vertex +(1 row) + +-- Reversed direction: filter should reject (0 rows expected) +SELECT * FROM cypher('issue_2308', $$ + CREATE (a:T3)-[e:R3]->(b:T3) + WITH a, e, b + MATCH (b)-[e]->(a) + RETURN a +$$) AS (a agtype); + a +--- +(0 rows) + +-- Node-only MATCH with bound variable +SELECT * FROM cypher('issue_2308', $$ + CREATE (a:T4 {name: 'test'}) + WITH a + MATCH (a) + RETURN a +$$) AS (a agtype); + a +--------------------------------------------------------------------------------- + {"id": 2533274790395905, "label": "T4", "properties": {"name": "test"}}::vertex +(1 row) + +-- MATCH after SET (SET is also DML, chain must be protected) +SELECT * FROM cypher('issue_2308', $$ + CREATE (a:T5 {val: 1})-[e:R5]->(b:T5 {val: 2}) +$$) AS (r agtype); + r +--- +(0 rows) + +SELECT * FROM cypher('issue_2308', $$ + MATCH (a:T5)-[e:R5]->(b:T5) + SET a.val = 10 + WITH a, e, b + MATCH (a)-[e]->(b) + RETURN a.val +$$) AS (val agtype); + val +----- + 10 +(1 row) + +SELECT drop_graph('issue_2308', true); +NOTICE: drop cascades to 11 other objects +DETAIL: drop cascades to table issue_2308._ag_label_vertex +drop cascades to table issue_2308._ag_label_edge +drop cascades to table issue_2308."TestB3" +drop cascades to table issue_2308."B3REL" +drop cascades to table issue_2308."T2" +drop cascades to table issue_2308."R2" +drop cascades to table issue_2308."T3" +drop cascades to table issue_2308."R3" +drop cascades to table issue_2308."T4" +drop cascades to table issue_2308."T5" +drop cascades to table issue_2308."R5" +NOTICE: graph "issue_2308" has been dropped + drop_graph +------------ + +(1 row) + -- -- Clean up -- diff --git a/regress/sql/cypher_match.sql b/regress/sql/cypher_match.sql index 2817f36f6..ebcd67b84 100644 --- a/regress/sql/cypher_match.sql +++ b/regress/sql/cypher_match.sql @@ -1437,6 +1437,61 @@ SELECT count(*) FROM cypher('test_enable_containment', $$ MATCH p=(x:Customer)-[ SELECT * FROM cypher('test_enable_containment', $$ EXPLAIN (costs off) MATCH (x:Customer)-[:bought ={store: 'Amazon', addr:{city: 'Vancouver', street: 30}}]->(y:Product) RETURN 0 $$) as (a agtype); SELECT * FROM cypher('test_enable_containment', $$ EXPLAIN (costs off) MATCH (x:Customer ={school: { name: 'XYZ College',program: { major: 'Psyc', degree: 'BSc'} },phone: [ 123456789, 987654321, 456987123 ]}) RETURN 0 $$) as (a agtype); +-- +-- issue 2308: MATCH after CREATE returns 0 rows +-- +-- When all MATCH variables are already bound from a preceding CREATE + WITH, +-- the MATCH filter quals must evaluate after CREATE, not before. +-- +SELECT create_graph('issue_2308'); + +-- Reporter's exact case: CREATE + WITH + MATCH + SET + RETURN +SELECT * FROM cypher('issue_2308', $$ + CREATE (a:TestB3)-[e:B3REL]->(b:TestB3) + WITH a, e, b + MATCH p = (a)-[e]->(b) + SET a.something = 'something' + RETURN a +$$) AS (a agtype); + +-- Bound variables, no SET +SELECT * FROM cypher('issue_2308', $$ + CREATE (a:T2)-[e:R2]->(b:T2) + WITH a, e, b + MATCH (a)-[e]->(b) + RETURN a, e, b +$$) AS (a agtype, e agtype, b agtype); + +-- Reversed direction: filter should reject (0 rows expected) +SELECT * FROM cypher('issue_2308', $$ + CREATE (a:T3)-[e:R3]->(b:T3) + WITH a, e, b + MATCH (b)-[e]->(a) + RETURN a +$$) AS (a agtype); + +-- Node-only MATCH with bound variable +SELECT * FROM cypher('issue_2308', $$ + CREATE (a:T4 {name: 'test'}) + WITH a + MATCH (a) + RETURN a +$$) AS (a agtype); + +-- MATCH after SET (SET is also DML, chain must be protected) +SELECT * FROM cypher('issue_2308', $$ + CREATE (a:T5 {val: 1})-[e:R5]->(b:T5 {val: 2}) +$$) AS (r agtype); +SELECT * FROM cypher('issue_2308', $$ + MATCH (a:T5)-[e:R5]->(b:T5) + SET a.val = 10 + WITH a, e, b + MATCH (a)-[e]->(b) + RETURN a.val +$$) AS (val agtype); + +SELECT drop_graph('issue_2308', true); + -- -- Clean up -- diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index 991e3f785..446e97b3f 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -345,6 +345,7 @@ static bool isa_special_VLE_case(cypher_path *path); static ParseNamespaceItem *find_pnsi(cypher_parsestate *cpstate, char *varname); static bool has_list_comp_or_subquery(Node *expr, void *context); +static bool clause_chain_has_dml(cypher_clause *clause); /* * Add required permissions to the RTEPermissionInfo for a relation. @@ -2917,6 +2918,21 @@ static Query *transform_cypher_match_pattern(cypher_parsestate *cpstate, pnsi = transform_prev_cypher_clause(cpstate, clause->prev, true); rte = pnsi->p_rte; + + /* + * If the predecessor clause chain contains a data-modifying + * operation (CREATE, SET, DELETE, MERGE), mark the subquery + * RTE as a security barrier. This prevents PostgreSQL's + * optimizer from pushing MATCH filter quals down into the + * subquery, which would cause them to evaluate before the + * DML executes -- resulting in quals checking NULL values + * and filtering out all rows. + */ + if (clause_chain_has_dml(clause->prev)) + { + rte->security_barrier = true; + } + rtindex = list_length(pstate->p_rtable); /* rte is the first RangeTblEntry in pstate */ if (rtindex != 1) @@ -6545,6 +6561,28 @@ static void advance_transform_entities_to_next_clause(List *entities) } } +/* + * Walk the clause chain and return true if any clause is a + * data-modifying operation (CREATE, SET, DELETE, or MERGE). + */ +static bool clause_chain_has_dml(cypher_clause *clause) +{ + while (clause != NULL) + { + if (is_ag_node(clause->self, cypher_create) || + is_ag_node(clause->self, cypher_set) || + is_ag_node(clause->self, cypher_delete) || + is_ag_node(clause->self, cypher_merge)) + { + return true; + } + + clause = clause->prev; + } + + return false; +} + static Query *analyze_cypher_clause(transform_method transform, cypher_clause *clause, cypher_parsestate *parent_cpstate)