From ca87958faaf779f1ae302ee74d0b5e811bbb3587 Mon Sep 17 00:00:00 2001 From: Greg Felice Date: Wed, 25 Feb 2026 23:50:46 -0500 Subject: [PATCH 1/2] Fix entity_exists() CID visibility for CREATE + WITH + MERGE (#1954) When a Cypher query chains CREATE ... WITH ... MERGE, vertices created by CREATE become invisible to entity_exists() once the number of input rows passes a threshold (the regression test added here hits it on the third row). This causes MERGE to throw "vertex assigned to variable <name> was deleted". Root cause: CREATE calls CommandCounterIncrement(), which advances the global command ID but does not update es_snapshot->curcid. The Decrement/Increment CID macros used by the executors bring curcid back to the same value on each iteration. After enough rows, newly inserted vertices have a Cmin >= curcid and HeapTupleSatisfiesMVCC rejects them (requires Cmin < curcid). Fix: In entity_exists(), temporarily set es_snapshot->curcid to the current global command ID (via GetCurrentCommandId) for the duration of the scan, then restore it. This makes all entities inserted by preceding clauses in the same query visible to the existence check. 
Co-Authored-By: Claude Opus 4.6 --- regress/expected/cypher_merge.out | 133 +++++++++++++++++++++++++++- regress/sql/cypher_merge.sql | 65 ++++++++++++++ src/backend/executor/cypher_utils.c | 16 ++++ 3 files changed, 212 insertions(+), 2 deletions(-) diff --git a/regress/expected/cypher_merge.out b/regress/expected/cypher_merge.out index 8c37dc2de..4c58a0f19 100644 --- a/regress/expected/cypher_merge.out +++ b/regress/expected/cypher_merge.out @@ -1728,9 +1728,9 @@ SELECT * FROM cypher('issue_1907', $$ MATCH ()-[r]->() RETURN r $$) AS (r agtype -- SELECT * FROM create_graph('issue_1446'); NOTICE: graph "issue_1446" has been created - create_graph + create_graph -------------- - + (1 row) -- Reporter's exact setup: two initial nodes @@ -1864,6 +1864,113 @@ $$) AS (edge_count agtype); ------------ 0 (1 row) +-- Issue 1954: CREATE + WITH + MERGE causes "vertex was deleted" error +-- when the number of input rows exceeds the snapshot's command ID window. +-- entity_exists() used a stale curcid, making recently-created vertices +-- invisible on later iterations. +-- +SELECT * FROM create_graph('issue_1954'); +NOTICE: graph "issue_1954" has been created + create_graph +-------------- + +(1 row) + +-- Setup: create source nodes and relationships (3 rows to trigger the bug) +SELECT * FROM cypher('issue_1954', $$ + CREATE (:A {name: 'a1'})-[:R]->(:B {name: 'b1'}), + (:A {name: 'a2'})-[:R]->(:B {name: 'b2'}), + (:A {name: 'a3'})-[:R]->(:B {name: 'b3'}) +$$) AS (result agtype); + result +-------- +(0 rows) + +-- This query would fail with "vertex assigned to variable c was deleted" +-- on the 3rd row before the fix. 
+SELECT * FROM cypher('issue_1954', $$ + MATCH (a:A)-[:R]->(b:B) + CREATE (c:C {name: a.name + '|' + b.name}) + WITH a, b, c + MERGE (a)-[:LINK]->(c) + RETURN a.name, b.name, c.name + ORDER BY a.name +$$) AS (a agtype, b agtype, c agtype); + a | b | c +------+------+--------- + "a1" | "b1" | "a1|b1" + "a2" | "b2" | "a2|b2" + "a3" | "b3" | "a3|b3" +(3 rows) + +-- Verify edges were created +SELECT * FROM cypher('issue_1954', $$ + MATCH (a:A)-[:LINK]->(c:C) + RETURN a.name, c.name + ORDER BY a.name +$$) AS (a agtype, c agtype); + a | c +------+--------- + "a1" | "a1|b1" + "a2" | "a2|b2" + "a3" | "a3|b3" +(3 rows) + +-- Test with two MERGEs (more complex case from the original report) +SELECT * FROM cypher('issue_1954', $$ + MATCH ()-[e:LINK]->() DELETE e +$$) AS (result agtype); + result +-------- +(0 rows) + +SELECT * FROM cypher('issue_1954', $$ + MATCH (c:C) DELETE c +$$) AS (result agtype); + result +-------- +(0 rows) + +SELECT * FROM cypher('issue_1954', $$ + MATCH (a:A)-[:R]->(b:B) + CREATE (c:C {name: a.name + '|' + b.name}) + WITH a, b, c + MERGE (a)-[:LINK1]->(c) + MERGE (b)-[:LINK2]->(c) + RETURN a.name, b.name, c.name + ORDER BY a.name +$$) AS (a agtype, b agtype, c agtype); + a | b | c +------+------+--------- + "a1" | "b1" | "a1|b1" + "a2" | "b2" | "a2|b2" + "a3" | "b3" | "a3|b3" +(3 rows) + +-- Verify both sets of edges +SELECT * FROM cypher('issue_1954', $$ + MATCH (a:A)-[:LINK1]->(c:C) + RETURN a.name, c.name + ORDER BY a.name +$$) AS (a agtype, c agtype); + a | c +------+--------- + "a1" | "a1|b1" + "a2" | "a2|b2" + "a3" | "a3|b3" +(3 rows) + +SELECT * FROM cypher('issue_1954', $$ + MATCH (b:B)-[:LINK2]->(c:C) + RETURN b.name, c.name + ORDER BY b.name +$$) AS (b agtype, c agtype); + b | c +------+--------- + "b1" | "a1|b1" + "b2" | "a2|b2" + "b3" | "a3|b3" +(3 rows) -- -- clean up graphs @@ -1888,6 +1995,11 @@ SELECT * FROM cypher('issue_1446', $$ MATCH (n) DETACH DELETE n $$) AS (a agtype --- (0 rows) +SELECT * FROM cypher('issue_1954', $$ MATCH (n) 
DETACH DELETE n $$) AS (a agtype); + a +--- +(0 rows) + -- -- delete graphs -- @@ -1985,6 +2097,23 @@ NOTICE: graph "issue_1446" has been dropped (1 row) +SELECT drop_graph('issue_1954', true); +NOTICE: drop cascades to 9 other objects +DETAIL: drop cascades to table issue_1954._ag_label_vertex +drop cascades to table issue_1954._ag_label_edge +drop cascades to table issue_1954."A" +drop cascades to table issue_1954."R" +drop cascades to table issue_1954."B" +drop cascades to table issue_1954."C" +drop cascades to table issue_1954."LINK" +drop cascades to table issue_1954."LINK1" +drop cascades to table issue_1954."LINK2" +NOTICE: graph "issue_1954" has been dropped + drop_graph +------------ + +(1 row) + -- -- End -- diff --git a/regress/sql/cypher_merge.sql b/regress/sql/cypher_merge.sql index cc900e73d..5939c42a8 100644 --- a/regress/sql/cypher_merge.sql +++ b/regress/sql/cypher_merge.sql @@ -860,6 +860,69 @@ SELECT * FROM cypher('issue_1446', $$ RETURN count(*) AS edge_count $$) AS (edge_count agtype); +-- Issue 1954: CREATE + WITH + MERGE causes "vertex was deleted" error +-- when the number of input rows exceeds the snapshot's command ID window. +-- entity_exists() used a stale curcid, making recently-created vertices +-- invisible on later iterations. +-- +SELECT * FROM create_graph('issue_1954'); + +-- Setup: create source nodes and relationships (3 rows to trigger the bug) +SELECT * FROM cypher('issue_1954', $$ + CREATE (:A {name: 'a1'})-[:R]->(:B {name: 'b1'}), + (:A {name: 'a2'})-[:R]->(:B {name: 'b2'}), + (:A {name: 'a3'})-[:R]->(:B {name: 'b3'}) +$$) AS (result agtype); + +-- This query would fail with "vertex assigned to variable c was deleted" +-- on the 3rd row before the fix. 
+SELECT * FROM cypher('issue_1954', $$ + MATCH (a:A)-[:R]->(b:B) + CREATE (c:C {name: a.name + '|' + b.name}) + WITH a, b, c + MERGE (a)-[:LINK]->(c) + RETURN a.name, b.name, c.name + ORDER BY a.name +$$) AS (a agtype, b agtype, c agtype); + +-- Verify edges were created +SELECT * FROM cypher('issue_1954', $$ + MATCH (a:A)-[:LINK]->(c:C) + RETURN a.name, c.name + ORDER BY a.name +$$) AS (a agtype, c agtype); + +-- Test with two MERGEs (more complex case from the original report) +SELECT * FROM cypher('issue_1954', $$ + MATCH ()-[e:LINK]->() DELETE e +$$) AS (result agtype); +SELECT * FROM cypher('issue_1954', $$ + MATCH (c:C) DELETE c +$$) AS (result agtype); + +SELECT * FROM cypher('issue_1954', $$ + MATCH (a:A)-[:R]->(b:B) + CREATE (c:C {name: a.name + '|' + b.name}) + WITH a, b, c + MERGE (a)-[:LINK1]->(c) + MERGE (b)-[:LINK2]->(c) + RETURN a.name, b.name, c.name + ORDER BY a.name +$$) AS (a agtype, b agtype, c agtype); + +-- Verify both sets of edges +SELECT * FROM cypher('issue_1954', $$ + MATCH (a:A)-[:LINK1]->(c:C) + RETURN a.name, c.name + ORDER BY a.name +$$) AS (a agtype, c agtype); + +SELECT * FROM cypher('issue_1954', $$ + MATCH (b:B)-[:LINK2]->(c:C) + RETURN b.name, c.name + ORDER BY b.name +$$) AS (b agtype, c agtype); + -- -- clean up graphs -- @@ -867,6 +930,7 @@ SELECT * FROM cypher('cypher_merge', $$ MATCH (n) DETACH DELETE n $$) AS (a agty SELECT * FROM cypher('issue_1630', $$ MATCH (n) DETACH DELETE n $$) AS (a agtype); SELECT * FROM cypher('issue_1709', $$ MATCH (n) DETACH DELETE n $$) AS (a agtype); SELECT * FROM cypher('issue_1446', $$ MATCH (n) DETACH DELETE n $$) AS (a agtype); +SELECT * FROM cypher('issue_1954', $$ MATCH (n) DETACH DELETE n $$) AS (a agtype); -- -- delete graphs @@ -877,6 +941,7 @@ SELECT drop_graph('issue_1630', true); SELECT drop_graph('issue_1691', true); SELECT drop_graph('issue_1709', true); SELECT drop_graph('issue_1446', true); +SELECT drop_graph('issue_1954', true); -- -- End diff --git 
a/src/backend/executor/cypher_utils.c b/src/backend/executor/cypher_utils.c index eff829925..282acbd29 100644 --- a/src/backend/executor/cypher_utils.c +++ b/src/backend/executor/cypher_utils.c @@ -208,6 +208,7 @@ bool entity_exists(EState *estate, Oid graph_oid, graphid id) HeapTuple tuple; Relation rel; bool result = true; + CommandId saved_curcid; /* * Extract the label id from the graph id and get the table name @@ -219,6 +220,18 @@ bool entity_exists(EState *estate, Oid graph_oid, graphid id) ScanKeyInit(&scan_keys[0], 1, BTEqualStrategyNumber, F_GRAPHIDEQ, GRAPHID_GET_DATUM(id)); + /* + * Temporarily advance the snapshot's curcid to the current global + * command ID so that entities inserted by preceding clauses (e.g., + * CREATE) in the same query are visible. CREATE calls + * CommandCounterIncrement() which advances the global CID, but does + * not update es_snapshot->curcid. The Decrement/Increment CID + * macros used by the executors can leave curcid behind the global + * CID, making recently created entities invisible to this scan. + */ + saved_curcid = estate->es_snapshot->curcid; + estate->es_snapshot->curcid = GetCurrentCommandId(false); + rel = table_open(label->relation, RowExclusiveLock); scan_desc = table_beginscan(rel, estate->es_snapshot, 1, scan_keys); @@ -236,6 +249,9 @@ bool entity_exists(EState *estate, Oid graph_oid, graphid id) table_endscan(scan_desc); table_close(rel, RowExclusiveLock); + /* Restore the original curcid */ + estate->es_snapshot->curcid = saved_curcid; + return result; } From 3e568e33a1180540e9d27dcb60ff215984ef1d7b Mon Sep 17 00:00:00 2001 From: Greg Felice Date: Fri, 27 Feb 2026 11:13:15 -0500 Subject: [PATCH 2/2] Use Max() to prevent curcid regression in entity_exists() Address review feedback: es_snapshot->curcid can be ahead of the global CID due to Increment_Estate_CommandId macros. Unconditionally assigning GetCurrentCommandId(false) could decrease curcid, making previously visible tuples invisible. 
Use Max(saved_curcid, GetCurrentCommandId(false)) to ensure we only ever increase visibility. Co-Authored-By: Claude Opus 4.6 --- regress/expected/cypher_merge.out | 5 +++-- src/backend/executor/cypher_utils.c | 19 ++++++++++++------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/regress/expected/cypher_merge.out b/regress/expected/cypher_merge.out index 4c58a0f19..4242f2f59 100644 --- a/regress/expected/cypher_merge.out +++ b/regress/expected/cypher_merge.out @@ -1728,9 +1728,9 @@ SELECT * FROM cypher('issue_1907', $$ MATCH ()-[r]->() RETURN r $$) AS (r agtype -- SELECT * FROM create_graph('issue_1446'); NOTICE: graph "issue_1446" has been created - create_graph + create_graph -------------- - + (1 row) -- Reporter's exact setup: two initial nodes @@ -1864,6 +1864,7 @@ $$) AS (edge_count agtype); ------------ 0 (1 row) + -- Issue 1954: CREATE + WITH + MERGE causes "vertex was deleted" error -- when the number of input rows exceeds the snapshot's command ID window. -- entity_exists() used a stale curcid, making recently-created vertices diff --git a/src/backend/executor/cypher_utils.c b/src/backend/executor/cypher_utils.c index 282acbd29..940284234 100644 --- a/src/backend/executor/cypher_utils.c +++ b/src/backend/executor/cypher_utils.c @@ -221,16 +221,21 @@ bool entity_exists(EState *estate, Oid graph_oid, graphid id) F_GRAPHIDEQ, GRAPHID_GET_DATUM(id)); /* - * Temporarily advance the snapshot's curcid to the current global - * command ID so that entities inserted by preceding clauses (e.g., - * CREATE) in the same query are visible. CREATE calls - * CommandCounterIncrement() which advances the global CID, but does - * not update es_snapshot->curcid. The Decrement/Increment CID - * macros used by the executors can leave curcid behind the global + * Temporarily advance the snapshot's curcid so that entities inserted + * by preceding clauses (e.g., CREATE) in the same query are visible. 
+ * CREATE calls CommandCounterIncrement() which advances the global + * CID, but does not update es_snapshot->curcid. The Decrement/Increment + * CID macros used by the executors can leave curcid behind the global * CID, making recently created entities invisible to this scan. + * + * Use Max to ensure we never decrease curcid. The executor macros + * (Increment_Estate_CommandId) can push curcid above the global CID, + * and blindly assigning GetCurrentCommandId could make tuples that + * are visible at the current curcid become invisible. */ saved_curcid = estate->es_snapshot->curcid; - estate->es_snapshot->curcid = GetCurrentCommandId(false); + estate->es_snapshot->curcid = Max(saved_curcid, + GetCurrentCommandId(false)); rel = table_open(label->relation, RowExclusiveLock); scan_desc = table_beginscan(rel, estate->es_snapshot, 1, scan_keys);