Skip to content

Commit cda7e1a

Browse files
Merge pull request #1417 from datajoint/fix/1049-skip-duplicates-unique-postgres
docs+test: skip_duplicates enforces secondary unique constraints on PostgreSQL (fixes #1049)
2 parents cb5f1b9 + c304946 commit cda7e1a

File tree

2 files changed

+212
-1
lines changed

2 files changed

+212
-1
lines changed

src/datajoint/table.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -745,7 +745,13 @@ def insert(
745745
replace : bool, optional
746746
If True, replaces the existing tuple.
747747
skip_duplicates : bool, optional
748-
If True, silently skip duplicate inserts.
748+
If True, silently skip rows with duplicate primary key values.
749+
On **PostgreSQL**, secondary unique constraint violations still
750+
raise an error even when ``skip_duplicates=True``, because the
751+
generated ``ON CONFLICT (pk) DO NOTHING`` clause targets only
752+
the primary key. On **MySQL**, ``ON DUPLICATE KEY UPDATE``
753+
catches all unique-key conflicts, so secondary unique violations
754+
are also silently skipped.
749755
ignore_extra_fields : bool, optional
750756
If False (default), fields that are not in the heading raise error.
751757
allow_direct_insert : bool, optional
Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
"""
2+
Tests for skip_duplicates behavior with secondary unique constraints.
3+
4+
Verifies that skip_duplicates=True on PostgreSQL skips primary key
5+
duplicates while still raising on secondary unique constraint violations, and documents that MySQL instead silently skips such violations.
6+
Resolves #1049.
7+
"""
8+
9+
import time
10+
11+
import pytest
12+
13+
import datajoint as dj
14+
from datajoint.errors import DuplicateError
15+
16+
17+
def _drop_schema_if_exists(connection, schema_name):
    """Best-effort removal of ``schema_name`` through ``connection``.

    Does nothing when the connection is not connected. Any failure while
    building or running the statement is logged and suppressed so that
    cleanup problems never mask the outcome of the test itself.
    """
    import logging

    if not connection.is_connected:
        return
    try:
        # NOTE(review): the statement is DROP DATABASE on every backend;
        # confirm the PostgreSQL adapter maps this onto a schema drop.
        connection.query(
            f"DROP DATABASE IF EXISTS {connection.adapter.quote_identifier(schema_name)}"
        )
    except Exception:
        # Deliberately non-fatal, but leave a trace instead of passing silently.
        logging.getLogger(__name__).warning(
            "Could not drop test schema %s", schema_name, exc_info=True
        )


@pytest.fixture(scope="function")
def schema_by_backend(connection_by_backend, db_creds_by_backend):
    """Create a fresh schema per test, parameterized across backends.

    Yields a ``dj.Schema`` bound to ``connection_by_backend``. The schema is
    dropped (best effort) both before it is created and after the test ends.
    """
    import uuid

    backend = db_creds_by_backend["backend"]
    # A random suffix cannot collide the way a millisecond timestamp can when
    # tests run in parallel or in quick succession.
    test_id = uuid.uuid4().hex[:8]
    schema_name = f"djtest_skipdup_{backend}_{test_id}"[:64]

    _drop_schema_if_exists(connection_by_backend, schema_name)

    schema = dj.Schema(schema_name, connection=connection_by_backend)
    yield schema

    _drop_schema_if_exists(connection_by_backend, schema_name)
42+
43+
44+
def test_skip_duplicates_pk_match(schema_by_backend):
    """A row whose primary key already exists is dropped silently with skip_duplicates=True."""

    @schema_by_backend
    class Item(dj.Manual):
        definition = """
        item_id : int
        ---
        name : varchar(100)
        email : varchar(100)
        unique index (email)
        """

    first_row = {"item_id": 1, "name": "Alice", "email": "alice@example.com"}
    # Reuses item_id=1 with different attribute values.
    clashing_row = {"item_id": 1, "name": "Bob", "email": "bob@example.com"}

    Item.insert1(first_row)
    Item.insert1(clashing_row, skip_duplicates=True)

    # The stored row must still carry the values of the first insert.
    stored = (Item & "item_id=1").fetch1()
    assert stored["name"] == "Alice"
    assert stored["email"] == "alice@example.com"
69+
70+
71+
def test_skip_duplicates_unique_violation_raises_on_postgres(schema_by_backend, db_creds_by_backend):
    """PostgreSQL: a secondary unique conflict raises even with skip_duplicates=True.

    Regression test for #1049. The inserted row uses a primary key that is
    not yet present but repeats a value covered by the secondary unique
    index, and the insert is expected to raise DuplicateError.
    """
    backend = db_creds_by_backend["backend"]
    if backend != "postgresql":
        pytest.skip("PostgreSQL-specific: ON CONFLICT (pk) DO NOTHING preserves unique constraints")

    @schema_by_backend
    class Item(dj.Manual):
        definition = """
        item_id : int
        ---
        name : varchar(100)
        email : varchar(100)
        unique index (email)
        """

    existing = {"item_id": 1, "name": "Alice", "email": "alice@example.com"}
    # Fresh primary key (2), but the email repeats the one stored for item 1.
    email_clash = {"item_id": 2, "name": "Bob", "email": "alice@example.com"}

    Item.insert1(existing)

    with pytest.raises(DuplicateError):
        Item.insert1(email_clash, skip_duplicates=True)
98+
99+
100+
def test_skip_duplicates_unique_on_mysql(schema_by_backend, db_creds_by_backend):
    """MySQL: a secondary unique conflict is skipped silently with skip_duplicates=True.

    Records the known asymmetry with PostgreSQL: ON DUPLICATE KEY UPDATE
    reacts to every unique key conflict, not only to the primary key.
    """
    backend = db_creds_by_backend["backend"]
    if backend != "mysql":
        pytest.skip("MySQL-specific: ON DUPLICATE KEY UPDATE catches all unique keys")

    @schema_by_backend
    class Item(dj.Manual):
        definition = """
        item_id : int
        ---
        name : varchar(100)
        email : varchar(100)
        unique index (email)
        """

    existing = {"item_id": 1, "name": "Alice", "email": "alice@example.com"}
    # Fresh primary key (2) whose email collides with item 1.
    email_clash = {"item_id": 2, "name": "Bob", "email": "alice@example.com"}

    Item.insert1(existing)
    # No exception expected here: the conflicting row is simply not stored.
    Item.insert1(email_clash, skip_duplicates=True)

    # The table still holds just the first row, unchanged.
    assert len(Item()) == 1
    assert (Item & "item_id=1").fetch1()["name"] == "Alice"
130+
131+
132+
def test_skip_duplicates_no_unique_index(schema_by_backend):
    """Without secondary unique indexes, skip_duplicates=True only filters repeated primary keys."""

    @schema_by_backend
    class Simple(dj.Manual):
        definition = """
        item_id : int
        ---
        name : varchar(100)
        """

    Simple.insert1({"item_id": 1, "name": "Alice"})

    # item_id=1 already exists, so this insert must leave the table untouched.
    repeated_pk = {"item_id": 1, "name": "Bob"}
    Simple.insert1(repeated_pk, skip_duplicates=True)
    assert (Simple & "item_id=1").fetch1()["name"] == "Alice"

    # item_id=2 is new, so the row is stored.
    fresh_pk = {"item_id": 2, "name": "Bob"}
    Simple.insert1(fresh_pk, skip_duplicates=True)
    assert len(Simple()) == 2
152+
153+
154+
def test_skip_duplicates_composite_unique(schema_by_backend, db_creds_by_backend):
    """PostgreSQL: a conflict on a multi-column unique index raises despite skip_duplicates=True."""
    backend = db_creds_by_backend["backend"]
    if backend != "postgresql":
        pytest.skip("PostgreSQL-specific unique constraint enforcement")

    @schema_by_backend
    class Record(dj.Manual):
        definition = """
        record_id : int
        ---
        first_name : varchar(100)
        last_name : varchar(100)
        data : varchar(255)
        unique index (first_name, last_name)
        """

    existing = {"record_id": 1, "first_name": "Alice", "last_name": "Smith", "data": "v1"}
    # Different record_id, same (first_name, last_name) pair as the stored row.
    name_clash = {"record_id": 2, "first_name": "Alice", "last_name": "Smith", "data": "v2"}

    Record.insert1(existing)

    with pytest.raises(DuplicateError):
        Record.insert1(name_clash, skip_duplicates=True)
178+
179+
180+
def test_skip_duplicates_batch_mixed(schema_by_backend, db_creds_by_backend):
    """PostgreSQL: a batch mixing a PK duplicate and a unique conflict raises DuplicateError."""
    backend = db_creds_by_backend["backend"]
    if backend != "postgresql":
        pytest.skip("PostgreSQL-specific unique constraint enforcement")

    @schema_by_backend
    class Item(dj.Manual):
        definition = """
        item_id : int
        ---
        email : varchar(100)
        unique index (email)
        """

    Item.insert1({"item_id": 1, "email": "alice@example.com"})

    batch = [
        # New key and new email: acceptable on its own.
        {"item_id": 2, "email": "bob@example.com"},
        # Primary key already present: the kind of row skip_duplicates drops.
        {"item_id": 1, "email": "duplicate-pk@example.com"},
        # New key, but the email collides with item 1: triggers the error.
        {"item_id": 3, "email": "alice@example.com"},
    ]

    with pytest.raises(DuplicateError):
        Item.insert(batch, skip_duplicates=True)

0 commit comments

Comments
 (0)