From 00a2bc705440b5c78daf8b81d6036ada316b8c8b Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 23 Feb 2026 17:19:01 -0800 Subject: [PATCH] fix: don't hide published score sets from search when superseded by unpublished versions The superseding score set filter used a LEFT OUTER JOIN that excluded any score set with a superseding version, regardless of publication status. This caused published score sets (e.g. urn:mavedb:00000049) to disappear from search results when an unpublished draft replacement existed. Allow superseded score sets through when the superseding version has no published_date. Also enforce published=True on the filter-options endpoint for consistency with the main search endpoint. --- src/mavedb/lib/score_sets.py | 12 ++- src/mavedb/lib/validation/transform.py | 10 +- src/mavedb/routers/score_sets.py | 7 ++ tests/routers/test_score_set.py | 139 +++++++++++++++++++++++++ 4 files changed, 164 insertions(+), 4 deletions(-) diff --git a/src/mavedb/lib/score_sets.py b/src/mavedb/lib/score_sets.py index 60f2ca68e..134457f77 100644 --- a/src/mavedb/lib/score_sets.py +++ b/src/mavedb/lib/score_sets.py @@ -79,10 +79,16 @@ def build_search_score_sets_query_filter( ): superseding_score_set = aliased(ScoreSet) - # Limit to unsuperseded score sets. - # TODO#??? Prevent unpublished superseding score sets from hiding their published precursors in search results. + # Exclude superseded score sets from search results, but only when the superseding + # version is published. An unpublished replacement should not hide its published + # precursor from public search results. query = query.join(superseding_score_set, ScoreSet.superseding_score_set, isouter=True) - query = query.filter(superseding_score_set.id.is_(None)) + query = query.filter( + or_( + superseding_score_set.id.is_(None), + superseding_score_set.published_date.is_(None), + ) + ) if owner_or_contributor is not None: query = query.filter( diff --git a/src/mavedb/lib/validation/transform.py b/src/mavedb/lib/validation/transform.py index 2152eff9d..9eb1991db 100644 --- a/src/mavedb/lib/validation/transform.py +++ b/src/mavedb/lib/validation/transform.py @@ -29,7 +29,15 @@ def transform_score_set_list_to_urn_list(score_sets: Optional[list[ScoreSet]]) - if not score_sets: return [] - return [score_set.urn for score_set in score_sets if score_set.superseding_score_set is None] + # Include a score set if it has no superseding version, or if its superseding version is + # unpublished. An unpublished replacement should not hide its published precursor from URN + # lists, since consumers would lose access to the published data and gain a tmp: URN they + # cannot resolve. + return [ + score_set.urn + for score_set in score_sets + if score_set.superseding_score_set is None or score_set.superseding_score_set.published_date is None + ] def transform_experiment_list_to_urn_list(experiments: Optional[list[Experiment]]) -> list[Optional[str]]: diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index 694860d29..7376ca4b1 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -600,6 +600,13 @@ def get_filter_options_for_search( db: Session = Depends(deps.get_db), user_data: Optional[UserData] = Depends(get_current_user), ) -> Any: + # Disallow searches for unpublished score sets via this endpoint, consistent with the main search endpoint. + if search.published is False: + raise HTTPException( + status_code=422, + detail="Cannot search for private score sets options except in the context of the current user's data.", + ) + search.published = True return fetch_score_set_search_filter_options(db, user_data, None, search) diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index 13bd7ce73..7dd745a2a 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -53,6 +53,7 @@ TEST_SAVED_CLINVAR_CONTROL, TEST_SAVED_GENERIC_CLINICAL_CONTROL, TEST_SAVED_GNOMAD_VARIANT, + TEST_SAVED_TAXONOMY, TEST_USER, VALID_CLINGEN_CA_ID, ) @@ -2255,6 +2256,144 @@ def test_search_public_score_sets_not_showing_private_score_set( assert response.json()["scoreSets"][0]["urn"] == published_score_set_1["urn"] +def test_search_published_score_set_not_hidden_by_unpublished_superseding_version( + session, data_provider, client, setup_router_db, data_files +): + """A published score set should still appear in search results when its superseding version is unpublished.""" + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Test Fnord Score Set"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() + + # Create an unpublished superseding score set — this should NOT hide the published precursor. + create_seq_score_set( + client, + published_score_set["experiment"]["urn"], + update={"supersededScoreSetUrn": published_score_set["urn"]}, + ) + + search_payload = {"text": "fnord"} + response = client.post("/api/v1/score-sets/search", json=search_payload) + assert response.status_code == 200 + assert response.json()["numScoreSets"] == 1 + assert response.json()["scoreSets"][0]["urn"] == published_score_set["urn"] + + +def test_search_published_score_set_hidden_by_published_superseding_version( + session, data_provider, client, setup_router_db, data_files +): + """A published score set should be hidden from search results when its superseding version is also published.""" + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Test Fnord Score Set"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() + + # Create and publish a superseding score set — this SHOULD hide the precursor. + superseding = create_seq_score_set( + client, + published_score_set["experiment"]["urn"], + update={"title": "Test Fnord Score Set v2", "supersededScoreSetUrn": published_score_set["urn"]}, + ) + superseding = mock_worker_variant_insertion(client, session, data_provider, superseding, data_files / "scores.csv") + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_superseding = publish_score_set(client, superseding["urn"]) + worker_queue.assert_called_once() + + search_payload = {"text": "fnord"} + response = client.post("/api/v1/score-sets/search", json=search_payload) + assert response.status_code == 200 + # Only the superseding version should appear; the precursor should be hidden. + assert response.json()["numScoreSets"] == 1 + assert response.json()["scoreSets"][0]["urn"] == published_superseding["urn"] + + +def test_search_filter_options_not_hidden_by_unpublished_superseding_version( + session, data_provider, client, setup_router_db, data_files +): + """Filter options should include targets from published score sets even when superseded by unpublished versions.""" + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() + + target_name = published_score_set["targetGenes"][0]["name"] + + # Create an unpublished superseding version. + create_seq_score_set( + client, + published_score_set["experiment"]["urn"], + update={"supersededScoreSetUrn": published_score_set["urn"]}, + ) + + response = client.post("/api/v1/score-sets/search/filter-options", json={}) + assert response.status_code == 200 + target_names = [opt["value"] for opt in response.json()["targetGeneNames"]] + assert target_name in target_names + + +def test_search_filter_options_hidden_by_published_superseding_version( + session, data_provider, client, setup_router_db, data_files +): + """Filter options should NOT include targets from published score sets when superseded by published versions.""" + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() + + target_name = published_score_set["targetGenes"][0]["name"] + + # Create and publish a superseding version. + superseding = create_seq_score_set( + client, + published_score_set["experiment"]["urn"], + update={ + "supersededScoreSetUrn": published_score_set["urn"], + "targetGenes": [ + { + "name": "TEST2", + "category": "protein_coding", + "externalIdentifiers": [], + "targetSequence": { + "sequenceType": "dna", + "sequence": "ACGTTT", + "taxonomy": { + "code": TEST_SAVED_TAXONOMY["code"], + "organismName": TEST_SAVED_TAXONOMY["organism_name"], + "commonName": TEST_SAVED_TAXONOMY["common_name"], + "rank": TEST_SAVED_TAXONOMY["rank"], + "hasDescribedSpeciesName": TEST_SAVED_TAXONOMY["has_described_species_name"], + "articleReference": TEST_SAVED_TAXONOMY["article_reference"], + "id": TEST_SAVED_TAXONOMY["id"], + "url": TEST_SAVED_TAXONOMY["url"], + }, + }, + } + ], + }, + ) + superseding = mock_worker_variant_insertion(client, session, data_provider, superseding, data_files / "scores.csv") + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + publish_score_set(client, superseding["urn"]) + worker_queue.assert_called_once() + + response = client.post("/api/v1/score-sets/search/filter-options", json={}) + assert response.status_code == 200 + target_names = [opt["value"] for opt in response.json()["targetGeneNames"]] + assert target_name not in target_names + + ######################################################################################################################## # Score set deletion ########################################################################################################################