From 7780473a11745c29211bfbd8de21dfeb2b2dd2a1 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Thu, 26 Feb 2026 14:52:23 -0500 Subject: [PATCH 1/9] First stab at getting conflated IC to work. --- node_normalizer/normalizer.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/node_normalizer/normalizer.py b/node_normalizer/normalizer.py index 5f92e2f..592ca40 100644 --- a/node_normalizer/normalizer.py +++ b/node_normalizer/normalizer.py @@ -558,9 +558,6 @@ async def get_normalized_nodes( # did we get some canonical ids if canonical_nonan: - # get the information content values - info_contents = await get_info_content(app, canonical_nonan) - # Get the equivalent_ids and types eqids, types = await get_eqids_and_types(app, canonical_nonan) @@ -597,6 +594,7 @@ async def get_normalized_nodes( for canon, oids in zip(itertools.cycle(canonical_nonan), other_ids): dereference_others[canon].extend(oids) + # sum(other_ids, []) is basically other_ids.flatten(). all_other_ids = sum(other_ids, []) eqids2, types2 = await get_eqids_and_types(app, all_other_ids) @@ -612,25 +610,39 @@ async def get_normalized_nodes( zipped = zip(canonical_nonan, eqids, types) + # Look up all the information content values. + info_contents_all = await get_info_content(app, all_other_ids) + info_contents = {} + for canonical_id, e, t in zipped: # here's where we replace the eqids, types if len(dereference_others[canonical_id]) > 0: e = [] t = [] + ic_vals = [] for other in dereference_others[canonical_id]: # logging.debug(f"e = {e}, other = {other}, deref_others_eqs = {deref_others_eqs}") e += deref_others_eqs[other] t += deref_others_typ[other] + if other in info_contents_all: + ic_vals.append(info_contents_all[other]) final_eqids.append(e) final_types.append(uniquify_list(t)) + # What's the smallest IC value for this canonical ID? + info_contents[canonical_id] = min(ic_vals) if ic_vals else None + dereference_ids = dict(zip(canonical_nonan, final_eqids)) dereference_types = dict(zip(canonical_nonan, final_types)) + else: dereference_ids = dict(zip(canonical_nonan, eqids)) dereference_types = dict(zip(canonical_nonan, types)) + + # get the information content values + info_contents = await get_info_content(app, canonical_nonan) else: dereference_ids = dict() dereference_types = dict() @@ -821,6 +833,10 @@ async def create_node(app, canonical_id, equivalent_ids, types, info_contents, i eq_item["type"] = eqid['types'][-1] node["equivalent_identifiers"].append(eq_item) + # TODO: figure out if this slows us down significantly. + if eqid['i'] in info_contents: + eq_item["information_content"] = info_contents[eqid['i']] + if include_descriptions and descriptions: node["descriptions"] = descriptions node["id"]["description"] = descriptions[0] From 3a56fde9986bd44b71129144c8d49b8934d9828a Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Thu, 26 Feb 2026 14:53:17 -0500 Subject: [PATCH 2/9] Turned on on:push for testing. --- .github/workflows/release.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 800d57b..ed71a18 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,6 +1,7 @@ name: 'Publish to GitHub Packages' on: + push: release: types: [published] From b8e3275ee8afbff5d3ce22d638adf5583eea3ad8 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Thu, 26 Feb 2026 15:08:26 -0500 Subject: [PATCH 3/9] Attempt to fix bug. --- node_normalizer/normalizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node_normalizer/normalizer.py b/node_normalizer/normalizer.py index 592ca40..fc6f050 100644 --- a/node_normalizer/normalizer.py +++ b/node_normalizer/normalizer.py @@ -625,7 +625,7 @@ async def get_normalized_nodes( # logging.debug(f"e = {e}, other = {other}, deref_others_eqs = {deref_others_eqs}") e += deref_others_eqs[other] t += deref_others_typ[other] - if other in info_contents_all: + if other in info_contents_all and info_contents_all[other]: ic_vals.append(info_contents_all[other]) final_eqids.append(e) From 7eb9d9b6ff33bc11f0bbe5089a8ae775c61b62c6 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Thu, 26 Feb 2026 15:15:20 -0500 Subject: [PATCH 4/9] Pass all the info_content values into create_node(). --- node_normalizer/normalizer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/node_normalizer/normalizer.py b/node_normalizer/normalizer.py index fc6f050..648ba6e 100644 --- a/node_normalizer/normalizer.py +++ b/node_normalizer/normalizer.py @@ -632,7 +632,7 @@ async def get_normalized_nodes( final_types.append(uniquify_list(t)) # What's the smallest IC value for this canonical ID? - info_contents[canonical_id] = min(ic_vals) if ic_vals else None + # info_contents[canonical_id] = min(ic_vals) if ic_vals else None dereference_ids = dict(zip(canonical_nonan, final_eqids)) dereference_types = dict(zip(canonical_nonan, final_types)) @@ -649,7 +649,7 @@ async def get_normalized_nodes( # output the final result normal_nodes = { - input_curie: await create_node(app, canonical_id, dereference_ids, dereference_types, info_contents, + input_curie: await create_node(app, canonical_id, dereference_ids, dereference_types, info_contents, info_contents_all, include_descriptions=include_descriptions, include_individual_types=include_individual_types, include_taxa=include_taxa, @@ -692,7 +692,7 @@ async def get_info_content_attribute(app, canonical_nonan) -> dict: return new_attrib -async def create_node(app, canonical_id, equivalent_ids, types, info_contents, include_descriptions=True, +async def create_node(app, canonical_id, equivalent_ids, types, info_contents, info_contents_all, include_descriptions=True, include_individual_types=False, include_taxa=False, conflations=None): """Construct the output format given the compressed redis data""" # It's possible that we didn't find a canonical_id @@ -834,8 +834,8 @@ async def create_node(app, canonical_id, equivalent_ids, types, info_contents, i node["equivalent_identifiers"].append(eq_item) # TODO: figure out if this slows us down significantly. - if eqid['i'] in info_contents: - eq_item["information_content"] = info_contents[eqid['i']] + if eqid['i'] in info_contents_all: + eq_item["information_content"] = info_contents_all[eqid['i']] if include_descriptions and descriptions: node["descriptions"] = descriptions From d6122e9c50f1a275e7605c5699b59ddaf1b0684c Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Thu, 26 Feb 2026 15:34:25 -0500 Subject: [PATCH 5/9] Oops. --- node_normalizer/normalizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node_normalizer/normalizer.py b/node_normalizer/normalizer.py index 648ba6e..8209008 100644 --- a/node_normalizer/normalizer.py +++ b/node_normalizer/normalizer.py @@ -632,7 +632,7 @@ async def get_normalized_nodes( final_types.append(uniquify_list(t)) # What's the smallest IC value for this canonical ID? - # info_contents[canonical_id] = min(ic_vals) if ic_vals else None + info_contents[canonical_id] = min(ic_vals) if ic_vals else None dereference_ids = dict(zip(canonical_nonan, final_eqids)) dereference_types = dict(zip(canonical_nonan, final_types)) From f95deef13fee16b9bb3bbdd5a9d551de1dce6692 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Thu, 26 Feb 2026 16:59:54 -0500 Subject: [PATCH 6/9] Removed on:push trigger. --- .github/workflows/release.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index ed71a18..800d57b 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,7 +1,6 @@ name: 'Publish to GitHub Packages' on: - push: release: types: [published] From 57e866e28d8c76aedee4a9a4df57ce731ebf69b6 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Thu, 26 Feb 2026 22:58:37 -0500 Subject: [PATCH 7/9] Reset ic_vals before going into the loop so it is set to something. --- node_normalizer/normalizer.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/node_normalizer/normalizer.py b/node_normalizer/normalizer.py index 8209008..2dd66f8 100644 --- a/node_normalizer/normalizer.py +++ b/node_normalizer/normalizer.py @@ -614,6 +614,10 @@ async def get_normalized_nodes( info_contents_all = await get_info_content(app, all_other_ids) info_contents = {} + # Apparently sometimes we can get to the final info_contents calculation without going through + # resetting the ic_vals. + ic_vals = None + for canonical_id, e, t in zipped: # here's where we replace the eqids, types if len(dereference_others[canonical_id]) > 0: From d3d9ad67d7fba0f5b61fc8701190bde0a091db12 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Thu, 26 Feb 2026 22:59:23 -0500 Subject: [PATCH 8/9] Added on:push trigger for testing. --- .github/workflows/release.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 800d57b..e9d8f8f 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,6 +1,7 @@ name: 'Publish to GitHub Packages' on: + push: release: types: [published] From d3eb44689f9b1268c0c663acdc876e99bb0dec7e Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Thu, 26 Feb 2026 23:28:24 -0500 Subject: [PATCH 9/9] Removed on:push trigger. --- .github/workflows/release.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index e9d8f8f..800d57b 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,7 +1,6 @@ name: 'Publish to GitHub Packages' on: - push: release: types: [published]