Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
bde02c2
WIP: Local Works somewhat
MaximilianSoerenPollak Mar 11, 2026
a898eee
WIP: seems to work in ref & local
MaximilianSoerenPollak Mar 11, 2026
c2861cd
Remove metadata file
MaximilianSoerenPollak Mar 12, 2026
fb5bdc5
Formatting & Linting
MaximilianSoerenPollak Mar 12, 2026
8ea7417
Known_good required in merge script
MaximilianSoerenPollak Mar 12, 2026
f818cf2
Remove debug print statements
MaximilianSoerenPollak Mar 12, 2026
adeae4f
Copilot Findings
MaximilianSoerenPollak Mar 12, 2026
b92bf6e
Formatting & Linting
MaximilianSoerenPollak Mar 12, 2026
5a08e5d
Basepyright linting
MaximilianSoerenPollak Mar 12, 2026
ce31387
WIP: Testlinks endlink still wrong
MaximilianSoerenPollak Mar 13, 2026
c573b2f
Formatting
MaximilianSoerenPollak Mar 13, 2026
9d87c2c
Formatting & Linting & Comments
MaximilianSoerenPollak Mar 16, 2026
f50ef0c
Fixing copilot & other findings
MaximilianSoerenPollak Mar 16, 2026
976c2ec
WIP: Tests debugging
MaximilianSoerenPollak Mar 16, 2026
d094914
Tests Passing & Tests adapted
MaximilianSoerenPollak Mar 17, 2026
0d1e490
Fixing tests
MaximilianSoerenPollak Mar 17, 2026
7cda52e
Remove debug print statements
MaximilianSoerenPollak Mar 17, 2026
1d74ad0
Fixed linter warnings
MaximilianSoerenPollak Mar 17, 2026
ddfa34c
Fix copyright year
MaximilianSoerenPollak Mar 17, 2026
6230c47
Fix scl finding test needs
MaximilianSoerenPollak Mar 17, 2026
a8ce766
Formatting
MaximilianSoerenPollak Mar 17, 2026
740b019
Fix: Testlinks didn't clean filepath properly
MaximilianSoerenPollak Mar 17, 2026
6915283
Linting errors
MaximilianSoerenPollak Mar 17, 2026
a836298
Make comment more visible
MaximilianSoerenPollak Mar 17, 2026
133a44d
Small cleanup
MaximilianSoerenPollak Mar 18, 2026
748467d
Renaming module => repo
MaximilianSoerenPollak Mar 20, 2026
ae5969d
Formating & Linting
MaximilianSoerenPollak Mar 20, 2026
78b0095
Rename helpers file
MaximilianSoerenPollak Mar 23, 2026
9c35436
Fix: Bugfixing action
MaximilianSoerenPollak Mar 23, 2026
fdfeb2b
Remove emoji headers
MaximilianSoerenPollak Mar 23, 2026
8c7aee6
Bugfix, id's should not be uppercase
MaximilianSoerenPollak Mar 26, 2026
9473852
Fix myst errors
MaximilianSoerenPollak Mar 26, 2026
feb9863
Add config value
MaximilianSoerenPollak Mar 26, 2026
2d58de2
Fix several small things found by copilot
MaximilianSoerenPollak Mar 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 49 additions & 46 deletions docs.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -69,23 +69,30 @@ def _rewrite_needs_json_to_sourcelinks(labels):
out.append(s)
return out

def _merge_sourcelinks(name, sourcelinks, known_good = None):
    """Merge multiple sourcelinks JSON files into a single file.

    Args:
        name: Name for the merged sourcelinks target
        sourcelinks: List of sourcelinks JSON file targets
        known_good: Optional label of a 'known good' JSON file. When set, it
            is added to the genrule srcs and forwarded to the merge script via
            '--known_good' so repository metadata can be resolved during the
            merge.
    """

    extra_srcs = []
    known_good_arg = ""
    if known_good != None:
        # The file must be a declared input so $(location ...) can resolve it.
        extra_srcs = [known_good]
        known_good_arg = "--known_good $(location %s)" % known_good

    native.genrule(
        name = name,
        srcs = sourcelinks + extra_srcs,
        outs = [name + ".json"],
        cmd = """
        $(location @score_docs_as_code//scripts_bazel:merge_sourcelinks) \
            --output $@ \
            {known_good_arg} \
            $(SRCS)
        """.format(known_good_arg = known_good_arg),
        tools = ["@score_docs_as_code//scripts_bazel:merge_sourcelinks"],
    )

Expand Down Expand Up @@ -120,7 +127,7 @@ def _missing_requirements(deps):
fail(msg)
fail("This case should be unreachable?!")

def docs(source_dir = "docs", data = [], deps = [], scan_code = []):
def docs(source_dir = "docs", data = [], deps = [], scan_code = [], known_good = None):
"""Creates all targets related to documentation.

By using this function, you'll get any and all updates for documentation targets in one place.
Expand Down Expand Up @@ -175,34 +182,45 @@ def docs(source_dir = "docs", data = [], deps = [], scan_code = []):

data_with_docs_sources = _rewrite_needs_json_to_docs_sources(data)
additional_combo_sourcelinks = _rewrite_needs_json_to_sourcelinks(data)
_merge_sourcelinks(name = "merged_sourcelinks", sourcelinks = [":sourcelinks_json"] + additional_combo_sourcelinks)
_merge_sourcelinks(name = "merged_sourcelinks", sourcelinks = [":sourcelinks_json"] + additional_combo_sourcelinks, known_good = known_good)
docs_data = data + [":sourcelinks_json"]
combo_data = data_with_docs_sources + [":merged_sourcelinks"]

docs_env = {
"SOURCE_DIRECTORY": source_dir,
"DATA": str(data),
"SCORE_SOURCELINKS": "$(location :sourcelinks_json)",
}
docs_sources_env = {
"SOURCE_DIRECTORY": source_dir,
"DATA": str(data_with_docs_sources),
"SCORE_SOURCELINKS": "$(location :merged_sourcelinks)",
}
if known_good:
docs_env["KNOWN_GOOD_JSON"] = "$(location "+ known_good + ")"
docs_sources_env["KNOWN_GOOD_JSON"] = "$(location "+ known_good + ")"
docs_data.append(known_good)
combo_data.append(known_good)

docs_env["ACTION"] = "incremental"

py_binary(
name = "docs",
tags = ["cli_help=Build documentation:\nbazel run //:docs"],
srcs = ["@score_docs_as_code//src:incremental.py"],
data = data + [":sourcelinks_json"],
data = docs_data,
deps = deps,
env = {
"SOURCE_DIRECTORY": source_dir,
"DATA": str(data),
"ACTION": "incremental",
"SCORE_SOURCELINKS": "$(location :sourcelinks_json)",
},
env = docs_env
)

docs_sources_env["ACTION"] = "incremental"
py_binary(
name = "docs_combo",
tags = ["cli_help=Build full documentation with all dependencies:\nbazel run //:docs_combo"],
srcs = ["@score_docs_as_code//src:incremental.py"],
data = data_with_docs_sources + [":merged_sourcelinks"],
data = combo_data,
deps = deps,
env = {
"SOURCE_DIRECTORY": source_dir,
"DATA": str(data_with_docs_sources),
"ACTION": "incremental",
"SCORE_SOURCELINKS": "$(location :merged_sourcelinks)",
},
env = docs_sources_env
)

native.alias(
Expand All @@ -211,59 +229,44 @@ def docs(source_dir = "docs", data = [], deps = [], scan_code = []):
deprecation = "Target '//:docs_combo_experimental' is deprecated. Use '//:docs_combo' instead.",
)

docs_env["ACTION"] = "linkcheck"
py_binary(
name = "docs_link_check",
tags = ["cli_help=Verify Links inside Documentation:\nbazel run //:link_check\n (Note: this could take a long time)"],
srcs = ["@score_docs_as_code//src:incremental.py"],
data = data,
data = docs_data,
deps = deps,
env = {
"SOURCE_DIRECTORY": source_dir,
"DATA": str(data),
"ACTION": "linkcheck",
},
env = docs_env
)

docs_env["ACTION"] = "check"
py_binary(
name = "docs_check",
tags = ["cli_help=Verify documentation:\nbazel run //:docs_check"],
srcs = ["@score_docs_as_code//src:incremental.py"],
data = data + [":sourcelinks_json"],
data = docs_data,
deps = deps,
env = {
"SOURCE_DIRECTORY": source_dir,
"DATA": str(data),
"ACTION": "check",
"SCORE_SOURCELINKS": "$(location :sourcelinks_json)",
},
env = docs_env
)

docs_env["ACTION"] = "live_preview"
py_binary(
name = "live_preview",
tags = ["cli_help=Live preview documentation in the browser:\nbazel run //:live_preview"],
srcs = ["@score_docs_as_code//src:incremental.py"],
data = data + [":sourcelinks_json"],
data = docs_data,
deps = deps,
env = {
"SOURCE_DIRECTORY": source_dir,
"DATA": str(data),
"ACTION": "live_preview",
"SCORE_SOURCELINKS": "$(location :sourcelinks_json)",
},
env = docs_env
)

docs_sources_env["ACTION"] = "live_preview"
py_binary(
name = "live_preview_combo_experimental",
tags = ["cli_help=Live preview full documentation with all dependencies in the browser:\nbazel run //:live_preview_combo_experimental"],
srcs = ["@score_docs_as_code//src:incremental.py"],
data = data_with_docs_sources + [":merged_sourcelinks"],
data = combo_data,
deps = deps,
env = {
"SOURCE_DIRECTORY": source_dir,
"DATA": str(data_with_docs_sources),
"ACTION": "live_preview",
"SCORE_SOURCELINKS": "$(location :merged_sourcelinks)",
},
env = docs_sources_env
)

score_virtualenv(
Expand Down
1 change: 1 addition & 0 deletions scripts_bazel/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ py_binary(
py_binary(
name = "merge_sourcelinks",
srcs = ["merge_sourcelinks.py"],
deps= [ "//src/extensions/score_source_code_linker"],
main = "merge_sourcelinks.py",
visibility = ["//visibility:public"],
)
38 changes: 32 additions & 6 deletions scripts_bazel/generate_sourcelinks_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,25 +25,43 @@
from src.extensions.score_source_code_linker.generate_source_code_links_json import (
_extract_references_from_file, # pyright: ignore[reportPrivateUsage] TODO: move it out of the extension and into this script
)
from src.extensions.score_source_code_linker.helpers import parse_repo_name_from_path
from src.extensions.score_source_code_linker.needlinks import (
store_source_code_links_json,
DefaultMetaData,
store_source_code_links_with_metadata_json,
)

logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger(__name__)


def clean_external_prefix(path: Path) -> Path:
    """Strip a leading Bazel external-repo prefix from *path*.

    In combo builds a path can look like
    ``external/score_docs_as_code+/docs/foo.rst``. The ``external/<repo>/``
    prefix has to be removed before the path is handed to the extraction
    function; otherwise the prefix ends up in the ``file`` attribute and
    produces wrong source links.

    Only an exact leading ``external/`` segment is stripped, so files or
    folders that merely contain ``external`` in their name are unaffected.

    Args:
        path: A workspace-relative file path as provided by Bazel.

    Returns:
        The path with any ``external/<repository-name>/`` prefix removed;
        the path unchanged when no such prefix is present.
    """
    if not str(path).startswith("external/"):
        return path
    # Drop the 'external/' marker, then the repository-name segment that
    # immediately follows it; only the remainder is the real file path.
    remainder = str(path).removeprefix("external/")
    _repo_name, _, real_path = remainder.partition("/")
    return Path(real_path)


def main():
    """CLI entry point.

    Scans the given source files for need references and stores them,
    together with repository metadata, as a JSON file. The first element of
    the emitted JSON list is the metadata dict; the remaining elements are
    the extracted need links.
    """
    parser = argparse.ArgumentParser(
        description="Generate source code links JSON from source files"
    )
    _ = parser.add_argument(
        "--output",
        required=True,
        type=Path,
        help="Output JSON file path",
    )
    _ = parser.add_argument(
        "files",
        nargs="*",
        type=Path,
        help="Source files to scan for need references",
    )
    args = parser.parse_args()

    all_need_references = []

    metadata = DefaultMetaData()
    metadata_set = False
    for file_path in args.files:
        # known_good.json is not part of the repository's source tree, so it
        # must not be used to derive the repository name. The guard also makes
        # sure we only parse the repo name once, not for every file.
        if "known_good.json" not in str(file_path) and not metadata_set:
            metadata["repo_name"] = parse_repo_name_from_path(file_path)
            metadata_set = True

        abs_file_path = file_path.resolve()
        assert abs_file_path.exists(), abs_file_path
        # Strip any 'external/<repo>/' prefix so the stored 'file' attribute
        # points at the real workspace-relative file, not the Bazel sandbox path.
        clean_path = clean_external_prefix(file_path)
        references = _extract_references_from_file(
            abs_file_path.parent, Path(abs_file_path.name), clean_path
        )
        all_need_references.extend(references)

    store_source_code_links_with_metadata_json(
        file=args.output, metadata=metadata, needlist=all_need_references
    )
    logger.info(
        f"Found {len(all_need_references)} need references in {len(args.files)} files"
    )
Expand Down
41 changes: 36 additions & 5 deletions scripts_bazel/merge_sourcelinks.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
import sys
from pathlib import Path

from src.extensions.score_source_code_linker.helpers import parse_info_from_known_good

logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger(__name__)

def main():
    """Merge multiple sourcelinks JSON files into a single output file.

    Each input file is expected to be a JSON list whose first element is a
    metadata dict (with at least a 'repo_name' key) and whose remaining
    elements are need links. Every need link is enriched with the metadata
    ('repo_name', 'hash', 'url') before being written to the merged output.
    """
    parser = argparse.ArgumentParser(
        description="Merge multiple sourcelinks JSON files into one"
    )
    _ = parser.add_argument(
        "--output",
        required=True,
        type=Path,
        help="Output merged JSON file path",
    )
    _ = parser.add_argument(
        "--known_good",
        required=True,
        help="Path to a required 'known good' JSON file (provided by Bazel).",
    )
    _ = parser.add_argument(
        "files",
        nargs="*",
        type=Path,
        help="Input JSON files to merge",
    )

    args = parser.parse_args()
    # known_good.json is passed in via 'args.files' as well (it is one of the
    # genrule srcs). Filter it out here to keep the merge logic below clean.
    all_files = [x for x in args.files if "known_good.json" not in str(x)]

    merged = []
    for json_file in all_files:
        with open(json_file) as f:
            data = json.load(f)
        # Validate the outer schema *before* any element access below.
        assert isinstance(data, list), repr(data)
        # If the file is empty e.g. '[]' there is nothing to parse, we continue.
        if not data:
            continue
        metadata = data[0]
        if not isinstance(metadata, dict) or "repo_name" not in metadata:
            logger.warning(
                f"Unexpected schema in sourcelinks file '{json_file}': "
                "expected first element to be a metadata dict "
                "with a 'repo_name' key. "
            )
            # As we can't deal with bad JSON structure we just skip it.
            continue
        if metadata["repo_name"] and metadata["repo_name"] != "local_repo":
            commit_hash, repo_url = parse_info_from_known_good(
                known_good_json=args.known_good, repo_name=metadata["repo_name"]
            )
            metadata["hash"] = commit_hash
            metadata["url"] = repo_url
        # In the case that 'metadata[repo_name]' is 'local_repo',
        # 'hash' & 'url' already exist (empty) inside 'metadata'.
        # Therefore all 3 keys are written to the needlinks in each branch.

        # data[0] is the metadata entry, so only data[1:] are needlinks.
        for needlink in data[1:]:
            needlink.update(metadata)
        merged.extend(data[1:])

    with open(args.output, "w") as f:
        json.dump(merged, f, indent=2, ensure_ascii=False)

Expand Down
Loading
Loading