Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
bde02c2
WIP: Local Works somewhat
MaximilianSoerenPollak Mar 11, 2026
a898eee
WIP: seems to work in ref & local
MaximilianSoerenPollak Mar 11, 2026
c2861cd
Remove metadata file
MaximilianSoerenPollak Mar 12, 2026
fb5bdc5
Formatting & Linting
MaximilianSoerenPollak Mar 12, 2026
8ea7417
Known_good required in merge script
MaximilianSoerenPollak Mar 12, 2026
f818cf2
Remove debug print statements
MaximilianSoerenPollak Mar 12, 2026
adeae4f
Copilot Findings
MaximilianSoerenPollak Mar 12, 2026
b92bf6e
Formatting & Linting
MaximilianSoerenPollak Mar 12, 2026
5a08e5d
Basepyright linting
MaximilianSoerenPollak Mar 12, 2026
ce31387
WIP: Testlinks endlink still wrong
MaximilianSoerenPollak Mar 13, 2026
c573b2f
Formatting
MaximilianSoerenPollak Mar 13, 2026
9d87c2c
Formatting & Linting & Comments
MaximilianSoerenPollak Mar 16, 2026
f50ef0c
Fixing copilot & other findings
MaximilianSoerenPollak Mar 16, 2026
976c2ec
WIP: Tests debugging
MaximilianSoerenPollak Mar 16, 2026
d094914
Tests Passing & Tests adapted
MaximilianSoerenPollak Mar 17, 2026
0d1e490
Fixing tests
MaximilianSoerenPollak Mar 17, 2026
7cda52e
Remove debug print statements
MaximilianSoerenPollak Mar 17, 2026
1d74ad0
Fixed linter warnings
MaximilianSoerenPollak Mar 17, 2026
ddfa34c
Fix copyright year
MaximilianSoerenPollak Mar 17, 2026
6230c47
Fix scl finding test needs
MaximilianSoerenPollak Mar 17, 2026
a8ce766
Formatting
MaximilianSoerenPollak Mar 17, 2026
740b019
Fix: Testlinks didn't clean filepath properly
MaximilianSoerenPollak Mar 17, 2026
6915283
Linting errors
MaximilianSoerenPollak Mar 17, 2026
a836298
Make comment more visible
MaximilianSoerenPollak Mar 17, 2026
133a44d
Small cleanup
MaximilianSoerenPollak Mar 18, 2026
748467d
Renaming module => repo
MaximilianSoerenPollak Mar 20, 2026
ae5969d
Formating & Linting
MaximilianSoerenPollak Mar 20, 2026
78b0095
Rename helpers file
MaximilianSoerenPollak Mar 23, 2026
9c35436
Fix: Bugfixing action
MaximilianSoerenPollak Mar 23, 2026
fdfeb2b
Remove emoji headers
MaximilianSoerenPollak Mar 23, 2026
8c7aee6
Bugfix, id's should not be uppercase
MaximilianSoerenPollak Mar 26, 2026
9473852
Fix myst errors
MaximilianSoerenPollak Mar 26, 2026
feb9863
Add config value
MaximilianSoerenPollak Mar 26, 2026
2d58de2
Fix several small things found by copilot
MaximilianSoerenPollak Mar 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 49 additions & 46 deletions docs.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -69,23 +69,30 @@ def _rewrite_needs_json_to_sourcelinks(labels):
out.append(s)
return out

def _merge_sourcelinks(name, sourcelinks, known_good = None):
    """Merge multiple sourcelinks JSON files into a single file.

    Args:
        name: Name for the merged sourcelinks target
        sourcelinks: List of sourcelinks JSON file targets
        known_good: Optional label of a 'known good' JSON file. When set, it
            is added to the genrule srcs and forwarded to the merge script via
            '--known_good' so repository metadata can be resolved during the
            merge.
    """

    extra_srcs = []
    known_good_arg = ""
    if known_good != None:
        # The file must be a declared input so $(location ...) can resolve it.
        extra_srcs = [known_good]
        known_good_arg = "--known_good $(location %s)" % known_good

    native.genrule(
        name = name,
        srcs = sourcelinks + extra_srcs,
        outs = [name + ".json"],
        cmd = """
        $(location @score_docs_as_code//scripts_bazel:merge_sourcelinks) \
            --output $@ \
            {known_good_arg} \
            $(SRCS)
        """.format(known_good_arg = known_good_arg),
        tools = ["@score_docs_as_code//scripts_bazel:merge_sourcelinks"],
    )

Expand Down Expand Up @@ -120,7 +127,7 @@ def _missing_requirements(deps):
fail(msg)
fail("This case should be unreachable?!")

def docs(source_dir = "docs", data = [], deps = [], scan_code = []):
def docs(source_dir = "docs", data = [], deps = [], scan_code = [], known_good = None):
"""Creates all targets related to documentation.

By using this function, you'll get any and all updates for documentation targets in one place.
Expand Down Expand Up @@ -175,34 +182,45 @@ def docs(source_dir = "docs", data = [], deps = [], scan_code = []):

data_with_docs_sources = _rewrite_needs_json_to_docs_sources(data)
additional_combo_sourcelinks = _rewrite_needs_json_to_sourcelinks(data)
_merge_sourcelinks(name = "merged_sourcelinks", sourcelinks = [":sourcelinks_json"] + additional_combo_sourcelinks)
_merge_sourcelinks(name = "merged_sourcelinks", sourcelinks = [":sourcelinks_json"] + additional_combo_sourcelinks, known_good = known_good)
docs_data = data + [":sourcelinks_json"]
combo_data = data_with_docs_sources + [":merged_sourcelinks"]

docs_env = {
"SOURCE_DIRECTORY": source_dir,
"DATA": str(data),
"SCORE_SOURCELINKS": "$(location :sourcelinks_json)",
}
docs_sources_env = {
"SOURCE_DIRECTORY": source_dir,
"DATA": str(data_with_docs_sources),
"SCORE_SOURCELINKS": "$(location :merged_sourcelinks)",
}
if known_good:
docs_env["KNOWN_GOOD_JSON"] = "$(location "+ known_good + ")"
docs_sources_env["KNOWN_GOOD_JSON"] = "$(location "+ known_good + ")"
docs_data.append(known_good)
combo_data.append(known_good)

docs_env["ACTION"] = "incremental"

py_binary(
name = "docs",
tags = ["cli_help=Build documentation:\nbazel run //:docs"],
srcs = ["@score_docs_as_code//src:incremental.py"],
data = data + [":sourcelinks_json"],
data = docs_data,
deps = deps,
env = {
"SOURCE_DIRECTORY": source_dir,
"DATA": str(data),
"ACTION": "incremental",
"SCORE_SOURCELINKS": "$(location :sourcelinks_json)",
},
env = docs_env
)

docs_sources_env["ACTION"] = "incremental"
py_binary(
name = "docs_combo",
tags = ["cli_help=Build full documentation with all dependencies:\nbazel run //:docs_combo"],
srcs = ["@score_docs_as_code//src:incremental.py"],
data = data_with_docs_sources + [":merged_sourcelinks"],
data = combo_data,
deps = deps,
env = {
"SOURCE_DIRECTORY": source_dir,
"DATA": str(data_with_docs_sources),
"ACTION": "incremental",
"SCORE_SOURCELINKS": "$(location :merged_sourcelinks)",
},
env = docs_sources_env
)

native.alias(
Expand All @@ -211,59 +229,44 @@ def docs(source_dir = "docs", data = [], deps = [], scan_code = []):
deprecation = "Target '//:docs_combo_experimental' is deprecated. Use '//:docs_combo' instead.",
)

docs_env["ACTION"] = "linkcheck"
py_binary(
name = "docs_link_check",
tags = ["cli_help=Verify Links inside Documentation:\nbazel run //:link_check\n (Note: this could take a long time)"],
srcs = ["@score_docs_as_code//src:incremental.py"],
data = data,
data = docs_data,
deps = deps,
env = {
"SOURCE_DIRECTORY": source_dir,
"DATA": str(data),
"ACTION": "linkcheck",
},
env = docs_env
)

docs_env["ACTION"] = "check"
py_binary(
name = "docs_check",
tags = ["cli_help=Verify documentation:\nbazel run //:docs_check"],
srcs = ["@score_docs_as_code//src:incremental.py"],
data = data + [":sourcelinks_json"],
data = docs_data,
deps = deps,
env = {
"SOURCE_DIRECTORY": source_dir,
"DATA": str(data),
"ACTION": "check",
"SCORE_SOURCELINKS": "$(location :sourcelinks_json)",
},
env = docs_env
)

docs_env["ACTION"] = "live_preview"
py_binary(
name = "live_preview",
tags = ["cli_help=Live preview documentation in the browser:\nbazel run //:live_preview"],
srcs = ["@score_docs_as_code//src:incremental.py"],
data = data + [":sourcelinks_json"],
data = docs_data,
deps = deps,
env = {
"SOURCE_DIRECTORY": source_dir,
"DATA": str(data),
"ACTION": "live_preview",
"SCORE_SOURCELINKS": "$(location :sourcelinks_json)",
},
env = docs_env
)

docs_sources_env["ACTION"] = "live_preview"
py_binary(
name = "live_preview_combo_experimental",
tags = ["cli_help=Live preview full documentation with all dependencies in the browser:\nbazel run //:live_preview_combo_experimental"],
srcs = ["@score_docs_as_code//src:incremental.py"],
data = data_with_docs_sources + [":merged_sourcelinks"],
data = combo_data,
deps = deps,
env = {
"SOURCE_DIRECTORY": source_dir,
"DATA": str(data_with_docs_sources),
"ACTION": "live_preview",
"SCORE_SOURCELINKS": "$(location :merged_sourcelinks)",
},
env = docs_sources_env
)

score_virtualenv(
Expand Down
1 change: 1 addition & 0 deletions scripts_bazel/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ py_binary(
py_binary(
name = "merge_sourcelinks",
srcs = ["merge_sourcelinks.py"],
deps= [ "//src/extensions/score_source_code_linker"],
main = "merge_sourcelinks.py",
visibility = ["//visibility:public"],
)
38 changes: 32 additions & 6 deletions scripts_bazel/generate_sourcelinks_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,25 +25,43 @@
from src.extensions.score_source_code_linker.generate_source_code_links_json import (
_extract_references_from_file, # pyright: ignore[reportPrivateUsage] TODO: move it out of the extension and into this script
)
from src.extensions.score_source_code_linker.helpers import parse_repo_name_from_path
from src.extensions.score_source_code_linker.needlinks import (
store_source_code_links_json,
DefaultMetaData,
store_source_code_links_with_metadata_json,
)

logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger(__name__)


def clean_external_prefix(path: Path) -> Path:
    """Strip a leading Bazel external-repo prefix from *path*.

    In combo builds a path can look like
    ``external/score_docs_as_code+/docs/foo.rst``. The ``external/<repo>/``
    prefix has to be removed before the path is handed to the extraction
    function; otherwise the prefix ends up in the ``file`` attribute and
    produces wrong source links.

    Only an exact leading ``external/`` segment is stripped, so files or
    folders that merely contain ``external`` in their name are unaffected.

    Args:
        path: A workspace-relative file path as provided by Bazel.

    Returns:
        The path with any ``external/<repository-name>/`` prefix removed;
        the path unchanged when no such prefix is present.
    """
    if not str(path).startswith("external/"):
        return path
    # Drop the 'external/' marker, then the repository-name segment that
    # immediately follows it; only the remainder is the real file path.
    remainder = str(path).removeprefix("external/")
    _repo_name, _, real_path = remainder.partition("/")
    return Path(real_path)


def main():
    """CLI entry point.

    Scans the given source files for need references and stores them,
    together with repository metadata, as a JSON file. The first element of
    the emitted JSON list is the metadata dict; the remaining elements are
    the extracted need links.
    """
    parser = argparse.ArgumentParser(
        description="Generate source code links JSON from source files"
    )
    _ = parser.add_argument(
        "--output",
        required=True,
        type=Path,
        help="Output JSON file path",
    )
    _ = parser.add_argument(
        "files",
        nargs="*",
        type=Path,
        help="Source files to scan for need references",
    )
    args = parser.parse_args()

    all_need_references = []

    metadata = DefaultMetaData()
    metadata_set = False
    for file_path in args.files:
        # known_good.json is not part of the repository's source tree, so it
        # must not be used to derive the repository name. The guard also makes
        # sure we only parse the repo name once, not for every file.
        if "known_good.json" not in str(file_path) and not metadata_set:
            metadata["repo_name"] = parse_repo_name_from_path(file_path)
            metadata_set = True

        abs_file_path = file_path.resolve()
        assert abs_file_path.exists(), abs_file_path
        # Strip any 'external/<repo>/' prefix so the stored 'file' attribute
        # points at the real workspace-relative file, not the Bazel sandbox path.
        clean_path = clean_external_prefix(file_path)
        references = _extract_references_from_file(
            abs_file_path.parent, Path(abs_file_path.name), clean_path
        )
        all_need_references.extend(references)

    store_source_code_links_with_metadata_json(
        file=args.output, metadata=metadata, needlist=all_need_references
    )
    logger.info(
        f"Found {len(all_need_references)} need references in {len(args.files)} files"
    )
Expand Down
41 changes: 36 additions & 5 deletions scripts_bazel/merge_sourcelinks.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
import sys
from pathlib import Path

from src.extensions.score_source_code_linker.helpers import parse_info_from_known_good

logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger(__name__)

def main():
    """Merge multiple sourcelinks JSON files into a single output file.

    Each input file is expected to be a JSON list whose first element is a
    metadata dict (with at least a 'repo_name' key) and whose remaining
    elements are need links. Every need link is enriched with the metadata
    ('repo_name', 'hash', 'url') before being written to the merged output.
    """
    parser = argparse.ArgumentParser(
        description="Merge multiple sourcelinks JSON files into one"
    )
    _ = parser.add_argument(
        "--output",
        required=True,
        type=Path,
        help="Output merged JSON file path",
    )
    _ = parser.add_argument(
        "--known_good",
        required=True,
        help="Path to a required 'known good' JSON file (provided by Bazel).",
    )
    _ = parser.add_argument(
        "files",
        nargs="*",
        type=Path,
        help="Input JSON files to merge",
    )

    args = parser.parse_args()
    # known_good.json is passed in via 'args.files' as well (it is one of the
    # genrule srcs). Filter it out here to keep the merge logic below clean.
    all_files = [x for x in args.files if "known_good.json" not in str(x)]

    merged = []
    for json_file in all_files:
        with open(json_file) as f:
            data = json.load(f)
        # Validate the outer schema *before* any element access below.
        assert isinstance(data, list), repr(data)
        # If the file is empty e.g. '[]' there is nothing to parse, we continue.
        if not data:
            continue
        metadata = data[0]
        if not isinstance(metadata, dict) or "repo_name" not in metadata:
            logger.warning(
                f"Unexpected schema in sourcelinks file '{json_file}': "
                "expected first element to be a metadata dict "
                "with a 'repo_name' key. "
            )
            # As we can't deal with bad JSON structure we just skip it.
            continue
        if metadata["repo_name"] and metadata["repo_name"] != "local_repo":
            commit_hash, repo_url = parse_info_from_known_good(
                known_good_json=args.known_good, repo_name=metadata["repo_name"]
            )
            metadata["hash"] = commit_hash
            metadata["url"] = repo_url
        # In the case that 'metadata[repo_name]' is 'local_repo',
        # 'hash' & 'url' already exist (empty) inside 'metadata'.
        # Therefore all 3 keys are written to the needlinks in each branch.

        # data[0] is the metadata entry, so only data[1:] are needlinks.
        for needlink in data[1:]:
            needlink.update(metadata)
        merged.extend(data[1:])

    with open(args.output, "w") as f:
        json.dump(merged, f, indent=2, ensure_ascii=False)

Expand Down
Loading
Loading