From 3473b0e0ea4965d45cde8d77210c9a43a19e9998 Mon Sep 17 00:00:00 2001 From: Saumya Rai Date: Fri, 13 Mar 2026 13:31:10 +0530 Subject: [PATCH 1/3] Fix script to handle SARIF file recategorization Signed-off-by: Saumya Rai --- scripts/workflow/recategorize_guidelines.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/workflow/recategorize_guidelines.sh b/scripts/workflow/recategorize_guidelines.sh index 8fa4b736020..97ceef401f0 100755 --- a/scripts/workflow/recategorize_guidelines.sh +++ b/scripts/workflow/recategorize_guidelines.sh @@ -26,3 +26,5 @@ python3 "$RECATEGORIZE_SCRIPT" \ "sarif-results-recategorized/$(basename "$SARIF_FILE")" rm "$SARIF_FILE" mv "sarif-results-recategorized/$(basename "$SARIF_FILE")" "$SARIF_FILE" + + #Test From 0bfca2dd992b68f977ed9788a59f2486f4d11381 Mon Sep 17 00:00:00 2001 From: Saumya-R Date: Fri, 13 Mar 2026 15:46:11 +0530 Subject: [PATCH 2/3] adding debug --- .github/workflows/codeql-multiple-repo-scan.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/codeql-multiple-repo-scan.yml b/.github/workflows/codeql-multiple-repo-scan.yml index a22531153b2..cebd968b41c 100644 --- a/.github/workflows/codeql-multiple-repo-scan.yml +++ b/.github/workflows/codeql-multiple-repo-scan.yml @@ -59,6 +59,10 @@ jobs: id: checkout-repos run: | scripts/workflow/checkout_repos.sh + - name: List files in repos directory (debug) + run: | + echo "Listing all files in repos directory before CodeQL analysis:" + find repos || echo "repos directory not found" - name: Initialize CodeQL for all repositories uses: github/codeql-action/init@v4 with: From 1922d687e46a954901a0b13377e2b73fa48085c1 Mon Sep 17 00:00:00 2001 From: Saumya-R Date: Tue, 24 Mar 2026 12:38:19 +0530 Subject: [PATCH 3/3] adding python files for codeql scripts --- .../workflows/codeql-multiple-repo-scan.yml | 6 +- scripts/workflow/checkout_repos.py | 163 ++++++++++++++++++ scripts/workflow/parse_repos.py | 156 +++++++++++++++++ 
scripts/workflow/recategorize_guidelines.py | 124 +++++++++++++ 4 files changed, 446 insertions(+), 3 deletions(-) create mode 100755 scripts/workflow/checkout_repos.py create mode 100755 scripts/workflow/parse_repos.py create mode 100755 scripts/workflow/recategorize_guidelines.py diff --git a/.github/workflows/codeql-multiple-repo-scan.yml b/.github/workflows/codeql-multiple-repo-scan.yml index cebd968b41c..768a7fd0451 100644 --- a/.github/workflows/codeql-multiple-repo-scan.yml +++ b/.github/workflows/codeql-multiple-repo-scan.yml @@ -54,11 +54,11 @@ jobs: - name: Parse known_good.json and create repos.json id: parse-repos run: | - scripts/workflow/parse_repos.sh + python3 scripts/workflow/parse_repos.py - name: Checkout all pinned repositories id: checkout-repos run: | - scripts/workflow/checkout_repos.sh + python3 scripts/workflow/checkout_repos.py - name: List files in repos directory (debug) run: | echo "Listing all files in repos directory before CodeQL analysis:" @@ -79,7 +79,7 @@ jobs: - name: Recategorize Guidelines if: always() run: | - scripts/workflow/recategorize_guidelines.sh + python3 scripts/workflow/recategorize_guidelines.py - name: Generate HTML Report from SARIF run: | SARIF_FILE="sarif-results/cpp.sarif" diff --git a/scripts/workflow/checkout_repos.py b/scripts/workflow/checkout_repos.py new file mode 100755 index 00000000000..0395168ed86 --- /dev/null +++ b/scripts/workflow/checkout_repos.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python3 +# ******************************************************************************* +# Copyright (c) 2025 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +""" +Checkout all pinned repositories based on repos.json configuration. +""" + +import json +import sys +import subprocess +import re +import os +from pathlib import Path + + +def load_repos_config(config_file="./repos.json"): + """ + Load repository configuration from repos.json. + + Args: + config_file: Path to repos.json file + + Returns: + List of repository configurations + """ + config_path = Path(config_file) + + if not config_path.exists(): + print(f"Error: file not found '{config_file}'", file=sys.stderr) + sys.exit(1) + + try: + with open(config_path, 'r') as f: + repos = json.load(f) + return repos + except (json.JSONDecodeError, IOError) as e: + print(f"Error: Failed to load repos.json: {e}", file=sys.stderr) + sys.exit(1) + + +def is_commit_hash(ref): + """ + Check if reference looks like a commit hash (40 hex characters for SHA-1). + + Args: + ref: Git reference (branch, tag, or hash) + + Returns: + True if ref matches commit hash pattern + """ + return bool(re.match(r'^[0-9a-fA-F]{40}$', ref)) + + +def checkout_repo(name, url, ref, path): + """ + Checkout a single repository. + + Args: + name: Repository name + url: Repository URL + ref: Git reference (branch, tag, or commit hash) + path: Local path to checkout into + + Returns: + True if successful, False otherwise + """ + path_obj = Path(path) + + try: + # Create parent directory if needed + path_obj.parent.mkdir(parents=True, exist_ok=True) + + if is_commit_hash(ref): + print(f"Checking out {name} ({ref}) to {path}") + print(f" Detected commit hash. 
Cloning and then checking out.")
+
+            # Clone the repository
+            subprocess.run(
+                ["git", "clone", url, path],
+                check=True,
+                capture_output=True
+            )
+
+            # Checkout specific commit
+            subprocess.run(
+                ["git", "-C", path, "checkout", ref],
+                check=True,
+                capture_output=True
+            )
+        else:
+            print(f"Checking out {name} ({ref}) to {path}")
+            print(f"  Detected branch/tag. Cloning with --branch.")
+
+            # Clone with shallow copy and specific branch/tag
+            # NOTE(review): blindly adds a 'v' prefix to any ref lacking one; this breaks plain branch names ('main' -> 'vmain') -- confirm every non-hash ref is a version tag
+            branch_ref = ref if ref.startswith('v') else f'v{ref}'
+            subprocess.run(
+                ["git", "clone", "--depth", "1", "--branch", branch_ref, url, path],
+                check=True,
+                capture_output=True
+            )
+
+        return True
+
+    except subprocess.CalledProcessError as e:
+        print(f"Error: Failed to checkout {name}: {e}", file=sys.stderr)
+        return False
+
+
+def main():
+    """Main entry point."""
+    # Load repository configurations
+    repos = load_repos_config('./repos.json')
+    repo_count = len(repos)
+
+    # Track successfully checked out repositories
+    repo_paths = []
+
+    # Checkout each repository
+    for i, repo in enumerate(repos):
+        name = repo.get('name', f'repo-{i}')
+        url = repo.get('url', '')
+        ref = repo.get('version', '')
+        path = repo.get('path', '')
+
+        if not all([url, ref, path]):
+            print(f"Warning: Skipping {name} - missing required fields", file=sys.stderr)
+            continue
+
+        if checkout_repo(name, url, ref, path):
+            repo_paths.append(path)
+
+    # Output all paths (comma-separated for GitHub Actions compatibility)
+    repo_paths_output = ','.join(repo_paths)
+
+    # Write to GITHUB_OUTPUT if available
+    github_output = os.environ.get('GITHUB_OUTPUT')
+    if github_output:
+        try:
+            with open(github_output, 'a') as f:
+                f.write(f"repo_paths={repo_paths_output}\n")
+        except IOError as e:
+            print(f"Warning: Failed to write GITHUB_OUTPUT: {e}", file=sys.stderr)
+
+    # Also print for debugging
+    print(f"\nSuccessfully checked out {len(repo_paths)} of {repo_count} repositories")
+    
print(f"repo_paths={repo_paths_output}") + + return 0 if len(repo_paths) == repo_count else 1 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/scripts/workflow/parse_repos.py b/scripts/workflow/parse_repos.py new file mode 100755 index 00000000000..21dbfb6dd43 --- /dev/null +++ b/scripts/workflow/parse_repos.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python3 +# ******************************************************************************* +# Copyright (c) 2025 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +""" +Parse known_good.json and create repos.json for multi-repository CodeQL analysis. +""" + +import json +import sys +import subprocess +from pathlib import Path + + +def install_dependencies(): + """Ensure jq is installed (for reference, though we use Python's json).""" + try: + subprocess.run( + ["sudo", "apt-get", "update"], + check=True, + capture_output=True + ) + subprocess.run( + ["sudo", "apt-get", "install", "-y", "jq"], + check=True, + capture_output=True + ) + except subprocess.CalledProcessError as e: + print(f"Warning: Failed to install jq: {e}", file=sys.stderr) + + +def parse_known_good(json_file="./known_good.json"): + """ + Parse known_good.json and transform modules into repository objects. 
+ + Args: + json_file: Path to known_good.json file + + Returns: + Tuple of (repos list, module count, module outputs dict) + """ + json_path = Path(json_file) + + if not json_path.exists(): + print(f"Error: file not found '{json_file}'", file=sys.stderr) + print(f"Current directory: {Path.cwd()}", file=sys.stderr) + sys.exit(1) + + try: + with open(json_path, 'r') as f: + data = json.load(f) + except json.JSONDecodeError as e: + print(f"Error: Failed to parse JSON: {e}", file=sys.stderr) + sys.exit(1) + + # Extract target_sw modules + modules = data.get('modules', {}).get('target_sw', {}) + + # Transform modules into repository objects + repos = [] + module_outputs = {} + + for name, config in modules.items(): + repo_url = config.get('repo', '') + version = config.get('version', '') + branch = config.get('branch', '') + hash_val = config.get('hash', '') + + # Use version, branch, or hash (in that order of preference) + ref = version or branch or hash_val + + repo_obj = { + 'name': name, + 'url': repo_url, + 'version': ref, + 'path': f'repos/{name}' + } + repos.append(repo_obj) + + # Store module outputs for GITHUB_OUTPUT compatibility + module_outputs[f'{name}_url'] = repo_url + if version: + module_outputs[f'{name}_version'] = version + if branch: + module_outputs[f'{name}_branch'] = branch + if hash_val: + module_outputs[f'{name}_hash'] = hash_val + + return repos, len(modules), module_outputs + + +def write_repos_json(repos, output_file="./repos.json"): + """Write repositories to repos.json file.""" + output_path = Path(output_file) + + try: + with open(output_path, 'w') as f: + json.dump(repos, f, indent=2) + print(f"Generated {output_file}:") + print(json.dumps(repos, indent=2)) + print() # Add newline for readability + except IOError as e: + print(f"Error: Failed to write {output_file}: {e}", file=sys.stderr) + sys.exit(1) + + +def write_github_output(outputs): + """ + Write outputs to GITHUB_OUTPUT for GitHub Actions compatibility. 
+
+    Args:
+        outputs: Dictionary of key-value pairs to output
+    """
+    import os; github_output = Path(os.environ.get('GITHUB_OUTPUT', '/dev/null'))
+
+    if github_output.exists() or github_output.parent.exists():
+        try:
+            with open(github_output, 'a') as f:
+                for key, value in outputs.items():
+                    f.write(f"{key}={value}\n")
+        except IOError as e:
+            print(f"Warning: Failed to write GITHUB_OUTPUT: {e}", file=sys.stderr)
+
+
+def main():
+    """Main entry point."""
+    import os
+
+    # Install dependencies (optional, jq not strictly needed in Python version)
+    # install_dependencies()
+
+    # Parse known_good.json
+    repos, module_count, module_outputs = parse_known_good('./known_good.json')
+
+    # Write repos.json
+    write_repos_json(repos)
+
+    # Write GitHub Actions outputs
+    github_outputs = {'MODULE_COUNT': str(module_count)}
+    github_outputs.update(module_outputs)
+    write_github_output(github_outputs)
+
+    print("Parse complete!")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/workflow/recategorize_guidelines.py b/scripts/workflow/recategorize_guidelines.py
new file mode 100755
index 00000000000..7b7dca48b15
--- /dev/null
+++ b/scripts/workflow/recategorize_guidelines.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python3
+# *******************************************************************************
+# Copyright (c) 2025 Contributors to the Eclipse Foundation
+#
+# See the NOTICE file(s) distributed with this work for additional
+# information regarding copyright ownership.
+#
+# This program and the accompanying materials are made available under the
+# terms of the Apache License Version 2.0 which is available at
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# SPDX-License-Identifier: Apache-2.0
+# *******************************************************************************
+"""
+Recategorize CodeQL SARIF results according to coding standards.
+""" + +import subprocess +import sys +from pathlib import Path + + +# Configuration paths +RECATEGORIZE_SCRIPT = "codeql-coding-standards-repo/scripts/guideline_recategorization/recategorize.py" +CODING_STANDARDS_CONFIG = "./.github/codeql/coding-standards.yml" +CODING_STANDARDS_SCHEMA = "codeql-coding-standards-repo/schemas/coding-standards-schema-1.0.0.json" +SARIF_SCHEMA = "codeql-coding-standards-repo/schemas/sarif-schema-2.1.0.json" +SARIF_FILE = "sarif-results/cpp.sarif" +OUTPUT_DIR = "sarif-results-recategorized" + + +def validate_paths(): + """Validate that required files and directories exist.""" + required_files = [ + RECATEGORIZE_SCRIPT, + CODING_STANDARDS_CONFIG, + CODING_STANDARDS_SCHEMA, + SARIF_SCHEMA, + SARIF_FILE, + ] + + for file_path in required_files: + if not Path(file_path).exists(): + print(f"Error: Required file not found: {file_path}", file=sys.stderr) + return False + + return True + + +def recategorize_sarif(): + """ + Run the CodeQL recategorization script on SARIF results. 
+ + Returns: + True if successful, False otherwise + """ + # Create output directory + output_path = Path(OUTPUT_DIR) + output_path.mkdir(parents=True, exist_ok=True) + + output_file = output_path / Path(SARIF_FILE).name + + print(f"Processing {SARIF_FILE} for recategorization...") + + try: + # Run recategorization script + result = subprocess.run( + [ + "python3", + RECATEGORIZE_SCRIPT, + "--coding-standards-schema-file", CODING_STANDARDS_SCHEMA, + "--sarif-schema-file", SARIF_SCHEMA, + CODING_STANDARDS_CONFIG, + SARIF_FILE, + str(output_file), + ], + check=True, + capture_output=True, + text=True + ) + + print("Recategorization completed successfully") + if result.stdout: + print("Output:", result.stdout) + + # Replace original SARIF file with recategorized version + sarif_path = Path(SARIF_FILE) + + if sarif_path.exists(): + sarif_path.unlink() + print(f"Removed original {SARIF_FILE}") + + # Move recategorized file to original location + output_file.replace(sarif_path) + print(f"Moved recategorized SARIF to {SARIF_FILE}") + + return True + + except subprocess.CalledProcessError as e: + print(f"Error: Recategorization script failed: {e}", file=sys.stderr) + if e.stderr: + print(f"Error output: {e.stderr}", file=sys.stderr) + return False + except (FileNotFoundError, OSError) as e: + print(f"Error: File operation failed: {e}", file=sys.stderr) + return False + + +def main(): + """Main entry point.""" + # Validate required files exist + if not validate_paths(): + sys.exit(1) + + # Run recategorization + if not recategorize_sarif(): + sys.exit(1) + + print("Recategorization workflow completed successfully") + sys.exit(0) + + +if __name__ == '__main__': + main()