From 3473b0e0ea4965d45cde8d77210c9a43a19e9998 Mon Sep 17 00:00:00 2001 From: Saumya Rai Date: Fri, 13 Mar 2026 13:31:10 +0530 Subject: [PATCH 1/3] Fix script to handle SARIF file recategorization Signed-off-by: Saumya Rai --- scripts/workflow/recategorize_guidelines.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/workflow/recategorize_guidelines.sh b/scripts/workflow/recategorize_guidelines.sh index 8fa4b736020..97ceef401f0 100755 --- a/scripts/workflow/recategorize_guidelines.sh +++ b/scripts/workflow/recategorize_guidelines.sh @@ -26,3 +26,5 @@ python3 "$RECATEGORIZE_SCRIPT" \ "sarif-results-recategorized/$(basename "$SARIF_FILE")" rm "$SARIF_FILE" mv "sarif-results-recategorized/$(basename "$SARIF_FILE")" "$SARIF_FILE" + + #Test From 0bfca2dd992b68f977ed9788a59f2486f4d11381 Mon Sep 17 00:00:00 2001 From: Saumya-R Date: Fri, 13 Mar 2026 15:46:11 +0530 Subject: [PATCH 2/3] adding debug --- .github/workflows/codeql-multiple-repo-scan.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/codeql-multiple-repo-scan.yml b/.github/workflows/codeql-multiple-repo-scan.yml index a22531153b2..cebd968b41c 100644 --- a/.github/workflows/codeql-multiple-repo-scan.yml +++ b/.github/workflows/codeql-multiple-repo-scan.yml @@ -59,6 +59,10 @@ jobs: id: checkout-repos run: | scripts/workflow/checkout_repos.sh + - name: List files in repos directory (debug) + run: | + echo "Listing all files in repos directory before CodeQL analysis:" + find repos || echo "repos directory not found" - name: Initialize CodeQL for all repositories uses: github/codeql-action/init@v4 with: From 1922d687e46a954901a0b13377e2b73fa48085c1 Mon Sep 17 00:00:00 2001 From: Saumya-R Date: Tue, 24 Mar 2026 12:38:19 +0530 Subject: [PATCH 3/3] adding python files for codeql scripts --- .../workflows/codeql-multiple-repo-scan.yml | 6 +- scripts/workflow/checkout_repos.py | 163 ++++++++++++++++++ scripts/workflow/parse_repos.py | 156 +++++++++++++++++ 
scripts/workflow/recategorize_guidelines.py | 124 +++++++++++++ 4 files changed, 446 insertions(+), 3 deletions(-) create mode 100755 scripts/workflow/checkout_repos.py create mode 100755 scripts/workflow/parse_repos.py create mode 100755 scripts/workflow/recategorize_guidelines.py diff --git a/.github/workflows/codeql-multiple-repo-scan.yml b/.github/workflows/codeql-multiple-repo-scan.yml index cebd968b41c..768a7fd0451 100644 --- a/.github/workflows/codeql-multiple-repo-scan.yml +++ b/.github/workflows/codeql-multiple-repo-scan.yml @@ -54,11 +54,11 @@ jobs: - name: Parse known_good.json and create repos.json id: parse-repos run: | - scripts/workflow/parse_repos.sh + python3 scripts/workflow/parse_repos.py - name: Checkout all pinned repositories id: checkout-repos run: | - scripts/workflow/checkout_repos.sh + python3 scripts/workflow/checkout_repos.py - name: List files in repos directory (debug) run: | echo "Listing all files in repos directory before CodeQL analysis:" @@ -79,7 +79,7 @@ jobs: - name: Recategorize Guidelines if: always() run: | - scripts/workflow/recategorize_guidelines.sh + python3 scripts/workflow/recategorize_guidelines.py - name: Generate HTML Report from SARIF run: | SARIF_FILE="sarif-results/cpp.sarif" diff --git a/scripts/workflow/checkout_repos.py b/scripts/workflow/checkout_repos.py new file mode 100755 index 00000000000..0395168ed86 --- /dev/null +++ b/scripts/workflow/checkout_repos.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python3 +# ******************************************************************************* +# Copyright (c) 2025 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +""" +Checkout all pinned repositories based on repos.json configuration. +""" + +import json +import sys +import subprocess +import re +import os +from pathlib import Path + + +def load_repos_config(config_file="./repos.json"): + """ + Load repository configuration from repos.json. + + Args: + config_file: Path to repos.json file + + Returns: + List of repository configurations + """ + config_path = Path(config_file) + + if not config_path.exists(): + print(f"Error: file not found '{config_file}'", file=sys.stderr) + sys.exit(1) + + try: + with open(config_path, 'r') as f: + repos = json.load(f) + return repos + except (json.JSONDecodeError, IOError) as e: + print(f"Error: Failed to load repos.json: {e}", file=sys.stderr) + sys.exit(1) + + +def is_commit_hash(ref): + """ + Check if reference looks like a commit hash (40 hex characters for SHA-1). + + Args: + ref: Git reference (branch, tag, or hash) + + Returns: + True if ref matches commit hash pattern + """ + return bool(re.match(r'^[0-9a-fA-F]{40}$', ref)) + + +def checkout_repo(name, url, ref, path): + """ + Checkout a single repository. + + Args: + name: Repository name + url: Repository URL + ref: Git reference (branch, tag, or commit hash) + path: Local path to checkout into + + Returns: + True if successful, False otherwise + """ + path_obj = Path(path) + + try: + # Create parent directory if needed + path_obj.parent.mkdir(parents=True, exist_ok=True) + + if is_commit_hash(ref): + print(f"Checking out {name} ({ref}) to {path}") + print(f" Detected commit hash. 
Cloning and then checking out.")
+
+            # Clone the repository
+            subprocess.run(
+                ["git", "clone", url, path],
+                check=True,
+                capture_output=True
+            )
+
+            # Checkout specific commit
+            subprocess.run(
+                ["git", "-C", path, "checkout", ref],
+                check=True,
+                capture_output=True
+            )
+        else:
+            print(f"Checking out {name} ({ref}) to {path}")
+            print(f"  Detected branch/tag. Cloning with --branch.")
+
+            # Clone with shallow copy and specific branch/tag
+            # NOTE(review): blindly adds a 'v' prefix to any ref lacking one; this breaks plain branch names ('main' -> 'vmain') -- confirm every non-hash ref is a version tag
+            branch_ref = ref if ref.startswith('v') else f'v{ref}'
+            subprocess.run(
+                ["git", "clone", "--depth", "1", "--branch", branch_ref, url, path],
+                check=True,
+                capture_output=True
+            )
+
+        return True
+
+    except subprocess.CalledProcessError as e:
+        print(f"Error: Failed to checkout {name}: {e}", file=sys.stderr)
+        return False
+
+
+def main():
+    """Main entry point."""
+    # Load repository configurations
+    repos = load_repos_config('./repos.json')
+    repo_count = len(repos)
+
+    # Track successfully checked out repositories
+    repo_paths = []
+
+    # Checkout each repository
+    for i, repo in enumerate(repos):
+        name = repo.get('name', f'repo-{i}')
+        url = repo.get('url', '')
+        ref = repo.get('version', '')
+        path = repo.get('path', '')
+
+        if not all([url, ref, path]):
+            print(f"Warning: Skipping {name} - missing required fields", file=sys.stderr)
+            continue
+
+        if checkout_repo(name, url, ref, path):
+            repo_paths.append(path)
+
+    # Output all paths (comma-separated for GitHub Actions compatibility)
+    repo_paths_output = ','.join(repo_paths)
+
+    # Write to GITHUB_OUTPUT if available
+    github_output = os.environ.get('GITHUB_OUTPUT')
+    if github_output:
+        try:
+            with open(github_output, 'a') as f:
+                f.write(f"repo_paths={repo_paths_output}\n")
+        except IOError as e:
+            print(f"Warning: Failed to write GITHUB_OUTPUT: {e}", file=sys.stderr)
+
+    # Also print for debugging
+    print(f"\nSuccessfully checked out {len(repo_paths)} of {repo_count} repositories")
+    
print(f"repo_paths={repo_paths_output}") + + return 0 if len(repo_paths) == repo_count else 1 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/scripts/workflow/parse_repos.py b/scripts/workflow/parse_repos.py new file mode 100755 index 00000000000..21dbfb6dd43 --- /dev/null +++ b/scripts/workflow/parse_repos.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python3 +# ******************************************************************************* +# Copyright (c) 2025 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +""" +Parse known_good.json and create repos.json for multi-repository CodeQL analysis. +""" + +import json +import sys +import subprocess +from pathlib import Path + + +def install_dependencies(): + """Ensure jq is installed (for reference, though we use Python's json).""" + try: + subprocess.run( + ["sudo", "apt-get", "update"], + check=True, + capture_output=True + ) + subprocess.run( + ["sudo", "apt-get", "install", "-y", "jq"], + check=True, + capture_output=True + ) + except subprocess.CalledProcessError as e: + print(f"Warning: Failed to install jq: {e}", file=sys.stderr) + + +def parse_known_good(json_file="./known_good.json"): + """ + Parse known_good.json and transform modules into repository objects. 
+ + Args: + json_file: Path to known_good.json file + + Returns: + Tuple of (repos list, module count, module outputs dict) + """ + json_path = Path(json_file) + + if not json_path.exists(): + print(f"Error: file not found '{json_file}'", file=sys.stderr) + print(f"Current directory: {Path.cwd()}", file=sys.stderr) + sys.exit(1) + + try: + with open(json_path, 'r') as f: + data = json.load(f) + except json.JSONDecodeError as e: + print(f"Error: Failed to parse JSON: {e}", file=sys.stderr) + sys.exit(1) + + # Extract target_sw modules + modules = data.get('modules', {}).get('target_sw', {}) + + # Transform modules into repository objects + repos = [] + module_outputs = {} + + for name, config in modules.items(): + repo_url = config.get('repo', '') + version = config.get('version', '') + branch = config.get('branch', '') + hash_val = config.get('hash', '') + + # Use version, branch, or hash (in that order of preference) + ref = version or branch or hash_val + + repo_obj = { + 'name': name, + 'url': repo_url, + 'version': ref, + 'path': f'repos/{name}' + } + repos.append(repo_obj) + + # Store module outputs for GITHUB_OUTPUT compatibility + module_outputs[f'{name}_url'] = repo_url + if version: + module_outputs[f'{name}_version'] = version + if branch: + module_outputs[f'{name}_branch'] = branch + if hash_val: + module_outputs[f'{name}_hash'] = hash_val + + return repos, len(modules), module_outputs + + +def write_repos_json(repos, output_file="./repos.json"): + """Write repositories to repos.json file.""" + output_path = Path(output_file) + + try: + with open(output_path, 'w') as f: + json.dump(repos, f, indent=2) + print(f"Generated {output_file}:") + print(json.dumps(repos, indent=2)) + print() # Add newline for readability + except IOError as e: + print(f"Error: Failed to write {output_file}: {e}", file=sys.stderr) + sys.exit(1) + + +def write_github_output(outputs): + """ + Write outputs to GITHUB_OUTPUT for GitHub Actions compatibility. 
+
+    Args:
+        outputs: Dictionary of key-value pairs to output
+    """
+    import os; github_output = Path(os.environ.get('GITHUB_OUTPUT', '/dev/null'))
+
+    if github_output.exists() or github_output.parent.exists():
+        try:
+            with open(github_output, 'a') as f:
+                for key, value in outputs.items():
+                    f.write(f"{key}={value}\n")
+        except IOError as e:
+            print(f"Warning: Failed to write GITHUB_OUTPUT: {e}", file=sys.stderr)
+
+
+def main():
+    """Main entry point."""
+    import os
+
+    # Install dependencies (optional, jq not strictly needed in Python version)
+    # install_dependencies()
+
+    # Parse known_good.json
+    repos, module_count, module_outputs = parse_known_good('./known_good.json')
+
+    # Write repos.json
+    write_repos_json(repos)
+
+    # Write GitHub Actions outputs
+    github_outputs = {'MODULE_COUNT': str(module_count)}
+    github_outputs.update(module_outputs)
+    write_github_output(github_outputs)
+
+    print("Parse complete!")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/workflow/recategorize_guidelines.py b/scripts/workflow/recategorize_guidelines.py
new file mode 100755
index 00000000000..7b7dca48b15
--- /dev/null
+++ b/scripts/workflow/recategorize_guidelines.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python3
+# *******************************************************************************
+# Copyright (c) 2025 Contributors to the Eclipse Foundation
+#
+# See the NOTICE file(s) distributed with this work for additional
+# information regarding copyright ownership.
+#
+# This program and the accompanying materials are made available under the
+# terms of the Apache License Version 2.0 which is available at
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# SPDX-License-Identifier: Apache-2.0
+# *******************************************************************************
+"""
+Recategorize CodeQL SARIF results according to coding standards.
+""" + +import subprocess +import sys +from pathlib import Path + + +# Configuration paths +RECATEGORIZE_SCRIPT = "codeql-coding-standards-repo/scripts/guideline_recategorization/recategorize.py" +CODING_STANDARDS_CONFIG = "./.github/codeql/coding-standards.yml" +CODING_STANDARDS_SCHEMA = "codeql-coding-standards-repo/schemas/coding-standards-schema-1.0.0.json" +SARIF_SCHEMA = "codeql-coding-standards-repo/schemas/sarif-schema-2.1.0.json" +SARIF_FILE = "sarif-results/cpp.sarif" +OUTPUT_DIR = "sarif-results-recategorized" + + +def validate_paths(): + """Validate that required files and directories exist.""" + required_files = [ + RECATEGORIZE_SCRIPT, + CODING_STANDARDS_CONFIG, + CODING_STANDARDS_SCHEMA, + SARIF_SCHEMA, + SARIF_FILE, + ] + + for file_path in required_files: + if not Path(file_path).exists(): + print(f"Error: Required file not found: {file_path}", file=sys.stderr) + return False + + return True + + +def recategorize_sarif(): + """ + Run the CodeQL recategorization script on SARIF results. 
+ + Returns: + True if successful, False otherwise + """ + # Create output directory + output_path = Path(OUTPUT_DIR) + output_path.mkdir(parents=True, exist_ok=True) + + output_file = output_path / Path(SARIF_FILE).name + + print(f"Processing {SARIF_FILE} for recategorization...") + + try: + # Run recategorization script + result = subprocess.run( + [ + "python3", + RECATEGORIZE_SCRIPT, + "--coding-standards-schema-file", CODING_STANDARDS_SCHEMA, + "--sarif-schema-file", SARIF_SCHEMA, + CODING_STANDARDS_CONFIG, + SARIF_FILE, + str(output_file), + ], + check=True, + capture_output=True, + text=True + ) + + print("Recategorization completed successfully") + if result.stdout: + print("Output:", result.stdout) + + # Replace original SARIF file with recategorized version + sarif_path = Path(SARIF_FILE) + + if sarif_path.exists(): + sarif_path.unlink() + print(f"Removed original {SARIF_FILE}") + + # Move recategorized file to original location + output_file.replace(sarif_path) + print(f"Moved recategorized SARIF to {SARIF_FILE}") + + return True + + except subprocess.CalledProcessError as e: + print(f"Error: Recategorization script failed: {e}", file=sys.stderr) + if e.stderr: + print(f"Error output: {e.stderr}", file=sys.stderr) + return False + except (FileNotFoundError, OSError) as e: + print(f"Error: File operation failed: {e}", file=sys.stderr) + return False + + +def main(): + """Main entry point.""" + # Validate required files exist + if not validate_paths(): + sys.exit(1) + + # Run recategorization + if not recategorize_sarif(): + sys.exit(1) + + print("Recategorization workflow completed successfully") + sys.exit(0) + + +if __name__ == '__main__': + main()