From 2a92a8d4eeca42d5477f0c366464ec691357abda Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Fri, 27 Mar 2026 13:41:38 -0400 Subject: [PATCH 1/6] Add 'PRs in Release' table to report --- .../ci_run_report.html.jinja | 9 + .../create_workflow_report.py | 311 +++++++++++++++--- 2 files changed, 279 insertions(+), 41 deletions(-) diff --git a/.github/actions/create_workflow_report/ci_run_report.html.jinja b/.github/actions/create_workflow_report/ci_run_report.html.jinja index 4c94465a16c6..a6e0df17c49f 100644 --- a/.github/actions/create_workflow_report/ci_run_report.html.jinja +++ b/.github/actions/create_workflow_report/ci_run_report.html.jinja @@ -168,6 +168,7 @@ {% endif %}
Some PRs are missing verification.
+ {% endif %} + {{ prs_in_release_html }} + {%- endif %} + {%- if pr_number != 0 -%}Compared with base sha {{ base_sha }}
diff --git a/.github/actions/create_workflow_report/create_workflow_report.py b/.github/actions/create_workflow_report/create_workflow_report.py index adb682be2dcd..dda22bbcf4dd 100755 --- a/.github/actions/create_workflow_report/create_workflow_report.py +++ b/.github/actions/create_workflow_report/create_workflow_report.py @@ -6,10 +6,12 @@ from itertools import combinations import json from datetime import datetime +from datetime import timezone from functools import lru_cache from glob import glob import urllib.parse import re +import subprocess import pandas as pd from jinja2 import Environment, FileSystemLoader @@ -169,6 +171,196 @@ def get_run_details(run_id: str) -> dict: return response.json() +def _enrich_prs_in_release_merge_prs(df: pd.DataFrame, repo: str) -> tuple[pd.DataFrame, bool]: + if len(df) == 0: + return pd.DataFrame(columns=["pr_number", "pr_name", "labels"]), False + if not GITHUB_TOKEN: + raise Exception("GITHUB_TOKEN is required to fetch PR titles and labels") + headers = { + "Authorization": f"token {GITHUB_TOKEN}", + "Accept": "application/vnd.github.v3+json", + } + rows = [] + missing_verification = False + for pr_number in df["pr_number"].tolist(): + response = requests.get( + f"https://api.github.com/repos/{repo}/pulls/{pr_number}", + headers=headers, + ) + if response.status_code != 200: + raise Exception( + f"Failed to fetch pull request info: {response.status_code} {response.text}" + ) + pr = response.json() + label_names = [l["name"] for l in pr.get("labels", [])] + if any(ln.lower() == "cicd" for ln in label_names): + continue + lowered = {ln.lower() for ln in label_names} + if "verified" not in lowered and "verified-with-issue" not in lowered: + missing_verification = True + rows.append( + { + "pr_number": pr_number, + "pr_name": pr.get("title", ""), + "labels": ", ".join(sorted(label_names)), + } + ) + return pd.DataFrame(rows), missing_verification + + +def _git_rev_parse(ref: str, cwd: str | None) -> str | None: + p = subprocess.run( + ["git", "rev-parse", "--verify", ref], + cwd=cwd, + capture_output=True, + text=True, + ) + if p.returncode != 0: + return None + return p.stdout.strip() + + +def _git_is_ancestor(ancestor: str, descendant: str, cwd: str | None) -> bool: + p = subprocess.run( + ["git", "merge-base", "--is-ancestor", ancestor, descendant], + cwd=cwd, + capture_output=True, + ) + return p.returncode == 0 + + +def _git_log_merge_prs( + baseline: str, branch_ref: str, cwd: str | None, repo: str +) -> pd.DataFrame: + p = subprocess.run( + [ + "git", + "-c", + "core.quotepath=false", + "log", + f"{baseline}..{branch_ref}", + "--merges", + "--format=%H%x09%s", + ], + cwd=cwd, + capture_output=True, + text=True, + check=True, + ) + rows = [] + for line in p.stdout.splitlines(): + if not line.strip(): + continue + sha, subject = line.split("\t", 1) + m = re.match( + r"Merge pull request #(\d+) from ([^/\s]+)/", subject, re.IGNORECASE + ) + if not m: + continue + pr_number, head_owner = int(m.group(1)), m.group(2) + if head_owner.lower() != repo.split("/")[0].lower(): + continue + rows.append( + { + "pr_number": pr_number, + "merge_commit_sha": sha, + "merge_subject": subject, + } + ) + if not rows: + return pd.DataFrame(columns=["pr_number", "merge_commit_sha", "merge_subject"]) + df = pd.DataFrame(rows) + df = df.drop_duplicates(subset=["pr_number"], keep="first") + return df + + +def _find_release_baseline( + branch_ref: str, repo: str, cwd: str | None +) -> tuple[str | None, str | None]: + if not GITHUB_TOKEN: + return None, None + headers = { + "Authorization": f"token {GITHUB_TOKEN}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get( + f"https://api.github.com/repos/{repo}/releases?per_page=100", + headers=headers, + ) + if response.status_code != 200: + raise Exception( + f"GitHub API request failed: {response.status_code} {response.text}" + ) + for rel in response.json(): + if rel.get("draft"): + continue + tag_name = rel.get("tag_name") + if not tag_name: + continue + tag_sha = _git_rev_parse(tag_name, cwd) + if not tag_sha: + continue + if not _git_is_ancestor(tag_sha, branch_ref, cwd): + continue + return tag_name, tag_sha + return None, None + + +def _find_rebase_baseline(branch_ref: str, cwd: str | None) -> str | None: + p = subprocess.run( + [ + "git", + "log", + branch_ref, + "--reverse", + "--merges", + "-i", + "--grep=rebase-cicd", + "--grep=rebase/", + "--format=%H", + ], + cwd=cwd, + capture_output=True, + text=True, + ) + if p.returncode != 0: + return None + lines = [ln for ln in p.stdout.splitlines() if ln.strip()] + if not lines: + return None + return lines[0] + + +def get_prs_in_release_dataframe( + branch_ref: str = "HEAD", + *, + repo: str = GITHUB_REPO, + cwd: str | None = None, +) -> tuple[pd.DataFrame, bool]: + """ + PRs merged into branch_ref that belong in the next release notes: after the latest GitHub + Release tag on this history, or after the oldest rebase bootstrap if no such tag exists. + Only merge commits whose subject has fromNothing to report
" - results.columns = [col.replace("_", " ").title() for col in results.columns] + + results = results.copy() + + def format_col_name(col_name: str) -> str: + return col_name.replace("_", " ").title().replace("Pr ", "PR ") + + results.columns = [format_col_name(col) for col in results.columns] + + formatters = { + "Results Link": url_to_html_link, + "Test Name": format_test_name_for_linewrap, + "Test Status": format_test_status, + "Job Status": format_test_status, + "Status": format_test_status, + "Message": lambda m: m.replace("\n", " "), + "Identifier": lambda i: url_to_html_link( + "https://nvd.nist.gov/vuln/detail/" + i + ), + "Severity": lambda s: ( + f'{s}' + ), + "PR Number": lambda n: url_to_html_link( + f"https://github.com/{GITHUB_REPO}/pull/{n}" + ), + } + html = results.to_html( index=False, - formatters={ - "Results Link": url_to_html_link, - "Test Name": format_test_name_for_linewrap, - "Test Status": format_test_status, - "Job Status": format_test_status, - "Status": format_test_status, - "Message": lambda m: m.replace("\n", " "), - "Identifier": lambda i: url_to_html_link( - "https://nvd.nist.gov/vuln/detail/" + i - ), - "Severity": lambda s: ( - f'{s}' - ), - }, + formatters=formatters, escape=False, border=0, classes=["test-results-table"], @@ -816,7 +1023,9 @@ def create_workflow_report( settings={"use_numpy": True}, ) - fail_results = { + prs_in_release_missing_verification = False + results_dfs = { + "prs_in_release": [], "job_statuses": get_commit_statuses(commit_sha), "checks_fails": get_checks_fails(db_client, commit_sha, branch_name), "checks_known_fails": [], @@ -826,8 +1035,17 @@ def create_workflow_report( "docker_images_cves": [], } + if pr_number == 0 and not mark_preview: + try: + prs_df, prs_in_release_missing_verification = get_prs_in_release_dataframe( + branch_name, cwd=os.getcwd() + ) + results_dfs["prs_in_release"] = prs_df + except Exception as e: + print(f"Error in get_prs_in_release_dataframe: {e}") + try: - fail_results["docker_images_cves"] = ( + results_dfs["docker_images_cves"] = ( [] if not check_cves else get_cves(pr_number, commit_sha, branch_name) ) except Exception as e: @@ -835,7 +1053,7 @@ def create_workflow_report( # get_cves returns ... in the case where no Grype result files were found. # This might occur when run in preview mode. - cves_not_checked = not check_cves or fail_results["docker_images_cves"] is ... + cves_not_checked = not check_cves or results_dfs["docker_images_cves"] is ... if known_fails_file_path: if not os.path.exists(known_fails_file_path): @@ -843,7 +1061,7 @@ def create_workflow_report( else: known_fails = get_broken_tests_rules(known_fails_file_path) - fail_results["checks_known_fails"] = get_checks_known_fails( + results_dfs["checks_known_fails"] = get_checks_known_fails( db_client, commit_sha, branch_name, known_fails ) @@ -855,24 +1073,24 @@ def create_workflow_report( pr_info_html = f""" #{pr_info.get("number")} ({pr_info.get("base", {}).get('ref')} <- {pr_info.get("head", {}).get('ref')}) {pr_info.get("title")} """ - fail_results["pr_new_fails"] = get_new_fails_this_pr( + results_dfs["pr_new_fails"] = get_new_fails_this_pr( db_client, pr_info, - fail_results["checks_fails"], - fail_results["regression_fails"], + results_dfs["checks_fails"], + results_dfs["regression_fails"], ) except Exception as e: pr_info_html = e pr_info = {} - fail_results["job_statuses"] = backfill_skipped_statuses( - fail_results["job_statuses"], pr_number, branch_name, commit_sha + results_dfs["job_statuses"] = backfill_skipped_statuses( + results_dfs["job_statuses"], pr_number, branch_name, commit_sha ) high_cve_count = 0 - if not cves_not_checked and len(fail_results["docker_images_cves"]) > 0: + if not cves_not_checked and len(results_dfs["docker_images_cves"]) > 0: high_cve_count = ( - fail_results["docker_images_cves"]["severity"] + results_dfs["docker_images_cves"]["severity"] .str.lower() .isin(("high", "critical")) .sum() @@ -893,43 +1111,54 @@ def create_workflow_report( "workflow_id": run_id, "commit_sha": commit_sha, "base_sha": "" if pr_number == 0 else pr_info.get("base", {}).get("sha"), - "date": f"{datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC", + "date": f"{datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC", "is_preview": mark_preview, + "prs_in_release_missing_verification": prs_in_release_missing_verification, "counts": { - "jobs_status": f"{sum(fail_results['job_statuses']['job_status'].value_counts().get(x, 0) for x in ('failure', 'error'))} fail/error", - "checks_errors": len(fail_results["checks_errors"]), - "checks_new_fails": len(fail_results["checks_fails"]), - "regression_new_fails": len(fail_results["regression_fails"]), + "jobs_status": f"{sum(results_dfs['job_statuses']['job_status'].value_counts().get(x, 0) for x in ('failure', 'error'))} fail/error", + "checks_errors": len(results_dfs["checks_errors"]), + "checks_new_fails": len(results_dfs["checks_fails"]), + "regression_new_fails": len(results_dfs["regression_fails"]), "cves": "N/A" if cves_not_checked else f"{high_cve_count} high/critical", "checks_known_fails": ( - "N/A" if not known_fails else len(fail_results["checks_known_fails"]) + "N/A" if not known_fails else len(results_dfs["checks_known_fails"]) + ), + "pr_new_fails": len(results_dfs["pr_new_fails"]), + "prs_in_release": ( + "N/A" + if mark_preview or pr_number != 0 + else len(results_dfs["prs_in_release"]) ), - "pr_new_fails": len(fail_results["pr_new_fails"]), }, "build_report_links": get_build_report_links( - fail_results["job_statuses"], pr_number, branch_name, commit_sha + results_dfs["job_statuses"], pr_number, branch_name, commit_sha + ), + "prs_in_release_html": ( + "PR details are not loaded during preview.
" + if mark_preview or pr_number != 0 + else format_results_as_html_table(results_dfs["prs_in_release"]) ), "ci_jobs_status_html": format_results_as_html_table( - fail_results["job_statuses"] + results_dfs["job_statuses"] ), "checks_errors_html": format_results_as_html_table( - fail_results["checks_errors"] + results_dfs["checks_errors"] ), - "checks_fails_html": format_results_as_html_table(fail_results["checks_fails"]), + "checks_fails_html": format_results_as_html_table(results_dfs["checks_fails"]), "regression_fails_html": format_results_as_html_table( - fail_results["regression_fails"] + results_dfs["regression_fails"] ), "docker_images_cves_html": ( "Not Checked
" if cves_not_checked - else format_results_as_html_table(fail_results["docker_images_cves"]) + else format_results_as_html_table(results_dfs["docker_images_cves"]) ), "checks_known_fails_html": ( "Not Checked
" if not known_fails - else format_results_as_html_table(fail_results["checks_known_fails"]) + else format_results_as_html_table(results_dfs["checks_known_fails"]) ), - "new_fails_html": format_results_as_html_table(fail_results["pr_new_fails"]), + "new_fails_html": format_results_as_html_table(results_dfs["pr_new_fails"]), } # Render the template with the context From a5d87f90ab8d0d063e2657e99582568c972ebd44 Mon Sep 17 00:00:00 2001 From: strtgbb <146047128+strtgbb@users.noreply.github.com> Date: Fri, 27 Mar 2026 16:37:14 -0400 Subject: [PATCH 2/6] highlight labels for unverified PRs --- .../create_workflow_report.py | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/.github/actions/create_workflow_report/create_workflow_report.py b/.github/actions/create_workflow_report/create_workflow_report.py index dda22bbcf4dd..f5fee14e6159 100755 --- a/.github/actions/create_workflow_report/create_workflow_report.py +++ b/.github/actions/create_workflow_report/create_workflow_report.py @@ -32,6 +32,8 @@ CVE_SEVERITY_ORDER = {"critical": 1, "high": 2, "medium": 3, "low": 4, "negligible": 5} +PR_LABELS_VERIFIED = {"verified", "verified-with-issue", "no-verification-needed"} + def _is_clickhouse_memory_limit_error(exc: BaseException) -> bool: if isinstance(exc, ServerException) and getattr(exc, "code", None) == 241: return True @@ -173,7 +175,7 @@ def get_run_details(run_id: str) -> dict: def _enrich_prs_in_release_merge_prs(df: pd.DataFrame, repo: str) -> tuple[pd.DataFrame, bool]: if len(df) == 0: - return pd.DataFrame(columns=["pr_number", "pr_name", "labels"]), False + return pd.DataFrame(columns=["pr_number", "pr_name", "pr_labels"]), False if not GITHUB_TOKEN: raise Exception("GITHUB_TOKEN is required to fetch PR titles and labels") headers = { @@ -193,16 +195,15 @@ def _enrich_prs_in_release_merge_prs(df: pd.DataFrame, repo: str) -> tuple[pd.Da ) pr = response.json() label_names = [l["name"] for l in pr.get("labels", [])] - if any(ln.lower() == "cicd" for ln in label_names): + if "cicd" in label_names: continue - lowered = {ln.lower() for ln in label_names} - if "verified" not in lowered and "verified-with-issue" not in lowered: + if not PR_LABELS_VERIFIED.intersection(label_names): missing_verification = True rows.append( { "pr_number": pr_number, "pr_name": pr.get("title", ""), - "labels": ", ".join(sorted(label_names)), + "pr_labels": ", ".join(sorted(label_names)), } ) return pd.DataFrame(rows), missing_verification @@ -337,12 +338,12 @@ def get_prs_in_release_dataframe( repo: str = GITHUB_REPO, cwd: str | None = None, ) -> tuple[pd.DataFrame, bool]: - """ + f""" PRs merged into branch_ref that belong in the next release notes: after the latest GitHub Release tag on this history, or after the oldest rebase bootstrap if no such tag exists. Only merge commits whose subject has fromSome PRs are missing verification.
- {% endif %} {{ prs_in_release_html }} {%- endif %} diff --git a/.github/actions/create_workflow_report/create_workflow_report.py b/.github/actions/create_workflow_report/create_workflow_report.py index b27f60ac481f..d0ad4eddce0a 100755 --- a/.github/actions/create_workflow_report/create_workflow_report.py +++ b/.github/actions/create_workflow_report/create_workflow_report.py @@ -33,7 +33,6 @@ CVE_SEVERITY_ORDER = {"critical": 1, "high": 2, "medium": 3, "low": 4, "negligible": 5} -PR_LABELS_VERIFIED = {"verified", "verified-with-issue"} def _is_clickhouse_memory_limit_error(exc: BaseException) -> bool: if isinstance(exc, ServerException) and getattr(exc, "code", None) == 241: @@ -174,9 +173,9 @@ def get_run_details(run_id: str) -> dict: return response.json() -def _enrich_prs_in_release_merge_prs(df: pd.DataFrame, repo: str) -> tuple[pd.DataFrame, bool]: +def _enrich_prs_in_release_merge_prs(df: pd.DataFrame, repo: str) -> pd.DataFrame: if len(df) == 0: - return pd.DataFrame(columns=["pr_number", "pr_name", "pr_labels"]), False + return pd.DataFrame(columns=["pr_number", "pr_name", "pr_labels"]) if not GITHUB_TOKEN: raise Exception("GITHUB_TOKEN is required to fetch PR titles and labels") headers = { @@ -184,7 +183,6 @@ def _enrich_prs_in_release_merge_prs(df: pd.DataFrame, repo: str) -> tuple[pd.Da "Accept": "application/vnd.github.v3+json", } rows = [] - missing_verification = False for pr_number in df["pr_number"].tolist(): response = requests.get( f"https://api.github.com/repos/{repo}/pulls/{pr_number}", @@ -196,8 +194,6 @@ def _enrich_prs_in_release_merge_prs(df: pd.DataFrame, repo: str) -> tuple[pd.Da ) pr = response.json() label_names = [l["name"] for l in pr.get("labels", [])] - if not PR_LABELS_VERIFIED.intersection(label_names): - missing_verification = True rows.append( { "pr_number": pr_number, @@ -205,7 +201,7 @@ def _enrich_prs_in_release_merge_prs(df: pd.DataFrame, repo: str) -> tuple[pd.Da "pr_labels": ", ".join(sorted(label_names)), } ) - return pd.DataFrame(rows), missing_verification + return pd.DataFrame(rows) def _git_rev_parse(ref: str, cwd: str | None) -> str | None: @@ -337,13 +333,12 @@ def get_prs_in_release_dataframe( *, repo: str = GITHUB_REPO, cwd: str, -) -> tuple[pd.DataFrame, bool]: +) -> pd.DataFrame: f""" PRs merged into branch_ref that belong in the next release notes: after the latest GitHub Release tag on this history, or after the oldest rebase bootstrap if no such tag exists. Only merge commits whose subject has from