Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/benchmark-comparison.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ jobs:
else
echo "BENCHMARK_EXTRA_ARG=--dry-run" >> "$GITHUB_ENV"
echo "Benchmark secrets are unavailable; running benchmark comparison in dry-run mode." >> "$GITHUB_STEP_SUMMARY"
echo "To run the real benchmark, add these secrets and rerun the workflow: OPENAI_API_KEY, GRAPHRAG_API_BASE, AZURE_AI_SEARCH_URL_ENDPOINT, AZURE_AI_SEARCH_API_KEY." >> "$GITHUB_STEP_SUMMARY"
echo "You can trigger a rerun from Actions → Benchmark Comparison → Run workflow once the secrets are configured." >> "$GITHUB_STEP_SUMMARY"
fi

- name: Benchmark Python implementation
Expand Down
17 changes: 17 additions & 0 deletions scripts/benchmark_smoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,11 @@ def summarize_results(results: list[OperationResult]) -> dict[str, int]:
return summary


def has_dry_run_results(*result_groups: list[OperationResult]) -> bool:
    """Return whether any result group contains dry-run benchmark results.

    Args:
        result_groups: Any number of lists of ``OperationResult`` records,
            e.g. the Python and .NET result lists.

    Returns:
        True if at least one result in any group has ``status == "dry_run"``.
    """
    # The any(...) call is split across lines so the generator expression
    # stays within the formatter's line-length limit (ruff format --check).
    return any(
        result.status == "dry_run"
        for results in result_groups
        for result in results
    )
Copy link

Copilot AI Mar 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

has_dry_run_results return statement is likely to fail ruff format --check due to line wrapping (the generator expression exceeds the formatter's line length). Reformat this line (e.g., split the any(...) call across multiple lines) or run ruff format so CI formatting checks pass.

Suggested change
return any(result.status == "dry_run" for results in result_groups for result in results)
return any(
result.status == "dry_run"
for results in result_groups
for result in results
)

Copilot uses AI. Check for mistakes.


def render_markdown_report(
python_results: list[OperationResult],
dotnet_results: list[OperationResult],
Expand All @@ -399,6 +404,18 @@ def render_markdown_report(
f"{summary.get('dry_run', 0)} |"
)

if has_dry_run_results(python_results, dotnet_results):
lines.extend(
[
"",
"> [!IMPORTANT]",
"> This comparison used `--dry-run`, so it validated commands without executing the real benchmark workload.",
"> To get a real benchmark run in GitHub Actions, configure these secrets and rerun the `Benchmark Comparison` workflow:",
"> `OPENAI_API_KEY`, `GRAPHRAG_API_BASE`, `AZURE_AI_SEARCH_URL_ENDPOINT`, and `AZURE_AI_SEARCH_API_KEY`.",
"> You can rerun it manually with **Actions → Benchmark Comparison → Run workflow** after those secrets are available.",
]
)

lines.extend(
[
"",
Expand Down
24 changes: 24 additions & 0 deletions tests/unit/test_benchmark_smoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,30 @@ def test_render_markdown_report_includes_missing_output_notes():
assert "extract_graph_nlp" in report


def test_render_markdown_report_explains_how_to_get_real_benchmark_run():
    """A dry-run result makes the report explain how to get a real run."""
    mod = load_module()

    # Build a single dry-run index result; the command list is assembled
    # via split() purely for brevity — values are identical.
    result = mod.OperationResult(
        implementation="python",
        fixture="text",
        operation_type="index",
        operation_label="index:fast",
        method="fast",
        query=None,
        command="uv run python -m graphrag index".split(),
        duration_seconds=0.0,
        exit_code=0,
        status="dry_run",
        stdout="[dry-run]",
    )

    rendered = mod.render_markdown_report([result], [])

    for expected in (
        "This comparison used `--dry-run`",
        "OPENAI_API_KEY",
        "Run workflow",
    ):
        assert expected in rendered


def test_python_query_command_uses_cli_shape_from_fixture():
module = load_module()
repo_root = Path(__file__).resolve().parents[2]
Expand Down
Loading