diff --git a/.github/workflows/benchmark-comparison.yml b/.github/workflows/benchmark-comparison.yml
index 38f97d318..aefe5a61d 100644
--- a/.github/workflows/benchmark-comparison.yml
+++ b/.github/workflows/benchmark-comparison.yml
@@ -84,6 +84,8 @@ jobs:
           else
             echo "BENCHMARK_EXTRA_ARG=--dry-run" >> "$GITHUB_ENV"
             echo "Benchmark secrets are unavailable; running benchmark comparison in dry-run mode." >> "$GITHUB_STEP_SUMMARY"
+            echo "To run the real benchmark, add these secrets and rerun the workflow: OPENAI_API_KEY, GRAPHRAG_API_BASE, AZURE_AI_SEARCH_URL_ENDPOINT, AZURE_AI_SEARCH_API_KEY." >> "$GITHUB_STEP_SUMMARY"
+            echo "You can trigger a rerun from Actions → Benchmark Comparison → Run workflow once the secrets are configured." >> "$GITHUB_STEP_SUMMARY"
           fi
 
       - name: Benchmark Python implementation
diff --git a/scripts/benchmark_smoke.py b/scripts/benchmark_smoke.py
index af605d096..819768806 100644
--- a/scripts/benchmark_smoke.py
+++ b/scripts/benchmark_smoke.py
@@ -374,6 +374,11 @@ def summarize_results(results: list[OperationResult]) -> dict[str, int]:
     return summary
 
 
+def has_dry_run_results(*result_groups: list[OperationResult]) -> bool:
+    """Return whether any result group contains dry-run benchmark results."""
+    return any(result.status == "dry_run" for results in result_groups for result in results)
+
+
 def render_markdown_report(
     python_results: list[OperationResult],
     dotnet_results: list[OperationResult],
@@ -399,6 +404,18 @@ def render_markdown_report(
         f"{summary.get('dry_run', 0)} |"
     )
 
+    if has_dry_run_results(python_results, dotnet_results):
+        lines.extend(
+            [
+                "",
+                "> [!IMPORTANT]",
+                "> This comparison used `--dry-run`, so it validated commands without executing the real benchmark workload.",
+                "> To get a real benchmark run in GitHub Actions, configure these secrets and rerun the `Benchmark Comparison` workflow:",
+                "> `OPENAI_API_KEY`, `GRAPHRAG_API_BASE`, `AZURE_AI_SEARCH_URL_ENDPOINT`, and `AZURE_AI_SEARCH_API_KEY`.",
+                "> You can rerun it manually with **Actions → Benchmark Comparison → Run workflow** after those secrets are available.",
+            ]
+        )
+
     lines.extend(
         [
             "",
diff --git a/tests/unit/test_benchmark_smoke.py b/tests/unit/test_benchmark_smoke.py
index f4404d7b5..23639cc41 100644
--- a/tests/unit/test_benchmark_smoke.py
+++ b/tests/unit/test_benchmark_smoke.py
@@ -90,6 +90,30 @@ def test_render_markdown_report_includes_missing_output_notes():
     assert "extract_graph_nlp" in report
 
 
+def test_render_markdown_report_explains_how_to_get_real_benchmark_run():
+    module = load_module()
+
+    dry_run_result = module.OperationResult(
+        implementation="python",
+        fixture="text",
+        operation_type="index",
+        operation_label="index:fast",
+        method="fast",
+        query=None,
+        command=["uv", "run", "python", "-m", "graphrag", "index"],
+        duration_seconds=0.0,
+        exit_code=0,
+        status="dry_run",
+        stdout="[dry-run]",
+    )
+
+    report = module.render_markdown_report([dry_run_result], [])
+
+    assert "This comparison used `--dry-run`" in report
+    assert "OPENAI_API_KEY" in report
+    assert "Run workflow" in report
+
+
 def test_python_query_command_uses_cli_shape_from_fixture():
     module = load_module()
     repo_root = Path(__file__).resolve().parents[2]