CI: install Python dependencies from a pinned utils/requirements.txt.

Each workflow job that runs utils/ scripts now sets up Python 3.12 via
actions/setup-python with pip caching keyed on utils/requirements.txt, and
installs from that file instead of ad-hoc `pip install <pkg>` calls. Shell
expansions written into run steps ($GITHUB_STEP_SUMMARY, $STATS_FILENAME and
the result-prefix expression) are double-quoted.

NOTE(review): leading whitespace on context lines was reconstructed using the
conventional workflow layout; confirm against the target files before applying.
The index line for collect-results.yml predates the quoting of the
result-prefix argument in the "Aggregate results" step.

diff --git a/.github/workflows/collect-evals.yml b/.github/workflows/collect-evals.yml
index 3b3f08bf1..65f71d850 100644
--- a/.github/workflows/collect-evals.yml
+++ b/.github/workflows/collect-evals.yml
@@ -27,12 +27,24 @@ jobs:
           path: eval_results/
           pattern: ${{ inputs.result-prefix && format('eval_{0}_*', inputs.result-prefix) || 'eval_*' }}
 
+      - name: Set up Python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+          cache-dependency-path: utils/requirements.txt
+
+      - name: Install dependencies
+        run: |
+          pip install -r utils/requirements.txt
+
       - name: Summarize evals
         run: |
-          pip install tabulate
-          echo "## Eval Summary" >> $GITHUB_STEP_SUMMARY
-          echo "" >> $GITHUB_STEP_SUMMARY
-          python3 utils/collect_eval_results.py eval_results/ ${{ inputs.result-prefix || 'all' }} >> $GITHUB_STEP_SUMMARY
+          {
+            echo "## Eval Summary"
+            echo ""
+            python3 utils/collect_eval_results.py eval_results/ "${{ inputs.result-prefix || 'all' }}"
+          } >> "$GITHUB_STEP_SUMMARY"
 
       - name: Upload aggregated evals
         uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
diff --git a/.github/workflows/collect-results.yml b/.github/workflows/collect-results.yml
index 353918609..843c3452f 100644
--- a/.github/workflows/collect-results.yml
+++ b/.github/workflows/collect-results.yml
@@ -28,15 +28,22 @@ jobs:
           path: results/
           pattern: ${{ inputs.result-prefix && format('{0}_*', inputs.result-prefix) || '*' }}
 
-      - name: Print summary
+      - name: Set up Python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+          cache-dependency-path: utils/requirements.txt
+
+      - name: Install dependencies
         run: |
-          pip install tabulate
-          python3 utils/summarize.py results/ >> $GITHUB_STEP_SUMMARY
+          pip install -r utils/requirements.txt
+
+      - name: Print summary
+        run: python3 utils/summarize.py results/ >> "$GITHUB_STEP_SUMMARY"
 
       - name: Aggregate results
-        run: |
-          pip install tabulate
-          python3 utils/collect_results.py results/ ${{ inputs.result-prefix || 'all' }}
+        run: python3 utils/collect_results.py results/ "${{ inputs.result-prefix || 'all' }}"
 
       - name: Upload aggregated results
         uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml
index afbec49b0..126b96384 100644
--- a/.github/workflows/e2e-tests.yml
+++ b/.github/workflows/e2e-tests.yml
@@ -49,9 +49,19 @@ jobs:
         if: ${{ !inputs.ref || inputs.ref == '' }}
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
 
+      - name: Set up Python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+          cache-dependency-path: utils/requirements.txt
+
+      - name: Install dependencies
+        run: |
+          pip install -r utils/requirements.txt
+
       - id: get-jobs
         run: |
-          pip install pydantic
           CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix_logic/generate_sweep_configs.py \
             ${{ inputs.generate-cli-command || github.event.inputs.generate-cli-command }})
           SINGLE=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if 'prefill' not in x and not x.get('eval-only', False)]))")
@@ -196,11 +206,19 @@ jobs:
           path: ${{ env.RESULTS_DIR }}
           pattern: results_*
 
+      - name: Set up Python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+          cache-dependency-path: utils/requirements.txt
+
       - name: Install python dependencies
-        run: pip install PyGithub
+        run: |
+          pip install -r utils/requirements.txt
 
       - name: Calculate success rate
-        run: python3 utils/calc_success_rate.py $STATS_FILENAME
+        run: python3 utils/calc_success_rate.py "$STATS_FILENAME"
 
       - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
         with:
diff --git a/.github/workflows/profile.yml b/.github/workflows/profile.yml
index 65c04a6c2..62b3777bb 100644
--- a/.github/workflows/profile.yml
+++ b/.github/workflows/profile.yml
@@ -50,10 +50,20 @@ jobs:
         with:
           ref: ${{ inputs.ref || github.ref }}
 
+      - name: Set up Python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+          cache-dependency-path: utils/requirements.txt
+
+      - name: Install dependencies
+        run: |
+          pip install -r utils/requirements.txt
+
       - id: gen
         name: Generate matrix via script
         run: |
-          pip install pydantic
           CLI_ARGS="test-config --config-files ${{ inputs.config-file }} --config-keys ${{ inputs.config-key }} --conc ${{ inputs.conc }}"
           CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix_logic/generate_sweep_configs.py $CLI_ARGS)
           echo "raw=$CONFIG_JSON" >> $GITHUB_OUTPUT
diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml
index 44e335f49..e35ad4bab 100644
--- a/.github/workflows/run-sweep.yml
+++ b/.github/workflows/run-sweep.yml
@@ -50,10 +50,19 @@ jobs:
         with:
           fetch-depth: 0
 
-      - id: setup
+      - name: Set up Python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+          cache-dependency-path: utils/requirements.txt
+
+      - name: Install dependencies
         run: |
-          pip install pydantic
+          pip install -r utils/requirements.txt
 
+      - id: setup
+        run: |
           if [ "${{ github.event_name }}" == "pull_request" ]; then
             BASE_REF="origin/${{ github.base_ref }}"
             HEAD_REF="${{ github.event.pull_request.head.sha }}"
@@ -251,11 +260,20 @@ jobs:
           path: ${{ env.RESULTS_DIR }}
           pattern: results_*
 
+      - name: Set up Python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+          cache-dependency-path: utils/requirements.txt
+
       - name: Install python dependencies
-        run: pip install PyGithub
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r utils/requirements.txt
 
       - name: Calculate success rate
-        run: python3 utils/calc_success_rate.py $STATS_FILENAME
+        run: python3 utils/calc_success_rate.py "$STATS_FILENAME"
 
       - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
         with:
@@ -286,11 +304,20 @@ jobs:
           path: results/
           pattern: results_bmk
 
+      - name: Set up Python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+          cache-dependency-path: utils/requirements.txt
+
       - name: Install dependencies
-        run: pip install psycopg2-binary tabulate
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r utils/requirements.txt
 
       - name: Compare results against main
-        run: python3 utils/compare_results.py results/ >> $GITHUB_STEP_SUMMARY
+        run: python3 utils/compare_results.py results/ >> "$GITHUB_STEP_SUMMARY"
 
   trigger-ingest:
     needs:
diff --git a/.github/workflows/test-matrix-logic.yml b/.github/workflows/test-matrix-logic.yml
index cb98fddf8..e14303e98 100644
--- a/.github/workflows/test-matrix-logic.yml
+++ b/.github/workflows/test-matrix-logic.yml
@@ -5,6 +5,7 @@ on:
   pull_request:
     paths:
       - 'utils/matrix_logic/**'
+      - 'utils/requirements.txt'
 
 permissions:
   contents: read
@@ -24,11 +25,13 @@ jobs:
         uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
         with:
           python-version: '3.12'
+          cache: 'pip'
+          cache-dependency-path: utils/requirements.txt
 
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install pytest pydantic pyyaml
+          pip install -r utils/requirements.txt
 
       - name: test_generate_sweep_configs tests
         run: |
diff --git a/.github/workflows/test-process-result.yml b/.github/workflows/test-process-result.yml
index d6967775e..ca80bb0c3 100644
--- a/.github/workflows/test-process-result.yml
+++ b/.github/workflows/test-process-result.yml
@@ -5,6 +5,7 @@ on:
     paths:
       - 'utils/process_result.py'
       - 'utils/test_process_result.py'
+      - 'utils/requirements.txt'
 
 permissions:
   contents: read
@@ -24,11 +25,13 @@ jobs:
         uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
         with:
           python-version: '3.12'
+          cache: 'pip'
+          cache-dependency-path: utils/requirements.txt
 
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install pytest
+          pip install -r utils/requirements.txt
 
       - name: Run pytest
         run: |
diff --git a/utils/requirements.txt b/utils/requirements.txt
new file mode 100644
index 000000000..0cc2b0b5d
--- /dev/null
+++ b/utils/requirements.txt
@@ -0,0 +1,8 @@
+# Direct dependencies for utils/ scripts and CI tests.
+# Exact pins for deterministic resolution on Python 3.12 in CI.
+pydantic==2.13.0
+PyGithub==2.8.1
+psycopg2-binary==2.9.11
+pytest==9.0.3
+PyYAML==6.0.3
+tabulate==0.10.0