Skip to content

Commit 8f937bb

Browse files
committed
initial commit for seed-env integration
Refactor CI/CD workflows: streamline Docker image build process and remove obsolete requirements
1 parent 2a74af1 commit 8f937bb

18 files changed

Lines changed: 760 additions & 418 deletions

File tree

.github/workflows/UnitTests.yml

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,8 @@ jobs:
4242
python-version: '3.12'
4343
- name: Install dependencies
4444
run: |
45-
pip install -e .
46-
pip uninstall jax jaxlib libtpu-nightly libtpu -y
47-
bash setup.sh MODE=stable
45+
bash setup.sh MODE=stable DEVICE=tpu
4846
export PATH=$PATH:$HOME/.local/bin
49-
pip install ruff
50-
pip install isort
51-
pip install pytest
5247
- name: Analysing the code with ruff
5348
run: |
5449
ruff check .

.github/workflows/UploadDockerImages.yml

Lines changed: 53 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,23 +20,60 @@ name: Build Images
2020
on:
2121
schedule:
2222
# Run the job daily at 00:00 (midnight) UTC
23-
- cron: '0 0 * * *'
24-
23+
- cron: '0 0 * * *'
2524
workflow_dispatch:
25+
inputs:
26+
target_device:
27+
description: 'Specify target device (all or tpu)'
28+
required: true
29+
type: choice
30+
default: 'tpu'
31+
options:
32+
- all
33+
- tpu
34+
35+
permissions:
36+
contents: read
2637

2738
jobs:
28-
build-image:
29-
runs-on: ["self-hosted", "e2", "cpu"]
39+
setup:
40+
runs-on: ubuntu-latest
41+
outputs:
42+
maxdiffusion_sha: ${{ steps.vars.outputs.maxdiffusion_sha }}
43+
image_date: ${{ steps.vars.outputs.image_date }}
3044
steps:
31-
- uses: actions/checkout@v3
32-
- name: Cleanup old docker images
33-
run: docker system prune --all --force
34-
- name: build maxdiffusion jax ai image
35-
run: |
36-
bash .github/workflows/build_and_upload_images.sh CLOUD_IMAGE_NAME=maxdiffusion_jax_stable_stack MODE=jax_ai_image PROJECT=tpu-prod-env-multipod LOCAL_IMAGE_NAME=maxdiffusion_jax_stable_stack BASEIMAGE=us-docker.pkg.dev/cloud-tpu-images/jax-ai-image/tpu:latest
37-
- name: build maxdiffusion w/ nightly jax ai image
38-
run: |
39-
bash .github/workflows/build_and_upload_images.sh CLOUD_IMAGE_NAME=maxdiffusion_jax_stable_stack_nightly MODE=jax_ai_image PROJECT=tpu-prod-env-multipod LOCAL_IMAGE_NAME=maxdiffusion_jax_stable_stack BASEIMAGE=us-docker.pkg.dev/tpu-prod-env-multipod/jax-stable-stack/tpu/jax_nightly:latest
40-
- name: build maxdiffusion jax nightly image
41-
run: |
42-
bash .github/workflows/build_and_upload_images.sh CLOUD_IMAGE_NAME=maxdiffusion_jax_nightly MODE=nightly PROJECT=tpu-prod-env-multipod LOCAL_IMAGE_NAME=maxdiffusion_jax_nightly
45+
- name: Checkout MaxDiffusion
46+
uses: actions/checkout@v5
47+
48+
- name: Get metadata
49+
id: vars
50+
run: |
51+
# MaxDiffusion SHA
52+
echo "maxdiffusion_sha=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT
53+
54+
# Image date
55+
echo "image_date=$(date +%Y-%m-%d)" >> $GITHUB_OUTPUT
56+
# Calls the reusable build-and-push workflow once per image variant listed in
# the matrix below.
build-image:
  name: ${{ matrix.image_name }}
  needs: setup
  strategy:
    # A failure in one variant does not cancel the other matrix entries.
    fail-fast: false
    matrix:
      include:
        - image_name: maxdiffusion_jax_stable
          build_mode: stable
          device: tpu
          dockerfile: ./maxdiffusion_dependencies.Dockerfile
        - image_name: maxdiffusion_jax_nightly
          build_mode: nightly
          device: tpu
          dockerfile: ./maxdiffusion_dependencies.Dockerfile
  uses: ./.github/workflows/build_and_push_docker_image.yml
  with:
    # Per-variant values come from the matrix entry.
    image_name: ${{ matrix.image_name }}
    device: ${{ matrix.device }}
    build_mode: ${{ matrix.build_mode }}
    dockerfile: ${{ matrix.dockerfile }}
    # Commit SHA and date are computed once by the `setup` job and passed to
    # every variant.
    maxdiffusion_sha: ${{ needs.setup.outputs.maxdiffusion_sha }}
    image_date: ${{ needs.setup.outputs.image_date }}
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
# Copyright 2025 Google LLC
2+
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# This reusable workflow builds a MaxDiffusion Docker image and pushes it to GCR (gcr.io).
16+
17+
name: Build and Push MaxDiffusion Docker Images
18+
# Reusable workflow: the only trigger is a call from another workflow.
on:
  workflow_call:
    inputs:
      # Repository name of the image; the job pushes to
      # gcr.io/tpu-prod-env-multipod/<image_name>.
      image_name:
        type: string
        required: true
      # Target device. Forwarded to the Dockerfile as the DEVICE build arg and
      # compared with the `target_device` dispatch input to decide whether to build.
      device:
        type: string
        required: true
      # Forwarded to the Dockerfile as the MODE build arg.
      build_mode:
        type: string
        required: true
      # Path of the Dockerfile handed to docker/build-push-action.
      dockerfile:
        type: string
        required: true
      # Commit to check out before building.
      maxdiffusion_sha:
        type: string
        required: true
      # Date string used for the date tag and, with dashes and colons stripped,
      # inside the maxdiffusion_<hash>_<date> tag.
      image_date:
        type: string
        required: true
      # Optional base image; when non-empty it is forwarded as the BASEIMAGE build arg.
      base_image:
        type: string
        required: false
        default: ''

# The workflow token only gets read access to repository contents.
permissions:
  contents: read
47+
jobs:
  # Builds one image variant with BuildKit, pushes it as :latest, then adds a
  # date tag and a maxdiffusion_<short-sha>_<date> tag to the pushed image.
  build_and_push:
    runs-on: linux-x86-n2-16-buildkit
    container: google/cloud-sdk:524.0.0
    # Run for schedule and pull_request events, and for manual dispatches whose
    # `target_device` is one of the listed values. The outer parentheses only
    # spell out the existing `&&`-before-`||` precedence; the result is unchanged.
    if: >
      github.event_name == 'schedule' ||
      github.event_name == 'pull_request' ||
      (
        github.event_name == 'workflow_dispatch' && (
          github.event.inputs.target_device == 'all' ||
          github.event.inputs.target_device == 'tpu' ||
          github.event.inputs.target_device == 'gpu'
        )
      )
    steps:
      # On a manual dispatch, skip this variant unless the requested device is
      # "all" or equals this variant's device. Every later step is gated on
      # the `should_run` output written here.
      - name: Check if build should run
        id: check
        shell: bash
        # Expression values reach the script through environment variables
        # rather than being pasted into the script text, so a value containing
        # quotes or shell metacharacters cannot change the command that runs.
        env:
          EVENT_NAME: ${{ github.event_name }}
          TARGET_DEVICE: ${{ github.event.inputs.target_device }}
          DEVICE: ${{ inputs.device }}
          IMAGE_NAME: ${{ inputs.image_name }}
          BUILD_MODE: ${{ inputs.build_mode }}
        run: |
          if [[ "$EVENT_NAME" == "workflow_dispatch" && "$TARGET_DEVICE" != "all" && "$TARGET_DEVICE" != "$DEVICE" ]]; then
            echo "should_run=false" >> "$GITHUB_OUTPUT"
            echo "Skipping ${IMAGE_NAME} build for device: ${DEVICE} in ${BUILD_MODE} mode."
          else
            echo "should_run=true" >> "$GITHUB_OUTPUT"
            echo "Building ${IMAGE_NAME} for device: ${DEVICE} in ${BUILD_MODE} mode."
          fi

      - name: Checkout MaxDiffusion
        uses: actions/checkout@v5
        if: steps.check.outputs.should_run == 'true'
        with:
          # This ensures that every job clones the exact same commit as "setup" job
          ref: ${{ inputs.maxdiffusion_sha }}

      # The later `git rev-parse` runs inside the job container; mark every
      # directory as safe so git accepts the checked-out workspace.
      - name: Mark git repositories as safe
        run: git config --global --add safe.directory '*'
        if: steps.check.outputs.should_run == 'true'

      # Register gcloud as the Docker credential helper for both registries.
      - name: Configure Docker
        run: gcloud auth configure-docker us-docker.pkg.dev,gcr.io -q
        if: steps.check.outputs.should_run == 'true'

      # Attach BuildX to an already-running BuildKit endpoint.
      - name: Set up Docker BuildX
        uses: docker/setup-buildx-action@v3.11.1
        if: steps.check.outputs.should_run == 'true'
        with:
          driver: remote
          endpoint: tcp://localhost:1234

      - name: Build and push Docker image
        uses: docker/build-push-action@v6
        if: steps.check.outputs.should_run == 'true'
        with:
          push: true
          context: .
          file: ${{ inputs.dockerfile }}
          tags: gcr.io/tpu-prod-env-multipod/${{ inputs.image_name }}:latest
          # NOTE(review): only cache-from is configured; this step sets no
          # cache-to, so confirm the gha cache is populated somewhere else.
          cache-from: type=gha
          outputs: type=image,compression=zstd,force-compression=true
          # The last line expands to BASEIMAGE=<base_image> only when the
          # optional base_image input is non-empty; otherwise it is blank.
          build-args: |
            DEVICE=${{ inputs.device }}
            MODE=${{ inputs.build_mode }}
            JAX_VERSION=NONE
            ${{ inputs.base_image != '' && format('BASEIMAGE={0}', inputs.base_image) || '' }}

      - name: Add tags to Docker image
        if: steps.check.outputs.should_run == 'true'
        shell: bash
        # Inputs are passed through the environment for the same reason as in
        # the "Check if build should run" step.
        env:
          IMAGE_NAME: ${{ inputs.image_name }}
          IMAGE_DATE: ${{ inputs.image_date }}
        run: |
          SOURCE_IMAGE="gcr.io/tpu-prod-env-multipod/${IMAGE_NAME}"

          # Add date tag
          gcloud container images add-tag "$SOURCE_IMAGE:latest" "$SOURCE_IMAGE:${IMAGE_DATE}" --quiet

          # Convert date to YYYYMMDD format
          clean_date=$(echo "${IMAGE_DATE}" | sed 's/[-:]//g' | cut -c1-8)

          # Add MaxDiffusion tag
          maxdiffusion_hash=$(git rev-parse --short HEAD)
          gcloud container images add-tag "$SOURCE_IMAGE:latest" "$SOURCE_IMAGE:maxdiffusion_${maxdiffusion_hash}_${clean_date}" --quiet
126+

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@ modified_only_fixup:
1818
# Update src/maxdiffusion/dependency_versions_table.py
1919

2020
deps_table_update:
21-
@python setup.py deps_table_update
21+
@python utils/update_dependency_table.py
2222

2323
deps_table_check_updated:
2424
@md5sum src/maxdiffusion/dependency_versions_table.py > md5sum.saved
25-
@python setup.py deps_table_update
25+
@python utils/update_dependency_table.py
2626
@md5sum -c --quiet md5sum.saved || (printf "\nError: the version dependency table is outdated.\nPlease run 'make fixup' or 'make style' and commit the changes.\n\n" && exit 1)
2727
@rm md5sum.saved
2828

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
--extra-index-url https://download.pytorch.org/whl/cpu
2+
absl-py
3+
aqtp
4+
chex
5+
datasets
6+
einops
7+
flax
8+
ftfy
9+
google-cloud-storage
10+
grain
11+
hf_transfer
12+
huggingface_hub
13+
imageio-ffmpeg
14+
imageio
15+
jax
16+
jaxlib
17+
Jinja2
18+
opencv-python-headless
19+
optax
20+
orbax-checkpoint
21+
parameterized
22+
Pillow
23+
pyink
24+
pylint
25+
pytest
26+
ruff
27+
scikit-image
28+
sentencepiece
29+
tensorboard-plugin-profile
30+
tensorboard
31+
tensorboardx
32+
tensorflow-datasets
33+
tensorflow
34+
tokamax
35+
tokenizers
36+
transformers<5.0.0
37+
38+
# Pin torch and torchvision to explicit CPU-only wheels (CPython 3.12, manylinux x86_64)
39+
# so that seed-env does not install the GPU builds from PyPI.
40+
torch @ https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl
41+
torchvision @ https://download.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl
42+
qwix @ https://github.com/google/qwix/archive/408a0f48f988b6c5b180e07f0cb1d05997bf0dcc.zip
43+

0 commit comments

Comments
 (0)