Skip to content

Commit e00ff2b

Browse files
committed
[build] update dockerfile for deepep, deep_gemm and ci proxy speed fix
1 parent 3bf2916 commit e00ff2b

3 files changed

Lines changed: 102 additions & 63 deletions

File tree

.dev_scripts/xtuner_rl_path.pth

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
import os
import sys
import warnings

# Conditionally inject an inference-engine site directory into sys.path.
# Installed as a .pth file so the hook runs at interpreter startup, before
# any user code imports lmdeploy/sglang.
# NOTE(review): multi-line conditional code in a .pth file is non-standard --
# site.py only executes .pth lines that begin with "import"; confirm this
# file is consumed the way the image build expects.

# Use a '' default: os.getenv() returns None when the variable is unset, and
# None.lower() would crash every Python startup with AttributeError.
if os.getenv('XTUNER_USE_LMDEPLOY', '').lower() in ['1', 'on', 'true']:
    lmdeploy_envs_dir = os.getenv('XTUNER_LMDEPLOY_ENVS_DIR', '/envs/lmdeploy')
    if lmdeploy_envs_dir not in sys.path:
        sys.path.insert(0, lmdeploy_envs_dir)
        warnings.warn(
            f"XTUNER_USE_LMDEPLOY is set to true. Injected {lmdeploy_envs_dir} into sys.path for lmdeploy imports."
        )

elif os.getenv('XTUNER_USE_SGLANG', '').lower() in ['1', 'on', 'true']:
    sglang_envs_dir = os.getenv('XTUNER_SGLANG_ENVS_DIR', '/envs/sglang')
    if sglang_envs_dir not in sys.path:
        sys.path.insert(0, sglang_envs_dir)
        warnings.warn(
            f"XTUNER_USE_SGLANG is set to true. Injected {sglang_envs_dir} into sys.path for sglang imports."
        )

# else:
#     warnings.warn(
#         "Neither XTUNER_USE_LMDEPLOY nor XTUNER_USE_SGLANG is set to true. No custom paths will be injected."
#     )

Dockerfile

Lines changed: 65 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -2,39 +2,38 @@
22
# builder
33
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:25.03-py3
44

5-
## build args
5+
## build base env
66
FROM ${BASE_IMAGE} AS setup_env
77

8-
ARG TORCH_VERSION
98
ARG PPA_SOURCE
10-
11-
RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
12-
sed -i "s@http://.*.ubuntu.com@${PPA_SOURCE}@g" /etc/apt/sources.list.d/ubuntu.sources && \
9+
# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
10+
RUN sed -i "s@http://.*.ubuntu.com@${PPA_SOURCE}@g" /etc/apt/sources.list.d/ubuntu.sources && \
1311
apt update && \
1412
apt install --no-install-recommends ca-certificates -y && \
1513
apt install --no-install-recommends bc wget -y && \
1614
apt install --no-install-recommends build-essential sudo -y && \
1715
apt install --no-install-recommends git curl pkg-config tree unzip tmux \
1816
openssh-server openssh-client dnsutils iproute2 lsof net-tools zsh rclone \
19-
iputils-ping telnet netcat-openbsd -y && \
17+
iputils-ping telnet netcat-openbsd htop bubblewrap socat -y && \
2018
apt clean && rm -rf /var/lib/apt/lists/*
2119

2220
RUN if [ -d /etc/pip ] && [ -f /etc/pip/constraint.txt ]; then echo > /etc/pip/constraint.txt; fi
23-
RUN pip install pystack py-spy --no-cache-dir
21+
RUN pip uninstall flash_attn opencv -y && rm -rf /usr/local/lib/python3.12/dist-packages/cv2
2422
RUN git config --system --add safe.directory "*"
2523

24+
# torch
25+
ARG TORCH_VERSION
26+
ARG PYTORCH_WHEELS_URL
2627
RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
28+
--mount=type=secret,id=NO_PROXY,env=no_proxy \
2729
if [ -n "${TORCH_VERSION}" ]; then \
2830
pip install torchvision torch==${TORCH_VERSION} \
29-
--index-url https://download.pytorch.org/whl/cu128 \
30-
--extra-index-url https://download.pytorch.org/whl/cu126 \
31+
-i ${PYTORCH_WHEELS_URL}/cu128 \
32+
--extra-index-url ${PYTORCH_WHEELS_URL}/cu126 \
3133
--no-cache-dir; \
3234
fi
33-
3435
# set reasonable default for CUDA architectures when building ngc image
35-
ENV TORCH_CUDA_ARCH_LIST="7.5 8.0 8.6 9.0 10.0"
36-
37-
RUN pip uninstall flash_attn opencv -y && rm -rf /usr/local/lib/python3.12/dist-packages/cv2
36+
ENV TORCH_CUDA_ARCH_LIST="9.0 10.0"
3837

3938
ARG FLASH_ATTN_DIR=/tmp/flash-attn
4039
ARG CODESPACE=/root/codespace
@@ -56,6 +55,9 @@ ARG CODESPACE
5655
ARG FLASH_ATTN_DIR
5756
ARG FLASH_ATTN3_DIR
5857
ARG FLASH_ATTN_URL
58+
# force Hopper (sm90) for now; you can change it through build args
59+
ARG FLASH_ATTN_CUDA_ARCHS="90"
60+
ARG FLASH_ATTENTION_DISABLE_SM80="TRUE"
5961

6062
RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
6163
git clone $(echo ${FLASH_ATTN_URL} | cut -d '@' -f 1) && \
@@ -119,42 +121,41 @@ WORKDIR ${CODESPACE}/causal-conv1d
119121

120122
RUN CAUSAL_CONV1D_FORCE_BUILD=TRUE pip wheel -w ${CAUSAL_CONV1D_DIR} -v --no-deps --no-build-isolation .
121123

122-
# pypi install nvshmem and compile deepep
124+
# compile nvshmem and deepep
123125
FROM setup_env AS deep_ep
124126

125127
ARG CODESPACE
126128
ARG DEEP_EP_DIR
127129
ARG DEEP_EP_URL
128-
# build sm90 and sm100 for deep_ep for now
129-
ARG TORCH_CUDA_ARCH_LIST="9.0 10.0"
130130

131+
# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
132+
# curl -LO https://github.com/NVIDIA/nvshmem/releases/download/v3.4.5-0/nvshmem_src_cuda-all-all-3.4.5.tar.gz && \
133+
# tar -zxvf nvshmem_src_cuda-all-all-3.4.5.tar.gz && \
134+
# cd ${CODESPACE}/nvshmem_src && \
135+
# NVSHMEM_SHMEM_SUPPORT=0 \
136+
# NVSHMEM_UCX_SUPPORT=0 \
137+
# NVSHMEM_USE_NCCL=0 \
138+
# NVSHMEM_MPI_SUPPORT=0 \
139+
# NVSHMEM_IBGDA_SUPPORT=1 \
140+
# NVSHMEM_USE_GDRCOPY=0 \
141+
# NVSHMEM_PMIX_SUPPORT=0 \
142+
# NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
143+
# NVSHMEM_BUILD_TESTS=0 \
144+
# NVSHMEM_BUILD_EXAMPLES=0 \
145+
# NVSHMEM_BUILD_HYDRA_LAUNCHER=0 \
146+
# NVSHMEM_BUILD_TXZ_PACKAGE=0 \
147+
# NVSHMEM_BUILD_PYTHON_LIB=OFF \
148+
# cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_PREFIX} -DMLX5_lib=/lib/x86_64-linux-gnu/libmlx5.so.1 && \
149+
# cmake --build build --target install --parallel 32 && \
131150
RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
132-
curl -LO https://github.com/NVIDIA/nvshmem/releases/download/v3.4.5-0/nvshmem_src_cuda-all-all-3.4.5.tar.gz && \
133-
tar -zxvf nvshmem_src_cuda-all-all-3.4.5.tar.gz && \
134-
cd ${CODESPACE}/nvshmem_src && \
135-
NVSHMEM_SHMEM_SUPPORT=0 \
136-
NVSHMEM_UCX_SUPPORT=0 \
137-
NVSHMEM_USE_NCCL=0 \
138-
NVSHMEM_MPI_SUPPORT=0 \
139-
NVSHMEM_IBGDA_SUPPORT=1 \
140-
NVSHMEM_USE_GDRCOPY=0 \
141-
NVSHMEM_PMIX_SUPPORT=0 \
142-
NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
143-
NVSHMEM_BUILD_TESTS=0 \
144-
NVSHMEM_BUILD_EXAMPLES=0 \
145-
NVSHMEM_BUILD_HYDRA_LAUNCHER=0 \
146-
NVSHMEM_BUILD_TXZ_PACKAGE=0 \
147-
NVSHMEM_BUILD_PYTHON_LIB=OFF \
148-
cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_PREFIX} -DMLX5_lib=/lib/x86_64-linux-gnu/libmlx5.so.1 && \
149-
cmake --build build --target install --parallel 32 && \
150151
cd ${CODESPACE} && git clone $(echo ${DEEP_EP_URL} | cut -d '@' -f 1) && \
151152
cd ${CODESPACE}/DeepEP && \
152153
git checkout $(echo ${DEEP_EP_URL} | cut -d '@' -f 2) && \
153154
git submodule update --init --recursive --force
154155

155156
WORKDIR ${CODESPACE}/DeepEP
156157

157-
RUN NVSHMEM_DIR=${NVSHMEM_PREFIX} pip wheel -w ${DEEP_EP_DIR} -v --no-deps .
158+
RUN pip wheel -w ${DEEP_EP_DIR} -v --no-deps .
158159

159160
# compile deep_gemm
160161
FROM setup_env AS deep_gemm
@@ -192,7 +193,7 @@ COPY --from=flash_attn ${FLASH_ATTN_DIR} ${FLASH_ATTN_DIR}
192193
COPY --from=adaptive_gemm ${ADAPTIVE_GEMM_DIR} ${ADAPTIVE_GEMM_DIR}
193194
COPY --from=grouped_gemm ${GROUPED_GEMM_DIR} ${GROUPED_GEMM_DIR}
194195
COPY --from=deep_ep ${DEEP_EP_DIR} ${DEEP_EP_DIR}
195-
COPY --from=deep_ep ${NVSHMEM_PREFIX} ${NVSHMEM_PREFIX}
196+
# COPY --from=deep_ep ${NVSHMEM_PREFIX} ${NVSHMEM_PREFIX}
196197
COPY --from=deep_gemm ${DEEP_GEMM_DIR} ${DEEP_GEMM_DIR}
197198
COPY --from=causal_conv1d ${CAUSAL_CONV1D_DIR} ${CAUSAL_CONV1D_DIR}
198199

@@ -204,11 +205,16 @@ RUN unzip ${DEEP_EP_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
204205
RUN unzip ${DEEP_GEMM_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
205206
RUN unzip ${CAUSAL_CONV1D_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
206207

207-
# install sglang and its runtime requirements
208+
ARG DEFAULT_PYPI_URL
209+
210+
# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
211+
RUN pip install pystack py-spy --no-cache-dir -i ${DEFAULT_PYPI_URL}
208212

213+
# install sglang and its runtime requirements
209214
ENV XTUNER_SGLANG_ENVS_DIR=/envs/sglang
210215

211-
RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
216+
# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
217+
RUN \
212218
pip install --target ${XTUNER_SGLANG_ENVS_DIR} \
213219
sglang==0.5.9 sgl-kernel==0.3.21 \
214220
apache-tvm-ffi==0.1.9 \
@@ -229,7 +235,7 @@ RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
229235
torchao==0.9.0 \
230236
torchaudio==2.9.1 \
231237
torchcodec==0.8.0 \
232-
xgrammar==0.1.27 \
238+
xgrammar==0.1.32 \
233239
smg-grpc-proto==0.4.5 \
234240
grpcio==1.78.1 \
235241
grpcio-reflection==1.78.1 \
@@ -250,54 +256,60 @@ RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
250256
llguidance==0.7.11 blobfile==3.0.0 \
251257
pybase64 orjson uvloop setproctitle msgspec \
252258
compressed_tensors python-multipart \
253-
hf_transfer interegular --no-cache-dir --no-deps
259+
hf_transfer interegular --no-cache-dir --no-deps -i ${DEFAULT_PYPI_URL}
254260

255261
# install lmdeploy and its missing runtime requirements
256262
ARG LMDEPLOY_VERSION
257263
ARG LMDEPLOY_URL
258264
ENV XTUNER_LMDEPLOY_ENVS_DIR=/envs/lmdeploy
259265

266+
# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
267+
ARG LMDEPLOY_WHEELS=https://github.com/InternLM/lmdeploy/releases/download/v${LMDEPLOY_VERSION}/lmdeploy-${LMDEPLOY_VERSION}+cu128-cp312-cp312-manylinux2014_x86_64.whl
260268
RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
269+
--mount=type=secret,id=NO_PROXY,env=no_proxy \
261270
pip install fastapi fire openai outlines \
262-
partial_json_parser ray[default] shortuuid uvicorn \
263-
'pydantic>2' openai_harmony dlblas --target ${XTUNER_LMDEPLOY_ENVS_DIR} --no-cache-dir && \
271+
partial_json_parser 'ray[default]<3' shortuuid uvicorn \
272+
'pydantic>2' openai_harmony dlblas --target ${XTUNER_LMDEPLOY_ENVS_DIR} --no-cache-dir -i ${DEFAULT_PYPI_URL} && \
264273
if [ -n "${LMDEPLOY_VERSION}" ]; then \
265-
pip install lmdeploy==${LMDEPLOY_VERSION} --target ${XTUNER_LMDEPLOY_ENVS_DIR} --no-deps --no-cache-dir; \
274+
# pip install lmdeploy==${LMDEPLOY_VERSION} --target ${XTUNER_LMDEPLOY_ENVS_DIR} --no-deps --no-cache-dir -i ${DEFAULT_PYPI_URL}; \
275+
echo pip install ${LMDEPLOY_WHEELS} --target ${XTUNER_LMDEPLOY_ENVS_DIR} --no-deps --no-cache-dir -i ${DEFAULT_PYPI_URL}; \
276+
pip install ${LMDEPLOY_WHEELS} --target ${XTUNER_LMDEPLOY_ENVS_DIR} --no-deps --no-cache-dir -i ${DEFAULT_PYPI_URL}; \
266277
else \
267278
git clone $(echo ${LMDEPLOY_URL} | cut -d '@' -f 1) && \
268279
cd ${CODESPACE}/lmdeploy && \
269280
git checkout $(echo ${LMDEPLOY_URL} | cut -d '@' -f 2) && \
270-
pip install . -v --target ${XTUNER_LMDEPLOY_ENVS_DIR} --no-deps --no-cache-dir; \
281+
pip install . -v --target ${XTUNER_LMDEPLOY_ENVS_DIR} --no-deps --no-cache-dir -i ${DEFAULT_PYPI_URL}; \
271282
fi
272283

273284
## install xtuner
274285
ARG XTUNER_URL
275286
ARG XTUNER_COMMIT
276-
#RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
287+
# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
277288
# git clone $(echo ${XTUNER_URL} | cut -d '@' -f 1) && \
278289
# cd ${CODESPACE}/xtuner && \
279290
# git checkout $(echo ${XTUNER_URL} | cut -d '@' -f 2)
280291
COPY . ${CODESPACE}/xtuner
281292

282293
WORKDIR ${CODESPACE}/xtuner
283-
RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
284-
pip install .[all] -v --no-cache-dir
285294

286295
# Install custom .pth file for conditional lmdeploy and sglang path injection
287296
RUN cp .dev_scripts/xtuner_rl_path.pth ${PYTHON_SITE_PACKAGE_PATH}/xtuner_rl_path.pth
288297

298+
# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
299+
RUN pip install .[all] -v --no-cache-dir -i ${DEFAULT_PYPI_URL}
300+
289301
WORKDIR ${CODESPACE}
290302

291303
# nccl update for torch 2.6.0
292-
RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
293-
if [ "x${TORCH_VERSION}" = "x2.6.0" ]; then \
294-
pip install nvidia-nccl-cu12==2.25.1 --no-cache-dir; \
304+
# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
305+
RUN if [ "x${TORCH_VERSION}" = "x2.6.0" ]; then \
306+
pip install nvidia-nccl-cu12==2.25.1 --no-cache-dir -i ${DEFAULT_PYPI_URL}; \
295307
fi
296308

297309
# cudnn update for torch 2.9.1
298-
RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
299-
if [ "x${TORCH_VERSION}" = "x2.9.1" ]; then \
300-
pip install nvidia-cudnn-cu12==9.15.1.9 --no-cache-dir; \
310+
# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
311+
RUN if [ "x${TORCH_VERSION}" = "x2.9.1" ]; then \
312+
pip install nvidia-cudnn-cu12==9.15.1.9 --no-cache-dir -i ${DEFAULT_PYPI_URL}; \
301313
fi
302314

303315
# setup sysctl

image_build.sh

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,22 +10,27 @@ export DEEP_EP_URL=https://github.com/deepseek-ai/DeepEP@9af0e0d0e74f3577af1979c
1010
export DEEP_GEMM_URL=https://github.com/deepseek-ai/DeepGEMM@c9f8b34dcdacc20aa746b786f983492c51072870 # v2.1.1.post3
1111
export CAUSAL_CONV1D_URL=https://github.com/Dao-AILab/causal-conv1d@da6dbaa9fd5a919967f14d3fd031da1288ad5025 # v1.6.0
1212

13-
export TORCH_VERSION=${TORCH_VERSION:-"2.8.0"}
14-
export LMDEPLOY_VERSION="0.11.0"
15-
# export LMDEPLOY_URL=https://github.com/InternLM/lmdeploy@a9a24fbd8985374cb01ecb6021d1ce9668253c9c
13+
export TORCH_VERSION=${TORCH_VERSION:-"2.9.0"}
14+
export LMDEPLOY_VERSION="0.12.2"
15+
# export LMDEPLOY_URL=https://github.com/InternLM/lmdeploy@9a50f1f4eaf1e4fbe45892bc8017a7359237160c
1616
export PPA_SOURCE="https://mirrors.aliyun.com"
17-
export SGLANG_VERSION="0.5.3"
17+
export DEFAULT_PYPI_URL=${DEFAULT_PYPI_URL:-"https://mirrors.aliyun.com/pypi/simple"}
18+
# mirror https://download.pytorch.org/whl
19+
export PYTORCH_WHEELS_URL=${PYTORCH_WHEELS_URL:-"https://download.pytorch.org/whl"}
1820

1921
image_name=${IMAGE_NAME:-"xtuner"}
2022
image_tag=${IMAGE_TAG:-"pt$(echo ${TORCH_VERSION} | awk -F. '{print $1$2}')_$(date +%Y%m%d)_${XTUNER_COMMIT:0:7}"}
2123

2224
docker build . \
2325
-t "$image_name:$image_tag" \
2426
--secret id=HTTPS_PROXY \
27+
--secret id=NO_PROXY \
2528
--build-arg TORCH_VERSION=$TORCH_VERSION\
2629
--build-arg BASE_IMAGE=$BASE_IMAGE \
27-
--build-arg PPA_SOURCE=$PPA_SOURCE \
28-
--build-arg ADAPTIVE_GEMM_URL=$ADAPTIVE_GEMM_URL \
30+
--build-arg PPA_SOURCE="$PPA_SOURCE" \
31+
--build-arg DEFAULT_PYPI_URL="$DEFAULT_PYPI_URL" \
32+
--build-arg PYTORCH_WHEELS_URL="$PYTORCH_WHEELS_URL" \
33+
--build-arg ADAPTIVE_GEMM_URL="$ADAPTIVE_GEMM_URL" \
2934
--build-arg FLASH_ATTN_URL=$FLASH_ATTN_URL \
3035
--build-arg GROUPED_GEMM_URL=$GROUPED_GEMM_URL \
3136
--build-arg CAUSAL_CONV1D_URL=$CAUSAL_CONV1D_URL \
@@ -34,8 +39,6 @@ docker build . \
3439
--build-arg XTUNER_URL=$XTUNER_URL \
3540
--build-arg XTUNER_COMMIT=$XTUNER_COMMIT \
3641
--build-arg LMDEPLOY_VERSION=$LMDEPLOY_VERSION \
37-
--build-arg LMDEPLOY_URL=$LMDEPLOY_URL \
38-
--build-arg SGLANG_VERSION=$SGLANG_VERSION \
3942
--progress=plain \
4043
--label "BASE_IMAGE=$BASE_IMAGE" \
4144
--label "XTUNER_URL=${XTUNER_URL/@/\/tree\/}" \
@@ -46,5 +49,4 @@ docker build . \
4649
--label "CAUSAL_CONV1D_URL=${CAUSAL_CONV1D_URL/@/\/tree\/}" \
4750
--label "DEEP_EP_URL=${DEEP_EP_URL/@/\/tree\/}" \
4851
--label "DEEP_GEMM_URL=${DEEP_GEMM_URL/@/\/tree\/}" \
49-
--label "LMDEPLOY_VERSION=$LMDEPLOY_VERSION" \
50-
--label "SGLANG_VERSION=$SGLANG_VERSION"
52+
--label "LMDEPLOY_VERSION=$LMDEPLOY_VERSION"

0 commit comments

Comments
 (0)