Skip to content

Commit e3f7ef8

Browse files
authored
Merge newer build components into main (#2)
* Try new cu124 build: builds pytorch from source on both platforms with new Dockerfile approach
* Cmake fix, skip using ACL
* Don't forget to declare TARGETARCH
* Shell shenanigans
* Fix var and spacing
* Forgot distro tag
* Test PR builder
* Add ref back in
* Continue line please
* Switch to release branch
1 parent 9ab601d commit e3f7ef8

File tree

2 files changed

+142
-47
lines changed

2 files changed

+142
-47
lines changed

.github/workflows/build-vllm.yaml

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,26 +4,33 @@ env:
44
PARALLELISM: 1
55
TORCH_CUDA_ARCH_LIST: 9.0a
66
VLLM_FA_CMAKE_GPU_ARCHES: 90a-real
7+
TORCH_REF: v2.6.0
8+
TORCH_BUILD_VERSION: 2.6.0+cu124
9+
AUDIO_REF: v2.6.0
10+
AUDIO_BUILD_VERSION: 2.6.0+cu124
11+
VISION_REF: v0.21.0
12+
VISION_BUILD_VERSION: 0.21.0+cu124
713
TRITON_REF: release/3.2.x
8-
TRITON_BUILD_SUFFIX: +cu126
14+
TRITON_BUILD_SUFFIX: +cu124
915
XFORMERS_REF: v0.0.29.post2
10-
XFORMERS_BUILD_VERSION: 0.0.29.post2+cu126
16+
XFORMERS_BUILD_VERSION: 0.0.29.post2+cu124
1117
FLASHINFER_REF: v0.2.2.post1
12-
FLASHINFER_BUILD_SUFFIX: cu126
13-
VLLM_REF: v0.8.1
18+
FLASHINFER_BUILD_SUFFIX: cu124
19+
VLLM_REF: v0.8.3
20+
VLLM_BUILD_VERSION: 0.8.3
1421

1522
on:
1623
push:
17-
branches: [main]
24+
branches: [cu124]
1825
pull_request:
1926

2027
jobs:
2128
build:
2229
strategy:
2330
matrix:
2431
arch: [amd64, arm64]
25-
cuda_version: [12.6.3]
26-
image_distro: [ubuntu24.04]
32+
cuda_version: [12.4.1]
33+
image_distro: [ubuntu22.04]
2734
runs-on: [self-hosted, "${{ matrix.arch }}"]
2835
steps:
2936
- name: Prepare some env vars
@@ -65,13 +72,14 @@ jobs:
6572
FLASHINFER_REF=${{ env.FLASHINFER_REF }}
6673
FLASHINFER_BUILD_SUFFIX=${{ env.FLASHINFER_BUILD_SUFFIX }}
6774
VLLM_REF=${{ env.VLLM_REF }}
68-
cache-from: type=registry,ref=${{ env.GHCR_IMAGE }}:cache-cu${{ env.CUDA_TAG }}-${{ matrix.arch }}
69-
cache-to: type=registry,ref=${{ env.GHCR_IMAGE }}:cache-cu${{ env.CUDA_TAG }}-${{ matrix.arch }},mode=max
75+
VLLM_BUILD_VERSION=${{ env.VLLM_BUILD_VERSION }}
76+
cache-from: type=registry,ref=${{ env.GHCR_IMAGE }}:cache-cu${{ env.CUDA_TAG }}-${{ matrix.image_distro }}-${{ matrix.arch }}
77+
cache-to: type=registry,ref=${{ env.GHCR_IMAGE }}:cache-cu${{ env.CUDA_TAG }}-${{ matrix.image_distro }}-${{ matrix.arch }},mode=max
7078
context: .
7179
file: Dockerfile
7280
platforms: linux/${{ matrix.arch }}
7381
push: true
74-
tags: ${{ env.GHCR_IMAGE }}:${{ env.VLLM_REF }}-cu${{ env.CUDA_TAG }}-${{ matrix.arch }}
82+
tags: ${{ env.GHCR_IMAGE }}:${{ env.VLLM_REF }}-cu${{ env.CUDA_TAG }}-${{ matrix.image_distro }}-${{ matrix.arch }}
7583

7684
# Fix this to use matrix and handle imagetools create --append
7785
ghcr:
@@ -81,7 +89,7 @@ jobs:
8189
- name: Prepare some env vars
8290
run: |
8391
echo "GHCR_IMAGE=ghcr.io/${GITHUB_REPOSITORY@L}" >> ${GITHUB_ENV}
84-
echo "CUDA_TAG=126" >> ${GITHUB_ENV}
92+
echo "CUDA_TAG=124" >> ${GITHUB_ENV}
8593
8694
- name: Login to GHCR
8795
uses: docker/login-action@v3
@@ -92,6 +100,6 @@ jobs:
92100

93101
- name: Tag images
94102
run: |
95-
TAGS=(${VLLM_REF}-cu${CUDA_TAG}-{amd,arm}64)
103+
TAGS=(${VLLM_REF}-cu${CUDA_TAG}-ubuntu22.04-{amd,arm}64)
96104
docker buildx imagetools create -t ${GHCR_IMAGE}:${VLLM_REF} ${TAGS[@]/#/${GHCR_IMAGE}:}
97105
docker buildx imagetools create -t ${GHCR_IMAGE}:latest ${TAGS[@]/#/${GHCR_IMAGE}:}

Dockerfile

Lines changed: 122 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,16 @@
1-
ARG CUDA_VERSION=12.6.3
2-
ARG IMAGE_DISTRO=ubuntu24.04
1+
ARG CUDA_VERSION=12.4.1
2+
ARG IMAGE_DISTRO=ubuntu22.04
33
ARG PYTHON_VERSION=3.12
44

55
# ---------- Builder Base ----------
66
FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-devel-${IMAGE_DISTRO} AS base
77

8+
# Job scaling
9+
ARG MAX_JOBS=32
10+
ENV MAX_JOBS=${MAX_JOBS}
11+
ARG NVCC_THREADS=2
12+
ENV NVCC_THREADS=${NVCC_THREADS}
13+
814
# Set arch lists for all targets
915
# 'a' suffix is not forward compatible but enables all optimizations
1016
ARG TORCH_CUDA_ARCH_LIST="9.0a"
@@ -17,19 +23,17 @@ ENV DEBIAN_FRONTEND=noninteractive
1723
RUN apt update
1824
RUN apt upgrade -y
1925
RUN apt install -y --no-install-recommends \
20-
curl \
21-
git \
22-
libibverbs-dev \
23-
zlib1g-dev
24-
25-
# Clean apt cache
26-
RUN apt clean
27-
RUN rm -rf /var/lib/apt/lists/*
28-
RUN rm -rf /var/cache/apt/archives
26+
curl \
27+
gcc-12 g++-12 \
28+
git \
29+
libibverbs-dev \
30+
libjpeg-turbo8-dev \
31+
libpng-dev \
32+
zlib1g-dev
2933

3034
# Set compiler paths
31-
ENV CC=/usr/bin/gcc
32-
ENV CXX=/usr/bin/g++
35+
ENV CC=/usr/bin/gcc-12
36+
ENV CXX=/usr/bin/g++-12
3337

3438
# Install uv
3539
RUN curl -LsSf https://astral.sh/uv/install.sh | env UV_INSTALL_DIR=/usr/local/bin sh
@@ -45,69 +49,146 @@ ENV PATH=${VIRTUAL_ENV}/bin:${PATH}
4549
ENV CUDA_HOME=/usr/local/cuda
4650
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
4751

48-
# Install pytorch nightly
49-
RUN uv pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu126
50-
5152
FROM base AS build-base
5253
RUN mkdir /wheels
5354

5455
# Install build deps that aren't in project requirements files
5556
# Make sure to upgrade setuptools to avoid triton build bug
56-
RUN uv pip install -U build cmake ninja pybind11 setuptools wheel
57+
# cmake '4.x' isn't parsed right by some tools yet
58+
RUN uv pip install -U build "cmake<4" ninja pybind11 "setuptools<=76" wheel
59+
60+
# Handle arm64 torch build
61+
FROM build-base AS build-torch
62+
ARG TARGETARCH
63+
RUN if [ ${TARGETARCH} = arm64 ]; then \
64+
# Install NVPL for ARM64 \
65+
apt install -y --no-install-recommends nvpl0 && \
66+
export BLAS=NVPL && \
67+
# ARM64 linker optimization \
68+
export CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 && \
69+
export USE_PRIORITIZED_TEXT_FOR_LD=1; \
70+
else \
71+
uv pip install mkl-static mkl-include; \
72+
fi
73+
74+
ARG TORCH_REF=v2.6.0
75+
ARG TORCH_BUILD_VERSION=2.6.0+cu124
76+
ENV PYTORCH_BUILD_VERSION=${TORCH_BUILD_VERSION:-${TORCH_REF#v}}
77+
ENV PYTORCH_BUILD_NUMBER=0
78+
RUN git clone https://github.com/pytorch/pytorch.git
79+
RUN cd pytorch && \
80+
git checkout ${TORCH_REF} && \
81+
git submodule sync --recursive && \
82+
git submodule update --init --recursive -j 8
83+
# # Bump XNNPACK submodule ref to fix compilation bug \
84+
# cd third_party/XNNPACK && \
85+
# git checkout fcc06d1
86+
RUN cd pytorch && \
87+
uv pip install -r requirements.txt && \
88+
uv build --wheel --no-build-isolation -o /wheels
89+
90+
FROM build-base AS build-audio
91+
COPY --from=build-torch /wheels/*.whl wheels/
92+
RUN uv pip install wheels/*
93+
94+
ARG AUDIO_REF=v2.6.0
95+
ARG AUDIO_BUILD_VERSION=2.6.0+cu124
96+
ENV BUILD_VERSION=${AUDIO_BUILD_VERSION:-${AUDIO_REF#v}}
97+
RUN git clone https://github.com/pytorch/audio.git
98+
RUN cd audio && \
99+
git checkout ${AUDIO_REF} && \
100+
git submodule sync --recursive && \
101+
git submodule update --init --recursive -j 8
102+
RUN cd audio && \
103+
uv build --wheel --no-build-isolation -o /wheels
104+
105+
FROM build-base AS build-vision
106+
COPY --from=build-torch /wheels/*.whl wheels/
107+
RUN uv pip install wheels/*
108+
109+
ARG VISION_REF=v0.21.0
110+
ARG VISION_BUILD_VERSION=0.21.0+cu124
111+
ENV BUILD_VERSION=${VISION_BUILD_VERSION:-${VISION_REF#v}}
112+
RUN git clone https://github.com/pytorch/vision.git
113+
RUN cd vision && \
114+
git checkout ${VISION_REF} && \
115+
git submodule sync --recursive && \
116+
git submodule update --init --recursive -j 8
117+
RUN cd vision && \
118+
uv build --wheel --no-build-isolation -o /wheels
57119

58120
FROM build-base AS build-triton
121+
COPY --from=build-torch /wheels/*.whl wheels/
122+
RUN uv pip install wheels/*
123+
59124
ARG TRITON_REF=release/3.2.x
60-
ARG TRITON_BUILD_SUFFIX=+cu126
125+
ARG TRITON_BUILD_SUFFIX=+cu124
61126
ENV TRITON_WHEEL_VERSION_SUFFIX=${TRITON_BUILD_SUFFIX:-}
62127
RUN git clone https://github.com/triton-lang/triton.git
63128
RUN cd triton && \
64129
git checkout ${TRITON_REF} && \
65-
git submodule sync && \
66-
git submodule update --init --recursive -j 8 && \
130+
git submodule sync --recursive && \
131+
git submodule update --init --recursive -j 8
132+
RUN cd triton && \
67133
uv build python --wheel --no-build-isolation -o /wheels
68134

69135
FROM build-base AS build-xformers
136+
COPY --from=build-torch /wheels/*.whl wheels/
137+
RUN uv pip install wheels/*
138+
70139
ARG XFORMERS_REF=v0.0.29.post2
71-
ARG XFORMERS_BUILD_VERSION=0.0.29.post2+cu126
140+
ARG XFORMERS_BUILD_VERSION=0.0.29.post2+cu124
72141
ENV BUILD_VERSION=${XFORMERS_BUILD_VERSION:-${XFORMERS_REF#v}}
73-
RUN git clone https://github.com/facebookresearch/xformers.git
142+
RUN git clone https://github.com/facebookresearch/xformers.git
74143
RUN cd xformers && \
75144
git checkout ${XFORMERS_REF} && \
76-
git submodule sync && \
77-
git submodule update --init --recursive -j 8 && \
145+
git submodule sync --recursive && \
146+
git submodule update --init --recursive -j 8
147+
RUN cd xformers && \
78148
uv build --wheel --no-build-isolation -o /wheels
79149

80150
FROM build-base AS build-flashinfer
151+
COPY --from=build-torch /wheels/*.whl wheels/
152+
RUN uv pip install wheels/*
153+
81154
ARG FLASHINFER_ENABLE_AOT=1
82155
ARG FLASHINFER_REF=v0.2.2.post1
83-
ARG FLASHINFER_BUILD_SUFFIX=cu126
156+
ARG FLASHINFER_BUILD_SUFFIX=cu124
84157
ENV FLASHINFER_LOCAL_VERSION=${FLASHINFER_BUILD_SUFFIX:-}
85158
RUN git clone https://github.com/flashinfer-ai/flashinfer.git
86159
RUN cd flashinfer && \
87160
git checkout ${FLASHINFER_REF} && \
88-
git submodule sync && \
89-
git submodule update --init --recursive -j 8 && \
161+
git submodule sync --recursive && \
162+
git submodule update --init --recursive -j 8
163+
RUN cd flashinfer && \
90164
uv build --wheel --no-build-isolation -o /wheels
91165

92166
FROM build-base AS build-vllm
93-
ARG VLLM_REF=v0.8.1
167+
COPY --from=build-torch /wheels/*.whl wheels/
168+
RUN uv pip install wheels/*
169+
170+
ARG VLLM_REF=v0.8.3
171+
ARG VLLM_BUILD_VERSION=0.8.3
172+
ENV BUILD_VERSION=${VLLM_BUILD_VERSION:-${VLLM_REF#v}}
173+
ENV SETUPTOOLS_SCM_PRETEND_VERSION=${BUILD_VERSION:-:}
94174
RUN git clone https://github.com/vllm-project/vllm.git
95175
RUN cd vllm && \
96176
git checkout ${VLLM_REF} && \
97-
git submodule sync && \
98-
git submodule update --init --recursive -j 8 && \
177+
python use_existing_torch.py && \
99178
uv pip install -r requirements/build.txt && \
100179
uv build --wheel --no-build-isolation -o /wheels
101180

102181
FROM base AS vllm-openai
103-
COPY --from=build-flashinfer /wheels/* wheels/
104-
COPY --from=build-triton /wheels/* wheels/
105-
COPY --from=build-vllm /wheels/* wheels/
106-
COPY --from=build-xformers /wheels/* wheels/
182+
COPY --from=build-torch /wheels/*.whl wheels/
183+
COPY --from=build-audio /wheels/*.whl wheels/
184+
COPY --from=build-vision /wheels/*.whl wheels/
185+
COPY --from=build-flashinfer /wheels/*.whl wheels/
186+
COPY --from=build-triton /wheels/*.whl wheels/
187+
COPY --from=build-vllm /wheels/*.whl wheels/
188+
COPY --from=build-xformers /wheels/*.whl wheels/
107189

108190
# Install and cleanup wheels
109191
RUN uv pip install wheels/*
110-
RUN rm -r wheels
111192

112193
# Install pynvml
113194
RUN uv pip install pynvml
@@ -118,6 +199,12 @@ RUN uv pip install accelerate hf_transfer modelscope bitsandbytes timm boto3 run
118199
# Clean uv cache
119200
RUN uv clean
120201

202+
# Clean apt cache
203+
RUN apt autoremove --purge -y
204+
RUN apt clean
205+
RUN rm -rf /var/lib/apt/lists/*
206+
RUN rm -rf /var/cache/apt/archives
207+
121208
# Enable hf-transfer
122209
ENV HF_HUB_ENABLE_HF_TRANSFER=1
123210

0 commit comments

Comments (0)