Skip to content

Commit 1410484

Browse files
authored
v0.9.0.1 initial push (#13)
* Test bumps
* Preinstall torch — see if latest flashinfer builds; see if use_existing_python works with these
* Comment build-torch stage
* Disable the wheel installs from stage
* Clamp cmake version for flashinfer build
* Try bumping packaging/setuptools versions
* Add verbose to wheel builds for debugging
* Add flashinfer AOT pre-build stage
* Try new flashinfer AOT flow
* Cleanup to install torch and triton from upstream — still build flashinfer, xformers, and vllm from source
* Syntax fix
* Normalize build action vars
* Cleanup action a bit more
* Capitalization
1 parent ff481c0 commit 1410484

File tree

2 files changed

+25
-121
lines changed

2 files changed

+25
-121
lines changed

.github/workflows/build-vllm.yaml

Lines changed: 9 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,12 @@ env:
44
PARALLELISM: 1
55
TORCH_CUDA_ARCH_LIST: 9.0a
66
VLLM_FA_CMAKE_GPU_ARCHES: 90a-real
7-
TORCH_REF: v2.6.0
8-
TORCH_BUILD_VERSION: 2.6.0+cu124
9-
AUDIO_REF: v2.6.0
10-
AUDIO_BUILD_VERSION: 2.6.0+cu124
11-
VISION_REF: v0.21.0
12-
VISION_BUILD_VERSION: 0.21.0+cu124
13-
TRITON_REF: release/3.2.x
14-
TRITON_BUILD_SUFFIX: +cu124
15-
XFORMERS_REF: v0.0.29.post2
16-
XFORMERS_BUILD_VERSION: 0.0.29.post2+cu124
17-
FLASHINFER_REF: v0.2.2.post1
18-
FLASHINFER_BUILD_SUFFIX: cu124
19-
VLLM_REF: v0.8.5.post1
20-
VLLM_BUILD_VERSION: 0.8.5.post1
7+
FLASHINFER_REF: v0.2.6.post1
8+
FLASHINFER_BUILD_SUFFIX: cu128
9+
VLLM_REF: v0.9.0.1
10+
VLLM_BUILD_VERSION: 0.9.0.1
11+
XFORMERS_REF: v0.0.30
12+
XFORMERS_BUILD_VERSION: 0.0.30+cu128
2113

2214
on:
2315
push:
@@ -28,7 +20,7 @@ jobs:
2820
strategy:
2921
matrix:
3022
arch: [amd64, arm64]
31-
cuda_version: [12.4.1]
23+
cuda_version: [12.8.1]
3224
image_distro: [ubuntu22.04]
3325
runs-on: [self-hosted, "${{ matrix.arch }}"]
3426
steps:
@@ -64,14 +56,12 @@ jobs:
6456
IMAGE_DISTRO=${{ matrix.image_distro }}
6557
TORCH_CUDA_ARCH_LIST=${{ env.TORCH_CUDA_ARCH_LIST }}
6658
VLLM_FA_CMAKE_GPU_ARCHES=${{ env.VLLM_FA_CMAKE_GPU_ARCHES }}
67-
TRITON_REF=${{ env.TRITON_REF }}
68-
TRITON_BUILD_SUFFIX=${{ env.TRITON_BUILD_SUFFIX }}
69-
XFORMERS_REF=${{ env.XFORMERS_REF }}
70-
XFORMERS_BUILD_VERSION=${{ env.XFORMERS_BUILD_VERSION }}
7159
FLASHINFER_REF=${{ env.FLASHINFER_REF }}
7260
FLASHINFER_BUILD_SUFFIX=${{ env.FLASHINFER_BUILD_SUFFIX }}
7361
VLLM_REF=${{ env.VLLM_REF }}
7462
VLLM_BUILD_VERSION=${{ env.VLLM_BUILD_VERSION }}
63+
XFORMERS_REF=${{ env.XFORMERS_REF }}
64+
XFORMERS_BUILD_VERSION=${{ env.XFORMERS_BUILD_VERSION }}
7565
cache-from: type=registry,ref=${{ env.GHCR_IMAGE }}:cache-cu${{ env.CUDA_TAG }}-${{ matrix.image_distro }}-${{ matrix.arch }}
7666
cache-to: type=registry,ref=${{ env.GHCR_IMAGE }}:cache-cu${{ env.CUDA_TAG }}-${{ matrix.image_distro }}-${{ matrix.arch }},mode=max
7767
context: .

Dockerfile

Lines changed: 16 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
ARG CUDA_VERSION=12.4.1
1+
ARG CUDA_VERSION=12.8.1
22
ARG IMAGE_DISTRO=ubuntu22.04
33
ARG PYTHON_VERSION=3.12
44

@@ -49,95 +49,19 @@ ENV PATH=${VIRTUAL_ENV}/bin:${PATH}
4949
ENV CUDA_HOME=/usr/local/cuda
5050
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
5151

52-
FROM base AS build-base
52+
FROM base AS torch-base
53+
RUN uv pip install -U torch torchvision torchaudio triton --index-url https://download.pytorch.org/whl/cu128
54+
55+
FROM torch-base AS build-base
5356
RUN mkdir /wheels
5457

5558
# Install build deps that aren't in project requirements files
5659
# Make sure to upgrade setuptools to avoid triton build bug
57-
# cmake '4.x' isn't parsed right by some tools yet
58-
RUN uv pip install -U build "cmake<4" ninja pybind11 setuptools wheel
59-
60-
# Handle arm64 torch build
61-
FROM build-base AS build-torch
62-
ARG TARGETARCH
63-
RUN if [ ${TARGETARCH} = arm64 ]; then \
64-
# Install NVPL for ARM64 \
65-
apt install -y --no-install-recommends nvpl0 && \
66-
export BLAS=NVPL && \
67-
# ARM64 linker optimization \
68-
export CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 && \
69-
export USE_PRIORITIZED_TEXT_FOR_LD=1; \
70-
else \
71-
uv pip install mkl-static mkl-include; \
72-
fi
73-
74-
ARG TORCH_REF=v2.6.0
75-
ARG TORCH_BUILD_VERSION=2.6.0+cu124
76-
ENV PYTORCH_BUILD_VERSION=${TORCH_BUILD_VERSION:-${TORCH_REF#v}}
77-
ENV PYTORCH_BUILD_NUMBER=0
78-
RUN git clone https://github.com/pytorch/pytorch.git
79-
RUN cd pytorch && \
80-
git checkout ${TORCH_REF} && \
81-
git submodule sync --recursive && \
82-
git submodule update --init --recursive -j 8
83-
# # Bump XNNPACK submodule ref to fix compilation bug \
84-
# cd third_party/XNNPACK && \
85-
# git checkout fcc06d1
86-
RUN cd pytorch && \
87-
uv pip install -r requirements.txt && \
88-
uv build --wheel --no-build-isolation -o /wheels
89-
90-
FROM build-base AS build-audio
91-
COPY --from=build-torch /wheels/*.whl wheels/
92-
RUN uv pip install wheels/*
93-
94-
ARG AUDIO_REF=v2.6.0
95-
ARG AUDIO_BUILD_VERSION=2.6.0+cu124
96-
ENV BUILD_VERSION=${AUDIO_BUILD_VERSION:-${AUDIO_REF#v}}
97-
RUN git clone https://github.com/pytorch/audio.git
98-
RUN cd audio && \
99-
git checkout ${AUDIO_REF} && \
100-
git submodule sync --recursive && \
101-
git submodule update --init --recursive -j 8
102-
RUN cd audio && \
103-
uv build --wheel --no-build-isolation -o /wheels
104-
105-
FROM build-base AS build-vision
106-
COPY --from=build-torch /wheels/*.whl wheels/
107-
RUN uv pip install wheels/*
108-
109-
ARG VISION_REF=v0.21.0
110-
ARG VISION_BUILD_VERSION=0.21.0+cu124
111-
ENV BUILD_VERSION=${VISION_BUILD_VERSION:-${VISION_REF#v}}
112-
RUN git clone https://github.com/pytorch/vision.git
113-
RUN cd vision && \
114-
git checkout ${VISION_REF} && \
115-
git submodule sync --recursive && \
116-
git submodule update --init --recursive -j 8
117-
RUN cd vision && \
118-
uv build --wheel --no-build-isolation -o /wheels
119-
120-
FROM build-base AS build-triton
121-
COPY --from=build-torch /wheels/*.whl wheels/
122-
RUN uv pip install wheels/*
123-
124-
ARG TRITON_REF=release/3.2.x
125-
ARG TRITON_BUILD_SUFFIX=+cu124
126-
ENV TRITON_WHEEL_VERSION_SUFFIX=${TRITON_BUILD_SUFFIX:-}
127-
RUN git clone https://github.com/triton-lang/triton.git
128-
RUN cd triton && \
129-
git checkout ${TRITON_REF} && \
130-
git submodule sync --recursive && \
131-
git submodule update --init --recursive -j 8
132-
RUN cd triton && \
133-
uv build python --wheel --no-build-isolation -o /wheels
60+
RUN uv pip install -U build cmake ninja packaging pybind11 setuptools wheel
13461

13562
FROM build-base AS build-xformers
136-
COPY --from=build-torch /wheels/*.whl wheels/
137-
RUN uv pip install wheels/*
138-
139-
ARG XFORMERS_REF=v0.0.29.post2
140-
ARG XFORMERS_BUILD_VERSION=0.0.29.post2+cu124
63+
ARG XFORMERS_REF=v0.0.30
64+
ARG XFORMERS_BUILD_VERSION=0.0.30+cu128
14165
ENV BUILD_VERSION=${XFORMERS_BUILD_VERSION:-${XFORMERS_REF#v}}
14266
RUN git clone https://github.com/facebookresearch/xformers.git
14367
RUN cd xformers && \
@@ -148,42 +72,32 @@ RUN cd xformers && \
14872
uv build --wheel --no-build-isolation -o /wheels
14973

15074
FROM build-base AS build-flashinfer
151-
COPY --from=build-torch /wheels/*.whl wheels/
152-
RUN uv pip install wheels/*
153-
154-
ARG FLASHINFER_ENABLE_AOT=1
155-
ARG FLASHINFER_REF=v0.2.2.post1
156-
ARG FLASHINFER_BUILD_SUFFIX=cu124
75+
ARG FLASHINFER_REF=v0.2.6.post1
76+
ARG FLASHINFER_BUILD_SUFFIX=cu128
15777
ENV FLASHINFER_LOCAL_VERSION=${FLASHINFER_BUILD_SUFFIX:-}
15878
RUN git clone https://github.com/flashinfer-ai/flashinfer.git
15979
RUN cd flashinfer && \
16080
git checkout ${FLASHINFER_REF} && \
16181
git submodule sync --recursive && \
16282
git submodule update --init --recursive -j 8
16383
RUN cd flashinfer && \
164-
uv build --wheel --no-build-isolation -o /wheels
84+
python -m flashinfer.aot && \
85+
python -m build -v --wheel --no-isolation -o /wheels
16586

16687
FROM build-base AS build-vllm
167-
COPY --from=build-torch /wheels/*.whl wheels/
168-
RUN uv pip install wheels/*
169-
170-
ARG VLLM_REF=v0.8.5
171-
ARG VLLM_BUILD_VERSION=0.8.5
88+
ARG VLLM_REF=v0.9.0.1
89+
ARG VLLM_BUILD_VERSION=0.9.0.1
17290
ENV BUILD_VERSION=${VLLM_BUILD_VERSION:-${VLLM_REF#v}}
17391
ENV SETUPTOOLS_SCM_PRETEND_VERSION=${BUILD_VERSION:-:}
17492
RUN git clone https://github.com/vllm-project/vllm.git
17593
RUN cd vllm && \
17694
git checkout ${VLLM_REF} && \
17795
python use_existing_torch.py && \
17896
uv pip install -r requirements/build.txt && \
179-
uv build --wheel --no-build-isolation -o /wheels
97+
uv build -v --wheel --no-build-isolation -o /wheels
18098

181-
FROM base AS vllm-openai
182-
COPY --from=build-torch /wheels/*.whl wheels/
183-
COPY --from=build-audio /wheels/*.whl wheels/
184-
COPY --from=build-vision /wheels/*.whl wheels/
99+
FROM torch-base AS vllm-openai
185100
COPY --from=build-flashinfer /wheels/*.whl wheels/
186-
COPY --from=build-triton /wheels/*.whl wheels/
187101
COPY --from=build-vllm /wheels/*.whl wheels/
188102
COPY --from=build-xformers /wheels/*.whl wheels/
189103

0 commit comments

Comments (0)