Skip to content

Commit 1410484

Browse files
authored
v0.9.0.1 initial push (#13)
* Test bumps
* Preinstall torch — see if latest flashinfer builds; see if use_existing_python works with these
* Comment build-torch stage
* Disable the wheel installs from stage
* Clamp cmake version for flashinfer build
* Try bumping packaging/setuptools versions
* Add verbose to wheel builds for debugging
* Add flashinfer AOT pre-build stage
* Try new flashinfer AOT flow
* Cleanup to install torch and triton from upstream — still build flashinfer, xformers, and vllm from source
* Syntax fix
* Normalize build action vars
* Cleanup action a bit more
* Capitalization
1 parent ff481c0 commit 1410484

File tree

2 files changed

+25
-121
lines changed

2 files changed

+25
-121
lines changed

.github/workflows/build-vllm.yaml

Lines changed: 9 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,12 @@ env:
44
PARALLELISM: 1
55
TORCH_CUDA_ARCH_LIST: 9.0a
66
VLLM_FA_CMAKE_GPU_ARCHES: 90a-real
7-
TORCH_REF: v2.6.0
8-
TORCH_BUILD_VERSION: 2.6.0+cu124
9-
AUDIO_REF: v2.6.0
10-
AUDIO_BUILD_VERSION: 2.6.0+cu124
11-
VISION_REF: v0.21.0
12-
VISION_BUILD_VERSION: 0.21.0+cu124
13-
TRITON_REF: release/3.2.x
14-
TRITON_BUILD_SUFFIX: +cu124
15-
XFORMERS_REF: v0.0.29.post2
16-
XFORMERS_BUILD_VERSION: 0.0.29.post2+cu124
17-
FLASHINFER_REF: v0.2.2.post1
18-
FLASHINFER_BUILD_SUFFIX: cu124
19-
VLLM_REF: v0.8.5.post1
20-
VLLM_BUILD_VERSION: 0.8.5.post1
7+
FLASHINFER_REF: v0.2.6.post1
8+
FLASHINFER_BUILD_SUFFIX: cu128
9+
VLLM_REF: v0.9.0.1
10+
VLLM_BUILD_VERSION: 0.9.0.1
11+
XFORMERS_REF: v0.0.30
12+
XFORMERS_BUILD_VERSION: 0.0.30+cu128
2113

2214
on:
2315
push:
@@ -28,7 +20,7 @@ jobs:
2820
strategy:
2921
matrix:
3022
arch: [amd64, arm64]
31-
cuda_version: [12.4.1]
23+
cuda_version: [12.8.1]
3224
image_distro: [ubuntu22.04]
3325
runs-on: [self-hosted, "${{ matrix.arch }}"]
3426
steps:
@@ -64,14 +56,12 @@ jobs:
6456
IMAGE_DISTRO=${{ matrix.image_distro }}
6557
TORCH_CUDA_ARCH_LIST=${{ env.TORCH_CUDA_ARCH_LIST }}
6658
VLLM_FA_CMAKE_GPU_ARCHES=${{ env.VLLM_FA_CMAKE_GPU_ARCHES }}
67-
TRITON_REF=${{ env.TRITON_REF }}
68-
TRITON_BUILD_SUFFIX=${{ env.TRITON_BUILD_SUFFIX }}
69-
XFORMERS_REF=${{ env.XFORMERS_REF }}
70-
XFORMERS_BUILD_VERSION=${{ env.XFORMERS_BUILD_VERSION }}
7159
FLASHINFER_REF=${{ env.FLASHINFER_REF }}
7260
FLASHINFER_BUILD_SUFFIX=${{ env.FLASHINFER_BUILD_SUFFIX }}
7361
VLLM_REF=${{ env.VLLM_REF }}
7462
VLLM_BUILD_VERSION=${{ env.VLLM_BUILD_VERSION }}
63+
XFORMERS_REF=${{ env.XFORMERS_REF }}
64+
XFORMERS_BUILD_VERSION=${{ env.XFORMERS_BUILD_VERSION }}
7565
cache-from: type=registry,ref=${{ env.GHCR_IMAGE }}:cache-cu${{ env.CUDA_TAG }}-${{ matrix.image_distro }}-${{ matrix.arch }}
7666
cache-to: type=registry,ref=${{ env.GHCR_IMAGE }}:cache-cu${{ env.CUDA_TAG }}-${{ matrix.image_distro }}-${{ matrix.arch }},mode=max
7767
context: .

Dockerfile

Lines changed: 16 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
ARG CUDA_VERSION=12.4.1
1+
ARG CUDA_VERSION=12.8.1
22
ARG IMAGE_DISTRO=ubuntu22.04
33
ARG PYTHON_VERSION=3.12
44

@@ -49,95 +49,19 @@ ENV PATH=${VIRTUAL_ENV}/bin:${PATH}
4949
ENV CUDA_HOME=/usr/local/cuda
5050
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
5151

52-
FROM base AS build-base
52+
FROM base AS torch-base
53+
RUN uv pip install -U torch torchvision torchaudio triton --index-url https://download.pytorch.org/whl/cu128
54+
55+
FROM torch-base AS build-base
5356
RUN mkdir /wheels
5457

5558
# Install build deps that aren't in project requirements files
5659
# Make sure to upgrade setuptools to avoid triton build bug
57-
# cmake '4.x' isn't parsed right by some tools yet
58-
RUN uv pip install -U build "cmake<4" ninja pybind11 setuptools wheel
59-
60-
# Handle arm64 torch build
61-
FROM build-base AS build-torch
62-
ARG TARGETARCH
63-
RUN if [ ${TARGETARCH} = arm64 ]; then \
64-
# Install NVPL for ARM64 \
65-
apt install -y --no-install-recommends nvpl0 && \
66-
export BLAS=NVPL && \
67-
# ARM64 linker optimization \
68-
export CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 && \
69-
export USE_PRIORITIZED_TEXT_FOR_LD=1; \
70-
else \
71-
uv pip install mkl-static mkl-include; \
72-
fi
73-
74-
ARG TORCH_REF=v2.6.0
75-
ARG TORCH_BUILD_VERSION=2.6.0+cu124
76-
ENV PYTORCH_BUILD_VERSION=${TORCH_BUILD_VERSION:-${TORCH_REF#v}}
77-
ENV PYTORCH_BUILD_NUMBER=0
78-
RUN git clone https://github.com/pytorch/pytorch.git
79-
RUN cd pytorch && \
80-
git checkout ${TORCH_REF} && \
81-
git submodule sync --recursive && \
82-
git submodule update --init --recursive -j 8
83-
# # Bump XNNPACK submodule ref to fix compilation bug \
84-
# cd third_party/XNNPACK && \
85-
# git checkout fcc06d1
86-
RUN cd pytorch && \
87-
uv pip install -r requirements.txt && \
88-
uv build --wheel --no-build-isolation -o /wheels
89-
90-
FROM build-base AS build-audio
91-
COPY --from=build-torch /wheels/*.whl wheels/
92-
RUN uv pip install wheels/*
93-
94-
ARG AUDIO_REF=v2.6.0
95-
ARG AUDIO_BUILD_VERSION=2.6.0+cu124
96-
ENV BUILD_VERSION=${AUDIO_BUILD_VERSION:-${AUDIO_REF#v}}
97-
RUN git clone https://github.com/pytorch/audio.git
98-
RUN cd audio && \
99-
git checkout ${AUDIO_REF} && \
100-
git submodule sync --recursive && \
101-
git submodule update --init --recursive -j 8
102-
RUN cd audio && \
103-
uv build --wheel --no-build-isolation -o /wheels
104-
105-
FROM build-base AS build-vision
106-
COPY --from=build-torch /wheels/*.whl wheels/
107-
RUN uv pip install wheels/*
108-
109-
ARG VISION_REF=v0.21.0
110-
ARG VISION_BUILD_VERSION=0.21.0+cu124
111-
ENV BUILD_VERSION=${VISION_BUILD_VERSION:-${VISION_REF#v}}
112-
RUN git clone https://github.com/pytorch/vision.git
113-
RUN cd vision && \
114-
git checkout ${VISION_REF} && \
115-
git submodule sync --recursive && \
116-
git submodule update --init --recursive -j 8
117-
RUN cd vision && \
118-
uv build --wheel --no-build-isolation -o /wheels
119-
120-
FROM build-base AS build-triton
121-
COPY --from=build-torch /wheels/*.whl wheels/
122-
RUN uv pip install wheels/*
123-
124-
ARG TRITON_REF=release/3.2.x
125-
ARG TRITON_BUILD_SUFFIX=+cu124
126-
ENV TRITON_WHEEL_VERSION_SUFFIX=${TRITON_BUILD_SUFFIX:-}
127-
RUN git clone https://github.com/triton-lang/triton.git
128-
RUN cd triton && \
129-
git checkout ${TRITON_REF} && \
130-
git submodule sync --recursive && \
131-
git submodule update --init --recursive -j 8
132-
RUN cd triton && \
133-
uv build python --wheel --no-build-isolation -o /wheels
60+
RUN uv pip install -U build cmake ninja packaging pybind11 setuptools wheel
13461

13562
FROM build-base AS build-xformers
136-
COPY --from=build-torch /wheels/*.whl wheels/
137-
RUN uv pip install wheels/*
138-
139-
ARG XFORMERS_REF=v0.0.29.post2
140-
ARG XFORMERS_BUILD_VERSION=0.0.29.post2+cu124
63+
ARG XFORMERS_REF=v0.0.30
64+
ARG XFORMERS_BUILD_VERSION=0.0.30+cu128
14165
ENV BUILD_VERSION=${XFORMERS_BUILD_VERSION:-${XFORMERS_REF#v}}
14266
RUN git clone https://github.com/facebookresearch/xformers.git
14367
RUN cd xformers && \
@@ -148,42 +72,32 @@ RUN cd xformers && \
14872
uv build --wheel --no-build-isolation -o /wheels
14973

15074
FROM build-base AS build-flashinfer
151-
COPY --from=build-torch /wheels/*.whl wheels/
152-
RUN uv pip install wheels/*
153-
154-
ARG FLASHINFER_ENABLE_AOT=1
155-
ARG FLASHINFER_REF=v0.2.2.post1
156-
ARG FLASHINFER_BUILD_SUFFIX=cu124
75+
ARG FLASHINFER_REF=v0.2.6.post1
76+
ARG FLASHINFER_BUILD_SUFFIX=cu128
15777
ENV FLASHINFER_LOCAL_VERSION=${FLASHINFER_BUILD_SUFFIX:-}
15878
RUN git clone https://github.com/flashinfer-ai/flashinfer.git
15979
RUN cd flashinfer && \
16080
git checkout ${FLASHINFER_REF} && \
16181
git submodule sync --recursive && \
16282
git submodule update --init --recursive -j 8
16383
RUN cd flashinfer && \
164-
uv build --wheel --no-build-isolation -o /wheels
84+
python -m flashinfer.aot && \
85+
python -m build -v --wheel --no-isolation -o /wheels
16586

16687
FROM build-base AS build-vllm
167-
COPY --from=build-torch /wheels/*.whl wheels/
168-
RUN uv pip install wheels/*
169-
170-
ARG VLLM_REF=v0.8.5
171-
ARG VLLM_BUILD_VERSION=0.8.5
88+
ARG VLLM_REF=v0.9.0.1
89+
ARG VLLM_BUILD_VERSION=0.9.0.1
17290
ENV BUILD_VERSION=${VLLM_BUILD_VERSION:-${VLLM_REF#v}}
17391
ENV SETUPTOOLS_SCM_PRETEND_VERSION=${BUILD_VERSION:-:}
17492
RUN git clone https://github.com/vllm-project/vllm.git
17593
RUN cd vllm && \
17694
git checkout ${VLLM_REF} && \
17795
python use_existing_torch.py && \
17896
uv pip install -r requirements/build.txt && \
179-
uv build --wheel --no-build-isolation -o /wheels
97+
uv build -v --wheel --no-build-isolation -o /wheels
18098

181-
FROM base AS vllm-openai
182-
COPY --from=build-torch /wheels/*.whl wheels/
183-
COPY --from=build-audio /wheels/*.whl wheels/
184-
COPY --from=build-vision /wheels/*.whl wheels/
99+
FROM torch-base AS vllm-openai
185100
COPY --from=build-flashinfer /wheels/*.whl wheels/
186-
COPY --from=build-triton /wheels/*.whl wheels/
187101
COPY --from=build-vllm /wheels/*.whl wheels/
188102
COPY --from=build-xformers /wheels/*.whl wheels/
189103

0 commit comments

Comments (0)