Skip to content

Qualcomm AI Engine Direct - CDSP Direct Mode #3272

Qualcomm AI Engine Direct - CDSP Direct Mode

Qualcomm AI Engine Direct - CDSP Direct Mode #3272

Workflow file for this run

# Test ExecuTorch CUDA Windows Artifacts
# This workflow exports models targeting CUDA Windows using optimum-executorch on Linux.
# Then it runs those exported artifacts on a Windows CI machine.
name: Test CUDA Windows Export and E2E

on:
  pull_request:
  push:
    branches:
      - main
      - release/*

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: false

jobs:
  # Stage 1: export model artifacts on a Linux CUDA runner, upload per-matrix-entry artifacts.
  export-model-cuda-windows-artifact:
    name: export-model-cuda-windows-artifact
    # Skip this job if the pull request is from a fork (HuggingFace secrets are not available)
    if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    secrets: inherit
    strategy:
      fail-fast: false
      matrix:
        model:
          - repo: "mistralai"
            name: "Voxtral-Mini-3B-2507"
          - repo: "nvidia"
            name: "parakeet-tdt"
        quant:
          - "non-quantized"
          - "quantized-int4-weight-only"
    with:
      timeout: 90
      secrets-env: EXECUTORCH_HF_TOKEN
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      # Quoted so the version survives as a string (an unquoted 12.8 is a YAML float).
      gpu-arch-version: "12.8"
      docker-image: ci-image:executorch-ubuntu-22.04-cuda-windows
      submodules: recursive
      upload-artifact: ${{ matrix.model.repo }}-${{ matrix.model.name }}-cuda-windows-${{ matrix.quant }}
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        set -eux
        echo "::group::Fix libstdc++ GLIBCXX version"
        # The executorch pybindings require GLIBCXX_3.4.30 which conda's libstdc++ doesn't have.
        # Replace conda's libstdc++ with the system version to fix ImportError.
        # Verify system version has GLIBCXX_3.4.30
        strings /usr/lib/x86_64-linux-gnu/libstdc++.so.6 | grep GLIBCXX_3.4.30
        # Backup and replace conda's version
        mv /opt/conda/lib/libstdc++.so.6 /opt/conda/lib/libstdc++.so.6.bak || true
        ln -sf /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /opt/conda/lib/libstdc++.so.6
        echo "::endgroup::"

        echo "::group::Verify pre-installed dependencies"
        x86_64-w64-mingw32-g++ --version
        nvcc --version
        echo "WINDOWS_CUDA_HOME=${WINDOWS_CUDA_HOME}"
        ls -la "${WINDOWS_CUDA_HOME}"
        echo "::endgroup::"

        echo "::group::Setup ExecuTorch"
        # Disable MKL to avoid duplicate target error when conda has multiple MKL installations
        export USE_MKL=OFF
        PYTHON_EXECUTABLE=python ./install_executorch.sh
        echo "::endgroup::"

        echo "::group::Setup Huggingface"
        pip install -U "huggingface_hub[cli]<1.0" accelerate
        huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
        OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
        pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
        echo "::endgroup::"

        source .ci/scripts/export_model_artifact.sh cuda-windows "${{ matrix.model.repo }}/${{ matrix.model.name }}" "${{ matrix.quant }}" "${RUNNER_ARTIFACT_DIR}"

  # Stage 2: download the exported artifacts and run them end-to-end on a Windows CUDA runner.
  test-model-cuda-windows-e2e:
    name: test-model-cuda-windows-e2e
    needs: export-model-cuda-windows-artifact
    uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
    strategy:
      fail-fast: false
      # NOTE(review): matrix must mirror the export job's matrix so download-artifact
      # names line up with upload-artifact names above.
      matrix:
        model:
          - repo: "mistralai"
            name: "Voxtral-Mini-3B-2507"
          - repo: "nvidia"
            name: "parakeet-tdt"
        quant:
          - "non-quantized"
          - "quantized-int4-weight-only"
    with:
      timeout: 240
      runner: windows.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      # Quoted so the version survives as a string (an unquoted 12.8 is a YAML float).
      gpu-arch-version: "12.8"
      submodules: recursive
      download-artifact: ${{ matrix.model.repo }}-${{ matrix.model.name }}-cuda-windows-${{ matrix.quant }}
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        conda init powershell
        powershell -Command "& {
          Set-PSDebug -Trace 1
          \$ErrorActionPreference = 'Stop'
          \$PSNativeCommandUseErrorActionPreference = \$true
          \$env:CUDA_HOME = 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8'
          \$env:CUDA_PATH = \$env:CUDA_HOME
          \$env:PATH = \"\$env:CUDA_HOME\bin;\$env:PATH\"
          nvcc --version
          .ci/scripts/setup-windows.ps1
          \$artifactDir = \$env:RUNNER_ARTIFACT_DIR
          if ([string]::IsNullOrWhiteSpace(\$artifactDir)) {
            throw 'RUNNER_ARTIFACT_DIR is empty. Ensure download-artifact is configured for windows_job.yml.'
          }
          .ci/scripts/test_model_e2e_windows.ps1 -Device cuda-windows -HfModel '${{ matrix.model.repo }}/${{ matrix.model.name }}' -QuantName '${{ matrix.quant }}' -ModelDir \$artifactDir -ExpectedCudaVersion '12.8'
        }"