Qualcomm AI Engine Direct - CDSP Direct Mode #3272
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Test ExecuTorch CUDA Windows Artifacts
# This workflow exports models targeting CUDA Windows using optimum-executorch on Linux.
# Then it runs those exported artifacts on a Windows CI machine.
name: Test CUDA Windows Export and E2E

# Run on every PR, and on pushes to main and release branches.
on:
  pull_request:
  push:
    branches:
      - main
      - release/*

# One concurrency group per PR (or per commit on push). The trailing
# workflow_dispatch/schedule terms keep manually-dispatched and scheduled runs
# in distinct groups so they never cancel each other.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: false
jobs:
  # Exports each (model, quant) pair on a CUDA Linux runner, cross-building
  # artifacts for Windows, and uploads them for the E2E job below.
  export-model-cuda-windows-artifact:
    name: export-model-cuda-windows-artifact
    # Skip this job if the pull request is from a fork (HuggingFace secrets are not available)
    if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    secrets: inherit
    strategy:
      # Let every matrix combination finish even if one fails.
      fail-fast: false
      matrix:
        model:
          - repo: "mistralai"
            name: "Voxtral-Mini-3B-2507"
          - repo: "nvidia"
            name: "parakeet-tdt"
        quant:
          - "non-quantized"
          - "quantized-int4-weight-only"
    with:
      timeout: 90
      secrets-env: EXECUTORCH_HF_TOKEN
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      # Quoted so YAML treats the version as a string, not a float
      # (e.g. an unquoted 12.10 would be parsed as 12.1).
      gpu-arch-version: "12.8"
      docker-image: ci-image:executorch-ubuntu-22.04-cuda-windows
      submodules: recursive
      # Artifact name must match download-artifact in the E2E job below.
      upload-artifact: ${{ matrix.model.repo }}-${{ matrix.model.name }}-cuda-windows-${{ matrix.quant }}
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        set -eux

        echo "::group::Fix libstdc++ GLIBCXX version"
        # The executorch pybindings require GLIBCXX_3.4.30 which conda's libstdc++ doesn't have.
        # Replace conda's libstdc++ with the system version to fix ImportError.
        # Verify system version has GLIBCXX_3.4.30
        strings /usr/lib/x86_64-linux-gnu/libstdc++.so.6 | grep GLIBCXX_3.4.30
        # Backup and replace conda's version
        mv /opt/conda/lib/libstdc++.so.6 /opt/conda/lib/libstdc++.so.6.bak || true
        ln -sf /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /opt/conda/lib/libstdc++.so.6
        echo "::endgroup::"

        echo "::group::Verify pre-installed dependencies"
        x86_64-w64-mingw32-g++ --version
        nvcc --version
        echo "WINDOWS_CUDA_HOME=${WINDOWS_CUDA_HOME}"
        ls -la "${WINDOWS_CUDA_HOME}"
        echo "::endgroup::"

        echo "::group::Setup ExecuTorch"
        # Disable MKL to avoid duplicate target error when conda has multiple MKL installations
        export USE_MKL=OFF
        PYTHON_EXECUTABLE=python ./install_executorch.sh
        echo "::endgroup::"

        echo "::group::Setup Huggingface"
        pip install -U "huggingface_hub[cli]<1.0" accelerate
        huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
        OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
        pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
        echo "::endgroup::"

        source .ci/scripts/export_model_artifact.sh cuda-windows "${{ matrix.model.repo }}/${{ matrix.model.name }}" "${{ matrix.quant }}" "${RUNNER_ARTIFACT_DIR}"
| test-model-cuda-windows-e2e: | |
| name: test-model-cuda-windows-e2e | |
| needs: export-model-cuda-windows-artifact | |
| uses: pytorch/test-infra/.github/workflows/windows_job.yml@main | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| model: | |
| - repo: "mistralai" | |
| name: "Voxtral-Mini-3B-2507" | |
| - repo: "nvidia" | |
| name: "parakeet-tdt" | |
| quant: | |
| - "non-quantized" | |
| - "quantized-int4-weight-only" | |
| with: | |
| timeout: 240 | |
| runner: windows.g5.4xlarge.nvidia.gpu | |
| gpu-arch-type: cuda | |
| gpu-arch-version: 12.8 | |
| submodules: recursive | |
| download-artifact: ${{ matrix.model.repo }}-${{ matrix.model.name }}-cuda-windows-${{ matrix.quant }} | |
| ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | |
| script: | | |
| conda init powershell | |
| powershell -Command "& { | |
| Set-PSDebug -Trace 1 | |
| \$ErrorActionPreference = 'Stop' | |
| \$PSNativeCommandUseErrorActionPreference = \$true | |
| \$env:CUDA_HOME = 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8' | |
| \$env:CUDA_PATH = \$env:CUDA_HOME | |
| \$env:PATH = \"\$env:CUDA_HOME\bin;\$env:PATH\" | |
| nvcc --version | |
| .ci/scripts/setup-windows.ps1 | |
| \$artifactDir = \$env:RUNNER_ARTIFACT_DIR | |
| if ([string]::IsNullOrWhiteSpace(\$artifactDir)) { | |
| throw 'RUNNER_ARTIFACT_DIR is empty. Ensure download-artifact is configured for windows_job.yml.' | |
| } | |
| .ci/scripts/test_model_e2e_windows.ps1 -Device cuda-windows -HfModel '${{ matrix.model.repo }}/${{ matrix.model.name }}' -QuantName '${{ matrix.quant }}' -ModelDir \$artifactDir -ExpectedCudaVersion '12.8' | |
| }" |