improve benchmark on mps #3779
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: CI | |
| on: | |
| pull_request: | |
| types: | |
| - labeled | |
| - unlabeled | |
| - opened | |
| - synchronize | |
| - reopened | |
| # Allow to trigger the workflow manually | |
| workflow_dispatch: | |
| permissions: | |
| contents: read | |
| concurrency: | |
| group: "${{ github.workflow }}-${{ github.ref }}" | |
| cancel-in-progress: ${{ github.event_name == 'pull_request' }} | |
| env: | |
| CLANG_TIDY_CMAKE_OPTIONS: "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON" # to be updated | |
| PYTHONDEVMODE: "1" | |
| PYTHONUNBUFFERED: "1" | |
| PYTHONPATH: "" # explicit cleanup | |
| PIP_USER: "" # explicit cleanup | |
| COLUMNS: "100" | |
| FORCE_COLOR: "1" | |
| CLICOLOR_FORCE: "1" | |
| UV_INDEX_STRATEGY: "unsafe-best-match" | |
| UV_HTTP_TIMEOUT: "600" | |
| XDG_CACHE_HOME: "${{ github.workspace }}/.cache" # to be updated | |
| PIP_CACHE_DIR: "${{ github.workspace }}/.cache/pip" # to be updated | |
| UV_CACHE_DIR: "${{ github.workspace }}/.cache/uv" # to be updated | |
| PRE_COMMIT_HOME: "${{ github.workspace }}/.cache/pip/.pre-commit" # to be updated | |
| jobs: | |
| lint: | |
| name: Quick Lint | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 30 | |
| steps: | |
| - name: Checkout repository | |
| uses: actions/checkout@v6 | |
| with: | |
| fetch-depth: 0 | |
| submodules: recursive | |
| - name: Setup Python 3.8 | |
| id: setup-pylowest | |
| uses: actions/setup-python@v6 | |
| with: | |
| python-version: "3.8" # use lowest supported version for linting | |
| update-environment: false | |
| - name: Check AST with Python 3.8 | |
| run: | | |
| "${{ steps.setup-pylowest.outputs.python-path }}" -m compileall -q -f tilelang | |
| - name: Setup Python 3.9 | |
| uses: actions/setup-python@v6 | |
| with: | |
| python-version: "3.9" | |
| update-environment: true | |
| cache: pip | |
| cache-dependency-path: | | |
| pyproject.toml | |
| requirements*.txt | |
| .pre-commit-config.yaml | |
| - name: Pre-commit Lint | |
| run: | | |
| if ! pipx run pre-commit run --all-files --color=always --show-diff-on-failure; then | |
| echo "::error::Pre-commit checks failed. Please run 'pre-commit install' and 'pre-commit run --all-files' locally to see the issues." | |
| exit 1 | |
| fi | |
| tests: | |
| name: Test for Python ${{ matrix.python-version }} with ${{ matrix.runner.toolkit }} (on ${{ matrix.runner.name }}) | |
| if: | | |
| github.repository_owner == 'tile-ai' && | |
| (github.event_name != 'pull_request' || !github.event.pull_request.draft) | |
| needs: [lint] | |
| runs-on: ${{ matrix.runner.tags }} | |
| strategy: | |
| matrix: | |
| runner: | |
| - tags: [self-hosted, nvidia] | |
| name: self-hosted-nvidia | |
| # Format: [Nightly-]CUDA-<major>.<minor>[.<patch>]. E.g., "CUDA-12.8" or "Nightly-CUDA-13.0". | |
| # Use "Nightly-" prefix to use torch nightly builds. | |
| toolkit: CUDA-12.8 | |
| - tags: [self-hosted, amd, gpu] | |
| name: self-hosted-amd | |
| # Format: [Nightly-]ROCm-<major>.<minor>[.<patch>]. E.g., "ROCm-6.4" or "Nightly-ROCm-7.0". | |
| # Use "Nightly-" prefix to use torch nightly builds. | |
| toolkit: Nightly-ROCm-7.1 | |
| - tags: [macos-latest] | |
| name: macos-latest | |
| toolkit: Metal # or Nightly-Metal | |
| python-version: | |
| - "3.12" | |
| fail-fast: false | |
| timeout-minutes: 120 | |
| steps: | |
| - name: Checkout repository | |
| uses: actions/checkout@v6 | |
| with: | |
| fetch-depth: 0 | |
| submodules: recursive | |
| - name: Set environment (self-hosted runners) | |
| if: startsWith(matrix.runner.name, 'self-hosted') | |
| run: | | |
| # Hide sensitive data in logs for self-hosted runners | |
| if [[ -n "${{ secrets.SECRET_PATH_PREFIXES }}" ]]; then | |
| echo "::add-mask::${{ secrets.SECRET_PATH_PREFIXES }}" | |
| # Colon separated list of secrets to mask | |
| for secret in $(echo "${{ secrets.SECRET_PATH_PREFIXES }}" | tr ':' '\n'); do | |
| echo "::add-mask::${secret}" | |
| done | |
| fi | |
| # Use runner tool_cache as cache root for self-hosted runners to avoid internet connection | |
| # issues and to share cache between jobs. | |
| export XDG_CACHE_HOME="${{ runner.tool_cache }}/.ci-cache-${{ github.workflow }}" | |
| echo "XDG_CACHE_HOME=${XDG_CACHE_HOME}" | tee -a "${GITHUB_ENV}" | |
| echo "PIP_CACHE_DIR=${XDG_CACHE_HOME}/pip" | tee -a "${GITHUB_ENV}" | |
| echo "UV_CACHE_DIR=${XDG_CACHE_HOME}/uv" | tee -a "${GITHUB_ENV}" | |
| echo "PRE_COMMIT_HOME=${XDG_CACHE_HOME}/pip/.pre-commit" | tee -a "${GITHUB_ENV}" | |
| # Do not use ccache on self-hosted runners, as it will download/upload caches which is slow. | |
| # Self-hosted runners usually have more CPU power to compile without ccache. | |
| - name: Setup ccache (GitHub-hosted runners) | |
| id: setup-ccache | |
| if: ${{ !startsWith(matrix.runner.name, 'self-hosted') }} | |
| uses: hendrikmuhs/ccache-action@v1 | |
| with: | |
| create-symlink: true | |
| evict-old-files: "7d" | |
| append-timestamp: false | |
| key: ${{ runner.os }}-${{ runner.arch }}-${{ matrix.runner.toolkit }}-${{ hashFiles('**/*.cc') }} | |
| restore-keys: | | |
| ${{ runner.os }}-${{ runner.arch }}-${{ matrix.runner.toolkit }}-${{ hashFiles('**/*.cc') }} | |
| ${{ runner.os }}-${{ runner.arch }}-${{ matrix.runner.toolkit }} | |
| ${{ runner.os }}-${{ runner.arch }} | |
| - name: Set environment (CUDA) | |
| if: contains(matrix.runner.toolkit, 'CUDA') | |
| run: | | |
| TOOLKIT="${{ matrix.runner.toolkit }}" | |
| CUDA_VERSION="${TOOLKIT##*-}" | |
| CUDA_VERSION_MAJMIN="$(echo ${CUDA_VERSION} | cut -d '.' -f-2)" | |
| CUDA_VERSION_MAJMIN_NODOT="${CUDA_VERSION_MAJMIN//./}" | |
| if [[ "${TOOLKIT}" == "Nightly-"* ]]; then | |
| # Use torch nightly builds | |
| export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/nightly/cu${CUDA_VERSION_MAJMIN_NODOT}" | |
| else | |
| export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cu${CUDA_VERSION_MAJMIN_NODOT}" | |
| fi | |
| export UV_INDEX="${PIP_EXTRA_INDEX_URL}" | |
| export CLANG_TIDY_CMAKE_OPTIONS="${CLANG_TIDY_CMAKE_OPTIONS} -DUSE_CUDA=ON" | |
| echo "USE_CUDA=ON" | tee -a "${GITHUB_ENV}" | |
| echo "CUDA_VERSION=${CUDA_VERSION}" | tee -a "${GITHUB_ENV}" | |
| echo "CUDA_VERSION_MAJMIN=${CUDA_VERSION_MAJMIN}" | tee -a "${GITHUB_ENV}" | |
| echo "CUDA_VERSION_MAJMIN_NODOT=${CUDA_VERSION_MAJMIN_NODOT}" | tee -a "${GITHUB_ENV}" | |
| echo "PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}" | tee -a "${GITHUB_ENV}" | |
| echo "UV_INDEX=${UV_INDEX}" | tee -a "${GITHUB_ENV}" | |
| echo "CLANG_TIDY_CMAKE_OPTIONS=${CLANG_TIDY_CMAKE_OPTIONS}" | tee -a "${GITHUB_ENV}" | |
| if [[ ! -x "$(command -v nvcc)" ]]; then | |
| export PATH="/usr/local/cuda/bin:${PATH}" | |
| export LD_LIBRARY_PATH="/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" | |
| echo "PATH=${PATH}" | tee -a "${GITHUB_ENV}" | |
| echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" | tee -a "${GITHUB_ENV}" | |
| fi | |
| if [[ -x "$(command -v nvcc)" ]]; then | |
| echo "\$ $(command -v nvcc) --version" && nvcc --version | |
| else | |
| echo "::warning::nvcc not found in PATH!" | |
| fi | |
| - name: Set environment (ROCm) | |
| if: contains(matrix.runner.toolkit, 'ROCm') | |
| run: | | |
| TOOLKIT="${{ matrix.runner.toolkit }}" | |
| ROCM_VERSION="${TOOLKIT##*-}" | |
| ROCM_VERSION_MAJMIN="$(echo ${ROCM_VERSION} | cut -d '.' -f-2)" | |
| ROCM_VERSION_MAJMIN_NODOT="${ROCM_VERSION_MAJMIN//./}" | |
| if [[ "${TOOLKIT}" == "Nightly-"* ]]; then | |
| # Use torch nightly builds | |
| export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/nightly/rocm${ROCM_VERSION_MAJMIN}" | |
| else | |
| export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/rocm${ROCM_VERSION_MAJMIN}" | |
| fi | |
| export UV_INDEX="${PIP_EXTRA_INDEX_URL}" | |
| export CLANG_TIDY_CMAKE_OPTIONS="${CLANG_TIDY_CMAKE_OPTIONS} -DUSE_ROCM=ON" | |
| echo "USE_ROCM=ON" | tee -a "${GITHUB_ENV}" | |
| echo "ROCM_VERSION=${ROCM_VERSION}" | tee -a "${GITHUB_ENV}" | |
| echo "ROCM_VERSION_MAJMIN=${ROCM_VERSION_MAJMIN}" | tee -a "${GITHUB_ENV}" | |
| echo "ROCM_VERSION_MAJMIN_NODOT=${ROCM_VERSION_MAJMIN_NODOT}" | tee -a "${GITHUB_ENV}" | |
| echo "PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}" | tee -a "${GITHUB_ENV}" | |
| echo "UV_INDEX=${UV_INDEX}" | tee -a "${GITHUB_ENV}" | |
| echo "CLANG_TIDY_CMAKE_OPTIONS=${CLANG_TIDY_CMAKE_OPTIONS}" | tee -a "${GITHUB_ENV}" | |
| if [[ ! -x "$(command -v hipcc)" ]]; then | |
| export PATH="/opt/rocm/bin:${PATH}" | |
| export LD_LIBRARY_PATH="/opt/rocm/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" | |
| echo "PATH=${PATH}" | tee -a "${GITHUB_ENV}" | |
| echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" | tee -a "${GITHUB_ENV}" | |
| fi | |
| if [[ -x "$(command -v hipcc)" ]]; then | |
| echo "\$ $(command -v hipcc) --version" && hipcc --version | |
| else | |
| echo "::warning::hipcc not found in PATH!" | |
| fi | |
| - name: Set environment (Metal) | |
| if: contains(matrix.runner.toolkit, 'Metal') | |
| run: | | |
| if [[ "${{ matrix.runner.toolkit }}" == "Nightly-"* ]]; then | |
| # Use torch nightly builds | |
| export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/nightly/cpu" | |
| export UV_INDEX="${PIP_EXTRA_INDEX_URL}" | |
| echo "PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}" | tee -a "${GITHUB_ENV}" | |
| echo "UV_INDEX=${UV_INDEX}" | tee -a "${GITHUB_ENV}" | |
| fi | |
| export CLANG_TIDY_CMAKE_OPTIONS="${CLANG_TIDY_CMAKE_OPTIONS} -DUSE_METAL=ON" | |
| echo "USE_METAL=ON" | tee -a "${GITHUB_ENV}" | |
| echo "CLANG_TIDY_CMAKE_OPTIONS=${CLANG_TIDY_CMAKE_OPTIONS}" | tee -a "${GITHUB_ENV}" | |
| - name: Setup Python and uv with caching | |
| id: setup-uv | |
| uses: astral-sh/setup-uv@v7 | |
| with: | |
| python-version: ${{ matrix.python-version }} | |
| activate-environment: true | |
| # Do not use cache for self-hosted runners, as it will download/upload caches which is slow. | |
| enable-cache: ${{ !startsWith(matrix.runner.name, 'self-hosted') }} | |
| prune-cache: ${{ !startsWith(matrix.runner.name, 'self-hosted') }} | |
| # Use runner tool_cache for self-hosted runners | |
| cache-local-path: ${{ env.UV_CACHE_DIR }} | |
| ignore-nothing-to-cache: true | |
| # Extra cache key to upload/download caches on GitHub-hosted runners | |
| cache-suffix: uv-${{ runner.os }}-${{ runner.arch }}-${{ matrix.python-version }}-${{ matrix.runner.name }}-${{ matrix.runner.toolkit }} | |
| cache-dependency-glob: | | |
| pyproject.toml | |
| requirements*.txt | |
| .pre-commit-config.yaml | |
| - name: Setup venv | |
| id: setup-venv | |
| run: | | |
| set -o pipefail | |
| uv pip install --upgrade pip setuptools wheel | |
| if [[ "${UV_INDEX}" == *"/nightly/"* ]]; then | |
| uv pip install --prerelease=allow -v torch | |
| fi | |
| uv pip install -v -r requirements-test.txt | |
| echo "import torch; print(f'torch: {torch.__version__}')" | uv run --no-project --script - | |
| if [[ "${{ matrix.runner.toolkit }}" == *"CUDA"* ]]; then | |
| uv pip install --no-build-isolation-package=flash-attn -v -r requirements-test-cuda.txt | |
| echo "import flash_attn; print(f'flash_attn: {flash_attn.__version__}')" | uv run --no-project --script - | |
| elif [[ "${{ matrix.runner.toolkit }}" == *"ROCm"* ]]; then | |
| uv pip install -v -r requirements-test-rocm.txt | |
| elif [[ "${{ matrix.runner.toolkit }}" == *"Metal"* ]]; then | |
| uv pip install -v -r requirements-test-metal.txt | |
| else | |
| echo "::error::Unknown toolkit: ${{ matrix.runner.toolkit }}" | |
| exit 1 | |
| fi | |
| echo "::group::torch.utils.collect_env" | |
| uv run --no-project -m -- torch.utils.collect_env | |
| echo "::endgroup::" | |
| - name: Clear uv cache for self-hosted runners (if setup failed) | |
| if: >- | |
| ${{ | |
| failure() && | |
| startsWith(matrix.runner.name, 'self-hosted') && | |
| (steps.setup-uv.conclusion == 'failure' || steps.setup-venv.conclusion == 'failure') | |
| }} | |
| run: | | |
| echo "Clearing uv cache at ${UV_CACHE_DIR} due to failure." | |
| uv cache clean | |
| - name: Enable core dump generation (Linux / GitHub-hosted runners) | |
| if: ${{ runner.os == 'Linux' && !startsWith(matrix.runner.name, 'self-hosted') }} | |
| run: | | |
| sudo sysctl -w kernel.core_pattern="core.${{ matrix.python-version }}.${{ matrix.runner.toolkit }}.%P" | |
| sudo sysctl -w kernel.core_uses_pid=0 | |
| sudo sysctl -w fs.suid_dumpable=1 | |
| sysctl kernel.core_pattern kernel.core_uses_pid fs.suid_dumpable | |
| - name: Enable core dump generation (macOS / GitHub-hosted runners) | |
| if: ${{ runner.os == 'macOS' && !startsWith(matrix.runner.name, 'self-hosted') }} | |
| run: | | |
| sudo sysctl -w kern.corefile="core.${{ matrix.python-version }}.${{ matrix.runner.toolkit }}.%P" | |
| sudo sysctl -w kern.coredump=1 | |
| sudo sysctl -w kern.sugid_coredump=1 | |
| sysctl kern.corefile kern.coredump kern.sugid_coredump | |
| - name: Install project (wheel form) | |
| run: | | |
| uv pip install -v . | |
| - name: Run clang-tidy | |
| id: clang-tidy | |
| if: runner.os == 'Linux' | |
| run: | | |
| echo "\$ $(command -v clang-tidy) --version" && clang-tidy --version | |
| # Download run-clang-tidy script | |
| RCT_URL=https://raw.githubusercontent.com/llvm/llvm-project/refs/heads/release/21.x/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py | |
| echo "Downloading run-clang-tidy script from ${RCT_URL}" | |
| echo "import urllib.request; url = '${RCT_URL}'.rstrip('/'); urllib.request.urlretrieve(url, url.split('/')[-1])" | uv run --no-project --script - | |
| RUN_CLANG_TIDY=(uv run --no-project --script -- run-clang-tidy.py) | |
| if [[ -x "$(command -v clang-apply-replacements)" ]]; then | |
| echo "Using clang-apply-replacements from $(command -v clang-apply-replacements)" | |
| RUN_CLANG_TIDY+=(-fix -clang-apply-replacements-binary="$(command -v clang-apply-replacements)") | |
| else | |
| echo "::warning::clang-apply-replacements not found in PATH, automatic fixing disabled." | |
| fi | |
| # Run cmake to create the build directory with compile_commands.json | |
| cmake -S . -B cmake-build --fresh ${CLANG_TIDY_CMAKE_OPTIONS} # no quotes here | |
| echo "::group::compile_commands.json" | |
| ls -alh cmake-build/compile_commands.json | |
| uv run --no-project -m -- json.tool --no-ensure-ascii cmake-build/compile_commands.json | |
| echo "::endgroup::" | |
| CXX_FILES=$(find src -type f -iname "*.[ch]pp" -o -iname "*.cc" -o -iname "*.c" -o -iname "*.h") | |
| rc=0 | |
| echo "::group::run-clang-tidy" | |
| "${RUN_CLANG_TIDY[@]}" -clang-tidy-binary="$(command -v clang-tidy)" \ | |
| -exclude-header-filter='^(3rdparty|tvm)/.*$' \ | |
| -p="cmake-build" ${CXX_FILES} || rc="$?" | |
| echo "::endgroup::" | |
| rm -rf cmake-build run-clang-tidy.py | |
| if (( rc != 0 )); then | |
| echo "::error::clang-tidy found issues (exit code: ${rc}). Please run 'clang-tidy --fix' locally to fix them." | |
| git diff --color=always || true | |
| exit "${rc}" | |
| fi | |
| - name: Run examples with Python ${{ matrix.python-version }} (${{ matrix.runner.toolkit }}) | |
| if: contains(matrix.runner.toolkit, 'CUDA') | |
| run: | | |
| cd testing | |
| PYTEST=( | |
| uv run --no-project -m -- | |
| pytest --verbose --color=yes --durations=0 --showlocals --cache-clear | |
| ) | |
| "${PYTEST[@]}" --maxfail=3 --numprocesses=4 \ | |
| ../examples | |
| # NVIDIA CUDA tests | |
| - name: Run CUDA tests with Python ${{ matrix.python-version }} (${{ matrix.runner.toolkit }}) | |
| id: cuda-tests | |
| if: contains(matrix.runner.toolkit, 'CUDA') | |
| run: | | |
| cd testing | |
| PYTEST=( | |
| uv run --no-project -m -- | |
| pytest --verbose --color=yes --durations=0 --showlocals --cache-clear | |
| ) | |
| "${PYTEST[@]}" --maxfail=3 --numprocesses=4 \ | |
| ./python | |
| # AMD ROCm tests | |
| - name: Run ROCm tests with Python ${{ matrix.python-version }} (${{ matrix.runner.toolkit }}) | |
| id: rocm-tests | |
| if: contains(matrix.runner.toolkit, 'ROCm') | |
| run: | | |
| cd testing | |
| PYTEST=( | |
| uv run --no-project -m -- | |
| pytest --verbose --color=yes --durations=0 --showlocals --cache-clear | |
| ) | |
| "${PYTEST[@]}" --maxfail=3 --numprocesses=4 \ | |
| ./python/amd | |
| # Apple Metal tests | |
| - name: Run Metal tests with Python ${{ matrix.python-version }} (${{ matrix.runner.toolkit }}) | |
| id: metal-tests | |
| if: contains(matrix.runner.toolkit, 'Metal') | |
| run: | | |
| cd testing | |
| PYTEST=( | |
| uv run --no-project -m -- | |
| pytest --verbose --color=yes --durations=0 --showlocals --cache-clear | |
| ) | |
| "${PYTEST[@]}" --maxfail=3 --numprocesses=4 \ | |
| -k metal \ | |
| ./python | |
| - name: List generated files | |
| if: ${{ !cancelled() }} | |
| run: | | |
| find . -type f -name '*.py[co]' -delete | |
| find . -depth -type d -name "__pycache__" -exec rm -r "{}" + | |
| if git status --ignored --porcelain | grep -qvE '/$'; then | |
| ls -alh $(git status --ignored --porcelain | grep -vE '/$' | grep -oE '\S+$') | |
| fi |