Skip to content

CI GPU:tracel-ai/burn#4324 #376

CI GPU:tracel-ai/burn#4324

CI GPU:tracel-ai/burn#4324 #376

Workflow file for this run

name: CI GPU

# The only trigger is a manual/API dispatch; there are no push or
# pull_request triggers in this workflow.
on:
  workflow_dispatch:
    inputs:
      # Optional: left empty when the run is not associated with a pull request.
      pr_number:
        description: "Number of the pull request that triggers this run if any"
        type: number
        required: false
# The CI server recovers the PR number from workflow_run events by reading
# the run name, so the "<workflow>:<repository>#<pr_number>" format below
# must be kept exactly as is.
run-name: "${{ github.workflow }}:${{ github.repository }}#${{ inputs.pr_number }}"
env:
  # Note: It is not possible to define top level env vars and pass them to
  # composite actions. To work around this issue we use inputs and define all
  # the env vars here.
  # Quoted so the version is always read as a string.
  RUST_PREVIOUS_VERSION: "1.86.0"

  # Dependency versioning
  # from wgpu repo: https://github.com/gfx-rs/wgpu/blob/trunk/.github/workflows/ci.yml
  # (no dependency versions are currently pinned in this section)

  # GCP runners
  GCP_RUNNERS_IMAGE_FAMILY: "tracel-ci-ubuntu-2404-amd64-nvidia"
  GCP_RUNNERS_MACHINE_TYPE: "g2-standard-4"
  GCP_RUNNERS_ZONE: "us-east1-c"

  # Test in release mode (make it an empty string to test in debug mode)
  TEST_RELEASE_FLAG: "--release"
# Runs that share the same workflow name and git ref form one concurrency
# group; starting a new run cancels the run still in progress in that group.
# NOTE(review): the group key does not include inputs.pr_number. Confirm that
# dispatches for different pull requests never share the same ref, otherwise
# they would cancel each other.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
jobs:
  # Republishes the workflow-level env vars as job outputs so that dependent
  # jobs can read them through the `needs` context (used in the `runs-on`
  # labels of the test jobs below).
  prepare-checks:
    runs-on: ubuntu-latest
    outputs:
      rust-prev-version: ${{ env.RUST_PREVIOUS_VERSION }}
      gcp_runners_image_family: ${{ env.GCP_RUNNERS_IMAGE_FAMILY }}
      gcp_runners_machine_type: ${{ env.GCP_RUNNERS_MACHINE_TYPE }}
      gcp_runners_zone: ${{ env.GCP_RUNNERS_ZONE }}
    steps:
      # Placeholder step: `if: false` means it is always skipped.
      - name: Do Nothing
        if: false
        run: echo

  # Runs the test suite with the `gcp-cuda-runner` CI profile on a GPU runner.
  linux-std-cuda-tests:
    needs: [prepare-checks]
    timeout-minutes: 60
    # '@id:' label must be unique within this workflow
    # NOTE(review): the id does not include the matrix entry; if more rust
    # versions are added to the matrix, confirm the ids remain unique.
    runs-on:
      - '@id:burn-cuda-job-${{github.run_id}}-${{github.run_attempt}}'
      - '@pr_number:${{ inputs.pr_number }}'
      - '@organization:tracel-ai'
      - '@repository:burn'
      - '@image-family:${{ needs.prepare-checks.outputs.gcp_runners_image_family }}'
      - '@machine-type:${{ needs.prepare-checks.outputs.gcp_runners_machine_type }}'
      - '@zones:${{ needs.prepare-checks.outputs.gcp_runners_zone }}'
      - '@gpu:true'
    env:
      LD_LIBRARY_PATH: '/usr/local/cuda/lib64'
      # disable incremental compilation (reduces artifact size)
      CARGO_PROFILE_TEST_INCREMENTAL: 'false'
    # Keep the strategy to be able to easily add new rust versions if required
    strategy:
      matrix:
        rust: [stable]
        include:
          - rust: stable
            toolchain: stable
    steps:
      - name: Setup Rust
        uses: tracel-ai/github-actions/setup-rust@v6
        with:
          rust-toolchain: ${{ matrix.toolchain }}
          enable-cache: false
      # --------------------------------------------------------------------------------
      - name: Tests (burn-cuda)
        run: cargo xtask test ${{ env.TEST_RELEASE_FLAG }} --ci gcp-cuda-runner

  # Runs the test suite with the `gcp-vulkan-runner` CI profile on a GPU runner.
  linux-std-vulkan-tests:
    needs: [prepare-checks]
    timeout-minutes: 60
    # '@id:' label must be unique within this workflow
    # NOTE(review): the id does not include the matrix entry; if more rust
    # versions are added to the matrix, confirm the ids remain unique.
    runs-on:
      - '@id:burn-vulkan-job-${{github.run_id}}-${{github.run_attempt}}'
      - '@pr_number:${{ inputs.pr_number }}'
      - '@organization:tracel-ai'
      - '@repository:burn'
      - '@image-family:${{ needs.prepare-checks.outputs.gcp_runners_image_family }}'
      - '@machine-type:${{ needs.prepare-checks.outputs.gcp_runners_machine_type }}'
      - '@zones:${{ needs.prepare-checks.outputs.gcp_runners_zone }}'
      - '@gpu:true'
    env:
      # disable incremental compilation (reduces artifact size)
      CARGO_PROFILE_TEST_INCREMENTAL: 'false'
    # Keep the strategy to be able to easily add new rust versions if required
    strategy:
      matrix:
        rust: [stable]
        include:
          - rust: stable
            toolchain: stable
    steps:
      - name: Setup Rust
        uses: tracel-ai/github-actions/setup-rust@v6
        with:
          rust-toolchain: ${{ matrix.toolchain }}
          enable-cache: false
      # --------------------------------------------------------------------------------
      - name: Tests (burn-vulkan)
        run: cargo xtask test ${{ env.TEST_RELEASE_FLAG }} --ci gcp-vulkan-runner

  # Runs the test suite with the `gcp-wgpu-runner` CI profile on a GPU runner.
  linux-std-wgpu-tests:
    needs: [prepare-checks]
    timeout-minutes: 60
    # '@id:' label must be unique within this workflow
    # NOTE(review): the id does not include the matrix entry; if more rust
    # versions are added to the matrix, confirm the ids remain unique.
    runs-on:
      - '@id:burn-wgpu-job-${{github.run_id}}-${{github.run_attempt}}'
      - '@pr_number:${{ inputs.pr_number }}'
      - '@organization:tracel-ai'
      - '@repository:burn'
      - '@image-family:${{ needs.prepare-checks.outputs.gcp_runners_image_family }}'
      - '@machine-type:${{ needs.prepare-checks.outputs.gcp_runners_machine_type }}'
      - '@zones:${{ needs.prepare-checks.outputs.gcp_runners_zone }}'
      - '@gpu:true'
    env:
      # disable incremental compilation (reduces artifact size)
      CARGO_PROFILE_TEST_INCREMENTAL: 'false'
    # Keep the strategy to be able to easily add new rust versions if required
    strategy:
      matrix:
        rust: [stable]
        include:
          - rust: stable
            toolchain: stable
    steps:
      - name: Setup Rust
        uses: tracel-ai/github-actions/setup-rust@v6
        with:
          rust-toolchain: ${{ matrix.toolchain }}
          enable-cache: false
      # --------------------------------------------------------------------------------
      - name: Tests (burn-wgpu)
        run: cargo xtask test ${{ env.TEST_RELEASE_FLAG }} --ci gcp-wgpu-runner
  # TODO: fix macos CI tests (M2 virtualization)
  # macos-std-metal-tests:
  #   runs-on: blaze/macos-14
  #   needs: [prepare-checks]
  #   timeout-minutes: 60
  #   # Keep the strategy to be able to easily add new rust versions if required
  #   strategy:
  #     matrix:
  #       rust: [stable]
  #       include:
  #         - rust: stable
  #           toolchain: stable
  #   steps:
  #     - name: Setup Rust
  #       uses: tracel-ai/github-actions/setup-rust@v6
  #       with:
  #         rust-toolchain: ${{ matrix.toolchain }}
  #         cache-key: ${{ matrix.rust }}-macos
  #     # --------------------------------------------------------------------------------
  #     - name: Device check
  #       run: system_profiler SPHardwareDataType
  #     # --------------------------------------------------------------------------------
  #     - name: Tests
  #       run: cargo xtask test ${{ env.TEST_RELEASE_FLAG }} --ci github-mac-runner