add detection of cuda_home from package nvidia-cuda-nvcc #209
name: Performance Regression Bot

on:
  issue_comment:
    types:
      - created

permissions:
  contents: read
  issues: write
  pull-requests: write

concurrency:
  # Group by issue/PR number so runs for different PRs do not cancel each other
  group: "${{ github.workflow }}-${{ github.event.issue.number }}"
  cancel-in-progress: true

env:
  PYTHONDEVMODE: "1"
  PYTHONUNBUFFERED: "1"
  PYTHONPATH: "" # explicit cleanup
  PIP_USER: "" # explicit cleanup
  COLUMNS: "100"
  FORCE_COLOR: "1"
  CLICOLOR_FORCE: "1"
  UV_INDEX_STRATEGY: "unsafe-best-match"
  UV_HTTP_TIMEOUT: "600"
  XDG_CACHE_HOME: "${{ github.workspace }}/.cache" # overridden below on self-hosted runners
  PIP_CACHE_DIR: "${{ github.workspace }}/.cache/pip" # overridden below on self-hosted runners
  UV_CACHE_DIR: "${{ github.workspace }}/.cache/uv" # overridden below on self-hosted runners
  PRE_COMMIT_HOME: "${{ github.workspace }}/.cache/pip/.pre-commit" # overridden below on self-hosted runners
jobs:
  pr-regression:
    name: Performance regression test between PR and main
    if: |
      github.repository_owner == 'tile-ai' &&
      github.event.issue.pull_request &&
      contains(github.event.comment.body, '@regression-perf')
    runs-on: ${{ matrix.runner.tags }}
    strategy:
      matrix:
        runner:
          - tags: [self-hosted, nvidia]
            name: self-hosted-nvidia
            toolkit: CUDA-12.8
        python-version:
          - "3.12"
      fail-fast: false
    timeout-minutes: 120
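    # Overall flow: verify that the commenter has write access, build the PR merge
    # commit into a "new" venv and the main branch into an "old" venv, run the
    # regression script against both interpreters, then post the results on the PR.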
    steps:
      - name: Get commenter permission
        id: perm
        uses: actions/github-script@v7
        with:
          script: |
            const username = context.payload.comment.user.login
            const { owner, repo } = context.repo
            const { data } = await github.rest.repos.getCollaboratorPermissionLevel({ owner, repo, username })
            core.setOutput('permission', data.permission) // admin|maintain|write|triage|read|none
      - name: Reject if not allowed
        if: ${{ steps.perm.outputs.permission != 'admin' && steps.perm.outputs.permission != 'maintain' && steps.perm.outputs.permission != 'write' }}
        run: |
          echo "Not authorized: permission=${{ steps.perm.outputs.permission }}"
          exit 1
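      # refs/pull/<number>/merge is the merge commit GitHub maintains for the PR,
      # so the benchmark runs against the PR as if it were merged into its base branch.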
      - name: Checkout repository
        uses: actions/checkout@v6
        with:
          ref: refs/pull/${{ github.event.issue.number }}/merge
          fetch-depth: 0
          submodules: recursive
      - name: Set environment (self-hosted runners)
        if: startsWith(matrix.runner.name, 'self-hosted')
        run: |
          # Hide sensitive data in logs for self-hosted runners
          if [[ -n "${{ secrets.SECRET_PATH_PREFIXES }}" ]]; then
            echo "::add-mask::${{ secrets.SECRET_PATH_PREFIXES }}"
            # SECRET_PATH_PREFIXES is a colon-separated list; mask each entry individually as well
            for secret in $(echo "${{ secrets.SECRET_PATH_PREFIXES }}" | tr ':' '\n'); do
              echo "::add-mask::${secret}"
            done
          fi
          # Use the runner tool_cache as the cache root on self-hosted runners to avoid
          # internet connection issues and to share the cache between jobs.
          export XDG_CACHE_HOME="${{ runner.tool_cache }}/.ci-cache-${{ github.workflow }}"
          echo "XDG_CACHE_HOME=${XDG_CACHE_HOME}" | tee -a "${GITHUB_ENV}"
          echo "PIP_CACHE_DIR=${XDG_CACHE_HOME}/pip" | tee -a "${GITHUB_ENV}"
          echo "UV_CACHE_DIR=${XDG_CACHE_HOME}/uv" | tee -a "${GITHUB_ENV}"
          echo "PRE_COMMIT_HOME=${XDG_CACHE_HOME}/pip/.pre-commit" | tee -a "${GITHUB_ENV}"
      # Do not use ccache on self-hosted runners, as downloading/uploading the cache is slow
      # and self-hosted runners usually have enough CPU power to compile without it.
      - name: Setup ccache (GitHub-hosted runners)
        id: setup-ccache
        if: ${{ !startsWith(matrix.runner.name, 'self-hosted') }}
        uses: hendrikmuhs/ccache-action@v1
        with:
          create-symlink: true
          evict-old-files: "7d"
          append-timestamp: false
          key: ${{ runner.os }}-${{ runner.arch }}-${{ matrix.runner.toolkit }}-${{ hashFiles('**/*.cc') }}
          restore-keys: |
            ${{ runner.os }}-${{ runner.arch }}-${{ matrix.runner.toolkit }}-${{ hashFiles('**/*.cc') }}
            ${{ runner.os }}-${{ runner.arch }}-${{ matrix.runner.toolkit }}
            ${{ runner.os }}-${{ runner.arch }}
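      # The toolkit entry (e.g. "CUDA-12.8") is split into CUDA_VERSION=12.8 and
      # CUDA_VERSION_MAJMIN_NODOT=128, which selects the matching PyTorch wheel index
      # (e.g. .../whl/cu128, or the nightly index for "Nightly-*" toolkits).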
      - name: Set environment (CUDA)
        if: contains(matrix.runner.toolkit, 'CUDA')
        run: |
          TOOLKIT="${{ matrix.runner.toolkit }}"
          CUDA_VERSION="${TOOLKIT##*-}"
          CUDA_VERSION_MAJMIN="$(echo "${CUDA_VERSION}" | cut -d '.' -f-2)"
          CUDA_VERSION_MAJMIN_NODOT="${CUDA_VERSION_MAJMIN//./}"
          if [[ "${TOOLKIT}" == "Nightly-"* ]]; then
            # Use torch nightly builds
            export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/nightly/cu${CUDA_VERSION_MAJMIN_NODOT}"
          else
            export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cu${CUDA_VERSION_MAJMIN_NODOT}"
          fi
          export UV_INDEX="${PIP_EXTRA_INDEX_URL}"
          export CLANG_TIDY_CMAKE_OPTIONS="${CLANG_TIDY_CMAKE_OPTIONS} -DUSE_CUDA=ON"
          echo "USE_CUDA=ON" | tee -a "${GITHUB_ENV}"
          echo "CUDA_VERSION=${CUDA_VERSION}" | tee -a "${GITHUB_ENV}"
          echo "CUDA_VERSION_MAJMIN=${CUDA_VERSION_MAJMIN}" | tee -a "${GITHUB_ENV}"
          echo "CUDA_VERSION_MAJMIN_NODOT=${CUDA_VERSION_MAJMIN_NODOT}" | tee -a "${GITHUB_ENV}"
          echo "PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}" | tee -a "${GITHUB_ENV}"
          echo "UV_INDEX=${UV_INDEX}" | tee -a "${GITHUB_ENV}"
          echo "CLANG_TIDY_CMAKE_OPTIONS=${CLANG_TIDY_CMAKE_OPTIONS}" | tee -a "${GITHUB_ENV}"
          if [[ ! -x "$(command -v nvcc)" ]]; then
            export PATH="/usr/local/cuda/bin:${PATH}"
            export LD_LIBRARY_PATH="/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
            echo "PATH=${PATH}" | tee -a "${GITHUB_ENV}"
            echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" | tee -a "${GITHUB_ENV}"
          fi
          if [[ -x "$(command -v nvcc)" ]]; then
            echo "\$ $(command -v nvcc) --version" && nvcc --version
          else
            echo "::warning::nvcc not found in PATH!"
          fi
      - name: Setup Python and uv with caching
        id: setup-uv
        uses: astral-sh/setup-uv@v7
        with:
          python-version: ${{ matrix.python-version }}
          activate-environment: true
          # Do not use the cache on self-hosted runners, as downloading/uploading caches is slow.
          enable-cache: ${{ !startsWith(matrix.runner.name, 'self-hosted') }}
          prune-cache: ${{ !startsWith(matrix.runner.name, 'self-hosted') }}
          # Use the runner tool_cache for self-hosted runners
          cache-local-path: ${{ env.UV_CACHE_DIR }}
          ignore-nothing-to-cache: true
          # Extra cache key to upload/download caches on GitHub-hosted runners
          cache-suffix: uv-${{ runner.os }}-${{ runner.arch }}-${{ matrix.python-version }}-${{ matrix.runner.name }}-${{ matrix.runner.toolkit }}
          cache-dependency-glob: |
            pyproject.toml
            requirements*.txt
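      # Two isolated virtual environments are built from the same checkout:
      # "new" holds the PR merge commit and "old" holds main as the baseline.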
      - name: Setup environments
        id: setup-venv
        run: |
          set -e
          uv venv --python "${{ matrix.python-version }}" new
          source new/bin/activate
          uv pip install -v -r requirements-test.txt
          uv pip install -v .
      - name: Install Main version (Baseline)
        run: |
          set -e
          # Clean build artifacts but keep the "new" venv and the cache directory
          git clean -dxf -e new/ -e .cache/
          git checkout main
          git submodule update --init --recursive
          uv venv --python "${{ matrix.python-version }}" old
          source old/bin/activate
          uv pip install -v -r requirements-test.txt
          uv pip install -v .
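      # If dependency installation failed, the shared uv cache on the self-hosted
      # runner may be left in a bad state, so it is cleared rather than reused.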
      - name: Clear uv cache for self-hosted runners (if setup failed)
        if: >-
          ${{
            failure() &&
            startsWith(matrix.runner.name, 'self-hosted') &&
            (steps.setup-uv.conclusion == 'failure' || steps.setup-venv.conclusion == 'failure')
          }}
        run: |
          echo "Clearing uv cache at ${UV_CACHE_DIR} due to failure."
          uv cache clean
      - name: Enable core dump generation (Linux / GitHub-hosted runners)
        if: ${{ runner.os == 'Linux' && !startsWith(matrix.runner.name, 'self-hosted') }}
        run: |
          sudo sysctl -w kernel.core_pattern="core.${{ matrix.python-version }}.${{ matrix.runner.toolkit }}.%P"
          sudo sysctl -w kernel.core_uses_pid=0
          sudo sysctl -w fs.suid_dumpable=1
          sysctl kernel.core_pattern kernel.core_uses_pid fs.suid_dumpable
      - name: Enable core dump generation (macOS / GitHub-hosted runners)
        if: ${{ runner.os == 'macOS' && !startsWith(matrix.runner.name, 'self-hosted') }}
        run: |
          sudo sysctl -w kern.corefile="core.${{ matrix.python-version }}.${{ matrix.runner.toolkit }}.%P"
          sudo sysctl -w kern.coredump=1
          sudo sysctl -w kern.sugid_coredump=1
          sysctl kern.corefile kern.coredump kern.sugid_coredump
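      # The regression script is given both interpreters via OLD_PYTHON/NEW_PYTHON and
      # writes its results to regression_result.md (table) and regression_result.png (plot).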
      - name: Run performance regression test
        run: |
          source new/bin/activate
          OLD_PYTHON=./old/bin/python NEW_PYTHON=./new/bin/python \
          PERF_REGRESSION_MD=regression_result.md PERF_REGRESSION_PNG=regression_result.png \
            python ./maint/scripts/test_perf_regression.py
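      # Multiline step outputs must use the "name<<EOF ... EOF" heredoc form when
      # appended to $GITHUB_OUTPUT.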
      - name: Read markdown table
        id: read_md
        run: |
          echo "content<<EOF" >> "$GITHUB_OUTPUT"
          cat regression_result.md >> "$GITHUB_OUTPUT"
          echo "EOF" >> "$GITHUB_OUTPUT"
      - name: Upload result image as artifact
        uses: actions/upload-artifact@v4
        with:
          name: perf-regression-${{ github.run_id }}
          path: regression_result.png
      - name: Post test results as PR comment
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const fs = require('fs');
            // Read the file directly instead of passing it via env/outputs to avoid escaping issues
            const md = fs.readFileSync('regression_result.md', 'utf8');
            const runUrl = `${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}`;
            const body =
              'Performance Benchmark Report\n' +
              '============================\n\n' +
              `Triggered by: @${context.payload.comment.user.login}\n` +
              `Workflow run: ${runUrl}\n\n` +
              'Results\n' +
              '-------\n\n' +
              md + '\n\n' +
              'Artifacts\n' +
              '---------\n\n' +
              '- regression_result.png (speedup plot) is attached as a workflow artifact. Download it from the workflow run page above.\n';
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              body
            });