# feat: add aws_g6e instances #880

name: Slurm Integration Tests

# Triggers: every push to main, and every pull request (no branch filter).
on:
  push:
    branches:
      - main
  pull_request:

# Workflow-wide environment. SLURM_VERSION is used as the docker image tag by
# the "Build slurm" and "Start slurm" steps. Quoted so it is always a string.
env:
  SLURM_VERSION: "21.08.6"

jobs:
  # Clones and builds slurm-docker-cluster, starts it with docker compose,
  # then runs scripts/slurmdockerint.sh from the TorchX checkout.
  slurm:
    runs-on: linux.20_04.4x
    permissions:
      id-token: write
      contents: read
    steps:
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          # Quoted so the version is passed as a string, not parsed as a float.
          python-version: "3.8"
          architecture: x64
      - name: Checkout TorchX
        uses: actions/checkout@v2
      - name: Checkout slurm-docker-cluster
        run: |
          set -ex
          # TODO: switch to trunk once https://github.com/giovtorres/slurm-docker-cluster/pull/29 lands
          git clone https://github.com/d4l3k/slurm-docker-cluster.git
      - name: Pull docker containers
        run: |
          set -ex
          cd slurm-docker-cluster
          docker compose pull --ignore-pull-failures
        # Best effort: a failed pull does not fail the job; the image is built
        # locally in the "Build slurm" step.
        continue-on-error: true
      # cache layers to avoid rebuilding slurm
      # NOTE(review): the action reference was garbled in the source text;
      # restored to the published action name and release tag — confirm.
      - uses: satackey/action-docker-layer-caching@v0.0.11
        continue-on-error: true
      - name: Build slurm
        run: |
          set -ex
          cd slurm-docker-cluster
          docker build -t slurm-docker-cluster:$SLURM_VERSION .
      - name: Start slurm
        run: |
          set -ex
          cd slurm-docker-cluster
          IMAGE_TAG=$SLURM_VERSION docker compose up --detach
      - name: Install Dependencies
        run: |
          set -ex
          pip install wheel
      # NOTE(review): SLURM_KNOWN_HOST is not set anywhere in the visible part
      # of this workflow; presumably supplied by the runner environment —
      # confirm. If unset, only an empty line is appended to known_hosts.
      - name: Run Slurm Integration Tests
        run: |
          set -e
          mkdir -p ~/.ssh
          echo "$SLURM_KNOWN_HOST" >> ~/.ssh/known_hosts
          scripts/slurmdockerint.sh