Skip to content

Commit

Permalink
AZP/AWS: EFA tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexey-Rivkin committed Feb 19, 2025
1 parent baf8cb2 commit 12da037
Show file tree
Hide file tree
Showing 4 changed files with 404 additions and 277 deletions.
64 changes: 64 additions & 0 deletions buildlib/pr/efa_aws.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Azure Pipelines job template: submits the UCX EFA tests as an AWS Batch job
# on EKS, streams the pod log into the pipeline and reports the job's result.
# Template parameters used here:
#   name    - suffix of the job id, also used as the display name
#   demands - agent demands for the MLNX pool
jobs:
  - job: tests_${{ parameters.name }}
    pool:
      name: MLNX
      demands: ${{ parameters.demands }}
    displayName: ${{ parameters.name }}
    container: aws_tools
    timeoutInMinutes: 360
    workspace:
      clean: outputs
    steps:
      - checkout: self
        clean: true
        fetchDepth: 100
        retryCountOnTaskFailure: 5
      - bash: |
          set -exE

          # Print the AWS Batch status (e.g. RUNNING, SUCCEEDED, FAILED) of job $1
          job_status() {
              aws batch describe-jobs --jobs "$1" --query 'jobs[0].status' --output text
          }

          # Debug
          echo "Build.SourceVersion: $(Build.SourceVersion)"
          # The commit message is free-form text, so it is read from the
          # environment rather than pasted into the script source by Azure
          echo "PR merge message: ${SOURCE_VERSION_MESSAGE}"

          # Generate properties json from template
          envsubst < buildlib/pr/efa_vars.template > efa_vars.json
          jq '.' efa_vars.json

          # Submit AWS batch job and capture job ID
          aws eks update-kubeconfig --name ucx-ci
          JOB_ID=$(aws batch submit-job \
              --job-name EFA_$(Build.BuildId) \
              --job-definition danielpr-test2-EFA \
              --job-queue ucx-ci-JQ \
              --eks-properties-override file://./efa_vars.json \
              --query 'jobId' --output text)

          # Wait for the job to start running. Also stop on a terminal state, so
          # a job that finishes before it is seen RUNNING does not stall the step
          # until the job timeout. A failed status query is retried.
          while true; do
              STATUS=$(job_status "$JOB_ID") || STATUS=UNKNOWN
              case "$STATUS" in
                  RUNNING|SUCCEEDED|FAILED) break ;;
              esac
              sleep 15
          done

          if [ "$STATUS" = "RUNNING" ]; then
              # Stream the log of the pod that belongs to this job. Fall back to
              # the first pod in the namespace only if Batch reports no pod name.
              POD=$(aws batch describe-jobs --jobs "$JOB_ID" \
                  --query 'jobs[0].eksProperties.podProperties.podName' --output text)
              if [ -z "$POD" ] || [ "$POD" = "None" ]; then
                  POD=$(kubectl get pod -n ucx-ci-batch-nodes -o jsonpath='{.items[0].metadata.name}')
              fi
              kubectl -n ucx-ci-batch-nodes logs -f "$POD"
          fi

          # Propagate exit status: wait for the terminal state first, because
          # the status can still read RUNNING right after the log stream closes
          while true; do
              STATUS=$(job_status "$JOB_ID") || STATUS=UNKNOWN
              case "$STATUS" in
                  SUCCEEDED|FAILED) break ;;
              esac
              sleep 15
          done
          if [ "$STATUS" != "SUCCEEDED" ]; then
              echo "AWS Batch job $JOB_ID finished with status $STATUS"
              exit 1
          fi
        displayName: EFA test in AWS
        env:
          AWS_ACCESS_KEY_ID: $(AWS_ACCESS_KEY_ID)
          AWS_SECRET_ACCESS_KEY: $(AWS_SECRET_ACCESS_KEY)
          # Consumed by the debug echo in the script above
          SOURCE_VERSION_MESSAGE: $(Build.SourceVersionMessage)
          # The variables below are substituted into efa_vars.template by envsubst
          BUILD_NUMBER: $(Build.BuildId)-$(Build.BuildNumber)
          JOB_URL: $(System.TeamFoundationCollectionUri)$(System.TeamProject)/_build/results?buildId=$(Build.BuildId)
          EXECUTOR_NUMBER: $(AZP_AGENT_ID)
          nworkers: 4
          worker: $(worker_id)
          RUN_TESTS: yes
          TEST_PERF: 0
          PROTO_ENABLE: yes
          ASAN_CHECK: no
          VALGRIND_CHECK: no
          RUNNING_IN_AZURE: yes
          # NOTE(review): the commands are joined with ';' and end with 'sleep 1h',
          # so the container exit code does not reflect ./contrib/test_jenkins.sh,
          # and the clone takes the default branch rather than this build's
          # commit - confirm both are intended.
          CMD: "yum groupinstall 'Development Tools' 'C Development Tools and Libraries' -y ; yum install -y git wget environment-modules autoconf libtool python3 python3-pip pkg-config libnl3-devel curl valgrind valgrind-devel rdma-core-devel libibverbs libibverbs-utils librdmacm librdmacm-utils ; git clone https://github.com/openucx/ucx.git; cd ucx; ./contrib/test_jenkins.sh; sleep 1h"
          # CMD: "yum groupinstall 'Development Tools' 'C Development Tools and Libraries' -y ; yum install -y git wget environment-modules autoconf libtool python3 python3-pip pkg-config libnl3-devel curl valgrind valgrind-devel rdma-core-devel libibverbs libibverbs-utils librdmacm librdmacm-utils ; git clone --depth 1 https://github.com/openucx/ucx.git; cd ucx; git checkout $(Build.SourceVersion); ./contrib/test_jenkins.sh"
55 changes: 55 additions & 0 deletions buildlib/pr/efa_vars.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
{
  "podProperties": {
    "containers": [
      {
        "command": ["/bin/sh", "-c", "${CMD}"],
        "env": [
          { "name": "RUN_TESTS", "value": "${RUN_TESTS}" },
          { "name": "TEST_PERF", "value": "${TEST_PERF}" },
          { "name": "PROTO_ENABLE", "value": "${PROTO_ENABLE}" },
          { "name": "ASAN_CHECK", "value": "${ASAN_CHECK}" },
          { "name": "VALGRIND_CHECK", "value": "${VALGRIND_CHECK}" },
          { "name": "nworkers", "value": "${nworkers}" },
          { "name": "BUILD_NUMBER", "value": "${BUILD_NUMBER}" },
          { "name": "EXECUTOR_NUMBER", "value": "${EXECUTOR_NUMBER}" },
          { "name": "JOB_URL", "value": "${JOB_URL}" },
          { "name": "RUNNING_IN_AZURE", "value": "${RUNNING_IN_AZURE}" }
        ]
      }
    ]
  }
}
Loading

0 comments on commit 12da037

Please sign in to comment.