Skip to content

Commit

Permalink
Merge pull request #1688 from CEED/jeremy/cuda-asan
Browse files Browse the repository at this point in the history
GPU CI Updates
  • Loading branch information
jeremylt authored Oct 11, 2024
2 parents dfc3c7d + f329610 commit f529add
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 31 deletions.
38 changes: 23 additions & 15 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,10 @@ noether-cuda:
- echo "-------------- FC ------------------" && $FC --version
- echo "-------------- NVCC ----------------" && $NVCC --version
- echo "-------------- GCOV ----------------" && gcov --version
# ASAN
- echo "-------------- ASAN ----------------"
- export ASAN=1 AFLAGS="-fsanitize=address -fsanitize=leak" ASAN_OPTIONS=protect_shadow_gap=0
- echo $AFLAGS
script:
- rm -f .SUCCESS
# libCEED
Expand All @@ -200,11 +204,29 @@ noether-cuda:
# Note: PETSC_DIR is set by default in GitLab runner env, unsetting to isolate core tests
- export PETSC_DIR= PETSC_ARCH=
- make -k -j$((NPROC_GPU / NPROC_POOL)) BACKENDS="$BACKENDS_GPU" JUNIT_BATCH="cuda" junit realsearch=%
# Rebuild without ASAN
- unset ASAN AFLAGS ASAN_OPTIONS
- make clean
- PEDANTIC=1 make -k -j$NPROC_CPU -l$NPROC_CPU
# Libraries for examples
# -- PETSc with CUDA (minimal)
- export PETSC_DIR=/projects/petsc PETSC_ARCH=mpich-cuda-O PETSC_OPTIONS='-use_gpu_aware_mpi 0' && git -C $PETSC_DIR -c safe.directory=$PETSC_DIR describe
- echo "-------------- PETSc ---------------" && make -C $PETSC_DIR info
- make -k -j$((NPROC_GPU / NPROC_POOL)) BACKENDS="$BACKENDS_GPU" JUNIT_BATCH="cuda" junit search="petsc fluids-navierstokes solids"
- make -k -j$((NPROC_GPU / NPROC_POOL)) BACKENDS="$BACKENDS_GPU" JUNIT_BATCH="cuda" junit search="petsc fluids solids"
# -- MFEM v4.7
- cd .. && export MFEM_VERSION=mfem-4.7 && { [[ -d $MFEM_VERSION ]] || { git clone --depth 1 --branch v4.7 https://github.com/mfem/mfem.git $MFEM_VERSION && make -C $MFEM_VERSION -j$(nproc) serial CXXFLAGS="-O -std=c++11"; }; } && export MFEM_DIR=$PWD/$MFEM_VERSION && cd libCEED
- echo "-------------- MFEM ----------------" && make -C $MFEM_DIR info
- make -k -j$((NPROC_GPU / NPROC_POOL)) BACKENDS="$BACKENDS_GPU" JUNIT_BATCH="cuda" junit search=mfem
# -- Nek5000 v19.0
- export COVERAGE=0
- cd .. && export NEK5K_VERSION=Nek5000-19.0 && { [[ -d $NEK5K_VERSION ]] || { git clone --depth 1 --branch v19.0 https://github.com/Nek5000/Nek5000.git $NEK5K_VERSION && cd $NEK5K_VERSION/tools && ./maketools genbox genmap reatore2 && cd ../..; }; } && export NEK5K_DIR=$PWD/$NEK5K_VERSION && export PATH=$NEK5K_DIR/bin:$PATH MPI=0 && cd libCEED
- echo "-------------- Nek5000 -------------" && git -C $NEK5K_DIR describe --tags
- export NPROC_POOL=1
- make -k -j$NPROC_GPU BACKENDS="$BACKENDS_GPU" JUNIT_BATCH="cuda" junit search=nek NEK5K_DIR=$NEK5K_DIR
# -- deal.II 8bd5c262f13e15793aa206b6eed8774a9b25ce11
- export DEAL_II_ROOT_DIR=/projects/dealii DEAL_II_DIR=/projects/dealii/install
- echo "-------------- deal.II -------------" && git -C $DEAL_II_ROOT_DIR -c safe.directory=$DEAL_II_ROOT_DIR describe --always
- make -k -j$((NPROC_GPU / NPROC_POOL)) BACKENDS="$BACKENDS_GPU" JUNIT_BATCH="cuda" junit search=dealii DEAL_II_DIR=$DEAL_II_DIR
# Clang-tidy
- echo "-------------- clang-tidy ----------" && clang-tidy --version
- TIDY_OPTS="-fix-errors" make -j$NPROC_CPU tidy && git diff --color=always --exit-code
Expand Down Expand Up @@ -269,20 +291,6 @@ noether-rocm:
- export PETSC_DIR=/projects/petsc PETSC_ARCH=mpich-hip && git -C $PETSC_DIR -c safe.directory=$PETSC_DIR describe
- echo "-------------- PETSc ---------------" && make -C $PETSC_DIR info
- make -k -j$((NPROC_GPU / NPROC_POOL)) BACKENDS="$BACKENDS_GPU" JUNIT_BATCH="hip" junit search="petsc fluids solids"
# -- MFEM v4.7
- cd .. && export MFEM_VERSION=mfem-4.7 && { [[ -d $MFEM_VERSION ]] || { git clone --depth 1 --branch v4.7 https://github.com/mfem/mfem.git $MFEM_VERSION && make -C $MFEM_VERSION -j$(nproc) serial CXXFLAGS="-O -std=c++11"; }; } && export MFEM_DIR=$PWD/$MFEM_VERSION && cd libCEED
- echo "-------------- MFEM ----------------" && make -C $MFEM_DIR info
- make -k -j$((NPROC_GPU / NPROC_POOL)) BACKENDS="$BACKENDS_GPU" JUNIT_BATCH="hip" junit search=mfem
# -- Nek5000 v19.0
- export COVERAGE=0
- cd .. && export NEK5K_VERSION=Nek5000-19.0 && { [[ -d $NEK5K_VERSION ]] || { git clone --depth 1 --branch v19.0 https://github.com/Nek5000/Nek5000.git $NEK5K_VERSION && cd $NEK5K_VERSION/tools && ./maketools genbox genmap reatore2 && cd ../..; }; } && export NEK5K_DIR=$PWD/$NEK5K_VERSION && export PATH=$NEK5K_DIR/bin:$PATH MPI=0 && cd libCEED
- echo "-------------- Nek5000 -------------" && git -C $NEK5K_DIR describe --tags
- export NPROC_POOL=1
- make -k -j$NPROC_GPU BACKENDS="$BACKENDS_GPU" JUNIT_BATCH="hip" junit search=nek NEK5K_DIR=$NEK5K_DIR
# -- deal.II 8bd5c262f13e15793aa206b6eed8774a9b25ce11
- export DEAL_II_ROOT_DIR=/projects/dealii DEAL_II_DIR=/projects/dealii/install
- echo "-------------- deal.II -------------" && git -C $DEAL_II_ROOT_DIR -c safe.directory=$DEAL_II_ROOT_DIR describe --always
- make -k -j$((NPROC_GPU / NPROC_POOL)) BACKENDS="$BACKENDS_GPU" JUNIT_BATCH="hip" junit search=dealii DEAL_II_DIR=$DEAL_II_DIR
# Clang-tidy
- echo "-------------- clang-tidy ----------" && clang-tidy --version
- TIDY_OPTS="-fix-errors" make -j$NPROC_CPU tidy && git diff --color=always --exit-code
Expand Down
21 changes: 13 additions & 8 deletions backends/cuda-ref/ceed-cuda-ref-operator.c
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,7 @@ static int CeedOperatorSetupAtPoints_Cuda(CeedOperator op) {
CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr_points, e, &num_points_elem));
impl->num_points[e] = num_points_elem;
}
CeedCallBackend(CeedElemRestrictionDestroy(&rstr_points));
}
impl->max_num_points = max_num_points;

Expand Down Expand Up @@ -779,6 +780,8 @@ static int CeedOperatorApplyAddAtPoints_Cuda(CeedOperator op, CeedVector in_vec,
CeedCallBackend(CeedOperatorAtPointsGetPoints(op, &rstr_points, &point_coords));
CeedCallBackend(CeedElemRestrictionCreateVector(rstr_points, NULL, &impl->point_coords_elem));
CeedCallBackend(CeedElemRestrictionApply(rstr_points, CEED_NOTRANSPOSE, point_coords, impl->point_coords_elem, request));
CeedCallBackend(CeedVectorDestroy(&point_coords));
CeedCallBackend(CeedElemRestrictionDestroy(&rstr_points));
}

// Process inputs
Expand Down Expand Up @@ -1538,11 +1541,9 @@ static int CeedSingleOperatorAssembleSetup_Cuda(CeedOperator op, CeedInt use_cee
CeedCallCuda(ceed, cudaMemcpy(&asmb->d_B_in[i * elem_size_in * num_qpts_in], h_B_in, elem_size_in * num_qpts_in * sizeof(CeedScalar),
cudaMemcpyHostToDevice));
}

if (identity) {
CeedCallBackend(CeedFree(&identity));
}
CeedCallBackend(CeedFree(&identity));
}
CeedCallBackend(CeedFree(&eval_modes_in));

// Load into B_out, in order that they will be used in eval_modes_out
{
Expand Down Expand Up @@ -1575,11 +1576,9 @@ static int CeedSingleOperatorAssembleSetup_Cuda(CeedOperator op, CeedInt use_cee
CeedCallCuda(ceed, cudaMemcpy(&asmb->d_B_out[i * elem_size_out * num_qpts_out], h_B_out, elem_size_out * num_qpts_out * sizeof(CeedScalar),
cudaMemcpyHostToDevice));
}

if (identity) {
CeedCallBackend(CeedFree(&identity));
}
CeedCallBackend(CeedFree(&identity));
}
CeedCallBackend(CeedFree(&eval_modes_out));
return CEED_ERROR_SUCCESS;
}

Expand Down Expand Up @@ -1743,6 +1742,8 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda(CeedOperator op, C
CeedCallBackend(CeedOperatorAtPointsGetPoints(op, &rstr_points, &point_coords));
CeedCallBackend(CeedElemRestrictionCreateVector(rstr_points, NULL, &impl->point_coords_elem));
CeedCallBackend(CeedElemRestrictionApply(rstr_points, CEED_NOTRANSPOSE, point_coords, impl->point_coords_elem, request));
CeedCallBackend(CeedVectorDestroy(&point_coords));
CeedCallBackend(CeedElemRestrictionDestroy(&rstr_points));
}

// Process inputs
Expand Down Expand Up @@ -1933,6 +1934,10 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda(CeedOperator op, C
for (CeedInt i = 0; i < num_input_fields; i++) {
CeedCallBackend(CeedOperatorInputRestore_Cuda(op_input_fields[i], qf_input_fields[i], i, NULL, NULL, true, impl));
}

// Restore work vector
CeedCallBackend(CeedRestoreWorkVector(ceed, &active_e_vec_in));
CeedCallBackend(CeedRestoreWorkVector(ceed, &active_e_vec_out));
return CEED_ERROR_SUCCESS;
}

Expand Down
1 change: 1 addition & 0 deletions backends/cuda-ref/ceed-cuda-ref-qfunction.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ static int CeedQFunctionDestroy_Cuda(CeedQFunction qf) {
CeedQFunction_Cuda *data;

CeedCallBackend(CeedQFunctionGetData(qf, &data));
CeedCallBackend(CeedFree(&data->qfunction_source));
if (data->module) CeedCallCuda(CeedQFunctionReturnCeed(qf), cuModuleUnload(data->module));
CeedCallBackend(CeedFree(&data));
return CEED_ERROR_SUCCESS;
Expand Down
4 changes: 4 additions & 0 deletions backends/cuda-ref/ceed-cuda-ref-restriction.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ static inline int CeedElemRestrictionSetupCompile_Cuda(CeedElemRestriction rstr)
"USE_DETERMINISTIC", is_deterministic ? 1 : 0));
CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "OffsetNoTranspose", &impl->ApplyNoTranspose));
CeedCallBackend(CeedGetKernel_Cuda(ceed, impl->module, "AtPointsTranspose", &impl->ApplyTranspose));
// Cleanup
CeedCallBackend(CeedFree(&offset_kernel_path));
for (CeedInt i = 0; i < num_file_paths; i++) CeedCallBackend(CeedFree(&file_paths[i]));
CeedCallBackend(CeedFree(&file_paths));
} break;
case CEED_RESTRICTION_STANDARD: {
CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/cuda/cuda-ref-restriction-offset.h", &restriction_kernel_path));
Expand Down
21 changes: 13 additions & 8 deletions backends/hip-ref/ceed-hip-ref-operator.c
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,7 @@ static int CeedOperatorSetupAtPoints_Hip(CeedOperator op) {
CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr_points, e, &num_points_elem));
impl->num_points[e] = num_points_elem;
}
CeedCallBackend(CeedElemRestrictionDestroy(&rstr_points));
}
impl->max_num_points = max_num_points;

Expand Down Expand Up @@ -777,6 +778,8 @@ static int CeedOperatorApplyAddAtPoints_Hip(CeedOperator op, CeedVector in_vec,
CeedCallBackend(CeedOperatorAtPointsGetPoints(op, &rstr_points, &point_coords));
CeedCallBackend(CeedElemRestrictionCreateVector(rstr_points, NULL, &impl->point_coords_elem));
CeedCallBackend(CeedElemRestrictionApply(rstr_points, CEED_NOTRANSPOSE, point_coords, impl->point_coords_elem, request));
CeedCallBackend(CeedVectorDestroy(&point_coords));
CeedCallBackend(CeedElemRestrictionDestroy(&rstr_points));
}

// Process inputs
Expand Down Expand Up @@ -1535,11 +1538,9 @@ static int CeedSingleOperatorAssembleSetup_Hip(CeedOperator op, CeedInt use_ceed
CeedCallHip(ceed, hipMemcpy(&asmb->d_B_in[i * elem_size_in * num_qpts_in], h_B_in, elem_size_in * num_qpts_in * sizeof(CeedScalar),
hipMemcpyHostToDevice));
}

if (identity) {
CeedCallBackend(CeedFree(&identity));
}
CeedCallBackend(CeedFree(&identity));
}
CeedCallBackend(CeedFree(&eval_modes_in));

// Load into B_out, in order that they will be used in eval_modes_out
{
Expand Down Expand Up @@ -1572,11 +1573,9 @@ static int CeedSingleOperatorAssembleSetup_Hip(CeedOperator op, CeedInt use_ceed
CeedCallHip(ceed, hipMemcpy(&asmb->d_B_out[i * elem_size_out * num_qpts_out], h_B_out, elem_size_out * num_qpts_out * sizeof(CeedScalar),
hipMemcpyHostToDevice));
}

if (identity) {
CeedCallBackend(CeedFree(&identity));
}
CeedCallBackend(CeedFree(&identity));
}
CeedCallBackend(CeedFree(&eval_modes_out));
return CEED_ERROR_SUCCESS;
}

Expand Down Expand Up @@ -1740,6 +1739,8 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip(CeedOperator op, Ce
CeedCallBackend(CeedOperatorAtPointsGetPoints(op, &rstr_points, &point_coords));
CeedCallBackend(CeedElemRestrictionCreateVector(rstr_points, NULL, &impl->point_coords_elem));
CeedCallBackend(CeedElemRestrictionApply(rstr_points, CEED_NOTRANSPOSE, point_coords, impl->point_coords_elem, request));
CeedCallBackend(CeedVectorDestroy(&point_coords));
CeedCallBackend(CeedElemRestrictionDestroy(&rstr_points));
}

// Process inputs
Expand Down Expand Up @@ -1930,6 +1931,10 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip(CeedOperator op, Ce
for (CeedInt i = 0; i < num_input_fields; i++) {
CeedCallBackend(CeedOperatorInputRestore_Hip(op_input_fields[i], qf_input_fields[i], i, NULL, NULL, true, impl));
}

// Restore work vector
CeedCallBackend(CeedRestoreWorkVector(ceed, &active_e_vec_in));
CeedCallBackend(CeedRestoreWorkVector(ceed, &active_e_vec_out));
return CEED_ERROR_SUCCESS;
}

Expand Down
1 change: 1 addition & 0 deletions backends/hip-ref/ceed-hip-ref-qfunction.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ static int CeedQFunctionDestroy_Hip(CeedQFunction qf) {
CeedQFunction_Hip *data;

CeedCallBackend(CeedQFunctionGetData(qf, &data));
CeedCallBackend(CeedFree(&data->qfunction_source));
if (data->module) CeedCallHip(CeedQFunctionReturnCeed(qf), hipModuleUnload(data->module));
CeedCallBackend(CeedFree(&data));
return CEED_ERROR_SUCCESS;
Expand Down
4 changes: 4 additions & 0 deletions backends/hip-ref/ceed-hip-ref-restriction.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ static inline int CeedElemRestrictionSetupCompile_Hip(CeedElemRestriction rstr)
"USE_DETERMINISTIC", is_deterministic ? 1 : 0));
CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "OffsetNoTranspose", &impl->ApplyNoTranspose));
CeedCallBackend(CeedGetKernel_Hip(ceed, impl->module, "AtPointsTranspose", &impl->ApplyTranspose));
// Cleanup
CeedCallBackend(CeedFree(&offset_kernel_path));
for (CeedInt i = 0; i < num_file_paths; i++) CeedCallBackend(CeedFree(&file_paths[i]));
CeedCallBackend(CeedFree(&file_paths));
} break;
case CEED_RESTRICTION_STANDARD: {
CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-restriction-offset.h", &restriction_kernel_path));
Expand Down

0 comments on commit f529add

Please sign in to comment.