Skip to content

Commit

Permalink
Fix C_MPI library linking
Browse files Browse the repository at this point in the history
Fix all warnings
Less verbose default flags
  • Loading branch information
tom91136 committed Aug 11, 2024
1 parent 72f1295 commit 3e10ff9
Show file tree
Hide file tree
Showing 6 changed files with 35 additions and 45 deletions.
10 changes: 5 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0")
cmake_policy(SET CMP0135 NEW)
endif ()

project(cloverleaf VERSION 1.0 LANGUAGES CXX)
project(cloverleaf VERSION 1.0 LANGUAGES C CXX)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

Expand Down Expand Up @@ -35,8 +35,8 @@ if ((NOT BUILD_TYPE STREQUAL RELEASE) AND (NOT BUILD_TYPE STREQUAL DEBUG))
endif ()

# set up some default flags for everything
set(DEFAULT_DEBUG_FLAGS -Wall -O2)
set(DEFAULT_RELEASE_FLAGS -Wall -O3)
set(DEFAULT_DEBUG_FLAGS -Wall -Wno-unused-parameter -Wno-unused-function -Wno-unused-variable -O2)
set(DEFAULT_RELEASE_FLAGS -Wall -Wno-unused-parameter -Wno-unused-function -Wno-unused-variable -O3)

macro(hint_flag FLAG DESCRIPTION)
if (NOT DEFINED ${FLAG})
Expand Down Expand Up @@ -71,7 +71,7 @@ option(USE_ONETBB "Enable oneTBB library for *supported* models. Enabling this o

option(FETCH_ONETBB "Fetch (download) the oneTBB library for *supported* models. This uses CMake's
FetchContent feature. Specify version by setting FETCH_ONETBB_VERSION" OFF)
set(FETCH_ONETBB_VERSION "v2021.10.0" CACHE STRING "Specify version of oneTBB to use if FETCH_ONETBB is ON")
set(FETCH_ONETBB_VERSION "v2021.13.0" CACHE STRING "Specify version of oneTBB to use if FETCH_ONETBB is ON")

if (FETCH_ONETBB)
FetchContent_Declare(
Expand All @@ -96,7 +96,7 @@ option(USE_ONEDPL "Enable oneDPL library for *supported* models. Enabling this o

option(FETCH_ONEDPL "Fetch (download) the oneDPL library for *supported* models. This uses CMake's
FetchContent feature. Specify version by setting FETCH_ONEDPL_VERSION" OFF)
set(FETCH_ONEDPL_VERSION "oneDPL-2022.2.0-rc1" CACHE STRING "Specify version of oneTBB to use if FETCH_ONEDPL is ON")
set(FETCH_ONEDPL_VERSION "oneDPL-2022.6.0-rc1" CACHE STRING "Specify version of oneTBB to use if FETCH_ONEDPL is ON")

if (FETCH_ONEDPL)
FetchContent_Declare(
Expand Down
8 changes: 4 additions & 4 deletions cmake/register_models.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ endmacro()

macro(register_append_cxx_flags CONFIG)
if ("${CONFIG}" STREQUAL "ANY")
list(APPEND DEFAULT_RELEASE_CXX_FLAGS ${ARGN})
list(APPEND DEFAULT_DEBUG_CXX_FLAGS ${ARGN})
list(APPEND DEFAULT_RELEASE_FLAGS ${ARGN})
list(APPEND DEFAULT_DEBUG_FLAGS ${ARGN})
elseif ("${CONFIG}" STREQUAL "RELEASE")
list(APPEND DEFAULT_RELEASE_CXX_FLAGS ${ARGN})
list(APPEND DEFAULT_RELEASE_FLAGS ${ARGN})
elseif ("${CONFIG}" STREQUAL "DEBUG")
list(APPEND DEFAULT_DEBUG_CXX_FLAGS ${ARGN})
list(APPEND DEFAULT_DEBUG_FLAGS ${ARGN})
else ()
message(FATAL_ERROR "register_flags supports only RELEASE, DEBUG, or ANY for all configs, got `${CONFIG}`")
endif ()
Expand Down
9 changes: 0 additions & 9 deletions src/cuda/accelerate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,6 @@
#include "context.h"
#include "timer.h"

//#define par_ranged2m(rg, f) \
//{"KERNEL2D_START"; \
// \
// Range2d range = rg; \
// for (size_t j = range.fromY; j < range.toY; j++) { \
// for (size_t i = range.fromX; i < range.toX; i++) {"KERNEL2D_A";f"KERNEL2D_B";} \
// } \
//"KERNEL2D_END";}

// @brief Fortran acceleration kernel
// @author Wayne Gaudin
// @details The pressure and viscosity gradients are used to update the
Expand Down
2 changes: 0 additions & 2 deletions src/cuda/calc_dt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,6 @@ void calc_dt_kernel(global_variables &globals, int x_min, int x_max, int y_min,
// DO j=x_min,x_max
// Kokkos::MDRangePolicy <Kokkos::Rank<2>> policy({x_min + 1, y_min + 1}, {x_max + 2, y_max + 2});

auto policy = clover::Range2d(x_min + 1, y_min + 1, x_max + 2, y_max + 2);

int xStart = x_min + 1, xEnd = x_max + 2;
int yStart = y_min + 1, yEnd = y_max + 2;
int sizeX = xEnd - xStart;
Expand Down
1 change: 1 addition & 0 deletions src/cuda/context.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ template <typename T> struct Buffer2D {
return sizeY;
} else {
static_assert(D < 2);
return 0; // make nvc++ happy
}
}

Expand Down
50 changes: 25 additions & 25 deletions test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

set -eu

export NVHPC_DIR="/opt/nvidia/hpc_sdk/Linux_x86_64/23.5"
export NVHPC_DIR="/opt/nvidia/hpc_sdk/Linux_x86_64/24.7"
export CUDA_DIR="$NVHPC_DIR/cuda/"
export PATH=$NVHPC_DIR/compilers/bin/:${PATH:-}
export KOKKOS_DIR="/home/tom/Downloads/kokkos-4.0.01/"
Expand All @@ -13,7 +13,7 @@ export CPU_RANKS=$(($(nproc) / 2))
export GPU_RANKS=3

VERBOSE="ON"
PROBLEM="InputDecks/clover_bm16_short.in"
PROBLEM="InputDecks/clover_bm16_very_short.in"

declare -A enabled_models=(
["serial"]=true
Expand Down Expand Up @@ -79,10 +79,10 @@ function test_nompi() {
)
(
:
test_nompi "0" std-indices -DCMAKE_CXX_COMPILER=g++ -DCXX_EXTRA_FLAGS="-Ofast" -DUSE_TBB=ON
test_nompi "0" std-indices -DCMAKE_CXX_COMPILER=g++ -DCXX_EXTRA_FLAGS="-Ofast" -DUSE_ONEDPL=OPENMP
test_nompi "0" std-indices -DCMAKE_CXX_COMPILER=clang++ -DCXX_EXTRA_FLAGS="-Ofast" -DUSE_TBB=ON
test_nompi "0" std-indices -DCMAKE_CXX_COMPILER=clang++ -DCXX_EXTRA_FLAGS="-Ofast" -DUSE_ONEDPL=OPENMP
test_nompi "0" std-indices -DCMAKE_CXX_COMPILER=g++ -DCXX_EXTRA_FLAGS="-Ofast" -DUSE_ONETBB=ON -DFETCH_ONETBB=ON -DCXX_EXTRA_LIBRARIES=tbb
test_nompi "0" std-indices -DCMAKE_CXX_COMPILER=g++ -DCXX_EXTRA_FLAGS="-Ofast" -DUSE_ONEDPL=OPENMP -DFETCH_ONEDPL=ON
test_nompi "0" std-indices -DCMAKE_CXX_COMPILER=clang++ -DCXX_EXTRA_FLAGS="-Ofast" -DUSE_ONETBB=ON -DFETCH_ONETBB=ON -DCXX_EXTRA_LIBRARIES=tbb
test_nompi "0" std-indices -DCMAKE_CXX_COMPILER=clang++ -DCXX_EXTRA_FLAGS="-Ofast" -DUSE_ONEDPL=OPENMP -DFETCH_ONEDPL=ON
test_nompi "0" std-indices -DCMAKE_CXX_COMPILER=nvc++ -DCXX_EXTRA_FLAGS="-Ofast;-stdpar;-target=multicore"
test_nompi "0" std-indices -DCMAKE_CXX_COMPILER=nvc++ -DCXX_EXTRA_FLAGS="-Ofast;-stdpar;-gpu=cc61"
)
Expand Down Expand Up @@ -110,9 +110,9 @@ function test_nompi() {
)
(
:
test_nompi "0" cuda -DCXX_EXTRA_FLAGS="-Ofast" -DCMAKE_CUDA_COMPILER=nvcc -DCUDA_ARCH=sm_60
test_nompi "0" cuda -DMANAGED_ALLOC=ON -DCXX_EXTRA_FLAGS="-Ofast" -DCMAKE_CUDA_COMPILER=nvcc -DCUDA_ARCH=sm_60
test_nompi "0" cuda -DSYNC_ALL_KERNELS=ON -DCXX_EXTRA_FLAGS="-Ofast" -DCMAKE_CUDA_COMPILER=nvcc -DCUDA_ARCH=sm_60
test_nompi "0" cuda -DCXX_EXTRA_FLAGS="-O3" -DCMAKE_CUDA_COMPILER=nvcc -DCUDA_ARCH=sm_60
test_nompi "0" cuda -DMANAGED_ALLOC=ON -DCXX_EXTRA_FLAGS="-O3" -DCMAKE_CUDA_COMPILER=nvcc -DCUDA_ARCH=sm_60
test_nompi "0" cuda -DSYNC_ALL_KERNELS=ON -DCXX_EXTRA_FLAGS="-O3" -DCMAKE_CUDA_COMPILER=nvcc -DCUDA_ARCH=sm_60
)
(
:
Expand All @@ -136,8 +136,8 @@ function test_nompi() {
(
:
set +eu
source /opt/intel/oneapi/tbb/2021.10.0/env/vars.sh
source /opt/intel/oneapi/compiler/2023.2.0/env/vars.sh
source /opt/intel/oneapi/tbb/2021.13/env/vars.sh
source /opt/intel/oneapi/compiler/2024.2/env/vars.sh
set -eu
export DPCPP_CPU_NUM_CUS=1
export DPCPP_CPU_SCHEDULE=static
Expand All @@ -149,8 +149,8 @@ function test_nompi() {
(
:
set +eu
source /opt/intel/oneapi/tbb/2021.10.0/env/vars.sh
source /opt/intel/oneapi/compiler/2023.2.0/env/vars.sh --include-intel-llvm
source /opt/intel/oneapi/tbb/2021.13/env/vars.sh
source /opt/intel/oneapi/compiler/2024.2/env/vars.sh --include-intel-llvm
set -eu
cuda_sycl_flags="-fsycl-targets=nvptx64-nvidia-cuda;-Xsycl-target-backend;--cuda-gpu-arch=sm_60;--cuda-path=$CUDA_DIR"
test_nompi "0" std-indices -DCMAKE_CXX_COMPILER=clang++ -DUSE_ONEDPL=DPCPP -DCXX_EXTRA_FLAGS="-fsycl;$cuda_sycl_flags"
Expand Down Expand Up @@ -180,10 +180,10 @@ function test_nompi() {
test_mpi $CPU_RANKS "0" serial -DCMAKE_CXX_COMPILER=clang++
(
:
test_mpi $CPU_RANKS "0" std-indices -DCMAKE_CXX_COMPILER=g++ -DUSE_TBB=ON
test_mpi $CPU_RANKS "0" std-indices -DCMAKE_CXX_COMPILER=g++ -DUSE_ONEDPL=OPENMP
test_mpi $CPU_RANKS "0" std-indices -DCMAKE_CXX_COMPILER=clang++ -DUSE_TBB=ON
test_mpi $CPU_RANKS "0" std-indices -DCMAKE_CXX_COMPILER=clang++ -DUSE_ONEDPL=OPENMP
test_mpi $CPU_RANKS "0" std-indices -DCMAKE_CXX_COMPILER=g++ -DUSE_ONETBB=ON -DFETCH_ONETBB=ON -DCXX_EXTRA_LIBRARIES=tbb
test_mpi $CPU_RANKS "0" std-indices -DCMAKE_CXX_COMPILER=g++ -DUSE_ONEDPL=OPENMP -DFETCH_ONEDPL=ON
test_mpi $CPU_RANKS "0" std-indices -DCMAKE_CXX_COMPILER=clang++ -DUSE_ONETBB=ON -DFETCH_ONETBB=ON -DCXX_EXTRA_LIBRARIES=tbb
test_mpi $CPU_RANKS "0" std-indices -DCMAKE_CXX_COMPILER=clang++ -DUSE_ONEDPL=OPENMP -DFETCH_ONEDPL=ON
test_mpi $CPU_RANKS "0" std-indices -DCMAKE_CXX_COMPILER=nvc++ -DCXX_EXTRA_FLAGS="-Ofast;-stdpar;-target=multicore"
test_mpi $GPU_RANKS "0" std-indices -DCMAKE_CXX_COMPILER=nvc++ -DCXX_EXTRA_FLAGS="-Ofast;-stdpar;-gpu=cc61;--restrict"
)
Expand Down Expand Up @@ -223,8 +223,8 @@ function test_nompi() {
:
set +eu
module load mpi
source /opt/intel/oneapi/tbb/2021.10.0/env/vars.sh
source /opt/intel/oneapi/compiler/2023.2.0/env/vars.sh
source /opt/intel/oneapi/tbb/2021.13/env/vars.sh
source /opt/intel/oneapi/compiler/2024.2/env/vars.sh
set -eu
export DPCPP_CPU_NUM_CUS=1
export DPCPP_CPU_SCHEDULE=static
Expand All @@ -250,8 +250,8 @@ function test_nompi() {
:
set +eu
module load mpi
source /opt/intel/oneapi/tbb/2021.10.0/env/vars.sh
source /opt/intel/oneapi/compiler/2023.2.0/env/vars.sh --include-intel-llvm
source /opt/intel/oneapi/tbb/2021.13/env/vars.sh
source /opt/intel/oneapi/compiler/2024.2/env/vars.sh --include-intel-llvm
set -eu
cuda_sycl_flags="-fsycl-targets=nvptx64-nvidia-cuda;-Xsycl-target-backend;--cuda-gpu-arch=sm_60;--cuda-path=$CUDA_DIR"
test_mpi $GPU_RANKS "0" std-indices -DCMAKE_CXX_COMPILER=clang++ -DUSE_ONEDPL=DPCPP -DCXX_EXTRA_FLAGS="-fsycl;$cuda_sycl_flags"
Expand All @@ -275,8 +275,8 @@ function test_nompi() {

### CUDA-aware MPI ###
(
export MPI_HOME=/opt/nvidia/hpc_sdk/Linux_x86_64/23.5/comm_libs/openmpi/openmpi-3.1.5/
export PATH="/opt/nvidia/hpc_sdk/Linux_x86_64/23.5/comm_libs/openmpi/openmpi-3.1.5/bin/:${PATH:-}"
export MPI_HOME=/opt/nvidia/hpc_sdk/Linux_x86_64/24.7/comm_libs/openmpi/openmpi-3.1.5/
export PATH="/opt/nvidia/hpc_sdk/Linux_x86_64/24.7/comm_libs/openmpi/openmpi-3.1.5/bin/:${PATH:-}"
(
:
test_mpi $GPU_RANKS "0" std-indices -DCMAKE_CXX_COMPILER=nvc++ -DCXX_EXTRA_FLAGS="-Ofast;-stdpar;-gpu=cc61;--restrict"
Expand All @@ -302,8 +302,8 @@ function test_nompi() {
(
:
set +eu
source /opt/intel/oneapi/tbb/2021.10.0/env/vars.sh
source /opt/intel/oneapi/compiler/2023.2.0/env/vars.sh --include-intel-llvm
source /opt/intel/oneapi/tbb/2021.13/env/vars.sh
source /opt/intel/oneapi/compiler/2024.2/env/vars.sh --include-intel-llvm
set -eu
cuda_sycl_flags="-fsycl-targets=nvptx64-nvidia-cuda;-Xsycl-target-backend;--cuda-gpu-arch=sm_60;--cuda-path=$CUDA_DIR"
test_mpi $GPU_RANKS "0" std-indices -DCMAKE_CXX_COMPILER=clang++ -DUSE_ONEDPL=DPCPP -DCXX_EXTRA_FLAGS="-fsycl;$cuda_sycl_flags"
Expand Down

0 comments on commit 3e10ff9

Please sign in to comment.