From 519d7bbb4e134409ef49e46518dbdad64f7ac8ba Mon Sep 17 00:00:00 2001 From: "Raul P. Pelaez" Date: Tue, 4 Dec 2018 17:06:35 +0100 Subject: [PATCH] Now the project uses CMake for compiling, 3.8 required --- .gitignore | 5 +++- CMakeLists.txt | 49 ++++++++++++++++++++++++++++++++++ Makefile | 17 ------------ README.md | 13 ++++----- cmake/gitversion.sh | 1 + cmake/listArchs.sh | 10 +++++++ src/CMakeLists.txt | 21 +++++++++++++++ src/Makefile | 55 --------------------------------------- src/NeighbourListCPU.h | 4 +++ src/NeighbourListGPU.cuh | 9 ++++--- src/ParticleSorter.cuh | 2 +- src/config.h | 12 ++++++--- src/defines.h.in | 10 +++++++ src/gitversion.h.in | 4 +++ src/input.h | 23 +++++++++------- src/{main.cu => main.cpp} | 17 +++++++++++- src/rdfCPU.h | 4 +-- src/utils.cuh | 23 +++++++++------- src/vector_algebra.cuh | 40 +++++++++++++++++++++++----- test/Makefile | 2 +- 20 files changed, 204 insertions(+), 117 deletions(-) create mode 100644 CMakeLists.txt delete mode 100644 Makefile create mode 100644 cmake/gitversion.sh create mode 100644 cmake/listArchs.sh create mode 100644 src/CMakeLists.txt delete mode 100644 src/Makefile create mode 100644 src/defines.h.in create mode 100644 src/gitversion.h.in rename src/{main.cu => main.cpp} (95%) diff --git a/.gitignore b/.gitignore index 338a853..54b1e9c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,8 @@ *~ bin/ bin/rdf +build/ *.o -*.depend \ No newline at end of file +*.depend +defines.h +gitversion.h \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..da17669 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,49 @@ +cmake_minimum_required (VERSION 3.8) +include(CheckLanguage) +include(CheckCXXCompilerFlag) +project (RadialDistributionFunction CXX) +#set( CMAKE_VERBOSE_MAKEFILE on ) +set (RadialDistributionFunction_VERSION_MAJOR \"2\") +set (RadialDistributionFunction_VERSION_MINOR \"0\") + +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED YES) 
+set(CMAKE_CXX_EXTENSIONS OFF) + +#Probe for real -std=c++11 support (not just c++0x); HAS_CXX11 must be set by this check before it is tested below +check_cxx_compiler_flag("-std=c++11" HAS_CXX11) +if(NOT HAS_CXX11) + message(FATAL_ERROR "C++ compiler needs to allow c++11 standard") +endif() + +option(USE_BOOST "Use Boost-qi for reading, highly improves reading performance" OFF) +option(DONT_USE_CUDA "Dont compile in hybrid CPU/GPU mode, requires nvcc" ON) + +if(NOT DONT_USE_CUDA) + check_language(CUDA) + if(CMAKE_CUDA_COMPILER) + enable_language(CUDA) + set(USE_CUDA ON) + + set(CMAKE_CUDA_STANDARD 11) + set(CMAKE_CUDA_STANDARD_REQUIRED ON) + set(CMAKE_CUDA_SEPARABLE_COMPILATION OFF) + + execute_process(COMMAND bash ${PROJECT_SOURCE_DIR}/cmake/listArchs.sh OUTPUT_VARIABLE ARCHS) + set(CMAKE_CUDA_FLAGS "${ARCHS} ${CMAKE_CUDA_FLAGS}") #Quoted: flags are a single space separated string, not a ;-list + endif() +endif() + + +#set(CMAKE_CXX_FLAGS "-Wall -Wextra -fPIC" ${CMAKE_CXX_FLAGS}) + +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release) +endif() + +list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) + +include_directories(src/third_party) +add_subdirectory(src) + diff --git a/Makefile b/Makefile deleted file mode 100644 index 622f14b..0000000 --- a/Makefile +++ /dev/null @@ -1,17 +0,0 @@ - - -all: - $(MAKE) -C src - -install: all - mkdir -p ~/bin - mv bin/rdf ~/bin/ - -test: all - $(MAKE) -C test - -clean: - $(MAKE) -C test clean - $(MAKE) -C src clean - rm -f bin/rdf src/main.o - diff --git a/README.md b/README.md index b242f62..f56ed62 100644 --- a/README.md +++ b/README.md @@ -13,17 +13,18 @@ rdf can compute in 4 modes: rdf will choose a between GPU/CPU according to the number of particles (unless specified with -device) and will choose NBody/Neighbour list according to the number of particles and the factor L/rcut. 
## COMPILE WITH +```bash +$ mkdir build; cd build; cmake ..; make ``` -$ make -``` -You may have to change the Makefile to adequate it to the CUDA target architechture, currently set to -arch=sm_35 -Use: +You can specify some options in CMAKE, (use ccmake). For example, set DONT_USE_CUDA to ON to compile in CPU only mode (does not need nvcc). + +You can run some tests after compiling with: ``` -$ make test +$ cd test; make test ``` -To compile and run several test using random numbers, the resulting rdf will be compared between CPU and GPU implementations. Which should be numerically identical. +This will run several tests using random numbers, the resulting rdf will be compared between CPU and GPU implementations. Which should be numerically identical. ## SYNOPSYS diff --git a/cmake/gitversion.sh b/cmake/gitversion.sh new file mode 100644 index 0000000..2d89832 --- /dev/null +++ b/cmake/gitversion.sh @@ -0,0 +1 @@ +echo \"$(git rev-parse HEAD)\" diff --git a/cmake/listArchs.sh b/cmake/listArchs.sh new file mode 100644 index 0000000..70da47c --- /dev/null +++ b/cmake/listArchs.sh @@ -0,0 +1,10 @@ + +nvcc --help | + grep '\-\-gpu-code' -A1000 | + grep -Po 'compute_\K[0-9]+' | + sort | + uniq | + awk '{print "-gencode arch=compute_"$1",code=sm_"$1}' | + paste -sd" " | tr '\n' ' ' + + diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..e6c184c --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,21 @@ +cmake_minimum_required (VERSION 3.8) +add_executable(RadialDistributionFunction main.cpp) +if(USE_CUDA) #Only set by the top-level CMakeLists.txt after enable_language(CUDA) succeeded + set_source_files_properties(main.cpp PROPERTIES LANGUAGE CUDA) +endif() + +set_target_properties(RadialDistributionFunction PROPERTIES OUTPUT_NAME "rdf") + +install(TARGETS RadialDistributionFunction RUNTIME DESTINATION bin) + +configure_file ( "defines.h.in" ${CMAKE_CURRENT_SOURCE_DIR}/defines.h ) + + +find_package(Git) +if(GIT_FOUND AND EXISTS "${PROJECT_SOURCE_DIR}/.git") + execute_process(COMMAND bash 
${PROJECT_SOURCE_DIR}/cmake/gitversion.sh OUTPUT_VARIABLE GITCOMMIT) +else() + SET(GITCOMMIT "unknown") +endif() + +configure_file("gitversion.h.in" "${CMAKE_CURRENT_SOURCE_DIR}/gitversion.h") diff --git a/src/Makefile b/src/Makefile deleted file mode 100644 index 6f39f6a..0000000 --- a/src/Makefile +++ /dev/null @@ -1,55 +0,0 @@ -#Uncomment if you have boost, it will greatly improve performance -#USE_BOOST=-DUSE_BOOST - -#The target CUDA compute capability(s), if not set it will be autodetected -#ARCH ?= 30 35 50 52 60 61 -ARCH= - -#Uncomment to compile in double precision mode, single by default -#DOUBLE_PRECISION=-DDOUBLE_PRECISION - -#C++ compiler, I tested up to clang++-5.0 -#CXX=clang++-5.0 -CXX=g++ - -#Cuda version (assumed to be in /usr/local/cuda*) You can change this in CUDA_ROOT -#If not set it will be autodetected -CUDA_VER= - -ifeq ($(CUDA_VER),) -CUDA_VER:=$(shell ls -d /usr/local/cuda*/ | grep -Eo '\-[0-9]\.[0-9]' | cut -d- -f2 | sort -grk1 | head -1) -endif -CUDA_ROOT=/usr/local/cuda-$(CUDA_VER) - -#Flags to $(CXX) -CPU= -O3 -funroll-loops -fno-math-errno -fno-signed-zeros -march=native -fPIC -Wall -Wextra -Wno-unused-parameter -Wno-unused-function -Wno-sign-compare - - -#If arch was not set, autodetect all GPUs in the system -ifeq ($(ARCH),) -GENCODE_FLAGS:=$(shell printf '\#include\n int main(){int nD;cudaGetDeviceCount(&nD);for(int i=0;i $@ - echo "#define GITVERSION \"$(shell git rev-parse HEAD)\"" >> $@ - echo "#endif" >> $@ - -clean: - rm -f gitversion.h - -# %.o:%.cu -# nvcc -g -I . 
-arch=sm_$(ARCH) -std=c++11 -O3 $(USE_BOOST) $< -c -o $@ - diff --git a/src/NeighbourListCPU.h b/src/NeighbourListCPU.h index 17d4602..dab568c 100644 --- a/src/NeighbourListCPU.h +++ b/src/NeighbourListCPU.h @@ -2,6 +2,10 @@ #ifndef NEIGHBOURLISTCPU_H #define NEIGHBOURLISTCPU_H +#include"vector_algebra.cuh" +#include +#include"config.h" +#include"utils.cuh" namespace gdr{ class NeighbourListCPU{ std::vector head, list; diff --git a/src/NeighbourListGPU.cuh b/src/NeighbourListGPU.cuh index b83b5d0..72f4d02 100644 --- a/src/NeighbourListGPU.cuh +++ b/src/NeighbourListGPU.cuh @@ -27,17 +27,18 @@ TODO: 100- Make a better separation between neighbour list and transverse schemes in this file 100- Improve needsRebuild */ -#ifndef CELLLIST_CUH -#define CELLLIST_CUH +#ifndef NEIGHBOURLISTGPU_CUH +#define NEIGHBOURLISTGPU_CUH + -#include"ParticleSorter.cuh" #include"vector_algebra.cuh" #include"config.h" #include"utils.cuh" +#include"ParticleSorter.cuh" #include #include -#include +#include"third_party/cub/cub.cuh" #include diff --git a/src/ParticleSorter.cuh b/src/ParticleSorter.cuh index 52447cb..ca59cca 100644 --- a/src/ParticleSorter.cuh +++ b/src/ParticleSorter.cuh @@ -33,7 +33,7 @@ REFERENCES: #include"utils.cuh" #include #include -#include +#include"third_party/cub/cub.cuh" namespace gdr{ diff --git a/src/config.h b/src/config.h index 51aaf36..2fbfaa5 100644 --- a/src/config.h +++ b/src/config.h @@ -8,6 +8,11 @@ #include"vector_algebra.cuh" #include #include +#include"utils.cuh" +#include +#include +#include +#include"defines.h" namespace gdr{ struct Configuration{ @@ -117,15 +122,14 @@ namespace gdr{ printf(" Raul P. 
Pelaez 2017.\n"); printf(" \n"); + printf("RadialDistributionFunction v%s.%s\n", + RadialDistributionFunction_VERSION_MAJOR, + RadialDistributionFunction_VERSION_MINOR); printf("Compiled from git commit: %s\n", GITVERSION); printf("NAME \n"); printf("rdf - Computes the Radial Distribution Function (RDF) of a group of positions in a file,\n"); printf(" averages it for all snapshots in the file. \n"); printf(" \n"); - printf("COMPILE WITH \n"); - printf(" \n"); - printf("$ nvcc -arch=sm_52 -std=c++11 -O3 rdf.cu \n"); - printf(" \n"); printf("SYNOPSYS \n"); printf(" \n"); printf("rdf [OPTIONS]... [FILE]... \n"); diff --git a/src/defines.h.in b/src/defines.h.in new file mode 100644 index 0000000..54497c2 --- /dev/null +++ b/src/defines.h.in @@ -0,0 +1,10 @@ +#ifndef RDFCONFIG_H +#define RDFCONFIG_H +#define RadialDistributionFunction_VERSION_MAJOR @RadialDistributionFunction_VERSION_MAJOR@ +#define RadialDistributionFunction_VERSION_MINOR @RadialDistributionFunction_VERSION_MINOR@ +#cmakedefine USE_BOOST +#cmakedefine USE_CUDA +#ifdef USE_CUDA +#define GPUMODE +#endif +#endif diff --git a/src/gitversion.h.in b/src/gitversion.h.in new file mode 100644 index 0000000..72afee6 --- /dev/null +++ b/src/gitversion.h.in @@ -0,0 +1,4 @@ +#ifndef GITVERSION_H +#define GITVERSION_H +#define GITVERSION @GITCOMMIT@ +#endif diff --git a/src/input.h b/src/input.h index 7980c3e..9d8c861 100644 --- a/src/input.h +++ b/src/input.h @@ -8,11 +8,14 @@ #include #include #include +#include +#include + namespace gdr{ //Reads numbers from a file or standard input line by line class InputParse{ - shared_ptr input; - string currentLine; + std::shared_ptr input; + std::string currentLine; public: //Read from stdin by default InputParse(){ } @@ -20,27 +23,27 @@ namespace gdr{ //Take input from cin bool open(){ //The lambda ensures cin is not deleted - input.reset(&cin, [](...){}); + input.reset(&std::cin, [](...){}); if(!input->good()){ - cerr<<"ERROR: Unable to read from stdin!"<good()){ - 
cerr<<"ERROR: Unable to open file!"<goToNextLine(); //This could be faster - stringstream ss; + std::stringstream ss; ss.str(currentLine); for(int i=0; i>numbersInLine[i]; diff --git a/src/main.cu b/src/main.cpp similarity index 95% rename from src/main.cu rename to src/main.cpp index 0d3a8c7..1f1f063 100644 --- a/src/main.cu +++ b/src/main.cpp @@ -5,7 +5,11 @@ rdf - Computes the Radial Distribution Function (RDF) of a group of positions i COMPILE WITH -$ nvcc -arch=sm_52 -std=c++11 -O3 rdf.cu +$ mkdir build; cd build; cmake ..; make + +INSTALL WITH + +$ make install SYNOPSYS @@ -80,7 +84,9 @@ rdf will take the file as 2 snapshots with the positions of 3 particles in 3D. #include"utils.cuh" #include"config.h" #include"inputFast.h" +#ifdef GPUMODE #include"rdfGPU.cuh" +#endif #include"rdfCPU.h" #include using namespace gdr; @@ -102,8 +108,12 @@ int main(int argc, char *argv[]){ //If device is automatic, use this rule of hand to select which one to use if(config.deviceMode == Configuration::device::none){ +#ifdef GPUMODE if(config.numberParticles > 500) config.deviceMode = Configuration::device::GPU; else config.deviceMode = Configuration::device::CPU; +#else + config.deviceMode = Configuration::device::CPU; +#endif } //InputParse handles the transformation of a line from the input file to numbers InputParse inputParser; @@ -145,6 +155,7 @@ int main(int argc, char *argv[]){ template void computeWithGPU(InputParse &inputParser, const Configuration &config, int numberCoordinatesPerParticle){ + #ifdef GPUMODE int N = config.numberParticles; std::vector rdf(config.numberBins, 0); //Standard deviation @@ -169,6 +180,10 @@ void computeWithGPU(InputParse &inputParser, const Configuration &config, int nu double R = (i+0.5)*binSize; std::cout< -#include +//#include #include #include"NeighbourListCPU.h" namespace gdr{ @@ -152,7 +152,7 @@ namespace gdr{ if(T==1) std[i] = std::numeric_limits::quiet_NaN(); else - std[i] = sqrt(rdf_mean_and_var[i].y)/sqrt(T*max(T-1,1)); + std[i] = 
sqrt(rdf_mean_and_var[i].y)/sqrt(T*std::max(T-1,1)); } diff --git a/src/utils.cuh b/src/utils.cuh index 653d516..6b6773a 100644 --- a/src/utils.cuh +++ b/src/utils.cuh @@ -9,6 +9,11 @@ #define fori(x,y) for(int i=x; i Box2D; - struct Grid{ + struct Grid{ + Box3D box; /*A magic vector that transforms cell coordinates to 1D index when dotted*/ /*Simply: 1, ncellsx, ncellsx*ncellsy*/ int3 gridPos2CellIndex; @@ -50,7 +56,6 @@ namespace gdr{ int3 cellDim; //ncells in each size real3 cellSize; real3 invCellSize; /*The inverse of the cell size in each direction*/ - Box3D box; Grid(): Grid(Box3D(), make_int3(0,0,0)){} Grid(Box3D box, int3 cellDim): box(box), @@ -66,7 +71,7 @@ namespace gdr{ } template - inline __host__ __device__ int3 getCell(const VecType &r) const{ + inline HOSTDEVICE int3 getCell(const VecType &r) const{ // return int( (p+0.5L)/cellSize ) int3 cell = make_int3(( box.apply_pbc(make_real3(r)) + real(0.5)*box.boxSize)*invCellSize); //Anti-Traquinazo guard, you need to explicitly handle the case where a particle @@ -82,11 +87,11 @@ namespace gdr{ return cell; } - inline __host__ __device__ int getCellIndex(const int3 &cell) const{ + inline HOSTDEVICE int getCellIndex(const int3 &cell) const{ return dot(cell, gridPos2CellIndex); } - inline __host__ __device__ int3 pbc_cell(const int3 &cell) const{ + inline HOSTDEVICE int3 pbc_cell(const int3 &cell) const{ int3 cellPBC; cellPBC.x = pbc_cell_coord<0>(cell.x); cellPBC.y = pbc_cell_coord<1>(cell.y); @@ -95,7 +100,7 @@ namespace gdr{ } template - inline __host__ __device__ int pbc_cell_coord(int cell) const{ + inline HOSTDEVICE int pbc_cell_coord(int cell) const{ int ncells = 0; if(coordinate == 0){ ncells = cellDim.x; @@ -114,7 +119,7 @@ namespace gdr{ } }; - + } diff --git a/src/vector_algebra.cuh b/src/vector_algebra.cuh index 5e066e0..271ee6a 100644 --- a/src/vector_algebra.cuh +++ b/src/vector_algebra.cuh @@ -1,7 +1,38 @@ /*Raul P. Pelaez 2016. 
vector types algebra*/ #ifndef VECTOR_OVERLOADS_H #define VECTOR_OVERLOADS_H +#include"defines.h" +#ifdef GPUMODE #include +#define VECATTR inline __host__ __device__ +#else +struct double2{double x,y;}; +struct double3{double x,y,z;}; +struct double4{double x,y,z,w;}; + +struct float2{float x,y;}; +struct float3{float x,y,z;}; +struct float4{float x,y,z,w;}; + +struct int2{int x,y;}; +struct int3{int x,y,z;}; +struct int4{int x,y,z,w;}; + +#define VECATTR inline +VECATTR int2 make_int2(int x, int y){return {x,y};} +VECATTR int3 make_int3(int x, int y, int z){return {x,y,z};} +VECATTR int4 make_int4(int x, int y, int z, int w){return {x,y,z,w};} + +VECATTR float2 make_float2(float x, float y){return {x,y};} +VECATTR float3 make_float3(float x, float y, float z){return {x,y,z};} +VECATTR float4 make_float4(float x, float y, float z, float w){return {x,y,z,w};} + +VECATTR double2 make_double2(double x, double y){return {x,y};} +VECATTR double3 make_double3(double x, double y, double z){return {x,y,z};} +VECATTR double4 make_double4(double x, double y, double z, double w){return {x,y,z,w};} + + +#endif #include #ifndef SINGLE_PRECISION @@ -21,8 +52,6 @@ typedef unsigned int uint; typedef unsigned long long int ullint; -#define VECATTR inline __host__ __device__ - @@ -387,7 +416,6 @@ typedef unsigned long long int ullint; #endif VECATTR real4 make_real4(int4 a){ return make_real4(real(a.x), real(a.y), real(a.z), real(a.w));} - VECATTR real4 make_real4(uint4 a){return make_real4(real(a.x), real(a.y), real(a.z), real(a.w));} //////////////////REAL3/////////////////////////// @@ -415,7 +443,7 @@ typedef unsigned long long int ullint; VECATTR real3 make_real3(real2 a, real z){return make_real3(a.x, a.y, z);} VECATTR real3 make_real3(int3 a){ return make_real3(real(a.x), real(a.y), real(a.z));} - VECATTR real3 make_real3(uint3 a){return make_real3(real(a.x), real(a.y), real(a.z));} + //////////////////REAL2/////////////////////////// @@ -432,7 +460,7 @@ typedef unsigned 
long long int ullint; VECATTR real2 make_real2(real2 a){return make_real2(a.x, a.y);} VECATTR real2 make_real2(real4 a){return make_real2(a.x, a.y);} VECATTR real2 make_real2(int3 a){ return make_real2(real(a.x), real(a.y));} - VECATTR real2 make_real2(uint3 a){return make_real2(real(a.x), real(a.y));} + ////////////////DOUBLE PRECISION////////////////////// @@ -445,7 +473,7 @@ typedef unsigned long long int ullint; VECATTR double4 make_double4(double3 a){return make_double4(a.x, a.y, a.z, 0.0f);} VECATTR double4 make_double4(double3 a, double w){return make_double4(a.x, a.y, a.z, w);} VECATTR double4 make_double4(int4 a){return make_double4(double(a.x), double(a.y), double(a.z), double(a.w));} - VECATTR double4 make_double4(uint4 a){return make_double4(double(a.x), double(a.y), double(a.z), double(a.w));} + VECATTR double4 make_double4(float4 a){return make_double4(double(a.x), double(a.y), double(a.z), double(a.w));} //////DOUBLE4/////////////// diff --git a/test/Makefile b/test/Makefile index 999b67e..7d4a74f 100644 --- a/test/Makefile +++ b/test/Makefile @@ -1,6 +1,6 @@ -EXE=../bin/rdf +EXE=../build/bin/rdf all: seq 1 1e8 | awk '{print rand()*10, rand()*10, rand()*10}' > inipos