Skip to content

Commit 0769ba4

Browse files
kswirydopelesh
authored andcommitted
Randomized (F)GMRES solver with ILU0 preconditioner
Implementations for AMD and NVIDIA GPUs
1 parent e877a97 commit 0769ba4

26 files changed

+2541
-20
lines changed

CMakePresets.json

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
"installDir": "${sourceDir}/install",
1515
"generator": "Unix Makefiles",
1616
"cacheVariables": {
17-
"RESOLVE_USE_CUDA": "ON"
17+
"RESOLVE_USE_CUDA": "ON",
18+
"CMAKE_CUDA_ARCHITECTURES": "60"
1819
}
1920
},
2021
{
@@ -45,7 +46,8 @@
4546
"description": "Custom changes specific for Ascent",
4647
"cacheVariables": {
4748
"CMAKE_C_COMPILER": "$env{OLCF_GCC_ROOT}/bin/gcc",
48-
"CMAKE_CXX_COMPILER": "$env{OLCF_GCC_ROOT}/bin/g++"
49+
"CMAKE_CXX_COMPILER": "$env{OLCF_GCC_ROOT}/bin/g++",
50+
"CMAKE_CUDA_ARCHITECTURES": "70"
4951
}
5052
},
5153
{

examples/CMakeLists.txt

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,10 @@ if(RESOLVE_USE_CUDA)
3636
# Build example where matrix data is updated
3737
add_executable(klu_glu_values_update.exe r_KLU_GLU_matrix_values_update.cpp)
3838
target_link_libraries(klu_glu_values_update.exe PRIVATE ReSolve)
39-
39+
40+
#rand solver
41+
add_executable(gmres_cusparse_rand.exe r_randGMRES_CUDA.cpp)
42+
target_link_libraries(gmres_cusparse_rand.exe PRIVATE ReSolve)
4043
endif(RESOLVE_USE_CUDA)
4144

4245
# Create HIP examples
@@ -53,17 +56,21 @@ if(RESOLVE_USE_HIP)
5356
add_executable(klu_rocsolverrf_check_redo.exe r_KLU_rocsolverrf_redo_factorization.cpp)
5457
target_link_libraries(klu_rocsolverrf_check_redo.exe PRIVATE ReSolve)
5558

59+
60+
# Rand GMRES test with rocsparse
61+
add_executable(gmres_rocsparse_rand.exe r_randGMRES.cpp)
62+
target_link_libraries(gmres_rocsparse_rand.exe PRIVATE ReSolve)
5663
endif(RESOLVE_USE_HIP)
5764

5865
# Install all examples in bin directory
5966
set(installable_executables klu_klu.exe klu_klu_standalone.exe)
6067

6168
if(RESOLVE_USE_CUDA)
62-
set(installable_executables ${installable_executables} klu_glu.exe klu_rf.exe klu_rf_fgmres.exe klu_glu_values_update.exe)
69+
set(installable_executables ${installable_executables} klu_glu.exe klu_rf.exe klu_rf_fgmres.exe klu_glu_values_update.exe gmres_cusparse_rand.exe)
6370
endif(RESOLVE_USE_CUDA)
6471

6572
if(RESOLVE_USE_HIP)
66-
set(installable_executables ${installable_executables} klu_rocsolverrf.exe klu_rocsolverrf_fgmres.exe klu_rocsolverrf_check_redo.exe)
73+
set(installable_executables ${installable_executables} klu_rocsolverrf.exe klu_rocsolverrf_fgmres.exe klu_rocsolverrf_check_redo.exe gmres_rocsparse_rand.exe)
6774
endif(RESOLVE_USE_HIP)
6875

6976
install(TARGETS ${installable_executables}

examples/r_randGMRES.cpp

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
#include <string>
2+
#include <iostream>
3+
#include <iomanip>
4+
5+
#include <resolve/matrix/Coo.hpp>
6+
#include <resolve/matrix/Csr.hpp>
7+
#include <resolve/matrix/Csc.hpp>
8+
#include <resolve/vector/Vector.hpp>
9+
#include <resolve/matrix/io.hpp>
10+
#include <resolve/matrix/MatrixHandler.hpp>
11+
#include <resolve/vector/VectorHandler.hpp>
12+
#include <resolve/LinSolverDirectKLU.hpp>
13+
#include <resolve/LinSolverDirectRocSparseILU0.hpp>
14+
#include <resolve/LinSolverIterativeRandFGMRES.hpp>
15+
#include <resolve/workspace/LinAlgWorkspace.hpp>
16+
17+
using namespace ReSolve::constants;
18+
19+
int main(int argc, char *argv[])
20+
{
21+
// Use the same data types as those you specified in ReSolve build.
22+
using index_type = ReSolve::index_type;
23+
using real_type = ReSolve::real_type;
24+
using vector_type = ReSolve::vector::Vector;
25+
26+
(void) argc; // TODO: Check if the number of input parameters is correct.
27+
std::string matrixFileName = argv[1];
28+
std::string rhsFileName = argv[2];
29+
30+
31+
ReSolve::matrix::Coo* A_coo;
32+
ReSolve::matrix::Csr* A;
33+
ReSolve::LinAlgWorkspaceHIP* workspace_HIP = new ReSolve::LinAlgWorkspaceHIP();
34+
workspace_HIP->initializeHandles();
35+
ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_HIP);
36+
ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_HIP);
37+
real_type* rhs = nullptr;
38+
real_type* x = nullptr;
39+
40+
vector_type* vec_rhs;
41+
vector_type* vec_x;
42+
vector_type* vec_r;
43+
44+
ReSolve::GramSchmidt* GS = new ReSolve::GramSchmidt(vector_handler, ReSolve::GramSchmidt::cgs2);
45+
46+
ReSolve::LinSolverDirectRocSparseILU0* Rf = new ReSolve::LinSolverDirectRocSparseILU0(workspace_HIP);
47+
ReSolve::LinSolverIterativeRandFGMRES* FGMRES = new ReSolve::LinSolverIterativeRandFGMRES(matrix_handler, vector_handler,ReSolve::LinSolverIterativeRandFGMRES::fwht, GS, "hip");
48+
49+
std::cout << std::endl << std::endl << std::endl;
50+
std::cout << "========================================================================================================================"<<std::endl;
51+
std::cout << "Reading: " << matrixFileName<< std::endl;
52+
std::cout << "========================================================================================================================"<<std::endl;
53+
std::cout << std::endl;
54+
std::ifstream mat_file(matrixFileName);
55+
if(!mat_file.is_open())
56+
{
57+
std::cout << "Failed to open file " << matrixFileName << "\n";
58+
return -1;
59+
}
60+
std::ifstream rhs_file(rhsFileName);
61+
if(!rhs_file.is_open())
62+
{
63+
std::cout << "Failed to open file " << rhsFileName << "\n";
64+
return -1;
65+
}
66+
A_coo = ReSolve::io::readMatrixFromFile(mat_file);
67+
A = new ReSolve::matrix::Csr(A_coo->getNumRows(),
68+
A_coo->getNumColumns(),
69+
A_coo->getNnz(),
70+
A_coo->symmetric(),
71+
A_coo->expanded());
72+
73+
rhs = ReSolve::io::readRhsFromFile(rhs_file);
74+
x = new real_type[A->getNumRows()];
75+
vec_rhs = new vector_type(A->getNumRows());
76+
vec_x = new vector_type(A->getNumRows());
77+
vec_x->allocate(ReSolve::memory::HOST);
78+
//iinit guess is 0U
79+
vec_x->allocate(ReSolve::memory::DEVICE);
80+
vec_x->setToZero(ReSolve::memory::DEVICE);
81+
vec_r = new vector_type(A->getNumRows());
82+
std::cout<<"Finished reading the matrix and rhs, size: "<<A->getNumRows()<<" x "<<A->getNumColumns()<< ", nnz: "<< A->getNnz()<< ", symmetric? "<<A->symmetric()<< ", Expanded? "<<A->expanded()<<std::endl;
83+
mat_file.close();
84+
rhs_file.close();
85+
86+
matrix_handler->coo2csr(A_coo,A, "hip");
87+
vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE);
88+
//Now call direct solver
89+
real_type norm_b;
90+
matrix_handler->setValuesChanged(true, "hip");
91+
92+
Rf->setup(A);
93+
FGMRES->setRestart(800);
94+
FGMRES->setMaxit(800);
95+
FGMRES->setTol(1e-12);
96+
FGMRES->setup(A);
97+
GS->setup(FGMRES->getKrand(), FGMRES->getRestart());
98+
99+
//matrix_handler->setValuesChanged(true, "hip");
100+
FGMRES->resetMatrix(A);
101+
FGMRES->setupPreconditioner("LU", Rf);
102+
FGMRES->setFlexible(1);
103+
104+
vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE);
105+
FGMRES->solve(vec_rhs, vec_x);
106+
107+
norm_b = vector_handler->dot(vec_rhs, vec_rhs, "hip");
108+
norm_b = sqrt(norm_b);
109+
std::cout << "FGMRES: init nrm: "
110+
<< std::scientific << std::setprecision(16)
111+
<< FGMRES->getInitResidualNorm()/norm_b
112+
<< " final nrm: "
113+
<< FGMRES->getFinalResidualNorm()/norm_b
114+
<< " iter: " << FGMRES->getNumIter() << "\n";
115+
116+
117+
delete A;
118+
delete A_coo;
119+
delete Rf;
120+
delete [] x;
121+
delete [] rhs;
122+
delete vec_r;
123+
delete vec_x;
124+
delete workspace_HIP;
125+
delete matrix_handler;
126+
delete vector_handler;
127+
128+
return 0;
129+
}

examples/r_randGMRES_CUDA.cpp

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
#include <string>
2+
#include <iostream>
3+
#include <iomanip>
4+
5+
#include <resolve/matrix/Coo.hpp>
6+
#include <resolve/matrix/Csr.hpp>
7+
#include <resolve/matrix/Csc.hpp>
8+
#include <resolve/vector/Vector.hpp>
9+
#include <resolve/matrix/io.hpp>
10+
#include <resolve/matrix/MatrixHandler.hpp>
11+
#include <resolve/vector/VectorHandler.hpp>
12+
#include <resolve/LinSolverDirectKLU.hpp>
13+
#include <resolve/LinSolverDirectCuSparseILU0.hpp>
14+
#include <resolve/LinSolverIterativeRandFGMRES.hpp>
15+
#include <resolve/workspace/LinAlgWorkspace.hpp>
16+
17+
using namespace ReSolve::constants;
18+
19+
int main(int argc, char *argv[])
20+
{
21+
// Use the same data types as those you specified in ReSolve build.
22+
using index_type = ReSolve::index_type;
23+
using real_type = ReSolve::real_type;
24+
using vector_type = ReSolve::vector::Vector;
25+
26+
(void) argc; // TODO: Check if the number of input parameters is correct.
27+
std::string matrixFileName = argv[1];
28+
std::string rhsFileName = argv[2];
29+
30+
31+
ReSolve::matrix::Coo* A_coo;
32+
ReSolve::matrix::Csr* A;
33+
ReSolve::LinAlgWorkspaceCUDA* workspace_CUDA = new ReSolve::LinAlgWorkspaceCUDA();
34+
workspace_CUDA->initializeHandles();
35+
ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA);
36+
ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA);
37+
real_type* rhs = nullptr;
38+
real_type* x = nullptr;
39+
40+
vector_type* vec_rhs;
41+
vector_type* vec_x;
42+
vector_type* vec_r;
43+
44+
ReSolve::GramSchmidt* GS = new ReSolve::GramSchmidt(vector_handler, ReSolve::GramSchmidt::cgs2);
45+
46+
ReSolve::LinSolverDirectCuSparseILU0* Rf = new ReSolve::LinSolverDirectCuSparseILU0(workspace_CUDA);
47+
ReSolve::LinSolverIterativeRandFGMRES* FGMRES = new ReSolve::LinSolverIterativeRandFGMRES(matrix_handler, vector_handler,ReSolve::LinSolverIterativeRandFGMRES::fwht, GS, "cuda");
48+
49+
std::cout << std::endl << std::endl << std::endl;
50+
std::cout << "========================================================================================================================"<<std::endl;
51+
std::cout << "Reading: " << matrixFileName<< std::endl;
52+
std::cout << "========================================================================================================================"<<std::endl;
53+
std::cout << std::endl;
54+
std::ifstream mat_file(matrixFileName);
55+
if(!mat_file.is_open())
56+
{
57+
std::cout << "Failed to open file " << matrixFileName << "\n";
58+
return -1;
59+
}
60+
std::ifstream rhs_file(rhsFileName);
61+
if(!rhs_file.is_open())
62+
{
63+
std::cout << "Failed to open file " << rhsFileName << "\n";
64+
return -1;
65+
}
66+
A_coo = ReSolve::io::readMatrixFromFile(mat_file);
67+
A = new ReSolve::matrix::Csr(A_coo->getNumRows(),
68+
A_coo->getNumColumns(),
69+
A_coo->getNnz(),
70+
A_coo->symmetric(),
71+
A_coo->expanded());
72+
73+
rhs = ReSolve::io::readRhsFromFile(rhs_file);
74+
x = new real_type[A->getNumRows()];
75+
vec_rhs = new vector_type(A->getNumRows());
76+
vec_x = new vector_type(A->getNumRows());
77+
vec_x->allocate(ReSolve::memory::HOST);
78+
//iinit guess is 0U
79+
vec_x->allocate(ReSolve::memory::DEVICE);
80+
vec_x->setToZero(ReSolve::memory::DEVICE);
81+
vec_r = new vector_type(A->getNumRows());
82+
std::cout<<"Finished reading the matrix and rhs, size: "<<A->getNumRows()<<" x "<<A->getNumColumns()<< ", nnz: "<< A->getNnz()<< ", symmetric? "<<A->symmetric()<< ", Expanded? "<<A->expanded()<<std::endl;
83+
mat_file.close();
84+
rhs_file.close();
85+
86+
matrix_handler->coo2csr(A_coo,A, "cuda");
87+
vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE);
88+
//Now call direct solver
89+
real_type norm_b;
90+
matrix_handler->setValuesChanged(true, "cuda");
91+
92+
Rf->setup(A, nullptr, nullptr, nullptr, nullptr, vec_rhs);
93+
FGMRES->setRestart(800);
94+
FGMRES->setMaxit(800);
95+
FGMRES->setTol(1e-12);
96+
FGMRES->setup(A);
97+
GS->setup(FGMRES->getKrand(), FGMRES->getRestart());
98+
99+
//matrix_handler->setValuesChanged(true, "cuda");
100+
FGMRES->resetMatrix(A);
101+
FGMRES->setupPreconditioner("LU", Rf);
102+
FGMRES->setFlexible(1);
103+
104+
vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE);
105+
FGMRES->solve(vec_rhs, vec_x);
106+
107+
norm_b = vector_handler->dot(vec_rhs, vec_rhs, "cuda");
108+
norm_b = sqrt(norm_b);
109+
std::cout << "FGMRES: init nrm: "
110+
<< std::scientific << std::setprecision(16)
111+
<< FGMRES->getInitResidualNorm()/norm_b
112+
<< " final nrm: "
113+
<< FGMRES->getFinalResidualNorm()/norm_b
114+
<< " iter: " << FGMRES->getNumIter() << "\n";
115+
116+
117+
delete A;
118+
delete A_coo;
119+
delete Rf;
120+
delete [] x;
121+
delete [] rhs;
122+
delete vec_r;
123+
delete vec_x;
124+
delete workspace_CUDA;
125+
delete matrix_handler;
126+
delete vector_handler;
127+
128+
return 0;
129+
}

resolve/CMakeLists.txt

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,22 @@ set(ReSolve_SRC
1818
set(ReSolve_GPU_SRC
1919
GramSchmidt.cpp
2020
LinSolverIterativeFGMRES.cpp
21-
)
21+
LinSolverIterativeRandFGMRES.cpp
22+
RandSketchingManager.cpp
23+
RandSketchingCountSketch.cpp
24+
RandSketchingFWHT.cpp
25+
)
2226

2327
# C++ code that links to CUDA SDK libraries
2428
set(ReSolve_CUDASDK_SRC
2529
LinSolverDirectCuSolverGLU.cpp
2630
LinSolverDirectCuSolverRf.cpp
31+
LinSolverDirectCuSparseILU0.cpp
2732
)
2833
# HIP files
2934
set(ReSolve_ROCM_SRC
3035
LinSolverDirectRocSolverRf.cpp
36+
LinSolverDirectRocSparseILU0.cpp
3137
)
3238
# Header files to be installed
3339
set(ReSolve_HEADER_INSTALL
@@ -37,13 +43,15 @@ set(ReSolve_HEADER_INSTALL
3743
LinSolverDirectCuSolverGLU.hpp
3844
LinSolverDirectCuSolverRf.hpp
3945
LinSolverDirectRocSolverRf.hpp
46+
LinSolverDirectRocSparseILU0.hpp
47+
LinSolverDirectCuSparseILU0.hpp
4048
LinSolverDirectKLU.hpp
4149
LinSolverIterativeFGMRES.hpp
4250
RefactorizationSolver.hpp
4351
SystemSolver.hpp
4452
GramSchmidt.hpp
4553
MemoryUtils.hpp
46-
)
54+
RandSketchingManager.hpp)
4755

4856
# Now, build workspaces
4957
add_subdirectory(workspace)

resolve/GramSchmidt.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,32 +151,38 @@ namespace ReSolve
151151
vector_handler_->gemv("T", n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace);
152152
// V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol
153153
vector_handler_->gemv("N", n, i + 1, &ONE, &MINUSONE, V, vec_Hcolumn_, vec_v_, memspace );
154-
154+
mem_.deviceSynchronize();
155+
155156
// copy H_col to aux, we will need it later
156157
vec_Hcolumn_->setDataUpdated(memory::DEVICE);
157158
vec_Hcolumn_->setCurrentSize(i + 1);
158159
vec_Hcolumn_->deepCopyVectorData(h_aux_, 0, memory::HOST);
160+
mem_.deviceSynchronize();
159161

160162
//Hcol = V(:,1:i)^T*V(:,i+1);
161163
vector_handler_->gemv("T", n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace);
164+
mem_.deviceSynchronize();
162165

163166
// V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol
164167
vector_handler_->gemv("N", n, i + 1, &ONE, &MINUSONE, V, vec_Hcolumn_, vec_v_, memspace );
168+
mem_.deviceSynchronize();
165169

166170
// copy H_col to H
167171
vec_Hcolumn_->setDataUpdated(memory::DEVICE);
168172
vec_Hcolumn_->deepCopyVectorData(&H[ idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST);
173+
mem_.deviceSynchronize();
169174

170175
// add both pieces together (unstable otherwise, careful here!!)
171176
t = 0.0;
172177
for(int j = 0; j <= i; ++j) {
173-
H[ idxmap(i, j, num_vecs_ + 1)] += h_aux_[j];
178+
H[ idxmap(i, j, num_vecs_ + 1)] += h_aux_[j];
174179
}
175180

176181
t = vector_handler_->dot(vec_v_, vec_v_, memspace);
177182
//set the last entry in Hessenberg matrix
178183
t = sqrt(t);
179184
H[ idxmap(i, i + 1, num_vecs_ + 1) ] = t;
185+
180186
if(fabs(t) > EPSILON) {
181187
t = 1.0/t;
182188
vector_handler_->scal(&t, vec_v_, memspace);

0 commit comments

Comments
 (0)