SPMV using onemkl and SYCL - for discussion purposes #1

Status: Open. Wants to merge 40 commits into base branch master.

Commits (40):
a02ad18  try oneapi mkl... not working [skip ci]  (chrisrichardson, May 11, 2020)
f31d7bb  Merge remote-tracking branch 'github/chris/onemkl' into chris/onemkl  (chrisrichardson, May 11, 2020)
31d9189  Merge branch 'master' into chris/onemkl  (chrisrichardson, Jun 9, 2020)
dec43eb  Maybe even works!  (chrisrichardson, Jun 9, 2020)
4dfecc9  Fiddling around  (chrisrichardson, Jun 9, 2020)
28e9240  Add debug output  (chrisrichardson, Jun 10, 2020)
153bd56  Update to latest onemkl  (chrisrichardson, Jun 10, 2020)
e42fb6b  start adding support to sycl  (IgorBaratta, Oct 24, 2020)
55a7d3a  add Distributed vector  (IgorBaratta, Oct 24, 2020)
27af3e7  update demo  (IgorBaratta, Oct 25, 2020)
d7861ad  add new simple demo  (IgorBaratta, Oct 25, 2020)
fbededa  add more documentation  (IgorBaratta, Oct 25, 2020)
a9c6768  separate api and implementation  (IgorBaratta, Oct 25, 2020)
d8e4a99  fix mat mult mkl  (IgorBaratta, Oct 26, 2020)
3a60a6d  try to fix tests  (IgorBaratta, Oct 26, 2020)
854b9b1  add docker intel base kit  (IgorBaratta, Oct 26, 2020)
f8bf94a  use intel docker container  (IgorBaratta, Oct 26, 2020)
060f84c  revert some changes  (IgorBaratta, Oct 26, 2020)
677554e  install oneapi - github actions  (IgorBaratta, Oct 26, 2020)
3a9643d  use clang12 compiler  (IgorBaratta, Oct 26, 2020)
30761f0  few improvements  (IgorBaratta, Oct 26, 2020)
c755b25  reenable some functionalities  (IgorBaratta, Oct 27, 2020)
6b5ed9b  fix vector addition and multiplication  (IgorBaratta, Oct 27, 2020)
064f6a9  fix cg  (IgorBaratta, Oct 27, 2020)
5138837  fix cg norm  (IgorBaratta, Oct 27, 2020)
96692fe  change default to cpu instead of host  (IgorBaratta, Oct 27, 2020)
e001746  try to fix ci  (IgorBaratta, Oct 27, 2020)
5d6a2f4  use github ci  (IgorBaratta, Oct 27, 2020)
a14e80c  do not use sudo  (IgorBaratta, Oct 27, 2020)
f2c128a  try setting onemkl dir  (IgorBaratta, Oct 27, 2020)
c176bde  make a symbolic link to eigen headers  (IgorBaratta, Oct 27, 2020)
86acd87  minor fixes  (IgorBaratta, Oct 28, 2020)
edd3c47  Merge branch 'master' of github.com:chrisrichardson/spmv into igor/mk…  (IgorBaratta, Oct 28, 2020)
a1fa1e9  bump oneapi version  (IgorBaratta, Oct 28, 2020)
a77b046  use oneapi 1.0  (IgorBaratta, Oct 28, 2020)
36393fa  formating  (IgorBaratta, Oct 28, 2020)
d374161  remove eigen  (IgorBaratta, Nov 9, 2020)
cde16d0  remove demo  (IgorBaratta, Nov 9, 2020)
4a0e212  keep removing eigen  (IgorBaratta, Nov 9, 2020)
323f848  fix for gcc<9.3  (IgorBaratta, Nov 9, 2020)
Files changed:
19 changes: 14 additions & 5 deletions .github/workflows/ccpp.yml

@@ -5,15 +5,24 @@ on: [push]
 jobs:
   build:
 
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-20.04
+    container: intel/oneapi-basekit
+
+    env:
+      CXX: clang++
+      ONE_MKL_DIR: /opt/intel/oneapi/mkl/2021.1-beta10/
 
     steps:
     - uses: actions/checkout@v2
-    - name: Install Eigen3
-      run: sudo apt install -y libmpich-dev libeigen3-dev
+    - name: Install Dependencies
+      run: |
+        apt update && apt install -y libeigen3-dev
+        ln -s /usr/include/eigen3/Eigen /usr/include/Eigen
    - name: cmake
-      run: cmake .
+      run: echo $ONE_MKL_DIR && cmake .
    - name: make
      run: make
    - name: Run with 2 processes
-      run: ./demos/matvecmain && OMP_NUM_THREADS=1 mpirun -n 2 ./demos/matvecmain
+      run: |
+        ./demos/matvecmain && mpirun -n 2 ./demos/matvecmain
+        ./demos/vecnorm && mpirun -n 2 ./demos/vecnorm
17 changes: 6 additions & 11 deletions CMakeLists.txt

@@ -1,5 +1,5 @@
-# Require CMake 3.5
-cmake_minimum_required(VERSION 3.5)
+# Require CMake 3.10
+cmake_minimum_required(VERSION 3.10)
 
 # Choose Release or Debug
 set(CMAKE_BUILD_TYPE Release)
@@ -18,18 +18,13 @@ set(CMAKE_SKIP_RPATH FALSE)
 set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
 
 find_package(MPI REQUIRED)
+include_directories(${MPI_INCLUDE_PATH})
 
-set(BLA_VENDOR Intel10_64lp)
-find_package(BLAS)
-if(BLAS_FOUND)
-  include_directories("/usr/include/mkl")
-endif()
-
-find_package(Eigen3 REQUIRED)
-include_directories(${EIGEN3_INCLUDE_DIR} ${MPI_INCLUDE_PATH})
+find_package(BLAS REQUIRED)
+include_directories($ENV{ONE_MKL_DIR})
 
 # Not sure why need to set -O3 and -DNDEBUG as should be set by Release build type
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror=all -g -O3 -DNDEBUG -DEIGEN_MAX_ALIGN_BYTES=32")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O3 -DNDEBUG -fsycl -DMKL_ILP64 -tbb")
 
 add_subdirectory(spmv)
 include_directories (${CMAKE_CURRENT_SOURCE_DIR})
19 changes: 14 additions & 5 deletions demos/CMakeLists.txt

@@ -1,11 +1,20 @@
 
 # Executables
 
-add_executable(cgmain cgmain.cpp)
-target_link_libraries(cgmain LINK_PUBLIC ${PROJECT_NAME})
+add_executable(vecnorm vecnorm.cpp)
+target_link_libraries(vecnorm LINK_PUBLIC ${PROJECT_NAME})
 
 add_executable(matvecmain matvecmain.cpp CreateA.cpp)
 target_link_libraries(matvecmain ${PROJECT_NAME})
 
-add_executable(restrictmain restrictmain.cpp)
-target_link_libraries(restrictmain ${PROJECT_NAME})
+add_executable(diagonal diagonal.cpp)
+target_link_libraries(diagonal ${PROJECT_NAME})
+
+add_executable(cgmain cgmain.cpp)
+target_link_libraries(cgmain LINK_PUBLIC ${PROJECT_NAME})
+
+# Copy files matrix and vector files to binary dir
+file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/petsc_mat_poisson
+     DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
+
+file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/petsc_vec_poisson
+     DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
99 changes: 48 additions & 51 deletions demos/CreateA.cpp

@@ -1,32 +1,14 @@
-// Copyright (C) 2018-2020 Chris Richardson ([email protected])
+// Copyright (C) 2018-2020 Chris Richardson, Igor Baratta
 // SPDX-License-Identifier: MIT
 
 #include "CreateA.h"
-#include <Eigen/Sparse>
 
 #include <memory>
 #include <set>
 #include <spmv/L2GMap.h>
+#include <spmv/utils.h>
 
-//-----------------------------------------------------------------------------
-// Divide size into N ~equal chunks
-std::vector<std::int64_t> owner_ranges(std::int64_t size, std::int64_t N)
-{
-  // Compute number of items per process and remainder
-  const std::int64_t n = N / size;
-  const std::int64_t r = N % size;
-
-  // Compute local range
-  std::vector<std::int64_t> ranges;
-  for (int rank = 0; rank < (size + 1); ++rank)
-  {
-    if (rank < r)
-      ranges.push_back(rank * (n + 1));
-    else
-      ranges.push_back(rank * n + r);
-  }
-
-  return ranges;
-}
 //-----------------------------------------------------------------------------
 spmv::Matrix<double> create_A(MPI_Comm comm, int N)
 {
@@ -36,15 +18,16 @@ spmv::Matrix<double> create_A(MPI_Comm comm, int N)
   MPI_Comm_size(comm, &mpi_size);
 
   // Make a square Matrix divided evenly across cores
-  std::vector<std::int64_t> ranges = owner_ranges(mpi_size, N);
+  std::vector<std::int64_t> ranges = spmv::owner_ranges(mpi_size, N);
 
   std::int64_t r0 = ranges[mpi_rank];
   std::int64_t r1 = ranges[mpi_rank + 1];
   int M = r1 - r0;
 
-  // Local part of the matrix
-  // Must be RowMajor and compressed
-  Eigen::SparseMatrix<double, Eigen::RowMajor> A(M, N);
+  // Local part of the matrix, COO format
+  std::vector<double> coo_data;
+  std::vector<std::int32_t> coo_row;
+  std::vector<std::int32_t> coo_col;
 
   // Set up A
   // Add entries on all local rows
@@ -57,29 +40,53 @@ spmv::Matrix<double> create_A(MPI_Comm comm, int N)
     // Special case for very first and last global rows
     if (c0 == 0)
    {
-      A.insert(i, c0) = 1.0 - gamma;
-      A.insert(i, c0 + 1) = gamma;
+      // A.insert(i, c0) = 1.0 - gamma;
+      coo_data.push_back(1.0 - gamma);
+      coo_row.push_back(i);
+      coo_col.push_back(c0);
+
+      // A.insert(i, c0 + 1) = gamma;
+      coo_data.push_back(gamma);
+      coo_row.push_back(i);
+      coo_col.push_back(c0 + 1);
    }
    else if (c0 == (N - 1))
    {
-      A.insert(i, c0 - 1) = gamma;
-      A.insert(i, c0) = 1.0 - gamma;
+      // A.insert(i, c0 - 1) = gamma;
+      coo_data.push_back(gamma);
+      coo_row.push_back(i);
+      coo_col.push_back(c0 - 1);
+
+      // A.insert(i, c0) = 1.0 - gamma;
+      coo_data.push_back(1.0 - gamma);
+      coo_row.push_back(i);
+      coo_col.push_back(c0);
    }
    else
    {
-      A.insert(i, c0 - 1) = gamma;
-      A.insert(i, c0) = 1.0 - 2.0 * gamma;
-      A.insert(i, c0 + 1) = gamma;
+      // A.insert(i, c0 - 1) = gamma;
+      coo_data.push_back(gamma);
+      coo_row.push_back(i);
+      coo_col.push_back(c0 - 1);
+
+      // A.insert(i, c0) = 1.0 - 2.0 * gamma;
+      coo_data.push_back(1.0 - 2.0 * gamma);
+      coo_row.push_back(i);
+      coo_col.push_back(c0);
+
+      // A.insert(i, c0 + 1) = gamma;
+      coo_data.push_back(gamma);
+      coo_row.push_back(i);
+      coo_col.push_back(c0 + 1);
    }
  }
-  A.makeCompressed();
 
   // Remap columns to local indexing
   std::set<std::int64_t> ghost_indices;
-  std::int32_t nnz = A.outerIndexPtr()[M];
+  std::int32_t nnz = coo_col.size();
   for (std::int32_t i = 0; i < nnz; ++i)
  {
-    std::int32_t global_index = A.innerIndexPtr()[i];
+    std::int32_t global_index = coo_col[i];
    if (global_index < r0 or global_index >= r1)
      ghost_indices.insert(global_index);
  }
@@ -90,23 +97,13 @@ spmv::Matrix<double> create_A(MPI_Comm comm, int N)
       = std::make_shared<spmv::L2GMap>(comm, M, std::vector<std::int64_t>());
 
   // Rebuild A using local indices
-  Eigen::SparseMatrix<double, Eigen::RowMajor> Alocal(M, M + ghosts.size());
-  std::vector<Eigen::Triplet<double>> vals;
-  std::int32_t* Aouter = A.outerIndexPtr();
-  std::int32_t* Ainner = A.innerIndexPtr();
-  double* Aval = A.valuePtr();
+  for (auto& col : coo_col)
+    col = col_l2g->global_to_local(col);
 
-  for (std::int32_t row = 0; row < M; ++row)
-  {
-    for (std::int32_t j = Aouter[row]; j < Aouter[row + 1]; ++j)
-    {
-      std::int32_t col = col_l2g->global_to_local(Ainner[j]);
-      double val = Aval[j];
-      vals.push_back(Eigen::Triplet<double>(row, col, val));
-    }
-  }
-  Alocal.setFromTriplets(vals.begin(), vals.end());
+  auto [data, indptr, indices] = spmv::coo_to_csr<double>(
+      row_l2g->local_size(), col_l2g->local_size() + col_l2g->num_ghosts(),
+      coo_data.size(), coo_row, coo_col, coo_data);
 
-  return spmv::Matrix<double>(Alocal, col_l2g, row_l2g);
+  return spmv::Matrix<double>(data, indptr, indices, col_l2g, row_l2g);
 }
 //-----------------------------------------------------------------------------
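
The rewrite above leans on two helpers that now live in <spmv/utils.h> but are not part of this diff: spmv::owner_ranges and spmv::coo_to_csr. Judging from the removed Eigen code and the call site in create_A, they presumably behave like the sketch below; the names are from the diff, but the exact signatures and return types in utils.h are assumptions.

#include <cstdint>
#include <tuple>
#include <vector>

// Split N rows into 'size' near-equal contiguous ranges; ranges[rank] and
// ranges[rank + 1] bound the rows owned by 'rank' (mirrors the removed code).
std::vector<std::int64_t> owner_ranges(std::int64_t size, std::int64_t N)
{
  const std::int64_t n = N / size;
  const std::int64_t r = N % size;
  std::vector<std::int64_t> ranges;
  for (std::int64_t rank = 0; rank < size + 1; ++rank)
  {
    if (rank < r)
      ranges.push_back(rank * (n + 1));
    else
      ranges.push_back(rank * n + r);
  }
  return ranges;
}

// Convert COO triplets to CSR arrays; the tuple order (data, indptr, indices)
// matches the structured binding at the create_A call site above.
template <typename T>
std::tuple<std::vector<T>, std::vector<std::int32_t>, std::vector<std::int32_t>>
coo_to_csr(std::int32_t nrows, std::int32_t /*ncols*/, std::int32_t nnz,
           const std::vector<std::int32_t>& rows,
           const std::vector<std::int32_t>& cols, const std::vector<T>& vals)
{
  std::vector<std::int32_t> indptr(nrows + 1, 0);
  std::vector<std::int32_t> indices(nnz);
  std::vector<T> data(nnz);

  // Count entries per row, then prefix-sum to get row offsets
  for (std::int32_t k = 0; k < nnz; ++k)
    ++indptr[rows[k] + 1];
  for (std::int32_t i = 0; i < nrows; ++i)
    indptr[i + 1] += indptr[i];

  // Scatter each triplet into its row segment (order preserved for row-sorted input)
  std::vector<std::int32_t> next(indptr.begin(), indptr.end() - 1);
  for (std::int32_t k = 0; k < nnz; ++k)
  {
    const std::int32_t dest = next[rows[k]]++;
    indices[dest] = cols[k];
    data[dest] = vals[k];
  }
  return {data, indptr, indices};
}
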
3 changes: 2 additions & 1 deletion demos/CreateA.h

@@ -3,7 +3,8 @@
 
 #pragma once
 
-#include <Eigen/Sparse>
+#include <CL/sycl.hpp>
+
 #include <memory>
 #include <mpi.h>
 #include <spmv/Matrix.h>
32 changes: 15 additions & 17 deletions demos/cgmain.cpp

@@ -1,8 +1,7 @@
 // Copyright (C) 2018-2020 Chris Richardson ([email protected])
 // SPDX-License-Identifier: MIT
+#include <CL/sycl.hpp>
 
-#include <Eigen/Dense>
-#include <Eigen/Sparse>
 #include <chrono>
 #include <iostream>
 #include <memory>
@@ -34,12 +33,15 @@ int cg_main(int argc, char** argv)
   else
     throw std::runtime_error("Use with filename");
 
-  auto A
-      = spmv::read_petsc_binary(MPI_COMM_WORLD, "petsc_mat" + argv1 + ".dat");
-  std::shared_ptr<const spmv::L2GMap> l2g = A.col_map();
+  auto A = spmv::read_petsc_binary(MPI_COMM_WORLD, "petsc_mat_" + argv1);
+  std::shared_ptr<spmv::L2GMap> l2g = A.col_map();
 
-  auto b = spmv::read_petsc_binary_vector(MPI_COMM_WORLD,
-                                          "petsc_vec" + argv1 + ".dat");
+  auto b_data
+      = spmv::read_petsc_binary_vector(MPI_COMM_WORLD, "petsc_vec_" + argv1);
+  std::int32_t ls = l2g->local_size() + l2g->num_ghosts();
+  b_data.resize(ls);
+  spmv::Vector<double> b(b_data, l2g);
+
   // Get local and global sizes
   std::int64_t N = l2g->global_size();
 
@@ -61,21 +63,17 @@ int cg_main(int argc, char** argv)
   MPI_Pcontrol(0);
 
   // Get norm on local part of vector
-  double xnorm = x.head(l2g->local_size()).squaredNorm();
-  double xnorm_sum;
-  MPI_Allreduce(&xnorm, &xnorm_sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  double xnorm = x.norm();
 
   // Test result
-  l2g->update(x.data());
-  Eigen::VectorXd r = A * x - b;
-  double rnorm = r.squaredNorm();
-  double rnorm_sum;
-  MPI_Allreduce(&rnorm, &rnorm_sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  x.update();
+  auto r = A * x - b;
+  double rnom = r.norm();
 
   if (mpi_rank == 0)
   {
-    std::cout << "r.norm = " << std::sqrt(rnorm_sum) << "\n";
-    std::cout << "x.norm = " << std::sqrt(xnorm_sum) << " in " << num_its
+    std::cout << "r.norm = " << std::sqrt(rnom) << "\n";
+    std::cout << "x.norm = " << std::sqrt(xnorm) << " in " << num_its
               << " iterations\n";
     std::cout << "\nTimings (" << mpi_size
               << ")\n----------------------------\n";
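
The hand-rolled Eigen norms and MPI_Allreduce calls in cgmain.cpp are replaced by spmv::Vector operations (x.norm(), x.update(), A * x - b). The distributed norm presumably performs the same reduction the removed code spelled out; here is a minimal sketch of that pattern, written as a hypothetical free function rather than the actual spmv::Vector member.

#include <cstdint>
#include <mpi.h>
#include <vector>

// Squared 2-norm of a distributed vector: each rank reduces over the entries
// it owns (ghosts excluded), then MPI_Allreduce sums across ranks. This is
// the same pattern the removed Eigen/MPI code used explicitly.
double distributed_norm_squared(const std::vector<double>& local,
                                std::int32_t local_size, MPI_Comm comm)
{
  double sq = 0.0;
  for (std::int32_t i = 0; i < local_size; ++i)
    sq += local[i] * local[i];

  double sq_global = 0.0;
  MPI_Allreduce(&sq, &sq_global, 1, MPI_DOUBLE, MPI_SUM, comm);
  return sq_global;
}

Note that the demo still wraps the result of x.norm() and r.norm() in std::sqrt when printing, which suggests norm() returns the squared norm; the sketch mirrors that behaviour.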