SPMV using onemkl and SYCL - for discussion purposes #1

Status: Open. Wants to merge 40 commits into base branch master.

Commits (40):
a02ad18  try oneapi mkl... not working [skip ci]  (chrisrichardson, May 11, 2020)
f31d7bb  Merge remote-tracking branch 'github/chris/onemkl' into chris/onemkl  (chrisrichardson, May 11, 2020)
31d9189  Merge branch 'master' into chris/onemkl  (chrisrichardson, Jun 9, 2020)
dec43eb  Maybe even works!  (chrisrichardson, Jun 9, 2020)
4dfecc9  Fiddling around  (chrisrichardson, Jun 9, 2020)
28e9240  Add debug output  (chrisrichardson, Jun 10, 2020)
153bd56  Update to latest onemkl  (chrisrichardson, Jun 10, 2020)
e42fb6b  start adding support to sycl  (IgorBaratta, Oct 24, 2020)
55a7d3a  add Distributed vector  (IgorBaratta, Oct 24, 2020)
27af3e7  update demo  (IgorBaratta, Oct 25, 2020)
d7861ad  add new simple demo  (IgorBaratta, Oct 25, 2020)
fbededa  add more documentation  (IgorBaratta, Oct 25, 2020)
a9c6768  separate api and implementation  (IgorBaratta, Oct 25, 2020)
d8e4a99  fix mat mult mkl  (IgorBaratta, Oct 26, 2020)
3a60a6d  try to fix tests  (IgorBaratta, Oct 26, 2020)
854b9b1  add docker intel base kit  (IgorBaratta, Oct 26, 2020)
f8bf94a  use intel docker container  (IgorBaratta, Oct 26, 2020)
060f84c  revert some changes  (IgorBaratta, Oct 26, 2020)
677554e  install oneapi - github actions  (IgorBaratta, Oct 26, 2020)
3a9643d  use clang12 compiler  (IgorBaratta, Oct 26, 2020)
30761f0  few improvements  (IgorBaratta, Oct 26, 2020)
c755b25  reenable some functionalities  (IgorBaratta, Oct 27, 2020)
6b5ed9b  fix vector addition and multiplication  (IgorBaratta, Oct 27, 2020)
064f6a9  fix cg  (IgorBaratta, Oct 27, 2020)
5138837  fix cg norm  (IgorBaratta, Oct 27, 2020)
96692fe  change default to cpu instead of host  (IgorBaratta, Oct 27, 2020)
e001746  try to fix ci  (IgorBaratta, Oct 27, 2020)
5d6a2f4  use github ci  (IgorBaratta, Oct 27, 2020)
a14e80c  do not use sudo  (IgorBaratta, Oct 27, 2020)
f2c128a  try setting onemkl dir  (IgorBaratta, Oct 27, 2020)
c176bde  make a symbolic link to eigen headers  (IgorBaratta, Oct 27, 2020)
86acd87  minor fixes  (IgorBaratta, Oct 28, 2020)
edd3c47  Merge branch 'master' of github.com:chrisrichardson/spmv into igor/mk…  (IgorBaratta, Oct 28, 2020)
a1fa1e9  bump oneapi version  (IgorBaratta, Oct 28, 2020)
a77b046  use oneapi 1.0  (IgorBaratta, Oct 28, 2020)
36393fa  formating  (IgorBaratta, Oct 28, 2020)
d374161  remove eigen  (IgorBaratta, Nov 9, 2020)
cde16d0  remove demo  (IgorBaratta, Nov 9, 2020)
4a0e212  keep removing eigen  (IgorBaratta, Nov 9, 2020)
323f848  fix for gcc<9.3  (IgorBaratta, Nov 9, 2020)
Files changed:
19 changes: 14 additions & 5 deletions .github/workflows/ccpp.yml

@@ -5,15 +5,24 @@ on: [push]
 jobs:
   build:
 
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-20.04
+    container: intel/oneapi-basekit
+
+    env:
+      CXX: clang++
+      ONE_MKL_DIR: /opt/intel/oneapi/mkl/2021.1-beta10/
 
     steps:
     - uses: actions/checkout@v2
-    - name: Install Eigen3
-      run: sudo apt install -y libmpich-dev libeigen3-dev
+    - name: Install Dependencies
+      run: |
+        apt update && apt install -y libeigen3-dev
+        ln -s /usr/include/eigen3/Eigen /usr/include/Eigen
    - name: cmake
-      run: cmake .
+      run: echo $ONE_MKL_DIR && cmake .
    - name: make
      run: make
    - name: Run with 2 processes
-      run: ./demos/matvecmain && OMP_NUM_THREADS=1 mpirun -n 2 ./demos/matvecmain
+      run: |
+        ./demos/matvecmain && mpirun -n 2 ./demos/matvecmain
+        ./demos/vecnorm && mpirun -n 2 ./demos/vecnorm
17 changes: 6 additions & 11 deletions CMakeLists.txt

@@ -1,5 +1,5 @@
-# Require CMake 3.5
-cmake_minimum_required(VERSION 3.5)
+# Require CMake 3.10
+cmake_minimum_required(VERSION 3.10)
 
 # Choose Release or Debug
 set(CMAKE_BUILD_TYPE Release)
@@ -18,18 +18,13 @@ set(CMAKE_SKIP_RPATH FALSE)
 set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
 
 find_package(MPI REQUIRED)
+include_directories(${MPI_INCLUDE_PATH})
 
-set(BLA_VENDOR Intel10_64lp)
-find_package(BLAS)
-if(BLAS_FOUND)
-  include_directories("/usr/include/mkl")
-endif()
-
-find_package(Eigen3 REQUIRED)
-include_directories(${EIGEN3_INCLUDE_DIR} ${MPI_INCLUDE_PATH})
+find_package(BLAS REQUIRED)
+include_directories($ENV{ONE_MKL_DIR})
 
 # Not sure why need to set -O3 and -DNDEBUG as should be set by Release build type
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror=all -g -O3 -DNDEBUG -DEIGEN_MAX_ALIGN_BYTES=32")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O3 -DNDEBUG -fsycl -DMKL_ILP64 -tbb")
 
 add_subdirectory(spmv)
 include_directories (${CMAKE_CURRENT_SOURCE_DIR})
19 changes: 14 additions & 5 deletions demos/CMakeLists.txt

@@ -1,11 +1,20 @@
 
 # Executables
 
-add_executable(cgmain cgmain.cpp)
-target_link_libraries(cgmain LINK_PUBLIC ${PROJECT_NAME})
+add_executable(vecnorm vecnorm.cpp)
+target_link_libraries(vecnorm LINK_PUBLIC ${PROJECT_NAME})
 
 add_executable(matvecmain matvecmain.cpp CreateA.cpp)
 target_link_libraries(matvecmain ${PROJECT_NAME})
 
-add_executable(restrictmain restrictmain.cpp)
-target_link_libraries(restrictmain ${PROJECT_NAME})
+add_executable(diagonal diagonal.cpp)
+target_link_libraries(diagonal ${PROJECT_NAME})
+
+add_executable(cgmain cgmain.cpp)
+target_link_libraries(cgmain LINK_PUBLIC ${PROJECT_NAME})
+
+# Copy files matrix and vector files to binary dir
+file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/petsc_mat_poisson
+     DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
+
+file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/petsc_vec_poisson
+     DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
99 changes: 48 additions & 51 deletions demos/CreateA.cpp

@@ -1,32 +1,14 @@
-// Copyright (C) 2018-2020 Chris Richardson ([email protected])
+// Copyright (C) 2018-2020 Chris Richardson, Igor Baratta
 // SPDX-License-Identifier: MIT
 
 #include "CreateA.h"
-#include <Eigen/Sparse>
 
 #include <memory>
 #include <set>
 #include <spmv/L2GMap.h>
+#include <spmv/utils.h>
 
-//-----------------------------------------------------------------------------
-// Divide size into N ~equal chunks
-std::vector<std::int64_t> owner_ranges(std::int64_t size, std::int64_t N)
-{
-  // Compute number of items per process and remainder
-  const std::int64_t n = N / size;
-  const std::int64_t r = N % size;
-
-  // Compute local range
-  std::vector<std::int64_t> ranges;
-  for (int rank = 0; rank < (size + 1); ++rank)
-  {
-    if (rank < r)
-      ranges.push_back(rank * (n + 1));
-    else
-      ranges.push_back(rank * n + r);
-  }
-
-  return ranges;
-}
 //-----------------------------------------------------------------------------
 spmv::Matrix<double> create_A(MPI_Comm comm, int N)
 {
@@ -36,15 +18,16 @@ spmv::Matrix<double> create_A(MPI_Comm comm, int N)
   MPI_Comm_size(comm, &mpi_size);
 
   // Make a square Matrix divided evenly across cores
-  std::vector<std::int64_t> ranges = owner_ranges(mpi_size, N);
+  std::vector<std::int64_t> ranges = spmv::owner_ranges(mpi_size, N);
 
   std::int64_t r0 = ranges[mpi_rank];
   std::int64_t r1 = ranges[mpi_rank + 1];
   int M = r1 - r0;
 
-  // Local part of the matrix
-  // Must be RowMajor and compressed
-  Eigen::SparseMatrix<double, Eigen::RowMajor> A(M, N);
+  // Local part of the matrix, COO format
+  std::vector<double> coo_data;
+  std::vector<std::int32_t> coo_row;
+  std::vector<std::int32_t> coo_col;
 
   // Set up A
   // Add entries on all local rows
@@ -57,29 +40,53 @@ spmv::Matrix<double> create_A(MPI_Comm comm, int N)
     // Special case for very first and last global rows
     if (c0 == 0)
    {
-      A.insert(i, c0) = 1.0 - gamma;
-      A.insert(i, c0 + 1) = gamma;
+      // A.insert(i, c0) = 1.0 - gamma;
+      coo_data.push_back(1.0 - gamma);
+      coo_row.push_back(i);
+      coo_col.push_back(c0);
+
+      // A.insert(i, c0 + 1) = gamma;
+      coo_data.push_back(gamma);
+      coo_row.push_back(i);
+      coo_col.push_back(c0 + 1);
    }
    else if (c0 == (N - 1))
    {
-      A.insert(i, c0 - 1) = gamma;
-      A.insert(i, c0) = 1.0 - gamma;
+      // A.insert(i, c0 - 1) = gamma;
+      coo_data.push_back(gamma);
+      coo_row.push_back(i);
+      coo_col.push_back(c0 - 1);
+
+      // A.insert(i, c0) = 1.0 - gamma;
+      coo_data.push_back(1.0 - gamma);
+      coo_row.push_back(i);
+      coo_col.push_back(c0);
    }
    else
    {
-      A.insert(i, c0 - 1) = gamma;
-      A.insert(i, c0) = 1.0 - 2.0 * gamma;
-      A.insert(i, c0 + 1) = gamma;
+      // A.insert(i, c0 - 1) = gamma;
+      coo_data.push_back(gamma);
+      coo_row.push_back(i);
+      coo_col.push_back(c0 - 1);
+
+      // A.insert(i, c0) = 1.0 - 2.0 * gamma;
+      coo_data.push_back(1.0 - 2.0 * gamma);
+      coo_row.push_back(i);
+      coo_col.push_back(c0);
+
+      // A.insert(i, c0 + 1) = gamma;
+      coo_data.push_back(gamma);
+      coo_row.push_back(i);
+      coo_col.push_back(c0 + 1);
    }
  }
-  A.makeCompressed();
 
   // Remap columns to local indexing
   std::set<std::int64_t> ghost_indices;
-  std::int32_t nnz = A.outerIndexPtr()[M];
+  std::int32_t nnz = coo_col.size();
   for (std::int32_t i = 0; i < nnz; ++i)
  {
-    std::int32_t global_index = A.innerIndexPtr()[i];
+    std::int32_t global_index = coo_col[i];
    if (global_index < r0 or global_index >= r1)
      ghost_indices.insert(global_index);
  }
@@ -90,23 +97,13 @@ spmv::Matrix<double> create_A(MPI_Comm comm, int N)
       = std::make_shared<spmv::L2GMap>(comm, M, std::vector<std::int64_t>());
 
   // Rebuild A using local indices
-  Eigen::SparseMatrix<double, Eigen::RowMajor> Alocal(M, M + ghosts.size());
-  std::vector<Eigen::Triplet<double>> vals;
-  std::int32_t* Aouter = A.outerIndexPtr();
-  std::int32_t* Ainner = A.innerIndexPtr();
-  double* Aval = A.valuePtr();
+  for (auto& col : coo_col)
+    col = col_l2g->global_to_local(col);
 
-  for (std::int32_t row = 0; row < M; ++row)
-  {
-    for (std::int32_t j = Aouter[row]; j < Aouter[row + 1]; ++j)
-    {
-      std::int32_t col = col_l2g->global_to_local(Ainner[j]);
-      double val = Aval[j];
-      vals.push_back(Eigen::Triplet<double>(row, col, val));
-    }
-  }
-  Alocal.setFromTriplets(vals.begin(), vals.end());
+  auto [data, indptr, indices] = spmv::coo_to_csr<double>(
+      row_l2g->local_size(), col_l2g->local_size() + col_l2g->num_ghosts(),
+      coo_data.size(), coo_row, coo_col, coo_data);
 
-  return spmv::Matrix<double>(Alocal, col_l2g, row_l2g);
+  return spmv::Matrix<double>(data, indptr, indices, col_l2g, row_l2g);
 }
 //-----------------------------------------------------------------------------
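
The rewrite above leans on two helpers that now live in <spmv/utils.h> but are not part of this diff: spmv::owner_ranges and spmv::coo_to_csr. Judging from the removed Eigen code and the call site in create_A, they presumably behave like the sketch below; the names are from the diff, but the exact signatures and return types in utils.h are assumptions.

#include <cstdint>
#include <tuple>
#include <vector>

// Split N rows into 'size' near-equal contiguous ranges; ranges[rank] and
// ranges[rank + 1] bound the rows owned by 'rank' (mirrors the removed code).
std::vector<std::int64_t> owner_ranges(std::int64_t size, std::int64_t N)
{
  const std::int64_t n = N / size;
  const std::int64_t r = N % size;
  std::vector<std::int64_t> ranges;
  for (std::int64_t rank = 0; rank < size + 1; ++rank)
  {
    if (rank < r)
      ranges.push_back(rank * (n + 1));
    else
      ranges.push_back(rank * n + r);
  }
  return ranges;
}

// Convert COO triplets to CSR arrays; the tuple order (data, indptr, indices)
// matches the structured binding at the create_A call site above.
template <typename T>
std::tuple<std::vector<T>, std::vector<std::int32_t>, std::vector<std::int32_t>>
coo_to_csr(std::int32_t nrows, std::int32_t /*ncols*/, std::int32_t nnz,
           const std::vector<std::int32_t>& rows,
           const std::vector<std::int32_t>& cols, const std::vector<T>& vals)
{
  std::vector<std::int32_t> indptr(nrows + 1, 0);
  std::vector<std::int32_t> indices(nnz);
  std::vector<T> data(nnz);

  // Count entries per row, then prefix-sum to get row offsets
  for (std::int32_t k = 0; k < nnz; ++k)
    ++indptr[rows[k] + 1];
  for (std::int32_t i = 0; i < nrows; ++i)
    indptr[i + 1] += indptr[i];

  // Scatter each triplet into its row segment (order preserved for row-sorted input)
  std::vector<std::int32_t> next(indptr.begin(), indptr.end() - 1);
  for (std::int32_t k = 0; k < nnz; ++k)
  {
    const std::int32_t dest = next[rows[k]]++;
    indices[dest] = cols[k];
    data[dest] = vals[k];
  }
  return {data, indptr, indices};
}
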
3 changes: 2 additions & 1 deletion demos/CreateA.h

@@ -3,7 +3,8 @@
 
 #pragma once
 
-#include <Eigen/Sparse>
+#include <CL/sycl.hpp>
+
 #include <memory>
 #include <mpi.h>
 #include <spmv/Matrix.h>
32 changes: 15 additions & 17 deletions demos/cgmain.cpp

@@ -1,8 +1,7 @@
 // Copyright (C) 2018-2020 Chris Richardson ([email protected])
 // SPDX-License-Identifier: MIT
+#include <CL/sycl.hpp>
 
-#include <Eigen/Dense>
-#include <Eigen/Sparse>
 #include <chrono>
 #include <iostream>
 #include <memory>
@@ -34,12 +33,15 @@ int cg_main(int argc, char** argv)
   else
     throw std::runtime_error("Use with filename");
 
-  auto A
-      = spmv::read_petsc_binary(MPI_COMM_WORLD, "petsc_mat" + argv1 + ".dat");
-  std::shared_ptr<const spmv::L2GMap> l2g = A.col_map();
+  auto A = spmv::read_petsc_binary(MPI_COMM_WORLD, "petsc_mat_" + argv1);
+  std::shared_ptr<spmv::L2GMap> l2g = A.col_map();
 
-  auto b = spmv::read_petsc_binary_vector(MPI_COMM_WORLD,
-                                          "petsc_vec" + argv1 + ".dat");
+  auto b_data
+      = spmv::read_petsc_binary_vector(MPI_COMM_WORLD, "petsc_vec_" + argv1);
+  std::int32_t ls = l2g->local_size() + l2g->num_ghosts();
+  b_data.resize(ls);
+  spmv::Vector<double> b(b_data, l2g);
+
   // Get local and global sizes
   std::int64_t N = l2g->global_size();
 
@@ -61,21 +63,17 @@ int cg_main(int argc, char** argv)
   MPI_Pcontrol(0);
 
   // Get norm on local part of vector
-  double xnorm = x.head(l2g->local_size()).squaredNorm();
-  double xnorm_sum;
-  MPI_Allreduce(&xnorm, &xnorm_sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  double xnorm = x.norm();
 
   // Test result
-  l2g->update(x.data());
-  Eigen::VectorXd r = A * x - b;
-  double rnorm = r.squaredNorm();
-  double rnorm_sum;
-  MPI_Allreduce(&rnorm, &rnorm_sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  x.update();
+  auto r = A * x - b;
+  double rnom = r.norm();
 
   if (mpi_rank == 0)
   {
-    std::cout << "r.norm = " << std::sqrt(rnorm_sum) << "\n";
-    std::cout << "x.norm = " << std::sqrt(xnorm_sum) << " in " << num_its
+    std::cout << "r.norm = " << std::sqrt(rnom) << "\n";
+    std::cout << "x.norm = " << std::sqrt(xnorm) << " in " << num_its
               << " iterations\n";
     std::cout << "\nTimings (" << mpi_size
               << ")\n----------------------------\n";
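
The hand-rolled Eigen norms and MPI_Allreduce calls in cgmain.cpp are replaced by spmv::Vector operations (x.norm(), x.update(), A * x - b). The distributed norm presumably performs the same reduction the removed code spelled out; here is a minimal sketch of that pattern, written as a hypothetical free function rather than the actual spmv::Vector member.

#include <cstdint>
#include <mpi.h>
#include <vector>

// Squared 2-norm of a distributed vector: each rank reduces over the entries
// it owns (ghosts excluded), then MPI_Allreduce sums across ranks. This is
// the same pattern the removed Eigen/MPI code used explicitly.
double distributed_norm_squared(const std::vector<double>& local,
                                std::int32_t local_size, MPI_Comm comm)
{
  double sq = 0.0;
  for (std::int32_t i = 0; i < local_size; ++i)
    sq += local[i] * local[i];

  double sq_global = 0.0;
  MPI_Allreduce(&sq, &sq_global, 1, MPI_DOUBLE, MPI_SUM, comm);
  return sq_global;
}

Note that the demo still wraps the result of x.norm() and r.norm() in std::sqrt when printing, which suggests norm() returns the squared norm; the sketch mirrors that behaviour.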