mpi_sycl_usm.cpp
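// Demonstrates point-to-point MPI communication on SYCL USM device memory:
// rank 0 fills a device allocation with a SYCL kernel and sends it to rank 1,
// which receives straight into its own device allocation. A GPU-aware MPI
// implementation is required for device pointers to be passed to
// MPI_Send/MPI_Recv directly.
//
// Build/run sketch (an assumption, not part of the original file -- shown for
// Intel oneAPI with Intel MPI, where I_MPI_OFFLOAD=1 enables GPU-aware MPI;
// adjust for your toolchain):
//   mpiicpx -fsycl mpi_sycl_usm.cpp -o mpi_sycl_usm
//   I_MPI_OFFLOAD=1 mpirun -n 2 ./mpi_sycl_usm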
#include <algorithm>
#include <cstdio>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include <mpi.h>
#include <sycl/sycl.hpp>
// Initialize x[i] = 1.1 * i with a kernel on the device that owns x
template <typename T>
void init(sycl::queue &q, const size_t n, T &x)
{
  auto kernel = [=](sycl::id<1> i) {
    x[i] = 1.1 * (size_t)i;
  };
  q.parallel_for(n, kernel);
}
int main(int argc, char *argv[])
{
  MPI_Init(&argc, &argv);

  const size_t n = argc > 1 ? (size_t)std::stoll(argv[1]) : 1024;
  const size_t nbytes = sizeof(double) * n;

  int size, rank;
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  // The send/receive pair below needs at least two ranks
  if (size < 2) MPI_Abort(MPI_COMM_WORLD, 1);

  // Pick one GPU per rank, wrapping around if there are more ranks than GPUs
  auto gpu_devices = sycl::device::get_devices(sycl::info::device_type::gpu);
  auto count = std::size(gpu_devices);
  if (count == 0) MPI_Abort(MPI_COMM_WORLD, 1);
  auto device = gpu_devices[rank % count];
  sycl::queue q{device, sycl::property::queue::in_order{}};
  printf("Hello from MPI rank %d/%d, %zu GPU(s) visible\n", rank, size, count);
  // Device data, zero-initialized (note the double literal: fill's pattern
  // type determines how many bytes per element are written)
  double *x = sycl::malloc_device<double>(n, q);
  q.fill(x, 0.0, n);
  q.wait();

  if (rank == 0) {
    // Initialize data on rank 0
    init(q, n, x);
    q.wait();
    // Send the device buffer from rank 0 (needs a GPU-aware MPI)
    MPI_Send(x, (int)n, MPI_DOUBLE, 1, 123, MPI_COMM_WORLD);
    printf("Rank %d sent\n", rank);
  } else if (rank == 1) {
    // Receive directly into the device buffer on rank 1
    MPI_Recv(x, (int)n, MPI_DOUBLE, 0, 123, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    printf("Rank %d received\n", rank);
  }
  // Copy the result to the host and print the first few elements
  std::vector<double> h_x(n);
  q.memcpy(h_x.data(), x, nbytes).wait();
  std::stringstream ss;
  ss << "Rank " << rank << " has";
  for (size_t i = 0; i < std::min<size_t>(8, n); ++i) ss << " " << h_x[i];
  if (n > 8) ss << " ...";
  ss << "\n";
  std::cout << ss.str();

  sycl::free(x, q);
  MPI_Finalize();
  return 0;
}