5
5
// For full license terms please see the LICENSE file distributed with this
6
6
// source code
7
7
8
- #ifndef MEM_SO_INCLUDE_HIP_STREAM_H_
9
- #define MEM_SO_INCLUDE_HIP_STREAM_H_
8
+ #pragma once
10
9
10
+ #include < algorithm>
11
11
#include < iostream>
12
12
#include < stdexcept>
13
13
#include < sstream>
14
14
15
15
#include " Stream.h"
16
+ #include " hip/hip_runtime.h"
17
+ #ifndef __HIP_PLATFORM_NVCC__
18
+ #include " hip/hip_ext.h"
19
+ #endif
16
20
17
21
#define IMPLEMENTATION_STRING " HIP"
18
22
19
23
template <class T >
20
24
class HIPStream : public Stream <T>
21
25
{
26
+ #ifdef __HIP_PLATFORM_NVCC__
27
+ #ifndef DWORDS_PER_LANE
28
+ #define DWORDS_PER_LANE 1
29
+ #endif
30
+ #ifndef CHUNKS_PER_BLOCK
31
+ #define CHUNKS_PER_BLOCK 8
32
+ #endif
33
+ #else
34
+ #ifndef DWORDS_PER_LANE
35
+ #define DWORDS_PER_LANE 4
36
+ #endif
37
+ #ifndef CHUNKS_PER_BLOCK
38
+ #define CHUNKS_PER_BLOCK 1
39
+ #endif
40
+ #endif
41
+ // make sure that either:
42
+ // DWORDS_PER_LANE * sizeof(unsigned int) (the per-lane size in bytes) is less than sizeof(T), in which case we default to 1 element
43
+ // or
44
+ // DWORDS_PER_LANE * sizeof(unsigned int) (the per-lane size in bytes) is divisible by sizeof(T)
45
+ static_assert ((DWORDS_PER_LANE * sizeof (unsigned int ) < sizeof (T)) ||
46
+ (DWORDS_PER_LANE * sizeof (unsigned int ) % sizeof (T) == 0 ),
47
+ " DWORDS_PER_LANE not divisible by sizeof(element_type)" );
48
+
49
+ static constexpr unsigned int chunks_per_block{CHUNKS_PER_BLOCK};
50
+ // take into account the datatype size
51
+ // that is, if we specify 4 DWORDS_PER_LANE, this is 2 FP64 elements
52
+ // and 4 FP32 elements
53
+ static constexpr unsigned int elements_per_lane{
54
+ (DWORDS_PER_LANE * sizeof (unsigned int )) < sizeof (T) ? 1 : (
55
+ DWORDS_PER_LANE * sizeof (unsigned int ) / sizeof (T))};
22
56
protected:
23
57
// Size of arrays
24
- unsigned int array_size;
58
+ const unsigned int array_size;
59
+ const unsigned int block_cnt;
60
+ const bool evt_timing;
61
+ hipEvent_t start_ev;
62
+ hipEvent_t stop_ev;
63
+ hipEvent_t coherent_ev;
25
64
26
65
// Host array for partial sums for dot kernel
27
66
T *sums;
@@ -30,22 +69,19 @@ class HIPStream : public Stream<T>
30
69
T *d_a;
31
70
T *d_b;
32
71
T *d_c;
33
- T *d_sum;
34
-
35
72
36
73
public:
37
-
38
- HIPStream (const unsigned int , const int );
74
+ HIPStream (const unsigned int , const bool , const int );
39
75
~HIPStream ();
40
76
41
- virtual void copy () override ;
42
- virtual void add () override ;
43
- virtual void mul () override ;
44
- virtual void triad () override ;
77
+ virtual float read () override ;
78
+ virtual float write () override ;
79
+ virtual float copy () override ;
80
+ virtual float add () override ;
81
+ virtual float mul () override ;
82
+ virtual float triad () override ;
45
83
virtual T dot () override ;
46
84
47
85
virtual void init_arrays (T initA, T initB, T initC) override ;
48
86
virtual void read_arrays (std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override ;
49
-
50
87
};
51
- #endif
0 commit comments