Skip to content

Commit f590ccb

Browse files
authored
Merge pull request #731 from ROCm/rvs-rel-6.1
Merge pull request #730 from jkottiku/master
2 parents 8ac8ffc + ddc928b commit f590ccb

File tree

7 files changed

+538
-145
lines changed

7 files changed

+538
-145
lines changed

babel.so/CMakeLists.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,13 @@ set(HIP_HCC_BUILD_FLAGS "${HIP_HCC_BUILD_FLAGS} -DHIP_VERSION_MAJOR=${HIP_VERSIO
6161
set(HIP_HCC_BUILD_FLAGS)
6262
set(HIP_HCC_BUILD_FLAGS "${HIP_HCC_BUILD_FLAGS} -fPIC ${HCC_CXX_FLAGS} -I${HSA_PATH}/include ${ASAN_CXX_FLAGS}")
6363

64+
set(HIP_STREAM_BUILD_FLAGS "-DNONTEMPORAL=1 -DDWORDS_PER_LANE=4 -DTBSIZE=1024 -DCHUNKS_PER_BLOCK=2 -O3 -std=c++17")
65+
6466
# Set compiler and compiler flags
6567
set(CMAKE_CXX_COMPILER "${HIPCC_PATH}/bin/hipcc")
6668
set(CMAKE_C_COMPILER "${HIPCC_PATH}/bin/hipcc")
67-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${HIP_HCC_BUILD_FLAGS}")
68-
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${HIP_HCC_BUILD_FLAGS}")
69+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${HIP_HCC_BUILD_FLAGS} ${HIP_STREAM_BUILD_FLAGS}")
70+
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${HIP_HCC_BUILD_FLAGS} ${HIP_STREAM_BUILD_FLAGS}")
6971
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${ASAN_LD_FLAGS}")
7072
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${ASAN_LD_FLAGS}")
7173

babel.so/include/HIPStream.h

Lines changed: 49 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,23 +5,62 @@
55
// For full license terms please see the LICENSE file distributed with this
66
// source code
77

8-
#ifndef MEM_SO_INCLUDE_HIP_STREAM_H_
9-
#define MEM_SO_INCLUDE_HIP_STREAM_H_
8+
#pragma once
109

10+
#include <algorithm>
1111
#include <iostream>
1212
#include <stdexcept>
1313
#include <sstream>
1414

1515
#include "Stream.h"
16+
#include "hip/hip_runtime.h"
17+
#ifndef __HIP_PLATFORM_NVCC__
18+
#include "hip/hip_ext.h"
19+
#endif
1620

1721
#define IMPLEMENTATION_STRING "HIP"
1822

1923
template <class T>
2024
class HIPStream : public Stream<T>
2125
{
26+
#ifdef __HIP_PLATFORM_NVCC__
27+
#ifndef DWORDS_PER_LANE
28+
#define DWORDS_PER_LANE 1
29+
#endif
30+
#ifndef CHUNKS_PER_BLOCK
31+
#define CHUNKS_PER_BLOCK 8
32+
#endif
33+
#else
34+
#ifndef DWORDS_PER_LANE
35+
#define DWORDS_PER_LANE 4
36+
#endif
37+
#ifndef CHUNKS_PER_BLOCK
38+
#define CHUNKS_PER_BLOCK 1
39+
#endif
40+
#endif
41+
// make sure that either:
42+
// DWORDS_PER_LANE * sizeof(unsigned int) (the per-lane size in bytes) is less than sizeof(T), in which case we default to 1 element
43+
// or
44+
// DWORDS_PER_LANE * sizeof(unsigned int) (the per-lane size in bytes) is divisible by sizeof(T)
45+
static_assert((DWORDS_PER_LANE * sizeof(unsigned int) < sizeof(T)) ||
46+
(DWORDS_PER_LANE * sizeof(unsigned int) % sizeof(T) == 0),
47+
"DWORDS_PER_LANE not divisible by sizeof(element_type)");
48+
49+
static constexpr unsigned int chunks_per_block{CHUNKS_PER_BLOCK};
50+
// take into account the datatype size
51+
// that is, if we specify 4 DWORDS_PER_LANE, this is 2 FP64 elements
52+
// and 4 FP32 elements
53+
static constexpr unsigned int elements_per_lane{
54+
(DWORDS_PER_LANE * sizeof(unsigned int)) < sizeof(T) ? 1 : (
55+
DWORDS_PER_LANE * sizeof(unsigned int) / sizeof(T))};
2256
protected:
2357
// Size of arrays
24-
unsigned int array_size;
58+
const unsigned int array_size;
59+
const unsigned int block_cnt;
60+
const bool evt_timing;
61+
hipEvent_t start_ev;
62+
hipEvent_t stop_ev;
63+
hipEvent_t coherent_ev;
2564

2665
// Host array for partial sums for dot kernel
2766
T *sums;
@@ -30,22 +69,19 @@ class HIPStream : public Stream<T>
3069
T *d_a;
3170
T *d_b;
3271
T *d_c;
33-
T *d_sum;
34-
3572

3673
public:
37-
38-
HIPStream(const unsigned int, const int);
74+
HIPStream(const unsigned int, const bool, const int);
3975
~HIPStream();
4076

41-
virtual void copy() override;
42-
virtual void add() override;
43-
virtual void mul() override;
44-
virtual void triad() override;
77+
virtual float read() override;
78+
virtual float write() override;
79+
virtual float copy() override;
80+
virtual float add() override;
81+
virtual float mul() override;
82+
virtual float triad() override;
4583
virtual T dot() override;
4684

4785
virtual void init_arrays(T initA, T initB, T initC) override;
4886
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
49-
5087
};
51-
#endif

babel.so/include/Stream.h

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,7 @@
55
// For full license terms please see the LICENSE file distributed with this
66
// source code
77

8-
9-
#ifndef RVS_INCLUDE_STREAM_H_
10-
#define RVS_INCLUDE_STREAM_H_
8+
#pragma once
119

1210
#include <vector>
1311
#include <string>
@@ -27,10 +25,12 @@ class Stream
2725

2826
// Kernels
2927
// These must be blocking calls
30-
virtual void copy() = 0;
31-
virtual void mul() = 0;
32-
virtual void add() = 0;
33-
virtual void triad() = 0;
28+
virtual float read() = 0;
29+
virtual float write() = 0;
30+
virtual float copy() = 0;
31+
virtual float mul() = 0;
32+
virtual float add() = 0;
33+
virtual float triad() = 0;
3434
virtual T dot() = 0;
3535

3636
// Copy memory between host and device
@@ -44,6 +44,3 @@ class Stream
4444
void listDevices(void);
4545
std::string getDeviceName(const int);
4646
std::string getDeviceDriver(const int);
47-
48-
#endif
49-

babel.so/src/rvs_memworker.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,8 @@ void MemWorker::run() {
6363
std::pair<int, uint16_t> device;
6464

6565
// log MEM stress test - start message
66-
msg = "[" + action_name + "] " + MODULE_NAME + " " +
67-
std::to_string(gpu_id) + " " + " Starting the Memory stress test ";
66+
msg = "[" + action_name + "] " + "[GPU:: " +
67+
std::to_string(gpu_id) + "] " + "Starting the Babel memory stress test";
6868
rvs::lp::Log(msg, rvs::logresults);
6969

7070
/* Device Index */

0 commit comments

Comments
 (0)