-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathsum.cl
146 lines (146 loc) · 3.72 KB
/
sum.cl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#include "FGPUlib.c"
#include "addsf3.c"
/*
 * sum_atomic_word - fold 32-bit integers into one shared accumulator.
 *
 * Each work-item adds up `reduce_factor` elements of `in`, starting at the
 * index equal to its global id (dimension 0) and advancing by the global
 * size between reads. The partial sum is then added to *out with atomic_add.
 *
 * in            input values
 * out           accumulator word; this kernel only adds to it and never
 *               resets it
 * reduce_factor number of input elements consumed by each work-item
 */
__kernel void sum_atomic_word(__global int *in, __global int *out, unsigned int reduce_factor) {
    const unsigned int stride = get_global_size(0);
    unsigned int idx = get_global_id(0);
    int sum = 0;

    // Pre-tested loop so that reduce_factor == 0 reads nothing. A do/while
    // with a "!=" exit test would read one element and then keep going until
    // the counter wrapped around, indexing far outside `in`.
    for (unsigned int i = 0; i < reduce_factor; i++) {
        sum += in[idx];
        idx += stride;
    }

    atomic_add(out, sum);
}
/*
 * sum_half_atomic - fold 16-bit integers into one shared accumulator.
 *
 * Each work-item adds up `reduce_factor` shorts from `in`, starting at the
 * index equal to its global id (dimension 0) and advancing by the global
 * size between reads. The partial sum is kept in an int and then added to
 * the accumulator with atomic_add.
 *
 * in            input values
 * out           accumulator; it is reinterpreted as a pointer to int for the
 *               atomic update, so a full int at that address is modified.
 *               NOTE(review): assumes the buffer behind `out` is at least
 *               int-sized and int-aligned - confirm against the host code.
 * reduce_factor number of input elements consumed by each work-item
 */
__kernel void sum_half_atomic(__global short *in, __global short *out, unsigned int reduce_factor) {
    const unsigned int stride = get_global_size(0);
    unsigned int idx = get_global_id(0);
    int sum = 0;

    // Pre-tested loop so that reduce_factor == 0 reads nothing. A do/while
    // with a "!=" exit test would read one element and then keep going until
    // the counter wrapped around, indexing far outside `in`.
    for (unsigned int i = 0; i < reduce_factor; i++) {
        sum += in[idx];
        idx += stride;
    }

    atomic_add((__global int*)out, sum);
}
/*
 * sum_half_improved_atomic - 16-bit atomic sum that reads two shorts at once.
 *
 * Each work-item reads reduce_factor / 2 short2 vectors from `in` (integer
 * division, so an odd reduce_factor rounds down), starting at the index
 * equal to its global id (dimension 0) and advancing by the global size
 * between reads. Both components of every vector are accumulated into an
 * int, which is finally added to the accumulator with atomic_add.
 *
 * in            input, viewed as short2 vectors
 * out           accumulator; reinterpreted as a pointer to int for the
 *               atomic update
 * reduce_factor number of scalar shorts each work-item is asked to consume
 */
__kernel void sum_half_improved_atomic(__global short2 *in, __global short *out, unsigned int reduce_factor){
    const unsigned int step = get_global_size(0);
    const unsigned int pairs = reduce_factor / 2;
    unsigned int pos = get_global_id(0);
    unsigned int done = 0;
    int total = 0;

    while (done < pairs) {
        // Fetch the vector once, then add its two lanes in order.
        const short2 v = in[pos];
        total += v.x;
        total += v.y;
        pos += step;
        done++;
    }

    atomic_add((__global int*)out, total);
}
/*
 * sum_byte_atomic - fold 8-bit integers into one shared accumulator.
 *
 * Each work-item adds up `reduce_factor` chars from `in`, starting at the
 * index equal to its global id (dimension 0) and advancing by the global
 * size between reads. The partial sum is kept in an int and then added to
 * the accumulator with atomic_add.
 *
 * in            input values
 * out           accumulator; it is reinterpreted as a pointer to int for the
 *               atomic update, so a full int at that address is modified.
 *               NOTE(review): assumes the buffer behind `out` is at least
 *               int-sized and int-aligned - confirm against the host code.
 * reduce_factor number of input elements consumed by each work-item
 */
__kernel void sum_byte_atomic(__global char *in, __global char *out, unsigned int reduce_factor) {
    const unsigned int stride = get_global_size(0);
    unsigned int idx = get_global_id(0);
    int sum = 0;

    // Pre-tested loop so that reduce_factor == 0 reads nothing. A do/while
    // with a "!=" exit test would read one element and then keep going until
    // the counter wrapped around, indexing far outside `in`.
    for (unsigned int i = 0; i < reduce_factor; i++) {
        sum += in[idx];
        idx += stride;
    }

    atomic_add((__global int*)out, sum);
}
/*
 * sum_byte_improved_atomic - 8-bit atomic sum that reads four chars at once.
 *
 * Each work-item reads reduce_factor / 4 char4 vectors from `in` (integer
 * division, so a reduce_factor that is not a multiple of 4 rounds down),
 * starting at the index equal to its global id (dimension 0) and advancing
 * by the global size between reads. All four components of every vector are
 * accumulated into an int, which is finally added to the accumulator with
 * atomic_add.
 *
 * in            input, viewed as char4 vectors
 * out           accumulator; reinterpreted as a pointer to int for the
 *               atomic update
 * reduce_factor number of scalar chars each work-item is asked to consume
 */
__kernel void sum_byte_improved_atomic(__global char4 *in, __global char *out, unsigned int reduce_factor){
    const unsigned int step = get_global_size(0);
    const unsigned int quads = reduce_factor / 4;
    unsigned int pos = get_global_id(0);
    unsigned int done = 0;
    int total = 0;

    while (done < quads) {
        // Fetch the vector once, then add its four lanes in order.
        const char4 v = in[pos];
        total += v.x;
        total += v.y;
        total += v.z;
        total += v.w;
        pos += step;
        done++;
    }

    atomic_add((__global int*)out, total);
}
/*
 * sum - per-work-item partial sums of 32-bit integers.
 *
 * Each work-item adds up `reduce_factor` elements of `in`, starting at the
 * index equal to its global id (dimension 0) and advancing by the global
 * size between reads, and stores the result in out[global id].
 *
 * in            input values
 * out           one partial sum per work-item
 * reduce_factor number of input elements consumed by each work-item
 */
__kernel void sum(__global int *in, __global int *out, unsigned int reduce_factor){
    const int gid = get_global_id(0);
    const unsigned int stride = get_global_size(0);
    unsigned int idx = gid;
    int acc = 0;

    // Pre-tested loop so that reduce_factor == 0 reads nothing and stores 0.
    // A do/while with a "!=" exit test would read one element and then keep
    // going until the counter wrapped around, indexing far outside `in`.
    for (unsigned int i = 0; i < reduce_factor; i++) {
        acc += in[idx];
        idx += stride;
    }

    out[gid] = acc;
}
/*
 * sum_half - per-work-item partial sums of 16-bit integers.
 *
 * Each work-item adds up `reduce_factor` shorts from `in`, starting at the
 * index equal to its global id (dimension 0) and advancing by the global
 * size between reads. The total is accumulated in an int and implicitly
 * converted to short when stored in out[global id].
 *
 * in            input values
 * out           one partial sum per work-item (short)
 * reduce_factor number of input elements consumed by each work-item
 */
__kernel void sum_half(__global short *in, __global short *out, unsigned int reduce_factor){
    const int gid = get_global_id(0);
    const unsigned int stride = get_global_size(0);
    unsigned int idx = gid;
    int acc = 0;

    // Pre-tested loop so that reduce_factor == 0 reads nothing and stores 0.
    // A do/while with a "!=" exit test would read one element and then keep
    // going until the counter wrapped around, indexing far outside `in`.
    for (unsigned int i = 0; i < reduce_factor; i++) {
        acc += in[idx];
        idx += stride;
    }

    out[gid] = acc;
}
/*
 * sum_half_improved - 16-bit partial sums that read two shorts at once.
 *
 * Each work-item reads reduce_factor / 2 short2 vectors from `in` (integer
 * division, so an odd reduce_factor rounds down), starting at the index
 * equal to its global id (dimension 0) and advancing by the global size
 * between reads. Both components of every vector are accumulated into an
 * int, which is implicitly converted to short when stored in out[global id].
 *
 * in            input, viewed as short2 vectors
 * out           one partial sum per work-item (short)
 * reduce_factor number of scalar shorts each work-item is asked to consume
 */
__kernel void sum_half_improved(__global short2 *in, __global short *out, unsigned int reduce_factor){
    const int gid = get_global_id(0);
    const unsigned int step = get_global_size(0);
    const unsigned int pairs = reduce_factor / 2;
    unsigned int pos = gid;
    unsigned int done = 0;
    int total = 0;

    while (done < pairs) {
        // Fetch the vector once, then add its two lanes in order.
        const short2 v = in[pos];
        total += v.x;
        total += v.y;
        pos += step;
        done++;
    }

    out[gid] = total;
}
/*
 * sum_byte - per-work-item partial sums of 8-bit integers.
 *
 * Each work-item adds up `reduce_factor` chars from `in`, starting at the
 * index equal to its global id (dimension 0) and advancing by the global
 * size between reads. The total is accumulated in an int and implicitly
 * converted to char when stored in out[global id].
 *
 * in            input values
 * out           one partial sum per work-item (char)
 * reduce_factor number of input elements consumed by each work-item
 */
__kernel void sum_byte(__global char *in, __global char *out, unsigned int reduce_factor){
    const int gid = get_global_id(0);
    const unsigned int stride = get_global_size(0);
    unsigned int idx = gid;
    int acc = 0;

    // Pre-tested loop so that reduce_factor == 0 reads nothing and stores 0.
    // A do/while with a "!=" exit test would read one element and then keep
    // going until the counter wrapped around, indexing far outside `in`.
    for (unsigned int i = 0; i < reduce_factor; i++) {
        acc += in[idx];
        idx += stride;
    }

    out[gid] = acc;
}
/*
 * sum_byte_improved - 8-bit partial sums that read four chars at once.
 *
 * Each work-item reads reduce_factor / 4 char4 vectors from `in` (integer
 * division, so a reduce_factor that is not a multiple of 4 rounds down),
 * starting at the index equal to its global id (dimension 0) and advancing
 * by the global size between reads. All four components of every vector are
 * accumulated into an int, which is implicitly converted to char when
 * stored in out[global id].
 *
 * in            input, viewed as char4 vectors
 * out           one partial sum per work-item (char)
 * reduce_factor number of scalar chars each work-item is asked to consume
 */
__kernel void sum_byte_improved(__global char4 *in, __global char *out, unsigned int reduce_factor){
    const int gid = get_global_id(0);
    const unsigned int step = get_global_size(0);
    const unsigned int quads = reduce_factor / 4;
    unsigned int pos = gid;
    unsigned int done = 0;
    int total = 0;

    while (done < quads) {
        // Fetch the vector once, then add its four lanes in order.
        const char4 v = in[pos];
        total += v.x;
        total += v.y;
        total += v.z;
        total += v.w;
        pos += step;
        done++;
    }

    out[gid] = total;
}
/*
 * sum_float - per-work-item partial sums of single-precision floats.
 *
 * Each work-item adds up `reduce_factor` floats from `in`, starting at the
 * index equal to its global id (dimension 0) and advancing by the global
 * size between reads, and stores the result in out[global id]. Elements are
 * added in increasing index order.
 *
 * in            input values
 * out           one partial sum per work-item
 * reduce_factor number of input elements consumed by each work-item
 */
__kernel void sum_float(__global float *in, __global float *out, unsigned int reduce_factor){
    const int gid = get_global_id(0);
    const unsigned int stride = get_global_size(0);
    unsigned int idx = gid;
    float acc = 0;

    // Pre-tested loop so that reduce_factor == 0 reads nothing and stores 0.
    // A do/while with a "!=" exit test would read one element and then keep
    // going until the counter wrapped around, indexing far outside `in`.
    for (unsigned int i = 0; i < reduce_factor; i++) {
        acc += in[idx];
        idx += stride;
    }

    out[gid] = acc;
}