namespace caffe {

+ template <typename Dtype>
+ __global__ void Slice(const int nthreads, const Dtype* in_data,
+     const bool forward, const int num_slices, const int slice_size,
+     const int bottom_slice_axis, const int top_slice_axis,
+     const int offset_slice_axis, Dtype* out_data) {
+   // One thread per element of the current top blob: bottom_index is the
+   // matching element in the bottom blob, shifted by offset_slice_axis along
+   // the slice axis. forward copies bottom -> top, otherwise top -> bottom.
+   CUDA_KERNEL_LOOP(index, nthreads) {
+     const int total_slice_size = slice_size * top_slice_axis;
+     const int slice_num = index / total_slice_size;
+     const int slice_index = index % total_slice_size;
+     const int bottom_index = slice_index +
+         (slice_num * bottom_slice_axis + offset_slice_axis) * slice_size;
+     if (forward) {
+       out_data[index] = in_data[bottom_index];
+     } else {
+       out_data[bottom_index] = in_data[index];
+     }
+   }
+ }
+
template <typename Dtype>
void SliceLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  int offset_slice_axis = 0;
  const Dtype* bottom_data = bottom[0]->gpu_data();
  const int bottom_slice_axis = bottom[0]->shape(slice_axis_);
+   const bool kForward = true;
  for (int i = 0; i < top.size(); ++i) {
    Dtype* top_data = top[i]->mutable_gpu_data();
    const int top_slice_axis = top[i]->shape(slice_axis_);
-     for (int n = 0; n < num_slices_; ++n) {
-       const int top_offset = n * top_slice_axis * slice_size_;
-       const int bottom_offset =
-           (n * bottom_slice_axis + offset_slice_axis) * slice_size_;
-       caffe_copy(top_slice_axis * slice_size_,
-           bottom_data + bottom_offset, top_data + top_offset);
-     }
+     const int top_slice_size = top_slice_axis * slice_size_;
+     const int nthreads = top_slice_size * num_slices_;
+     Slice<Dtype>  // NOLINT_NEXT_LINE(whitespace/operators)
+         <<<CAFFE_GET_BLOCKS(nthreads), CAFFE_CUDA_NUM_THREADS>>>(
+         nthreads, bottom_data, kForward, num_slices_, slice_size_,
+         bottom_slice_axis, top_slice_axis, offset_slice_axis, top_data);
    offset_slice_axis += top_slice_axis;
  }
}
@@ -33,16 +52,16 @@ void SliceLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
  int offset_slice_axis = 0;
  Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
  const int bottom_slice_axis = bottom[0]->shape(slice_axis_);
+   const bool kForward = false;
  for (int i = 0; i < top.size(); ++i) {
    const Dtype* top_diff = top[i]->gpu_diff();
    const int top_slice_axis = top[i]->shape(slice_axis_);
-     for (int n = 0; n < num_slices_; ++n) {
-       const int top_offset = n * top_slice_axis * slice_size_;
-       const int bottom_offset =
-           (n * bottom_slice_axis + offset_slice_axis) * slice_size_;
-       caffe_copy(top_slice_axis * slice_size_,
-           top_diff + top_offset, bottom_diff + bottom_offset);
-     }
+     const int top_slice_size = top_slice_axis * slice_size_;
+     const int nthreads = top_slice_size * num_slices_;
+     Slice<Dtype>  // NOLINT_NEXT_LINE(whitespace/operators)
+         <<<CAFFE_GET_BLOCKS(nthreads), CAFFE_CUDA_NUM_THREADS>>>(
+         nthreads, top_diff, kForward, num_slices_, slice_size_,
+         bottom_slice_axis, top_slice_axis, offset_slice_axis, bottom_diff);
    offset_slice_axis += top_slice_axis;
  }
}
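
For reference, the index arithmetic the new Slice kernel relies on can be checked on the host: each thread handles one element of the current top blob and reads (or writes) the bottom element at slice_index + (slice_num * bottom_slice_axis + offset_slice_axis) * slice_size. Below is a minimal CPU sketch of the same forward mapping; cpu_slice_forward and the toy shape (2, 4, 3) sliced along axis 1 are illustrative assumptions, not part of this commit.

#include <cassert>
#include <vector>

// Host-side sketch of the index mapping used by the Slice kernel (illustrative only).
void cpu_slice_forward(const std::vector<float>& bottom, std::vector<float>& top,
    int num_slices, int slice_size, int bottom_slice_axis,
    int top_slice_axis, int offset_slice_axis) {
  const int total_slice_size = slice_size * top_slice_axis;
  const int nthreads = total_slice_size * num_slices;
  for (int index = 0; index < nthreads; ++index) {   // one "thread" per top element
    const int slice_num = index / total_slice_size;  // position along the pre-slice axes
    const int slice_index = index % total_slice_size;
    const int bottom_index = slice_index +
        (slice_num * bottom_slice_axis + offset_slice_axis) * slice_size;
    top[index] = bottom[bottom_index];
  }
}

int main() {
  // Toy case: bottom of shape (2, 4, 3) sliced along axis 1 into tops with
  // channel counts 1 and 3, so num_slices = 2, slice_size = 3, bottom_slice_axis = 4.
  std::vector<float> bottom(2 * 4 * 3);
  for (size_t i = 0; i < bottom.size(); ++i) bottom[i] = static_cast<float>(i);
  std::vector<float> top0(2 * 1 * 3), top1(2 * 3 * 3);
  cpu_slice_forward(bottom, top0, 2, 3, 4, 1, 0);  // first top: channel [0, 1)
  cpu_slice_forward(bottom, top1, 2, 3, 4, 3, 1);  // second top: channels [1, 4)
  assert(top0[0] == bottom[0]);   // (n=0, c=0, x=0) maps straight through
  assert(top1[0] == bottom[3]);   // (n=0, c=1, x=0) is bottom element 3
  assert(top0[3] == bottom[12]);  // (n=1, c=0, x=0) starts the second outer slice
  return 0;
}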