
Commit daf32da

jeffdonahue and cbfinn authored and committed
bug fix
1 parent c7cc2c2 commit daf32da

File tree

2 files changed: +60, −20 lines changed

src/caffe/layers/concat_layer.cu

Lines changed: 27 additions & 6 deletions
@@ -6,21 +6,41 @@
 
 namespace caffe {
 
+template <typename Dtype>
+__global__ void Concat(const int nthreads, const Dtype* in_data,
+    const bool forward, const int num_concats, const int concat_size,
+    const int top_concat_axis, const int bottom_concat_axis,
+    const int offset_concat_axis, Dtype* out_data) {
+  CUDA_KERNEL_LOOP(index, nthreads) {
+    const int total_concat_size = concat_size * bottom_concat_axis;
+    const int concat_num = index / total_concat_size;
+    const int concat_index = index % total_concat_size;
+    const int top_index = concat_index +
+        (concat_num * top_concat_axis + offset_concat_axis) * concat_size;
+    if (forward) {
+      out_data[top_index] = in_data[index];
+    } else {
+      out_data[index] = in_data[top_index];
+    }
+  }
+}
+
 template <typename Dtype>
 void ConcatLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
   Dtype* top_data = top[0]->mutable_gpu_data();
   int offset_concat_axis = 0;
   const int top_concat_axis = top[0]->shape(concat_axis_);
+  const bool kForward = true;
   for (int i = 0; i < bottom.size(); ++i) {
     const Dtype* bottom_data = bottom[i]->gpu_data();
     const int bottom_concat_axis = bottom[i]->shape(concat_axis_);
-    for (int n = 0; n < num_concats_; ++n) {
-      caffe_copy(bottom_concat_axis * concat_input_size_,
-          bottom_data + n * bottom_concat_axis * concat_input_size_,
-          top_data + (n * top_concat_axis + offset_concat_axis)
-              * concat_input_size_);
-    }
+    const int bottom_concat_size = bottom_concat_axis * concat_input_size_;
+    const int nthreads = bottom_concat_size * num_concats_;
+    Concat<Dtype>  // NOLINT_NEXT_LINE(whitespace/operators)
+        <<<CAFFE_GET_BLOCKS(nthreads), CAFFE_CUDA_NUM_THREADS>>>(
+        nthreads, bottom_data, kForward, num_concats_, concat_input_size_,
+        top_concat_axis, bottom_concat_axis, offset_concat_axis, top_data);
     offset_concat_axis += bottom_concat_axis;
   }
 }
@@ -31,6 +51,7 @@ void ConcatLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
   const Dtype* top_diff = top[0]->gpu_diff();
   int offset_concat_axis = 0;
   const int top_concat_axis = top[0]->shape(concat_axis_);
+  const bool kForward = false;
   for (int i = 0; i < bottom.size(); ++i) {
     const int bottom_concat_axis = bottom[i]->shape(concat_axis_);
     if (propagate_down[i]) {
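
The fix replaces num_concats_ separate caffe_copy calls per bottom blob (each presumably a device-to-device memcpy) with a single Concat kernel launch that assigns one thread per element and computes its strided destination along concat_axis_. A minimal host-side rehearsal of that index arithmetic, with made-up toy dimensions (plain C++, not part of the commit):

#include <cstdio>
#include <vector>

// Host-side rehearsal of the Concat kernel's index mapping (toy example,
// not from the commit). Two bottoms with concat-axis sizes 2 and 3 are
// concatenated along that axis; concat_size is the trailing volume and
// num_concats the leading volume, as in the layer.
int main() {
  const int num_concats = 2, concat_size = 4;
  const int top_concat_axis = 5;  // 2 + 3
  std::vector<float> top(num_concats * top_concat_axis * concat_size, 0.f);
  int offset_concat_axis = 0;
  const int bottom_axes[2] = {2, 3};
  for (int b = 0; b < 2; ++b) {
    const int bottom_concat_axis = bottom_axes[b];
    const int total_concat_size = concat_size * bottom_concat_axis;
    const int nthreads = total_concat_size * num_concats;
    for (int index = 0; index < nthreads; ++index) {  // one CUDA thread each
      const int concat_num = index / total_concat_size;
      const int concat_index = index % total_concat_size;
      const int top_index = concat_index +
          (concat_num * top_concat_axis + offset_concat_axis) * concat_size;
      top[top_index] = 100.f * b + index;  // stand-in for in_data[index]
    }
    offset_concat_axis += bottom_concat_axis;
  }
  for (int i = 0; i < (int)top.size(); ++i)
    std::printf("%6.0f%s", top[i], (i + 1) % concat_size ? " " : "\n");
  return 0;
}

Every slot of the top buffer is written exactly once, because the per-bottom ranges [offset_concat_axis, offset_concat_axis + bottom_concat_axis) tile the top axis.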

src/caffe/layers/slice_layer.cu

Lines changed: 33 additions & 14 deletions
@@ -6,22 +6,41 @@
 
 namespace caffe {
 
+template <typename Dtype>
+__global__ void Slice(const int nthreads, const Dtype* in_data,
+    const bool forward, const int num_slices, const int slice_size,
+    const int bottom_slice_axis, const int top_slice_axis,
+    const int offset_slice_axis, Dtype* out_data) {
+  CUDA_KERNEL_LOOP(index, nthreads) {
+    const int total_slice_size = slice_size * top_slice_axis;
+    const int slice_num = index / total_slice_size;
+    const int slice_index = index % total_slice_size;
+    const int bottom_index = slice_index +
+        (slice_num * bottom_slice_axis + offset_slice_axis) * slice_size;
+    if (forward) {
+      out_data[index] = in_data[bottom_index];
+    } else {
+      out_data[bottom_index] = in_data[index];
+    }
+  }
+}
+
 template <typename Dtype>
 void SliceLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
   int offset_slice_axis = 0;
   const Dtype* bottom_data = bottom[0]->gpu_data();
   const int bottom_slice_axis = bottom[0]->shape(slice_axis_);
+  const bool kForward = true;
   for (int i = 0; i < top.size(); ++i) {
     Dtype* top_data = top[i]->mutable_gpu_data();
     const int top_slice_axis = top[i]->shape(slice_axis_);
-    for (int n = 0; n < num_slices_; ++n) {
-      const int top_offset = n * top_slice_axis * slice_size_;
-      const int bottom_offset =
-          (n * bottom_slice_axis + offset_slice_axis) * slice_size_;
-      caffe_copy(top_slice_axis * slice_size_,
-          bottom_data + bottom_offset, top_data + top_offset);
-    }
+    const int top_slice_size = top_slice_axis * slice_size_;
+    const int nthreads = top_slice_size * num_slices_;
+    Slice<Dtype>  // NOLINT_NEXT_LINE(whitespace/operators)
+        <<<CAFFE_GET_BLOCKS(nthreads), CAFFE_CUDA_NUM_THREADS>>>(
+        nthreads, bottom_data, kForward, num_slices_, slice_size_,
+        bottom_slice_axis, top_slice_axis, offset_slice_axis, top_data);
     offset_slice_axis += top_slice_axis;
   }
 }
@@ -33,16 +52,16 @@ void SliceLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
   int offset_slice_axis = 0;
   Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
   const int bottom_slice_axis = bottom[0]->shape(slice_axis_);
+  const bool kForward = false;
   for (int i = 0; i < top.size(); ++i) {
     const Dtype* top_diff = top[i]->gpu_diff();
     const int top_slice_axis = top[i]->shape(slice_axis_);
-    for (int n = 0; n < num_slices_; ++n) {
-      const int top_offset = n * top_slice_axis * slice_size_;
-      const int bottom_offset =
-          (n * bottom_slice_axis + offset_slice_axis) * slice_size_;
-      caffe_copy(top_slice_axis * slice_size_,
-          top_diff + top_offset, bottom_diff + bottom_offset);
-    }
+    const int top_slice_size = top_slice_axis * slice_size_;
+    const int nthreads = top_slice_size * num_slices_;
+    Slice<Dtype>  // NOLINT_NEXT_LINE(whitespace/operators)
+        <<<CAFFE_GET_BLOCKS(nthreads), CAFFE_CUDA_NUM_THREADS>>>(
+        nthreads, top_diff, kForward, num_slices_, slice_size_,
+        bottom_slice_axis, top_slice_axis, offset_slice_axis, bottom_diff);
     offset_slice_axis += top_slice_axis;
   }
 }
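
The Slice kernel is the mirror image of Concat: threads iterate over the contiguous top blob, and bottom_index locates the strided element in the bottom; the same kernel serves Backward_gpu with forward == false and the data and diff pointers swapped. A toy host-side check of the forward mapping, again with hypothetical sizes (not from the commit):

#include <cassert>
#include <vector>

// Host-side rehearsal of the Slice kernel's forward mapping (toy example,
// not from the commit). A bottom with slice-axis size 5 is split into tops
// of sizes 2 and 3; every bottom element must land in exactly one top.
int main() {
  const int num_slices = 2, slice_size = 4;
  const int bottom_slice_axis = 5;  // 2 + 3
  std::vector<int> bottom(num_slices * bottom_slice_axis * slice_size);
  for (int i = 0; i < (int)bottom.size(); ++i) bottom[i] = i;
  int offset_slice_axis = 0;
  const int top_axes[2] = {2, 3};
  int copied = 0;
  for (int t = 0; t < 2; ++t) {
    const int top_slice_axis = top_axes[t];
    const int total_slice_size = slice_size * top_slice_axis;
    const int nthreads = total_slice_size * num_slices;
    std::vector<int> top(nthreads);
    for (int index = 0; index < nthreads; ++index) {  // one CUDA thread each
      const int slice_num = index / total_slice_size;
      const int slice_index = index % total_slice_size;
      const int bottom_index = slice_index +
          (slice_num * bottom_slice_axis + offset_slice_axis) * slice_size;
      top[index] = bottom[bottom_index];
      ++copied;
    }
    offset_slice_axis += top_slice_axis;
  }
  assert(copied == (int)bottom.size());  // the tops tile the bottom exactly
  return 0;
}

The assert passes because the tops' slice-axis sizes sum to bottom_slice_axis, so the per-top launches together touch each bottom element exactly once.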
