Skip to content

Commit d2ee261

Browse files
authored
Merge pull request #320 from tsadakane/enh_cuda_CTnoise
CUDA implementation of CTnoise
2 parents 51c18ea + 8dcca6d commit d2ee261

File tree

9 files changed

+525
-25
lines changed

9 files changed

+525
-25
lines changed

Common/CUDA/RandomNumberGenerator.cu

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
/*-------------------------------------------------------------------------
2+
*
3+
* CUDA functions for random number generator
4+
*
5+
* Adds noise of Poisson and normal distribution to the input.
6+
*
7+
* CODE by Tomoyuki SADAKANE
8+
* ---------------------------------------------------------------------------
9+
* ---------------------------------------------------------------------------
10+
* Copyright (c) 2015, University of Bath and CERN- European Organization for
11+
* Nuclear Research
12+
* All rights reserved.
13+
*
14+
* Redistribution and use in source and binary forms, with or without
15+
* modification, are permitted provided that the following conditions are met:
16+
*
17+
* 1. Redistributions of source code must retain the above copyright notice,
18+
* this list of conditions and the following disclaimer.
19+
*
20+
* 2. Redistributions in binary form must reproduce the above copyright notice,
21+
* this list of conditions and the following disclaimer in the documentation
22+
* and/or other materials provided with the distribution.
23+
*
24+
* 3. Neither the name of the copyright holder nor the names of its contributors
25+
* may be used to endorse or promote products derived from this software without
26+
* specific prior written permission.
27+
*
28+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31+
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
32+
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38+
* POSSIBILITY OF SUCH DAMAGE.
39+
* ---------------------------------------------------------------------------
40+
*
41+
* Contact: [email protected]
42+
* Codes : https://github.com/CERN/TIGRE
43+
* ---------------------------------------------------------------------------
44+
*/
45+
46+
#include <stdio.h>
47+
#include <stdlib.h>
48+
#include <cuda.h>
49+
#include <curand_kernel.h>
50+
#include <curand.h>
51+
52+
#include "gpuUtils.hpp"
53+
#include "RandomNumberGenerator.hpp"
54+
55+
#define cudaCheckErrors(msg) \
56+
do { \
57+
cudaError_t __err = cudaGetLastError(); \
58+
if (__err != cudaSuccess) { \
59+
mexPrintf("%s \n",msg);\
60+
cudaDeviceReset();\
61+
mexErrMsgIdAndTxt("RandomNumberGenerator:",cudaGetErrorString(__err));\
62+
} \
63+
} while (0)
64+
65+
66+
__global__ void setup_kernel(curandState *state) {
67+
int idx = threadIdx.x + blockIdx.x * blockDim.x;
68+
/* Each thread gets same seed, a different sequence number, no offset */
69+
curand_init(1234, idx, 0, &state[idx]);
70+
}
71+
72+
__global__ void GeneratePoisson(curandState *state, const float* pfIn, size_t uiLen, float* pfOut) {
73+
int idx = threadIdx.x + blockIdx.x * blockDim.x;
74+
/* Copy state to local memory for efficiency */
75+
curandState localState = state[idx];
76+
int iIter = (uiLen + blockDim.x*gridDim.x - 1)/(blockDim.x*gridDim.x);
77+
for (int iI = 0; iI < iIter; ++iI) {
78+
size_t uiPos = (size_t)blockDim.x*gridDim.x*iI+idx;
79+
if (uiPos < uiLen) {
80+
/* Poisson */
81+
unsigned int uiPoisson = curand_poisson(&localState, pfIn[uiPos]);
82+
pfOut[uiPos] = (float)uiPoisson;
83+
}
84+
}
85+
/* Copy state back to global memory */
86+
state[idx] = localState;
87+
}
88+
89+
__global__ void GeneratePoissonAddGaussian(curandState *state,
90+
const float* pfIn,
91+
size_t uiLen,
92+
float fGaussMu,
93+
float fGaussSigma,
94+
float* pfOut)
95+
{
96+
int idx = threadIdx.x + blockIdx.x * blockDim.x;
97+
/* Copy state to local memory for efficiency */
98+
curandState localState = state[idx];
99+
int iIter = (uiLen + blockDim.x*gridDim.x - 1)/(blockDim.x*gridDim.x);
100+
for (int iI = 0; iI < iIter; ++iI) {
101+
size_t uiPos = (size_t)blockDim.x*gridDim.x*iI+idx;
102+
if (uiPos < uiLen) {
103+
/* Poisson */
104+
unsigned int uiPoisson = curand_poisson(&localState, pfIn[uiPos]);
105+
/* Gaussian */
106+
float fNormal = curand_normal(&localState) * fGaussSigma + fGaussMu;
107+
pfOut[uiPos] = fNormal + (float)uiPoisson;
108+
}
109+
}
110+
/* Copy state back to global memory */
111+
state[idx] = localState;
112+
}
113+
114+
115+
template<class T_value>
116+
void GetMinMax(const T_value* pfIn, size_t uiLen, T_value& tvMin, T_value& tvMax) {
117+
tvMin = pfIn[0];
118+
tvMax = pfIn[0];
119+
T_value tvVal;
120+
for (int iI = 1; iI < uiLen; ++iI) {
121+
tvVal = pfIn[iI];
122+
if (tvMax < tvVal) { tvMax = tvVal; continue;}
123+
if (tvMin > tvVal) { tvMin = tvVal; continue;}
124+
}
125+
}
126+
void poisson_1d(const float* pfIn, size_t uiLen, float* pfOut, const GpuIds& gpuids) {
127+
// printf("poisson_1d(pfIn = %p, uiLen = %zd, pfOut = %p)\n", pfIn, uiLen, pfOut);
128+
float* d_pfIn = nullptr;
129+
float* d_pfOut = nullptr;
130+
cudaMalloc((void **)&d_pfIn, uiLen * sizeof(float));
131+
cudaCheckErrors("poisson_1d fail cudaMalloc 1");
132+
cudaMalloc((void **)&d_pfOut, uiLen * sizeof(float));
133+
cudaCheckErrors("poisson_1d fail cudaMalloc 2");
134+
cudaMemcpy(d_pfIn, pfIn, uiLen*sizeof(float), cudaMemcpyHostToDevice);
135+
cudaCheckErrors("poisson_1d fail cudaMemcpy 1");
136+
137+
// float fMin, fMax;
138+
// GetMinMax(pfIn, uiLen, fMin, fMax);
139+
// printf("fMin, fMax = %f, %f\n", fMin, fMax);
140+
curandState *curandStates = nullptr;
141+
const int kiBlockDim = 1024; // Threads per Block
142+
const int kiGridDim = 64;//(uiLen+kiBlockDim-1)/kiBlockDim;
143+
cudaMalloc((void **)&curandStates, kiGridDim * kiBlockDim * sizeof(curandState));
144+
cudaCheckErrors("poisson_1d fail cudaMalloc 3");
145+
setup_kernel<<<kiGridDim, kiBlockDim>>>(curandStates);
146+
GeneratePoisson<<<kiGridDim, kiBlockDim>>>(curandStates, d_pfIn, uiLen, d_pfOut);
147+
cudaMemcpy(pfOut, d_pfOut, uiLen*sizeof(float), cudaMemcpyDeviceToHost);
148+
cudaCheckErrors("poisson_1d fail cudaMemcpy 2");
149+
// GetMinMax(pfOut, uiLen, fMin, fMax);
150+
// printf("fMin, fMax = %f, %f\n", fMin, fMax);
151+
152+
cudaFree(d_pfIn); d_pfIn = nullptr;
153+
cudaFree(d_pfOut); d_pfOut = nullptr;
154+
cudaFree(curandStates); curandStates = nullptr;
155+
}
156+
157+
void poisson_gaussian_1d(const float* pfIn,
158+
size_t uiLen,
159+
float fGaussMu,
160+
float fGaussSigma,
161+
float* pfOut,
162+
GpuIds& gpuids)
163+
{
164+
// printf("poisson_gaussian_1d(pfIn = %p, uiLen = %zd, fGaussMu = %+f, fGaussSigma = %f, pfOut = %p)\n", pfIn, uiLen, fGaussMu, fGaussSigma, pfOut);
165+
float* d_pfIn = nullptr;
166+
float* d_pfOut = nullptr;
167+
cudaMalloc((void **)&d_pfIn, uiLen * sizeof(float));
168+
cudaCheckErrors("poisson_gaussian_1d fail cudaMalloc 1");
169+
cudaMalloc((void **)&d_pfOut, uiLen * sizeof(float));
170+
cudaCheckErrors("poisson_gaussian_1d fail cudaMalloc 2");
171+
cudaMemcpy(d_pfIn, pfIn, uiLen*sizeof(float), cudaMemcpyHostToDevice);
172+
cudaCheckErrors("poisson_gaussian_1d fail cudaMemcpy 1");
173+
174+
// float fMin, fMax;
175+
// GetMinMax(pfIn, uiLen, fMin, fMax);
176+
// printf("fMin, fMax = %f, %f\n", fMin, fMax);
177+
curandState *curandStates = nullptr;
178+
const int kiBlockDim = 64; // Threads per Block
179+
const int kiGridDim = 64;//(uiLen+kiBlockDim-1)/kiBlockDim;
180+
cudaMalloc((void **)&curandStates, kiGridDim * kiBlockDim * sizeof(curandState));
181+
cudaCheckErrors("poisson_gaussian_1d fail cudaMalloc 3");
182+
setup_kernel<<<kiGridDim, kiBlockDim>>>(curandStates);
183+
GeneratePoissonAddGaussian<<<kiGridDim, kiBlockDim>>>(curandStates, d_pfIn, uiLen, fGaussMu, fGaussSigma, d_pfOut);
184+
cudaMemcpy(pfOut, d_pfOut, uiLen*sizeof(float), cudaMemcpyDeviceToHost);
185+
cudaCheckErrors("poisson_gaussian_1d fail cudaMemcpy 2");
186+
// GetMinMax(pfOut, uiLen, fMin, fMax);
187+
// printf("fMin, fMax = %f, %f\n", fMin, fMax);
188+
189+
190+
cudaFree(d_pfIn); d_pfIn = nullptr;
191+
cudaFree(d_pfOut); d_pfOut = nullptr;
192+
cudaFree(curandStates); curandStates = nullptr;
193+
}

Common/CUDA/RandomNumberGenerator.hpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/*-------------------------------------------------------------------------
2+
*
3+
* Header CUDA functions for random number generator
4+
*
5+
* Adds noise of Poisson and normal distribution to the input.
6+
*
7+
* CODE by Tomoyuki SADAKANE
8+
* ---------------------------------------------------------------------------
9+
* ---------------------------------------------------------------------------
10+
* Copyright (c) 2015, University of Bath and CERN- European Organization for
11+
* Nuclear Research
12+
* All rights reserved.
13+
*
14+
* Redistribution and use in source and binary forms, with or without
15+
* modification, are permitted provided that the following conditions are met:
16+
*
17+
* 1. Redistributions of source code must retain the above copyright notice,
18+
* this list of conditions and the following disclaimer.
19+
*
20+
* 2. Redistributions in binary form must reproduce the above copyright notice,
21+
* this list of conditions and the following disclaimer in the documentation
22+
* and/or other materials provided with the distribution.
23+
*
24+
* 3. Neither the name of the copyright holder nor the names of its contributors
25+
* may be used to endorse or promote products derived from this software without
26+
* specific prior written permission.
27+
*
28+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31+
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
32+
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38+
* POSSIBILITY OF SUCH DAMAGE.
39+
* ---------------------------------------------------------------------------
40+
*
41+
* Contact: [email protected]
42+
* Codes : https://github.com/CERN/TIGRE
43+
* ---------------------------------------------------------------------------
44+
*/
45+
46+
#include "TIGRE_common.hpp"
47+
#include "GpuIds.hpp"
48+
void poisson_1d(const float* pfIn, size_t uiLen, float* pfOut, const GpuIds& gpuids);
49+
void poisson_gaussian_1d(const float* pfPoissonL, size_t uiLen, float fGaussMu, float fGaussSigma, float* pfOut, GpuIds& gpuids);

MATLAB/Compile.m

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
mex -largeArrayDims ./Utilities/cuda_interface/minTV.cpp ../Common/CUDA/POCS_TV.cu ../Common/CUDA/GpuIds.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/win64
6262
mex -largeArrayDims ./Utilities/cuda_interface/AwminTV.cpp ../Common/CUDA/POCS_TV2.cu ../Common/CUDA/GpuIds.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/win64
6363
mex -largeArrayDims ./Utilities/cuda_interface/tvDenoise.cpp ../Common/CUDA/tvdenoising.cu ../Common/CUDA/GpuIds.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/win64
64+
mex -largeArrayDims ./Utilities/cuda_interface/AddNoise.cpp ../Common/CUDA/RandomNumberGenerator.cu ../Common/CUDA/GpuIds.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/win64
6465
mex -largeArrayDims ./Utilities/IO/VarianCBCT/mexReadXim.cpp -outdir ./Mex_files/win64
6566
mex -largeArrayDims ./Utilities/GPU/getGpuName_mex.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/win64
6667
mex -largeArrayDims ./Utilities/GPU/getGpuCount_mex.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/win64
@@ -70,6 +71,7 @@
7071
mex ./Utilities/cuda_interface/minTV.cpp ../Common/CUDA/POCS_TV.cu ../Common/CUDA/GpuIds.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/win32
7172
mex ./Utilities/cuda_interface/AwminTV.cpp ../Common/CUDA/POCS_TV2.cu ../Common/CUDA/GpuIds.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/win32
7273
mex ./Utilities/cuda_interface/tvDenoise.cpp ../Common/CUDA/tvdenoising.cu ../Common/CUDA/GpuIds.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/win32
74+
mex ./Utilities/cuda_interface/AddNoise.cpp ../Common/CUDA/RandomNumberGenerator.cu ../Common/CUDA/GpuIds.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/win32
7375
mex ./Utilities/IO/VarianCBCT/mexReadXim.cpp -outdir ./Mex_files/win32
7476
mex ./Utilities/GPU/getGpuName_mex.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/win32
7577
mex ./Utilities/GPU/getGpuCount_mex.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/win32
@@ -83,6 +85,7 @@
8385
mex -largeArrayDims ./Utilities/cuda_interface/minTV.cpp ../Common/CUDA/POCS_TV.cu ../Common/CUDA/GpuIds.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/mac64
8486
mex -largeArrayDims ./Utilities/cuda_interface/AwminTV.cpp ../Common/CUDA/POCS_TV2.cu ../Common/CUDA/GpuIds.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/mac64
8587
mex -largeArrayDims ./Utilities/cuda_interface/tvDenoise.cpp ../Common/CUDA/tvdenoising.cu ../Common/CUDA/GpuIds.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/mac64
88+
mex -largeArrayDims ./Utilities/cuda_interface/AddNoise.cpp ../Common/CUDA/RandomNumberGenerator.cu ../Common/CUDA/GpuIds.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/mac64
8689
mex -largeArrayDims ./Utilities/IO/VarianCBCT/mexReadXim.cpp -outdir ./Mex_files/mac64
8790
mex -largeArrayDims ./Utilities/GPU/getGpuName_mex.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/mac64
8891
mex -largeArrayDims ./Utilities/GPU/getGpuCount_mex.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/mac64
@@ -92,6 +95,7 @@
9295
mex ./Utilities/cuda_interface/minTV.cpp ../Common/CUDA/POCS_TV.cu ../Common/CUDA/GpuIds.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/mac32
9396
mex ./Utilities/cuda_interface/AwminTV.cpp ../Common/CUDA/POCS_TV2.cu ../Common/CUDA/GpuIds.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/mac32
9497
mex ./Utilities/cuda_interface/tvDenoise.cpp ../Common/CUDA/tvdenoising.cu ../Common/CUDA/GpuIds.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/mac32
98+
mex ./Utilities/cuda_interface/AddNoise.cpp ../Common/CUDA/RandomNumberGenerator.cu ../Common/CUDA/GpuIds.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/mac32
9599
mex ./Utilities/IO/VarianCBCT/mexReadXim.cpp -outdir ./Mex_files/mac32
96100
mex ./Utilities/GPU/getGpuName_mex.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/mac32
97101
mex ./Utilities/GPU/getGpuCount_mex.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/mac32
@@ -104,6 +108,7 @@
104108
mex -largeArrayDims ./Utilities/cuda_interface/minTV.cpp ../Common/CUDA/POCS_TV.cu ../Common/CUDA/GpuIds.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/linux64
105109
mex -largeArrayDims ./Utilities/cuda_interface/AwminTV.cpp ../Common/CUDA/POCS_TV2.cu ../Common/CUDA/GpuIds.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/linux64
106110
mex -largeArrayDims ./Utilities/cuda_interface/tvDenoise.cpp ../Common/CUDA/tvdenoising.cu ../Common/CUDA/GpuIds.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/linux64
111+
mex -largeArrayDims ./Utilities/cuda_interface/AddNoise.cpp ../Common/CUDA/RandomNumberGenerator.cu ../Common/CUDA/GpuIds.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/linux64
107112
mex -largeArrayDims ./Utilities/IO/VarianCBCT/mexReadXim.cpp -outdir ./Mex_files/linux64
108113
mex -largeArrayDims ./Utilities/GPU/getGpuName_mex.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/linux64
109114
mex -largeArrayDims ./Utilities/GPU/getGpuCount_mex.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/linux64
@@ -113,6 +118,7 @@
113118
mex ./Utilities/cuda_interface/minTV.cpp ../Common/CUDA/POCS_TV.cu ../Common/CUDA/GpuIds.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/linux32
114119
mex ./Utilities/cuda_interface/AwminTV.cpp ../Common/CUDA/POCS_TV2.cu ../Common/CUDA/GpuIds.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/linux32
115120
mex ./Utilities/cuda_interface/tvDenoise.cpp ../Common/CUDA/tvdenoising.cu ../Common/CUDA/GpuIds.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/linux32
121+
mex ./Utilities/cuda_interface/AddNoise.cpp ../Common/CUDA/RandomNumberGenerator.cu ../Common/CUDA/GpuIds.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/linux32
116122
mex -largeArrayDims ./Utilities/IO/VarianCBCT/mexReadXim.cpp -outdir ./Mex_files/linux32
117123
mex ./Utilities/GPU/getGpuName_mex.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/linux32
118124
mex ./Utilities/GPU/getGpuCount_mex.cpp ../Common/CUDA/gpuUtils.cu -outdir ./Mex_files/linux32

0 commit comments

Comments
 (0)