Skip to content

:) #16

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open

:) #16

Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 24 additions & 12 deletions main.cu
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,31 @@
// #include <thrust/device_vector.h> // 如果想用 thrust 也是没问题的

// 这是基于“边角料法”的,请把他改成基于“网格跨步循环”的:10 分
__global__ void fill_sin(int *arr, int n) {
int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < n) return;
arr[i] = sinf(i);
// __global__ void fill_sin(int *arr, int n) {
// int i = blockIdx.x * blockDim.x + threadIdx.x;
// if (i < n) return; //>= n?
// arr[i] = sinf(i);
// }

// Fills arr[0..n) with sinf(i) truncated toward zero to int.
//
// Each thread starts at its flat global index and advances by the total
// number of launched threads, so every element in [0, n) is written
// regardless of how many blocks/threads the launch uses (as long as at least
// one thread is launched).
//
// Launch layout: 1-D grid of 1-D blocks (only the .x components are read).
// arr: buffer of at least n ints, dereferenced on the device.
// n:   number of elements to fill; n <= 0 writes nothing.
__global__ void fill_sin(int* arr, int n) {
    // 64-bit index/stride: with an int index, `i += stride` could wrap past
    // INT_MAX for large n and yield a negative index that still passes i < n.
    const long long stride = static_cast<long long>(blockDim.x) * gridDim.x;
    const long long first = static_cast<long long>(blockDim.x) * blockIdx.x + threadIdx.x;
    for (long long i = first; i < n; i += stride) {
        // Explicit casts document the int -> float -> int round trip that the
        // implicit conversions previously performed.
        arr[i] = static_cast<int>(sinf(static_cast<float>(i)));
    }
}


// Copies every non-negative element of arr[0..n) into res, packed from the
// position held in *counter, and leaves *counter incremented by the number of
// elements copied.
//
// Launch layout: 1-D grid of 1-D blocks with at least n threads in total
// (one thread per element); surplus threads exit immediately.
// counter: single int used as the shared write cursor; it is only ever
//          advanced atomically. NOTE(review): presumably zero on entry so
//          that res is packed from index 0 — confirm against the caller.
// res:     output buffer; must have room for every accepted element.
// arr:     input buffer of at least n ints.
// n:       number of input elements.
//
// The order of elements in res depends on thread scheduling and is not
// guaranteed to match their order in arr.
__global__ void filter_positive(int *counter, int *res, int const *arr, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    // Threads past the end of the data must not touch arr (the guard was
    // previously inverted, which let only the out-of-range threads through).
    if (i >= n) return;

    int value = arr[i];
    if (value >= 0) {
        // atomicAdd returns the value *before* the increment, so each thread
        // reserves a unique slot; a plain read followed by `*counter += 1`
        // races between threads and loses updates.
        int loc = atomicAdd(counter, 1);
        // Store the filtered element itself (not n, and not a bare
        // expression statement that writes nothing).
        res[loc] = value;
    }
}

Expand All @@ -31,12 +42,13 @@ int main() {
std::vector<int, CudaAllocator<int>> counter(1);

// fill_sin 改成“网格跨步循环”以后,这里三重尖括号里的参数如何调整?10 分
fill_sin<<<n / 1024, 1024>>>(arr.data(), n);
fill_sin<<<n / 1024, 1024>>>(arr.data(), n); //might not be adjusted, since it could perform everything in one run. Could make gridDim smaller, but not sure if necessary

// 这里的“边角料法”对于不是 1024 整数倍的 n 会出错,为什么?请修复:10 分
filter_positive<<<n / 1024, 1024>>>(counter.data(), res.data(), arr.data(), n);
// 这里的“边角料法”对于不是 1024 整数倍的 n 会出错,为什么?请修复:10 分 -- if it is i >= n, there will be no problem
filter_positive<<<(n + 1023)/ 1024, 1024>>>(counter.data(), res.data(), arr.data(), n);

// 这里 CPU 访问数据前漏了一步什么操作?请补上:10 分
// 这里 CPU 访问数据前漏了一步什么操作?请补上:10 分 -- synchronize
checkCudaErrors(cudaDeviceSynchronize());

if (counter[0] <= n / 50) {
printf("Result too short! %d <= %d\n", counter[0], n / 50);
Expand Down