Skip to content

Commit 97d5686

Browse files
finalized examples
1 parent 84b49b2 commit 97d5686

11 files changed

+570
-8
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ install(TARGETS cusignm DESTINATION lib)
4646
install(FILES cusignm.h DESTINATION include)
4747

4848
# examples
49-
foreach(x d)
49+
foreach(x s d c z)
5050
# Newton
5151
add_executable(example_cusignm_${x}Newton example_cusignm_${x}Newton.cu)
5252
target_link_libraries(example_cusignm_${x}Newton PUBLIC cusignm)

cusignm_halley.cu

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -272,10 +272,17 @@ static int cusignm_Halley(const int n, const T *A, void *d_buffer, void *h_buffe
272272
*-----------------------------------------------------------------------------*/
273273
// workspace query for LU factorization
274274
CHECK_CUSOLVER(cusolverDnXgetrf_bufferSize(cusolverH, params, n, n, cusignm_traits<T>::dataType, Stmp, n, cusignm_traits<T>::computeType, &lworkdevice, &lworkhost));
275+
275276
// compute LU factorization
276277
CHECK_CUSOLVER(cusolverDnXgetrf(cusolverH, params, n, n, cusignm_traits<T>::dataType, Stmp, n, d_ipiv, cusignm_traits<T>::computeType, d_work, lworkdevice, h_work, lworkhost, d_info));
277-
// solve the linear system
278+
// int d_info_h;
279+
// CHECK_CUDA(cudaMemcpy(&d_info_h, d_info, sizeof(int), cudaMemcpyDeviceToHost));
280+
// printf("iter=%d, d_info=%d\n", iter, d_info_h);
281+
282+
// solve the linear system
278283
CHECK_CUSOLVER(cusolverDnXgetrs(cusolverH, params, CUBLAS_OP_N, n, n, cusignm_traits<T>::dataType, Stmp, n, d_ipiv, cusignm_traits<T>::computeType, Stmp2, n, d_info));
284+
// CHECK_CUDA(cudaMemcpy(&d_info_h, d_info, sizeof(int), cudaMemcpyDeviceToHost));
285+
// printf("iter=%d, d_info=%d\n", iter, d_info_h);
279286

280287
/*-----------------------------------------------------------------------------
281288
* update S as S <- Sold*(I + c * Sold*Sold)\(a * I + b * Sold*Sold)
@@ -288,7 +295,7 @@ static int cusignm_Halley(const int n, const T *A, void *d_buffer, void *h_buffe
288295
Scalar diffSSold, nrmS;
289296
CHECK_CUSIGNM(cusignm_diffnormFro(n, n, S, Sold, &diffSSold));
290297
CHECK_CUSIGNM(cusignm_normFro(n, n, S, &nrmS));
291-
printf("iter=%d, diffSSold=%e, nrmS=%e, rel. change=%e\n", iter, diffSSold, nrmS, diffSSold / nrmS);
298+
// printf("iter=%d, diffSSold=%e, nrmS=%e, rel. change=%e\n", iter, diffSSold, nrmS, diffSSold / nrmS);
292299

293300
/*-----------------------------------------------------------------------------
294301
* stopping criteria
@@ -298,6 +305,14 @@ static int cusignm_Halley(const int n, const T *A, void *d_buffer, void *h_buffe
298305
break;
299306
}
300307

308+
// NaN detected
309+
if (isnan(diffSSold) || isnan(nrmS)) {
310+
fprintf(stderr, "%s-%s:%d no convergence - NaN detected\n", __func__, __FILE__, __LINE__);
311+
fflush(stderr);
312+
ret = -1;
313+
break;
314+
}
315+
301316
// maximum number of iterations reached
302317
if (iter == maxiter) {
303318
fprintf(stderr, "%s-%s:%d no convergence - maximum number of iterations reached\n", __func__, __FILE__, __LINE__);

cusignm_newton.cu

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ static int cusignm_Newton(const int n, const T *A, void *d_buffer, void *h_buffe
227227
Scalar diffSSold, nrmS;
228228
CHECK_CUSIGNM(cusignm_diffnormFro(n, n, S, Sold, &diffSSold));
229229
CHECK_CUSIGNM(cusignm_normFro(n, n, S, &nrmS));
230-
printf("iter=%d, diffSSold=%e, nrmS=%e, rel. change=%e\n", iter, diffSSold, nrmS, diffSSold / nrmS);
230+
// printf("iter=%d, diffSSold=%e, nrmS=%e, rel. change=%e\n", iter, diffSSold, nrmS, diffSSold / nrmS);
231231

232232
/*-----------------------------------------------------------------------------
233233
* stopping criteria
@@ -237,6 +237,13 @@ static int cusignm_Newton(const int n, const T *A, void *d_buffer, void *h_buffe
237237
break;
238238
}
239239

240+
if (isnan(diffSSold) || isnan(nrmS)) {
241+
fprintf(stderr, "%s-%s:%d no convergence - NaN detected\n", __func__, __FILE__, __LINE__);
242+
fflush(stderr);
243+
ret = -1;
244+
break;
245+
}
246+
240247
// maximum number of iterations reached
241248
if (iter == maxiter) {
242249
fprintf(stderr, "%s-%s:%d no convergence - maximum number of iterations reached\n", __func__, __FILE__, __LINE__);

example_cusignm_cHalley.cu

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
/* MIT License
2+
*
3+
* Copyright (c) 2024 Maximilian Behr
4+
*
5+
* Permission is hereby granted, free of charge, to any person obtaining a copy
6+
* of this software and associated documentation files (the "Software"), to deal
7+
* in the Software without restriction, including without limitation the rights
8+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
* copies of the Software, and to permit persons to whom the Software is
10+
* furnished to do so, subject to the following conditions:
11+
*
12+
* The above copyright notice and this permission notice shall be included in all
13+
* copies or substantial portions of the Software.
14+
*
15+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
* SOFTWARE.
22+
*/
23+
24+
#include <stdio.h>
25+
#include <stdlib.h>
26+
27+
#include "cusignm.h"
28+
29+
int main(void) {
30+
/*-----------------------------------------------------------------------------
31+
* variables
32+
*-----------------------------------------------------------------------------*/
33+
int ret = 0; // return value
34+
const int n = 10; // size of the input matrix A n-by-n
35+
cuComplex *A, *S; // input matrix and sign matrix
36+
void *d_buffer = NULL; // device buffer
37+
void *h_buffer = NULL; // host buffer
38+
39+
/*-----------------------------------------------------------------------------
40+
* allocate A, Q and H
41+
*-----------------------------------------------------------------------------*/
42+
cudaMallocManaged((void **)&A, sizeof(*A) * n * n);
43+
cudaMallocManaged((void **)&S, sizeof(*S) * n * n);
44+
45+
/*-----------------------------------------------------------------------------
46+
* create a random matrix A
47+
*-----------------------------------------------------------------------------*/
48+
srand(0);
49+
for (int i = 0; i < n; ++i) {
50+
for (int j = 0; j < n; ++j) {
51+
A[i + j * n] = cuComplex({(float)rand() / RAND_MAX, (float)rand() / RAND_MAX});
52+
}
53+
}
54+
55+
/*-----------------------------------------------------------------------------
56+
* perform a workspace query and allocate memory buffer on the host and device
57+
*-----------------------------------------------------------------------------*/
58+
size_t d_bufferSize = 0, h_bufferSize = 0;
59+
cusignm_cHalleyBufferSize(n, &d_bufferSize, &h_bufferSize);
60+
61+
if (d_bufferSize > 0) {
62+
cudaMalloc((void **)&d_buffer, d_bufferSize);
63+
}
64+
65+
if (h_bufferSize > 0) {
66+
h_buffer = malloc(h_bufferSize);
67+
}
68+
69+
/*-----------------------------------------------------------------------------
70+
* compute Sign Function S = sign(A)
71+
*-----------------------------------------------------------------------------*/
72+
cusignm_cHalley(n, A, d_buffer, h_buffer, S);
73+
74+
/*-----------------------------------------------------------------------------
75+
* check sign function ||S^2 - I||_F
76+
*-----------------------------------------------------------------------------*/
77+
float fronrmdiff = 0.0f;
78+
for (int i = 0; i < n; ++i) {
79+
for (int j = 0; j < n; ++j) {
80+
cuComplex sum = cuComplex{0.0f, 0.0f};
81+
for (int k = 0; k < n; ++k) {
82+
sum = cuCaddf(sum, cuCmulf(S[i + k * n], S[k + j * n]));
83+
}
84+
if (i == j) {
85+
sum = cuCsubf(sum, cuComplex{1.0f, 0.0f});
86+
}
87+
float diff = cuCabsf(sum);
88+
fronrmdiff += diff * diff;
89+
}
90+
}
91+
float error = sqrtf(fronrmdiff / sqrtf((float)n));
92+
printf("rel. error ||S^2 - I ||_F / ||I||_F = %e\n", error);
93+
if (error < 1e-4) {
94+
printf("Sign Function Approximation successful\n");
95+
} else {
96+
printf("Sign Function Approximation failed\n");
97+
ret = 1;
98+
}
99+
100+
/*-----------------------------------------------------------------------------
101+
* clear memory
102+
*-----------------------------------------------------------------------------*/
103+
cudaFree(A);
104+
cudaFree(S);
105+
cudaFree(d_buffer);
106+
free(h_buffer);
107+
return ret;
108+
}

example_cusignm_cNewton.cu

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
/* MIT License
2+
*
3+
* Copyright (c) 2024 Maximilian Behr
4+
*
5+
* Permission is hereby granted, free of charge, to any person obtaining a copy
6+
* of this software and associated documentation files (the "Software"), to deal
7+
* in the Software without restriction, including without limitation the rights
8+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
* copies of the Software, and to permit persons to whom the Software is
10+
* furnished to do so, subject to the following conditions:
11+
*
12+
* The above copyright notice and this permission notice shall be included in all
13+
* copies or substantial portions of the Software.
14+
*
15+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
* SOFTWARE.
22+
*/
23+
24+
#include <stdio.h>
25+
#include <stdlib.h>
26+
27+
#include "cusignm.h"
28+
29+
int main(void) {
30+
/*-----------------------------------------------------------------------------
31+
* variables
32+
*-----------------------------------------------------------------------------*/
33+
int ret = 0; // return value
34+
const int n = 10; // size of the input matrix A n-by-n
35+
cuComplex *A, *S; // input matrix and sign matrix
36+
void *d_buffer = NULL; // device buffer
37+
void *h_buffer = NULL; // host buffer
38+
39+
/*-----------------------------------------------------------------------------
40+
* allocate A, Q and H
41+
*-----------------------------------------------------------------------------*/
42+
cudaMallocManaged((void **)&A, sizeof(*A) * n * n);
43+
cudaMallocManaged((void **)&S, sizeof(*S) * n * n);
44+
45+
/*-----------------------------------------------------------------------------
46+
* create a random matrix A
47+
*-----------------------------------------------------------------------------*/
48+
srand(0);
49+
for (int i = 0; i < n; ++i) {
50+
for (int j = 0; j < n; ++j) {
51+
A[i + j * n] = cuComplex({(float)rand() / RAND_MAX, (float)rand() / RAND_MAX});
52+
}
53+
}
54+
55+
/*-----------------------------------------------------------------------------
56+
* perform a workspace query and allocate memory buffer on the host and device
57+
*-----------------------------------------------------------------------------*/
58+
size_t d_bufferSize = 0, h_bufferSize = 0;
59+
cusignm_cNewtonBufferSize(n, &d_bufferSize, &h_bufferSize);
60+
61+
if (d_bufferSize > 0) {
62+
cudaMalloc((void **)&d_buffer, d_bufferSize);
63+
}
64+
65+
if (h_bufferSize > 0) {
66+
h_buffer = malloc(h_bufferSize);
67+
}
68+
69+
/*-----------------------------------------------------------------------------
70+
* compute Sign Function S = sign(A)
71+
*-----------------------------------------------------------------------------*/
72+
cusignm_cNewton(n, A, d_buffer, h_buffer, S);
73+
74+
/*-----------------------------------------------------------------------------
75+
* check sign function ||S^2 - I||_F
76+
*-----------------------------------------------------------------------------*/
77+
float fronrmdiff = 0.0f;
78+
for (int i = 0; i < n; ++i) {
79+
for (int j = 0; j < n; ++j) {
80+
cuComplex sum = cuComplex{0.0f, 0.0f};
81+
for (int k = 0; k < n; ++k) {
82+
sum = cuCaddf(sum, cuCmulf(S[i + k * n], S[k + j * n]));
83+
}
84+
if (i == j) {
85+
sum = cuCsubf(sum, cuComplex{1.0f, 0.0f});
86+
}
87+
float diff = cuCabsf(sum);
88+
fronrmdiff += diff * diff;
89+
}
90+
}
91+
float error = sqrtf(fronrmdiff / sqrtf((float)n));
92+
printf("rel. error ||S^2 - I ||_F / ||I||_F = %e\n", error);
93+
if (error < 1e-4) {
94+
printf("Sign Function Approximation successful\n");
95+
} else {
96+
printf("Sign Function Approximation failed\n");
97+
ret = 1;
98+
}
99+
100+
/*-----------------------------------------------------------------------------
101+
* clear memory
102+
*-----------------------------------------------------------------------------*/
103+
cudaFree(A);
104+
cudaFree(S);
105+
cudaFree(d_buffer);
106+
free(h_buffer);
107+
return ret;
108+
}

example_cusignm_dHalley.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ int main(void) {
3131
* variables
3232
*-----------------------------------------------------------------------------*/
3333
int ret = 0; // return value
34-
const int n = 100; // size of the input matrix A n-by-n
34+
const int n = 10; // size of the input matrix A n-by-n
3535
double *A, *S; // input matrix and sign matrix
3636
void *d_buffer = NULL; // device buffer
3737
void *h_buffer = NULL; // host buffer

example_cusignm_dNewton.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ int main(void) {
3131
* variables
3232
*-----------------------------------------------------------------------------*/
3333
int ret = 0; // return value
34-
const int n = 100; // size of the input matrix A n-by-n
34+
const int n = 10; // size of the input matrix A n-by-n
3535
double *A, *S; // input matrix and sign matrix
3636
void *d_buffer = NULL; // device buffer
3737
void *h_buffer = NULL; // host buffer

example_cusignm_sHalley.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ int main(void) {
3131
* variables
3232
*-----------------------------------------------------------------------------*/
3333
int ret = 0; // return value
34-
const int n = 100; // size of the input matrix A n-by-n
34+
const int n = 10; // size of the input matrix A n-by-n
3535
float *A, *S; // input matrix and sign matrix
3636
void *d_buffer = NULL; // device buffer
3737
void *h_buffer = NULL; // host buffer
@@ -84,7 +84,7 @@ int main(void) {
8484
if (i == j) {
8585
sum -= 1.0f;
8686
}
87-
sum = absf(sum);
87+
sum = fabs(sum);
8888
fronrmdiff += sum * sum;
8989
}
9090
}

0 commit comments

Comments
 (0)