finalized examples

maximilianbehr · maximilianbehr · commit 97d568681a68 · 2024-04-02T15:55:04.000+02:00
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -46,7 +46,7 @@ install(TARGETS cusignm DESTINATION lib)
 install(FILES cusignm.h DESTINATION include)
 
 # examples
-foreach(x d)
+foreach(x s d c z)
   # Newton
   add_executable(example_cusignm_${x}Newton example_cusignm_${x}Newton.cu)
   target_link_libraries(example_cusignm_${x}Newton PUBLIC cusignm)
diff --git a/cusignm_halley.cu b/cusignm_halley.cu
@@ -272,10 +272,17 @@ static int cusignm_Halley(const int n, const T *A, void *d_buffer, void *h_buffe
          *-----------------------------------------------------------------------------*/
         // workspace query for LU factorization
         CHECK_CUSOLVER(cusolverDnXgetrf_bufferSize(cusolverH, params, n, n, cusignm_traits<T>::dataType, Stmp, n, cusignm_traits<T>::computeType, &lworkdevice, &lworkhost));
+
         // compute LU factorization
         CHECK_CUSOLVER(cusolverDnXgetrf(cusolverH, params, n, n, cusignm_traits<T>::dataType, Stmp, n, d_ipiv, cusignm_traits<T>::computeType, d_work, lworkdevice, h_work, lworkhost, d_info));
-        // solve the linear system
+        // int d_info_h;
+        // CHECK_CUDA(cudaMemcpy(&d_info_h, d_info, sizeof(int), cudaMemcpyDeviceToHost));
+        // printf("iter=%d, d_info=%d\n", iter, d_info_h);
+
+        //  solve the linear system
         CHECK_CUSOLVER(cusolverDnXgetrs(cusolverH, params, CUBLAS_OP_N, n, n, cusignm_traits<T>::dataType, Stmp, n, d_ipiv, cusignm_traits<T>::computeType, Stmp2, n, d_info));
+        // CHECK_CUDA(cudaMemcpy(&d_info_h, d_info, sizeof(int), cudaMemcpyDeviceToHost));
+        // printf("iter=%d, d_info=%d\n", iter, d_info_h);
 
         /*-----------------------------------------------------------------------------
          * update S as S <- Sold*(I + c * Sold*Sold)\(a * I + b * Sold*Sold)
@@ -288,7 +295,7 @@ static int cusignm_Halley(const int n, const T *A, void *d_buffer, void *h_buffe
         Scalar diffSSold, nrmS;
         CHECK_CUSIGNM(cusignm_diffnormFro(n, n, S, Sold, &diffSSold));
         CHECK_CUSIGNM(cusignm_normFro(n, n, S, &nrmS));
-        printf("iter=%d, diffSSold=%e, nrmS=%e, rel. change=%e\n", iter, diffSSold, nrmS, diffSSold / nrmS);
+        // printf("iter=%d, diffSSold=%e, nrmS=%e, rel. change=%e\n", iter, diffSSold, nrmS, diffSSold / nrmS);
 
         /*-----------------------------------------------------------------------------
          * stopping criteria
@@ -298,6 +305,14 @@ static int cusignm_Halley(const int n, const T *A, void *d_buffer, void *h_buffe
             break;
         }
 
+        // NaN detected
+        if (isnan(diffSSold) || isnan(nrmS)) {
+            fprintf(stderr, "%s-%s:%d no convergence - NaN detected\n", __func__, __FILE__, __LINE__);
+            fflush(stderr);
+            ret = -1;
+            break;
+        }
+
         // maximum number of iterations reached
         if (iter == maxiter) {
             fprintf(stderr, "%s-%s:%d no convergence - maximum number of iterations reached\n", __func__, __FILE__, __LINE__);
diff --git a/cusignm_newton.cu b/cusignm_newton.cu
@@ -227,7 +227,7 @@ static int cusignm_Newton(const int n, const T *A, void *d_buffer, void *h_buffe
         Scalar diffSSold, nrmS;
         CHECK_CUSIGNM(cusignm_diffnormFro(n, n, S, Sold, &diffSSold));
         CHECK_CUSIGNM(cusignm_normFro(n, n, S, &nrmS));
-        printf("iter=%d, diffSSold=%e, nrmS=%e, rel. change=%e\n", iter, diffSSold, nrmS, diffSSold / nrmS);
+        // printf("iter=%d, diffSSold=%e, nrmS=%e, rel. change=%e\n", iter, diffSSold, nrmS, diffSSold / nrmS);
 
         /*-----------------------------------------------------------------------------
          * stopping criteria
@@ -237,6 +237,13 @@ static int cusignm_Newton(const int n, const T *A, void *d_buffer, void *h_buffe
             break;
         }
 
+        if (isnan(diffSSold) || isnan(nrmS)) {
+            fprintf(stderr, "%s-%s:%d no convergence - NaN detected\n", __func__, __FILE__, __LINE__);
+            fflush(stderr);
+            ret = -1;
+            break;
+        }
+
         // maximum number of iterations reached
         if (iter == maxiter) {
             fprintf(stderr, "%s-%s:%d no convergence - maximum number of iterations reached\n", __func__, __FILE__, __LINE__);
diff --git a/example_cusignm_cHalley.cu b/example_cusignm_cHalley.cu
@@ -0,0 +1,108 @@
+/* MIT License
+ *
+ * Copyright (c) 2024 Maximilian Behr
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "cusignm.h"
+
+int main(void) {
+    /*-----------------------------------------------------------------------------
+     * variables
+     *-----------------------------------------------------------------------------*/
+    int ret = 0;            // return value
+    const int n = 10;       // size of the input matrix A n-by-n
+    cuComplex *A, *S;       // input matrix and sign matrix
+    void *d_buffer = NULL;  // device buffer
+    void *h_buffer = NULL;  // host buffer
+
+    /*-----------------------------------------------------------------------------
+     * allocate A, Q and H
+     *-----------------------------------------------------------------------------*/
+    cudaMallocManaged((void **)&A, sizeof(*A) * n * n);
+    cudaMallocManaged((void **)&S, sizeof(*S) * n * n);
+
+    /*-----------------------------------------------------------------------------
+     * create a random matrix A
+     *-----------------------------------------------------------------------------*/
+    srand(0);
+    for (int i = 0; i < n; ++i) {
+        for (int j = 0; j < n; ++j) {
+            A[i + j * n] = cuComplex({(float)rand() / RAND_MAX, (float)rand() / RAND_MAX});
+        }
+    }
+
+    /*-----------------------------------------------------------------------------
+     * perform a workspace query and allocate memory buffer on the host and device
+     *-----------------------------------------------------------------------------*/
+    size_t d_bufferSize = 0, h_bufferSize = 0;
+    cusignm_cHalleyBufferSize(n, &d_bufferSize, &h_bufferSize);
+
+    if (d_bufferSize > 0) {
+        cudaMalloc((void **)&d_buffer, d_bufferSize);
+    }
+
+    if (h_bufferSize > 0) {
+        h_buffer = malloc(h_bufferSize);
+    }
+
+    /*-----------------------------------------------------------------------------
+     *  compute Sign Function S = sign(A)
+     *-----------------------------------------------------------------------------*/
+    cusignm_cHalley(n, A, d_buffer, h_buffer, S);
+
+    /*-----------------------------------------------------------------------------
+     * check sign function ||S^2 - I||_F
+     *-----------------------------------------------------------------------------*/
+    float fronrmdiff = 0.0f;
+    for (int i = 0; i < n; ++i) {
+        for (int j = 0; j < n; ++j) {
+            cuComplex sum = cuComplex{0.0f, 0.0f};
+            for (int k = 0; k < n; ++k) {
+                sum = cuCaddf(sum, cuCmulf(S[i + k * n], S[k + j * n]));
+            }
+            if (i == j) {
+                sum = cuCsubf(sum, cuComplex{1.0f, 0.0f});
+            }
+            float diff = cuCabsf(sum);
+            fronrmdiff += diff * diff;
+        }
+    }
+    float error = sqrtf(fronrmdiff / sqrtf((float)n));
+    printf("rel. error ||S^2 - I ||_F / ||I||_F = %e\n", error);
+    if (error < 1e-4) {
+        printf("Sign Function Approximation successful\n");
+    } else {
+        printf("Sign Function Approximation failed\n");
+        ret = 1;
+    }
+
+    /*-----------------------------------------------------------------------------
+     * clear memory
+     *-----------------------------------------------------------------------------*/
+    cudaFree(A);
+    cudaFree(S);
+    cudaFree(d_buffer);
+    free(h_buffer);
+    return ret;
+}
diff --git a/example_cusignm_cNewton.cu b/example_cusignm_cNewton.cu
@@ -0,0 +1,108 @@
+/* MIT License
+ *
+ * Copyright (c) 2024 Maximilian Behr
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "cusignm.h"
+
+int main(void) {
+    /*-----------------------------------------------------------------------------
+     * variables
+     *-----------------------------------------------------------------------------*/
+    int ret = 0;            // return value
+    const int n = 10;       // size of the input matrix A n-by-n
+    cuComplex *A, *S;       // input matrix and sign matrix
+    void *d_buffer = NULL;  // device buffer
+    void *h_buffer = NULL;  // host buffer
+
+    /*-----------------------------------------------------------------------------
+     * allocate A, Q and H
+     *-----------------------------------------------------------------------------*/
+    cudaMallocManaged((void **)&A, sizeof(*A) * n * n);
+    cudaMallocManaged((void **)&S, sizeof(*S) * n * n);
+
+    /*-----------------------------------------------------------------------------
+     * create a random matrix A
+     *-----------------------------------------------------------------------------*/
+    srand(0);
+    for (int i = 0; i < n; ++i) {
+        for (int j = 0; j < n; ++j) {
+            A[i + j * n] = cuComplex({(float)rand() / RAND_MAX, (float)rand() / RAND_MAX});
+        }
+    }
+
+    /*-----------------------------------------------------------------------------
+     * perform a workspace query and allocate memory buffer on the host and device
+     *-----------------------------------------------------------------------------*/
+    size_t d_bufferSize = 0, h_bufferSize = 0;
+    cusignm_cNewtonBufferSize(n, &d_bufferSize, &h_bufferSize);
+
+    if (d_bufferSize > 0) {
+        cudaMalloc((void **)&d_buffer, d_bufferSize);
+    }
+
+    if (h_bufferSize > 0) {
+        h_buffer = malloc(h_bufferSize);
+    }
+
+    /*-----------------------------------------------------------------------------
+     *  compute Sign Function S = sign(A)
+     *-----------------------------------------------------------------------------*/
+    cusignm_cNewton(n, A, d_buffer, h_buffer, S);
+
+    /*-----------------------------------------------------------------------------
+     * check sign function ||S^2 - I||_F
+     *-----------------------------------------------------------------------------*/
+    float fronrmdiff = 0.0f;
+    for (int i = 0; i < n; ++i) {
+        for (int j = 0; j < n; ++j) {
+            cuComplex sum = cuComplex{0.0f, 0.0f};
+            for (int k = 0; k < n; ++k) {
+                sum = cuCaddf(sum, cuCmulf(S[i + k * n], S[k + j * n]));
+            }
+            if (i == j) {
+                sum = cuCsubf(sum, cuComplex{1.0f, 0.0f});
+            }
+            float diff = cuCabsf(sum);
+            fronrmdiff += diff * diff;
+        }
+    }
+    float error = sqrtf(fronrmdiff / sqrtf((float)n));
+    printf("rel. error ||S^2 - I ||_F / ||I||_F = %e\n", error);
+    if (error < 1e-4) {
+        printf("Sign Function Approximation successful\n");
+    } else {
+        printf("Sign Function Approximation failed\n");
+        ret = 1;
+    }
+
+    /*-----------------------------------------------------------------------------
+     * clear memory
+     *-----------------------------------------------------------------------------*/
+    cudaFree(A);
+    cudaFree(S);
+    cudaFree(d_buffer);
+    free(h_buffer);
+    return ret;
+}
diff --git a/example_cusignm_dHalley.cu b/example_cusignm_dHalley.cu
@@ -31,7 +31,7 @@ int main(void) {
      * variables
      *-----------------------------------------------------------------------------*/
     int ret = 0;            // return value
-    const int n = 100;      // size of the input matrix A n-by-n
+    const int n = 10;       // size of the input matrix A n-by-n
     double *A, *S;          // input matrix and sign matrix
     void *d_buffer = NULL;  // device buffer
     void *h_buffer = NULL;  // host buffer
diff --git a/example_cusignm_dNewton.cu b/example_cusignm_dNewton.cu
@@ -31,7 +31,7 @@ int main(void) {
      * variables
      *-----------------------------------------------------------------------------*/
     int ret = 0;            // return value
-    const int n = 100;      // size of the input matrix A n-by-n
+    const int n = 10;       // size of the input matrix A n-by-n
     double *A, *S;          // input matrix and sign matrix
     void *d_buffer = NULL;  // device buffer
     void *h_buffer = NULL;  // host buffer
diff --git a/example_cusignm_sHalley.cu b/example_cusignm_sHalley.cu
@@ -31,7 +31,7 @@ int main(void) {
      * variables
      *-----------------------------------------------------------------------------*/
     int ret = 0;            // return value
-    const int n = 100;      // size of the input matrix A n-by-n
+    const int n = 10;       // size of the input matrix A n-by-n
     float *A, *S;           // input matrix and sign matrix
     void *d_buffer = NULL;  // device buffer
     void *h_buffer = NULL;  // host buffer
@@ -84,7 +84,7 @@ int main(void) {
             if (i == j) {
                 sum -= 1.0f;
             }
-            sum = absf(sum);
+            sum = fabs(sum);
             fronrmdiff += sum * sum;
         }
     }
diff --git a/example_cusignm_sNewton.cu b/example_cusignm_sNewton.cu
diff --git a/example_cusignm_zHalley.cu b/example_cusignm_zHalley.cu
diff --git a/example_cusignm_zNewton.cu b/example_cusignm_zNewton.cu

Original file line number	Diff line number	Diff line change
`@@ -31,7 +31,7 @@ int main(void) {`
`31`	`31`	`* variables`
`32`	`32`	`-----------------------------------------------------------------------------/`
`33`	`33`	`int ret = 0; // return value`
`34`		`- const int n = 100; // size of the input matrix A n-by-n`
	`34`	`+ const int n = 10; // size of the input matrix A n-by-n`
`35`	`35`	`float A, S; // input matrix and sign matrix`
`36`	`36`	`void *d_buffer = NULL; // device buffer`
`37`	`37`	`void *h_buffer = NULL; // host buffer`
`@@ -84,7 +84,7 @@ int main(void) {`
`84`	`84`	`if (i == j) {`
`85`	`85`	`sum -= 1.0f;`
`86`	`86`	`}`
`87`		`- sum = absf(sum);`
	`87`	`+ sum = fabs(sum);`
`88`	`88`	`fronrmdiff += sum * sum;`
`89`	`89`	`}`
`90`	`90`	`}`