Merge pull request #43 from icl-utk-edu/release-notes

abdelfattah83 · web-flow · commit 5b1710a79b65 · 2025-02-25T14:09:26.000-05:00
Start notes for the next release
diff --git a/ReleaseNotes b/ReleaseNotes
@@ -25,6 +25,16 @@ Included are routines for the following algorithms:
 Most routines have all four precisions:
 single (s), double (d), single-complex (c), double-complex (z).
 
+2.10.0 - XXX XX, XXXX
+    * New functionality: Variable-size batch non-pivoting LU factorization
+      - Contributed by Wajih Boukaram, Yang Liu, and Sherry Li at LBNL
+      - magma_<T>getrf_nopiv_vbatched performs a non-pivoting LU factorization on a
+        non-uniform batch of matrices.
+      - An expert interface is available (magma_<T>getrf_nopiv_expert_vbatched) for
+        handling small diagonal elements that fall below user-defined threshold(s).
+    * Performance improvements for batch Cholesky factorization (magma_<T>potrf_batched)
+    * Performance improvements for batch triangular solve (magma_<T>trsv_batched)
+
 2.9.0 - Jan 23, 2025
     * [DEPRECATED] The MAGMA-sparse package is considered to be "Legacy Support Mode"
       only, meaning the MAGMA-sparse component is not under active development. Many
diff --git a/src/zgetrf_nopiv_vbatched.cpp b/src/zgetrf_nopiv_vbatched.cpp
@@ -5,8 +5,9 @@
    Univ. of Colorado, Denver
    @date
 
-   @author Azzam Haidar
-   @author Tingxing Dong
+   @author Wajih-Halim Boukaram
+   @author Yang Liu
+   @author Sherry Li
 
    @precisions normal z -> s d c
 */
@@ -56,7 +57,7 @@ magma_zgetrf_nopiv_vbatched_max_nocheck(
                     info_array, i, batchCount, queue);
 
         if (arginfo != 0 ) return arginfo;
-        
+
         if ( (i + ib) < max_n){
             // trsm
             magmablas_ztrsm_vbatched_core(
@@ -92,13 +93,12 @@ magma_zgetrf_nopiv_vbatched_max_nocheck(
     -------
     ZGETRF NOPIV computes an LU factorization of a general M-by-N matrix A
     without pivoting. It replaces tiny pivots smaller than a specified tolerance
-    by that tolernace
+    by that tolerance.
 
     The factorization has the form
         A = L * U
-    where L is lower triangular with unit
-    diagonal elements (lower trapezoidal if m > n), and U is upper
-    triangular (upper trapezoidal if m < n).
+    where L is lower triangular with unit diagonal elements (lower trapezoidal
+    if m > n), and U is upper triangular (upper trapezoidal if m < n).
 
     This is the right-looking Level 3 BLAS version of the algorithm.
 
@@ -145,8 +145,8 @@ magma_zgetrf_nopiv_vbatched_max_nocheck(
 
     @param[in]
     dtol_array  Array of DOUBLEs, dimension (batchCount), for corresponding matrices.
-            Each is an the tolerance that is compared to the diagonal element before
-            the column is scaled by its inverse. If the value of the diagonal is less 
+            Each is the tolerance that is compared to the diagonal element before
+            the column is scaled by its inverse. If the value of the diagonal is less
             than the threshold, the diagonal is replaced by the threshold.
             If the array is set to NULL, then the threshold is set to the eps parameter
 
@@ -163,7 +163,7 @@ magma_zgetrf_nopiv_vbatched_max_nocheck(
                   has been completed, but the factor U is exactly
                   singular, and division by zero will occur if it is used
                   to solve a system of equations. If a tolerance array is specified
-                  the value shows the number of times a tiny pivot was replaced 
+                  the value shows the number of times a tiny pivot was replaced.
 
     @param[in]
     WORK        VOID pointer
@@ -214,7 +214,7 @@ magma_zgetrf_nopiv_vbatched_max_nocheck_work(
 
     // split workspace as needed by magma_zgetrf_nopiv_vbatched_max_nocheck
     magma_int_t* minmn           = (magma_int_t*)work;
-    
+
     // init
     magma_ivec_min_vv( batchCount, m, n, minmn, queue);
 
@@ -237,25 +237,24 @@ magma_zgetrf_nopiv_vbatched_max_nocheck_work(
     -------
     ZGETRF NOPIV computes an LU factorization of a general M-by-N matrix A
     without pivoting. It replaces tiny pivots smaller than a specified tolerance
-    by that tolernace
+    by that tolerance.
 
     The factorization has the form
         A = L * U
-    where L is lower triangular with unit
-    diagonal elements (lower trapezoidal if m > n), and U is upper
-    triangular (upper trapezoidal if m < n).
+    where L is lower triangular with unit diagonal elements (lower trapezoidal
+    if m > n), and U is upper triangular (upper trapezoidal if m < n).
 
     This is the right-looking Level 3 BLAS version of the algorithm.
 
     This is the variable-size batched version, which factors batchCount matrices of
     different sizes in parallel. Each matrix is assumed to have its own size and leading
     dimension.
 
-    This is the expert version taking an extra parameter for the tolerance for diagonal 
+    This is the expert version taking an extra parameter for the tolerance for diagonal
     elements. Small diagonal elements will be replaced by the specified tolerance preserving
-    the sign and the info array will report the number of replacements. This is useful in the 
+    the sign and the info array will report the number of replacements. This is useful in the
     context of static pivoting used in sparse solvers such as SuperLU, where the tolerance would
-    be the the norm of the matrix scaled by the machine epsilon for example. 
+    be the norm of the matrix scaled by the machine epsilon for example.
 
     Arguments
     ---------
@@ -280,8 +279,8 @@ magma_zgetrf_nopiv_vbatched_max_nocheck_work(
 
     @param[in]
     dtol_array  Array of DOUBLEs, dimension (batchCount), for corresponding matrices.
-            Each is an the tolerance that is compared to the diagonal element before
-            the column is scaled by its inverse. If the value of the diagonal is less 
+            Each is the tolerance that is compared to the diagonal element before
+            the column is scaled by its inverse. If the value of the diagonal is less
             than the threshold, the diagonal is replaced by the threshold.
             If the array is set to NULL, then the threshold is set to the eps parameter
 
@@ -298,7 +297,7 @@ magma_zgetrf_nopiv_vbatched_max_nocheck_work(
                   has been completed, but the factor U is exactly
                   singular, and division by zero will occur if it is used
                   to solve a system of equations. If a tolerance array is specified
-                  the value shows the number of times a tiny pivot was replaced 
+                  the value shows the number of times a tiny pivot was replaced.
 
     @param[in]
     batchCount  INTEGER
@@ -374,14 +373,12 @@ magma_zgetrf_nopiv_expert_vbatched(
     Purpose
     -------
     ZGETRF NOPIV computes an LU factorization of a general M-by-N matrix A
-    without pivoting. It replaces tiny pivots smaller than a specified tolerance
-    by that tolernace
+    without pivoting.
 
     The factorization has the form
         A = L * U
-    where L is lower triangular with unit
-    diagonal elements (lower trapezoidal if m > n), and U is upper
-    triangular (upper trapezoidal if m < n).
+    where L is lower triangular with unit diagonal elements (lower trapezoidal
+    if m > n), and U is upper triangular (upper trapezoidal if m < n).
 
     This is the right-looking Level 3 BLAS version of the algorithm.
 
@@ -415,7 +412,10 @@ magma_zgetrf_nopiv_expert_vbatched(
       -     = 0:  successful exit
       -     < 0:  if INFO = -i, the i-th argument had an illegal value
                   or another error occured, such as memory allocation failed.
-      -     > 0:  if INFO = i, there were i tiny pivot replacements 
+      -     > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
+                  has been completed, but the factor U is exactly
+                  singular, and division by zero will occur if it is used
+                  to solve a system of equations.
 
     @param[in]
     batchCount  INTEGER
diff --git a/testing/testing_zpotrf_batched.cpp b/testing/testing_zpotrf_batched.cpp
@@ -114,8 +114,6 @@ int main( int argc, char** argv)
                 printf("magma_zpotrf_batched returned argument error %lld: %s.\n", (long long) info, magma_strerror( info ));
                 status = -1;
             }
-            if (status == -1)
-                goto cleanup;
 
             /* =====================================================================
                Performs operation using LAPACK
@@ -175,19 +173,20 @@ int main( int argc, char** argv)
                 printf("%10lld %5lld     ---   (  ---  )   %7.2f (%7.2f)     ---\n",
                        (long long) batchCount, (long long) N, gpu_perf, gpu_time*1000. );
             }
-cleanup:
+
             magma_free_cpu( hinfo_magma );
             magma_free_cpu( h_A );
             magma_free_cpu( h_R );
             magma_free( d_A );
             magma_free( d_A_array );
             magma_free( dinfo_magma );
-            if (status == -1)
-                break;
+
             fflush( stdout );
         }
-        if (status == -1)
-            break;
+
+        if ( opts.niter > 1 ) {
+            printf( "\n" );
+        }
     }
 
     opts.cleanup();