Skip to content

Commit 5b1710a

Browse files
Merge pull request #43 from icl-utk-edu/release-notes
Start notes for the next release
2 parents 42ed11e + c380203 commit 5b1710a

File tree

3 files changed

+43
-34
lines changed

3 files changed

+43
-34
lines changed

ReleaseNotes

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,16 @@ Included are routines for the following algorithms:
2525
Most routines have all four precisions:
2626
single (s), double (d), single-complex (c), double-complex (z).
2727

28+
2.10.0 - XXX XX, XXXX
29+
* New functionality: Variable-size batch non-pivoting LU factorization
30+
- Contributed by Wajih Boukaram, Yang Liu, and Sherry Li at LBNL
31+
- magma_<T>getrf_nopiv_vbatched performs a non-pivoting LU factorization on a
32+
non-uniform batch of matrices.
33+
- An expert interface is available (magma_<T>getrf_nopiv_expert_vbatched) for
34+
handling small diagonal elements that fall below user-defined threshold(s).
35+
* Performance improvements for batch Cholesky factorization (magma_<T>potrf_batched)
36+
* Performance improvements for batch triangular solve (magma_<T>trsv_batched)
37+
2838
2.9.0 - Jan 23, 2025
2939
* [DEPRECATED] The MAGMA-sparse package is considered to be "Legacy Support Mode"
3040
only, meaning the MAGMA-sparse component is not under active development. Many

src/zgetrf_nopiv_vbatched.cpp

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@
55
Univ. of Colorado, Denver
66
@date
77
8-
@author Azzam Haidar
9-
@author Tingxing Dong
8+
@author Wajih-Halim Boukaram
9+
@author Yang Liu
10+
@author Sherry Li
1011
1112
@precisions normal z -> s d c
1213
*/
@@ -56,7 +57,7 @@ magma_zgetrf_nopiv_vbatched_max_nocheck(
5657
info_array, i, batchCount, queue);
5758

5859
if (arginfo != 0 ) return arginfo;
59-
60+
6061
if ( (i + ib) < max_n){
6162
// trsm
6263
magmablas_ztrsm_vbatched_core(
@@ -92,13 +93,12 @@ magma_zgetrf_nopiv_vbatched_max_nocheck(
9293
-------
9394
ZGETRF NOPIV computes an LU factorization of a general M-by-N matrix A
9495
without pivoting. It replaces tiny pivots smaller than a specified tolerance
95-
by that tolernace
96+
by that tolerance.
9697
9798
The factorization has the form
9899
A = L * U
99-
where L is lower triangular with unit
100-
diagonal elements (lower trapezoidal if m > n), and U is upper
101-
triangular (upper trapezoidal if m < n).
100+
where L is lower triangular with unit diagonal elements (lower trapezoidal
101+
if m > n), and U is upper triangular (upper trapezoidal if m < n).
102102
103103
This is the right-looking Level 3 BLAS version of the algorithm.
104104
@@ -145,8 +145,8 @@ magma_zgetrf_nopiv_vbatched_max_nocheck(
145145
146146
@param[in]
147147
dtol_array Array of DOUBLEs, dimension (batchCount), for corresponding matrices.
148-
Each is an the tolerance that is compared to the diagonal element before
149-
the column is scaled by its inverse. If the value of the diagonal is less
148+
Each is the tolerance that is compared to the diagonal element before
149+
the column is scaled by its inverse. If the value of the diagonal is less
150150
than the threshold, the diagonal is replaced by the threshold.
151151
If the array is set to NULL, then the threshold is set to the eps parameter
152152
@@ -163,7 +163,7 @@ magma_zgetrf_nopiv_vbatched_max_nocheck(
163163
has been completed, but the factor U is exactly
164164
singular, and division by zero will occur if it is used
165165
to solve a system of equations. If a tolerance array is specified
166-
the value shows the number of times a tiny pivot was replaced
166+
the value shows the number of times a tiny pivot was replaced
167167
168168
@param[in]
169169
WORK VOID pointer
@@ -214,7 +214,7 @@ magma_zgetrf_nopiv_vbatched_max_nocheck_work(
214214

215215
// split workspace as needed by magma_zgetrf_nopiv_vbatched_max_nocheck
216216
magma_int_t* minmn = (magma_int_t*)work;
217-
217+
218218
// init
219219
magma_ivec_min_vv( batchCount, m, n, minmn, queue);
220220

@@ -237,25 +237,24 @@ magma_zgetrf_nopiv_vbatched_max_nocheck_work(
237237
-------
238238
ZGETRF NOPIV computes an LU factorization of a general M-by-N matrix A
239239
without pivoting. It replaces tiny pivots smaller than a specified tolerance
240-
by that tolernace
240+
by that tolerance.
241241
242242
The factorization has the form
243243
A = L * U
244-
where L is lower triangular with unit
245-
diagonal elements (lower trapezoidal if m > n), and U is upper
246-
triangular (upper trapezoidal if m < n).
244+
where L is lower triangular with unit diagonal elements (lower trapezoidal
245+
if m > n), and U is upper triangular (upper trapezoidal if m < n).
247246
248247
This is the right-looking Level 3 BLAS version of the algorithm.
249248
250249
This is the variable-size batched version, which factors batchCount matrices of
251250
different sizes in parallel. Each matrix is assumed to have its own size and leading
252251
dimension.
253252
254-
This is the expert version taking an extra parameter for the tolerance for diagonal
253+
This is the expert version taking an extra parameter for the tolerance for diagonal
255254
elements. Small diagonal elements will be replaced by the specified tolerance preserving
256-
the sign and the info array will report the number of replacements. This is useful in the
255+
the sign and the info array will report the number of replacements. This is useful in the
257256
context of static pivoting used in sparse solvers such as SuperLU, where the tolerance would
258-
be the the norm of the matrix scaled by the machine epsilon for example.
257+
be the norm of the matrix scaled by the machine epsilon, for example.
259258
260259
Arguments
261260
---------
@@ -280,8 +279,8 @@ magma_zgetrf_nopiv_vbatched_max_nocheck_work(
280279
281280
@param[in]
282281
dtol_array Array of DOUBLEs, dimension (batchCount), for corresponding matrices.
283-
Each is an the tolerance that is compared to the diagonal element before
284-
the column is scaled by its inverse. If the value of the diagonal is less
282+
Each is the tolerance that is compared to the diagonal element before
283+
the column is scaled by its inverse. If the value of the diagonal is less
285284
than the threshold, the diagonal is replaced by the threshold.
286285
If the array is set to NULL, then the threshold is set to the eps parameter
287286
@@ -298,7 +297,7 @@ magma_zgetrf_nopiv_vbatched_max_nocheck_work(
298297
has been completed, but the factor U is exactly
299298
singular, and division by zero will occur if it is used
300299
to solve a system of equations. If a tolerance array is specified
301-
the value shows the number of times a tiny pivot was replaced
300+
the value shows the number of times a tiny pivot was replaced
302301
303302
@param[in]
304303
batchCount INTEGER
@@ -374,14 +373,12 @@ magma_zgetrf_nopiv_expert_vbatched(
374373
Purpose
375374
-------
376375
ZGETRF NOPIV computes an LU factorization of a general M-by-N matrix A
377-
without pivoting. It replaces tiny pivots smaller than a specified tolerance
378-
by that tolernace
376+
without pivoting.
379377
380378
The factorization has the form
381379
A = L * U
382-
where L is lower triangular with unit
383-
diagonal elements (lower trapezoidal if m > n), and U is upper
384-
triangular (upper trapezoidal if m < n).
380+
where L is lower triangular with unit diagonal elements (lower trapezoidal
381+
if m > n), and U is upper triangular (upper trapezoidal if m < n).
385382
386383
This is the right-looking Level 3 BLAS version of the algorithm.
387384
@@ -415,7 +412,10 @@ magma_zgetrf_nopiv_expert_vbatched(
415412
- = 0: successful exit
416413
- < 0: if INFO = -i, the i-th argument had an illegal value
417414
or another error occurred, such as memory allocation failed.
418-
- > 0: if INFO = i, there were i tiny pivot replacements
415+
- > 0: if INFO = i, U(i,i) is exactly zero. The factorization
416+
has been completed, but the factor U is exactly
417+
singular, and division by zero will occur if it is used
418+
to solve a system of equations.
419419
420420
@param[in]
421421
batchCount INTEGER

testing/testing_zpotrf_batched.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,6 @@ int main( int argc, char** argv)
114114
printf("magma_zpotrf_batched returned argument error %lld: %s.\n", (long long) info, magma_strerror( info ));
115115
status = -1;
116116
}
117-
if (status == -1)
118-
goto cleanup;
119117

120118
/* =====================================================================
121119
Performs operation using LAPACK
@@ -175,19 +173,20 @@ int main( int argc, char** argv)
175173
printf("%10lld %5lld --- ( --- ) %7.2f (%7.2f) ---\n",
176174
(long long) batchCount, (long long) N, gpu_perf, gpu_time*1000. );
177175
}
178-
cleanup:
176+
179177
magma_free_cpu( hinfo_magma );
180178
magma_free_cpu( h_A );
181179
magma_free_cpu( h_R );
182180
magma_free( d_A );
183181
magma_free( d_A_array );
184182
magma_free( dinfo_magma );
185-
if (status == -1)
186-
break;
183+
187184
fflush( stdout );
188185
}
189-
if (status == -1)
190-
break;
186+
187+
if ( opts.niter > 1 ) {
188+
printf( "\n" );
189+
}
191190
}
192191

193192
opts.cleanup();

0 commit comments

Comments
 (0)