@@ -31,9 +31,12 @@ const IMPLIMENTED_PROJECTIONS = Set{Symbol}((:nnscale, :simplex, :nonnegative))
31
31
32
32
- `:ncone`: vector-set distance between the -gradient of the objective and the normal cone
33
33
- `:iterates`: A,B before and after one iteration are close in L2 norm
34
- - `:objective`: objective before and after one iteration is close
34
+ - `:objective`: objective is small
35
+ - `:relativeerror`: relative error is small (when `normalize=:nothing`) or
36
+ mean relative error averaging fibres or slices when the normalization is `:fibres` or
37
+ `:slices` respectively.
35
38
"""
36
# Symbols accepted by the `criterion` keyword; `converged` checks membership in this set.
const IMPLIMENTED_CRITERIA = Set{Symbol}([:ncone, :iterates, :objective, :relativeerror])
37
40
38
41
"""
39
42
IMPLIMENTED_STEPSIZES::Set{Symbol}
@@ -66,17 +69,21 @@ const IMPLIMENTED_OPTIONS = Dict(
66
69
)
67
70
68
71
@doc raw """
69
- nnmtf(Y::Abstract3Tensor , R::Integer; kwargs...)
72
+ nnmtf(Y::AbstractArray , R::Integer; kwargs...)
70
73
71
- Non-negatively matrix-tensor factorizes an order 3 tensor Y with a given "rank" R.
74
+ Non-negatively matrix-tensor factorizes an order N tensor Y with a given "rank" R.
72
75
73
- Factorizes ``Y \a pprox A B`` where ``\d isplaystyle Y[i,j,k] \a pprox \s um_{r=1}^R A[i,r]*B[r,j,k]``
76
+ For an order ``N=3`` tensor, this factorizes ``Y \approx A B`` where
77
+ ``\displaystyle Y[i,j,k] \approx \sum_{r=1}^R A[i,r]*B[r,j,k]``
74
78
and the factors ``A, B \geq 0`` are nonnegative.
75
79
80
+ For higher orders, this becomes
81
+ ``\displaystyle Y[i1,i2,...,iN] \approx \sum_{r=1}^R A[i1,r]*B[r,i2,...,iN].``
82
+
76
83
Note there may NOT be a unique optimal solution
77
84
78
85
# Arguments
79
- - `Y::Abstract3Tensor `: tensor to factorize
86
+ - `Y::AbstractArray{T,N} `: tensor to factorize
80
87
- `R::Integer`: rank to factorize Y (size(A)[2] and size(B)[1])
81
88
82
89
# Keywords
@@ -89,14 +96,16 @@ Note there may NOT be a unique optimal solution
89
96
- `criterion::Symbol=:ncone`: how to determine if the algorithm has converged (must be in IMPLIMENTED_CRITERIA)
90
97
- `stepsize::Symbol=:lipshitz`: used for the gradient descent step (must be in IMPLIMENTED_STEPSIZES)
91
98
- `momentum::Bool=false`: use momentum updates
92
- - `delta::Real=0.9999`: safeguard for maximum amount of momentum (see eq 3.5 Xu & Yin 2013)
99
+ - `delta::Real=0.9999`: safeguard for maximum amount of momentum (see eq. (3.5) in Xu & Yin 2013)
93
100
- `R_max::Integer=size(Y)[1]`: maximum rank to try if R is not given
94
101
- `projectionA::Symbol=projection`: projection to use on factor A (must be in IMPLIMENTED_PROJECTIONS)
95
102
- `projectionB::Symbol=projection`: projection to use on factor B (must be in IMPLIMENTED_PROJECTIONS)
103
+ - `A_init::Union{Nothing, AbstractMatrix}=nothing`: initial A for the iterative algorithm. Should be kept as `nothing` if `R` is not given.
104
+ - `B_init::Union{Nothing, AbstractArray}=nothing`: initial B for the iterative algorithm. Should be kept as `nothing` if `R` is not given.
96
105
97
106
# Returns
98
107
- `A::Matrix{Float64}`: the matrix A in the factorization Y ≈ A * B
99
- - `B::Array{Float64, 3 }`: the tensor B in the factorization Y ≈ A * B
108
+ - `B::Array{Float64, N }`: the tensor B in the factorization Y ≈ A * B
100
109
- `rel_errors::Vector{Float64}`: relative errors at each iteration
101
110
- `norm_grad::Vector{Float64}`: norm of the full gradient at each iteration
102
111
- `dist_Ncone::Vector{Float64}`: distance of the -gradient to the normal cone at each iteration
@@ -224,6 +233,8 @@ function _nnmtf_proxgrad(
224
233
rescale_Y:: Bool = (projection == :nnscale ? true : false ),
225
234
projectionA:: Symbol = projection,
226
235
projectionB:: Symbol = projection,
236
+ A_init:: Union{Nothing, AbstractMatrix} = nothing ,
237
+ B_init:: Union{Nothing, AbstractArray} = nothing ,
227
238
)
228
239
# Override scaling if no normalization is requested
229
240
normalize == :nothing ? (rescale_AB = rescale_Y = false ) : nothing
@@ -232,11 +243,25 @@ function _nnmtf_proxgrad(
232
243
M, Ns... = size (Y)
233
244
234
245
# Initialize A, B
235
- init (x... ) = abs .(randn (x... ))
236
- A = init (M, R)
237
- B = init (R, Ns... )
246
+ if A_init === nothing
247
+ A = _init (M, R)
248
+ else
249
+ size (A_init) == (M, R) || throw (ArgumentError (" A_init should have size $((M, R)) , got $(size (A_init)) " ))
250
+ A = A_init
251
+ end
238
252
239
- rescaleAB! (A, B; normalize)
253
# Choose the starting B: a random nonnegative array when the caller supplied none,
# otherwise the caller's `B_init` after a size check.
# The guard must test `B_init`, not `A_init`; testing `A_init` would discard a supplied
# `B_init` when `A_init` is absent, and call `size(nothing)` when only `A_init` is given.
if B_init === nothing
    B = _init(R, Ns...)
else
    size(B_init) == (R, Ns...) ||
        throw(ArgumentError("B_init should have size $((R, Ns...)), got $(size(B_init))"))
    B = B_init
end
259
+
260
+ # Only want to rescale the initialization if both A and B were not given
261
+ # Otherwise, we should use the provided initialization
262
+ if rescale_AB && A_init === nothing && B_init === nothing
263
+ rescaleAB! (A, B; normalize)
264
+ end
240
265
241
266
problem_size = R* (M + prod (Ns))
242
267
@@ -254,7 +279,7 @@ function _nnmtf_proxgrad(
254
279
255
280
# Calculate initial relative error and gradient
256
281
Yhat = A* B
257
- rel_errors[i] = residual (Yhat, Y; normalize)
282
+ rel_errors[i] = relative_error (Yhat, Y; normalize)
258
283
grad_A, grad_B = calc_gradient (A, B, Y)
259
284
norm_grad[i] = combined_norm (grad_A, grad_B)
260
285
dist_Ncone[i] = dist_to_Ncone (grad_A, grad_B, A, B)
@@ -318,15 +343,15 @@ function _nnmtf_proxgrad(
318
343
# Calculate relative error and norm of gradient
319
344
i += 1
320
345
Yhat .= A* B
321
- rel_errors[i] = residual (Yhat, Y; normalize)
346
+ rel_errors[i] = relative_error (Yhat, Y; normalize)
322
347
# grad_A, grad_B = calc_gradient(A, B, Y)
323
348
grad_A .= calc_gradientA (A, B, Y)
324
349
grad_B .= calc_gradientB (A, B, Y)
325
350
norm_grad[i] = combined_norm (grad_A, grad_B)
326
351
# norm_grad[i] = combined_norm(grad_A, grad_B)
327
352
dist_Ncone[i] = dist_to_Ncone (grad_A, grad_B, A, B)
328
353
329
- if converged (; dist_Ncone, i, A, B, A_last, B_last, tol, problem_size, criterion, Y)
354
+ if converged (; dist_Ncone, i, A, B, A_last, B_last, tol, problem_size, criterion, Y, Yhat, normalize )
330
355
break
331
356
end
332
357
@@ -367,6 +392,11 @@ function _nnmtf_proxgrad(
367
392
return A, B, rel_errors, norm_grad, dist_Ncone
368
393
end
369
394
395
+ """
396
+ Default initialization
397
+ """
398
function _init(dims...)
    # Elementwise absolute value of standard-normal draws, so every entry is nonnegative.
    samples = randn(dims...)
    return abs.(samples)
end
399
+
370
400
"""
371
401
Convergence criteria function.
372
402
@@ -376,16 +406,26 @@ independent of the dimentions of Y and rank R.
376
406
Note the use of `;` in the function definition so that order of arguments does not matter,
377
407
and keyword assignment can be ignored if the input variables are named exactly as below.
378
408
"""
379
function converged(; dist_Ncone, i, A, B, A_last, B_last, tol, problem_size, criterion, Y, Yhat, normalize)
    # Returns `true` when the value measured by `criterion` at iteration `i` is strictly
    # below `tol`, and `false` otherwise.
    #
    # An unknown criterion is a caller error, so throw the exception instead of returning
    # the exception object: callers use the result directly in `if converged(...)`, where a
    # non-Bool value would fail with an unrelated type error.
    criterion in IMPLIMENTED_CRITERIA ||
        throw(UnimplimentedError("criterion is not an impliment criterion"))

    # Fallback kept from the original: a criterion that is in IMPLIMENTED_CRITERIA but has
    # no branch below compares 0.0 against `tol`.
    criterion_value = 0.0

    if criterion == :ncone
        # Distance of the -gradient to the normal cone, scaled by sqrt(problem_size)
        criterion_value = dist_Ncone[i] / sqrt(problem_size) # TODO remove root problem size dependence
    elseif criterion == :iterates
        # Combined norm of the change in (A, B) over the last iteration
        criterion_value = combined_norm(A - A_last, B - B_last)
    elseif criterion == :objective
        # Least-squares objective evaluated with the already-computed Yhat
        criterion_value = 0.5 * norm(Yhat - Y)^2
    elseif criterion == :relativeerror
        # Relative error (or its mean over fibres/slices, depending on `normalize`)
        criterion_value = relative_error(Yhat, Y; normalize)
    end

    return criterion_value < tol
end
390
430
391
431
"""
@@ -405,7 +445,7 @@ function to_dims(normalize::Symbol)
405
445
end
406
446
407
447
"""
408
- residual (Yhat, Y; normalize=:nothing)
448
+ relative_error (Yhat, Y; normalize=:nothing)
409
449
410
450
Wrapper to use the relative error calculation according to the normalization used.
411
451
@@ -415,7 +455,7 @@ Wrapper to use the relative error calculation according to the normalization use
415
455
416
456
See also [`rel_error`](@ref), [`mean_rel_error`](@ref).
417
457
"""
418
- function residual (Yhat, Y; normalize= :nothing )
458
+ function relative_error (Yhat, Y; normalize= :nothing )
419
459
if normalize in (:fibres , :slices )
420
460
return mean_rel_error (Yhat, Y; dims= to_dims (normalize))
421
461
elseif normalize == :nothing
@@ -655,7 +695,8 @@ function nnmtf_proxgrad_online(
655
695
dist_Ncone = zeros (maxiter)
656
696
657
697
# Calculate initial relative error and gradient
658
- rel_errors[i] = residual (A* B, Y; normalize)
698
+ Yhat = A* B
699
+ rel_errors[i] = relative_error (Yhat, Y; normalize)
659
700
grad_A, grad_B = calc_gradient (A, B, Y)
660
701
norm_grad[i] = combined_norm (grad_A, grad_B)
661
702
dist_Ncone[i] = dist_to_Ncone (grad_A, grad_B, A, B)
@@ -719,12 +760,13 @@ function nnmtf_proxgrad_online(
719
760
720
761
# Calculate relative error and norm of gradient
721
762
i += 1
722
- rel_errors[i] = residual (A* B, Y; normalize)
763
+ Yhat .= A* B
764
+ rel_errors[i] = relative_error (Yhat, Y; normalize)
723
765
grad_A, grad_B = calc_gradient (A, B, Y)
724
766
norm_grad[i] = combined_norm (grad_A, grad_B)
725
767
dist_Ncone[i] = dist_to_Ncone (grad_A, grad_B, A, B)
726
768
727
- if converged (; dist_Ncone, i, A, B, A_last, B_last, tol, problem_size, criterion, Y)
769
+ if converged (; dist_Ncone, i, A, B, A_last, B_last, tol, problem_size, criterion, Y, Yhat, normalize )
728
770
break
729
771
end
730
772
0 commit comments