diff --git a/scripts/quickcheckallocations.jl b/scripts/quickcheckallocations.jl index b0dad67..5560011 100644 --- a/scripts/quickcheckallocations.jl +++ b/scripts/quickcheckallocations.jl @@ -478,4 +478,30 @@ KendallTau.corspearman($xm; skipmissing=:none) KendallTau.pairwise(KendallTau.corspearman,eachcol($xm),skipmissing=:none) 22.049 ms (1623 allocations: 17.27 MiB) ======================================================================================================================== +======================================================================================================================== +Dates.now() = DateTime("2024-04-03T11:47:26.152") +ENV["COMPUTERNAME"] = "DESKTOP-HSGAM5S" +Julia Version 1.10.2 +Threads.nthreads() = 20 +size(x) = (1000, 1000) +typeof(x) = Matrix{Float64} +size(xm) = (1000, 1000) +typeof(xm) = Matrix{Union{Missing, Float64}} +do_StatsBase_times = false +KendallTau.corkendall($x) 1.176 s (1409 allocations: 16.53 MiB) +KendallTau.corkendall($xm; skipmissing=:pairwise) 1.587 s (1409 allocations: 16.23 MiB) +KendallTau.pairwise(KendallTau.corkendall,eachcol($xm); skipmissing=:pairwise) 1.577 s (1377 allocations: 16.23 MiB) +KendallTau.corkendall($xm; skipmissing=:listwise) 5.748 ms (2410 allocations: 11.86 MiB) +KendallTau.pairwise(KendallTau.corkendall,eachcol($xm); skipmissing=:listwise) 5.196 ms (2378 allocations: 11.85 MiB) +KendallTau.corkendall($xm; skipmissing=:none) 53.383 ms (1420 allocations: 17.19 MiB) +KendallTau.pairwise(KendallTau.corkendall,eachcol($xm),skipmissing=:none) 52.164 ms (1418 allocations: 17.19 MiB) +KendallTau.corspearman($x) 17.053 ms (1223 allocations: 39.42 MiB) +KendallTau.corspearman($xm; skipmissing=:pairwise) 407.303 ms (1572 allocations: 23.98 MiB) +KendallTau.pairwise(KendallTau.corspearman,eachcol($xm); skipmissing=:pairwise) 405.928 ms (1540 allocations: 23.98 MiB) +KendallTau.corspearman($xm; skipmissing=:listwise) 2.944 ms (2209 allocations: 19.44 MiB) +KendallTau.pairwise(KendallTau.corspearman,eachcol($xm); skipmissing=:listwise) 2.966 ms (2177 allocations: 19.44 MiB) +KendallTau.corspearman($xm; skipmissing=:none) 13.384 ms (1721 allocations: 17.38 MiB) +KendallTau.pairwise(KendallTau.corspearman,eachcol($xm),skipmissing=:none) 13.110 ms (1719 allocations: 17.38 MiB) +======================================================================================================================== + =# \ No newline at end of file diff --git a/src/KendallTau.jl b/src/KendallTau.jl index 6b6244b..6969fbd 100644 --- a/src/KendallTau.jl +++ b/src/KendallTau.jl @@ -1,6 +1,7 @@ module KendallTau using Missings: disallowmissing import StatsBase: cor, cov, _tiedrank!, tiedrank +using LinearAlgebra include("rankcorr.jl") include("corkendall_fromfile.jl") diff --git a/src/pairwise.jl b/src/pairwise.jl index 4d79552..7c1def2 100644 --- a/src/pairwise.jl +++ b/src/pairwise.jl @@ -5,15 +5,15 @@ function _pairwise!(::Val{:none}, f, dest::AbstractMatrix{V}, x, y, # Swap x and y for more efficient threaded loop. if nr < nc - dest′ = reshape(dest, size(dest, 2), size(dest, 1)) - _pairwise!(Val(:none), f, dest′, y, x, symmetric) - dest .= transpose(dest′) + dest2 = similar(dest, size(dest, 2), size(dest, 1)) + _pairwise!(Val(:none), f, dest2, y, x, symmetric) + dest .= transpose(dest2) return dest end #cor and friends are special-cased. - iscor = (f in (corkendall, corspearman, cor)) - (iscor || f == cov) && (symmetric = x === y) + diag_is_1 = (f in (corkendall, corspearman, cor)) + (diag_is_1 || f == cov) && (symmetric = x === y) #cov(x) is faster than cov(x, x) (f == cov) && (f = ((x, y) -> x === y ? cov(x) : cov(x, y))) @@ -21,15 +21,15 @@ function _pairwise!(::Val{:none}, f, dest::AbstractMatrix{V}, x, y, for j in subset for i = 1:(symmetric ? j : nc) # For performance, diagonal is special-cased - if iscor && i==j && y[i] === x[j] && V !== Union{} + if diag_is_1 && i == j && y[i] === x[j] && V !== Union{} dest[j, i] = V === Missing ? missing : 1.0 else dest[j, i] = f(x[j], y[i]) end - symmetric && (dest[i, j] = dest[j, i]) end end end + symmetric && LinearAlgebra.copytri!(dest, 'L') return dest end @@ -81,15 +81,15 @@ function _pairwise!(::Val{:pairwise}, f, dest::AbstractMatrix{V}, x, y, symmetri # Swap x and y for more efficient threaded loop. if nr < nc - dest′ = reshape(dest, size(dest, 2), size(dest, 1)) - _pairwise!(Val(:pairwise), f, dest′, y, x, symmetric) - dest .= transpose(dest′) + dest2 = similar(dest, size(dest, 2), size(dest, 1)) + _pairwise!(Val(:pairwise), f, dest2, y, x, symmetric) + dest .= transpose(dest2) return dest end #cor and friends are special-cased. - iscor = (f in (corkendall, corspearman, cor)) - (iscor || f == cov) && (symmetric = x === y) + diag_is_1 = (f in (corkendall, corspearman, cor)) + (diag_is_1 || f == cov) && (symmetric = x === y) #cov(x) is faster than cov(x, x) (f == cov) && (f = ((x, y) -> x === y ? cov(x) : cov(x, y))) @@ -97,19 +97,19 @@ function _pairwise!(::Val{:pairwise}, f, dest::AbstractMatrix{V}, x, y, symmetri nmty = promoted_nmtype(y)[] Threads.@threads for subset in equal_sum_subsets(nr, Threads.nthreads()) + scratch_fx = task_local_vector(:scratch_fx, nmtx, m) + scratch_fy = task_local_vector(:scratch_fy, nmty, m) for j in subset - scratch_fx = task_local_vector(:scratch_fx, nmtx, m) - scratch_fy = task_local_vector(:scratch_fy, nmty, m) for i = 1:(symmetric ? j : nc) - if iscor && i == j && y[i] === x[j] && V !== Union{} && V!== Missing + if diag_is_1 && i == j && y[i] === x[j] && V !== Union{} && V !== Missing dest[j, i] = 1.0 else - dest[j, i] = f(handle_pairwise(x[j], y[i]; scratch_fx, scratch_fy)...) + dest[j, i] = f(handle_pairwise(x[j], y[i]; scratch_fx=scratch_fx, scratch_fy=scratch_fy)...) end - symmetric && (dest[i, j] = dest[j, i]) end end end + symmetric && LinearAlgebra.copytri!(dest, 'L') return dest end @@ -223,7 +223,7 @@ presence `missing`, `NaN` or `Inf` entries. # Examples ```jldoctest -julia> using KendallTau, Statistics +julia> using StatsBase, Statistics julia> dest = zeros(3, 3); @@ -255,7 +255,7 @@ julia> dest ``` """ function pairwise!(f, dest::AbstractMatrix, x, y=x; - symmetric::Bool=false, skipmissing::Symbol=:none) + symmetric::Bool=false, skipmissing::Symbol=:none) check_vectors(x, y, skipmissing, symmetric) return _pairwise!(f, dest, x, y, symmetric=symmetric, skipmissing=skipmissing) end @@ -288,7 +288,7 @@ presence `missing`, `NaN` or `Inf` entries. # Examples ```jldoctest -julia> using KendallTau, Statistics +julia> using StatsBase, Statistics julia> x = [1 3 7 2 5 6 @@ -326,15 +326,14 @@ promoted_nmtype(x) = mapreduce(nonmissingtype ∘ eltype, promote_type, x, init= """ handle_listwise(x, y) - Remove missings in a listwise manner. Assumes `x` and `y` are vectors of iterables which - have been validated via `check_vectors`. - +Remove missings in a listwise manner. Assumes `x` and `y` are vectors of iterables which +have been validated via `check_vectors`. ## Example ```julia-repl julia> a = [6,7,8,9,10,missing];b = [4,5,6,7,missing,8];c = [2,3,4,missing,5,6];d = [1,2,3,4,5,6]; -julia> KendallTau.handle_listwise([a,b],[c,d]) +julia> StatsBase.handle_listwise([a,b],[c,d]) ([[6, 7, 8], [4, 5, 6]], [[2, 3, 4], [1, 2, 3]]) ``` """ @@ -382,16 +381,16 @@ function handle_pairwise(x::AbstractVector, y::AbstractVector; axes(x) == axes(y) || throw(DimensionMismatch("x and y have inconsistent dimensions")) lb = first(axes(x, 1)) - j = lb - 1 + j = lb @inbounds for i in eachindex(x) if !(ismissing(x[i]) || ismissing(y[i])) - j += 1 scratch_fx[j] = x[i] scratch_fy[j] = y[i] + j += 1 end end - return view(scratch_fx, lb:j), view(scratch_fy, lb:j) + return view(scratch_fx, lb:(j-1)), view(scratch_fy, lb:(j-1)) end #=Condition a) makes equal_sum_subsets useful for load-balancing the multi-threaded section @@ -405,7 +404,7 @@ is nearly equal. If `n` is a multiple of `2 * num_subsets` both conditions hold ## Example ```julia-repl -julia> KendallTau.equal_sum_subsets(30,5) +julia> StatsBase.equal_sum_subsets(30,5) 5-element Vector{Vector{Int64}}: [30, 21, 20, 11, 10, 1] [29, 22, 19, 12, 9, 2] diff --git a/src/rankcorr.jl b/src/rankcorr.jl index 27771a8..1cb7563 100644 --- a/src/rankcorr.jl +++ b/src/rankcorr.jl @@ -38,17 +38,17 @@ function corspearman(x::AbstractVector, y::AbstractVector; skipmissing::Symbol=: end function corspearman(x::AbstractMatrix, y::AbstractVector; skipmissing::Symbol=:none) - return corspearman(x, reshape(y, (length(y), 1)); skipmissing) + return corspearman(x, reshape(y, (length(y), 1)); skipmissing=skipmissing) end function corspearman(x::AbstractVector, y::AbstractMatrix; skipmissing::Symbol=:none) - return corspearman(reshape(x, (length(x), 1)), y; skipmissing) + return corspearman(reshape(x, (length(x), 1)), y; skipmissing=skipmissing) end function corspearman(x::AbstractMatrix, y::AbstractMatrix=x; skipmissing::Symbol=:none) check_rankcor_args(x, y, skipmissing, true) - return pairwise(corspearman, eachcol(x), eachcol(y); skipmissing) + return pairwise(corspearman, eachcol(x), eachcol(y); skipmissing=skipmissing) end function corspearman(x::AbstractVector{T}) where {T} @@ -99,9 +99,9 @@ function _pairwise!(::Val{:pairwise}, f::typeof(corspearman), # Swap x and y for more efficient threaded loop. if nr < nc - dest′ = reshape(dest, size(dest, 2), size(dest, 1)) - _pairwise!(Val(:pairwise), f, dest′, y, x, symmetric) - dest .= transpose(dest′) + dest2 = similar(dest, size(dest, 2), size(dest, 1)) + _pairwise!(Val(:pairwise), f, dest2, y, x, symmetric) + dest .= transpose(dest2) return dest end @@ -137,10 +137,10 @@ function _pairwise!(::Val{:pairwise}, f::typeof(corspearman), view(tempx, :, j), view(tempy, :, i), inds, spnmx, spnmy, nmx, nmy, ranksx, ranksy) end - symmetric && (dest[i, j] = dest[j, i]) end end end + symmetric && LinearAlgebra.copytri!(dest, 'L') return dest end @@ -162,20 +162,17 @@ All vector arguments must have the same axes. ## Example ```julia-repl -julia> using KendallTau, BenchmarkTools, Random +julia> using StatsBase, BenchmarkTools, Random julia> Random.seed!(1); -julia> x = ifelse.(rand(1000) .< 0.05,missing,randn(1000));y = ifelse.(rand(1000) .< 0.05,\ -missing,randn(1000)); +julia> x = ifelse.(rand(1000) .< 0.05,missing,randn(1000));y = ifelse.(rand(1000) .< 0.05, missing,randn(1000)); -julia> sortpermx=sortperm(x);sortpermy=sortperm(y);inds=zeros(Int64,1000);\ -spnmx=zeros(Int64,1000);spnmy=zeros(Int64,1000); +julia> sortpermx=sortperm(x);sortpermy=sortperm(y);inds=zeros(Int64,1000);spnmx=zeros(Int64,1000);spnmy=zeros(Int64,1000); julia> nmx=zeros(1000);nmy=zeros(1000);ranksx=similar(x,Float64);ranksy=similar(y,Float64); -julia> @btime KendallTau.corspearman_kernel!(\$x,\$y,:pairwise,\$sortpermx,\$sortpermy,\ -\$inds,\$spnmx,\$spnmy,\$nmx,\$nmy,\$ranksx,\$ranksy) +julia> @btime corspearman_kernel!(\$x,\$y,:pairwise,\$sortpermx,\$sortpermy,\$inds,\$spnmx,\$spnmy,\$nmx,\$nmy,\$ranksx,\$ranksy) 4.671 μs (0 allocations: 0 bytes) -0.016058512110609713 ``` @@ -288,7 +285,7 @@ julia> x = y = fill(missing,2,2) julia> Statistics.cor(x,y) ERROR: MethodError: no method matching copy(::Missing) -julia> KendallTau._cor(x,y) +julia> StatsBase._cor(x,y) 2×2 Matrix{Union{Missing, Float64}}: 1.0 missing missing 1.0 @@ -417,7 +414,7 @@ Uses multiple threads when either `x` or `y` is a matrix. function corkendall(x::AbstractMatrix, y::AbstractMatrix=x; skipmissing::Symbol=:none) check_rankcor_args(x, y, skipmissing, true) - return pairwise(corkendall, eachcol(x), eachcol(y); skipmissing) + return pairwise(corkendall, eachcol(x), eachcol(y); skipmissing=skipmissing) end function corkendall(x::AbstractVector, y::AbstractVector; @@ -437,11 +434,11 @@ end StatsBase.corspearman, but consistent with StatsBase.corkendall. =# function corkendall(x::AbstractMatrix, y::AbstractVector; skipmissing::Symbol=:none) - return vec(corkendall(x, reshape(y, (length(y), 1)); skipmissing)) + return vec(corkendall(x, reshape(y, (length(y), 1)); skipmissing=skipmissing)) end function corkendall(x::AbstractVector, y::AbstractMatrix; skipmissing::Symbol=:none) - return corkendall(reshape(x, (length(x), 1)), y; skipmissing) + return corkendall(reshape(x, (length(x), 1)), y; skipmissing=skipmissing) end function corkendall(x::AbstractVector{T}) where {T} @@ -471,9 +468,9 @@ function corkendall_loop!(skipmissing::Symbol, f::typeof(corkendall), dest::Abst # Swap x and y for more efficient threaded loop. if nr < nc - dest′ = reshape(dest, size(dest, 2), size(dest, 1)) - corkendall_loop!(skipmissing, f, dest′, y, x, symmetric) - dest .= transpose(dest′) + dest2 = similar(dest, size(dest, 2), size(dest, 1)) + corkendall_loop!(skipmissing, f, dest2, y, x, symmetric) + dest .= transpose(dest2) return dest end @@ -515,12 +512,13 @@ function corkendall_loop!(skipmissing::Symbol, f::typeof(corkendall), dest::Abst else yi .= y[i] dest[j, i] = corkendall_kernel!(sortedxj, yi, permx, skipmissing; - scratch_py, scratch_sy, scratch_fx, scratch_fy) + scratch_py=scratch_py, scratch_sy=scratch_sy, scratch_fx=scratch_fx, + scratch_fy=scratch_fy) end - symmetric && (dest[i, j] = dest[j, i]) end end end + symmetric && LinearAlgebra.copytri!(dest, 'L') return dest end @@ -531,17 +529,15 @@ end # JSTOR, www.jstor.org/stable/2282833. function check_rankcor_args(x, y, skipmissing, allowlistwise::Bool) - Base.require_one_based_indexing(x, y) + # Base.require_one_based_indexing(x, y) #TODO find how to reject non-1-based in a way that works in Julia 1.0.5 size(x, 1) == size(y, 1) || throw(DimensionMismatch("x and y have inconsistent dimensions")) if allowlistwise skipmissing in (:none, :pairwise, :listwise) || - throw(ArgumentError("skipmissing must be one of :none, :pairwise or :listwise, \ - but got :$skipmissing")) + throw(ArgumentError("skipmissing must be one of :none, :pairwise or :listwise, but got :$skipmissing")) else skipmissing in (:none, :pairwise) || - throw(ArgumentError("skipmissing must be either :none or :pairwise, but \ - got :$skipmissing")) + throw(ArgumentError("skipmissing must be either :none or :pairwise, but got :$skipmissing")) end end @@ -585,7 +581,7 @@ function corkendall_kernel!(sortedx::AbstractVector{T}, y::AbstractVector{U}, end if missing isa T || missing isa V - sortedx, permutedy = handle_pairwise(sortedx, scratch_py; scratch_fx, scratch_fy) + sortedx, permutedy = handle_pairwise(sortedx, scratch_py; scratch_fx=scratch_fx, scratch_fy=scratch_fy) else permutedy = scratch_py end @@ -750,5 +746,4 @@ end # can also accept arguments with element type for which isnan is not defined but isless is # is defined, so that rank correlation makes sense. _isnan(x::T) where {T<:Number} = isnan(x) -_isnan(x) = false - +_isnan(x) = false \ No newline at end of file diff --git a/test/versus_naive.jl b/test/versus_naive.jl index ddde9f8..62cd537 100644 --- a/test/versus_naive.jl +++ b/test/versus_naive.jl @@ -1,5 +1,6 @@ using KendallTau using Test +using LinearAlgebra using Random: randn, rand, MersenneTwister @testset "corkendall against corkendall_naive" begin