Skip to content

Commit

Permalink
Sync with PGS62/StatsBase
Browse files Browse the repository at this point in the history
  • Loading branch information
PGS62 committed Apr 3, 2024
1 parent 64ad9d7 commit 467ddf5
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 59 deletions.
26 changes: 26 additions & 0 deletions scripts/quickcheckallocations.jl
Original file line number Diff line number Diff line change
Expand Up @@ -478,4 +478,30 @@ KendallTau.corspearman($xm; skipmissing=:none)
KendallTau.pairwise(KendallTau.corspearman,eachcol($xm),skipmissing=:none) 22.049 ms (1623 allocations: 17.27 MiB)
========================================================================================================================
========================================================================================================================
Dates.now() = DateTime("2024-04-03T11:47:26.152")
ENV["COMPUTERNAME"] = "DESKTOP-HSGAM5S"
Julia Version 1.10.2
Threads.nthreads() = 20
size(x) = (1000, 1000)
typeof(x) = Matrix{Float64}
size(xm) = (1000, 1000)
typeof(xm) = Matrix{Union{Missing, Float64}}
do_StatsBase_times = false
KendallTau.corkendall($x) 1.176 s (1409 allocations: 16.53 MiB)
KendallTau.corkendall($xm; skipmissing=:pairwise) 1.587 s (1409 allocations: 16.23 MiB)
KendallTau.pairwise(KendallTau.corkendall,eachcol($xm); skipmissing=:pairwise) 1.577 s (1377 allocations: 16.23 MiB)
KendallTau.corkendall($xm; skipmissing=:listwise) 5.748 ms (2410 allocations: 11.86 MiB)
KendallTau.pairwise(KendallTau.corkendall,eachcol($xm); skipmissing=:listwise) 5.196 ms (2378 allocations: 11.85 MiB)
KendallTau.corkendall($xm; skipmissing=:none) 53.383 ms (1420 allocations: 17.19 MiB)
KendallTau.pairwise(KendallTau.corkendall,eachcol($xm),skipmissing=:none) 52.164 ms (1418 allocations: 17.19 MiB)
KendallTau.corspearman($x) 17.053 ms (1223 allocations: 39.42 MiB)
KendallTau.corspearman($xm; skipmissing=:pairwise) 407.303 ms (1572 allocations: 23.98 MiB)
KendallTau.pairwise(KendallTau.corspearman,eachcol($xm); skipmissing=:pairwise) 405.928 ms (1540 allocations: 23.98 MiB)
KendallTau.corspearman($xm; skipmissing=:listwise) 2.944 ms (2209 allocations: 19.44 MiB)
KendallTau.pairwise(KendallTau.corspearman,eachcol($xm); skipmissing=:listwise) 2.966 ms (2177 allocations: 19.44 MiB)
KendallTau.corspearman($xm; skipmissing=:none) 13.384 ms (1721 allocations: 17.38 MiB)
KendallTau.pairwise(KendallTau.corspearman,eachcol($xm),skipmissing=:none) 13.110 ms (1719 allocations: 17.38 MiB)
========================================================================================================================
=#
1 change: 1 addition & 0 deletions src/KendallTau.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
module KendallTau
using Missings: disallowmissing
import StatsBase: cor, cov, _tiedrank!, tiedrank
using LinearAlgebra

include("rankcorr.jl")
include("corkendall_fromfile.jl")
Expand Down
55 changes: 27 additions & 28 deletions src/pairwise.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,31 +5,31 @@ function _pairwise!(::Val{:none}, f, dest::AbstractMatrix{V}, x, y,

# Swap x and y for more efficient threaded loop.
if nr < nc
dest′ = reshape(dest, size(dest, 2), size(dest, 1))
_pairwise!(Val(:none), f, dest′, y, x, symmetric)
dest .= transpose(dest′)
dest2 = similar(dest, size(dest, 2), size(dest, 1))
_pairwise!(Val(:none), f, dest2, y, x, symmetric)
dest .= transpose(dest2)
return dest
end

#cor and friends are special-cased.
iscor = (f in (corkendall, corspearman, cor))
(iscor || f == cov) && (symmetric = x === y)
diag_is_1 = (f in (corkendall, corspearman, cor))
(diag_is_1 || f == cov) && (symmetric = x === y)
#cov(x) is faster than cov(x, x)
(f == cov) && (f = ((x, y) -> x === y ? cov(x) : cov(x, y)))

Threads.@threads for subset in equal_sum_subsets(nr, Threads.nthreads())
for j in subset
for i = 1:(symmetric ? j : nc)
# For performance, diagonal is special-cased
if iscor && i==j && y[i] === x[j] && V !== Union{}
if diag_is_1 && i == j && y[i] === x[j] && V !== Union{}
dest[j, i] = V === Missing ? missing : 1.0
else
dest[j, i] = f(x[j], y[i])
end
symmetric && (dest[i, j] = dest[j, i])
end
end
end
symmetric && LinearAlgebra.copytri!(dest, 'L')
return dest
end

Expand Down Expand Up @@ -81,35 +81,35 @@ function _pairwise!(::Val{:pairwise}, f, dest::AbstractMatrix{V}, x, y, symmetri

# Swap x and y for more efficient threaded loop.
if nr < nc
dest′ = reshape(dest, size(dest, 2), size(dest, 1))
_pairwise!(Val(:pairwise), f, dest′, y, x, symmetric)
dest .= transpose(dest′)
dest2 = similar(dest, size(dest, 2), size(dest, 1))
_pairwise!(Val(:pairwise), f, dest2, y, x, symmetric)
dest .= transpose(dest2)
return dest
end

#cor and friends are special-cased.
iscor = (f in (corkendall, corspearman, cor))
(iscor || f == cov) && (symmetric = x === y)
diag_is_1 = (f in (corkendall, corspearman, cor))
(diag_is_1 || f == cov) && (symmetric = x === y)
#cov(x) is faster than cov(x, x)
(f == cov) && (f = ((x, y) -> x === y ? cov(x) : cov(x, y)))

nmtx = promoted_nmtype(x)[]
nmty = promoted_nmtype(y)[]

Threads.@threads for subset in equal_sum_subsets(nr, Threads.nthreads())
scratch_fx = task_local_vector(:scratch_fx, nmtx, m)
scratch_fy = task_local_vector(:scratch_fy, nmty, m)
for j in subset
scratch_fx = task_local_vector(:scratch_fx, nmtx, m)
scratch_fy = task_local_vector(:scratch_fy, nmty, m)
for i = 1:(symmetric ? j : nc)
if iscor && i == j && y[i] === x[j] && V !== Union{} && V!== Missing
if diag_is_1 && i == j && y[i] === x[j] && V !== Union{} && V !== Missing
dest[j, i] = 1.0
else
dest[j, i] = f(handle_pairwise(x[j], y[i]; scratch_fx, scratch_fy)...)
dest[j, i] = f(handle_pairwise(x[j], y[i]; scratch_fx=scratch_fx, scratch_fy=scratch_fy)...)
end
symmetric && (dest[i, j] = dest[j, i])
end
end
end
symmetric && LinearAlgebra.copytri!(dest, 'L')
return dest
end

Expand Down Expand Up @@ -223,7 +223,7 @@ presence `missing`, `NaN` or `Inf` entries.
# Examples
```jldoctest
julia> using KendallTau, Statistics
julia> using StatsBase, Statistics
julia> dest = zeros(3, 3);
Expand Down Expand Up @@ -255,7 +255,7 @@ julia> dest
```
"""
function pairwise!(f, dest::AbstractMatrix, x, y=x;
symmetric::Bool=false, skipmissing::Symbol=:none)
symmetric::Bool=false, skipmissing::Symbol=:none)
check_vectors(x, y, skipmissing, symmetric)
return _pairwise!(f, dest, x, y, symmetric=symmetric, skipmissing=skipmissing)
end
Expand Down Expand Up @@ -288,7 +288,7 @@ presence `missing`, `NaN` or `Inf` entries.
# Examples
```jldoctest
julia> using KendallTau, Statistics
julia> using StatsBase, Statistics
julia> x = [1 3 7
2 5 6
Expand Down Expand Up @@ -326,15 +326,14 @@ promoted_nmtype(x) = mapreduce(nonmissingtype ∘ eltype, promote_type, x, init=
"""
handle_listwise(x, y)
Remove missings in a listwise manner. Assumes `x` and `y` are vectors of iterables which
have been validated via `check_vectors`.
Remove missings in a listwise manner. Assumes `x` and `y` are vectors of iterables which
have been validated via `check_vectors`.
## Example
```julia-repl
julia> a = [6,7,8,9,10,missing];b = [4,5,6,7,missing,8];c = [2,3,4,missing,5,6];d = [1,2,3,4,5,6];
julia> KendallTau.handle_listwise([a,b],[c,d])
julia> StatsBase.handle_listwise([a,b],[c,d])
([[6, 7, 8], [4, 5, 6]], [[2, 3, 4], [1, 2, 3]])
```
"""
Expand Down Expand Up @@ -382,16 +381,16 @@ function handle_pairwise(x::AbstractVector, y::AbstractVector;

axes(x) == axes(y) || throw(DimensionMismatch("x and y have inconsistent dimensions"))
lb = first(axes(x, 1))
j = lb - 1
j = lb
@inbounds for i in eachindex(x)
if !(ismissing(x[i]) || ismissing(y[i]))
j += 1
scratch_fx[j] = x[i]
scratch_fy[j] = y[i]
j += 1
end
end

return view(scratch_fx, lb:j), view(scratch_fy, lb:j)
return view(scratch_fx, lb:(j-1)), view(scratch_fy, lb:(j-1))
end

#=Condition a) makes equal_sum_subsets useful for load-balancing the multi-threaded section
Expand All @@ -405,7 +404,7 @@ is nearly equal. If `n` is a multiple of `2 * num_subsets` both conditions hold
## Example
```julia-repl
julia> KendallTau.equal_sum_subsets(30,5)
julia> StatsBase.equal_sum_subsets(30,5)
5-element Vector{Vector{Int64}}:
[30, 21, 20, 11, 10, 1]
[29, 22, 19, 12, 9, 2]
Expand Down
57 changes: 26 additions & 31 deletions src/rankcorr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,17 @@ function corspearman(x::AbstractVector, y::AbstractVector; skipmissing::Symbol=:
end

function corspearman(x::AbstractMatrix, y::AbstractVector; skipmissing::Symbol=:none)
return corspearman(x, reshape(y, (length(y), 1)); skipmissing)
return corspearman(x, reshape(y, (length(y), 1)); skipmissing=skipmissing)
end

function corspearman(x::AbstractVector, y::AbstractMatrix; skipmissing::Symbol=:none)
return corspearman(reshape(x, (length(x), 1)), y; skipmissing)
return corspearman(reshape(x, (length(x), 1)), y; skipmissing=skipmissing)
end

function corspearman(x::AbstractMatrix, y::AbstractMatrix=x;
skipmissing::Symbol=:none)
check_rankcor_args(x, y, skipmissing, true)
return pairwise(corspearman, eachcol(x), eachcol(y); skipmissing)
return pairwise(corspearman, eachcol(x), eachcol(y); skipmissing=skipmissing)
end

function corspearman(x::AbstractVector{T}) where {T}
Expand Down Expand Up @@ -99,9 +99,9 @@ function _pairwise!(::Val{:pairwise}, f::typeof(corspearman),

# Swap x and y for more efficient threaded loop.
if nr < nc
dest′ = reshape(dest, size(dest, 2), size(dest, 1))
_pairwise!(Val(:pairwise), f, dest′, y, x, symmetric)
dest .= transpose(dest′)
dest2 = similar(dest, size(dest, 2), size(dest, 1))
_pairwise!(Val(:pairwise), f, dest2, y, x, symmetric)
dest .= transpose(dest2)
return dest
end

Expand Down Expand Up @@ -137,10 +137,10 @@ function _pairwise!(::Val{:pairwise}, f::typeof(corspearman),
view(tempx, :, j), view(tempy, :, i), inds, spnmx, spnmy, nmx,
nmy, ranksx, ranksy)
end
symmetric && (dest[i, j] = dest[j, i])
end
end
end
symmetric && LinearAlgebra.copytri!(dest, 'L')
return dest
end

Expand All @@ -162,20 +162,17 @@ All vector arguments must have the same axes.
## Example
```julia-repl
julia> using KendallTau, BenchmarkTools, Random
julia> using StatsBase, BenchmarkTools, Random
julia> Random.seed!(1);
julia> x = ifelse.(rand(1000) .< 0.05,missing,randn(1000));y = ifelse.(rand(1000) .< 0.05,\
missing,randn(1000));
julia> x = ifelse.(rand(1000) .< 0.05,missing,randn(1000));y = ifelse.(rand(1000) .< 0.05, missing,randn(1000));
julia> sortpermx=sortperm(x);sortpermy=sortperm(y);inds=zeros(Int64,1000);\
spnmx=zeros(Int64,1000);spnmy=zeros(Int64,1000);
julia> sortpermx=sortperm(x);sortpermy=sortperm(y);inds=zeros(Int64,1000);spnmx=zeros(Int64,1000);spnmy=zeros(Int64,1000);
julia> nmx=zeros(1000);nmy=zeros(1000);ranksx=similar(x,Float64);ranksy=similar(y,Float64);
julia> @btime KendallTau.corspearman_kernel!(\$x,\$y,:pairwise,\$sortpermx,\$sortpermy,\
\$inds,\$spnmx,\$spnmy,\$nmx,\$nmy,\$ranksx,\$ranksy)
julia> @btime corspearman_kernel!(\$x,\$y,:pairwise,\$sortpermx,\$sortpermy,\$inds,\$spnmx,\$spnmy,\$nmx,\$nmy,\$ranksx,\$ranksy)
4.671 μs (0 allocations: 0 bytes)
-0.016058512110609713
```
Expand Down Expand Up @@ -288,7 +285,7 @@ julia> x = y = fill(missing,2,2)
julia> Statistics.cor(x,y)
ERROR: MethodError: no method matching copy(::Missing)
julia> KendallTau._cor(x,y)
julia> StatsBase._cor(x,y)
2×2 Matrix{Union{Missing, Float64}}:
1.0 missing
missing 1.0
Expand Down Expand Up @@ -417,7 +414,7 @@ Uses multiple threads when either `x` or `y` is a matrix.
function corkendall(x::AbstractMatrix, y::AbstractMatrix=x;
skipmissing::Symbol=:none)
check_rankcor_args(x, y, skipmissing, true)
return pairwise(corkendall, eachcol(x), eachcol(y); skipmissing)
return pairwise(corkendall, eachcol(x), eachcol(y); skipmissing=skipmissing)
end

function corkendall(x::AbstractVector, y::AbstractVector;
Expand All @@ -437,11 +434,11 @@ end
StatsBase.corspearman, but consistent with StatsBase.corkendall.
=#
function corkendall(x::AbstractMatrix, y::AbstractVector; skipmissing::Symbol=:none)
return vec(corkendall(x, reshape(y, (length(y), 1)); skipmissing))
return vec(corkendall(x, reshape(y, (length(y), 1)); skipmissing=skipmissing))
end

function corkendall(x::AbstractVector, y::AbstractMatrix; skipmissing::Symbol=:none)
return corkendall(reshape(x, (length(x), 1)), y; skipmissing)
return corkendall(reshape(x, (length(x), 1)), y; skipmissing=skipmissing)
end

function corkendall(x::AbstractVector{T}) where {T}
Expand Down Expand Up @@ -471,9 +468,9 @@ function corkendall_loop!(skipmissing::Symbol, f::typeof(corkendall), dest::Abst

# Swap x and y for more efficient threaded loop.
if nr < nc
dest′ = reshape(dest, size(dest, 2), size(dest, 1))
corkendall_loop!(skipmissing, f, dest′, y, x, symmetric)
dest .= transpose(dest′)
dest2 = similar(dest, size(dest, 2), size(dest, 1))
corkendall_loop!(skipmissing, f, dest2, y, x, symmetric)
dest .= transpose(dest2)
return dest
end

Expand Down Expand Up @@ -515,12 +512,13 @@ function corkendall_loop!(skipmissing::Symbol, f::typeof(corkendall), dest::Abst
else
yi .= y[i]
dest[j, i] = corkendall_kernel!(sortedxj, yi, permx, skipmissing;
scratch_py, scratch_sy, scratch_fx, scratch_fy)
scratch_py=scratch_py, scratch_sy=scratch_sy, scratch_fx=scratch_fx,
scratch_fy=scratch_fy)
end
symmetric && (dest[i, j] = dest[j, i])
end
end
end
symmetric && LinearAlgebra.copytri!(dest, 'L')
return dest
end

Expand All @@ -531,17 +529,15 @@ end
# JSTOR, www.jstor.org/stable/2282833.

function check_rankcor_args(x, y, skipmissing, allowlistwise::Bool)
Base.require_one_based_indexing(x, y)
# Base.require_one_based_indexing(x, y) #TODO find how to reject non-1-based in a way that works in Julia 1.0.5
size(x, 1) == size(y, 1) ||
throw(DimensionMismatch("x and y have inconsistent dimensions"))
if allowlistwise
skipmissing in (:none, :pairwise, :listwise) ||
throw(ArgumentError("skipmissing must be one of :none, :pairwise or :listwise, \
but got :$skipmissing"))
throw(ArgumentError("skipmissing must be one of :none, :pairwise or :listwise, but got :$skipmissing"))
else
skipmissing in (:none, :pairwise) ||
throw(ArgumentError("skipmissing must be either :none or :pairwise, but \
got :$skipmissing"))
throw(ArgumentError("skipmissing must be either :none or :pairwise, but got :$skipmissing"))
end
end

Expand Down Expand Up @@ -585,7 +581,7 @@ function corkendall_kernel!(sortedx::AbstractVector{T}, y::AbstractVector{U},
end

if missing isa T || missing isa V
sortedx, permutedy = handle_pairwise(sortedx, scratch_py; scratch_fx, scratch_fy)
sortedx, permutedy = handle_pairwise(sortedx, scratch_py; scratch_fx=scratch_fx, scratch_fy=scratch_fy)
else
permutedy = scratch_py
end
Expand Down Expand Up @@ -750,5 +746,4 @@ end
# can also accept arguments with element type for which isnan is not defined but isless is
# is defined, so that rank correlation makes sense.
_isnan(x::T) where {T<:Number} = isnan(x)
_isnan(x) = false

_isnan(x) = false
1 change: 1 addition & 0 deletions test/versus_naive.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using KendallTau
using Test
using LinearAlgebra
using Random: randn, rand, MersenneTwister

@testset "corkendall against corkendall_naive" begin
Expand Down

0 comments on commit 467ddf5

Please sign in to comment.