From 96525424d75d826820ec6459d42c490072c8ff61 Mon Sep 17 00:00:00 2001
From: Philip Swannell <18028484+PGS62@users.noreply.github.com>
Date: Thu, 4 Apr 2024 16:21:33 +0100
Subject: [PATCH] Now have iterators EqualSumSubsets and TwoStepRange

---
 scripts/quickcheckallocations.jl |  27 +++++++
 src/pairwise.jl                  | 124 +++++++++++++++++++------------
 src/rankcorr.jl                  |   4 +-
 test/rankcorr.jl                 |   8 +-
 4 files changed, 111 insertions(+), 52 deletions(-)

diff --git a/scripts/quickcheckallocations.jl b/scripts/quickcheckallocations.jl
index 5560011..6d0619b 100644
--- a/scripts/quickcheckallocations.jl
+++ b/scripts/quickcheckallocations.jl
@@ -504,4 +504,31 @@ KendallTau.corspearman($xm; skipmissing=:none)
 KendallTau.pairwise(KendallTau.corspearman,eachcol($xm),skipmissing=:none)       13.110 ms (1719 allocations: 17.38 MiB)
 ========================================================================================================================
 
+'Slightly reduced allocations thanks to EqualSumSubsets and TwoStepRange
+========================================================================================================================
+Dates.now() = DateTime("2024-04-04T15:08:01.261")
+ENV["COMPUTERNAME"] = "DESKTOP-HSGAM5S"
+Julia Version 1.10.2
+Threads.nthreads() = 20
+size(x) = (1000, 1000)
+typeof(x) = Matrix{Float64}
+size(xm) = (1000, 1000)
+typeof(xm) = Matrix{Union{Missing, Float64}}
+do_StatsBase_times = false
+KendallTau.corkendall($x)                                                        1.169 s (1329 allocations: 16.49 MiB)
+KendallTau.corkendall($xm; skipmissing=:pairwise)                                1.221 s (1329 allocations: 16.19 MiB)
+KendallTau.pairwise(KendallTau.corkendall,eachcol($xm); skipmissing=:pairwise)   1.445 s (1297 allocations: 16.19 MiB)
+KendallTau.corkendall($xm; skipmissing=:listwise)                                5.546 ms (2330 allocations: 11.82 MiB)
+KendallTau.pairwise(KendallTau.corkendall,eachcol($xm); skipmissing=:listwise)   5.505 ms (2298 allocations: 11.82 MiB)
+KendallTau.corkendall($xm; skipmissing=:none)                                    51.045 ms (1340 allocations: 17.15 MiB)
+KendallTau.pairwise(KendallTau.corkendall,eachcol($xm),skipmissing=:none)        54.172 ms (1338 allocations: 17.15 MiB)
+KendallTau.corspearman($x)                                                       16.746 ms (1223 allocations: 39.42 MiB)
+KendallTau.corspearman($xm; skipmissing=:pairwise)                               421.965 ms (1492 allocations: 23.94 MiB)
+KendallTau.pairwise(KendallTau.corspearman,eachcol($xm); skipmissing=:pairwise)  408.834 ms (1460 allocations: 23.94 MiB)
+KendallTau.corspearman($xm; skipmissing=:listwise)                               3.235 ms (2209 allocations: 19.44 MiB)
+KendallTau.pairwise(KendallTau.corspearman,eachcol($xm); skipmissing=:listwise)  2.911 ms (2177 allocations: 19.44 MiB)
+KendallTau.corspearman($xm; skipmissing=:none)                                   13.804 ms (1721 allocations: 17.38 MiB)
+KendallTau.pairwise(KendallTau.corspearman,eachcol($xm),skipmissing=:none)       13.510 ms (1719 allocations: 17.38 MiB)
+========================================================================================================================
+
 =#
\ No newline at end of file
diff --git a/src/pairwise.jl b/src/pairwise.jl
index a8d60b8..2121e22 100644
--- a/src/pairwise.jl
+++ b/src/pairwise.jl
@@ -17,7 +17,7 @@ function _pairwise!(::Val{:none}, f, dest::AbstractMatrix{V}, x, y,
     #cov(x) is faster than cov(x, x)
     (f == cov) && (f = ((x, y) -> x === y ? cov(x) : cov(x, y)))
 
-    Threads.@threads for subset in Equal_sum_vectors(nc, Threads.nthreads())
+    Threads.@threads for subset in EqualSumSubsets(nc, Threads.nthreads())
         for j in subset
             for i = (symmetric ? j : 1):nr
                 # For performance, diagonal is special-cased
@@ -96,7 +96,7 @@ function _pairwise!(::Val{:pairwise}, f, dest::AbstractMatrix{V}, x, y, symmetri
     nmtx = promoted_nmtype(x)[]
     nmty = promoted_nmtype(y)[]
 
-    Threads.@threads for subset in Equal_sum_vectors(nc, Threads.nthreads())
+    Threads.@threads for subset in EqualSumSubsets(nc, Threads.nthreads())
         scratch_fx = task_local_vector(:scratch_fx, nmtx, m)
         scratch_fy = task_local_vector(:scratch_fy, nmty, m)
         for j in subset
@@ -407,72 +407,104 @@ function task_local_vector(key::Symbol, similarto::AbstractArray{V},
 end
 
 """
-    Equal_sum_vectors
+    EqualSumSubsets
 
-An iterator that partitions the integers 1 to n into `num_vectors` vectors such that a) each
-vector is of nearly equal length; and b) the sum of the elements in each vector is nearly
-equal. If `n` is a multiple of `2 * num_vectors` both conditions hold exactly.
+An iterator that partitions the integers 1 to n into `num_subsets` "subsets" (of type
+TwoStepRange) such that a) each subset is of nearly equal length; and b) the sum of the
+elements in each subset is nearly equal. If `n` is a multiple of `2 * num_subsets` both
+conditions hold exactly.
 
 ## Example
 ```julia-repl
-julia> for s in KendallTau.Equal_sum_vectors(30,5)
-println((s, sum(s)))
+julia> for s in KendallTau.EqualSumSubsets(30,5)
+println((collect(s), sum(s)))
 end
 ([30, 21, 20, 11, 10, 1], 93)
 ([29, 22, 19, 12, 9, 2], 93)
 ([28, 23, 18, 13, 8, 3], 93)
 ([27, 24, 17, 14, 7, 4], 93)
 ([26, 25, 16, 15, 6, 5], 93)
+
+#Check for correct partitioning, in this case of integers 1:1000 into 17 subsets.
+julia> sort(vcat([collect(s) for s in KendallTau.EqualSumSubsets(1000,17)]...))==1:1000
+true
+
 ```
 """
-struct Equal_sum_vectors
+struct EqualSumSubsets
     n::Int64
-    num_vectors::Int64
+    num_subsets::Int64
+
+    function EqualSumSubsets(n, num_subsets)
+        n > 0 || throw("n must be positive, but got $n")
+        num_subsets > 0 || throw("num_subsets must be positive, but got $num_subsets")
+        return new(n,num_subsets)
+    end
+
 end
 
-Base.length(x::Equal_sum_vectors) = min(x.n, x.num_vectors)
+Base.eltype(::EqualSumSubsets) = TwoStepRange
+Base.length(x::EqualSumSubsets) = min(x.n, x.num_subsets)
+Base.firstindex(::EqualSumSubsets) = 1
 
-Base.firstindex(x::Equal_sum_vectors) = 1
+function Base.iterate(ess::EqualSumSubsets, state::Int64=1)
+    state > length(ess) && return nothing
+    return getindex(ess, state), state + 1
+end
 
-function Base.iterate(ess::Equal_sum_vectors, state::Int64=1)
-    state > length(ess) && return (nothing)
-    return (getindex(ess, state), state + 1)
+function Base.getindex(ess::EqualSumSubsets, i::Int64=1)
+    n, nss = ess.n, ess.num_subsets
+    step1 = 2i - 2nss - 1
+    step2 = 1 - 2i
+    return TwoStepRange(n - i + 1, step1, step2)
 end
 
-function demo(n, num_vectors)
-    for s in Equal_sum_vectors(n, num_vectors)
-        println(s, sum(s))
+"""
+TwoStepRange
+
+Range with a starting value of `start`, stop value of `1` and a step that alternates
+between `step1` and `step2`. `start` must be positive and `step1` and `step2` must be
+negative.
+
+# Examples
+```jldoctest
+julia> collect(KendallTau.TwoStepRange(30,-7,-3))
+6-element Vector{Int64}:
+ 30
+ 23
+ 20
+ 13
+ 10
+  3
+
+```
+"""
+struct TwoStepRange
+    start::Int64
+    step1::Int64
+    step2::Int64
+
+    function TwoStepRange(start, step1, step2)
+        start > 0 || throw("start must be positive, but got $start")
+        step1 < 0 || throw("step1 must be negative, but got $step1")
+        step2 < 0 || throw("step2 must be negative, but got $step2")
+        return new(start, step1, step2)
     end
 end
 
-function Base.getindex(ess::Equal_sum_vectors, i::Int64=1)
-    i > length(ess) && return (nothing)
-    n, nss = ess.n, ess.num_vectors
-    s = 2i - 1
-    b = 2nss - s
+Base.eltype(::TwoStepRange) = Int64
 
-    lresult = div(n, nss)
-    if mod(lresult, 2) == 1
-        if i > nss - rem(n, nss)
-            lresult += 1
-        end
-    else
-        if i <= rem(n, nss)
-            lresult += 1
-        end
-    end
+function Base.length(tsr::TwoStepRange)
+    return length((tsr.start):(tsr.step1+tsr.step2):1) +
+           length((tsr.start+tsr.step1):(tsr.step1+tsr.step2):1)
+end
 
-    result = zeros(Int64, lresult)
-    result[1] = n - i + 1
-    k = 1
-    while true
-        x = result[k] - (mod(k, 2) == 0 ? s : b)
-        if x > 0
-            k += 1
-            result[k] = x
-        else
-            break
-        end
-    end
-    return result
+function Base.iterate(tsr::TwoStepRange, i::Int64=1)
+    (i > length(tsr)) && return nothing
+    return getindex(tsr, i), i + 1
+end
+
+function Base.getindex(tsr::TwoStepRange, i::Int64=1)::Int64
+    a, b = divrem(i - 1, 2)
+    return tsr.start + (tsr.step1 + tsr.step2) * a + b * tsr.step1
 end
\ No newline at end of file
diff --git a/src/rankcorr.jl b/src/rankcorr.jl
index bc78251..dca8277 100644
--- a/src/rankcorr.jl
+++ b/src/rankcorr.jl
@@ -112,7 +112,7 @@ function _pairwise!(::Val{:pairwise}, f::typeof(corspearman),
     fl64 = Float64[]
     nmtx = promoted_nmtype(x)[]
     nmty = promoted_nmtype(y)[]
-    Threads.@threads for subset in Equal_sum_vectors(nr, Threads.nthreads())
+    Threads.@threads for subset in EqualSumSubsets(nr, Threads.nthreads())
 
         for i in subset
 
@@ -482,7 +482,7 @@ function corkendall_loop!(skipmissing::Symbol, f::typeof(corkendall), dest::Abst
 
     symmetric = x === y
 
-    Threads.@threads for subset in Equal_sum_vectors(nr, Threads.nthreads())
+    Threads.@threads for subset in EqualSumSubsets(nr, Threads.nthreads())
 
         for i in subset
 
diff --git a/test/rankcorr.jl b/test/rankcorr.jl
index 31d42d1..2562855 100644
--- a/test/rankcorr.jl
+++ b/test/rankcorr.jl
@@ -32,14 +32,14 @@ using Test
     @test KendallTau.midpoint(1, 10) == 5
     @test KendallTau.midpoint(1, widen(10)) == 5
 
-    for n in 1:50, nss in 1:5
+    for n in 1:200, nss in 1:7
         #check is a partition
-        @test sort(vcat([s for s in KendallTau.Equal_sum_vectors(n, nss)]...)) == 1:n
+        @test sort(vcat([collect(s) for s in KendallTau.EqualSumSubsets(n, nss)]...)) == 1:n
         #check near-equal lengths
-        lengths = [length(s) for s in KendallTau.Equal_sum_vectors(n, nss)]
+        lengths = [length(s) for s in KendallTau.EqualSumSubsets(n, nss)]
         @test (maximum(lengths) - minimum(lengths)) <= 1
         #check near-equal sums
-        sums = [sum(s) for s in KendallTau.Equal_sum_vectors(n, nss)]
+        sums = [sum(collect(s)) for s in KendallTau.EqualSumSubsets(n, nss)]
         @test (maximum(sums) - minimum(sums)) < nss
     end