Skip to content

Commit 2d82375

Browse files
committed
some tests for new healpix ring workspace
1 parent 7431a93 commit 2d82375

File tree

4 files changed

+37
-95
lines changed

4 files changed

+37
-95
lines changed

Project.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
name = "XGPaint"
22
uuid = "af630e4a-6754-4ec2-ab8a-f9f8b9ebafbf"
33
authors = ["Zack Li"]
4-
version = "0.4"
4+
version = "0.4.0"
55

66
[deps]
77
CSVFiles = "5d742f6a-9f54-50ce-8119-2520741973ca"
8+
ChunkSplitters = "ae650224-84b6-46f8-82ea-d812ca08434e"
89
Cosmology = "76746363-e552-5dba-9a5a-cef6fa9cc5ab"
910
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
1011
DataInterpolations = "82cc6244-b520-54b8-b5a6-8a565e85f1d0"

src/profiles.jl

Lines changed: 25 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11

22

3-
# RECTANGULAR WORKSPACES
3+
# Import ChunkSplitters for better threading
4+
using ChunkSplitters: chunks
45

6+
# RECTANGULAR WORKSPACES
57

6-
# default workspaces are immutable, so just forward the type
7-
wrapserialworkspace(w, tid) = w
88

99
struct CarClenshawCurtisProfileWorkspace{T,A<:AbstractArray{T,2}} <: AbstractProfileWorkspace{T}
1010
sin_α::A
@@ -50,13 +50,16 @@ function profile_grid(model::AbstractGNFW{T}, logθs, redshifts, logMs) where T
5050
N_logθ, N_z, N_logM = length(logθs), length(redshifts), length(logMs)
5151
A = zeros(T, (N_logθ, N_z, N_logM))
5252

53-
Threads.@threads :static for im in 1:N_logM
54-
logM = logMs[im]
55-
M = 10^(logM)
56-
for (iz, z) in enumerate(redshifts)
57-
forin 1:N_logθ
58-
θ = exp(logθs[iθ])
59-
A[iθ, iz, im] = max(zero(T), model(θ, M, z))
53+
# Use ChunkSplitters for better load balancing
54+
Threads.@threads for chunk in chunks(1:N_logM; n=Threads.nthreads())
55+
for im in chunk
56+
logM = logMs[im]
57+
M = 10^(logM)
58+
for (iz, z) in enumerate(redshifts)
59+
forin 1:N_logθ
60+
θ = exp(logθs[iθ])
61+
A[iθ, iz, im] = max(zero(T), model(θ, M, z))
62+
end
6063
end
6164
end
6265
end
@@ -325,7 +328,7 @@ end
325328
function profile_paint!(m::Enmap{T, 2, Matrix{T}, Gnomonic{T}},
326329
workspace::GnomonicProfileWorkspace, model, Mh, z, α₀, δ₀,
327330
θmax, normalization=1) where T
328-
profile_paint_generic!(m, model, workspace, Mh, z, α₀, δ₀, θmax, normalization)
331+
profile_paint_generic!(m, workspace, model, Mh, z, α₀, δ₀, θmax, normalization)
329332
end
330333

331334

@@ -417,25 +420,15 @@ function paint!(m, workspace, model, masses, redshifts, αs, δs;
417420
zerobeforepainting && _fillzero!(m)
418421

419422
N_sources = length(masses)
420-
chunksize = ceil(Int, N_sources / (2Threads.nthreads()))
421-
chunks = chunk(N_sources, chunksize)
422-
423+
423424
if N_sources < 2Threads.nthreads() # don't thread if there are not many sources
424-
return paintrange!(1:N_sources, m, wrapserialworkspace(workspace, 1),
425+
return paintrange!(1:N_sources, m, workspace,
425426
model, masses, redshifts, αs, δs)
426427
end
427428

428-
Threads.@threads for ti in 1:Threads.nthreads()
429-
chunk_i = 2ti
430-
i1, i2 = chunks[chunk_i]
431-
paintrange!(i1:i2, m, wrapserialworkspace(workspace, ti),
432-
model, masses, redshifts, αs, δs)
433-
end
434-
435-
Threads.@threads for ti in 1:Threads.nthreads()
436-
chunk_i = 2ti - 1
437-
i1, i2 = chunks[chunk_i]
438-
paintrange!(i1:i2, m, wrapserialworkspace(workspace, ti),
429+
# Use ChunkSplitters for better load balancing
430+
Threads.@threads for chunk in chunks(1:N_sources; n=2*Threads.nthreads())
431+
paintrange!(chunk, m, workspace,
439432
model, masses, redshifts, αs, δs)
440433
end
441434
end
@@ -460,25 +453,15 @@ function paint!(m, workspace, model, masses, redshifts, αs, δs, proj_v_over_c;
460453
zerobeforepainting && _fillzero!(m)
461454

462455
N_sources = length(masses)
463-
chunksize = ceil(Int, N_sources / (2Threads.nthreads()))
464-
chunks = chunk(N_sources, chunksize)
465-
456+
466457
if N_sources < 2Threads.nthreads() # don't thread if there are not many sources
467-
return paintrange!(1:N_sources, m, wrapserialworkspace(workspace, 1),
468-
model, masses, redshifts, αs, δs, proj_v_over_c)
469-
end
470-
471-
Threads.@threads for ti in 1:Threads.nthreads()
472-
chunk_i = 2ti
473-
i1, i2 = chunks[chunk_i]
474-
paintrange!(i1:i2, m, wrapserialworkspace(workspace, ti),
458+
return paintrange!(1:N_sources, m, workspace,
475459
model, masses, redshifts, αs, δs, proj_v_over_c)
476460
end
477461

478-
Threads.@threads for ti in 1:Threads.nthreads()
479-
chunk_i = 2ti - 1
480-
i1, i2 = chunks[chunk_i]
481-
paintrange!(i1:i2, m, wrapserialworkspace(workspace, ti),
462+
# Use ChunkSplitters for better load balancing
463+
Threads.@threads for chunk in chunks(1:N_sources; n=2*Threads.nthreads())
464+
paintrange!(chunk, m, workspace,
482465
model, masses, redshifts, αs, δs, proj_v_over_c)
483466
end
484467
end
@@ -488,6 +471,6 @@ end
488471
# function paint!(m, workspace::HealpixSerialProfileWorkspace, model, masses, redshifts, αs, δs;
489472
# zerobeforepainting=true)
490473
# zerobeforepainting && _fillzero!(m)
491-
# return paintrange!(1:length(masses), m, wrapserialworkspace(workspace, 1),
474+
# return paintrange!(1:length(masses), m, workspace,
492475
# model, masses, redshifts, αs, δs)
493476
# end

src/util.jl

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ using Healpix
33
using Random
44
using Random: MersenneTwister
55
using LazyArtifacts
6+
using ChunkSplitters: chunks
67

78
"""
89
Utility function to read an HDF5 table with x, y, z, M_h as the four rows.
@@ -96,22 +97,12 @@ function chunk(arr_len, chunksize::Integer)
9697
end
9798

9899

99-
function getrange(n)
100-
tid = Threads.threadid()
101-
nt = Threads.nthreads()
102-
d , r = divrem(n, nt)
103-
from = (tid - 1) * d + min(r, tid - 1) + 1
104-
to = from + d - 1 + (tid r ? 1 : 0)
105-
from:to
106-
end
107-
108-
109-
function threaded_rand!(random_number_generators, arr::Array{T,1};
110-
chunksize=4096) where T
100+
function threaded_rand!(random_number_generators, arr::Array{T,1}) where T
111101

112-
num = size(arr,1)
113-
Threads.@threads :static for (i1, i2) in chunk(num, chunksize)
114-
@views rand!(random_number_generators[Threads.threadid()], arr[i1:i2])
102+
# Use ChunkSplitters for better threading
103+
Threads.@threads for chunk in chunks(eachindex(arr); n=Threads.nthreads())
104+
tid = Threads.threadid()
105+
@views rand!(random_number_generators[tid], arr[chunk])
115106
end
116107
end
117108

test/test_ringworkspace.jl

Lines changed: 4 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@ using Test
66
"""
77
Brute-force reference implementation that loops over all pixels
88
"""
9-
function profile_paint_bruteforce!(m::HealpixMap{T, RingOrder}, model, Mh, z, α₀, δ₀, θmax, normalization=1) where T
9+
function profile_paint_bruteforce!(
10+
m::HealpixMap{T, RingOrder}, model, Mh, z, α₀, δ₀, θmax, normalization=1
11+
) where T
12+
1013
ϕ₀ = α₀
1114
θ₀ = T(π)/2 - δ₀
1215
x₀, y₀, z₀ = ang2vec(θ₀, ϕ₀)
@@ -107,42 +110,6 @@ XGPaint.compute_θmin(::TestModel{T}) where T = eps(T)
107110
end
108111
end
109112

110-
@testset "RingWorkspace Performance" begin
111-
# Simple performance check - should be faster than brute force
112-
nside = 128 # Smaller than before for faster testing
113-
res = Healpix.Resolution(nside)
114-
workspace = RingWorkspace(res)
115-
116-
map_ringworkspace = HealpixMap{Float64, RingOrder}(zeros(Float64, nside2npix(nside)))
117-
map_bruteforce = HealpixMap{Float64, RingOrder}(zeros(Float64, nside2npix(nside)))
118-
119-
model = TestModel(1.0)
120-
121-
# Test case
122-
α₀, δ₀, θmax = 1.5708, 0.7854, 0.1
123-
Mh, z = 1e14, 0.5
124-
125-
# Warm up
126-
XGPaint.profile_paint_generic!(map_ringworkspace, workspace, model, Mh, z, α₀, δ₀, θmax)
127-
profile_paint_bruteforce!(map_bruteforce, model, Mh, z, α₀, δ₀, θmax)
128-
129-
# Time RingWorkspace
130-
fill!(map_ringworkspace.pixels, 0.0)
131-
time_ringworkspace = @elapsed XGPaint.profile_paint_generic!(map_ringworkspace, workspace, model, Mh, z, α₀, δ₀, θmax)
132-
133-
# Time brute force
134-
fill!(map_bruteforce.pixels, 0.0)
135-
time_bruteforce = @elapsed profile_paint_bruteforce!(map_bruteforce, model, Mh, z, α₀, δ₀, θmax)
136-
137-
speedup = time_bruteforce / time_ringworkspace
138-
139-
# Should be significantly faster
140-
@test speedup > 5.0 # Conservative threshold
141-
142-
# Should also give same results
143-
@test map_ringworkspace.pixels map_bruteforce.pixels
144-
end
145-
146113
@testset "RingWorkspace vs Brute Force with Real Y Profile" begin
147114
# Setup with real y profile model
148115
nside = 32 # Smaller for faster testing with real profile

0 commit comments

Comments
 (0)