Skip to content

Commit

Permalink
Merge pull request #194 from JuliaMolSim/nonbonded-kernel
Browse files Browse the repository at this point in the history
Nonbonded kernel
  • Loading branch information
Peppone98 authored Jan 14, 2025
2 parents 32f5c73 + a753577 commit 636c9da
Show file tree
Hide file tree
Showing 10 changed files with 1,147 additions and 198 deletions.
863 changes: 787 additions & 76 deletions src/cuda.jl

Large diffs are not rendered by default.

13 changes: 3 additions & 10 deletions src/energy.jl
Original file line number Diff line number Diff line change
Expand Up @@ -257,24 +257,17 @@ function potential_energy(sys::System{D, true, T}, neighbors, step_n::Integer=0;
n_threads::Integer=Threads.nthreads()) where {D, T}
pe_vec_nounits = CUDA.zeros(T, 1)
val_ft = Val(T)
buffers = init_forces_buffer!(sys, ustrip_vec.(zero(sys.coords)), 1)

pairwise_inters_nonl = filter(!use_neighbors, values(sys.pairwise_inters))
if length(pairwise_inters_nonl) > 0
nbs = NoNeighborList(length(sys))
pairwise_pe_gpu!(pe_vec_nounits, sys.coords, sys.velocities, sys.atoms, sys.boundary,
pairwise_inters_nonl, nbs, step_n, sys.energy_units, val_ft)
pairwise_pe_gpu!(pe_vec_nounits, buffers, sys, pairwise_inters_nonl, nbs, step_n)
end

pairwise_inters_nl = filter(use_neighbors, values(sys.pairwise_inters))
if length(pairwise_inters_nl) > 0
if isnothing(neighbors)
error("an interaction uses the neighbor list but neighbors is nothing")
end
if length(neighbors) > 0
nbs = @view neighbors.list[1:neighbors.n]
pairwise_pe_gpu!(pe_vec_nounits, sys.coords, sys.velocities, sys.atoms, sys.boundary,
pairwise_inters_nl, nbs, step_n, sys.energy_units, val_ft)
end
pairwise_pe_gpu!(pe_vec_nounits, buffers, sys, pairwise_inters_nl, nothing, step_n)
end

for inter_list in values(sys.specific_inter_lists)
Expand Down
54 changes: 35 additions & 19 deletions src/force.jl
Original file line number Diff line number Diff line change
Expand Up @@ -115,16 +115,37 @@ Base.:+(x::SpecificForce2Atoms, y::SpecificForce2Atoms) = SpecificForce2Atoms(x.
Base.:+(x::SpecificForce3Atoms, y::SpecificForce3Atoms) = SpecificForce3Atoms(x.f1 + y.f1, x.f2 + y.f2, x.f3 + y.f3)
Base.:+(x::SpecificForce4Atoms, y::SpecificForce4Atoms) = SpecificForce4Atoms(x.f1 + y.f1, x.f2 + y.f2, x.f3 + y.f3, x.f4 + y.f4)

function init_forces_buffer(forces_nounits, n_threads)
function init_forces_buffer!(sys, forces_nounits, n_threads)
if n_threads == 1
return nothing
else
return [similar(forces_nounits) for _ in 1:n_threads]
end
end

function init_forces_buffer(forces_nounits::CuArray{SVector{D, T}}, n_threads) where {D, T}
return CUDA.zeros(T, D, length(forces_nounits))
struct ForcesBuffer{F, C, M, R}
fs_mat::F
box_mins::C
box_maxs::C
Morton_seq::M
compressed_eligible::R
compressed_special::R
end

function init_forces_buffer!(sys, forces_nounits::CuArray{SVector{D, T}}, n_threads) where {D, T}
N = length(forces_nounits)
C = eltype(eltype(sys.coords))
n_blocks = cld(N, 32)
fs_mat = CUDA.zeros(T, D, N)
box_mins = CUDA.zeros(C, n_blocks, D)
box_maxs = CUDA.zeros(C, n_blocks, D)
Morton_seq = CUDA.zeros(Int32, N)
compressed_eligible = CUDA.zeros(UInt32, 32, n_blocks, n_blocks)
compressed_special = CUDA.zeros(UInt32, 32, n_blocks, n_blocks)
if sys.neighbor_finder isa GPUNeighborFinder
sys.neighbor_finder.initialized = false
end
return ForcesBuffer(fs_mat, box_mins, box_maxs, Morton_seq, compressed_eligible, compressed_special)
end

"""
Expand All @@ -139,7 +160,7 @@ end

function forces(sys, neighbors, step_n::Integer=0; n_threads::Integer=Threads.nthreads())
forces_nounits = ustrip_vec.(zero(sys.coords))
forces_buffer = init_forces_buffer(forces_nounits, n_threads)
forces_buffer = init_forces_buffer!(sys, forces_nounits, n_threads)
forces_nounits!(forces_nounits, sys, neighbors, forces_buffer, step_n; n_threads=n_threads)
return forces_nounits .* sys.force_units
end
Expand Down Expand Up @@ -347,36 +368,29 @@ function specific_forces!(fs_nounits, atoms, coords, velocities, boundary, force
end

function forces_nounits!(fs_nounits, sys::System{D, true, T}, neighbors,
fs_mat=CUDA.zeros(T, D, length(sys)), step_n::Integer=0;
buffers, step_n::Integer=0;
n_threads::Integer=Threads.nthreads()) where {D, T}
fill!(fs_mat, zero(T))
fill!(buffers.fs_mat, zero(T))
val_ft = Val(T)

pairwise_inters_nonl = filter(!use_neighbors, values(sys.pairwise_inters))
if length(pairwise_inters_nonl) > 0
nbs = NoNeighborList(length(sys))
pairwise_force_gpu!(fs_mat, sys.coords, sys.velocities, sys.atoms, sys.boundary, pairwise_inters_nonl,
nbs, step_n, sys.force_units, val_ft)
n = length(sys)
nbs = NoNeighborList(n)
pairwise_force_gpu!(buffers, sys, pairwise_inters_nonl, nbs, step_n)
end

pairwise_inters_nl = filter(use_neighbors, values(sys.pairwise_inters))
if length(pairwise_inters_nl) > 0
if isnothing(neighbors)
error("an interaction uses the neighbor list but neighbors is nothing")
end
if length(neighbors) > 0
nbs = @view neighbors.list[1:neighbors.n]
pairwise_force_gpu!(fs_mat, sys.coords, sys.velocities, sys.atoms, sys.boundary, pairwise_inters_nl,
nbs, step_n, sys.force_units, val_ft)
end
pairwise_force_gpu!(buffers, sys, pairwise_inters_nl, nothing, step_n)
end

for inter_list in values(sys.specific_inter_lists)
specific_force_gpu!(fs_mat, inter_list, sys.coords, sys.velocities, sys.atoms,
specific_force_gpu!(buffers.fs_mat, inter_list, sys.coords, sys.velocities, sys.atoms,
sys.boundary, step_n, sys.force_units, val_ft)
end

fs_nounits .= reinterpret(SVector{D, T}, vec(fs_mat))
fs_nounits .= reinterpret(SVector{D, T}, vec(buffers.fs_mat))

for inter in values(sys.general_inters)
fs_gen = AtomsCalculators.forces(sys, inter; neighbors=neighbors, step_n=step_n,
Expand All @@ -387,3 +401,5 @@ function forces_nounits!(fs_nounits, sys::System{D, true, T}, neighbors,

return fs_nounits
end


37 changes: 36 additions & 1 deletion src/neighbors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ export
find_neighbors,
DistanceNeighborFinder,
TreeNeighborFinder,
CellListMapNeighborFinder
CellListMapNeighborFinder,
GPUNeighborFinder

"""
use_neighbors(inter)
Expand Down Expand Up @@ -42,6 +43,33 @@ find_neighbors(sys::System; kwargs...) = find_neighbors(sys, sys.neighbor_finder

find_neighbors(sys::System, nf::NoNeighborFinder, args...; kwargs...) = nothing

"""
GPUNeighborFinder(; eligible, dist_cutoff, special, n_steps_reorder, initialized)
Returns nothing as neighbor list since the Eastman's algorithm for GPU computation of nonbonded forces does not require previous knowledge of a neighbor list.
"""
mutable struct GPUNeighborFinder{B, D}
eligible::B
dist_cutoff::D
special::B
n_steps_reorder::Int
initialized::Bool
end

function GPUNeighborFinder(;
eligible,
dist_cutoff,
special=zero(eligible),
n_steps_reorder=10,
initialized=false)
return GPUNeighborFinder{typeof(eligible), typeof(dist_cutoff)}(
eligible, dist_cutoff, special, n_steps_reorder, initialized)
end

find_neighbors(sys::System{D, true}, nf::GPUNeighborFinder, current_neighbors=nothing, step_n::Integer=0, initialized::Bool=false; kwargs...) where D = nothing

find_neighbors(sys::System, nf::GPUNeighborFinder, args...; kwargs...) = nothing

"""
DistanceNeighborFinder(; eligible, dist_cutoff, special, n_steps)
Expand Down Expand Up @@ -365,3 +393,10 @@ function Base.show(io::IO, neighbor_finder::Union{DistanceNeighborFinder,
println(io, " n_steps = " , neighbor_finder.n_steps)
print( io, " dist_cutoff = ", neighbor_finder.dist_cutoff)
end

function Base.show(io::IO, neighbor_finder::GPUNeighborFinder)
println(io, typeof(neighbor_finder))
println(io, " Size of eligible matrix = " , size(neighbor_finder.eligible))
println(io, " n_steps_reorder = " , neighbor_finder.n_steps_reorder)
print( io, " dist_cutoff = ", neighbor_finder.dist_cutoff)
end
40 changes: 28 additions & 12 deletions src/setup.jl
Original file line number Diff line number Diff line change
Expand Up @@ -887,14 +887,15 @@ function System(coord_file::AbstractString,
end
coords = wrap_coords.(coords, (boundary_used,))

if gpu || !use_cell_list
neighbor_finder = DistanceNeighborFinder(
eligible=(gpu ? CuArray(eligible) : eligible),
special=(gpu ? CuArray(special) : special),
n_steps=10,
if gpu
neighbor_finder = GPUNeighborFinder(
eligible=CuArray(eligible),
dist_cutoff=T(dist_neighbors),
special=CuArray(special),
n_steps_reorder=10,
initialized=false,
)
else
elseif use_cell_list
neighbor_finder = CellListMapNeighborFinder(
eligible=eligible,
special=special,
Expand All @@ -903,6 +904,13 @@ function System(coord_file::AbstractString,
unit_cell=boundary_used,
dist_cutoff=T(dist_neighbors),
)
else
neighbor_finder = DistanceNeighborFinder(
eligible=eligible,
special=special,
n_steps=10,
dist_cutoff=T(dist_neighbors),
)
end
if gpu
atoms = CuArray([atoms_abst...])
Expand Down Expand Up @@ -1276,14 +1284,15 @@ function System(T::Type,
end
specific_inter_lists = tuple(specific_inter_array...)

if gpu || !use_cell_list
neighbor_finder = DistanceNeighborFinder(
eligible=(gpu ? CuArray(eligible) : eligible),
special=(gpu ? CuArray(special) : special),
n_steps=10,
if gpu
neighbor_finder = GPUNeighborFinder(
eligible=CuArray(eligible),
dist_cutoff=T(dist_neighbors),
special=CuArray(special),
n_steps_reorder=10,
initialized=false,
)
else
elseif use_cell_list
neighbor_finder = CellListMapNeighborFinder(
eligible=eligible,
special=special,
Expand All @@ -1292,6 +1301,13 @@ function System(T::Type,
unit_cell=boundary_used,
dist_cutoff=T(dist_neighbors),
)
else
neighbor_finder = DistanceNeighborFinder(
eligible=eligible,
special=special,
n_steps=10,
dist_cutoff=T(dist_neighbors),
)
end
if gpu
atoms = CuArray([atoms_abst...])
Expand Down
24 changes: 14 additions & 10 deletions src/simulators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ Custom simulators should implement this function.
coords_copy = similar(sys.coords)
F_nounits = ustrip_vec.(similar(sys.coords))
F = F_nounits .* sys.force_units
forces_buffer = init_forces_buffer(F_nounits, n_threads)
forces_buffer = init_forces_buffer!(sys, F_nounits, n_threads)

for step_n in 1:sim.max_steps
F_nounits .= forces_nounits!(F_nounits, sys, neighbors, forces_buffer, step_n;
Expand Down Expand Up @@ -142,11 +142,13 @@ end
sys.coords .= wrap_coords.(sys.coords, (sys.boundary,))
!iszero(sim.remove_CM_motion) && remove_CM_motion!(sys)
neighbors = find_neighbors(sys, sys.neighbor_finder; n_threads=n_threads)
forces_t = forces(sys, neighbors; n_threads=n_threads)
forces_nounits_t = ustrip_vec.(zero(sys.coords))
forces_buffer = init_forces_buffer!(sys, forces_nounits_t, n_threads)
forces_nounits_t = forces_nounits!(forces_nounits_t, sys, neighbors, forces_buffer, 0; n_threads=n_threads)
forces_t = forces_nounits_t .* sys.force_units
accels_t = forces_t ./ masses(sys)
forces_nounits_t_dt = ustrip_vec.(similar(sys.coords))
forces_t_dt = forces_nounits_t_dt .* sys.force_units
forces_buffer = init_forces_buffer(forces_nounits_t_dt, n_threads)
accels_t_dt = zero(accels_t)
apply_loggers!(sys, neighbors, 0, run_loggers; n_threads=n_threads, current_forces=forces_t)
using_constraints = length(sys.constraints) > 0
Expand Down Expand Up @@ -233,7 +235,7 @@ end
apply_loggers!(sys, neighbors, 0, run_loggers; n_threads=n_threads)
forces_nounits_t = ustrip_vec.(similar(sys.coords))
forces_t = forces_nounits_t .* sys.force_units
forces_buffer = init_forces_buffer(forces_nounits_t, n_threads)
forces_buffer = init_forces_buffer!(sys, forces_nounits_t, n_threads)
accels_t = forces_t ./ masses(sys)
using_constraints = length(sys.constraints) > 0
if using_constraints
Expand Down Expand Up @@ -309,7 +311,7 @@ StormerVerlet(; dt, coupling=NoCoupling()) = StormerVerlet(dt, coupling)
coords_last, coords_copy = similar(sys.coords), similar(sys.coords)
forces_nounits_t = ustrip_vec.(similar(sys.coords))
forces_t = forces_nounits_t .* sys.force_units
forces_buffer = init_forces_buffer(forces_nounits_t, n_threads)
forces_buffer = init_forces_buffer!(sys, forces_nounits_t, n_threads)
accels_t = forces_t ./ masses(sys)
using_constraints = length(sys.constraints) > 0

Expand Down Expand Up @@ -393,7 +395,7 @@ end
apply_loggers!(sys, neighbors, 0, run_loggers; n_threads=n_threads)
forces_nounits_t = ustrip_vec.(similar(sys.coords))
forces_t = forces_nounits_t .* sys.force_units
forces_buffer = init_forces_buffer(forces_nounits_t, n_threads)
forces_buffer = init_forces_buffer!(sys, forces_nounits_t, n_threads)
accels_t = forces_t ./ masses(sys)
noise = similar(sys.velocities)
using_constraints = length(sys.constraints) > 0
Expand Down Expand Up @@ -501,7 +503,7 @@ end
apply_loggers!(sys, neighbors, 0, run_loggers; n_threads=n_threads)
forces_nounits_t = ustrip_vec.(similar(sys.coords))
forces_t = forces_nounits_t .* sys.force_units
forces_buffer = init_forces_buffer(forces_nounits_t, n_threads)
forces_buffer = init_forces_buffer!(sys, forces_nounits_t, n_threads)
accels_t = forces_t ./ masses(sys)
noise = similar(sys.velocities)

Expand Down Expand Up @@ -621,7 +623,7 @@ end
apply_loggers!(sys, neighbors, 0, run_loggers; n_threads=n_threads)
forces_nounits_t = ustrip_vec.(similar(sys.coords))
forces_t = forces_nounits_t .* sys.force_units
forces_buffer = init_forces_buffer(forces_nounits_t, n_threads)
forces_buffer = init_forces_buffer!(sys, forces_nounits_t, n_threads)
accels_t = forces_t ./ masses(sys)
noise = similar(sys.velocities)

Expand Down Expand Up @@ -692,11 +694,13 @@ end
sys.coords .= wrap_coords.(sys.coords, (sys.boundary,))
!iszero(sim.remove_CM_motion) && remove_CM_motion!(sys)
neighbors = find_neighbors(sys, sys.neighbor_finder; n_threads=n_threads)
forces_t = forces(sys, neighbors; n_threads=n_threads)
forces_nounits_t = ustrip_vec.(zero(sys.coords))
forces_buffer = init_forces_buffer!(sys, forces_nounits_t, n_threads)
forces_nounits_t = forces_nounits!(forces_nounits_t, sys, neighbors, forces_buffer, 0; n_threads=n_threads)
forces_t = forces_nounits_t .* sys.force_units
accels_t = forces_t ./ masses(sys)
forces_nounits_t_dt = ustrip_vec.(similar(sys.coords))
forces_t_dt = forces_nounits_t_dt .* sys.force_units
forces_buffer = init_forces_buffer(forces_nounits_t_dt, n_threads)
accels_t_dt = zero(accels_t)
apply_loggers!(sys, neighbors, 0, run_loggers; n_threads=n_threads, current_forces=forces_t)
v_half = zero(sys.velocities)
Expand Down
15 changes: 11 additions & 4 deletions test/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -184,10 +184,17 @@
# Mark all pairs as ineligible for pairwise interactions and check that the
# potential energy from the specific interactions does not change on scaling
no_nbs = falses(length(sys), length(sys))
sys.neighbor_finder = DistanceNeighborFinder(
eligible=(gpu ? CuArray(no_nbs) : no_nbs),
dist_cutoff=1.0u"nm",
)
if gpu
sys.neighbor_finder = GPUNeighborFinder(
eligible=(gpu ? CuArray(no_nbs) : no_nbs),
dist_cutoff=1.0u"nm",
)
else
sys.neighbor_finder = DistanceNeighborFinder(
eligible=(gpu ? CuArray(no_nbs) : no_nbs),
dist_cutoff=1.0u"nm",
)
end
coords_start = copy(sys.coords)
pe_start = potential_energy(sys, find_neighbors(sys))
scale_factor = 1.02
Expand Down
Loading

0 comments on commit 636c9da

Please sign in to comment.