Skip to content

Add function get_device #269

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Mar 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
docs/build/

.vscode

2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ version = "0.8.0-dev"
[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

Expand Down
8 changes: 3 additions & 5 deletions examples/matmul.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,9 @@ function matmul!(a, b, c)
println("Matrix size mismatch!")
return nothing
end
if isa(a, Array)
kernel! = matmul_kernel!(CPU(),4)
else
kernel! = matmul_kernel!(CUDADevice(),256)
end
device = KernelAbstractions.get_device(a)
n = device isa GPU ? 256 : 4
kernel! = matmul_kernel!(device, n)
kernel!(a, b, c, ndrange=size(c))
end

Expand Down
14 changes: 3 additions & 11 deletions examples/naive_transpose.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,9 @@ function naive_transpose!(a, b)
println("Matrix size mismatch!")
return nothing
end

if isa(a, Array)
kernel! = naive_transpose_kernel!(CPU(), 4)
elseif isa(a, CuArray)
kernel! = naive_transpose_kernel!(CUDADevice(), 256)
elseif isa(a, ROCArray)
kernel! = naive_transpose_kernel!(ROCDevice(), 256)
else
println("Unrecognized array type!")
end

device = KernelAbstractions.get_device(a)
n = device isa GPU ? 256 : 4
kernel! = naive_transpose_kernel!(device, n)
kernel!(a, b, ndrange=size(a))
end

Expand Down
3 changes: 3 additions & 0 deletions lib/CUDAKernels/src/CUDAKernels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ import KernelAbstractions

export CUDADevice

# Device lookup for CUDA-backed arrays: dense `CuArray`s and CUSPARSE sparse arrays
# both resolve to `CUDADevice()`.
function KernelAbstractions.get_device(::CUDA.CuArray)
    return CUDADevice()
end

function KernelAbstractions.get_device(::CUDA.CUSPARSE.AbstractCuSparseArray)
    return CUDADevice()
end

const FREE_STREAMS = CUDA.CuStream[]
const STREAMS = CUDA.CuStream[]
const STREAM_GC_THRESHOLD = Ref{Int}(16)
Expand Down
2 changes: 2 additions & 0 deletions lib/CUDAKernels/test/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@ Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
KernelGradients = "e5faadeb-7f6c-408e-9747-a7a26e81c66a"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
1 change: 1 addition & 0 deletions lib/CUDAKernels/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ using Enzyme
using CUDA
using CUDAKernels
using Test
using SparseArrays

include(joinpath(dirname(pathof(KernelAbstractions)), "..", "test", "testsuite.jl"))
include(joinpath(dirname(pathof(KernelGradients)), "..", "test", "testsuite.jl"))
Expand Down
3 changes: 3 additions & 0 deletions lib/ROCKernels/src/ROCKernels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ import KernelAbstractions

export ROCDevice

# Device lookup for AMDGPU arrays: every `ROCArray` resolves to `ROCDevice()`.
function KernelAbstractions.get_device(::AMDGPU.ROCArray)
    return ROCDevice()
end


const FREE_QUEUES = HSAQueue[]
const QUEUES = HSAQueue[]
const QUEUE_GC_THRESHOLD = Ref{Int}(16)
Expand Down
2 changes: 2 additions & 0 deletions lib/ROCKernels/test/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@ Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
KernelGradients = "e5faadeb-7f6c-408e-9747-a7a26e81c66a"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
19 changes: 19 additions & 0 deletions src/KernelAbstractions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ export Device, GPU, CPU, Event, MultiEvent, NoneEvent
export async_copy!


using LinearAlgebra
using MacroTools
using SparseArrays
using StaticArrays
using Adapt

Expand Down Expand Up @@ -336,6 +338,23 @@ abstract type GPU <: Device end

struct CPU <: Device end


"""
    KernelAbstractions.get_device(A::AbstractArray)::KernelAbstractions.Device

Get a `KernelAbstractions.Device` instance suitable for array `A`.

Wrapper arrays are resolved through `parent(A)` (or through their underlying storage
for sparse, `Diagonal` and `Tridiagonal` arrays) until an array type with a dedicated
method is reached. Plain `Array`s map to `CPU()`.

# Throws
- `ArgumentError`: if `A` is its own parent and no method is defined for its type.
"""
function get_device end

# Should cover SubArray, ReshapedArray, ReinterpretArray, Hermitian, AbstractTriangular, etc.:
function get_device(A::AbstractArray)
    P = parent(A)
    # `parent` returns its argument unchanged for arrays that are not wrappers.
    # Recursing on such an array would never terminate (StackOverflowError), so
    # report the unsupported array type explicitly instead.
    if P === A
        throw(ArgumentError(
            "no KernelAbstractions device is known for arrays of type $(typeof(A))",
        ))
    end
    return get_device(P)
end

# Structured arrays without a useful `parent`: look at the array that stores their data.
get_device(A::AbstractSparseArray) = get_device(rowvals(A))
get_device(A::Diagonal) = get_device(A.diag)
get_device(A::Tridiagonal) = get_device(A.d)

get_device(::Array) = CPU()

include("nditeration.jl")
using .NDIteration
import .NDIteration: get
Expand Down
2 changes: 2 additions & 0 deletions test/Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
[deps]
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
16 changes: 16 additions & 0 deletions test/test.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
using KernelAbstractions
using KernelAbstractions.NDIteration
using InteractiveUtils
using LinearAlgebra
using SparseArrays
import SpecialFunctions

identity(x) = x
Expand Down Expand Up @@ -64,6 +66,20 @@ end
A[I] = i
end

@testset "get_device" begin
    # `ArrayT` and `backend` come from the surrounding scope (not visible in this hunk).
    # Every array built through `ArrayT` must resolve to the device from `backend()`.
    v = ArrayT(rand(Float32, 5))
    M = ArrayT(rand(Float32, 5, 5))
    expected = backend()

    # Sparse arrays are not supported by the ROCm backend yet:
    on_rocm = isdefined(Main, :ROCKernels) && (expected isa Main.ROCKernels.ROCDevice)

    # Dense array and a strided view of it.
    @test @inferred(KernelAbstractions.get_device(M)) == expected
    @test @inferred(KernelAbstractions.get_device(view(M, 2:4, 1:3))) == expected

    if !on_rocm
        @test @inferred(KernelAbstractions.get_device(sparse(M))) == expected
    end

    # Structured LinearAlgebra wrappers.
    @test @inferred(KernelAbstractions.get_device(Diagonal(v))) == expected
    @test @inferred(KernelAbstractions.get_device(Tridiagonal(M))) == expected
end

@testset "indextest" begin
# TODO: add test for _group and _local_cartesian
A = ArrayT{Int}(undef, 16, 16)
Expand Down