@@ -16,73 +16,87 @@ import
16
16
../ laser/ dynamic_stack_arrays,
17
17
../ laser/ tensor/ datatypes,
18
18
nimblas,
19
- nimcuda/ cuda12_5/ [cuda_runtime_api, check],
20
19
# Standard library
21
20
std/ [complex]
22
21
23
22
export nimblas.OrderType, complex
24
23
export datatypes, dynamic_stack_arrays
25
24
26
- type
27
- CudaTensorRefTrackerObj* [T: SomeFloat] = object
28
- value* : ptr UncheckedArray[T]
29
-
30
- CudaTensorRefTracker* [T] = ref CudaTensorRefTrackerObj[T]
31
-
32
- CudaStorage* [T: SomeFloat] = object
33
- # # Opaque seq-like structure for storage on the Cuda backend.
34
- # #
35
- # # Nim garbage collector will automatically ask cuda to clear GPU memory if data becomes unused.
36
- # #
37
- # TODO : Forward declaring this and making this completely private prevent assignment in newCudaStorage from working
38
- Flen* : int
39
- Fdata* : ptr UncheckedArray[T]
40
- Fref_tracking* : CudaTensorRefTracker[T] # We keep ref tracking for the GC in a separate field to avoid double indirection.
41
-
42
- CudaTensor* [T: SomeFloat] = object
43
- # # Tensor data structure stored on Nvidia GPU (Cuda)
44
- # # - ``shape``: Dimensions of the CudaTensor
45
- # # - ``strides``: Numbers of items to skip to get the next item along a dimension.
46
- # # - ``offset``: Offset to get the first item of the CudaTensor. Note: offset can be negative, in particular for slices.
47
- # # - ``storage``: An opaque data storage for the CudaTensor
48
- # #
49
- # # Warning ⚠:
50
- # # Assignment ``var a = b`` does not copy the data. Data modification on one CudaTensor will be reflected on the other.
51
- # # However modification on metadata (shape, strides or offset) will not affect the other tensor.
52
- # # Explicit copies can be made with ``clone``: ``var a = b.clone``
53
- shape* : Metadata
54
- strides* : Metadata
55
- offset* : int
56
- storage* : CudaStorage[T]
57
-
58
- ClStorage* [T: SomeFloat] = object
59
- # # Opaque seq-like structure for storage on the OpenCL backend.
60
- Flen* : int
61
- Fdata* : ptr UncheckedArray[T]
62
- Fref_tracking* : ref [ptr UncheckedArray[T]] # We keep ref tracking for the GC in a separate field to avoid double indirection.
63
-
64
- ClTensor* [T: SomeFloat] = object
65
- # # Tensor data structure stored on OpenCL (CPU, GPU, FPGAs or other accelerators)
66
- # # - ``shape``: Dimensions of the CudaTensor
67
- # # - ``strides``: Numbers of items to skip to get the next item along a dimension.
68
- # # - ``offset``: Offset to get the first item of the CudaTensor. Note: offset can be negative, in particular for slices.
69
- # # - ``storage``: An opaque data storage for the CudaTensor
70
- # #
71
- # # Warning ⚠:
72
- # # Assignment ``var a = b`` does not copy the data. Data modification on one CudaTensor will be reflected on the other.
73
- # # However modification on metadata (shape, strides or offset) will not affect the other tensor.
74
- # # Explicit copies can be made with ``clone``: ``var a = b.clone``
75
- shape* : Metadata
76
- strides* : Metadata
77
- offset* : int
78
- storage* : ClStorage[T]
79
-
80
- AnyTensor* [T] = Tensor[T] or CudaTensor[T] or ClTensor[T]
81
-
82
-
83
- proc deallocCuda* [T](p: CudaTensorRefTracker[T]) {.noSideEffect.}=
84
- if not p.value.isNil:
85
- check cudaFree(p.value)
25
when defined(cuda):
  import nimcuda/cuda12_5/[cuda_runtime_api, check]

  type
    CudaTensorRefTrackerObj*[T: SomeFloat] = object
      ## GC-visible wrapper around the raw device pointer so that the Nim GC
      ## can ask CUDA to release the allocation once it becomes unreachable.
      value*: ptr UncheckedArray[T]

    CudaTensorRefTracker*[T] = ref CudaTensorRefTrackerObj[T]

    CudaStorage*[T: SomeFloat] = object
      ## Opaque seq-like structure for storage on the Cuda backend.
      ##
      ## Nim garbage collector will automatically ask cuda to clear GPU memory
      ## if data becomes unused.
      # TODO: Forward declaring this and making this completely private
      # prevents assignment in newCudaStorage from working.
      Flen*: int
      Fdata*: ptr UncheckedArray[T]
      Fref_tracking*: CudaTensorRefTracker[T] # We keep ref tracking for the GC in a separate field to avoid double indirection.

    CudaTensor*[T: SomeFloat] = object
      ## Tensor data structure stored on Nvidia GPU (Cuda)
      ##   - ``shape``: Dimensions of the CudaTensor
      ##   - ``strides``: Numbers of items to skip to get the next item along a dimension.
      ##   - ``offset``: Offset to get the first item of the CudaTensor. Note: offset can be negative, in particular for slices.
      ##   - ``storage``: An opaque data storage for the CudaTensor
      ##
      ## Warning ⚠:
      ##   Assignment ``var a = b`` does not copy the data. Data modification on one CudaTensor will be reflected on the other.
      ##   However modification on metadata (shape, strides or offset) will not affect the other tensor.
      ##   Explicit copies can be made with ``clone``: ``var a = b.clone``
      shape*: Metadata
      strides*: Metadata
      offset*: int
      storage*: CudaStorage[T]

  proc deallocCuda*[T](p: CudaTensorRefTracker[T]) {.noSideEffect.} =
    ## Finalizer for the ref tracker: frees the device allocation (if any)
    ## via ``cudaFree`` and checks the returned status.
    if not p.value.isNil:
      check cudaFree(p.value)
63
+
64
when defined(opencl):
  type
    ClStorage*[T: SomeFloat] = object
      ## Opaque seq-like structure for storage on the OpenCL backend.
      Flen*: int
      Fdata*: ptr UncheckedArray[T]
      Fref_tracking*: ref[ptr UncheckedArray[T]] # We keep ref tracking for the GC in a separate field to avoid double indirection.

    ClTensor*[T: SomeFloat] = object
      ## Tensor data structure stored on OpenCL (CPU, GPU, FPGAs or other accelerators)
      ##   - ``shape``: Dimensions of the ClTensor
      ##   - ``strides``: Numbers of items to skip to get the next item along a dimension.
      ##   - ``offset``: Offset to get the first item of the ClTensor. Note: offset can be negative, in particular for slices.
      ##   - ``storage``: An opaque data storage for the ClTensor
      ##
      ## Warning ⚠:
      ##   Assignment ``var a = b`` does not copy the data. Data modification on one ClTensor will be reflected on the other.
      ##   However modification on metadata (shape, strides or offset) will not affect the other tensor.
      ##   Explicit copies can be made with ``clone``: ``var a = b.clone``
      shape*: Metadata
      strides*: Metadata
      offset*: int
      storage*: ClStorage[T]
87
+
88
# ``AnyTensor`` covers exactly the tensor variants compiled into this build;
# the backend-specific members only exist under their respective defines.
when defined(cuda) and defined(opencl):
  type AnyTensor*[T] = Tensor[T] or CudaTensor[T] or ClTensor[T]
elif defined(cuda):
  type AnyTensor*[T] = Tensor[T] or CudaTensor[T]
elif defined(opencl):
  type AnyTensor*[T] = Tensor[T] or ClTensor[T]
else:
  type AnyTensor*[T] = Tensor[T]

# GPU-resident tensors only (CudaTensor and/or ClTensor). In a CPU-only
# build this typeclass is empty, so procs constrained by it never match.
type GpuTensor[T] = AnyTensor[T] and not Tensor[T]
98
+
99
+
86
100
87
101
88
102
# ###############
@@ -102,10 +116,10 @@ proc `data=`*[T](t: var Tensor[T], s: seq[T]) {.deprecated: "Use copyFromRaw ins
102
116
# Tensor Metadata
103
117
# ################
104
118
105
func rank*[T](t: GpuTensor[T]): range[0 .. LASER_MAXRANK] {.inline.} =
  ## Number of dimensions of a GPU tensor (length of its ``shape``).
  t.shape.len
107
121
108
func size*[T](t: GpuTensor[T]): Natural {.inline.} =
  ## Total number of elements in a GPU tensor (product of its ``shape``).
  t.shape.product
110
124
111
125
proc shape_to_strides*(shape: Metadata, layout: OrderType = rowMajor, result : var Metadata) {.noSideEffect.} =
@@ -131,7 +145,7 @@ proc shape_to_strides*(shape: Metadata, layout: OrderType = rowMajor, result: va
131
145
accum *= shape[i]
132
146
return
133
147
134
- func is_C_contiguous* (t: CudaTensor or ClTensor ): bool =
148
+ func is_C_contiguous* (t: GpuTensor ): bool =
135
149
# # Check if the tensor follows C convention / is row major
136
150
var cur_size = 1
137
151
for i in countdown(t.rank - 1 ,0 ):
@@ -182,14 +196,14 @@ proc get_offset_ptr*[T: KnownSupportsCopyMem](t: Tensor[T]): ptr T {.noSideEffec
182
196
proc get_offset_ptr*[T: not KnownSupportsCopyMem](t: AnyTensor[T]): ptr T {.error: "`get_offset_ptr`" &
183
197
" cannot be safely used for GC'ed types!".}
184
198
185
proc get_data_ptr*[T](t: GpuTensor[T]): ptr T {.noSideEffect, inline.} =
  ## Input:
  ##     - A tensor
  ## Returns:
  ##     - A pointer to the real start of its data (no offset)
  cast[ptr T](t.storage.Fdata)
191
205
192
- proc get_offset_ptr*[T](t: CudaTensor[T] or ClTensor [T]): ptr T {.noSideEffect, inline.}=
206
+ proc get_offset_ptr*[T](t: GpuTensor [T]): ptr T {.noSideEffect, inline.}=
193
207
## Input:
194
208
## - A tensor
195
209
## Returns:
0 commit comments