|
4 | 4 |
|
5 | 5 | import cupy as cp
|
6 | 6 | import numpy as np
|
| 7 | +import pytest |
7 | 8 |
|
8 | 9 | import cuda.parallel.experimental.algorithms as algorithms
|
9 | 10 | from cuda.parallel.experimental.struct import gpu_struct
|
10 | 11 |
|
11 | 12 |
|
12 |
| -def test_segmented_reduce(input_array): |
13 |
| - "Test for all supported input types" |
| 13 | +@pytest.fixture(params=["i4", "u4", "i8", "u8"]) |
| 14 | +def offset_dtype(request): |
| 15 | + return np.dtype(request.param) |
| 16 | + |
| 17 | + |
| 18 | +def test_segmented_reduce(input_array, offset_dtype): |
| 19 | + "Test for all supported input types and for some offset types" |
14 | 20 |
|
15 | 21 | def binary_op(a, b):
|
16 | 22 | return a + b
|
17 | 23 |
|
18 | 24 | assert input_array.ndim == 1
|
19 | 25 | sz = input_array.size
|
20 |
| - rng = np.random.default_rng() |
21 |
| - n_segments = 2**4 |
22 |
| - h_offsets = np.zeros(n_segments + 1, dtype="int64") |
23 |
| - h_offsets[1:] = rng.multinomial(sz, [1 / 16] * 16) |
| 26 | + rng = cp.random |
| 27 | + n_segments = 16 |
| 28 | + h_offsets = cp.zeros(n_segments + 1, dtype="int64") |
| 29 | + h_offsets[1:] = rng.multinomial(sz, [1 / n_segments] * n_segments) |
24 | 30 |
|
25 |
| - offsets = cp.asarray(h_offsets) |
| 31 | + offsets = cp.cumsum(cp.asarray(h_offsets, dtype=offset_dtype), dtype=offset_dtype) |
26 | 32 |
|
27 | 33 | start_offsets = offsets[:-1]
|
28 |
| - end_offsets = offsets[:-1] |
| 34 | + end_offsets = offsets[1:] |
| 35 | + |
| 36 | + assert offsets.dtype == np.dtype(offset_dtype) |
| 37 | + assert cp.all(start_offsets <= end_offsets) |
| 38 | + assert end_offsets[-1] == sz |
29 | 39 |
|
30 | 40 | d_in = cp.asarray(input_array)
|
31 | 41 | d_out = cp.empty(n_segments, dtype=d_in.dtype)
|
@@ -67,11 +77,11 @@ class Pixel:
|
67 | 77 | def max_g_value(x, y):
|
68 | 78 | return x if x.g > y.g else y
|
69 | 79 |
|
70 |
| - def ceil_up(n, m): |
| 80 | + def align_up(n, m): |
71 | 81 | return ((n + m - 1) // m) * m
|
72 | 82 |
|
73 | 83 | segment_size = 64
|
74 |
| - n_pixels = ceil_up(4000, 64) |
| 84 | + n_pixels = align_up(4000, 64) |
75 | 85 | offsets = cp.arange(n_pixels + segment_size - 1, step=segment_size, dtype=np.int64)
|
76 | 86 | start_offsets = offsets[:-1]
|
77 | 87 | end_offsets = offsets[1:]
|
|
0 commit comments