Skip to content

Commit 7ea0812

Browse files
Fix logic in test_segmented_reduce (#4198)
* Fix logic in test_segmented_reduce, also test over different types of offsets

  To resolve gh-4197, use `cupy.cumsum` to accumulate over random partition sizes to form a correct offsets sequence. Add assertions to verify that `offsets` is a non-decreasing sequence, and that its last element equals the size of the input array. Perform the test for several plausible offset data types.

* Changes per PR review comments

  1. Use `cupy.random` to draw the random sample on the GPU, rather than on the CPU followed by a transfer.
  2. Use `cp.empty` to allocate the output, rather than `cp.zeros`.
1 parent 8b4d386 commit 7ea0812

File tree

1 file changed

+20
-10
lines changed

1 file changed

+20
-10
lines changed

python/cuda_parallel/tests/test_segmented_reduce.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,28 +4,38 @@
44

55
import cupy as cp
66
import numpy as np
7+
import pytest
78

89
import cuda.parallel.experimental.algorithms as algorithms
910
from cuda.parallel.experimental.struct import gpu_struct
1011

1112

12-
def test_segmented_reduce(input_array):
13-
"Test for all supported input types"
13+
@pytest.fixture(params=["i4", "u4", "i8", "u8"])
14+
def offset_dtype(request):
15+
return np.dtype(request.param)
16+
17+
18+
def test_segmented_reduce(input_array, offset_dtype):
19+
"Test for all supported input types and for some offset types"
1420

1521
def binary_op(a, b):
1622
return a + b
1723

1824
assert input_array.ndim == 1
1925
sz = input_array.size
20-
rng = np.random.default_rng()
21-
n_segments = 2**4
22-
h_offsets = np.zeros(n_segments + 1, dtype="int64")
23-
h_offsets[1:] = rng.multinomial(sz, [1 / 16] * 16)
26+
rng = cp.random
27+
n_segments = 16
28+
h_offsets = cp.zeros(n_segments + 1, dtype="int64")
29+
h_offsets[1:] = rng.multinomial(sz, [1 / n_segments] * n_segments)
2430

25-
offsets = cp.asarray(h_offsets)
31+
offsets = cp.cumsum(cp.asarray(h_offsets, dtype=offset_dtype), dtype=offset_dtype)
2632

2733
start_offsets = offsets[:-1]
28-
end_offsets = offsets[:-1]
34+
end_offsets = offsets[1:]
35+
36+
assert offsets.dtype == np.dtype(offset_dtype)
37+
assert cp.all(start_offsets <= end_offsets)
38+
assert end_offsets[-1] == sz
2939

3040
d_in = cp.asarray(input_array)
3141
d_out = cp.empty(n_segments, dtype=d_in.dtype)
@@ -67,11 +77,11 @@ class Pixel:
6777
def max_g_value(x, y):
6878
return x if x.g > y.g else y
6979

70-
def ceil_up(n, m):
80+
def align_up(n, m):
7181
return ((n + m - 1) // m) * m
7282

7383
segment_size = 64
74-
n_pixels = ceil_up(4000, 64)
84+
n_pixels = align_up(4000, 64)
7585
offsets = cp.arange(n_pixels + segment_size - 1, step=segment_size, dtype=np.int64)
7686
start_offsets = offsets[:-1]
7787
end_offsets = offsets[1:]

0 commit comments

Comments
 (0)