Skip to content

Commit ff50d69

Browse files
committed
Propagate random state to numpy rng
When generating a classification problem, a mix of CuPy and NumPy random generators is used. The random state needs to be propagated to the NumPy generator so that repeated calls with the same random state produce the same data.
1 parent abdba02 commit ff50d69

File tree

2 files changed

+38
-6
lines changed

2 files changed

+38
-6
lines changed

python/cuml/cuml/datasets/classification.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
1+
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -39,18 +39,23 @@ def _generate_hypercube(samples, dimensions, rng):
3939
make_classification."
4040
)
4141

42-
from sklearn.utils.random import sample_without_replacement
42+
from sklearn.utils.random import (
43+
sample_without_replacement,
44+
check_random_state,
45+
)
46+
47+
# Least-bad way to derive a NumPy random state from a CuPy random state?
48+
rs = check_random_state(int(rng.randint(dimensions)))
4349

4450
if dimensions > 30:
4551
return np.hstack(
4652
[
47-
np.random.randint(2, size=(samples, dimensions - 30)),
53+
rs.randint(2, size=(samples, dimensions - 30)),
4854
_generate_hypercube(samples, 30, rng),
4955
]
5056
)
51-
random_state = int(rng.randint(dimensions))
5257
out = sample_without_replacement(
53-
2**dimensions, samples, random_state=random_state
58+
2**dimensions, samples, random_state=rs
5459
).astype(dtype=">u4", copy=False)
5560
out = np.unpackbits(out.view(">u1")).reshape((-1, 32))[:, -dimensions:]
5661
return out

python/cuml/cuml/tests/test_make_classification.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
2+
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
55
# you may not use this file except in compliance with the License.
@@ -147,3 +147,30 @@ def test_make_classification_informative_features():
147147
make(
148148
n_features=2, n_informative=2, n_classes=3, n_clusters_per_class=2
149149
)
150+
151+
152+
def test_make_classification_random_state():
153+
# Check that results are stable across repeated calls
154+
155+
# We need to use more than 30 features to test all of the code paths
156+
X, y = make_classification(n_features=30 + 2, random_state=42)
157+
X2, y2 = make_classification(n_features=30 + 2, random_state=42)
158+
assert array_equal(X, X2)
159+
assert array_equal(y, y2)
160+
161+
# Check that results are different across different random states
162+
X3, y3 = make_classification(n_features=30 + 2, random_state=43)
163+
assert not array_equal(X, X3)
164+
assert not array_equal(y, y3)
165+
166+
167+
def test_make_classification_random_state_gh_6510():
168+
# Non-regression test for gh-6510
169+
X, y = make_classification(
170+
10, 35, n_redundant=0, n_repeated=0, n_informative=35, random_state=42
171+
)
172+
X2, y2 = make_classification(
173+
10, 35, n_redundant=0, n_repeated=0, n_informative=35, random_state=42
174+
)
175+
assert array_equal(X, X2)
176+
assert array_equal(y, y2)

0 commit comments

Comments
 (0)