diff --git a/src/scanpy/_compat.py b/src/scanpy/_compat.py index c5fa4dbe84..dca6c84c4e 100644 --- a/src/scanpy/_compat.py +++ b/src/scanpy/_compat.py @@ -9,15 +9,19 @@ from pathlib import Path from typing import TYPE_CHECKING, Literal, ParamSpec, TypeVar, cast, overload +import numpy as np from packaging.version import Version if TYPE_CHECKING: from collections.abc import Callable from importlib.metadata import PackageMetadata + P = ParamSpec("P") R = TypeVar("R") +_LegacyRandom = int | np.random.RandomState | None + if TYPE_CHECKING: # type checkers are confused and can only see …core.Array diff --git a/src/scanpy/_utils/__init__.py b/src/scanpy/_utils/__init__.py index 150afe8311..67e2ae03c8 100644 --- a/src/scanpy/_utils/__init__.py +++ b/src/scanpy/_utils/__init__.py @@ -12,6 +12,7 @@ import re import sys import warnings +from collections.abc import Sequence from contextlib import contextmanager, suppress from enum import Enum from functools import partial, reduce, singledispatch, wraps @@ -56,12 +57,13 @@ from anndata import AnnData from numpy.typing import ArrayLike, DTypeLike, NDArray + from .._compat import _LegacyRandom from ..neighbors import NeighborsParams, RPForestDict -# e.g. 
https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html -# maybe in the future random.Generator -AnyRandom = int | np.random.RandomState | None +SeedLike = int | np.integer | Sequence[int] | np.random.SeedSequence +RNGLike = np.random.Generator | np.random.BitGenerator + LegacyUnionType = type(Union[int, str]) # noqa: UP007 @@ -493,7 +495,7 @@ def moving_average(a: np.ndarray, n: int): return ret[n - 1 :] / n -def get_random_state(seed: AnyRandom) -> np.random.RandomState: +def _get_legacy_random(seed: _LegacyRandom) -> np.random.RandomState: if isinstance(seed, np.random.RandomState): return seed return np.random.RandomState(seed) diff --git a/src/scanpy/datasets/_datasets.py b/src/scanpy/datasets/_datasets.py index 41b23160d6..df510b3209 100644 --- a/src/scanpy/datasets/_datasets.py +++ b/src/scanpy/datasets/_datasets.py @@ -18,7 +18,7 @@ if TYPE_CHECKING: from typing import Literal - from .._utils import AnyRandom + from .._compat import _LegacyRandom VisiumSampleID = Literal[ "V1_Breast_Cancer_Block_A_Section_1", @@ -63,7 +63,7 @@ def blobs( n_centers: int = 5, cluster_std: float = 1.0, n_observations: int = 640, - random_state: AnyRandom = 0, + random_state: _LegacyRandom = 0, ) -> AnnData: """\ Gaussian Blobs. 
diff --git a/src/scanpy/external/pp/_dca.py b/src/scanpy/external/pp/_dca.py index 14842c8071..c47fff90f2 100644 --- a/src/scanpy/external/pp/_dca.py +++ b/src/scanpy/external/pp/_dca.py @@ -11,7 +11,7 @@ from anndata import AnnData - from ..._utils import AnyRandom + from ..._compat import _LegacyRandom _AEType = Literal["zinb-conddisp", "zinb", "nb-conddisp", "nb"] @@ -62,7 +62,7 @@ def dca( early_stop: int = 15, batch_size: int = 32, optimizer: str = "RMSprop", - random_state: AnyRandom = 0, + random_state: _LegacyRandom = 0, threads: int | None = None, learning_rate: float | None = None, verbose: bool = False, diff --git a/src/scanpy/external/pp/_magic.py b/src/scanpy/external/pp/_magic.py index fd4b19667d..132d2a6448 100644 --- a/src/scanpy/external/pp/_magic.py +++ b/src/scanpy/external/pp/_magic.py @@ -19,7 +19,7 @@ from anndata import AnnData - from ..._utils import AnyRandom + from ..._compat import _LegacyRandom MIN_VERSION = "2.0" @@ -36,7 +36,7 @@ def magic( n_pca: int | None = 100, solver: Literal["exact", "approximate"] = "exact", knn_dist: str = "euclidean", - random_state: AnyRandom = None, + random_state: _LegacyRandom = None, n_jobs: int | None = None, verbose: bool = False, copy: bool | None = None, diff --git a/src/scanpy/external/tl/_phate.py b/src/scanpy/external/tl/_phate.py index 78b50327a9..ff50a1e6f7 100644 --- a/src/scanpy/external/tl/_phate.py +++ b/src/scanpy/external/tl/_phate.py @@ -16,7 +16,7 @@ from anndata import AnnData - from ..._utils import AnyRandom + from ..._compat import _LegacyRandom @old_positionals( @@ -49,7 +49,7 @@ def phate( mds_dist: str = "euclidean", mds: Literal["classic", "metric", "nonmetric"] = "metric", n_jobs: int | None = None, - random_state: AnyRandom = None, + random_state: _LegacyRandom = None, verbose: bool | int | None = None, copy: bool = False, **kwargs, diff --git a/src/scanpy/neighbors/__init__.py b/src/scanpy/neighbors/__init__.py index 379f34227b..ec5957b325 100644 --- 
a/src/scanpy/neighbors/__init__.py +++ b/src/scanpy/neighbors/__init__.py @@ -33,7 +33,7 @@ from igraph import Graph from scipy.sparse import csr_matrix - from .._utils import AnyRandom + from .._compat import _LegacyRandom from ._types import KnnTransformerLike, _Metric, _MetricFn @@ -54,13 +54,13 @@ class KwdsForTransformer(TypedDict): n_neighbors: int metric: _Metric | _MetricFn metric_params: Mapping[str, Any] - random_state: AnyRandom + random_state: _LegacyRandom class NeighborsParams(TypedDict): n_neighbors: int method: _Method - random_state: AnyRandom + random_state: _LegacyRandom metric: _Metric | _MetricFn metric_kwds: NotRequired[Mapping[str, Any]] use_rep: NotRequired[str] @@ -79,7 +79,7 @@ def neighbors( transformer: KnnTransformerLike | _KnownTransformer | None = None, metric: _Metric | _MetricFn = "euclidean", metric_kwds: Mapping[str, Any] = MappingProxyType({}), - random_state: AnyRandom = 0, + random_state: _LegacyRandom = 0, key_added: str | None = None, copy: bool = False, ) -> AnnData | None: @@ -521,7 +521,7 @@ def compute_neighbors( transformer: KnnTransformerLike | _KnownTransformer | None = None, metric: _Metric | _MetricFn = "euclidean", metric_kwds: Mapping[str, Any] = MappingProxyType({}), - random_state: AnyRandom = 0, + random_state: _LegacyRandom = 0, ) -> None: """\ Compute distances and connectivities of neighbors. @@ -757,7 +757,7 @@ def compute_eigen( n_comps: int = 15, sym: bool | None = None, sort: Literal["decrease", "increase"] = "decrease", - random_state: AnyRandom = 0, + random_state: _LegacyRandom = 0, ): """\ Compute eigen decomposition of transition matrix. 
diff --git a/src/scanpy/plotting/_tools/paga.py b/src/scanpy/plotting/_tools/paga.py index 7e62d46eac..ff14a19989 100644 --- a/src/scanpy/plotting/_tools/paga.py +++ b/src/scanpy/plotting/_tools/paga.py @@ -33,6 +33,7 @@ from matplotlib.colors import Colormap from scipy.sparse import spmatrix + from ..._compat import _LegacyRandom from ...tools._draw_graph import _Layout as _LayoutWithoutEqTree from .._utils import _FontSize, _FontWeight, _LegendLoc @@ -210,7 +211,7 @@ def _compute_pos( adjacency_solid: spmatrix | np.ndarray, *, layout: _Layout | None = None, - random_state: _sc_utils.AnyRandom = 0, + random_state: _LegacyRandom = 0, init_pos: np.ndarray | None = None, adj_tree=None, root: int = 0, diff --git a/src/scanpy/preprocessing/_pca/__init__.py b/src/scanpy/preprocessing/_pca/__init__.py index dba47d821c..3fd288ad93 100644 --- a/src/scanpy/preprocessing/_pca/__init__.py +++ b/src/scanpy/preprocessing/_pca/__init__.py @@ -30,7 +30,8 @@ from scipy import sparse from scipy.sparse import spmatrix - from ..._utils import AnyRandom, Empty + from ..._compat import _LegacyRandom + from ..._utils import Empty CSMatrix = sparse.csr_matrix | sparse.csc_matrix @@ -70,7 +71,7 @@ def pca( layer: str | None = None, zero_center: bool | None = True, svd_solver: SvdSolver | None = None, - random_state: AnyRandom = 0, + random_state: _LegacyRandom = 0, return_info: bool = False, mask_var: NDArray[np.bool_] | str | None | Empty = _empty, use_highly_variable: bool | None = None, diff --git a/src/scanpy/preprocessing/_pca/_compat.py b/src/scanpy/preprocessing/_pca/_compat.py index 23cb60a2e9..28eef2ba1a 100644 --- a/src/scanpy/preprocessing/_pca/_compat.py +++ b/src/scanpy/preprocessing/_pca/_compat.py @@ -18,7 +18,7 @@ from scipy import sparse from sklearn.decomposition import PCA - from .._utils import AnyRandom + from ..._compat import _LegacyRandom CSMatrix = sparse.csr_matrix | sparse.csc_matrix @@ -29,7 +29,7 @@ def _pca_compat_sparse( *, solver: Literal["arpack", 
"lobpcg"], mu: NDArray[np.floating] | None = None, - random_state: AnyRandom = None, + random_state: _LegacyRandom = None, ) -> tuple[NDArray[np.floating], PCA]: """Sparse PCA for scikit-learn <1.4""" random_state = check_random_state(random_state) diff --git a/src/scanpy/preprocessing/_recipes.py b/src/scanpy/preprocessing/_recipes.py index 4579739939..4b97405df9 100644 --- a/src/scanpy/preprocessing/_recipes.py +++ b/src/scanpy/preprocessing/_recipes.py @@ -16,7 +16,7 @@ if TYPE_CHECKING: from anndata import AnnData - from .._utils import AnyRandom + from .._compat import _LegacyRandom @old_positionals( @@ -36,7 +36,7 @@ def recipe_weinreb17( cv_threshold: int = 2, n_pcs: int = 50, svd_solver="randomized", - random_state: AnyRandom = 0, + random_state: _LegacyRandom = 0, copy: bool = False, ) -> AnnData | None: """\ diff --git a/src/scanpy/preprocessing/_scrublet/__init__.py b/src/scanpy/preprocessing/_scrublet/__init__.py index d57eb81750..68b7f59526 100644 --- a/src/scanpy/preprocessing/_scrublet/__init__.py +++ b/src/scanpy/preprocessing/_scrublet/__init__.py @@ -15,7 +15,7 @@ from .core import Scrublet if TYPE_CHECKING: - from ..._utils import AnyRandom + from ..._compat import _LegacyRandom from ...neighbors import _Metric, _MetricFn @@ -58,7 +58,7 @@ def scrublet( threshold: float | None = None, verbose: bool = True, copy: bool = False, - random_state: AnyRandom = 0, + random_state: _LegacyRandom = 0, ) -> AnnData | None: """\ Predict doublets using Scrublet :cite:p:`Wolock2019`. 
@@ -309,7 +309,7 @@ def _scrublet_call_doublets( knn_dist_metric: _Metric | _MetricFn = "euclidean", get_doublet_neighbor_parents: bool = False, threshold: float | None = None, - random_state: AnyRandom = 0, + random_state: _LegacyRandom = 0, verbose: bool = True, ) -> AnnData: """\ @@ -503,7 +503,7 @@ def scrublet_simulate_doublets( layer: str | None = None, sim_doublet_ratio: float = 2.0, synthetic_doublet_umi_subsampling: float = 1.0, - random_seed: AnyRandom = 0, + random_seed: _LegacyRandom = 0, ) -> AnnData: """\ Simulate doublets by adding the counts of random observed transcriptome pairs. diff --git a/src/scanpy/preprocessing/_scrublet/core.py b/src/scanpy/preprocessing/_scrublet/core.py index 4c992b2b64..1236f42a7a 100644 --- a/src/scanpy/preprocessing/_scrublet/core.py +++ b/src/scanpy/preprocessing/_scrublet/core.py @@ -9,7 +9,7 @@ from scipy import sparse from ... import logging as logg -from ..._utils import get_random_state +from ..._utils import _get_legacy_random from ...neighbors import ( Neighbors, _get_indices_distances_from_sparse_matrix, @@ -21,7 +21,7 @@ from numpy.random import RandomState from numpy.typing import NDArray - from ..._utils import AnyRandom + from ..._compat import _LegacyRandom from ...neighbors import _Metric, _MetricFn __all__ = ["Scrublet"] @@ -73,7 +73,7 @@ class Scrublet: n_neighbors: InitVar[int | None] = None expected_doublet_rate: float = 0.1 stdev_doublet_rate: float = 0.02 - random_state: InitVar[AnyRandom] = 0 + random_state: InitVar[_LegacyRandom] = 0 # private fields @@ -174,7 +174,7 @@ def __post_init__( counts_obs: sparse.csr_matrix | sparse.csc_matrix | NDArray[np.integer], total_counts_obs: NDArray[np.integer] | None, n_neighbors: int | None, - random_state: AnyRandom, + random_state: _LegacyRandom, ) -> None: self._counts_obs = sparse.csc_matrix(counts_obs) self._total_counts_obs = ( @@ -187,7 +187,7 @@ def __post_init__( if n_neighbors is None else n_neighbors ) - self._random_state = 
get_random_state(random_state) + self._random_state = _get_legacy_random(random_state) def simulate_doublets( self, diff --git a/src/scanpy/preprocessing/_scrublet/pipeline.py b/src/scanpy/preprocessing/_scrublet/pipeline.py index 5f6c62838c..586587e2cf 100644 --- a/src/scanpy/preprocessing/_scrublet/pipeline.py +++ b/src/scanpy/preprocessing/_scrublet/pipeline.py @@ -12,7 +12,7 @@ if TYPE_CHECKING: from typing import Literal - from ..._utils import AnyRandom + from ..._compat import _LegacyRandom from .core import Scrublet @@ -49,7 +49,7 @@ def truncated_svd( self: Scrublet, n_prin_comps: int = 30, *, - random_state: AnyRandom = 0, + random_state: _LegacyRandom = 0, algorithm: Literal["arpack", "randomized"] = "arpack", ) -> None: if self._counts_sim_norm is None: @@ -68,7 +68,7 @@ def pca( self: Scrublet, n_prin_comps: int = 50, *, - random_state: AnyRandom = 0, + random_state: _LegacyRandom = 0, svd_solver: Literal["auto", "full", "arpack", "randomized"] = "arpack", ) -> None: if self._counts_sim_norm is None: diff --git a/src/scanpy/preprocessing/_scrublet/sparse_utils.py b/src/scanpy/preprocessing/_scrublet/sparse_utils.py index cc0b1bc815..795559583c 100644 --- a/src/scanpy/preprocessing/_scrublet/sparse_utils.py +++ b/src/scanpy/preprocessing/_scrublet/sparse_utils.py @@ -7,12 +7,12 @@ from scanpy.preprocessing._utils import _get_mean_var -from ..._utils import get_random_state +from ..._utils import _get_legacy_random if TYPE_CHECKING: from numpy.typing import NDArray - from ..._utils import AnyRandom + from ..._compat import _LegacyRandom def sparse_multiply( @@ -47,10 +47,10 @@ def subsample_counts( *, rate: float, original_totals, - random_seed: AnyRandom = 0, + random_seed: _LegacyRandom = 0, ) -> tuple[sparse.csr_matrix | sparse.csc_matrix, NDArray[np.int64]]: if rate < 1: - random_seed = get_random_state(random_seed) + random_seed = _get_legacy_random(random_seed) E.data = random_seed.binomial(np.round(E.data).astype(int), rate) current_totals = 
np.asarray(E.sum(1)).squeeze() unsampled_orig_totals = original_totals - current_totals diff --git a/src/scanpy/preprocessing/_simple.py b/src/scanpy/preprocessing/_simple.py index 4d540ef931..01936414a5 100644 --- a/src/scanpy/preprocessing/_simple.py +++ b/src/scanpy/preprocessing/_simple.py @@ -50,8 +50,7 @@ import pandas as pd from numpy.typing import NDArray - from .._compat import DaskArray - from .._utils import AnyRandom + from .._compat import DaskArray, _LegacyRandom @old_positionals( @@ -831,7 +830,7 @@ def subsample( fraction: float | None = None, *, n_obs: int | None = None, - random_state: AnyRandom = 0, + random_state: _LegacyRandom = 0, copy: bool = False, ) -> AnnData | tuple[np.ndarray | spmatrix, NDArray[np.int64]] | None: """\ @@ -894,7 +893,7 @@ def downsample_counts( counts_per_cell: int | Collection[int] | None = None, total_counts: int | None = None, *, - random_state: AnyRandom = 0, + random_state: _LegacyRandom = 0, replace: bool = False, copy: bool = False, ) -> AnnData | None: @@ -1030,7 +1029,7 @@ def _downsample_array( col: np.ndarray, target: int, *, - random_state: AnyRandom = 0, + random_state: _LegacyRandom = 0, replace: bool = True, inplace: bool = False, ): diff --git a/src/scanpy/preprocessing/_utils.py b/src/scanpy/preprocessing/_utils.py index 9c02f7e636..b200e89ce8 100644 --- a/src/scanpy/preprocessing/_utils.py +++ b/src/scanpy/preprocessing/_utils.py @@ -16,8 +16,8 @@ from numpy.typing import DTypeLike, NDArray - from .._compat import DaskArray - from .._utils import AnyRandom, _SupportedArray + from .._compat import DaskArray, _LegacyRandom + from .._utils import _SupportedArray @singledispatch @@ -150,7 +150,7 @@ def sample_comb( dims: tuple[int, ...], nsamp: int, *, - random_state: AnyRandom = None, + random_state: _LegacyRandom = None, method: Literal[ "auto", "tracking_selection", "reservoir_sampling", "pool" ] = "auto", diff --git a/src/scanpy/tools/_diffmap.py b/src/scanpy/tools/_diffmap.py index 
dee643c39b..d2bdcc647b 100644 --- a/src/scanpy/tools/_diffmap.py +++ b/src/scanpy/tools/_diffmap.py @@ -8,7 +8,7 @@ if TYPE_CHECKING: from anndata import AnnData - from .._utils import AnyRandom + from .._compat import _LegacyRandom @old_positionals("neighbors_key", "random_state", "copy") @@ -17,7 +17,7 @@ def diffmap( n_comps: int = 15, *, neighbors_key: str | None = None, - random_state: AnyRandom = 0, + random_state: _LegacyRandom = 0, copy: bool = False, ) -> AnnData | None: """\ diff --git a/src/scanpy/tools/_draw_graph.py b/src/scanpy/tools/_draw_graph.py index 3f0e65c061..aedd41f3d3 100644 --- a/src/scanpy/tools/_draw_graph.py +++ b/src/scanpy/tools/_draw_graph.py @@ -18,7 +18,7 @@ from anndata import AnnData from scipy.sparse import spmatrix - from .._utils import AnyRandom + from .._compat import _LegacyRandom S = TypeVar("S", bound=LiteralString) @@ -43,7 +43,7 @@ def draw_graph( *, init_pos: str | bool | None = None, root: int | None = None, - random_state: AnyRandom = 0, + random_state: _LegacyRandom = 0, n_jobs: int | None = None, adjacency: spmatrix | None = None, key_added_ext: str | None = None, diff --git a/src/scanpy/tools/_leiden.py b/src/scanpy/tools/_leiden.py index 5a8ba00484..f73ec1fd7d 100644 --- a/src/scanpy/tools/_leiden.py +++ b/src/scanpy/tools/_leiden.py @@ -17,6 +17,8 @@ from anndata import AnnData from scipy import sparse + from .._compat import _LegacyRandom + try: from leidenalg.VertexPartition import MutableVertexPartition except ImportError: @@ -32,7 +34,7 @@ def leiden( resolution: float = 1, *, restrict_to: tuple[str, Sequence[str]] | None = None, - random_state: _utils.AnyRandom = 0, + random_state: _LegacyRandom = 0, key_added: str = "leiden", adjacency: sparse.spmatrix | None = None, directed: bool | None = None, diff --git a/src/scanpy/tools/_louvain.py b/src/scanpy/tools/_louvain.py index d3e616a850..470858ff38 100644 --- a/src/scanpy/tools/_louvain.py +++ b/src/scanpy/tools/_louvain.py @@ -22,6 +22,8 @@ from anndata 
import AnnData from scipy.sparse import spmatrix + from .._compat import _LegacyRandom + try: from louvain.VertexPartition import MutableVertexPartition except ImportError: @@ -50,7 +52,7 @@ def louvain( adata: AnnData, resolution: float | None = None, *, - random_state: _utils.AnyRandom = 0, + random_state: _LegacyRandom = 0, restrict_to: tuple[str, Sequence[str]] | None = None, key_added: str = "louvain", adjacency: spmatrix | None = None, diff --git a/src/scanpy/tools/_score_genes.py b/src/scanpy/tools/_score_genes.py index a3909b7a28..a40d9f3288 100644 --- a/src/scanpy/tools/_score_genes.py +++ b/src/scanpy/tools/_score_genes.py @@ -22,7 +22,7 @@ from numpy.typing import DTypeLike, NDArray from scipy.sparse import csc_matrix, csr_matrix - from .._utils import AnyRandom + from .._compat import _LegacyRandom try: _StrIdx = pd.Index[str] @@ -70,7 +70,7 @@ def score_genes( gene_pool: Sequence[str] | pd.Index[str] | None = None, n_bins: int = 25, score_name: str = "score", - random_state: AnyRandom = 0, + random_state: _LegacyRandom = 0, copy: bool = False, use_raw: bool | None = None, layer: str | None = None, diff --git a/src/scanpy/tools/_tsne.py b/src/scanpy/tools/_tsne.py index ac0e6a6317..18e4a47f8e 100644 --- a/src/scanpy/tools/_tsne.py +++ b/src/scanpy/tools/_tsne.py @@ -15,7 +15,7 @@ if TYPE_CHECKING: from anndata import AnnData - from .._utils import AnyRandom + from .._compat import _LegacyRandom @old_positionals( @@ -38,7 +38,7 @@ def tsne( metric: str = "euclidean", early_exaggeration: float = 12, learning_rate: float = 1000, - random_state: AnyRandom = 0, + random_state: _LegacyRandom = 0, use_fast_tsne: bool = False, n_jobs: int | None = None, key_added: str | None = None, diff --git a/src/scanpy/tools/_umap.py b/src/scanpy/tools/_umap.py index 4f225da2a1..902171d58c 100644 --- a/src/scanpy/tools/_umap.py +++ b/src/scanpy/tools/_umap.py @@ -17,7 +17,7 @@ from anndata import AnnData - from .._utils import AnyRandom + from .._compat import _LegacyRandom 
_InitPos = Literal["paga", "spectral", "random"] @@ -49,7 +49,7 @@ def umap( gamma: float = 1.0, negative_sample_rate: int = 5, init_pos: _InitPos | np.ndarray | None = "spectral", - random_state: AnyRandom = 0, + random_state: _LegacyRandom = 0, a: float | None = None, b: float | None = None, method: Literal["umap", "rapids"] = "umap",