From f2af154260282e9d197bba5d5320fc427b0d7979 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 14 Nov 2024 15:34:53 +0100 Subject: [PATCH 1/9] (chore): migrate to only checking `cs{r,c}_matrix` instead of `spmatrix` --- src/anndata/_core/aligned_mapping.py | 5 ++--- src/anndata/_core/anndata.py | 8 ++++---- src/anndata/_core/index.py | 12 ++++++------ src/anndata/_core/merge.py | 25 +++++++++++-------------- src/anndata/_core/raw.py | 7 +++---- src/anndata/_core/sparse_dataset.py | 6 +++--- src/anndata/_core/storage.py | 4 ++-- src/anndata/_io/h5ad.py | 13 +++++++------ src/anndata/_io/specs/methods.py | 15 ++++++++------- src/anndata/compat/__init__.py | 6 +++--- src/anndata/tests/helpers.py | 11 ++++++----- src/anndata/typing.py | 6 ++---- tests/test_base.py | 2 +- tests/test_concatenate.py | 4 +++- tests/test_x.py | 6 +++--- 15 files changed, 64 insertions(+), 66 deletions(-) diff --git a/src/anndata/_core/aligned_mapping.py b/src/anndata/_core/aligned_mapping.py index 9df5ac977..dbe5dbebf 100644 --- a/src/anndata/_core/aligned_mapping.py +++ b/src/anndata/_core/aligned_mapping.py @@ -9,10 +9,9 @@ import numpy as np import pandas as pd -from scipy.sparse import spmatrix from .._warnings import ExperimentalFeatureWarning, ImplicitModificationWarning -from ..compat import AwkArray +from ..compat import AwkArray, SpMatrix from ..utils import ( axis_len, convert_to_dict, @@ -36,7 +35,7 @@ OneDIdx = Sequence[int] | Sequence[bool] | slice TwoDIdx = tuple[OneDIdx, OneDIdx] # TODO: pd.DataFrame only allowed in AxisArrays? -Value = pd.DataFrame | spmatrix | np.ndarray +Value = pd.DataFrame | SpMatrix | np.ndarray P = TypeVar("P", bound="AlignedMappingBase") """Parent mapping an AlignedView is based on.""" diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py index 8a8eaf949..92fd7a2b2 100644 --- a/src/anndata/_core/anndata.py +++ b/src/anndata/_core/anndata.py @@ -221,13 +221,13 @@ class AnnData(metaclass=utils.DeprecationMixinMeta): def __init__( self, - X: np.ndarray | sparse.spmatrix | pd.DataFrame | None = None, + X: ArrayDataStructureType | pd.DataFrame | None = None, obs: pd.DataFrame | Mapping[str, Iterable[Any]] | None = None, var: pd.DataFrame | Mapping[str, Iterable[Any]] | None = None, uns: Mapping[str, Any] | None = None, obsm: np.ndarray | Mapping[str, Sequence[Any]] | None = None, varm: np.ndarray | Mapping[str, Sequence[Any]] | None = None, - layers: Mapping[str, np.ndarray | sparse.spmatrix] | None = None, + layers: Mapping[str, ArrayDataStructureType] | None = None, raw: Mapping[str, Any] | None = None, dtype: np.dtype | type | str | None = None, shape: tuple[int, int] | None = None, @@ -573,7 +573,7 @@ def X(self) -> ArrayDataStructureType | None: # return X @X.setter - def X(self, value: np.ndarray | sparse.spmatrix | SpArray | None): + def X(self, value: ArrayDataStructureType | None): if value is None: if self.isbacked: raise NotImplementedError( @@ -1169,7 +1169,7 @@ def _inplace_subset_obs(self, index: Index1D): self._init_as_actual(adata_subset) # TODO: Update, possibly remove - def __setitem__(self, index: Index, val: float | np.ndarray | sparse.spmatrix): + def __setitem__(self, index: Index, val: ArrayDataStructureType): if self.is_view: raise ValueError("Object is view and cannot be accessed with `[]`.") obs, var = self._normalize_indices(index) diff --git a/src/anndata/_core/index.py b/src/anndata/_core/index.py index f1d72ce0d..44ab83666 100644 --- a/src/anndata/_core/index.py +++ b/src/anndata/_core/index.py @@ -8,9 +8,9 @@ import h5py import numpy as np import pandas as pd -from scipy.sparse import issparse, spmatrix +from scipy.sparse import issparse -from ..compat import AwkArray, DaskArray, SpArray +from ..compat import AwkArray, DaskArray, SpArray, SpMatrix if TYPE_CHECKING: from ..compat import Index, Index1D @@ -69,13 +69,13 @@ def name_idx(i): elif isinstance(indexer, str): return index.get_loc(indexer) # int elif isinstance( - indexer, Sequence | np.ndarray | pd.Index | spmatrix | np.matrix | SpArray + indexer, Sequence | np.ndarray | pd.Index | SpMatrix | np.matrix | SpArray ): if hasattr(indexer, "shape") and ( (indexer.shape == (index.shape[0], 1)) or (indexer.shape == (1, index.shape[0])) ): - if isinstance(indexer, spmatrix | SpArray): + if isinstance(indexer, SpMatrix | SpArray): indexer = indexer.toarray() indexer = np.ravel(indexer) if not isinstance(indexer, np.ndarray | pd.Index): @@ -167,9 +167,9 @@ def _subset_dask(a: DaskArray, subset_idx: Index): return a[subset_idx] -@_subset.register(spmatrix) +@_subset.register(SpMatrix) @_subset.register(SpArray) -def _subset_sparse(a: spmatrix | SpArray, subset_idx: Index): +def _subset_sparse(a: SpMatrix | SpArray, subset_idx: Index): # Correcting for indexing behaviour of sparse.spmatrix if len(subset_idx) > 1 and all(isinstance(x, Iterable) for x in subset_idx): first_idx = subset_idx[0] diff --git a/src/anndata/_core/merge.py b/src/anndata/_core/merge.py index 0dfa5dab2..858c346bc 100644 --- a/src/anndata/_core/merge.py +++ b/src/anndata/_core/merge.py @@ -17,7 +17,6 @@ import pandas as pd from natsort import natsorted from scipy import sparse -from scipy.sparse import spmatrix from anndata._warnings import ExperimentalFeatureWarning @@ -29,6 +28,7 @@ CupySparseMatrix, DaskArray, SpArray, + SpMatrix, _map_cat_to_str, ) from ..utils import asarray, axis_len, warn_once @@ -135,7 +135,7 @@ def equal_dask_array(a, b) -> bool: if isinstance(b, DaskArray): if tokenize(a) == tokenize(b): return True - if isinstance(a._meta, spmatrix): + if isinstance(a._meta, SpMatrix): # TODO: Maybe also do this in the other case? return da.map_blocks(equal, a, b, drop_axis=(0, 1)).all() else: @@ -165,7 +165,7 @@ def equal_series(a, b) -> bool: return a.equals(b) -@equal.register(sparse.spmatrix) +@equal.register(SpMatrix) @equal.register(SpArray) @equal.register(CupySparseMatrix) def equal_sparse(a, b) -> bool: @@ -174,7 +174,7 @@ def equal_sparse(a, b) -> bool: xp = array_api_compat.array_namespace(a.data) - if isinstance(b, CupySparseMatrix | sparse.spmatrix | SpArray): + if isinstance(b, CupySparseMatrix | SpMatrix | SpArray): if isinstance(a, CupySparseMatrix): # Comparison broken for CSC matrices # https://github.com/cupy/cupy/issues/7757 @@ -206,7 +206,7 @@ def equal_awkward(a, b) -> bool: def as_sparse(x, use_sparse_array=False): - if not isinstance(x, sparse.spmatrix | SpArray): + if not isinstance(x, SpMatrix | SpArray): if CAN_USE_SPARSE_ARRAY and use_sparse_array: return sparse.csr_array(x) return sparse.csr_matrix(x) @@ -536,7 +536,7 @@ def apply(self, el, *, axis, fill_value=None): return el if isinstance(el, pd.DataFrame): return self._apply_to_df(el, axis=axis, fill_value=fill_value) - elif isinstance(el, sparse.spmatrix | SpArray | CupySparseMatrix): + elif isinstance(el, SpMatrix | SpArray | CupySparseMatrix): return self._apply_to_sparse(el, axis=axis, fill_value=fill_value) elif isinstance(el, AwkArray): return self._apply_to_awkward(el, axis=axis, fill_value=fill_value) @@ -614,8 +614,8 @@ def _apply_to_array(self, el, *, axis, fill_value=None): ) def _apply_to_sparse( - self, el: sparse.spmatrix | SpArray, *, axis, fill_value=None - ) -> spmatrix: + self, el: SpMatrix | SpArray, *, axis, fill_value=None + ) -> SpMatrix: if isinstance(el, CupySparseMatrix): from cupyx.scipy import sparse else: @@ -724,11 +724,8 @@ def default_fill_value(els): This is largely due to backwards compat, and might not be the ideal solution. """ if any( - isinstance(el, sparse.spmatrix | SpArray) - or ( - isinstance(el, DaskArray) - and isinstance(el._meta, sparse.spmatrix | SpArray) - ) + isinstance(el, SpMatrix | SpArray) + or (isinstance(el, DaskArray) and isinstance(el._meta, SpMatrix | SpArray)) for el in els ): return 0 @@ -828,7 +825,7 @@ def concat_arrays(arrays, reindexers, axis=0, index=None, fill_value=None): ], axis=axis, ) - elif any(isinstance(a, sparse.spmatrix | SpArray) for a in arrays): + elif any(isinstance(a, SpMatrix | SpArray) for a in arrays): sparse_stack = (sparse.vstack, sparse.hstack)[axis] use_sparse_array = any(issubclass(type(a), SpArray) for a in arrays) return sparse_stack( diff --git a/src/anndata/_core/raw.py b/src/anndata/_core/raw.py index d138440b5..7b96cd562 100644 --- a/src/anndata/_core/raw.py +++ b/src/anndata/_core/raw.py @@ -17,8 +17,7 @@ from collections.abc import Mapping, Sequence from typing import ClassVar - from scipy import sparse - + from ..compat import SpMatrix from .aligned_mapping import AxisArraysView from .anndata import AnnData from .sparse_dataset import BaseCompressedSparseDataset @@ -31,7 +30,7 @@ class Raw: def __init__( self, adata: AnnData, - X: np.ndarray | sparse.spmatrix | None = None, + X: np.ndarray | SpMatrix | None = None, var: pd.DataFrame | Mapping[str, Sequence] | None = None, varm: AxisArrays | Mapping[str, np.ndarray] | None = None, ): @@ -66,7 +65,7 @@ def _get_X(self, layer=None): return self.X @property - def X(self) -> BaseCompressedSparseDataset | np.ndarray | sparse.spmatrix: + def X(self) -> BaseCompressedSparseDataset | np.ndarray | SpMatrix: # TODO: Handle unsorted array of integer indices for h5py.Datasets if not self._adata.isbacked: return self._X diff --git a/src/anndata/_core/sparse_dataset.py b/src/anndata/_core/sparse_dataset.py index ae6b47c7f..f25bf5f5e 100644 --- a/src/anndata/_core/sparse_dataset.py +++ b/src/anndata/_core/sparse_dataset.py @@ -28,7 +28,7 @@ from .. import abc from .._settings import settings -from ..compat import H5Group, SpArray, ZarrArray, ZarrGroup, _read_attr +from ..compat import H5Group, SpArray, SpMatrix, ZarrArray, ZarrGroup, _read_attr from .index import _fix_slice_bounds, _subset, unpack_index if TYPE_CHECKING: @@ -312,7 +312,7 @@ def get_memory_class( if format == fmt: if use_sparray_in_io and issubclass(memory_class, SpArray): return memory_class - elif not use_sparray_in_io and issubclass(memory_class, ss.spmatrix): + elif not use_sparray_in_io and issubclass(memory_class, SpMatrix): return memory_class raise ValueError(f"Format string {format} is not supported.") @@ -324,7 +324,7 @@ def get_backed_class( if format == fmt: if use_sparray_in_io and issubclass(backed_class, SpArray): return backed_class - elif not use_sparray_in_io and issubclass(backed_class, ss.spmatrix): + elif not use_sparray_in_io and issubclass(backed_class, SpMatrix): return backed_class raise ValueError(f"Format string {format} is not supported.") diff --git a/src/anndata/_core/storage.py b/src/anndata/_core/storage.py index 9e036ba44..30b7a8b6d 100644 --- a/src/anndata/_core/storage.py +++ b/src/anndata/_core/storage.py @@ -5,9 +5,9 @@ import numpy as np import pandas as pd -from scipy import sparse from .._warnings import ImplicitModificationWarning +from ..compat import SpMatrix from ..utils import ( ensure_df_homogeneous, join_english, @@ -39,7 +39,7 @@ def coerce_array( warnings.warn(msg, ImplicitModificationWarning) value = value.A return value - elif isinstance(value, sparse.spmatrix): + elif isinstance(value, SpMatrix): msg = ( f"AnnData previously had undefined behavior around matrices of type {type(value)}." "In 0.12, passing in this type will throw an error. Please convert to a supported type." diff --git a/src/anndata/_io/h5ad.py b/src/anndata/_io/h5ad.py index edf4977cc..93d9623aa 100644 --- a/src/anndata/_io/h5ad.py +++ b/src/anndata/_io/h5ad.py @@ -18,6 +18,7 @@ from .._core.file_backing import filename from .._core.sparse_dataset import BaseCompressedSparseDataset from ..compat import ( + SpMatrix, _clean_uns, _decode_structured_array, _from_fixed_length_strings, @@ -82,14 +83,14 @@ def write_h5ad( f.attrs.setdefault("encoding-version", "0.1.0") if "X" in as_dense and isinstance( - adata.X, sparse.spmatrix | BaseCompressedSparseDataset + adata.X, SpMatrix | BaseCompressedSparseDataset ): write_sparse_as_dense(f, "X", adata.X, dataset_kwargs=dataset_kwargs) elif not (adata.isbacked and Path(adata.filename) == Path(filepath)): # If adata.isbacked, X should already be up to date write_elem(f, "X", adata.X, dataset_kwargs=dataset_kwargs) if "raw/X" in as_dense and isinstance( - adata.raw.X, sparse.spmatrix | BaseCompressedSparseDataset + adata.raw.X, SpMatrix | BaseCompressedSparseDataset ): write_sparse_as_dense( f, "raw/X", adata.raw.X, dataset_kwargs=dataset_kwargs @@ -115,7 +116,7 @@ def write_h5ad( def write_sparse_as_dense( f: h5py.Group, key: str, - value: sparse.spmatrix | BaseCompressedSparseDataset, + value: SpMatrix | BaseCompressedSparseDataset, *, dataset_kwargs: Mapping[str, Any] = MappingProxyType({}), ): @@ -172,7 +173,7 @@ def read_h5ad( backed: Literal["r", "r+"] | bool | None = None, *, as_sparse: Sequence[str] = (), - as_sparse_fmt: type[sparse.spmatrix] = sparse.csr_matrix, + as_sparse_fmt: type[SpMatrix] = sparse.csr_matrix, chunk_size: int = 6000, # TODO, probably make this 2d chunks ) -> AnnData: """\ @@ -275,7 +276,7 @@ def callback(func, elem_name: str, elem, iospec): def _read_raw( f: h5py.File | AnnDataFileManager, as_sparse: Collection[str] = (), - rdasp: Callable[[h5py.Dataset], sparse.spmatrix] | None = None, + rdasp: Callable[[h5py.Dataset], SpMatrix] | None = None, *, attrs: Collection[str] = ("X", "var", "varm"), ) -> dict: @@ -348,7 +349,7 @@ def read_dataset(dataset: h5py.Dataset): @report_read_key_on_error def read_dense_as_sparse( - dataset: h5py.Dataset, sparse_format: sparse.spmatrix, axis_chunk: int + dataset: h5py.Dataset, sparse_format: SpMatrix, axis_chunk: int ): if sparse_format == sparse.csr_matrix: return read_dense_as_csr(dataset, axis_chunk) diff --git a/src/anndata/_io/specs/methods.py b/src/anndata/_io/specs/methods.py index 582245310..52fd82427 100644 --- a/src/anndata/_io/specs/methods.py +++ b/src/anndata/_io/specs/methods.py @@ -52,7 +52,10 @@ from numpy.typing import NDArray from anndata._types import ArrayStorageType, GroupStorageType - from anndata.compat import SpArray + from anndata.compat import ( + SpArray, + SpMatrix, + ) from anndata.typing import AxisStorable, InMemoryArrayOrScalarType from .registry import Reader, Writer @@ -127,7 +130,7 @@ def wrapper( @_REGISTRY.register_read(H5Array, IOSpec("", "")) def read_basic( elem: H5File | H5Group | H5Array, *, _reader: Reader -) -> dict[str, InMemoryArrayOrScalarType] | npt.NDArray | sparse.spmatrix | SpArray: +) -> dict[str, InMemoryArrayOrScalarType] | npt.NDArray | SpMatrix | SpArray: from anndata._io import h5ad warn( @@ -149,7 +152,7 @@ def read_basic( @_REGISTRY.register_read(ZarrArray, IOSpec("", "")) def read_basic_zarr( elem: ZarrGroup | ZarrArray, *, _reader: Reader -) -> dict[str, InMemoryArrayOrScalarType] | npt.NDArray | sparse.spmatrix | SpArray: +) -> dict[str, InMemoryArrayOrScalarType] | npt.NDArray | SpMatrix | SpArray: from anndata._io import zarr warn( @@ -588,7 +591,7 @@ def write_recarray_zarr( def write_sparse_compressed( f: GroupStorageType, key: str, - value: sparse.spmatrix | SpArray, + value: SpMatrix | SpArray, *, _writer: Writer, fmt: Literal["csr", "csc"], @@ -755,9 +758,7 @@ def chunk_slice(start: int, stop: int) -> tuple[slice | None, slice | None]: @_REGISTRY.register_read(H5Group, IOSpec("csr_matrix", "0.1.0")) @_REGISTRY.register_read(ZarrGroup, IOSpec("csc_matrix", "0.1.0")) @_REGISTRY.register_read(ZarrGroup, IOSpec("csr_matrix", "0.1.0")) -def read_sparse( - elem: GroupStorageType, *, _reader: Reader -) -> sparse.spmatrix | SpArray: +def read_sparse(elem: GroupStorageType, *, _reader: Reader) -> SpMatrix | SpArray: return sparse_dataset(elem).to_memory() diff --git a/src/anndata/compat/__init__.py b/src/anndata/compat/__init__.py index 255ffa548..d6acc90ec 100644 --- a/src/anndata/compat/__init__.py +++ b/src/anndata/compat/__init__.py @@ -18,7 +18,6 @@ import numpy as np import pandas as pd import scipy -import scipy.sparse from packaging.version import Version from .exceptiongroups import add_note # noqa: F401 @@ -32,6 +31,7 @@ CAN_USE_SPARSE_ARRAY = Version(scipy.__version__) >= Version("1.11") +SpMatrix = scipy.sparse.csr_matrix | scipy.sparse.csc_matrix if not CAN_USE_SPARSE_ARRAY: @@ -40,7 +40,7 @@ class SpArray: def __repr__(): return "mock scipy.sparse.sparray" else: - SpArray = scipy.sparse.sparray + SpArray = scipy.sparse.csr_array | scipy.sparse.csc_array class Empty: @@ -56,7 +56,7 @@ class Empty: | tuple[Index1D, Index1D, EllipsisType] | tuple[EllipsisType, Index1D, Index1D] | tuple[Index1D, EllipsisType, Index1D] - | scipy.sparse.spmatrix + | SpMatrix | SpArray ) H5Group = h5py.Group diff --git a/src/anndata/tests/helpers.py b/src/anndata/tests/helpers.py index 6ed637ed8..979a2e27b 100644 --- a/src/anndata/tests/helpers.py +++ b/src/anndata/tests/helpers.py @@ -32,6 +32,7 @@ CupySparseMatrix, DaskArray, SpArray, + SpMatrix, ZarrArray, ) from anndata.utils import asarray @@ -598,7 +599,7 @@ def assert_equal_arrayview(a, b, exact=False, elem_name=None): @assert_equal.register(BaseCompressedSparseDataset) -@assert_equal.register(sparse.spmatrix) +@assert_equal.register(SpMatrix) def assert_equal_sparse(a, b, exact=False, elem_name=None): a = asarray(a) assert_equal(b, a, exact, elem_name=elem_name) @@ -785,7 +786,7 @@ def as_dense_dask_array(a): return da.asarray(a, chunks=_half_chunk_size(a.shape)) -@as_dense_dask_array.register(sparse.spmatrix) +@as_dense_dask_array.register(SpMatrix) def _(a): return as_dense_dask_array(a.toarray()) @@ -802,7 +803,7 @@ def as_sparse_dask_array(a) -> DaskArray: return da.from_array(sparse.csr_matrix(a), chunks=_half_chunk_size(a.shape)) -@as_sparse_dask_array.register(sparse.spmatrix) +@as_sparse_dask_array.register(SpMatrix) def _(a): import dask.array as da @@ -952,7 +953,7 @@ def as_cupy(val, typ=None): if issubclass(typ, CupyArray): import cupy as cp - if isinstance(val, sparse.spmatrix): + if isinstance(val, SpMatrix): val = val.toarray() return cp.array(val) elif issubclass(typ, CupyCSRMatrix): @@ -990,7 +991,7 @@ def shares_memory(x, y) -> bool: return np.shares_memory(x, y) -@shares_memory.register(sparse.spmatrix) +@shares_memory.register(SpMatrix) def shares_memory_sparse(x, y): return ( np.shares_memory(x.data, y.data) diff --git a/src/anndata/typing.py b/src/anndata/typing.py index d13927bad..8012a162d 100644 --- a/src/anndata/typing.py +++ b/src/anndata/typing.py @@ -5,7 +5,6 @@ import numpy as np import pandas as pd from numpy import ma -from scipy import sparse from . import abc from ._core.anndata import AnnData @@ -16,6 +15,7 @@ DaskArray, H5Array, SpArray, + SpMatrix, ZappyArray, ZarrArray, ) @@ -31,12 +31,10 @@ Index = _Index """1D or 2D index an :class:`~anndata.AnnData` object can be sliced with.""" - ArrayDataStructureType: TypeAlias = ( np.ndarray | ma.MaskedArray - | sparse.csr_matrix - | sparse.csc_matrix + | SpMatrix | SpArray | AwkArray | H5Array diff --git a/tests/test_base.py b/tests/test_base.py index e1401ed74..a9f1092a9 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -32,7 +32,7 @@ def test_creation(): AnnData(ma.array([[1, 2], [3, 4]]), uns=dict(mask=[0, 1, 1, 0])) AnnData(sp.eye(2, format="csr")) if CAN_USE_SPARSE_ARRAY: - AnnData(sp.eye_array(2)) + AnnData(sp.eye_array(2, format="csr")) X = np.array([[1, 2, 3], [4, 5, 6]]) adata = AnnData( X=X, diff --git a/tests/test_concatenate.py b/tests/test_concatenate.py index e034debd2..d9f399dd6 100644 --- a/tests/test_concatenate.py +++ b/tests/test_concatenate.py @@ -1044,7 +1044,9 @@ def gen_list(n): def gen_sparse(n): - return sparse.random(np.random.randint(1, 100), np.random.randint(1, 100)) + return sparse.random( + np.random.randint(1, 100), np.random.randint(1, 100), format="csr" + ) def gen_something(n): diff --git a/tests/test_x.py b/tests/test_x.py index 64b1bb87d..42de50b23 100644 --- a/tests/test_x.py +++ b/tests/test_x.py @@ -186,8 +186,8 @@ def test_set_dense_x_view_from_sparse(): def test_warn_on_non_csr_csc_matrix(): X = sparse.eye(100) - with pytest.warns( - FutureWarning, - match=rf"AnnData previously had undefined behavior around matrices of type {type(X)}.*", + with pytest.raises( + ValueError, + match=r"X needs to be of one of", ): ad.AnnData(X=X) From 4f97787f00eb92da417b20bf4053d90ea4edd56c Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 14 Nov 2024 15:37:26 +0100 Subject: [PATCH 2/9] (chore): alter tests as well --- tests/test_backed_hdf5.py | 6 +++--- tests/test_backed_sparse.py | 18 +++++++++--------- tests/test_concatenate.py | 10 +++++----- tests/test_concatenate_disk.py | 2 +- tests/test_io_conversion.py | 5 +++-- tests/test_io_dispatched.py | 5 ++--- tests/test_io_elementwise.py | 10 ++++++++-- tests/test_readwrite.py | 5 ++--- 8 files changed, 33 insertions(+), 28 deletions(-) diff --git a/tests/test_backed_hdf5.py b/tests/test_backed_hdf5.py index 19b4ca44d..509c91fc1 100644 --- a/tests/test_backed_hdf5.py +++ b/tests/test_backed_hdf5.py @@ -10,7 +10,7 @@ from scipy import sparse import anndata as ad -from anndata.compat import SpArray +from anndata.compat import SpArray, SpMatrix from anndata.tests.helpers import ( GEN_ADATA_DASK_ARGS, as_dense_dask_array, @@ -200,8 +200,8 @@ def test_backed_raw_subset(tmp_path, array_type, subset_func, subset_func2): var_idx = subset_func2(mem_adata.var_names) if ( array_type is asarray - and isinstance(obs_idx, list | np.ndarray | sparse.spmatrix | SpArray) - and isinstance(var_idx, list | np.ndarray | sparse.spmatrix | SpArray) + and isinstance(obs_idx, list | np.ndarray | SpMatrix | SpArray) + and isinstance(var_idx, list | np.ndarray | SpMatrix | SpArray) ): pytest.xfail( "Fancy indexing does not work with multiple arrays on a h5py.Dataset" diff --git a/tests/test_backed_sparse.py b/tests/test_backed_sparse.py index 2778c76bb..f2f023010 100644 --- a/tests/test_backed_sparse.py +++ b/tests/test_backed_sparse.py @@ -13,7 +13,7 @@ import anndata as ad from anndata._core.anndata import AnnData from anndata._core.sparse_dataset import sparse_dataset -from anndata.compat import CAN_USE_SPARSE_ARRAY, SpArray +from anndata.compat import CAN_USE_SPARSE_ARRAY, SpArray, SpMatrix from anndata.experimental import read_dispatched from anndata.tests.helpers import AccessTrackingStore, assert_equal, subset_func @@ -259,8 +259,8 @@ def test_consecutive_bool( ) def test_dataset_append_memory( tmp_path: Path, - sparse_format: Callable[[ArrayLike], sparse.spmatrix], - append_method: Callable[[list[sparse.spmatrix]], sparse.spmatrix], + sparse_format: Callable[[ArrayLike], SpMatrix], + append_method: Callable[[list[SpMatrix]], SpMatrix], diskfmt: Literal["h5ad", "zarr"], ): path = tmp_path / f"test.{diskfmt.replace('ad', '')}" @@ -296,7 +296,7 @@ def test_dataset_append_memory( ) def test_read_array( tmp_path: Path, - sparse_format: Callable[[ArrayLike], sparse.spmatrix], + sparse_format: Callable[[ArrayLike], SpMatrix], diskfmt: Literal["h5ad", "zarr"], subset_func, subset_func2, @@ -316,7 +316,7 @@ def test_read_array( ad.settings.use_sparse_array_on_read = True assert issubclass(type(diskmtx[obs_idx, var_idx]), SpArray) ad.settings.use_sparse_array_on_read = False - assert issubclass(type(diskmtx[obs_idx, var_idx]), sparse.spmatrix) + assert issubclass(type(diskmtx[obs_idx, var_idx]), SpMatrix) @pytest.mark.parametrize( @@ -328,8 +328,8 @@ def test_read_array( ) def test_dataset_append_disk( tmp_path: Path, - sparse_format: Callable[[ArrayLike], sparse.spmatrix], - append_method: Callable[[list[sparse.spmatrix]], sparse.spmatrix], + sparse_format: Callable[[ArrayLike], SpMatrix], + append_method: Callable[[list[SpMatrix]], SpMatrix], diskfmt: Literal["h5ad", "zarr"], ): path = tmp_path / f"test.{diskfmt.replace('ad', '')}" @@ -356,7 +356,7 @@ def test_dataset_append_disk( @pytest.mark.parametrize("sparse_format", [sparse.csr_matrix, sparse.csc_matrix]) def test_indptr_cache( tmp_path: Path, - sparse_format: Callable[[ArrayLike], sparse.spmatrix], + sparse_format: Callable[[ArrayLike], SpMatrix], ): path = tmp_path / "test.zarr" a = sparse_format(sparse.random(10, 10)) @@ -438,7 +438,7 @@ def width_idx_kinds( ) def test_data_access( tmp_path: Path, - sparse_format: Callable[[ArrayLike], sparse.spmatrix], + sparse_format: Callable[[ArrayLike], SpMatrix], idx_maj: Idx, idx_min: Idx, exp: Sequence[str], diff --git a/tests/test_concatenate.py b/tests/test_concatenate.py index d9f399dd6..72b6a89f3 100644 --- a/tests/test_concatenate.py +++ b/tests/test_concatenate.py @@ -20,7 +20,7 @@ from anndata import AnnData, Raw, concat from anndata._core import merge from anndata._core.index import _subset -from anndata.compat import AwkArray, CupySparseMatrix, DaskArray, SpArray +from anndata.compat import AwkArray, CupySparseMatrix, DaskArray, SpArray, SpMatrix from anndata.tests import helpers from anndata.tests.helpers import ( BASE_MATRIX_PARAMS, @@ -61,7 +61,7 @@ def _filled_array(a, fill_value=None): return as_dense_dask_array(_filled_array_np(a, fill_value)) -@filled_like.register(sparse.spmatrix) +@filled_like.register(SpMatrix) def _filled_sparse(a, fill_value=None): if fill_value is None: return sparse.csr_matrix(a.shape) @@ -200,7 +200,7 @@ def test_concatenate_roundtrip(join_type, array_type, concat_func, backwards_com if isinstance(orig.X, SpArray): base_type = SpArray else: - base_type = sparse.spmatrix + base_type = SpMatrix if isinstance(orig.X, CupySparseMatrix): base_type = CupySparseMatrix assert isinstance(result.X, base_type) @@ -404,7 +404,7 @@ def test_concatenate_obsm_outer(obsm_adatas, fill_val): ), ) - assert isinstance(outer.obsm["sparse"], sparse.spmatrix) + assert isinstance(outer.obsm["sparse"], SpMatrix) np.testing.assert_equal( outer.obsm["sparse"].toarray(), np.array( @@ -1496,7 +1496,7 @@ def test_concat_X_dtype(cpu_array_type, sparse_indexer_type): if sparse.issparse(result.X): # See https://github.com/scipy/scipy/issues/20389 for why this doesn't work with csc if sparse_indexer_type == np.int64 and ( - issubclass(cpu_array_type, sparse.spmatrix) or adata.X.format == "csc" + issubclass(cpu_array_type, SpMatrix) or adata.X.format == "csc" ): pytest.xfail( "Data type int64 is not maintained for sparse matrices or csc array" diff --git a/tests/test_concatenate_disk.py b/tests/test_concatenate_disk.py index a05d9a308..9bb8be289 100644 --- a/tests/test_concatenate_disk.py +++ b/tests/test_concatenate_disk.py @@ -30,7 +30,7 @@ pd.DataFrame, ), varm_types=(sparse.csr_matrix, np.ndarray, pd.DataFrame), - layers_types=(sparse.spmatrix, np.ndarray, pd.DataFrame), + layers_types=(sparse.csr_matrix, np.ndarray, pd.DataFrame), ) diff --git a/tests/test_io_conversion.py b/tests/test_io_conversion.py index 217a9cc16..0d0f0196c 100644 --- a/tests/test_io_conversion.py +++ b/tests/test_io_conversion.py @@ -10,6 +10,7 @@ from scipy import sparse import anndata as ad +from anndata.compat import SpMatrix from anndata.tests.helpers import assert_equal, gen_adata @@ -99,8 +100,8 @@ def test_dense_to_sparse_memory(tmp_path, spmtx_format, to_convert): orig = gen_adata((50, 50), np.array) orig.raw = orig.copy() orig.write_h5ad(dense_path) - assert not isinstance(orig.X, sparse.spmatrix) - assert not isinstance(orig.raw.X, sparse.spmatrix) + assert not isinstance(orig.X, SpMatrix) + assert not isinstance(orig.raw.X, SpMatrix) curr = ad.read_h5ad(dense_path, as_sparse=to_convert, as_sparse_fmt=spmtx_format) diff --git a/tests/test_io_dispatched.py b/tests/test_io_dispatched.py index 0bbbf285a..8b142ce79 100644 --- a/tests/test_io_dispatched.py +++ b/tests/test_io_dispatched.py @@ -4,10 +4,9 @@ import h5py import zarr -from scipy import sparse import anndata as ad -from anndata.compat import SpArray +from anndata.compat import SpArray, SpMatrix from anndata.experimental import read_dispatched, write_dispatched from anndata.tests.helpers import assert_equal, gen_adata @@ -96,7 +95,7 @@ def set_copy(d, **kwargs): # TODO: Should the passed path be absolute? path = "/" + store.path + "/" + k if hasattr(elem, "shape") and not isinstance( - elem, sparse.spmatrix | SpArray | ad.AnnData + elem, SpMatrix | SpArray | ad.AnnData ): if re.match(r"^/((X)|(layers)).*", path): chunks = (M, N) diff --git a/tests/test_io_elementwise.py b/tests/test_io_elementwise.py index 3ca5324b8..a33e34850 100644 --- a/tests/test_io_elementwise.py +++ b/tests/test_io_elementwise.py @@ -22,7 +22,13 @@ get_spec, ) from anndata._io.specs.registry import IORegistryError -from anndata.compat import CAN_USE_SPARSE_ARRAY, SpArray, ZarrGroup, _read_attr +from anndata.compat import ( + CAN_USE_SPARSE_ARRAY, + SpArray, + SpMatrix, + ZarrGroup, + _read_attr, +) from anndata.experimental import read_elem_as_dask from anndata.io import read_elem, write_elem from anndata.tests.helpers import ( @@ -220,7 +226,7 @@ def test_io_spec(store, value, encoding_type): @pytest.mark.parametrize("as_dask", [False, True]) def test_io_spec_cupy(store, value, encoding_type, as_dask): if as_dask: - if isinstance(value, sparse.spmatrix): + if isinstance(value, SpMatrix): value = as_cupy_sparse_dask_array(value, format=encoding_type[:3]) else: value = as_dense_cupy_dask_array(value) diff --git a/tests/test_readwrite.py b/tests/test_readwrite.py index 518559995..8a8861a4f 100644 --- a/tests/test_readwrite.py +++ b/tests/test_readwrite.py @@ -15,12 +15,11 @@ import pytest import zarr from numba.core.errors import NumbaDeprecationWarning -from scipy import sparse from scipy.sparse import csc_array, csc_matrix, csr_array, csr_matrix import anndata as ad from anndata._io.specs.registry import IORegistryError -from anndata.compat import DaskArray, SpArray, _read_attr +from anndata.compat import DaskArray, SpArray, SpMatrix, _read_attr from anndata.tests.helpers import as_dense_dask_array, assert_equal, gen_adata from testing.anndata._helpers import xfail_if_numpy2_loompy @@ -161,7 +160,7 @@ def test_readwrite_kitchensink(tmp_path, storage, typ, backing_h5ad, dataset_kwa # since we tested if assigned types and loaded types are DaskArray # this would also work if they work if isinstance(adata_src.raw.X, SpArray): - assert isinstance(adata.raw.X, sparse.spmatrix) + assert isinstance(adata.raw.X, SpMatrix) else: assert isinstance(adata_src.raw.X, type(adata.raw.X) | DaskArray) assert isinstance( From 8ea1c4ebe46aa3a1481d8a5fe4eae7bd87330b9d Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 14 Nov 2024 15:44:02 +0100 Subject: [PATCH 3/9] (chore): release note --- docs/release-notes/1767.breaking.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/release-notes/1767.breaking.md diff --git a/docs/release-notes/1767.breaking.md b/docs/release-notes/1767.breaking.md new file mode 100644 index 000000000..43449ebc7 --- /dev/null +++ b/docs/release-notes/1767.breaking.md @@ -0,0 +1 @@ +Tighten usage of {class}`scipy.sparse.spmatrix` for describing sparse matrices in types and instance checks to only {class}`scipy.sparse.csr_matrix` and {class}`scipy.sparse.csc_matrix` {user}`ilan-gold` From 02530726f96ac3f9d3455098c554981354b02c99 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 15 Nov 2024 14:11:01 +0100 Subject: [PATCH 4/9] (chore): update for scientific python deprecation schedule --- .azure-pipelines.yml | 14 +++++++------- .github/workflows/benchmark.yml | 2 +- .readthedocs.yml | 2 +- hatch.toml | 4 ++-- pyproject.toml | 15 +++++++++------ 5 files changed, 20 insertions(+), 17 deletions(-) diff --git a/.azure-pipelines.yml b/.azure-pipelines.yml index afc1b4153..ed0835174 100644 --- a/.azure-pipelines.yml +++ b/.azure-pipelines.yml @@ -15,17 +15,17 @@ jobs: strategy: matrix: Python3.12: - python.version: "3.12" + python.version: "3.13" RUN_COVERAGE: yes TEST_TYPE: "coverage" - Python3.10: - python.version: "3.10" + Python3.11: + python.version: "3.11" PreRelease: - python.version: "3.12" + python.version: "3.13" DEPENDENCIES_VERSION: "pre-release" TEST_TYPE: "strict-warning" minimum_versions: - python.version: "3.10" + python.version: "3.11" DEPENDENCIES_VERSION: "minimum" TEST_TYPE: "coverage" steps: @@ -108,8 +108,8 @@ jobs: steps: - task: UsePythonVersion@0 inputs: - versionSpec: "3.12" - displayName: "Use Python 3.12" + versionSpec: "3.13" + displayName: "Use Python 3.13" - script: | set -e diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index b7355b6b5..87d159833 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - python: ["3.12"] + python: ["3.13"] os: [ubuntu-latest] env: diff --git a/.readthedocs.yml b/.readthedocs.yml index 8fa840e28..71adba451 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -2,7 +2,7 @@ version: 2 build: os: ubuntu-20.04 tools: - python: "3.12" + python: "3.13" jobs: post_checkout: # unshallow so version can be derived from tag diff --git a/hatch.toml b/hatch.toml index 738056567..799213d71 100644 --- a/hatch.toml +++ b/hatch.toml @@ -21,8 +21,8 @@ overrides.matrix.deps.env-vars = [ { key = "UV_RESOLUTION", value = "lowest-direct", if = ["min"] }, ] overrides.matrix.deps.python = [ - { if = ["min"], value = "3.10" }, - { if = ["stable", "pre"], value = "3.12" }, + { if = ["min"], value = "3.11" }, + { if = ["stable", "pre"], value = "3.13" }, ] [[envs.hatch-test.matrix]] diff --git a/pyproject.toml b/pyproject.toml index 3cc1b31a3..471c33c82 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ requires = ["hatchling", "hatch-vcs"] [project] name = "anndata" description = "Annotated data." -requires-python = ">=3.10" +requires-python = ">=3.11" license = "BSD-3-Clause" authors = [ { name = "Philipp Angerer" }, @@ -29,19 +29,19 @@ classifiers = [ "Operating System :: Microsoft :: Windows", "Operating System :: POSIX :: Linux", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Topic :: Scientific/Engineering :: Bio-Informatics", "Topic :: Scientific/Engineering :: Visualization", ] dependencies = [ # pandas <1.4 has pandas/issues/35446 # pandas 2.1.0rc0 has pandas/issues/54622 - "pandas >=1.4, !=2.1.0rc0, !=2.1.2", - "numpy>=1.23", + "pandas >=2.0.0, !=2.1.0rc0, !=2.1.2", + "numpy>=1.25", # https://github.com/scverse/anndata/issues/1434 - "scipy >1.8", + "scipy >1.11", "h5py>=3.6", "exceptiongroup; python_version<'3.11'", "natsort", @@ -87,7 +87,7 @@ test = [ "loompy>=3.0.5", "pytest>=8.2", "pytest-cov>=2.10", - "zarr<3.0.0a0", + "zarr<3.0.0a0,>=2.15", "matplotlib", "scikit-learn", "openpyxl", @@ -101,6 +101,7 @@ test = [ "pytest_memray", "pytest-mock", "anndata[dask]", + "numba @ git+ssh://git@github.com/numba/numba" ] dev-test = ["pytest-xdist"] # local test speedups gpu = ["cupy"] @@ -116,6 +117,8 @@ version-file = "src/anndata/_version.py" raw-options.version_scheme = "release-branch-semver" [tool.hatch.build.targets.wheel] packages = ["src/anndata", "src/testing"] +[tool.hatch.metadata] +allow-direct-references = true [tool.coverage.run] data_file = "test-data/coverage" From b547a2c84ecdc58ca083bf04f1fc5d5ff49fb086 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 15 Nov 2024 14:16:16 +0100 Subject: [PATCH 5/9] (chore): change name in azure pipeline --- .azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.azure-pipelines.yml b/.azure-pipelines.yml index ed0835174..201ca5e30 100644 --- a/.azure-pipelines.yml +++ b/.azure-pipelines.yml @@ -14,7 +14,7 @@ jobs: vmImage: "ubuntu-22.04" strategy: matrix: - Python3.12: + Python3.13: python.version: "3.13" RUN_COVERAGE: yes TEST_TYPE: "coverage" From fb686af57de5ea3d9022be3973e26caec6c12a30 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 15 Nov 2024 14:18:31 +0100 Subject: [PATCH 6/9] (fix): use `https` --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 471c33c82..261da1b3f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -101,7 +101,7 @@ test = [ "pytest_memray", "pytest-mock", "anndata[dask]", - "numba @ git+ssh://git@github.com/numba/numba" + "numba @ git+https://git@github.com/numba/numba" ] dev-test = ["pytest-xdist"] # local test speedups gpu = ["cupy"] From b891b7cf1b597165cc2dc4c25a27ace0b211bf58 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 15 Nov 2024 14:20:43 +0100 Subject: [PATCH 7/9] (fix): remove `git@` --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 261da1b3f..bffee9a74 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -101,7 +101,7 @@ test = [ "pytest_memray", "pytest-mock", "anndata[dask]", - "numba @ git+https://git@github.com/numba/numba" + "numba @ git+https://github.com/numba/numba" ] dev-test = ["pytest-xdist"] # local test speedups gpu = ["cupy"] From 7ab8eee195caeeda35d3f588ec6c36f282316e46 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 15 Nov 2024 14:23:09 +0100 Subject: [PATCH 8/9] (chore): remove py3.11 checks --- ci/scripts/min-deps.py | 7 +------ src/anndata/_settings.py | 28 +++++++--------------------- src/anndata/compat/__init__.py | 24 ------------------------ src/testing/anndata/_pytest.py | 3 ++- 4 files changed, 10 insertions(+), 52 deletions(-) diff --git a/ci/scripts/min-deps.py b/ci/scripts/min-deps.py index c6bac0cf4..5af01c83e 100755 --- a/ci/scripts/min-deps.py +++ b/ci/scripts/min-deps.py @@ -2,16 +2,11 @@ from __future__ import annotations import argparse -import sys +import tomllib from collections import deque from pathlib import Path from typing import TYPE_CHECKING -if sys.version_info >= (3, 11): - import tomllib -else: - import tomli as tomllib - from packaging.requirements import Requirement from packaging.version import Version diff --git a/src/anndata/_settings.py b/src/anndata/_settings.py index f67633c08..31ce95fcd 100644 --- a/src/anndata/_settings.py +++ b/src/anndata/_settings.py @@ -2,7 +2,6 @@ import inspect import os -import sys import textwrap import warnings from collections.abc import Iterable @@ -53,27 +52,14 @@ def describe(self: RegisteredOption, *, as_rst: bool = False) -> str: return textwrap.dedent(doc) -if sys.version_info >= (3, 11): - - class RegisteredOption(NamedTuple, Generic[T]): - option: str - default_value: T - description: str - validate: Callable[[T], None] - type: object - - describe = describe - -else: - - class RegisteredOption(NamedTuple): - option: str - default_value: T - description: str - validate: Callable[[T], None] - type: object +class RegisteredOption(NamedTuple, Generic[T]): + option: str + default_value: T + description: str + validate: Callable[[T], None] + type: object - describe = describe + describe = describe def check_and_get_environ_var( diff --git a/src/anndata/compat/__init__.py b/src/anndata/compat/__init__.py index d6acc90ec..d64d054c6 100644 --- a/src/anndata/compat/__init__.py +++ b/src/anndata/compat/__init__.py @@ -1,15 +1,10 @@ from __future__ import annotations -import os -import sys from codecs import decode from collections.abc import Mapping -from contextlib import AbstractContextManager -from dataclasses import dataclass, field from functools import singledispatch, wraps from importlib.util import find_spec from inspect import Parameter, signature -from pathlib import Path from types import EllipsisType from typing import TYPE_CHECKING, TypeVar from warnings import warn @@ -20,8 +15,6 @@ import scipy from packaging.version import Version -from .exceptiongroups import add_note # noqa: F401 - if TYPE_CHECKING: from typing import Any @@ -69,23 +62,6 @@ class Empty: ############################# -if sys.version_info >= (3, 11): - from contextlib import chdir -else: - - @dataclass - class chdir(AbstractContextManager): - path: Path - _old_cwd: list[Path] = field(default_factory=list) - - def __enter__(self) -> None: - self._old_cwd.append(Path()) - os.chdir(self.path) - - def __exit__(self, *_exc_info) -> None: - os.chdir(self._old_cwd.pop()) - - ############################# # Optional deps ############################# diff --git a/src/testing/anndata/_pytest.py b/src/testing/anndata/_pytest.py index 5b0fd60e0..df4441c04 100644 --- a/src/testing/anndata/_pytest.py +++ b/src/testing/anndata/_pytest.py @@ -51,7 +51,8 @@ def _doctest_env( ) from scanpy import settings - from anndata.compat import chdir + from contextlib import chdir + from anndata.utils import import_name assert isinstance(request.node.parent, pytest.Module) From 94c1cdf6ed24dc1a4140951dad7fcfb72720b0a3 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 15 Nov 2024 14:34:13 +0100 Subject: [PATCH 9/9] (fix): remove scanpy for now --- pyproject.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index bffee9a74..ba08d73a5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,7 +93,6 @@ test = [ "openpyxl", "joblib", "boltons", - "scanpy", "httpx", # For data downloading "dask[distributed]", "awkward>=2.3", @@ -101,7 +100,6 @@ test = [ "pytest_memray", "pytest-mock", "anndata[dask]", - "numba @ git+https://github.com/numba/numba" ] dev-test = ["pytest-xdist"] # local test speedups gpu = ["cupy"]