Merge branch 'main' into ig/xarray_compat

scverse · Nov 28, 2024 · 8b95aff · 8b95aff
2 parents 4c00216 + 41369da
commit 8b95aff
Show file tree

Hide file tree

Showing 20 changed files with 302 additions and 142 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.7.2
+    rev: v0.7.4
     hooks:
       - id: ruff
         types_or: [python, pyi, jupyter]

diff --git a/ci/scripts/min-deps.py b/ci/scripts/min-deps.py
@@ -1,4 +1,11 @@
 #!/usr/bin/env python3
+# /// script
+# dependencies = [
+#   "tomli; python_version < '3.11'",
+#   "packaging",
+# ]
+# ///
+
 from __future__ import annotations
 
 import argparse
@@ -33,12 +40,14 @@ def min_dep(req: Requirement) -> Requirement:
     if req.extras:
         req_name = f"{req_name}[{','.join(req.extras)}]"
 
-    specs = [spec for spec in req.specifier if spec.operator in {">", ">=", "~=", "=="}]
-    if not specs:
+    filter_specs = [
+        spec for spec in req.specifier if spec.operator in {"==", "~=", ">=", ">"}
+    ]
+    if not filter_specs:
         return Requirement(req_name)
 
     min_version = Version("0.0.0.a1")
-    for spec in specs:
+    for spec in filter_specs:
         if spec.operator in {">", ">=", "~="}:
             min_version = max(min_version, Version(spec.version))
         elif spec.operator == "==":

diff --git a/ci/scripts/towncrier_automation.py b/ci/scripts/towncrier_automation.py
@@ -2,7 +2,9 @@
 from __future__ import annotations
 
 import argparse
+import re
 import subprocess
+from functools import cache
 from typing import TYPE_CHECKING
 
 from packaging.version import Version
@@ -11,8 +13,33 @@
     from collections.abc import Sequence
 
 
+class BumpVersion(Version):
+    def __init__(self, version: str) -> None:
+        super().__init__(version)
+
+        if len(self.release) != 3:
+            msg = f"{version} must contain major, minor, and patch version."
+            raise argparse.ArgumentTypeError(msg)
+
+        base_branch = get_base_branch()
+        patch_branch_pattern = re.compile(r"\d+\.\d+\.x")
+        if self.micro != 0 and not patch_branch_pattern.fullmatch(base_branch):
+            msg = (
+                f"{version} is a patch release, but "
+                f"you are trying to release from a non-patch release branch: {base_branch}."
+            )
+            raise argparse.ArgumentTypeError(msg)
+
+        if self.micro == 0 and base_branch != "main":
+            msg = (
+                f"{version} is a minor or major release, "
+                f"but you are trying to release not from main: {base_branch}."
+            )
+            raise argparse.ArgumentTypeError(msg)
+
+
 class Args(argparse.Namespace):
-    version: str
+    version: BumpVersion
     dry_run: bool
 
 
@@ -28,7 +55,7 @@ def parse_args(argv: Sequence[str] | None = None) -> Args:
     )
     parser.add_argument(
         "version",
-        type=str,
+        type=BumpVersion,
         help=(
             "The new version for the release must have at least three parts, like `major.minor.patch` and no `major.minor`. "
             "It can have a suffix like `major.minor.patch.dev0` or `major.minor.0rc1`."
@@ -40,10 +67,6 @@ def parse_args(argv: Sequence[str] | None = None) -> Args:
         action="store_true",
     )
     args = parser.parse_args(argv, Args())
-    # validate the version
-    if len(Version(args.version).release) != 3:
-        msg = f"Version argument {args.version} must contain major, minor, and patch version."
-        raise ValueError(msg)
     return args
 
 
@@ -56,15 +79,8 @@ def main(argv: Sequence[str] | None = None) -> None:
     )
 
     # Check if we are on the main branch to know if we need to backport
-    base_branch = subprocess.run(
-        ["git", "rev-parse", "--abbrev-ref", "HEAD"],
-        capture_output=True,
-        text=True,
-        check=True,
-    ).stdout.strip()
-    pr_description = (
-        "" if base_branch == "main" else "@meeseeksmachine backport to main"
-    )
+    base_branch = get_base_branch()
+    pr_description = "" if base_branch == "main" else "@meeseeksdev backport to main"
     branch_name = f"release_notes_{args.version}"
 
     # Create a new branch + commit
@@ -106,5 +122,15 @@ def main(argv: Sequence[str] | None = None) -> None:
         print("Dry run, not merging")
 
 
+@cache
+def get_base_branch():
+    return subprocess.run(
+        ["git", "rev-parse", "--abbrev-ref", "HEAD"],
+        capture_output=True,
+        text=True,
+        check=True,
+    ).stdout.strip()
+
+
 if __name__ == "__main__":
     main()
diff --git a/docs/release-notes/0.11.0.md b/docs/release-notes/0.11.0.md
@@ -1,12 +1,49 @@
 (v0.11.0)=
 ### 0.11.0 {small}`2024-11-07`
 
+Release candidates:
+
+- (v0.11.0rc3)=
+  {guilabel}`rc3` 2024-10-14
+- (v0.11.0rc2)=
+  {guilabel}`rc2` 2024-09-24
+- (v0.11.0rc1)=
+  {guilabel}`rc1` 2024-09-04
+
 #### Bug fixes
 
 - Ensure {func}`anndata.concat` of {class}`~anndata.AnnData` object with {class}`scipy.sparse.spmatrix` and {class}`scipy.sparse.sparray` dask arrays uses the correct fill value of 0. {user}`ilan-gold` ({pr}`1719`)
-- Ensure that views of AwkwardArrays have their "view" attributes removed on saving an {class}`~anndata.AnnData` object
-  to disk. {user}`grst` ({pr}`1736`)
+- Ensure that views of AwkwardArrays have their "view" attributes removed on saving an {class}`~anndata.AnnData` object to disk. {user}`grst` ({pr}`1736`)
+
+#### Breaking changes
+
+- {guilabel}`rc3` Drop support for `python` 3.9 {user}`ilan-gold` ({pr}`1712`)
+- {guilabel}`rc2` A new `anndata.io` module contains all `read_*` and `write_*` functions, and all imports of such functions should go through this module. Old ways of importing these functions, i.e., `from anndata import read_csv` or `from anndata._io.specs import read_elem`, will still work, but are now considered deprecated and give a warning on import, with the exception of {func}`anndata.io.read_zarr` and {func}`anndata.io.read_h5ad`, which will remain at the top-level `anndata` without warning. {user}`ilan-gold` ({pr}`1682`)
+- {guilabel}`rc1` Removed deprecated modules `anndata.core` and `anndata.readwrite` {user}`ivirshup` ({pr}`1197`)
+- {guilabel}`rc1` No longer export `sparse_dataset` from `anndata.experimental`, instead exporting {func}`anndata.io.sparse_dataset` {user}`ilan-gold` ({pr}`1642`)
+- {guilabel}`rc1` Move `RWAble` and `InMemoryElem` out of `experimental`, renaming `RWAble` to {type}`~anndata.typing.AxisStorable` and `InMemoryElem` to {type}`~anndata.typing.RWAble` {user}`ilan-gold` ({pr}`1643`)
+
+#### Development Process
+
+- {guilabel}`rc2` Add extra `dask` dependency for installation i.e., `pip install anndata[dask]` {user}`ilan-gold` ({pr}`1677`)
+- {guilabel}`rc2` Remove `shall_` from variable names in `settings` {user}`ilan-gold` ({pr}`1685`)
+- {guilabel}`rc1` Create new `cupy` installation options for cuda 11 & 12 called `cu11` and `cu12` {user}`Intron7` ({pr}`1596`)
+
+#### Documentation
+
+- {guilabel}`rc1` Correct {attr}`anndata.AnnData.X` type to include {class}`~anndata.abc.CSRDataset` and {class}`~anndata.abc.CSCDataset` as possible types and begin the deprecation process for non-csr/csc {class}`scipy.sparse.spmatrix` types in {attr}`anndata.AnnData.X` {user}`ilan-gold` ({pr}`1616`)
 
 #### Features
 
 - Add support for ellipsis indexing of the {class}`~anndata.AnnData` object {user}`ilan-gold` ({pr}`1729`)
+- {guilabel}`rc1` `scipy.sparse.csr_array` and `scipy.sparse.csc_array` are now supported when constructing `AnnData` objects {user}`ilan-gold` {user}`ivirshup` ({pr}`1028`)
+- {guilabel}`rc1` Allow `axis` parameter of e.g. {func}`anndata.concat` to accept `'obs'` and `'var'` {user}`flying-sheep` ({pr}`1244`)
+- {guilabel}`rc1` Add `settings` object with methods for altering internally-used options, like checking for uniqueness on `obs`' index {user}`ilan-gold` ({pr}`1270`)
+- {guilabel}`rc1` Add {attr}`~anndata.settings.remove_unused_categories` option to {attr}`anndata.settings` to override current behavior {user}`ilan-gold` ({pr}`1340`)
+- {guilabel}`rc1` Add {func}`~anndata.experimental.read_elem_as_dask` function to handle i/o with sparse and dense arrays {user}`ilan-gold` ({pr}`1469`)
+- {guilabel}`rc1` Add ability to convert strings to categoricals on write in {meth}`~anndata.AnnData.write_h5ad` and {meth}`~anndata.AnnData.write_zarr` via `convert_strings_to_categoricals` parameter {user}`falexwolf` ({pr}`1474`)
+- {guilabel}`rc1` Add {attr}`~anndata.settings.check_uniqueness` option to {attr}`anndata.settings` to override current behavior {user}`ilan-gold` ({pr}`1507`)
+- {guilabel}`rc1` Add functionality to write from GPU {class}`dask.array.Array` to disk {user}`ilan-gold` ({pr}`1550`)
+- {guilabel}`rc1` Read and write support for nullable string arrays ({class}`pandas.arrays.StringArray`). Use pandas’ {doc}`pandas:user_guide/options` `mode.string_storage` to control which storage mode is used when reading `dtype="string"` columns. {user}`flying-sheep` ({pr}`1558`)
+- {guilabel}`rc1` Export {func}`~anndata.io.write_elem` and {func}`~anndata.io.read_elem` directly from the main package instead of `experimental` {user}`ilan-gold` ({pr}`1598`)
+- {guilabel}`rc1` Allow reading sparse data (via {func}`~anndata.io.read_elem` or {func}`~anndata.io.sparse_dataset`) into either {class}`scipy.sparse.csr_array` or {class}`scipy.sparse.csc_array` via {attr}`anndata.settings.use_sparse_array_on_read` {user}`ilan-gold` ({pr}`1633`)
diff --git a/docs/release-notes/0.11.0rc1.md b/docs/release-notes/0.11.0rc1.md
diff --git a/docs/release-notes/0.11.0rc2.md b/docs/release-notes/0.11.0rc2.md
diff --git a/docs/release-notes/0.11.0rc3.md b/docs/release-notes/0.11.0rc3.md
diff --git a/docs/release-notes/0.11.1.md b/docs/release-notes/0.11.1.md
@@ -0,0 +1,8 @@
+(v0.11.1)=
+### 0.11.1 {small}`2024-11-12`
+
+#### Bug fixes
+
+- Remove upper pin on `dask` and exclude versions broken with sparse indexing {user}`ilan-gold` ({pr}`1725`)
+- Fix chunking with -1 in `chunks` argument of {func}`~anndata.experimental.read_elem_as_dask` {user}`ilan-gold` ({pr}`1743`)
+- Fix `cupy<0.13` imports in non-gpu environments {user}`ilan-gold` ({pr}`1754`)
diff --git a/docs/release-notes/1744.bugfix.md b/docs/release-notes/1744.bugfix.md
@@ -0,0 +1 @@
+Cache accesses to the `data` and `indices` arrays in {class}`~anndata.abc.CSRDataset` and {class}`~anndata.abc.CSCDataset` {user}`ilan-gold`
diff --git a/pyproject.toml b/pyproject.toml
@@ -109,8 +109,8 @@ gpu = ["cupy"]
 cu12 = ["cupy-cuda12x"]
 cu11 = ["cupy-cuda11x"]
 # https://github.com/dask/dask/issues/11290
-dask = ["dask[array]>=2022.09.2,<2024.8.0"]
 lazy = ["xarray>=2024.06.0", "aiohttp", "requests", "zarr<3.0.0a0", "anndata[dask]"]
+dask = ["dask[array]>=2022.09.2,!=2024.8.*,!=2024.9.*"]
 
 [tool.hatch.version]
 source = "vcs"
@@ -184,6 +184,7 @@ select = [
     "ICN", # Follow import conventions
     "PTH", # Pathlib instead of os.path
     "PT",  # Pytest conventions
+    "PYI", # Typing
 ]
 ignore = [
     # line too long -> we accept long comment lines; formatter gets rid of long code lines

diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py
@@ -1170,9 +1170,7 @@ def _inplace_subset_obs(self, index: Index1D):
         self._init_as_actual(adata_subset)
 
     # TODO: Update, possibly remove
-    def __setitem__(
-        self, index: Index, val: int | float | np.ndarray | sparse.spmatrix
-    ):
+    def __setitem__(self, index: Index, val: float | np.ndarray | sparse.spmatrix):
         if self.is_view:
             raise ValueError("Object is view and cannot be accessed with `[]`.")
         obs, var = self._normalize_indices(index)

diff --git a/src/anndata/_core/sparse_dataset.py b/src/anndata/_core/sparse_dataset.py
@@ -38,6 +38,7 @@
     from scipy.sparse._compressed import _cs_matrix
 
     from .._types import GroupStorageType
+    from ..compat import H5Array
     from .index import Index
 else:
     from scipy.sparse import spmatrix as _cs_matrix
@@ -380,7 +381,7 @@ def backend(self) -> Literal["zarr", "hdf5"]:
     @property
     def dtype(self) -> np.dtype:
         """The :class:`numpy.dtype` of the `data` attribute of the sparse matrix."""
-        return self.group["data"].dtype
+        return self._data.dtype
 
     @classmethod
     def _check_group_format(cls, group):
@@ -545,16 +546,18 @@ def append(self, sparse_matrix: ss.csr_matrix | ss.csc_matrix | SpArray) -> None
         indptr[orig_data_size:] = (
             sparse_matrix.indptr[1:].astype(np.int64) + indptr_offset
         )
-        # Clear cached property
-        if hasattr(self, "indptr"):
-            del self._indptr
 
         # indices
         indices = self.group["indices"]
         orig_data_size = indices.shape[0]
         indices.resize((orig_data_size + sparse_matrix.indices.shape[0],))
         indices[orig_data_size:] = sparse_matrix.indices
 
+        # Clear cached property
+        for attr in ["_indptr", "_indices", "_data"]:
+            if hasattr(self, attr):
+                delattr(self, attr)
+
     @cached_property
     def _indptr(self) -> np.ndarray:
         """\
@@ -565,11 +568,25 @@ def _indptr(self) -> np.ndarray:
         arr = self.group["indptr"][...]
         return arr
 
+    @cached_property
+    def _indices(self) -> H5Array | ZarrArray:
+        """\
+        Cache access to the indices to prevent unnecessary reads of the zarray
+        """
+        return self.group["indices"]
+
+    @cached_property
+    def _data(self) -> H5Array | ZarrArray:
+        """\
+        Cache access to the data to prevent unnecessary reads of the zarray
+        """
+        return self.group["data"]
+
     def _to_backed(self) -> BackedSparseMatrix:
         format_class = get_backed_class(self.format)
         mtx = format_class(self.shape, dtype=self.dtype)
-        mtx.data = self.group["data"]
-        mtx.indices = self.group["indices"]
+        mtx.data = self._data
+        mtx.indices = self._indices
         mtx.indptr = self._indptr
         return mtx
 
@@ -578,8 +595,8 @@ def to_memory(self) -> ss.csr_matrix | ss.csc_matrix | SpArray:
             self.format, use_sparray_in_io=settings.use_sparse_array_on_read
         )
         mtx = format_class(self.shape, dtype=self.dtype)
-        mtx.data = self.group["data"][...]
-        mtx.indices = self.group["indices"][...]
+        mtx.data = self._data[...]
+        mtx.indices = self._indices[...]
         mtx.indptr = self._indptr
         return mtx
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Cache accesses to the `data` and `indices` arrays in {class}`~anndata.abc.CSRDataset` and {class}`~anndata.abc.CSCDataset` {user}`ilan-gold`