Prep 0.8 (#689)

ivirshup · web-flow · commit 555f54693821 · 2022-01-24T14:53:37.000+01:00
* Fix pr ref

* draft

* Typo fix

* note removal of xlrd dep

* Remove datetime addition from release notes

* Add read_elem, write_elem to release (breaks build)

Added `read_elem`, `write_elem` to experimental and release notes.

Breaks build due to typing in docs.

* Make docs build

* A little cleanup around X=None

* Bump required version of h5py (tentative change)

* pandas 1.4.0 compat (only test suite was broken, probably don't need release)

* Note release candidate is out
diff --git a/anndata/_core/anndata.py b/anndata/_core/anndata.py
@@ -116,7 +116,7 @@ def _gen_dataframe(anno, length, index_names):
 
 @_gen_dataframe.register(pd.DataFrame)
 def _(anno, length, index_names):
-    anno = anno.copy()
+    anno = anno.copy(deep=False)
     if not is_string_dtype(anno.index):
         warnings.warn("Transforming to str index.", ImplicitModificationWarning)
         anno.index = anno.index.astype(str)
diff --git a/anndata/_io/h5ad.py b/anndata/_io/h5ad.py
@@ -30,8 +30,6 @@
 from .specs import read_elem, write_elem
 from anndata._warnings import OldFormatWarning
 
-H5Group = Union[h5py.Group, h5py.File]
-H5Dataset = h5py.Dataset
 T = TypeVar("T")
 
 
diff --git a/anndata/_io/specs/registry.py b/anndata/_io/specs/registry.py
@@ -2,10 +2,10 @@
 
 from collections.abc import Mapping
 from functools import singledispatch, wraps
-from typing import NamedTuple, Tuple, Type, Callable, Union
+from typing import Any, NamedTuple, Tuple, Type, Callable, Union
 
 
-from anndata.compat import _read_attr
+from anndata.compat import _read_attr, ZarrArray, ZarrGroup, H5Group, H5Array
 from anndata._io.utils import report_write_key_on_error, report_read_key_on_error
 
 # TODO: This probably should be replaced by a hashable Mapping due to conversion b/w "_" and "-"
@@ -137,15 +137,15 @@ def get_spec(
 
 @report_write_key_on_error
 def write_elem(
-    f: "Union[h5py.Group, zarr.Group]",
+    f: "Union[H5Group, ZarrGroup]",
     k: str,
-    elem,
+    elem: Any,
     *args,
     modifiers=frozenset(),
     **kwargs,
 ):
     """
-    Write an element to a disk store.
+    Write an element to a disk store using its anndata encoding.
 
     Params
     ------
@@ -175,7 +175,10 @@ def write_elem(
         _REGISTRY.get_writer(dest_type, t, modifiers)(f, k, elem, *args, **kwargs)
 
 
-def read_elem(elem, modifiers: frozenset(str) = frozenset()):
+def read_elem(
+    elem: Union[H5Array, H5Group, ZarrGroup, ZarrArray],
+    modifiers: frozenset(str) = frozenset(),
+) -> Any:
     """Read an element from an on disk store."""
     return _REGISTRY.get_reader(type(elem), get_spec(elem), frozenset(modifiers))(elem)
 
diff --git a/anndata/compat/__init__.py b/anndata/compat/__init__.py
@@ -18,6 +18,10 @@ class Empty:
     pass
 
 
+H5Group = Union[h5py.Group, h5py.File]
+H5Array = h5py.Dataset
+
+
 # try importing zarr, dask, and zappy
 from packaging import version
 
diff --git a/anndata/experimental/__init__.py b/anndata/experimental/__init__.py
@@ -1,2 +1,4 @@
 from .multi_files import AnnCollection
 from .pytorch import AnnLoader
+
+from anndata._io.specs import read_elem, write_elem
diff --git a/anndata/tests/helpers.py b/anndata/tests/helpers.py
@@ -222,7 +222,7 @@ def slice_subset(index, min_size=2):
 
 
 def single_subset(index):
-    return index[np.random.randint(0, len(index), size=())]
+    return index[np.random.randint(0, len(index))]
 
 
 @pytest.fixture(
diff --git a/anndata/tests/test_readwrite.py b/anndata/tests/test_readwrite.py
@@ -525,7 +525,6 @@ def test_read_umi_tools():
 def test_write_categorical(tmp_path, diskfmt):
     adata_pth = tmp_path / f"adata.{diskfmt}"
     orig = ad.AnnData(
-        X=np.ones((5, 5)),
         obs=pd.DataFrame(
             dict(
                 cat1=["a", "a", "b", np.nan, np.nan],
@@ -542,7 +541,6 @@ def test_write_categorical(tmp_path, diskfmt):
 def test_write_categorical_index(tmp_path, diskfmt):
     adata_pth = tmp_path / f"adata.{diskfmt}"
     orig = ad.AnnData(
-        X=np.ones((5, 5)),
         uns={"df": pd.DataFrame(index=pd.Categorical(list("aabcd")))},
     )
     getattr(orig, f"write_{diskfmt}")(adata_pth)
@@ -557,7 +555,9 @@ def test_write_categorical_index(tmp_path, diskfmt):
 def test_dataframe_reserved_columns(tmp_path, diskfmt):
     reserved = ("_index",)
     adata_pth = tmp_path / f"adata.{diskfmt}"
-    orig = ad.AnnData(X=np.ones((5, 5)))
+    orig = ad.AnnData(
+        obs=pd.DataFrame(index=np.arange(5)), var=pd.DataFrame(index=np.arange(5))
+    )
     for colname in reserved:
         to_write = orig.copy()
         to_write.obs[colname] = np.ones(5)
@@ -608,7 +608,6 @@ def test_write_string_types(tmp_path, diskfmt):
     adata_pth = tmp_path / f"adata.{diskfmt}"
 
     adata = ad.AnnData(
-        np.ones((3, 3)),
         obs=pd.DataFrame(
             np.ones((3, 2)),
             columns=["a", np.str_("b")],
diff --git a/docs/api.rst b/docs/api.rst
@@ -80,6 +80,15 @@ Two classes for working with batched access to collections of many `AnnData` obj
    experimental.AnnCollection
    experimental.AnnLoader
 
+Low level methods for reading and writing elements of an `AnnData` object to a store:
+
+
+.. autosummary::
+   :toctree: generated/
+
+   experimental.read_elem
+   experimental.write_elem
+
 
 Errors and warnings
 -------------------
diff --git a/docs/conf.py b/docs/conf.py
@@ -87,6 +87,8 @@ def setup(app: Sphinx):
     xarray=("http://xarray.pydata.org/en/stable/", None),
 )
 qualname_overrides = {
+    "h5py._hl.group.Group": "h5py.Group",
+    "h5py._hl.files.File": "h5py.File",
     "anndata._core.anndata.AnnData": "anndata.AnnData",
     # Temporarily
     "anndata._core.raw.Raw": "anndata.AnnData",
diff --git a/docs/release-latest.rst b/docs/release-latest.rst
@@ -2,20 +2,38 @@
 .. role:: smaller
 
 
-On `master` :small:`the future`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+`0.8.0` :small:`the future`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-.. rubric:: Bug fixes
+.. note:: 0.8.0 is currently in the release candidate phase. Install this version with `pip install "anndata==0.8.0rc1"`.
+
+.. rubric:: IO Specification
+
+Internal handling of IO has been overhauled.
+This should make it much easier to support new datatypes, use partial access, and use `AnnData` internally in other formats.
+
+- Each element should be tagged with an `encoding_type` and `encoding_version`. See updated docs on the :doc:`file format <fileformat-prose>`
+- Support for nullable integer and boolean data arrays. More data types to come!
+- Experimental support for low level access to the IO API via :func:`~anndata.experimental.read_elem` and :func:`~anndata.experimental.write_elem`
 
 .. rubric:: Features
 
 - Compatibility with `h5ad` files written from Julia :pr:`569` :smaller:`I Kats`
 - Many logging messages that should have been warnings are now warnings :pr:`650` :smaller:`I Virshup`
 - Significantly more efficient :func:`anndata.read_umi_tools` :pr:`661` :smaller:`I Virshup`
 - Fixed deepcopy of a copy of a view retaining sparse matrix view mixin type :pr:`670` :smaller:`M Klein`
-- In many cases :attr:`~anndata.AnnData.X` can now be `None` :pr:`463` :smaller:`R Cannoodt` :pr:`667` :smaller:`I Virshup`. Remaining work is documented in :issue:`467`.
+- In many cases :attr:`~anndata.AnnData.X` can now be `None` :pr:`463` :smaller:`R Cannoodt` :pr:`677` :smaller:`I Virshup`. Remaining work is documented in :issue:`467`.
+- Removed hard `xlrd` dependency :smaller:`I Virshup`
+- `obs` and `var` dataframes are no longer copied by default on `AnnData` instantiation :issue:`371` :smaller:`I Virshup`
+
+.. rubric:: Bug fixes
+
+- Fixed issue where `.copy` was creating sparse matrix views when copying :pr:`670` :smaller:`michalk8`
+
+.. rubric:: Dependencies
 
-.. rubric:: Documentation
+* `xlrd` dropped as a hard dependency
+* Now requires `h5py` `v3.0.0` or newer
 
 
 0.7.8 :small:`9 November, 2021`
diff --git a/pyproject.toml b/pyproject.toml
@@ -44,7 +44,7 @@ dependencies = [
     "pandas>=1.1.1",  # pandas <1.1.1 has pandas/issues/35446
     "numpy>=1.16.5",  # required by pandas 1.x
     "scipy>1.4",
-    "h5py",
+    "h5py>=3",
     "natsort",
     "packaging>=20",
     # for getting the stable version

Original file line number	Diff line number	Diff line change
`@@ -87,6 +87,8 @@ def setup(app: Sphinx):`
`87`	`87`	`xarray=("http://xarray.pydata.org/en/stable/", None),`
`88`	`88`	`)`
`89`	`89`	`qualname_overrides = {`
	`90`	`+ "h5py._hl.group.Group": "h5py.Group",`
	`91`	`+ "h5py._hl.files.File": "h5py.File",`
`90`	`92`	`"anndata._core.anndata.AnnData": "anndata.AnnData",`
`91`	`93`	`# Temporarily`
`92`	`94`	`"anndata._core.raw.Raw": "anndata.AnnData",`