Fix anndata warnings (#2779)
flying-sheep authored Dec 15, 2023
1 parent 737da3a commit 86dc4d5
Showing 16 changed files with 95 additions and 40 deletions.
1 change: 1 addition & 0 deletions docs/release-notes/1.9.7.md
@@ -6,3 +6,4 @@
- Specify correct version of `matplotlib` dependency {pr}`2733` {smaller}`P Fisher`
- Fix {func}`scanpy.pl.violin` usage of `seaborn.catplot` {pr}`2739` {smaller}`E Roellin`
- Fix {func}`scanpy.pp.highly_variable_genes` to handle the combinations of `inplace` and `subset` consistently {pr}`2757` {smaller}`E Roellin`
- Replace usage of various deprecated functionality from {mod}`anndata` and {mod}`pandas` {pr}`2678` {pr}`2779` {smaller}`P Angerer`
14 changes: 14 additions & 0 deletions pyproject.toml
@@ -167,6 +167,15 @@ markers = [
"internet: tests which rely on internet resources (enable with `--internet-tests`)",
"gpu: tests that use a GPU (currently unused, but needs to be specified here as we import anndata.tests.helpers, which uses it)",
]
filterwarnings = [
# When calling `.show()` in tests, this is raised
"ignore:FigureCanvasAgg is non-interactive:UserWarning",
# We explicitly handle these errors in tests
"error:`anndata.read` is deprecated:FutureWarning",
"error:Observation names are not unique:UserWarning",
"error:The dtype argument is deprecated and will be removed:FutureWarning",
"error:The behavior of DataFrame\\.sum with axis=None is deprecated:FutureWarning",
]

[tool.coverage.run]
data_file = "test-data/coverage"
@@ -176,6 +185,11 @@ omit = ["*/tests/*"]
output = "test-data/coverage.xml"
[tool.coverage.paths]
source = [".", "**/site-packages"]
[tool.coverage.report]
exclude_also = [
"if __name__ == .__main__.:",
"if TYPE_CHECKING:",
]

[tool.ruff]
select = [
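Note on the new `filterwarnings` entries: they use pytest's `action:message:category` syntax. The `error:` lines promote those specific anndata and pandas deprecation warnings to test failures, while the single `ignore:` line silences the non-interactive-backend notice triggered by `.show()` calls. Any test that deliberately provokes one of the escalated warnings therefore has to assert it explicitly, roughly like this (a sketch, not taken from this diff):

import warnings
import pytest

def test_deprecated_call_is_asserted():
    # With "error:<message>:FutureWarning" active in pyproject.toml, letting this
    # warning escape would fail the test, so it is captured and checked instead.
    with pytest.warns(FutureWarning, match="deprecated"):
        warnings.warn("`anndata.read` is deprecated", FutureWarning)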
2 changes: 1 addition & 1 deletion scanpy/datasets/_ebi_expression_atlas.py
@@ -123,7 +123,7 @@ def ebi_expression_atlas(
experiment_dir = settings.datasetdir / accession
dataset_path = experiment_dir / f"{accession}.h5ad"
try:
adata = anndata.read(dataset_path)
adata = anndata.read_h5ad(dataset_path)
if filter_boring:
adata.obs = _filter_boring(adata.obs)
return adata
11 changes: 5 additions & 6 deletions scanpy/external/pp/_bbknn.py
@@ -70,18 +70,17 @@ def bbknn(
PyNNDescent supports metrics listed in `pynndescent.distances.named_distances`
and custom functions, including compiled Numba code.
>>> pynndescent.distances.named_distances.keys()
>>> import pynndescent
>>> pynndescent.distances.named_distances.keys() # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
dict_keys(['euclidean', 'l2', 'sqeuclidean', 'manhattan', 'taxicab', 'l1', 'chebyshev', 'linfinity',
'linfty', 'linf', 'minkowski', 'seuclidean', 'standardised_euclidean', 'wminkowski', 'weighted_minkowski',
'mahalanobis', 'canberra', 'cosine', 'dot', 'correlation', 'hellinger', 'haversine', 'braycurtis', 'spearmanr',
'kantorovich', 'wasserstein', 'tsss', 'true_angular', 'hamming', 'jaccard', 'dice', 'matching', 'kulsinski',
'rogerstanimoto', 'russellrao', 'sokalsneath', 'sokalmichener', 'yule'])
'linfty', 'linf', 'minkowski', 'seuclidean', 'standardised_euclidean', 'wminkowski', ...])
KDTree supports members of :class:`sklearn.neighbors.KDTree`’s ``valid_metrics`` list, or parameterised
:class:`~sklearn.metrics.DistanceMetric` objects:
>>> import sklearn.neighbors
>>> sklearn.neighbors.KDTree.valid_metrics
['p', 'chebyshev', 'cityblock', 'minkowski', 'infinity', 'l2', 'euclidean', 'manhattan', 'l1']
['euclidean', 'l2', 'minkowski', 'p', 'manhattan', 'cityblock', 'l1', 'chebyshev', 'infinity']
.. note:: check the relevant documentation for up-to-date lists.
copy
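Note: the trimmed expected output above relies on the newly added doctest directives: `+ELLIPSIS` lets the literal `...` stand in for the remaining metric names, and `+NORMALIZE_WHITESPACE` tolerates the reflowed lines. A minimal standalone illustration of the same mechanism (toy data, unrelated to pynndescent):

import doctest

def metrics_demo():
    """
    >>> sorted(["euclidean", "manhattan", "cosine", "chebyshev"])  # doctest: +ELLIPSIS
    ['chebyshev', 'cosine', ...]
    """

# testmod collects and runs the docstring examples in this module; the
# +ELLIPSIS directive lets '...' match the omitted tail of the expected output.
if __name__ == "__main__":
    raise SystemExit(doctest.testmod().failed)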
2 changes: 1 addition & 1 deletion scanpy/external/pp/_hashsolo.py
@@ -324,7 +324,7 @@ def hashsolo(
-------
>>> import anndata
>>> import scanpy.external as sce
>>> adata = anndata.read("data.h5ad")
>>> adata = anndata.read_h5ad("data.h5ad")
>>> sce.pp.hashsolo(adata, ['Hash1', 'Hash2', 'Hash3'])
>>> adata.obs.head()
"""
3 changes: 3 additions & 0 deletions scanpy/preprocessing/_simple.py
@@ -96,6 +96,9 @@ def filter_cells(
--------
>>> import scanpy as sc
>>> adata = sc.datasets.krumsiek11()
UserWarning: Observation names are not unique. To make them unique, call `.obs_names_make_unique`.
utils.warn_names_duplicates("obs")
>>> adata.obs_names_make_unique()
>>> adata.n_obs
640
>>> adata.var_names.tolist() # doctest: +NORMALIZE_WHITESPACE
4 changes: 2 additions & 2 deletions scanpy/readwrite.py
@@ -14,12 +14,12 @@
AnnData,
read_csv,
read_excel,
read_h5ad,
read_hdf,
read_loom,
read_mtx,
read_text,
)
from anndata import read as read_h5ad
from matplotlib.image import imread

from . import logging as logg
@@ -904,7 +904,7 @@ def _read_softgz(filename: str | bytes | Path | BinaryIO) -> AnnData:
X = np.array(X).T
obs = pd.DataFrame({"groups": groups}, index=sample_names)
var = pd.DataFrame(index=gene_names)
return AnnData(X=X, obs=obs, var=var, dtype=X.dtype)
return AnnData(X=X, obs=obs, var=var)


# -------------------------------------------------------------------------------
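Note: both hunks in this file track anndata deprecations. `read_h5ad` is now imported directly instead of aliasing the deprecated module-level `read`, and `AnnData` is no longer given a `dtype=` argument, so the matrix has to carry the intended dtype itself. A hedged sketch of the resulting call pattern (file name and toy data are illustrative only):

import numpy as np
import pandas as pd
from anndata import AnnData, read_h5ad  # preferred over the deprecated `anndata.read`

# Cast the matrix up front instead of passing dtype= to the AnnData constructor.
X = np.arange(12, dtype=np.float32).reshape(3, 4)
adata = AnnData(
    X=X,
    obs=pd.DataFrame(index=[f"cell{i}" for i in range(3)]),
    var=pd.DataFrame(index=[f"gene{i}" for i in range(4)]),
)
adata.write_h5ad("example.h5ad")  # hypothetical path
assert read_h5ad("example.h5ad").shape == (3, 4)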
8 changes: 7 additions & 1 deletion scanpy/testing/_helpers/data.py
@@ -5,6 +5,8 @@

from __future__ import annotations

import warnings

try:
from functools import cache
except ImportError: # Python < 3.9
@@ -47,7 +49,11 @@ def pbmc68k_reduced() -> AnnData:


def krumsiek11() -> AnnData:
return _krumsiek11().copy()
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore", "Observation names are not unique", module="anndata"
)
return _krumsiek11().copy()


def paul15() -> AnnData:
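Note: the helper now suppresses the duplicate-name warning only while the cached `krumsiek11` object is copied. `warnings.catch_warnings()` snapshots the filter state and restores it on exit, so the `ignore` filter cannot leak into calling tests, which (per `test_datasets.py` below) are still expected to see the warning from the public dataset function. A small standalone sketch of the pattern, with a stand-in for the warning source:

import warnings

def make_noisy_copy():
    # Stand-in for copying an AnnData object that has duplicate obs names.
    warnings.warn("Observation names are not unique", UserWarning)
    return object()

with warnings.catch_warnings():
    # Ignored only inside this block; the real helper also pins module="anndata".
    warnings.filterwarnings("ignore", "Observation names are not unique")
    obj = make_noisy_copy()

# Outside the block the previous filters apply again, so the warning is visible.
warnings.warn("Observation names are not unique", UserWarning)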
24 changes: 23 additions & 1 deletion scanpy/testing/_pytest/fixtures/__init__.py
@@ -4,6 +4,7 @@
"""
from __future__ import annotations

import warnings
from typing import TYPE_CHECKING

import numpy as np
@@ -16,6 +17,7 @@
)

if TYPE_CHECKING:
from collections.abc import Generator
from pathlib import Path

__all__ = [
@@ -33,11 +35,31 @@ def float_dtype(request):


@pytest.fixture()
def doctest_env(cache: pytest.Cache, tmp_path: Path) -> None:
def doctest_env(cache: pytest.Cache, tmp_path: Path) -> Generator[None, None, None]:
from scanpy import settings
from scanpy._compat import chdir

showwarning_orig = warnings.showwarning

def showwarning(message, category, filename, lineno, file=None, line=None):
if file is None:
if line is None:
import linecache

line = linecache.getline(filename, lineno)
line = line.strip()
print(f"{category.__name__}: {message}\n {line}")
else:
showwarning_orig(message, category, filename, lineno, file, line)

# make errors visible and the rest ignored
warnings.filters = [
("default", *rest) for action, *rest in warnings.filters if action == "error"
] + [("ignore", None, Warning, None, 0)]

warnings.showwarning = showwarning
old_dd, settings.datasetdir = settings.datasetdir, cache.mkdir("scanpy-data")
with chdir(tmp_path):
yield
warnings.showwarning = showwarning_orig
settings.datasetdir = old_dd
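
Note: two things happen in this fixture. A custom `showwarning` prints warnings as `Category: message` plus the offending source line, which is exactly the format the updated `filter_cells` doctest expects, and `warnings.filters` is rebuilt by hand. Each entry in that list is a 5-tuple `(action, message, category, module, lineno)`, so the comprehension keeps only the `error` entries, downgrades them to `default` (printed, not raised), and appends a catch-all `ignore`. A standalone sketch of the filter manipulation, independent of pytest:

import warnings

# Keep only the filters that escalate warnings to errors, downgrade them to
# "default" so they are printed instead of raised, and ignore everything else.
visible = [("default", *rest) for action, *rest in warnings.filters if action == "error"]
warnings.filters = visible + [("ignore", None, Warning, None, 0)]

warnings.warn("swallowed by the catch-all ignore filter", UserWarning)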
6 changes: 4 additions & 2 deletions scanpy/tests/test_combat.py
@@ -2,6 +2,7 @@

import numpy as np
import pandas as pd
import pytest
from anndata.tests.helpers import assert_equal
from sklearn.metrics import silhouette_score

@@ -59,8 +60,9 @@ def test_combat_obs_names():
{"batch": pd.Categorical(np.random.randint(0, 2, 200))},
index=np.repeat(np.arange(100), 2).astype(str), # Non-unique index
)
a = sc.AnnData(X, obs)
b = a.copy()
with pytest.warns(UserWarning, match="Observation names are not unique"):
a = sc.AnnData(X, obs)
b = a.copy()
b.obs_names_make_unique()

sc.pp.combat(a, "batch")
6 changes: 4 additions & 2 deletions scanpy/tests/test_datasets.py
@@ -64,7 +64,8 @@ def test_ebi_expression_atlas(tmp_dataset_dir):


def test_krumsiek11(tmp_dataset_dir):
adata = sc.datasets.krumsiek11()
with pytest.warns(UserWarning, match=r"Observation names are not unique"):
adata = sc.datasets.krumsiek11()
assert adata.shape == (640, 11)
assert all(
np.unique(adata.obs["cell_type"])
@@ -80,7 +81,8 @@ def test_blobs():


def test_toggleswitch():
sc.datasets.toggleswitch()
with pytest.warns(UserWarning, match=r"Observation names are not unique"):
sc.datasets.toggleswitch()


def test_pbmc68k_reduced():
38 changes: 21 additions & 17 deletions scanpy/tests/test_get.py
@@ -13,19 +13,20 @@
from scanpy.datasets._utils import filter_oldformatwarning
from scanpy.testing._helpers.data import pbmc68k_reduced


# Override so warning gets caught
def transpose_adata(adata: AnnData, *, expect_duplicates: bool = False) -> AnnData:
if not expect_duplicates:
return adata.T
with pytest.warns(UserWarning, match=r"Observation names are not unique"):
return adata.T


TRANSPOSE_PARAMS = pytest.mark.parametrize(
"dim,transform,func",
[
(
"obs",
lambda x: x,
sc.get.obs_df,
),
(
"var",
lambda x: x.T,
sc.get.var_df,
),
("obs", lambda x, expect_duplicates=False: x, sc.get.obs_df),
("var", transpose_adata, sc.get.var_df),
],
ids=["obs_df", "var_df"],
)
@@ -140,11 +141,12 @@ def test_obs_df(adata):
assert all(badkey_err.match(k) for k in badkeys)

# test non unique index
adata = sc.AnnData(
np.arange(16).reshape(4, 4),
obs=pd.DataFrame(index=["a", "a", "b", "c"]),
var=pd.DataFrame(index=[f"gene{i}" for i in range(4)]),
)
with pytest.warns(UserWarning, match=r"Observation names are not unique"):
adata = sc.AnnData(
np.arange(16).reshape(4, 4),
obs=pd.DataFrame(index=["a", "a", "b", "c"]),
var=pd.DataFrame(index=[f"gene{i}" for i in range(4)]),
)
df = sc.get.obs_df(adata, ["gene1"])
pd.testing.assert_index_equal(df.index, adata.obs_names)

@@ -373,16 +375,18 @@ def test_repeated_cols(dim, transform, func):

@TRANSPOSE_PARAMS
def test_repeated_index_vals(dim, transform, func):
# THis one could be reverted, see:
# This one could be reverted, see:
# https://github.com/scverse/scanpy/pull/1583#issuecomment-770641710
alt_dim = ["obs", "var"][dim == "obs"]

adata = transform(
sc.AnnData(
np.ones((5, 10)),
var=pd.DataFrame(
index=["repeated_id"] * 2 + [f"gene-{i}" for i in range(8)]
),
)
),
expect_duplicates=True,
)

with pytest.raises(
4 changes: 2 additions & 2 deletions scanpy/tests/test_rank_genes_groups.py
@@ -215,15 +215,15 @@ def test_emptycat():

def test_log1p_save_restore(tmp_path):
"""tests the sequence log1p→save→load→rank_genes_groups"""
from anndata import read
from anndata import read_h5ad

pbmc = pbmc68k_reduced()
sc.pp.log1p(pbmc)

path = tmp_path / "test.h5ad"
pbmc.write(path)

pbmc = read(path)
pbmc = read_h5ad(path)

sc.tl.rank_genes_groups(pbmc, groupby="bulk_labels", use_raw=True)

2 changes: 1 addition & 1 deletion scanpy/tests/test_scaling.py
@@ -99,7 +99,7 @@ def test_scale(typ, dtype, mask, X, X_centered, X_scaled):
def test_mask_string():
with pytest.raises(ValueError):
sc.pp.scale(np.array(X_original), mask="mask")
adata = AnnData(np.array(X_for_mask), dtype="float32")
adata = AnnData(np.array(X_for_mask, dtype="float32"))
adata.obs["some cells"] = np.array((0, 0, 1, 1, 1, 0, 0), dtype=bool)
sc.pp.scale(adata, mask="some cells")
assert np.array_equal(adata.X, X_centered_for_mask)
6 changes: 4 additions & 2 deletions scanpy/tests/test_sim.py
@@ -1,10 +1,12 @@
from __future__ import annotations

import numpy as np
import pytest

import scanpy as sc


def test_sim_toggleswitch(tmp_write_dir):
adata = sc.tl.sim("toggleswitch")
np.allclose(adata.X, sc.datasets.toggleswitch().X, np.finfo(np.float32).eps)
with pytest.warns(UserWarning, match=r"Observation names are not unique"):
adata = sc.tl.sim("toggleswitch")
np.allclose(adata.X, sc.datasets.toggleswitch().X, np.finfo(np.float32).eps)
4 changes: 2 additions & 2 deletions scanpy/tools/_rank_genes_groups.py
@@ -294,7 +294,7 @@ def wilcoxon(

# Calculate rank sums for each chunk for the current mask
for ranks, left, right in _ranks(self.X, mask, mask_rest):
scores[left:right] = np.sum(ranks.iloc[0:n_active, :])
scores[left:right] = ranks.iloc[0:n_active, :].sum(axis=0)
if tie_correct:
T[left:right] = _tiecorrect(ranks)

@@ -322,7 +322,7 @@ def wilcoxon(
for ranks, left, right in _ranks(self.X):
# sum up adjusted_ranks to calculate W_m,n
for imask, mask in enumerate(self.groups_masks):
scores[imask, left:right] = np.sum(ranks.iloc[mask, :])
scores[imask, left:right] = ranks.iloc[mask, :].sum(axis=0)
if tie_correct:
T[imask, left:right] = _tiecorrect(ranks)

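Note: the two `wilcoxon` hunks address the pandas deprecation listed in `pyproject.toml`. `np.sum(df)` forwards `axis=None` to `DataFrame.sum`, which today still returns per-column sums but is slated to reduce over both axes and return a scalar, so the code now asks for the per-gene column sums explicitly with `axis=0`. A toy illustration (not scanpy's rank matrices):

import numpy as np
import pandas as pd

ranks = pd.DataFrame(np.arange(6.0).reshape(3, 2), columns=["gene0", "gene1"])

# Deprecated path: emits the "DataFrame.sum with axis=None" FutureWarning.
old_style = np.sum(ranks)

# Replacement used in this commit: per-column sums, no warning.
new_style = ranks.sum(axis=0)

assert new_style.tolist() == [6.0, 9.0]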
