Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added function to download Visium as SpatialData #949

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 67 additions & 1 deletion src/squidpy/datasets/_10x_datasets.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

import os
import shutil
import tarfile
from pathlib import Path
from typing import (
Expand All @@ -8,9 +10,13 @@
Union, # noqa: F401
)

import spatialdata as sd
from anndata import AnnData
from scanpy import _utils
from scanpy import logging as logg
from scanpy._settings import settings
from scanpy._utils import check_presence_download
from spatialdata import SpatialData

from squidpy._constants._constants import TenxVersions
from squidpy.datasets._utils import PathLike
Expand Down Expand Up @@ -106,7 +112,9 @@ def visium(

url_prefix = f"https://cf.10xgenomics.com/samples/spatial-exp/{spaceranger_version}/{sample_id}/"
visium_files = VisiumFiles(
f"{sample_id}_filtered_feature_bc_matrix.h5", f"{sample_id}_spatial.tar.gz", f"{sample_id}_image.tif"
f"{sample_id}_filtered_feature_bc_matrix.h5",
f"{sample_id}_spatial.tar.gz",
f"{sample_id}_image.tif",
)

# download spatial data
Expand Down Expand Up @@ -134,3 +142,61 @@ def visium(
)

return read_visium(base_dir / sample_id)


def visium_hne_sdata(filename: Path | str | None = None) -> SpatialData:
    """
    Download a Visium H&E dataset and provide it as a `SpatialData` object.

    This function combines the outputs from `squidpy.datasets.visium_hne_adata()`
    and `squidpy.datasets.visium_hne_image()` into a `SpatialData` object containing:

    - A multi-scale representation of the H&E image.
    - The spots as a shapes layer.
    - Gene expression data as an AnnData object.

    The dataset is downloaded as a ``.zip`` archive and unpacked next to it into a
    directory with the same name but a ``.zarr`` suffix. If that ``.zarr`` path
    already exists, nothing is downloaded or extracted and it is read directly.

    If no filename is provided, it defaults to `~/.cache/squidpy/visium_hne_sdata.zip`.

    Parameters
    ----------
    filename : Path | str | None, optional
        Path to save the dataset. If a directory is provided, the default filename
        `visium_hne_sdata.zip` will be used inside that directory. If a `.zarr` filename
        is provided, it will be converted to `.zip` for downloading; an existing
        `.zarr` directory is treated as the already extracted dataset.

    Returns
    -------
    SpatialData
        The downloaded and extracted Visium H&E dataset as a `SpatialData` object.

    Raises
    ------
    TypeError
        If `filename` is neither `None`, a `Path` nor a `str`.
    ValueError
        If `filename` is not an existing directory and its suffix is neither
        `.zip` nor `.zarr`.
    """
    default_name = "visium_hne_sdata.zip"

    if filename is None:
        filename = Path.home() / ".cache" / "squidpy" / default_name
    else:
        # A tuple (rather than `Path | str`) keeps this runtime check valid on
        # every supported Python version.
        if not isinstance(filename, (Path, str)):
            raise TypeError(f"Expected `filename` to be of type `Path` or `str`, found `{type(filename).__name__}`.")

        filename = Path(filename).expanduser()

        # The `.zarr` suffix must be checked before `is_dir()`: a previously
        # extracted store *is* a directory, and treating it as a container
        # directory would download the archive a second time inside the store.
        if filename.suffix == ".zarr":
            filename = filename.with_suffix(".zip")  # Ensure zip download
        elif filename.is_dir():
            filename = filename / default_name
        elif filename.suffix != ".zip":
            raise ValueError(f"Expected `filename` to have suffix `.zip` or `.zarr`, found `{filename.suffix}`.")

    filename = filename.absolute()
    extracted_path = filename.with_suffix(".zarr")

    if not extracted_path.exists():
        try:
            # Make sure the target (e.g. the default cache) directory exists
            # before anything is written into it.
            filename.parent.mkdir(parents=True, exist_ok=True)
            logg.info(f"Downloading Visium H&E SpatialData to {filename}")
            check_presence_download(
                filename=filename,
                backup_url="https://ndownloader.figshare.com/files/52048571",
            )
            logg.info(f"Extracting dataset from {filename} to {extracted_path}")
            shutil.unpack_archive(str(filename), str(extracted_path))
        except Exception as e:
            # `extracted_path` did not exist when this branch was entered, so
            # anything found there now is a partial extraction. Remove it so the
            # next call retries instead of reading a broken store.
            shutil.rmtree(extracted_path, ignore_errors=True)
            logg.error(f"Failed to download or extract dataset: {e}")
            raise

    return sd.read_zarr(extracted_path)
2 changes: 1 addition & 1 deletion src/squidpy/datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from __future__ import annotations

from squidpy.datasets._10x_datasets import visium
from squidpy.datasets._10x_datasets import visium, visium_hne_sdata
from squidpy.datasets._dataset import * # noqa: F403
from squidpy.datasets._image import * # noqa: F403
19 changes: 17 additions & 2 deletions tests/datasets/test_download_visium_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,22 @@
from pathlib import Path

import pytest
import spatialdata as sd
from anndata.tests.helpers import assert_adata_equal
from scanpy._settings import settings

from squidpy.datasets import visium
from squidpy.datasets import visium, visium_hne_sdata


@pytest.mark.timeout(120)
@pytest.mark.internet()
@pytest.mark.parametrize(
"sample", ["V1_Mouse_Kidney", "Targeted_Visium_Human_SpinalCord_Neuroscience", "Visium_FFPE_Human_Breast_Cancer"]
"sample",
[
"V1_Mouse_Kidney",
"Targeted_Visium_Human_SpinalCord_Neuroscience",
"Visium_FFPE_Human_Breast_Cancer",
],
)
def test_visium_datasets(tmpdir, sample):
# Tests that reading / downloading datasets works and it does not have any global effects
Expand All @@ -43,3 +49,12 @@ def test_visium_datasets(tmpdir, sample):
process = subprocess.run(["file", "--mime-type", image_path], stdout=subprocess.PIPE)
output = process.stdout.strip().decode() # make process output string
assert output == str(image_path) + ": image/tiff"


@pytest.mark.timeout(120)
@pytest.mark.internet()
def test_visium_sdata_dataset(tmpdir):
    """Fetch the Visium H&E SpatialData into a temporary directory and check its layers."""
    download_dir = Path(tmpdir)
    dataset = visium_hne_sdata(download_dir)

    assert isinstance(dataset, sd.SpatialData)

    # Exactly one shapes layer and one image layer are expected.
    shape_names = list(dataset.shapes.keys())
    assert shape_names == ["spots"]

    image_names = list(dataset.images.keys())
    assert image_names == ["hne"]
Loading