diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml index 796105b..1760f1d 100644 --- a/.github/workflows/publish-pypi.yml +++ b/.github/workflows/publish-pypi.yml @@ -52,4 +52,4 @@ jobs: # This uses the trusted publisher workflow so no token is required. - name: Publish to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 \ No newline at end of file + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/CHANGELOG.md b/CHANGELOG.md index 571135c..ff8c6d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Changelog ## [Unreleased] +- Added `remove_img` function (PR #34) - Refactored `get_img_idx` for improved maintainability - Disambiguated `get_img_data` between `_imgutils.py` and `SpatialExperiment.py` - Moved `SpatialFeatureExperiment` into its own package diff --git a/README.md b/README.md index ecc9280..ffc3143 100644 --- a/README.md +++ b/README.md @@ -100,4 +100,4 @@ For more detailed information about available methods and functionality, please ## Note This project has been set up using [BiocSetup](https://github.com/biocpy/biocsetup) -and [PyScaffold](https://pyscaffold.org/). \ No newline at end of file +and [PyScaffold](https://pyscaffold.org/). diff --git a/docs/requirements.txt b/docs/requirements.txt index a1b9d2b..c20cf60 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,9 +1,9 @@ +furo +myst-nb # Requirements file for ReadTheDocs, check .readthedocs.yml. # To build the module reference correctly, make sure every external package # under `install_requires` in `setup.cfg` is also listed here! # sphinx_rtd_theme myst-parser[linkify] sphinx>=3.2.1 -myst-nb -furo sphinx-autodoc-typehints diff --git a/setup.cfg b/setup.cfg index 5d1e869..e0e090f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -49,7 +49,7 @@ python_requires = >=3.9 # For more information, check out https://semver.org/. 
install_requires = importlib-metadata; python_version<"3.8" - biocframe>=0.6.1 + biocframe>=0.6.3 biocutils>=0.2 summarizedexperiment>=0.5 singlecellexperiment>=0.5.7 diff --git a/setup.py b/setup.py index 86f505c..cbb86cc 100644 --- a/setup.py +++ b/setup.py @@ -1,10 +1,10 @@ """ - Setup file for SpatialExperiment. - Use setup.cfg to configure your project. +Setup file for SpatialExperiment. +Use setup.cfg to configure your project. - This file was generated with PyScaffold 4.6. - PyScaffold helps you to put up the scaffold of your new Python project. - Learn more under: https://pyscaffold.org/ +This file was generated with PyScaffold 4.6. +PyScaffold helps you to put up the scaffold of your new Python project. +Learn more under: https://pyscaffold.org/ """ from setuptools import setup diff --git a/src/spatialexperiment/SpatialExperiment.py b/src/spatialexperiment/SpatialExperiment.py index 988cbec..715c519 100644 --- a/src/spatialexperiment/SpatialExperiment.py +++ b/src/spatialexperiment/SpatialExperiment.py @@ -12,16 +12,12 @@ check_assays_are_equal, merge_assays, merge_se_colnames, - relaxed_merge_assays + relaxed_merge_assays, ) from summarizedexperiment._frameutils import _sanitize_frame from summarizedexperiment.RangedSummarizedExperiment import GRangesOrGRangesList from singlecellexperiment import SingleCellExperiment -from singlecellexperiment._combineutils import ( - merge_generic, - relaxed_merge_generic, - relaxed_merge_numpy_generic -) +from singlecellexperiment._combineutils import merge_generic, relaxed_merge_generic, relaxed_merge_numpy_generic from ._imgutils import get_img_idx from ._validators import ( @@ -202,9 +198,7 @@ def __init__( column_data = _sanitize_frame(column_data, num_rows=self.shape[1]) if not column_data.has_column("sample_id"): - column_data["sample_id"] = ["sample01"] * self.shape[ - 1 - ] # hard code default sample_id as "sample01" + column_data["sample_id"] = ["sample01"] * self.shape[1] # hard code default sample_id as 
"sample01" spatial_coords = _sanitize_frame(spatial_coords, num_rows=self.shape[1]) img_data = _sanitize_frame(img_data, num_rows=0) @@ -217,9 +211,7 @@ def __init__( _validate_column_data(column_data=column_data) _validate_img_data(img_data=img_data) _validate_sample_ids(column_data=column_data, img_data=img_data) - _validate_spatial_coords( - spatial_coords=spatial_coords, column_data=column_data - ) + _validate_spatial_coords(spatial_coords=spatial_coords, column_data=column_data) ######################### ######>> Copying <<###### @@ -323,14 +315,10 @@ def __repr__(self) -> str: output += ", row_ranges=" + self._row_ranges.__repr__() if self._alternative_experiments is not None: - output += ", alternative_experiments=" + ut.print_truncated_list( - self.alternative_experiment_names - ) + output += ", alternative_experiments=" + ut.print_truncated_list(self.alternative_experiment_names) if self._reduced_dims is not None: - output += ", reduced_dims=" + ut.print_truncated_list( - self.reduced_dim_names - ) + output += ", reduced_dims=" + ut.print_truncated_list(self.reduced_dim_names) if self._main_experiment_name is not None: output += ", main_experiment_name=" + self._main_experiment_name @@ -358,10 +346,14 @@ def __str__(self) -> str: output += f"assays({len(self.assay_names)}): {ut.print_truncated_list(self.assay_names)}\n" - output += f"row_data columns({len(self._rows.column_names)}): {ut.print_truncated_list(self._rows.column_names)}\n" + output += ( + f"row_data columns({len(self._rows.column_names)}): {ut.print_truncated_list(self._rows.column_names)}\n" + ) output += f"row_names({0 if self._row_names is None else len(self._row_names)}): {' ' if self._row_names is None else ut.print_truncated_list(self._row_names)}\n" - output += f"column_data columns({len(self._cols.column_names)}): {ut.print_truncated_list(self._cols.column_names)}\n" + output += ( + f"column_data columns({len(self._cols.column_names)}): 
{ut.print_truncated_list(self._cols.column_names)}\n" + ) output += f"column_names({0 if self._column_names is None else len(self._column_names)}): {' ' if self._column_names is None else ut.print_truncated_list(self._column_names)}\n" output += f"main_experiment_name: {' ' if self._main_experiment_name is None else self._main_experiment_name}\n" @@ -434,9 +426,7 @@ def set_spatial_coords( in_place: bool = False, ) -> "SpatialExperiment": """Alias for :py:meth:`~set_spatial_coordinates`.""" - return self.set_spatial_coordinates( - spatial_coords=spatial_coords, in_place=in_place - ) + return self.set_spatial_coordinates(spatial_coords=spatial_coords, in_place=in_place) @property def spatial_coords(self) -> BiocFrame: @@ -458,9 +448,7 @@ def spatial_coordinates(self) -> BiocFrame: return self.get_spatial_coordinates() @spatial_coordinates.setter - def spatial_coordinates( - self, spatial_coords: Optional[Union[BiocFrame, np.ndarray]] - ): + def spatial_coordinates(self, spatial_coords: Optional[Union[BiocFrame, np.ndarray]]): """Alias for :py:meth:`~set_spatial_coordinates`.""" warn( "Setting property 'spatial_coords' is an in-place operation, use 'set_spatial_coordinates' instead.", @@ -510,21 +498,15 @@ def set_spatial_coordinates_names( new_spatial_coords = self._spatial_coords else: _validate_spatial_coords_names(spatial_coords_names, self._spatial_coords) - new_spatial_coords = self._spatial_coords.set_column_names( - spatial_coords_names - ) + new_spatial_coords = self._spatial_coords.set_column_names(spatial_coords_names) output = self._define_output(in_place) output._spatial_coords = new_spatial_coords return output - def set_spatial_coords_names( - self, spatial_coords_names: List[str], in_place: bool = False - ) -> "SpatialExperiment": + def set_spatial_coords_names(self, spatial_coords_names: List[str], in_place: bool = False) -> "SpatialExperiment": """Alias for :py:meth:`~set_spatial_coordinates_names`.""" - return self.set_spatial_coordinates_names( - 
spatial_coords_names=spatial_coords_names, in_place=in_place - ) + return self.set_spatial_coordinates_names(spatial_coords_names=spatial_coords_names, in_place=in_place) @property def spatial_coords_names(self) -> List[str]: @@ -538,9 +520,7 @@ def spatial_coords_names(self, spatial_coords_names: List[str]): "Setting property 'spatial_coords_names' is an in-place operation, use 'set_spatial_coordinates_names' instead.", UserWarning, ) - self.set_spatial_coordinates_names( - spatial_coords_names=spatial_coords_names, in_place=True - ) + self.set_spatial_coordinates_names(spatial_coords_names=spatial_coords_names, in_place=True) @property def spatial_coordinates_names(self) -> List[str]: @@ -554,9 +534,7 @@ def spatial_coordinates_names(self, spatial_coords_names: List[str]): "Setting property 'spatial_coords_names' is an in-place operation, use 'set_spatial_coordinates_names' instead.", UserWarning, ) - self.set_spatial_coordinates_names( - spatial_coords_names=spatial_coords_names, in_place=True - ) + self.set_spatial_coordinates_names(spatial_coords_names=spatial_coords_names, in_place=True) ############################## ########>> img_data <<######## @@ -574,9 +552,7 @@ def get_img_data(self) -> BiocFrame: """Alias for :py:meth:`~get_image_data`.""" return self.get_image_data() - def set_image_data( - self, img_data: Optional[BiocFrame], in_place: bool = False - ) -> "SpatialExperiment": + def set_image_data(self, img_data: Optional[BiocFrame], in_place: bool = False) -> "SpatialExperiment": """Set new image data. 
Args: @@ -605,9 +581,7 @@ def set_image_data( output._img_data = img_data return output - def set_img_data( - self, img_data: BiocFrame, in_place: bool = False - ) -> "SpatialExperiment": + def set_img_data(self, img_data: BiocFrame, in_place: bool = False) -> "SpatialExperiment": """Alias for :py:meth:`~set_image_data`.""" return self.set_image_data(img_data=img_data, in_place=in_place) @@ -669,9 +643,7 @@ def get_scale_factors( _validate_id(sample_id) _validate_id(image_id) - idxs = get_img_idx( - img_data=self.img_data, sample_id=sample_id, image_id=image_id - ) + idxs = get_img_idx(img_data=self.img_data, sample_id=sample_id, image_id=image_id) return self.img_data[idxs,]["scale_factor"] @@ -734,9 +706,7 @@ def get_slice( spe = super().get_slice(rows=rows, columns=columns) slicer = self._generic_slice(rows=rows, columns=columns) - do_slice_cols = not ( - isinstance(slicer.col_indices, slice) and slicer.col_indices == slice(None) - ) + do_slice_cols = not (isinstance(slicer.col_indices, slice) and slicer.col_indices == slice(None)) new_spatial_coords = None @@ -744,9 +714,7 @@ def get_slice( new_spatial_coords = self.spatial_coords[slicer.col_indices, :] column_sample_ids = set(spe.column_data["sample_id"]) - mask = [ - sample_id in column_sample_ids for sample_id in self.img_data["sample_id"] - ] + mask = [sample_id in column_sample_ids for sample_id in self.img_data["sample_id"]] new_img_data = self.img_data[mask,] @@ -822,11 +790,9 @@ def get_img( if not self.img_data: return None - idxs = get_img_idx( - img_data=self.img_data, sample_id=sample_id, image_id=image_id - ) + indices = get_img_idx(img_data=self.img_data, sample_id=sample_id, image_id=image_id) - images = self.img_data[idxs,]["data"] + images = self.img_data[indices,]["data"] return images[0] if len(images) == 1 else images def add_img( @@ -869,9 +835,7 @@ def add_img( Raises: ValueError: If the sample_id and image_id pair already exists. 
""" - _validate_sample_image_ids( - img_data=self._img_data, new_sample_id=sample_id, new_image_id=image_id - ) + _validate_sample_image_ids(img_data=self._img_data, new_sample_id=sample_id, new_image_id=image_id) if isinstance(image_source, (str, Path)): is_url = urlparse(str(image_source)).scheme in ("http", "https", "ftp") @@ -897,12 +861,8 @@ def add_img( output._img_data = new_img_data return output - # TODO: implement rmv_img() - def rmv_img( - self, - sample_id: Union[str, bool, None] = None, - image_id: Union[str, bool, None] = None, - in_place: bool = False + def remove_img( + self, sample_id: Union[str, bool, None] = None, image_id: Union[str, bool, None] = None, in_place: bool = False ) -> "SpatialExperiment": """Remove an image entry. @@ -921,7 +881,16 @@ def rmv_img( Whether to modify the ``SpatialExperiment`` in place. Defaults to False. """ - raise NotImplementedError() + _validate_id(sample_id) + _validate_id(image_id) + + indices = get_img_idx(img_data=self.img_data, sample_id=sample_id, image_id=image_id) + + new_img_data = self._img_data.remove_rows(indices) + + output = self._define_output(in_place=in_place) + output._img_data = new_img_data + return output def img_source( self, @@ -929,9 +898,7 @@ def img_source( image_id: Union[str, bool, None] = None, path=False, ): - raise NotImplementedError( - "This function is irrelevant because it is for `RemoteSpatialImages`" - ) + raise NotImplementedError("This function is irrelevant because it is for `RemoteSpatialImages`") def img_raster(self, sample_id=None, image_id=None): # NOTE: this function seems redundant, might be an artifact of the different subclasses of SpatialImage in the R implementation? 
just call `get_img()` for now diff --git a/src/spatialexperiment/SpatialImage.py b/src/spatialexperiment/SpatialImage.py index 4218a2a..5ac1266 100644 --- a/src/spatialexperiment/SpatialImage.py +++ b/src/spatialexperiment/SpatialImage.py @@ -50,9 +50,7 @@ def get_metadata(self) -> dict: """ return self._metadata - def set_metadata( - self, metadata: dict, in_place: bool = False - ) -> "VirtualSpatialImage": + def set_metadata(self, metadata: dict, in_place: bool = False) -> "VirtualSpatialImage": """Set additional metadata. Args: @@ -67,9 +65,7 @@ def set_metadata( or as a reference to the (in-place-modified) original. """ if not isinstance(metadata, dict): - raise TypeError( - f"`metadata` must be a dictionary, provided {type(metadata)}." - ) + raise TypeError(f"`metadata` must be a dictionary, provided {type(metadata)}.") output = self._define_output(in_place) output._metadata = metadata return output @@ -150,9 +146,7 @@ def _sanitize_loaded_image(image): class LoadedSpatialImage(VirtualSpatialImage): """Class for images loaded into memory.""" - def __init__( - self, image: Union[Image.Image, np.ndarray], metadata: Optional[dict] = None - ): + def __init__(self, image: Union[Image.Image, np.ndarray], metadata: Optional[dict] = None): """Initialize the object. Args: @@ -256,9 +250,7 @@ def get_image(self) -> Image.Image: return self._image - def set_image( - self, image: Union[Image.Image, np.ndarray], in_place: bool = False - ) -> "LoadedSpatialImage": + def set_image(self, image: Union[Image.Image, np.ndarray], in_place: bool = False) -> "LoadedSpatialImage": """Set new image. Args: @@ -410,9 +402,7 @@ def get_path(self) -> Path: """Get the path to the image file.""" return self._path - def set_path( - self, path: Union[str, Path], in_place: bool = False - ) -> "StoredSpatialImage": + def set_path(self, path: Union[str, Path], in_place: bool = False) -> "StoredSpatialImage": """Update the path to the image file. 
Args: @@ -473,9 +463,7 @@ def _validate_url(url): class RemoteSpatialImage(VirtualSpatialImage): """Class for remotely hosted images.""" - def __init__( - self, url: str, metadata: Optional[dict] = None, validate: bool = True - ): + def __init__(self, url: str, metadata: Optional[dict] = None, validate: bool = True): """Initialize the object. Args: diff --git a/src/spatialexperiment/__init__.py b/src/spatialexperiment/__init__.py index 99d8f7e..e31286f 100644 --- a/src/spatialexperiment/__init__.py +++ b/src/spatialexperiment/__init__.py @@ -35,4 +35,3 @@ "VirtualSpatialImage", "construct_spatial_image_class", ] - diff --git a/src/spatialexperiment/_combineutils.py b/src/spatialexperiment/_combineutils.py index 4b40641..51233eb 100644 --- a/src/spatialexperiment/_combineutils.py +++ b/src/spatialexperiment/_combineutils.py @@ -25,9 +25,7 @@ def _append_indices_to_samples(bframes: List[BiocFrame]) -> List[BiocFrame]: modified_bframes = [] for i, bframe in enumerate(bframes, start=1): bframe_copy = deepcopy(bframe) - bframe_copy["sample_id"] = [ - f"{sample_id}_{i}" for sample_id in bframe_copy["sample_id"] - ] + bframe_copy["sample_id"] = [f"{sample_id}_{i}" for sample_id in bframe_copy["sample_id"]] modified_bframes.append(bframe_copy) return modified_bframes @@ -41,7 +39,7 @@ def merge_spatial_frames(x: List[SpatialExperiment], relaxed: bool = False) -> T Args: x: List of ``SpatialExperiment`` objects - relaxed: If `True`, allows frames with different columns to be combined. + relaxed: If `True`, allows frames with different columns to be combined. Absent columns in any frame are filled with appropriate placeholder values. Defaults to `False`. 
@@ -52,9 +50,7 @@ def merge_spatial_frames(x: List[SpatialExperiment], relaxed: bool = False) -> T img_datas = [y._img_data for y in x] expected_unique = sum([len(set(_cols["sample_id"])) for _cols in cols]) - all_sample_ids = list( - itertools.chain.from_iterable(_cols["sample_id"] for _cols in cols) - ) + all_sample_ids = list(itertools.chain.from_iterable(_cols["sample_id"] for _cols in cols)) if len(set(all_sample_ids)) < expected_unique: warn( @@ -80,7 +76,7 @@ def merge_spatial_coordinates(spatial_coords: List[BiocFrame], relaxed: bool = F Args: spatial_coords: List of `BiocFrame`s containing spatial coordinates. - relaxed: If `True`, allows frames with different columns to be combined. + relaxed: If `True`, allows frames with different columns to be combined. Absent columns in any frame are filled with appropriate placeholder values. Defaults to `False`. diff --git a/src/spatialexperiment/_imgutils.py b/src/spatialexperiment/_imgutils.py index 1c79ff6..36dcea7 100644 --- a/src/spatialexperiment/_imgutils.py +++ b/src/spatialexperiment/_imgutils.py @@ -14,7 +14,6 @@ __license__ = "MIT" - def read_image(input_image): """Read image from PIL Image, file path, or URL. @@ -31,7 +30,7 @@ def read_image(input_image): if isinstance(input_image, Image.Image): return input_image - + if isinstance(input_image, (str, Path)): is_url = urlparse(str(input_image)).scheme in ("http", "https", "ftp") if is_url: @@ -39,16 +38,12 @@ def read_image(input_image): return Image.open(BytesIO(response.content)) else: return Image.open(input_image) - + raise TypeError(f"Expected PIL Image, path, or URL. Got {type(input_image)}") def construct_img_data( - img: Union[str, os.PathLike], - scale_factor: str, - sample_id: str, - image_id: str, - load: bool = True + img: Union[str, os.PathLike], scale_factor: str, sample_id: str, image_id: str, load: bool = True ) -> BiocFrame: """ Construct an image data dataframe. 
@@ -77,14 +72,7 @@ def construct_img_data( img = read_image(img) spi = construct_spatial_image_class(img) - return BiocFrame( - { - "sample_id": [sample_id], - "image_id": [image_id], - "data": [spi], - "scale_factor": [scale_factor] - } - ) + return BiocFrame({"sample_id": [sample_id], "image_id": [image_id], "data": [spi], "scale_factor": [scale_factor]}) def get_img_idx( @@ -137,13 +125,13 @@ def get_img_idx( iid = image_ids == image_id elif sample_id is True and image_id is None: sid = np.full(len(img_data), True) - iid = [img_data['sample_id'].index(x) for x in set(img_data['sample_id'])] + iid = [img_data["sample_id"].index(x) for x in set(img_data["sample_id"])] iid = np.eye(len(img_data))[iid, :].sum(axis=0) elif sample_id is None and image_id is True: first_sid = img_data["sample_id"][0] sid = sample_ids == first_sid iid = np.full(len(img_data), True) - + mask = sid.astype(bool) & iid.astype(bool) if not any(mask): raise ValueError( diff --git a/src/spatialexperiment/_initutils.py b/src/spatialexperiment/_initutils.py index 6e58a86..e1fe0b5 100644 --- a/src/spatialexperiment/_initutils.py +++ b/src/spatialexperiment/_initutils.py @@ -33,11 +33,7 @@ def construct_spatial_coords_from_names( current_column_data = _sanitize_frame(column_data, num_rows=column_data.shape[1]) - missing_names = [ - name - for name in spatial_coords_names - if name not in current_column_data.column_names - ] + missing_names = [name for name in spatial_coords_names if name not in current_column_data.column_names] if missing_names: raise ValueError( f"The following names in `spatial_coords_names` are missing from `column_data`: {missing_names}" @@ -48,11 +44,7 @@ def construct_spatial_coords_from_names( column_data_subset = deepcopy( current_column_data[ :, - [ - col - for col in current_column_data.column_names - if col not in spatial_coords_names - ], + [col for col in current_column_data.column_names if col not in spatial_coords_names], ] ) @@ -89,9 +81,7 @@ def 
construct_img_data( A `BiocFrame` representing the image data for a `SpatialExperiment`. """ if not len(image_id) == len(image_sources) == len(scale_factors): - raise ValueError( - "'image_id', 'image_sources' and 'scale_factors' are not the same length." - ) + raise ValueError("'image_id', 'image_sources' and 'scale_factors' are not the same length.") spis = [] for image_source in image_sources: diff --git a/src/spatialexperiment/_validators.py b/src/spatialexperiment/_validators.py index a3ae86e..c1756bf 100644 --- a/src/spatialexperiment/_validators.py +++ b/src/spatialexperiment/_validators.py @@ -13,9 +13,7 @@ def _validate_spatial_coords_names(spatial_coords_names, spatial_coords): raise TypeError("'spatial_coords_names' is not a list of strings") if len(spatial_coords_names) != spatial_coords.shape[1]: - raise ValueError( - f"Expected {spatial_coords.shape[1]} names. Got {len(spatial_coords_names)} names." - ) + raise ValueError(f"Expected {spatial_coords.shape[1]} names. Got {len(spatial_coords_names)} names.") def _validate_column_data(column_data): @@ -45,9 +43,7 @@ def _validate_sample_image_ids(img_data, new_sample_id, new_image_id): for row in img_data: data = row[1] if data["sample_id"] == new_sample_id and data["image_id"] == new_image_id: - raise ValueError( - f"Image with Sample ID: {new_sample_id} and Image ID: {new_image_id} already exists" - ) + raise ValueError(f"Image with Sample ID: {new_sample_id} and Image ID: {new_image_id} already exists") # TODO: check if 'new_sample_id' is present in column_data['sample_id'] @@ -57,10 +53,7 @@ def _validate_spatial_coords(spatial_coords, column_data): return if not hasattr(spatial_coords, "shape"): - raise TypeError( - "Spatial coordinates must be a dataframe-like object." - "Does not contain a `shape` property." - ) + raise TypeError("Spatial coordinates must be a dataframe-like object." 
"Does not contain a `shape` property.") if column_data.shape[0] != spatial_coords.shape[0]: raise ValueError("'spatial_coords' do not contain coordinates for all cells.") @@ -91,9 +84,7 @@ def _validate_sample_ids(column_data, img_data): column_data_sample_ids = set(column_data["sample_id"]) if not img_data_sample_ids <= column_data_sample_ids: - raise ValueError( - "All 'sample_id's in 'img_data' must be present in 'column_data['sample_id']" - ) + raise ValueError("All 'sample_id's in 'img_data' must be present in 'column_data['sample_id']") if img_data_sample_ids != column_data_sample_ids: warnings.warn( diff --git a/src/spatialexperiment/io/__init__.py b/src/spatialexperiment/io/__init__.py index e12bd3c..fd1a969 100644 --- a/src/spatialexperiment/io/__init__.py +++ b/src/spatialexperiment/io/__init__.py @@ -1,4 +1,3 @@ from .tenx_visium import read_tenx_visium __all__ = ["read_tenx_visium"] - diff --git a/src/spatialexperiment/io/tenx_visium.py b/src/spatialexperiment/io/tenx_visium.py index 32622a8..cf76b58 100644 --- a/src/spatialexperiment/io/tenx_visium.py +++ b/src/spatialexperiment/io/tenx_visium.py @@ -14,7 +14,7 @@ from .._initutils import construct_spatial_coords_from_names -def read_tissue_positions(tissue_positions_path) -> 'pd.DataFrame': +def read_tissue_positions(tissue_positions_path) -> "pd.DataFrame": """Read and parse tissue position file. Args: @@ -25,6 +25,7 @@ def read_tissue_positions(tissue_positions_path) -> 'pd.DataFrame': A DataFrame with the tissue positions. 
""" import pandas as pd + column_names = [ "barcode", "in_tissue", @@ -36,9 +37,7 @@ def read_tissue_positions(tissue_positions_path) -> 'pd.DataFrame': has_header = "list" not in os.path.basename(tissue_positions_path) - tissue_positions = pd.read_csv( - tissue_positions_path, header=0 if has_header else None, names=column_names - ) + tissue_positions = pd.read_csv(tissue_positions_path, header=0 if has_header else None, names=column_names) tissue_positions = tissue_positions.set_index("barcode") tissue_positions["in_tissue"] = tissue_positions["in_tissue"].astype(bool) @@ -50,7 +49,7 @@ def read_img_data( sample_ids: Optional[List[str]] = None, image_sources: Optional[List[str]] = None, scale_factors: str = None, - load: bool = True + load: bool = True, ) -> BiocFrame: """Read in images and scale factors for 10x Genomics Visium data, and return as a valid `img_data` object. @@ -75,15 +74,11 @@ def read_img_data( if sample_ids is None: raise ValueError("`sample_id` mustn't be NULL.") - if not isinstance(sample_ids, list) or not all( - isinstance(s, str) for s in sample_ids - ): + if not isinstance(sample_ids, list) or not all(isinstance(s, str) for s in sample_ids): raise TypeError("`sample_id` must be a list of strings.") if len(set(sample_ids)) != len(path): - raise ValueError( - "The number of unique sample_ids must match the length of path." 
- ) + raise ValueError("The number of unique sample_ids must match the length of path.") # put images into list with one element per sample if image_sources is None: @@ -94,9 +89,7 @@ def read_img_data( images = [[img for img in image_sources if p in img] for p in path] - img_data = BiocFrame( - {"sample_id": [], "image_id": [], "data": [], "scale_factor": []} - ) + img_data = BiocFrame({"sample_id": [], "image_id": [], "data": [], "scale_factor": []}) for i, sample_id in enumerate(sample_ids): with open(scale_factors[i], "r") as f: curr_scale_factors = json.load(f) @@ -111,23 +104,13 @@ def read_img_data( "aligned_fiducials": "aligned", }.get(image_name, None) - scale_factor_name = {"lowres": "tissue_lowres_scalef"}.get( - image_id, "tissue_hires_scalef" - ) + scale_factor_name = {"lowres": "tissue_lowres_scalef"}.get(image_id, "tissue_hires_scalef") scale_factor = next( - ( - value - for key, value in curr_scale_factors.items() - if scale_factor_name in key - ), + (value for key, value in curr_scale_factors.items() if scale_factor_name in key), None, ) curr_image_data = construct_img_data( - img=image, - scale_factor=scale_factor, - sample_id=sample_id, - image_id=image_id, - load=load + img=image, scale_factor=scale_factor, sample_id=sample_id, image_id=image_id, load=load ) img_data = img_data.combine_rows(curr_image_data) @@ -180,9 +163,7 @@ def read_tenx_visium( for image in images: if image not in allowed_images: - raise ValueError( - f"`images` must be one of {allowed_images}. got `{image}`." - ) + raise ValueError(f"`images` must be one of {allowed_images}. 
got `{image}`.") if sample_ids is None: sample_ids = [f"sample{str(i).zfill(2)}" for i in range(1, len(samples) + 1)] @@ -206,9 +187,7 @@ def read_tenx_visium( # setup file paths ext = ".h5" if type == "HDF5" else "" counts_dirs = [f"{data}_feature_bc_matrix{ext}" for _ in samples] - counts_dir_paths = [ - os.path.join(sample, fn) for sample, fn in zip(samples, counts_dirs) - ] + counts_dir_paths = [os.path.join(sample, fn) for sample, fn in zip(samples, counts_dirs)] # spatial parts spatial_dir_paths = [os.path.join(sample, "spatial") for sample in samples] @@ -228,10 +207,7 @@ def read_tenx_visium( for tissue_positions_path in tissue_positions_paths if os.path.exists(tissue_positions_path) ] - scale_factors_paths = [ - os.path.join(spatial_dir, "scalefactors_json.json") - for spatial_dir in spatial_dir_paths - ] + scale_factors_paths = [os.path.join(spatial_dir, "scalefactors_json.json") for spatial_dir in spatial_dir_paths] # read image data image_files_mapper = { @@ -241,18 +217,12 @@ def read_tenx_visium( "aligned": "aligned_fiducials.jpg", } - image_files = [ - image_files_mapper[image] for image in images if image in image_files_mapper - ] + image_files = [image_files_mapper[image] for image in images if image in image_files_mapper] image_file_paths = [ - os.path.join(spatial_dir, image_file) - for spatial_dir in spatial_dir_paths - for image_file in image_files + os.path.join(spatial_dir, image_file) for spatial_dir in spatial_dir_paths for image_file in image_files ] - missing_files = [ - not os.path.exists(image_file_path) for image_file_path in image_file_paths - ] + missing_files = [not os.path.exists(image_file_path) for image_file_path in image_file_paths] if all(missing_files): raise FileNotFoundError(f"No matching files found for 'images={images}'") @@ -261,15 +231,11 @@ def read_tenx_visium( print( "Skipping missing images\n " + "\n ".join( - image_file_path - for image_file_path, missing in zip(image_file_paths, missing_files) - if missing + 
image_file_path for image_file_path, missing in zip(image_file_paths, missing_files) if missing ) ) image_file_paths = [ - image_file_path - for image_file_path, missing in zip(image_file_paths, missing_files) - if not missing + image_file_path for image_file_path, missing in zip(image_file_paths, missing_files) if not missing ] image = read_img_data( @@ -277,7 +243,7 @@ def read_tenx_visium( sample_ids=sample_ids, image_sources=image_file_paths, scale_factors=scale_factors_paths, - load=load + load=load, ) spes = [] @@ -294,19 +260,14 @@ def read_tenx_visium( tissue_positions = tissue_positions.loc[obs, :] tissue_positions["sample_id"] = sample_ids[i] spatial_coords, column_data = construct_spatial_coords_from_names( - spatial_coords_names=["pxl_col_in_fullres", "pxl_row_in_fullres"], - column_data=tissue_positions + spatial_coords_names=["pxl_col_in_fullres", "pxl_row_in_fullres"], column_data=tissue_positions ) spe = SpatialExperiment( assays=sce.assays, - row_data=BiocFrame( - { - "symbol": sce.row_data["gene_symbols"] - } - ), + row_data=BiocFrame({"symbol": sce.row_data["gene_symbols"]}), column_data=column_data, - spatial_coords=spatial_coords + spatial_coords=spatial_coords, ) spes.append(spe) diff --git a/tests/10xVisium/section1/outs/spatial/scalefactors_json.json b/tests/10xVisium/section1/outs/spatial/scalefactors_json.json index 9d7dbe6..ad05fab 100644 --- a/tests/10xVisium/section1/outs/spatial/scalefactors_json.json +++ b/tests/10xVisium/section1/outs/spatial/scalefactors_json.json @@ -1 +1 @@ -{"spot_diameter_fullres": 89.44476048022638, "tissue_hires_scalef": 0.17011142, "fiducial_diameter_fullres": 144.48769000651953, "tissue_lowres_scalef": 0.051033426} \ No newline at end of file +{"spot_diameter_fullres": 89.44476048022638, "tissue_hires_scalef": 0.17011142, "fiducial_diameter_fullres": 144.48769000651953, "tissue_lowres_scalef": 0.051033426} diff --git a/tests/10xVisium/section2/outs/spatial/scalefactors_json.json 
b/tests/10xVisium/section2/outs/spatial/scalefactors_json.json index 9d7dbe6..ad05fab 100644 --- a/tests/10xVisium/section2/outs/spatial/scalefactors_json.json +++ b/tests/10xVisium/section2/outs/spatial/scalefactors_json.json @@ -1 +1 @@ -{"spot_diameter_fullres": 89.44476048022638, "tissue_hires_scalef": 0.17011142, "fiducial_diameter_fullres": 144.48769000651953, "tissue_lowres_scalef": 0.051033426} \ No newline at end of file +{"spot_diameter_fullres": 89.44476048022638, "tissue_hires_scalef": 0.17011142, "fiducial_diameter_fullres": 144.48769000651953, "tissue_lowres_scalef": 0.051033426} diff --git a/tests/conftest.py b/tests/conftest.py index 8667128..561c095 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -54,7 +54,7 @@ def spe(): spatial_coords = BiocFrame({"x": x_coords, "y": y_coords}) img_data = BiocFrame( - { + data={ "sample_id": ["sample_1", "sample_1", "sample_2"], "image_id": ["aurora", "dice", "desert"], "data": [ @@ -63,7 +63,8 @@ def spe(): construct_spatial_image_class("tests/images/sample_image3.jpg"), ], "scale_factor": [1, 1, 1], - } + }, + row_names=[0, 1, 2] ) spe_instance = SpatialExperiment( @@ -88,4 +89,4 @@ def sample_ids(): @pytest.fixture def samples(dir, sample_ids): - return [os.path.join(dir, sample_id, "outs") for sample_id in sample_ids] \ No newline at end of file + return [os.path.join(dir, sample_id, "outs") for sample_id in sample_ids] diff --git a/tests/test_img_data_methods.py b/tests/test_img_data_methods.py index 7a685a5..2f690e8 100644 --- a/tests/test_img_data_methods.py +++ b/tests/test_img_data_methods.py @@ -115,3 +115,137 @@ def test_add_img_already_exists(spe): sample_id=img_data["sample_id"][0], image_id=img_data["image_id"][0], ) + + +def test_remove_img_no_img_data(spe): + tspe = deepcopy(spe) + tspe.img_data = None + with pytest.raises(AttributeError): + tspe.remove_img() + + +def test_remove_img_no_matches(spe): + with pytest.raises(ValueError): + spe.remove_img(sample_id="foo", image_id="foo") + + 
def test_remove_img_both_str(spe):
    """Remove one image addressed by explicit sample and image ids.

    Asserts that the call returns a different object, leaves the source
    object's ``img_data`` row count unchanged, and that looking the removed
    image up on the returned object raises ``ValueError``.
    """
    source = deepcopy(spe)
    rows_before = source.img_data.shape[0]

    updated = source.remove_img(sample_id="sample_1", image_id="dice")

    # The default call must hand back a new object and leave the source alone.
    assert updated is not source
    assert source.img_data.shape[0] == rows_before
    assert updated.img_data.shape[0] == rows_before - 1

    # The removed entry can no longer be fetched from the returned object.
    with pytest.raises(ValueError):
        updated.get_img(sample_id="sample_1", image_id="dice")


def test_remove_img_in_place(spe):
    """Remove one image with ``in_place=True``.

    Asserts that the very same object is returned, that its ``img_data``
    lost exactly one row, and that the removed image is no longer retrievable.
    """
    source = deepcopy(spe)
    rows_before = source.img_data.shape[0]

    updated = source.remove_img(sample_id="sample_1", image_id="dice", in_place=True)

    # In-place removal returns the object it was called on.
    assert updated is source
    assert source.img_data.shape[0] == rows_before - 1

    # The removed entry can no longer be fetched.
    with pytest.raises(ValueError):
        source.get_img(sample_id="sample_1", image_id="dice")


def test_remove_img_both_true(spe):
    """Pass ``True`` for both selectors and expect an empty ``img_data``."""
    source = deepcopy(spe)
    rows_before = source.img_data.shape[0]

    updated = source.remove_img(sample_id=True, image_id=True)

    # A new object is returned; the source keeps every row.
    assert updated is not source
    assert source.img_data.shape[0] == rows_before
    assert updated.img_data.shape[0] == 0


def test_remove_img_both_none(spe):
    """Pass ``None`` for both selectors and expect the first entry to go.

    The sample and image ids of row 0 are recorded beforehand so the test
    can check that exactly that entry is missing afterwards.
    """
    source = deepcopy(spe)
    rows_before = source.img_data.shape[0]
    head_sample = source.img_data["sample_id"][0]
    head_image = source.img_data["image_id"][0]

    updated = source.remove_img(sample_id=None, image_id=None)

    # A new object is returned; the source keeps every row.
    assert updated is not source
    assert source.img_data.shape[0] == rows_before
    assert updated.img_data.shape[0] == rows_before - 1

    # The entry that used to be row 0 is gone from the returned object.
    with pytest.raises(ValueError):
        updated.get_img(sample_id=head_sample, image_id=head_image)


def test_remove_img_sample_str_image_true(spe):
    """Remove every image of one sample (``image_id=True``).

    The expected number of removed rows is taken from ``get_img`` with the
    same selectors before the removal.
    """
    source = deepcopy(spe)
    rows_before = source.img_data.shape[0]
    matching = len(source.get_img(sample_id="sample_1", image_id=True))

    updated = source.remove_img(sample_id="sample_1", image_id=True)

    # A new object is returned, short by exactly the matching rows.
    assert updated is not source
    assert source.img_data.shape[0] == rows_before
    assert updated.img_data.shape[0] == rows_before - matching

    # Nothing is left for sample_1 on the returned object.
    with pytest.raises(ValueError):
        updated.get_img(sample_id="sample_1", image_id=True)


def test_remove_img_sample_true_image_str(spe):
    """Remove a given image id across all samples (``sample_id=True``)."""
    source = deepcopy(spe)
    rows_before = source.img_data.shape[0]
    matching = len([name for name in source.img_data["image_id"] if name == "desert"])

    updated = source.remove_img(sample_id=True, image_id="desert")

    # The returned object is short by exactly the matching rows.
    assert updated.img_data.shape[0] == rows_before - matching

    # No entry with image_id "desert" is left on the returned object.
    with pytest.raises(ValueError):
        updated.get_img(sample_id=True, image_id="desert")


def test_remove_img_sample_str_image_none(spe):
    """Remove a single image of one sample (``image_id=None``).

    Only row counts are checked: the total drops by one and so does the
    number of rows belonging to ``sample_1``.
    """
    source = deepcopy(spe)
    rows_before = source.img_data.shape[0]

    updated = source.remove_img(sample_id="sample_1", image_id=None)

    # Exactly one row disappeared overall.
    assert updated.img_data.shape[0] == rows_before - 1

    # ...and that row belonged to sample_1 (other sample_1 rows may remain).
    owned_before = sum(1 for label in source.img_data["sample_id"] if label == "sample_1")
    owned_after = sum(1 for label in updated.img_data["sample_id"] if label == "sample_1")
    assert owned_after == owned_before - 1


def test_remove_img_sample_none_image_str(spe):
    """Remove an image by id with ``sample_id=None``."""
    source = deepcopy(spe)
    rows_before = source.img_data.shape[0]

    updated = source.remove_img(sample_id=None, image_id="aurora")

    # Exactly one row disappeared.
    assert updated.img_data.shape[0] == rows_before - 1

    # The same selectors no longer resolve on the returned object.
    with pytest.raises(ValueError):
        updated.get_img(sample_id=None, image_id="aurora")