From fe0f2c63583a6fcb431bc7e5b1f869ab0c7ebefb Mon Sep 17 00:00:00 2001 From: Andreas Eisenbarth Date: Sun, 8 Sep 2024 17:55:11 +0200 Subject: [PATCH 01/17] Move check_target_region_column_symmetry to separate validation module --- src/spatialdata/_core/spatialdata.py | 2 +- src/spatialdata/_core/validation.py | 48 ++++++++++++++++++++++++++++ src/spatialdata/models/__init__.py | 2 +- src/spatialdata/models/models.py | 44 ------------------------- 4 files changed, 50 insertions(+), 46 deletions(-) create mode 100644 src/spatialdata/_core/validation.py diff --git a/src/spatialdata/_core/spatialdata.py b/src/spatialdata/_core/spatialdata.py index 1c3affa20..10b9b0f2c 100644 --- a/src/spatialdata/_core/spatialdata.py +++ b/src/spatialdata/_core/spatialdata.py @@ -22,6 +22,7 @@ from xarray import DataArray from spatialdata._core._elements import Images, Labels, Points, Shapes, Tables +from spatialdata._core.validation import check_target_region_column_symmetry from spatialdata._logging import logger from spatialdata._types import ArrayLike, Raster_T from spatialdata._utils import _deprecation_alias, _error_message_add_element @@ -33,7 +34,6 @@ PointsModel, ShapesModel, TableModel, - check_target_region_column_symmetry, get_model, get_table_keys, ) diff --git a/src/spatialdata/_core/validation.py b/src/spatialdata/_core/validation.py new file mode 100644 index 000000000..b4c7bc5b1 --- /dev/null +++ b/src/spatialdata/_core/validation.py @@ -0,0 +1,48 @@ +from __future__ import annotations + +import pandas as pd +from anndata import AnnData + + +def check_target_region_column_symmetry(table: AnnData, region_key: str, target: str | pd.Series) -> None: + """ + Check region and region_key column symmetry. + + This checks whether the specified targets are also present in the region key column in obs and raises an error + if this is not the case. 
+ + Parameters + ---------- + table + Table annotating specific SpatialElements + region_key + The column in obs containing for each row which SpatialElement is annotated by that row. + target + Name of target(s) SpatialElement(s) + + Raises + ------ + ValueError + If there is a mismatch between specified target regions and regions in the region key column of table.obs. + + Example + ------- + Assuming we have a table with region column in obs given by `region_key` called 'region' for which we want to check + whether it contains the specified annotation targets in the `target` variable as `pd.Series['region1', 'region2']`: + + ```python + check_target_region_column_symmetry(table, region_key=region_key, target=target) + ``` + + This returns None if both specified targets are present in the region_key obs column. In this case the annotation + targets can be safely set. If not then a ValueError is raised stating the elements that are not shared between + the region_key column in obs and the specified targets. 
+ """ + found_regions = set(table.obs[region_key].unique().tolist()) + target_element_set = [target] if isinstance(target, str) else target + symmetric_difference = found_regions.symmetric_difference(target_element_set) + if symmetric_difference: + raise ValueError( + f"Mismatch(es) found between regions in region column in obs and target element: " + f"{', '.join(diff for diff in symmetric_difference)}" + ) diff --git a/src/spatialdata/models/__init__.py b/src/spatialdata/models/__init__.py index 0c2bacd85..42c2a0280 100644 --- a/src/spatialdata/models/__init__.py +++ b/src/spatialdata/models/__init__.py @@ -1,5 +1,6 @@ from __future__ import annotations +from spatialdata._core.validation import check_target_region_column_symmetry from spatialdata.models._utils import ( C, SpatialElement, @@ -23,7 +24,6 @@ PointsModel, ShapesModel, TableModel, - check_target_region_column_symmetry, get_model, get_table_keys, ) diff --git a/src/spatialdata/models/models.py b/src/spatialdata/models/models.py index b5d136cfb..4512347de 100644 --- a/src/spatialdata/models/models.py +++ b/src/spatialdata/models/models.py @@ -1094,47 +1094,3 @@ def get_table_keys(table: AnnData) -> tuple[str | list[str], str, str]: raise ValueError( "No spatialdata_attrs key found in table.uns, therefore, no table keys found. Please parse the table." ) - - -def check_target_region_column_symmetry(table: AnnData, region_key: str, target: str | pd.Series) -> None: - """ - Check region and region_key column symmetry. - - This checks whether the specified targets are also present in the region key column in obs and raises an error - if this is not the case. - - Parameters - ---------- - table - Table annotating specific SpatialElements - region_key - The column in obs containing for each row which SpatialElement is annotated by that row. 
- target - Name of target(s) SpatialElement(s) - - Raises - ------ - ValueError - If there is a mismatch between specified target regions and regions in the region key column of table.obs. - - Example - ------- - Assuming we have a table with region column in obs given by `region_key` called 'region' for which we want to check - whether it contains the specified annotation targets in the `target` variable as `pd.Series['region1', 'region2']`: - - ```python - check_target_region_column_symmetry(table, region_key=region_key, target=target) - ``` - - This returns None if both specified targets are present in the region_key obs column. In this case the annotation - targets can be safely set. If not then a ValueError is raised stating the elements that are not shared between - the region_key column in obs and the specified targets. - """ - found_regions = set(table.obs[region_key].unique().tolist()) - target_element_set = [target] if isinstance(target, str) else target - symmetric_difference = found_regions.symmetric_difference(target_element_set) - if symmetric_difference: - raise ValueError( - f"Mismatch(es) found between regions in region column in obs and target element: " - f"{', '.join(diff for diff in symmetric_difference)}" - ) From 0c8c2dbc298d0ba4dfe5bb83e105b0439131cf55 Mon Sep 17 00:00:00 2001 From: Andreas Eisenbarth Date: Sun, 8 Sep 2024 18:16:50 +0200 Subject: [PATCH 02/17] Refactor shared keys handling to dedicated methods --- src/spatialdata/_core/_elements.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/spatialdata/_core/_elements.py b/src/spatialdata/_core/_elements.py index e27874d60..ed8a8b729 100644 --- a/src/spatialdata/_core/_elements.py +++ b/src/spatialdata/_core/_elements.py @@ -39,6 +39,12 @@ def _check_valid_name(name: str) -> None: if not all(c.isalnum() or c in "_-" for c in name): raise ValueError("Name must contain only alphanumeric characters, underscores, and hyphens.") + def _add_shared_key(self, 
key: str) -> None: + self._shared_keys.add(key) + + def _remove_shared_key(self, key: str) -> None: + self._shared_keys.remove(key) + @staticmethod def _check_key(key: str, element_keys: Iterable[str], shared_keys: set[str | None]) -> None: Elements._check_valid_name(key) @@ -49,11 +55,11 @@ def _check_key(key: str, element_keys: Iterable[str], shared_keys: set[str | Non raise KeyError(f"Key `{key}` already exists.") def __setitem__(self, key: str, value: Any) -> None: - self._shared_keys.add(key) + self._add_shared_key(key) super().__setitem__(key, value) def __delitem__(self, key: str) -> None: - self._shared_keys.remove(key) + self._remove_shared_key(key) super().__delitem__(key) From 1a06f5a10c68d3b3083645873473432e682da42d Mon Sep 17 00:00:00 2001 From: Andreas Eisenbarth Date: Sun, 8 Sep 2024 18:55:58 +0200 Subject: [PATCH 03/17] Refactor check_valid_name to validation module --- src/spatialdata/_core/_elements.py | 12 ++---------- src/spatialdata/_core/spatialdata.py | 17 +++++------------ src/spatialdata/_core/validation.py | 27 +++++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 22 deletions(-) diff --git a/src/spatialdata/_core/_elements.py b/src/spatialdata/_core/_elements.py index ed8a8b729..3e3e531c7 100644 --- a/src/spatialdata/_core/_elements.py +++ b/src/spatialdata/_core/_elements.py @@ -11,6 +11,7 @@ from dask.dataframe import DataFrame as DaskDataFrame from geopandas import GeoDataFrame +from spatialdata._core.validation import check_valid_name from spatialdata._types import Raster_T from spatialdata.models import ( Image2DModel, @@ -30,15 +31,6 @@ def __init__(self, shared_keys: set[str | None]) -> None: self._shared_keys = shared_keys super().__init__() - @staticmethod - def _check_valid_name(name: str) -> None: - if not isinstance(name, str): - raise TypeError(f"Name must be a string, not {type(name).__name__}.") - if len(name) == 0: - raise ValueError("Name cannot be an empty string.") - if not all(c.isalnum() or c in "_-" for c in 
name): - raise ValueError("Name must contain only alphanumeric characters, underscores, and hyphens.") - def _add_shared_key(self, key: str) -> None: self._shared_keys.add(key) @@ -47,7 +39,7 @@ def _remove_shared_key(self, key: str) -> None: @staticmethod def _check_key(key: str, element_keys: Iterable[str], shared_keys: set[str | None]) -> None: - Elements._check_valid_name(key) + check_valid_name(key) if key in element_keys: warn(f"Key `{key}` already exists. Overwriting it in-memory.", UserWarning, stacklevel=2) else: diff --git a/src/spatialdata/_core/spatialdata.py b/src/spatialdata/_core/spatialdata.py index 10b9b0f2c..1732b9b71 100644 --- a/src/spatialdata/_core/spatialdata.py +++ b/src/spatialdata/_core/spatialdata.py @@ -22,7 +22,7 @@ from xarray import DataArray from spatialdata._core._elements import Images, Labels, Points, Shapes, Tables -from spatialdata._core.validation import check_target_region_column_symmetry +from spatialdata._core.validation import check_valid_name from spatialdata._logging import logger from spatialdata._types import ArrayLike, Raster_T from spatialdata._utils import _deprecation_alias, _error_message_add_element @@ -1244,9 +1244,7 @@ def write_element( self.write_element(name, overwrite=overwrite) return - from spatialdata._core._elements import Elements - - Elements._check_valid_name(element_name) + check_valid_name(element_name) self._validate_element_names_are_unique() element = self.get(element_name) if element is None: @@ -1316,10 +1314,9 @@ def delete_element_from_disk(self, element_name: str | list[str]) -> None: self.delete_element_from_disk(name) return - from spatialdata._core._elements import Elements from spatialdata._io._utils import _backed_elements_contained_in_path - Elements._check_valid_name(element_name) + check_valid_name(element_name) if self.path is None: raise ValueError("The SpatialData object is not backed by a Zarr store.") @@ -1451,10 +1448,8 @@ def write_transformations(self, element_name: str | 
None = None) -> None: element_name The name of the element to write. If None, write the transformations of all elements. """ - from spatialdata._core._elements import Elements - if element_name is not None: - Elements._check_valid_name(element_name) + check_valid_name(element_name) # recursively write the transformation for all the SpatialElement if element_name is None: @@ -1541,10 +1536,8 @@ def write_metadata(self, element_name: str | None = None, consolidate_metadata: ----- When using the methods `write()` and `write_element()`, the metadata is written automatically. """ - from spatialdata._core._elements import Elements - if element_name is not None: - Elements._check_valid_name(element_name) + check_valid_name(element_name) self.write_transformations(element_name) # TODO: write .uns['spatialdata_attrs'] metadata for AnnData. diff --git a/src/spatialdata/_core/validation.py b/src/spatialdata/_core/validation.py index b4c7bc5b1..be4b98eec 100644 --- a/src/spatialdata/_core/validation.py +++ b/src/spatialdata/_core/validation.py @@ -46,3 +46,30 @@ def check_target_region_column_symmetry(table: AnnData, region_key: str, target: f"Mismatch(es) found between regions in region column in obs and target element: " f"{', '.join(diff for diff in symmetric_difference)}" ) + + +def check_valid_name(name: str) -> None: + """ + Check that a name is valid for SpatialData elements. + + This checks whether the proposed name fulfills the naming restrictions and raises an error + otherwise. + + Parameters + ---------- + name + The name for a SpatialData element + + Raises + ------ + TypeError + If given argument is not of type string. + ValueError + If the proposed name viloates a naming restriction. 
+ """ + if not isinstance(name, str): + raise TypeError(f"Name must be a string, not {type(name).__name__}.") + if len(name) == 0: + raise ValueError("Name cannot be an empty string.") + if not all(c.isalnum() or c in "_-" for c in name): + raise ValueError("Name must contain only alphanumeric characters, underscores, and hyphens.") From edb108c6300c52c574f11278a006c6c1d43f3167 Mon Sep 17 00:00:00 2001 From: Andreas Eisenbarth Date: Sun, 8 Sep 2024 19:03:15 +0200 Subject: [PATCH 04/17] Change name validation to allow dot, disallow ".", "..", "__" --- src/spatialdata/_core/validation.py | 10 ++++++++-- tests/io/test_readwrite.py | 30 +++++++++++++++++++++-------- tests/models/test_models.py | 18 +++++++++++++++++ 3 files changed, 48 insertions(+), 10 deletions(-) diff --git a/src/spatialdata/_core/validation.py b/src/spatialdata/_core/validation.py index be4b98eec..1845bdb2b 100644 --- a/src/spatialdata/_core/validation.py +++ b/src/spatialdata/_core/validation.py @@ -71,5 +71,11 @@ def check_valid_name(name: str) -> None: raise TypeError(f"Name must be a string, not {type(name).__name__}.") if len(name) == 0: raise ValueError("Name cannot be an empty string.") - if not all(c.isalnum() or c in "_-" for c in name): - raise ValueError("Name must contain only alphanumeric characters, underscores, and hyphens.") + if name == ".": + raise ValueError("Name cannot be '.'.") + if name == "..": + raise ValueError("Name cannot be '..'.") + if name.startswith("__"): + raise ValueError("Name cannot start with '__'.") + if not all(c.isalnum() or c in "_-." 
for c in name): + raise ValueError("Name must contain only alphanumeric characters, underscores, dots and hyphens.") diff --git a/tests/io/test_readwrite.py b/tests/io/test_readwrite.py index 261b0b898..f27d9c347 100644 --- a/tests/io/test_readwrite.py +++ b/tests/io/test_readwrite.py @@ -567,17 +567,31 @@ def _check_valid_name(f: Callable[[str], Any]) -> None: f(2) with pytest.raises(ValueError, match="Name cannot be an empty string."): f("") - with pytest.raises(ValueError, match="Name must contain only alphanumeric characters, underscores, and hyphens."): - f("not valid") - with pytest.raises(ValueError, match="Name must contain only alphanumeric characters, underscores, and hyphens."): + with pytest.raises(ValueError, match="Name cannot be '.'"): + f(".") + with pytest.raises(ValueError, match="Name cannot be '..'"): + f("..") + with pytest.raises(ValueError, match="Name cannot start with '__'"): + f("__a") + with pytest.raises( + ValueError, match="Name must contain only alphanumeric characters, underscores, dots and hyphens." + ): + f("has whitespace") + with pytest.raises( + ValueError, match="Name must contain only alphanumeric characters, underscores, dots and hyphens." + ): f("this/is/not/valid") + with pytest.raises( + ValueError, match="Name must contain only alphanumeric characters, underscores, dots and hyphens." 
+ ): + f("non-alnum_#$%&()*+,?@") -def test_incremental_io_valid_name(points: SpatialData) -> None: - _check_valid_name(points.write_element) - _check_valid_name(points.write_metadata) - _check_valid_name(points.write_transformations) - _check_valid_name(points.delete_element_from_disk) +def test_incremental_io_valid_name(full_sdata: SpatialData) -> None: + _check_valid_name(full_sdata.write_element) + _check_valid_name(full_sdata.write_metadata) + _check_valid_name(full_sdata.write_transformations) + _check_valid_name(full_sdata.delete_element_from_disk) cached_sdata_blobs = blobs() diff --git a/tests/models/test_models.py b/tests/models/test_models.py index 4b5b218c6..c8fac6a46 100644 --- a/tests/models/test_models.py +++ b/tests/models/test_models.py @@ -366,6 +366,24 @@ def test_table_model( assert TableModel.REGION_KEY_KEY in table.uns[TableModel.ATTRS_KEY] assert table.uns[TableModel.ATTRS_KEY][TableModel.REGION_KEY] == region + @pytest.mark.parametrize( + "name", + [ + "", + ".", + "..", + "__dunder", + "has whitespace", + "path/separator", + "non-alnum_#$%&()*+,?@", + ], + ) + @pytest.mark.parametrize("element_type", ["images", "labels", "points", "shapes", "tables"]) + def test_model_invalid_names(self, full_sdata, element_type: str, name: str): + element = next(iter(getattr(full_sdata, element_type).values())) + with pytest.raises(ValueError, match="Name (must|cannot)"): + SpatialData(**{element_type: {name: element}}) + @pytest.mark.parametrize("model", [TableModel]) @pytest.mark.parametrize("region", [["sample_1"] * 5 + ["sample_2"] * 5]) def test_table_instance_key_values_not_unique(self, model: TableModel, region: str | np.ndarray): From e01f4579d0ae66f1d0be71d3b4f474cb6c2be22f Mon Sep 17 00:00:00 2001 From: Andreas Eisenbarth Date: Sun, 8 Sep 2024 20:16:37 +0200 Subject: [PATCH 05/17] Add name validation for case-insensitive uniqueness --- src/spatialdata/_core/_elements.py | 9 ++- src/spatialdata/_core/spatialdata.py | 12 +-- 
src/spatialdata/_core/validation.py | 74 ++++++++++++++++++- .../operations/test_spatialdata_operations.py | 11 +++ tests/io/test_readwrite.py | 8 +- tests/models/test_models.py | 12 +++ 6 files changed, 112 insertions(+), 14 deletions(-) diff --git a/src/spatialdata/_core/_elements.py b/src/spatialdata/_core/_elements.py index 3e3e531c7..63bdcde76 100644 --- a/src/spatialdata/_core/_elements.py +++ b/src/spatialdata/_core/_elements.py @@ -11,7 +11,7 @@ from dask.dataframe import DataFrame as DaskDataFrame from geopandas import GeoDataFrame -from spatialdata._core.validation import check_valid_name +from spatialdata._core.validation import check_key_is_case_insensitively_unique, check_valid_name from spatialdata._types import Raster_T from spatialdata.models import ( Image2DModel, @@ -43,8 +43,11 @@ def _check_key(key: str, element_keys: Iterable[str], shared_keys: set[str | Non if key in element_keys: warn(f"Key `{key}` already exists. Overwriting it in-memory.", UserWarning, stacklevel=2) else: - if key in shared_keys: - raise KeyError(f"Key `{key}` already exists.") + try: + check_key_is_case_insensitively_unique(key, shared_keys) + except ValueError as e: + # Validation raises ValueError, but inappropriate mapping key must raise KeyError. 
+ raise KeyError(*e.args) from e def __setitem__(self, key: str, value: Any) -> None: self._add_shared_key(key) diff --git a/src/spatialdata/_core/spatialdata.py b/src/spatialdata/_core/spatialdata.py index 1732b9b71..a5c1d0e28 100644 --- a/src/spatialdata/_core/spatialdata.py +++ b/src/spatialdata/_core/spatialdata.py @@ -22,7 +22,11 @@ from xarray import DataArray from spatialdata._core._elements import Images, Labels, Points, Shapes, Tables -from spatialdata._core.validation import check_valid_name +from spatialdata._core.validation import ( + check_all_keys_case_insensitively_unique, + check_target_region_column_symmetry, + check_valid_name, +) from spatialdata._logging import logger from spatialdata._types import ArrayLike, Raster_T from spatialdata._utils import _deprecation_alias, _error_message_add_element @@ -1987,11 +1991,7 @@ def _validate_element_names_are_unique(self) -> None: ValueError If the element names are not unique. """ - element_names = set() - for _, element_name, _ in self.gen_elements(): - if element_name in element_names: - raise ValueError(f"Element name {element_name!r} is not unique.") - element_names.add(element_name) + check_all_keys_case_insensitively_unique([name for _, name, _ in self.gen_elements()]) def _find_element(self, element_name: str) -> tuple[str, str, SpatialElement | AnnData]: """ diff --git a/src/spatialdata/_core/validation.py b/src/spatialdata/_core/validation.py index 1845bdb2b..94bd7b5ce 100644 --- a/src/spatialdata/_core/validation.py +++ b/src/spatialdata/_core/validation.py @@ -1,5 +1,7 @@ from __future__ import annotations +from collections.abc import Collection + import pandas as pd from anndata import AnnData @@ -65,7 +67,7 @@ def check_valid_name(name: str) -> None: TypeError If given argument is not of type string. ValueError - If the proposed name viloates a naming restriction. + If the proposed name violates a naming restriction. 
""" if not isinstance(name, str): raise TypeError(f"Name must be a string, not {type(name).__name__}.") @@ -79,3 +81,73 @@ def check_valid_name(name: str) -> None: raise ValueError("Name cannot start with '__'.") if not all(c.isalnum() or c in "_-." for c in name): raise ValueError("Name must contain only alphanumeric characters, underscores, dots and hyphens.") + + +def check_all_keys_case_insensitively_unique(keys: Collection[str]) -> None: + """ + Check that all keys are unique when ignoring case. + + This checks whether the keys contain no duplicates on an case-insensitive system. If keys + differ in character case, an error is raised. + + Parameters + ---------- + keys + A collection of string keys + + Raises + ------ + ValueError + If two keys differ only in character case. + + Example + ------- + + ```pycon + >>> check_all_keys_case_insensitively_unique(["abc", "def"]) + >>> check_all_keys_case_insensitively_unique(["abc", "def", "Abc"]) + Traceback (most recent call last): + ... + ValueError: Key `Abc` is not unique, or another case-variant of it exists. + ``` + """ + seen: set[str | None] = set() + for key in keys: + normalized_key = key.lower() + check_key_is_case_insensitively_unique(key, seen) + seen.add(normalized_key) + + +def check_key_is_case_insensitively_unique(key: str, other_keys: set[str | None]) -> None: + """ + Check that a specific key is not contained in a set of keys, ignoring case. + + This checks whether a given key is not contained among a set of reference keys. If the key or + a case-variant of it is contained, an error is raised. + + Parameters + ---------- + key + A string key + other_keys + A collection of string keys + + Raises + ------ + ValueError + If reference keys contain a variant of the key that only differs in character case. 
+ + Example + ------- + + ```pycon + >>> check_key_is_case_insensitively_unique("def", ["abc"]) + >>> check_key_is_case_insensitively_unique("abc", ["def", "Abc"]) + Traceback (most recent call last): + ... + ValueError: Key `Abc` is not unique, or another case-variant of it exists. + ``` + """ + normalized_key = key.lower() + if normalized_key in other_keys: + raise ValueError(f"Key `{key}` is not unique, or another case-variant of it exists.") diff --git a/tests/core/operations/test_spatialdata_operations.py b/tests/core/operations/test_spatialdata_operations.py index 8a59147f1..820a828ab 100644 --- a/tests/core/operations/test_spatialdata_operations.py +++ b/tests/core/operations/test_spatialdata_operations.py @@ -66,6 +66,17 @@ def test_element_names_unique() -> None: with pytest.raises(KeyError): sdata.shapes["labels"] = shapes + # add elements with the case-variant of an existing name + # of element of same type + with pytest.raises(KeyError): + sdata.images["Image"] = image + with pytest.raises(KeyError): + sdata.points["POINTS"] = points + with pytest.raises(KeyError): + sdata.shapes["Shapes"] = shapes + with pytest.raises(KeyError): + sdata.labels["Labels"] = labels + assert sdata["image"].shape == image.shape assert sdata["labels"].shape == labels.shape assert len(sdata["points"]) == len(points) diff --git a/tests/io/test_readwrite.py b/tests/io/test_readwrite.py index f27d9c347..d97575ad0 100644 --- a/tests/io/test_readwrite.py +++ b/tests/io/test_readwrite.py @@ -104,7 +104,7 @@ def test_incremental_io_in_memory( with pytest.warns(UserWarning): sdata.images[f"additional_{k}"] = v sdata[f"additional_{k}"] = v - with pytest.raises(KeyError, match="Key `table` already exists."): + with pytest.raises(KeyError, match="Key `table` is not unique"): sdata["table"] = v for k, v in _get_labels().items(): @@ -112,7 +112,7 @@ def test_incremental_io_in_memory( with pytest.warns(UserWarning): sdata.labels[f"additional_{k}"] = v sdata[f"additional_{k}"] = v - with 
pytest.raises(KeyError, match="Key `table` already exists."): + with pytest.raises(KeyError, match="Key `table` is not unique"): sdata["table"] = v for k, v in _get_shapes().items(): @@ -120,7 +120,7 @@ def test_incremental_io_in_memory( with pytest.warns(UserWarning): sdata.shapes[f"additional_{k}"] = v sdata[f"additional_{k}"] = v - with pytest.raises(KeyError, match="Key `table` already exists."): + with pytest.raises(KeyError, match="Key `table` is not unique"): sdata["table"] = v for k, v in _get_points().items(): @@ -128,7 +128,7 @@ def test_incremental_io_in_memory( with pytest.warns(UserWarning): sdata.points[f"additional_{k}"] = v sdata[f"additional_{k}"] = v - with pytest.raises(KeyError, match="Key `table` already exists."): + with pytest.raises(KeyError, match="Key `table` is not unique"): sdata["table"] = v def test_incremental_io_list_of_elements(self, shapes: SpatialData) -> None: diff --git a/tests/models/test_models.py b/tests/models/test_models.py index c8fac6a46..3ac738667 100644 --- a/tests/models/test_models.py +++ b/tests/models/test_models.py @@ -384,6 +384,18 @@ def test_model_invalid_names(self, full_sdata, element_type: str, name: str): with pytest.raises(ValueError, match="Name (must|cannot)"): SpatialData(**{element_type: {name: element}}) + @pytest.mark.parametrize( + "names", + [ + ["abc", "Abc"], + ], + ) + @pytest.mark.parametrize("element_type", ["images", "labels", "points", "shapes", "tables"]) + def test_model_not_unique_names(self, full_sdata, element_type: str, names: list[str]): + element = next(iter(getattr(full_sdata, element_type).values())) + with pytest.raises(KeyError, match="Key `.*` is not unique"): + SpatialData(**{element_type: {name: element for name in names}}) + @pytest.mark.parametrize("model", [TableModel]) @pytest.mark.parametrize("region", [["sample_1"] * 5 + ["sample_2"] * 5]) def test_table_instance_key_values_not_unique(self, model: TableModel, region: str | np.ndarray): From 
141c17eeef7abcd36bb37719b68188626fc1d9ef Mon Sep 17 00:00:00 2001 From: Andreas Eisenbarth Date: Mon, 9 Sep 2024 14:03:47 +0200 Subject: [PATCH 06/17] Add name validation for table columns --- src/spatialdata/_core/validation.py | 78 ++++++++++++++++++++++++++++- src/spatialdata/models/models.py | 2 + tests/models/test_models.py | 50 ++++++++++++++++++ 3 files changed, 129 insertions(+), 1 deletion(-) diff --git a/src/spatialdata/_core/validation.py b/src/spatialdata/_core/validation.py index 94bd7b5ce..80867c152 100644 --- a/src/spatialdata/_core/validation.py +++ b/src/spatialdata/_core/validation.py @@ -1,6 +1,7 @@ from __future__ import annotations -from collections.abc import Collection +from collections import defaultdict +from collections.abc import Callable, Collection import pandas as pd from anndata import AnnData @@ -151,3 +152,78 @@ def check_key_is_case_insensitively_unique(key: str, other_keys: set[str | None] normalized_key = key.lower() if normalized_key in other_keys: raise ValueError(f"Key `{key}` is not unique, or another case-variant of it exists.") + + +def check_valid_dataframe_column_name(name: str) -> None: + """ + Check that a name is valid for SpatialData table dataframe. + + This checks whether the proposed name fulfills the naming restrictions and raises an error + otherwise. In addition to the element naming restriction, a column cannot be named "_index". + + Parameters + ---------- + name + The name for a table column + + Raises + ------ + TypeError + If given argument is not of type string. + ValueError + If the proposed name violates a naming restriction. 
+ """ + check_valid_name(name) + if name == "_index": + raise ValueError("Name cannot be '_index'") + + +def _iter_anndata_attr_keys_collect_value_errors( + adata: AnnData, attr_visitor: Callable[[str], None], key_visitor: Callable[[str, str], None] +) -> None: + messages_per_attr: dict[str, list[str]] = defaultdict(list) + for attr in ("obs", "obsm", "obsp", "var", "varm", "varp", "uns"): + try: + attr_visitor(attr) + except ValueError as e: + messages_per_attr[attr].append(f" {e.args[0]}") + for key in getattr(adata, attr): + try: + key_visitor(attr, key) + except ValueError as e: + messages_per_attr[attr].append(f" '{key}': {e.args[0]}") + if messages_per_attr: + raise ValueError( + "Table contains invalid names:\n" + + "\n".join(f"{attr}:\n" + "\n".join(messages) for attr, messages in messages_per_attr.items()) + ) + + +def validate_table_attr_keys(data: AnnData) -> None: + """ + Check that all keys of all AnnData attributes have valid names. + + This checks for AnnData obs, var, obsm, obsp, varm, varp, uns whether their keys fulfill the + naming restrictions and raises an error otherwise. + + Parameters + ---------- + data + The AnnData table + + Raises + ------ + ValueError + If the AnnData contains on or several invalid keys. 
+ """ + + def _check_valid_attr_keys(attr: str) -> None: + check_all_keys_case_insensitively_unique(getattr(data, attr).keys()) + + def _check_valid_attr_key(attr: str, key: str) -> None: + if attr in ("obs", "var"): + check_valid_dataframe_column_name(key) + else: + check_valid_name(key) + + _iter_anndata_attr_keys_collect_value_errors(data, _check_valid_attr_keys, _check_valid_attr_key) diff --git a/src/spatialdata/models/models.py b/src/spatialdata/models/models.py index 4512347de..83543d89b 100644 --- a/src/spatialdata/models/models.py +++ b/src/spatialdata/models/models.py @@ -34,6 +34,7 @@ ) from xarray_schema.dataarray import DataArraySchema +from spatialdata._core.validation import validate_table_attr_keys from spatialdata._logging import logger from spatialdata._types import ArrayLike from spatialdata.models import C, X, Y, Z, get_axes_names @@ -974,6 +975,7 @@ def parse( ------- The parsed data. """ + validate_table_attr_keys(adata) # either all live in adata.uns or all be passed in as argument n_args = sum([region is not None, region_key is not None, instance_key is not None]) if n_args == 0: diff --git a/tests/models/test_models.py b/tests/models/test_models.py index 3ac738667..d8b38ba40 100644 --- a/tests/models/test_models.py +++ b/tests/models/test_models.py @@ -411,6 +411,56 @@ def test_table_instance_key_values_not_unique(self, model: TableModel, region: s with pytest.raises(ValueError, match=re.escape("Instance key column for region(s) `sample_1, sample_2`")): model.parse(adata, region=region, region_key=region_key, instance_key="A") + @pytest.mark.parametrize( + "key", + [ + "", + ".", + "..", + "__dunder", + "_index", + "has whitespace", + "path/separator", + "non-alnum_#$%&()*+,?@", + ], + ) + @pytest.mark.parametrize("attr", ["obs", "obsm", "obsp", "var", "varm", "varp", "uns"]) + def test_table_model_invalid_names(self, key: str, attr: str): + if attr in ("obs", "var"): + with pytest.raises(ValueError, match=f"Table contains invalid 
names:\n{attr}:\n '{re.escape(key)}'"): + df = pd.DataFrame([[None]], columns=[key], index=["1"]) + adata = AnnData(np.array([[0]]), **{attr: df}) + TableModel.parse(adata) + elif key != "_index": # "_index" is only disallowed in obs/var + if attr in ("obsm", "varm", "obsp", "varp"): + with pytest.raises(ValueError, match=f"Table contains invalid names:\n{attr}:\n '{re.escape(key)}'"): + array = np.array([[0]]) + adata = AnnData(np.array([[0]]), **{attr: {key: array}}) + TableModel.parse(adata) + elif attr == "uns": + with pytest.raises(ValueError, match=f"Table contains invalid names:\n{attr}:\n '{re.escape(key)}'"): + adata = AnnData(np.array([[0]]), **{attr: {key: {}}}) + TableModel.parse(adata) + + @pytest.mark.parametrize( + "keys", + [ + ["abc", "abc"], + ["abc", "Abc", "ABC"], + ], + ) + @pytest.mark.parametrize("attr", ["obs", "var"]) + def test_table_model_not_unique_columns(self, keys: list[str], attr: str): + key_regex = re.escape(keys[1]) + df = pd.DataFrame([[None] * len(keys)], columns=keys, index=["1"]) + with pytest.raises( + ValueError, + match=f"Table contains invalid names:\n{attr}:\n" + + f" Key `{key_regex}` is not unique, or another case-variant of it exists.", + ): + adata = AnnData(np.array([[0]]), **{attr: df}) + TableModel.parse(adata) + def test_get_schema(): images = _get_images() From 5b4f407390b37b10a9ee12ab6550f00c114463ba Mon Sep 17 00:00:00 2001 From: Andreas Eisenbarth Date: Mon, 9 Sep 2024 16:10:39 +0200 Subject: [PATCH 07/17] Add validation before writing SpatialData --- src/spatialdata/_core/spatialdata.py | 8 ++++++++ tests/io/test_readwrite.py | 15 ++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/spatialdata/_core/spatialdata.py b/src/spatialdata/_core/spatialdata.py index a5c1d0e28..a200ef29b 100644 --- a/src/spatialdata/_core/spatialdata.py +++ b/src/spatialdata/_core/spatialdata.py @@ -26,6 +26,7 @@ check_all_keys_case_insensitively_unique, check_target_region_column_symmetry, 
check_valid_name, + validate_table_attr_keys, ) from spatialdata._logging import logger from spatialdata._types import ArrayLike, Raster_T @@ -1119,6 +1120,12 @@ def _validate_can_safely_write_to_path( " the current Zarr store." + WORKAROUND ) + def _validate_all_elements(self) -> None: + for element_type, element_name, element in self.gen_elements(): + check_valid_name(element_name) + if element_type == "table": + validate_table_attr_keys(element) + def write( self, file_path: str | Path, @@ -1154,6 +1161,7 @@ def write( if isinstance(file_path, str): file_path = Path(file_path) self._validate_can_safely_write_to_path(file_path, overwrite=overwrite) + self._validate_all_elements() store = parse_url(file_path, mode="w").store _ = zarr.group(store=store, overwrite=overwrite) diff --git a/tests/io/test_readwrite.py b/tests/io/test_readwrite.py index d97575ad0..0169e778f 100644 --- a/tests/io/test_readwrite.py +++ b/tests/io/test_readwrite.py @@ -20,7 +20,7 @@ set_transformation, ) from spatialdata.transformations.transformations import Identity, Scale -from tests.conftest import _get_images, _get_labels, _get_points, _get_shapes +from tests.conftest import _get_images, _get_labels, _get_points, _get_shapes, _get_table RNG = default_rng(0) @@ -684,3 +684,16 @@ def test_element_already_on_disk_different_type(full_sdata, element_name: str) - match=ERROR_MSG, ): full_sdata.write_transformations(element_name) + + +def test_writing_invalid_name(tmp_path: Path): + invalid_sdata = SpatialData() + # Circumvent validation at construction time and check validation happens again at writing time. 
+ invalid_sdata.images.data[""] = next(iter(_get_images().values())) + # invalid_sdata.labels.data["."] = next(iter(_get_labels().values())) + invalid_sdata.points.data["path/separator"] = next(iter(_get_points().values())) + invalid_sdata.shapes.data["non-alnum_#$%&()*+,?@"] = next(iter(_get_shapes().values())) + invalid_sdata.tables.data["has whitespace"] = _get_table() + + with pytest.raises(ValueError, match="Name (must|cannot)"): + invalid_sdata.write(tmp_path / "data.zarr") From 8c8835c366f425aa27a176518061978a0a5ba6bf Mon Sep 17 00:00:00 2001 From: Andreas Eisenbarth Date: Mon, 9 Sep 2024 14:40:23 +0200 Subject: [PATCH 08/17] Update _get_table to multiple tables --- tests/conftest.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 2ce363bfe..2a6fb3cbc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -67,7 +67,7 @@ def points() -> SpatialData: @pytest.fixture() def table_single_annotation() -> SpatialData: - return SpatialData(tables=_get_table(region="labels2d")) + return SpatialData(tables={"table": _get_table(region="labels2d")}) @pytest.fixture() @@ -94,7 +94,7 @@ def full_sdata() -> SpatialData: labels=_get_labels(), shapes=_get_shapes(), points=_get_points(), - tables=_get_table(region="labels2d"), + tables=_get_tables(region="labels2d"), ) @@ -129,7 +129,7 @@ def sdata(request) -> SpatialData: labels=_get_labels(), shapes=_get_shapes(), points=_get_points(), - tables=_get_table("labels2d"), + tables=_get_tables(region="labels2d"), ) if request.param == "empty": return SpatialData() @@ -279,6 +279,14 @@ def _get_points() -> dict[str, DaskDataFrame]: return out +def _get_tables( + region: None | str | list[str] = "sample1", + region_key: None | str = "region", + instance_key: None | str = "instance_id", +) -> dict[str, AnnData]: + return {"table": _get_table(region=region, region_key=region_key, instance_key=instance_key)} + + def _get_table( region: None | str | 
list[str] = "sample1", region_key: None | str = "region", From 1a021193a9ea51635ba5b73660207d8d9bb94cff Mon Sep 17 00:00:00 2001 From: Andreas Eisenbarth Date: Mon, 9 Sep 2024 18:28:46 +0200 Subject: [PATCH 09/17] Document naming restrictions --- docs/design_doc.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/design_doc.md b/docs/design_doc.md index 45d41b7bf..66839ee51 100644 --- a/docs/design_doc.md +++ b/docs/design_doc.md @@ -130,6 +130,18 @@ _SpatialData_ follows the OME-NGFF specifications whenever possible and therefor - Any `Element` MAY be annotated by `Tables`; also `Shapes` and `Points` MAY contain annotations within themselves as additional dataframe columns (e.g. intensity of point spread function of a each point, or gene id). - `Tables` CAN NOT be annotated by other `Tables`. +#### Naming + +Names of SpatialData elements must fulfill certain restrictions to ensure robust storage and compatibility: + +- MUST NOT be the empty string ``. +- MUST only contain alphanumeric characters or hyphens `-`, dots `.`, underscores `_`. Alphanumeric includes letters from different alphabets and number-like symbols, but excludes whitespace, slashes and other symbols. +- MUST NOT be the full strings `.` or `..`, which would have special meanings as file paths. +- MUST NOT start with double underscores `__`. +- MUST NOT only differ in character case, to avoid name collisions on case-insensitive systems. + +Additionally, dataframes in tables MUST NOT have a column named `_index`, which is reserved. + #### Images Images of a sample. Should conform to the [OME-NGFF concept of an image](https://ngff.openmicroscopy.org/latest/#image-layout). 
From a42d69818cba588e00e91a3bec58306e2d677882 Mon Sep 17 00:00:00 2001 From: Andreas Eisenbarth Date: Mon, 9 Sep 2024 18:29:01 +0200 Subject: [PATCH 10/17] Add change log --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7cdb72dbd..3cd98a752 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,10 @@ and this project adheres to [Semantic Versioning][]. ## [0.x.x] - 2024-xx-xx +### Changed + +- Validation of element and column names allowing `_-.` and alphanumeric characters #624 + ### Minor - Added `clip: bool = False` parameter to `polygon_query()` #670 From abc21ca54038cf8327dadf0030858a8e67c1ad19 Mon Sep 17 00:00:00 2001 From: LucaMarconato <2664412+LucaMarconato@users.noreply.github.com> Date: Sat, 28 Sep 2024 12:07:27 +0200 Subject: [PATCH 11/17] edit docstring, design doc --- src/spatialdata/_core/validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spatialdata/_core/validation.py b/src/spatialdata/_core/validation.py index 80867c152..18df1174e 100644 --- a/src/spatialdata/_core/validation.py +++ b/src/spatialdata/_core/validation.py @@ -214,7 +214,7 @@ def validate_table_attr_keys(data: AnnData) -> None: Raises ------ ValueError - If the AnnData contains on or several invalid keys. + If the AnnData contains one or several invalid keys. 
""" def _check_valid_attr_keys(attr: str) -> None: From 4c319edafbaadb231c77398073571ec9a410d979 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Sat, 28 Sep 2024 13:16:33 +0200 Subject: [PATCH 12/17] code review changes for valid name validation --- src/spatialdata/_core/spatialdata.py | 4 +- src/spatialdata/_core/validation.py | 4 +- src/spatialdata/models/models.py | 3 ++ .../operations/test_spatialdata_operations.py | 29 ++++++++-- tests/io/test_readwrite.py | 54 ++++++++++++++++++- tests/models/test_models.py | 42 ++++++++++----- 6 files changed, 114 insertions(+), 22 deletions(-) diff --git a/src/spatialdata/_core/spatialdata.py b/src/spatialdata/_core/spatialdata.py index a200ef29b..8eb6f160d 100644 --- a/src/spatialdata/_core/spatialdata.py +++ b/src/spatialdata/_core/spatialdata.py @@ -1123,7 +1123,7 @@ def _validate_can_safely_write_to_path( def _validate_all_elements(self) -> None: for element_type, element_name, element in self.gen_elements(): check_valid_name(element_name) - if element_type == "table": + if element_type == "tables": validate_table_attr_keys(element) def write( @@ -1275,6 +1275,8 @@ def write_element( break if element_type is None: raise ValueError(f"Element with name {element_name} not found in SpatialData object.") + if element_type == "tables": + validate_table_attr_keys(element) self._check_element_not_on_disk_with_different_type(element_type=element_type, element_name=element_name) diff --git a/src/spatialdata/_core/validation.py b/src/spatialdata/_core/validation.py index 18df1174e..71e867fa5 100644 --- a/src/spatialdata/_core/validation.py +++ b/src/spatialdata/_core/validation.py @@ -182,7 +182,7 @@ def _iter_anndata_attr_keys_collect_value_errors( adata: AnnData, attr_visitor: Callable[[str], None], key_visitor: Callable[[str, str], None] ) -> None: messages_per_attr: dict[str, list[str]] = defaultdict(list) - for attr in ("obs", "obsm", "obsp", "var", "varm", "varp", "uns"): + for attr in ("obs", "obsm", "obsp", "var", 
"varm", "varp", "uns", "layers"): try: attr_visitor(attr) except ValueError as e: @@ -203,7 +203,7 @@ def validate_table_attr_keys(data: AnnData) -> None: """ Check that all keys of all AnnData attributes have valid names. - This checks for AnnData obs, var, obsm, obsp, varm, varp, uns whether their keys fulfill the + This checks for AnnData obs, var, obsm, obsp, varm, varp, uns, layers whether their keys fulfill the naming restrictions and raises an error otherwise. Parameters diff --git a/src/spatialdata/models/models.py b/src/spatialdata/models/models.py index 83543d89b..dd151e977 100644 --- a/src/spatialdata/models/models.py +++ b/src/spatialdata/models/models.py @@ -942,6 +942,7 @@ def validate( ------- The validated data. """ + validate_table_attr_keys(data) if ATTRS_KEY not in data.uns: return data @@ -1005,6 +1006,8 @@ def parse( if instance_key is None: raise ValueError("`instance_key` must be provided.") + # note! this is an expensive check and therefore we skip it during validation + # https://github.com/scverse/spatialdata/issues/715 grouped = adata.obs.groupby(region_key, observed=True) grouped_size = grouped.size() grouped_nunique = grouped.nunique() diff --git a/tests/core/operations/test_spatialdata_operations.py b/tests/core/operations/test_spatialdata_operations.py index 820a828ab..41b4f497c 100644 --- a/tests/core/operations/test_spatialdata_operations.py +++ b/tests/core/operations/test_spatialdata_operations.py @@ -30,6 +30,7 @@ def test_element_names_unique() -> None: points = PointsModel.parse(np.array([[0, 0]])) labels = Labels2DModel.parse(np.array([[0, 0], [0, 0]]), dims=["y", "x"]) image = Image2DModel.parse(np.array([[[0, 0], [0, 0]]]), dims=["c", "y", "x"]) + table = TableModel.parse(AnnData(shape=(1, 0))) with pytest.raises(KeyError): SpatialData(images={"image": image}, points={"image": points}) @@ -37,9 +38,15 @@ def test_element_names_unique() -> None: SpatialData(images={"image": image}, shapes={"image": shapes}) with 
pytest.raises(KeyError): SpatialData(images={"image": image}, labels={"image": labels}) + with pytest.raises(KeyError): + SpatialData(images={"image": image}, labels={"image": table}) sdata = SpatialData( - images={"image": image}, points={"points": points}, shapes={"shapes": shapes}, labels={"labels": labels} + images={"image": image}, + points={"points": points}, + shapes={"shapes": shapes}, + labels={"labels": labels}, + tables={"table": table}, ) # add elements with the same name @@ -52,6 +59,8 @@ def test_element_names_unique() -> None: sdata.shapes["shapes"] = shapes with pytest.warns(UserWarning): sdata.labels["labels"] = labels + with pytest.warns(UserWarning): + sdata.tables["table"] = table # add elements with the same name # of element of different type @@ -65,6 +74,8 @@ def test_element_names_unique() -> None: sdata.points["shapes"] = points with pytest.raises(KeyError): sdata.shapes["labels"] = shapes + with pytest.raises(KeyError): + sdata.tables["labels"] = table # add elements with the case-variant of an existing name # of element of same type @@ -76,11 +87,14 @@ def test_element_names_unique() -> None: sdata.shapes["Shapes"] = shapes with pytest.raises(KeyError): sdata.labels["Labels"] = labels + with pytest.raises(KeyError): + sdata.tables["Table"] = table assert sdata["image"].shape == image.shape assert sdata["labels"].shape == labels.shape assert len(sdata["points"]) == len(points) assert sdata["shapes"].shape == shapes.shape + assert len(sdata["table"]) == len(table) # add elements with the same name, test only couples of elements with pytest.raises(KeyError): @@ -93,7 +107,11 @@ def test_element_names_unique() -> None: # test replacing complete attribute sdata = SpatialData( - images={"image": image}, points={"points": points}, shapes={"shapes": shapes}, labels={"labels": labels} + images={"image": image}, + points={"points": points}, + shapes={"shapes": shapes}, + labels={"labels": labels}, + tables={"table": table}, ) # test for images 
sdata.images = {"image2": image} @@ -110,11 +128,16 @@ def test_element_names_unique() -> None: assert set(sdata.points.keys()) == {"points2"} assert "points2" in sdata._shared_keys assert "points" not in sdata._shared_keys - # test for points + # test for shapes sdata.shapes = {"shapes2": shapes} assert set(sdata.shapes.keys()) == {"shapes2"} assert "shapes2" in sdata._shared_keys assert "shapes" not in sdata._shared_keys + # test for tables + sdata.tables = {"table2": table} + assert set(sdata.tables.keys()) == {"table2"} + assert "table2" in sdata._shared_keys + assert "table" not in sdata._shared_keys def test_element_type_from_element_name(points: SpatialData) -> None: diff --git a/tests/io/test_readwrite.py b/tests/io/test_readwrite.py index 0169e778f..48576b041 100644 --- a/tests/io/test_readwrite.py +++ b/tests/io/test_readwrite.py @@ -20,7 +20,7 @@ set_transformation, ) from spatialdata.transformations.transformations import Identity, Scale -from tests.conftest import _get_images, _get_labels, _get_points, _get_shapes, _get_table +from tests.conftest import _get_images, _get_labels, _get_points, _get_shapes, _get_table, _get_tables RNG = default_rng(0) @@ -103,6 +103,7 @@ def test_incremental_io_in_memory( sdata.images[f"additional_{k}"] = v with pytest.warns(UserWarning): sdata.images[f"additional_{k}"] = v + with pytest.warns(UserWarning): sdata[f"additional_{k}"] = v with pytest.raises(KeyError, match="Key `table` is not unique"): sdata["table"] = v @@ -111,6 +112,7 @@ def test_incremental_io_in_memory( sdata.labels[f"additional_{k}"] = v with pytest.warns(UserWarning): sdata.labels[f"additional_{k}"] = v + with pytest.warns(UserWarning): sdata[f"additional_{k}"] = v with pytest.raises(KeyError, match="Key `table` is not unique"): sdata["table"] = v @@ -119,6 +121,7 @@ def test_incremental_io_in_memory( sdata.shapes[f"additional_{k}"] = v with pytest.warns(UserWarning): sdata.shapes[f"additional_{k}"] = v + with pytest.warns(UserWarning): 
sdata[f"additional_{k}"] = v with pytest.raises(KeyError, match="Key `table` is not unique"): sdata["table"] = v @@ -127,10 +130,20 @@ def test_incremental_io_in_memory( sdata.points[f"additional_{k}"] = v with pytest.warns(UserWarning): sdata.points[f"additional_{k}"] = v + with pytest.warns(UserWarning): sdata[f"additional_{k}"] = v with pytest.raises(KeyError, match="Key `table` is not unique"): sdata["table"] = v + for k, v in _get_tables().items(): + sdata.tables[f"additional_{k}"] = v + with pytest.warns(UserWarning): + sdata.tables[f"additional_{k}"] = v + with pytest.warns(UserWarning): + sdata[f"additional_{k}"] = v + with pytest.raises(KeyError, match="Key `poly` is not unique"): + sdata["poly"] = v + def test_incremental_io_list_of_elements(self, shapes: SpatialData) -> None: with tempfile.TemporaryDirectory() as tmpdir: f = os.path.join(tmpdir, "data.zarr") @@ -690,10 +703,47 @@ def test_writing_invalid_name(tmp_path: Path): invalid_sdata = SpatialData() # Circumvent validation at construction time and check validation happens again at writing time. 
invalid_sdata.images.data[""] = next(iter(_get_images().values())) - # invalid_sdata.labels.data["."] = next(iter(_get_labels().values())) + invalid_sdata.labels.data["."] = next(iter(_get_labels().values())) invalid_sdata.points.data["path/separator"] = next(iter(_get_points().values())) invalid_sdata.shapes.data["non-alnum_#$%&()*+,?@"] = next(iter(_get_shapes().values())) invalid_sdata.tables.data["has whitespace"] = _get_table() with pytest.raises(ValueError, match="Name (must|cannot)"): invalid_sdata.write(tmp_path / "data.zarr") + + +def test_writing_valid_table_name_invalid_table(tmp_path: Path): + # also try with a valid table name but invalid table + # testing just one case, all the cases are in test_table_model_invalid_names() + invalid_sdata = SpatialData() + invalid_sdata.tables.data["valid_name"] = AnnData(np.array([[0]]), layers={"invalid name": np.array([[0]])}) + with pytest.raises(ValueError, match="Name (must|cannot)"): + invalid_sdata.write(tmp_path / "data.zarr") + + +def test_incremental_writing_invalid_name(tmp_path: Path): + invalid_sdata = SpatialData() + invalid_sdata.write(tmp_path / "data.zarr") + + # Circumvent validation at construction time and check validation happens again at writing time. 
+ invalid_sdata.images.data[""] = next(iter(_get_images().values())) + invalid_sdata.labels.data["."] = next(iter(_get_labels().values())) + invalid_sdata.points.data["path/separator"] = next(iter(_get_points().values())) + invalid_sdata.shapes.data["non-alnum_#$%&()*+,?@"] = next(iter(_get_shapes().values())) + invalid_sdata.tables.data["has whitespace"] = _get_table() + + for element_type in ["images", "labels", "points", "shapes", "tables"]: + elements = getattr(invalid_sdata, element_type) + for name in elements: + with pytest.raises(ValueError, match="Name (must|cannot)"): + invalid_sdata.write_element(name) + + +def test_incremental_writing_valid_table_name_invalid_table(tmp_path: Path): + # also try with a valid table name but invalid table + # testing just one case, all the cases are in test_table_model_invalid_names() + invalid_sdata = SpatialData() + invalid_sdata.write(tmp_path / "data2.zarr") + invalid_sdata.tables.data["valid_name"] = AnnData(np.array([[0]]), layers={"invalid name": np.array([[0]])}) + with pytest.raises(ValueError, match="Name (must|cannot)"): + invalid_sdata.write_element("valid_name") diff --git a/tests/models/test_models.py b/tests/models/test_models.py index d8b38ba40..f83ed2296 100644 --- a/tests/models/test_models.py +++ b/tests/models/test_models.py @@ -424,23 +424,33 @@ def test_table_instance_key_values_not_unique(self, model: TableModel, region: s "non-alnum_#$%&()*+,?@", ], ) - @pytest.mark.parametrize("attr", ["obs", "obsm", "obsp", "var", "varm", "varp", "uns"]) - def test_table_model_invalid_names(self, key: str, attr: str): + @pytest.mark.parametrize("attr", ["obs", "obsm", "obsp", "var", "varm", "varp", "uns", "layers"]) + @pytest.mark.parametrize("parse", [True, False]) + def test_table_model_invalid_names(self, key: str, attr: str, parse: bool): if attr in ("obs", "var"): + df = pd.DataFrame([[None]], columns=[key], index=["1"]) + adata = AnnData(np.array([[0]]), **{attr: df}) with pytest.raises(ValueError, 
match=f"Table contains invalid names:\n{attr}:\n '{re.escape(key)}'"): - df = pd.DataFrame([[None]], columns=[key], index=["1"]) - adata = AnnData(np.array([[0]]), **{attr: df}) - TableModel.parse(adata) + if parse: + TableModel.parse(adata) + else: + TableModel().validate(adata) elif key != "_index": # "_index" is only disallowed in obs/var - if attr in ("obsm", "varm", "obsp", "varp"): + if attr in ("obsm", "varm", "obsp", "varp", "layers"): + array = np.array([[0]]) + adata = AnnData(np.array([[0]]), **{attr: {key: array}}) with pytest.raises(ValueError, match=f"Table contains invalid names:\n{attr}:\n '{re.escape(key)}'"): - array = np.array([[0]]) - adata = AnnData(np.array([[0]]), **{attr: {key: array}}) - TableModel.parse(adata) + if parse: + TableModel.parse(adata) + else: + TableModel().validate(adata) elif attr == "uns": + adata = AnnData(np.array([[0]]), **{attr: {key: {}}}) with pytest.raises(ValueError, match=f"Table contains invalid names:\n{attr}:\n '{re.escape(key)}'"): - adata = AnnData(np.array([[0]]), **{attr: {key: {}}}) - TableModel.parse(adata) + if parse: + TableModel.parse(adata) + else: + TableModel().validate(adata) @pytest.mark.parametrize( "keys", @@ -450,16 +460,20 @@ def test_table_model_invalid_names(self, key: str, attr: str): ], ) @pytest.mark.parametrize("attr", ["obs", "var"]) - def test_table_model_not_unique_columns(self, keys: list[str], attr: str): + @pytest.mark.parametrize("parse", [True, False]) + def test_table_model_not_unique_columns(self, keys: list[str], attr: str, parse: bool): key_regex = re.escape(keys[1]) df = pd.DataFrame([[None] * len(keys)], columns=keys, index=["1"]) + adata = AnnData(np.array([[0]]), **{attr: df}) with pytest.raises( ValueError, match=f"Table contains invalid names:\n{attr}:\n" + f" Key `{key_regex}` is not unique, or another case-variant of it exists.", ): - adata = AnnData(np.array([[0]]), **{attr: df}) - TableModel.parse(adata) + if parse: + TableModel.parse(adata) + else: + 
TableModel().validate(adata) def test_get_schema(): From 726657f7fad377e4084b1783a63cb534097afd96 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Sat, 28 Sep 2024 13:22:03 +0200 Subject: [PATCH 13/17] added missing update design doc --- docs/design_doc.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/design_doc.md b/docs/design_doc.md index 66839ee51..0ecf84b0a 100644 --- a/docs/design_doc.md +++ b/docs/design_doc.md @@ -140,6 +140,9 @@ Names of SpatialData elements must fulfill certain restrictions to ensure robust - MUST NOT start with double underscores `__`. - MUST NOT only differ in character case, to avoid name collisions on case-insensitive systems. +In tables, the above restrictions apply to the column names of `obs` and `var`, and to the key names of the `obsm`, +`obsp`, `varm`, `varp`, `uns`, `layers` slots (for example `adata.obs['has space']` and `adata.layers['.']` are not +allowed). Additionally, dataframes in tables MUST NOT have a column named `_index`, which is reserved. 
#### Images From cd4630287fe55409166f001433d217aab6c527e5 Mon Sep 17 00:00:00 2001 From: Andreas Eisenbarth Date: Tue, 29 Oct 2024 00:19:12 +0100 Subject: [PATCH 14/17] Refactor formatting of multiple validation errors --- src/spatialdata/_core/spatialdata.py | 70 ++++++--- src/spatialdata/_core/validation.py | 226 ++++++++++++++++++++++----- tests/core/test_validation.py | 29 ++++ tests/models/test_models.py | 16 +- 4 files changed, 273 insertions(+), 68 deletions(-) create mode 100644 tests/core/test_validation.py diff --git a/src/spatialdata/_core/spatialdata.py b/src/spatialdata/_core/spatialdata.py index 8eb6f160d..865ef58c5 100644 --- a/src/spatialdata/_core/spatialdata.py +++ b/src/spatialdata/_core/spatialdata.py @@ -26,6 +26,7 @@ check_all_keys_case_insensitively_unique, check_target_region_column_symmetry, check_valid_name, + raise_validation_errors, validate_table_attr_keys, ) from spatialdata._logging import logger @@ -142,26 +143,37 @@ def __init__( f"Element names must be unique. 
The following element names are used multiple times: {duplicates}" ) - if images is not None: - for k, v in images.items(): - self.images[k] = v - - if labels is not None: - for k, v in labels.items(): - self.labels[k] = v - - if shapes is not None: - for k, v in shapes.items(): - self.shapes[k] = v - - if points is not None: - for k, v in points.items(): - self.points[k] = v - - if tables is not None: - for k, v in tables.items(): - self.validate_table_in_spatialdata(v) - self.tables[k] = v + with raise_validation_errors( + title="Cannot construct SpatialData object, input contains invalid elements.\n" + "For renaming, please see the discussion here https://github.com/scverse/spatialdata/discussions/707 .", + exc_type=(ValueError, KeyError), + ) as collect_error: + + if images is not None: + for k, v in images.items(): + with collect_error(location=("images", k)): + self.images[k] = v + + if labels is not None: + for k, v in labels.items(): + with collect_error(location=("labels", k)): + self.labels[k] = v + + if shapes is not None: + for k, v in shapes.items(): + with collect_error(location=("shapes", k)): + self.shapes[k] = v + + if points is not None: + for k, v in points.items(): + with collect_error(location=("points", k)): + self.points[k] = v + + if tables is not None: + for k, v in tables.items(): + with collect_error(location=("tables", k)): + self.validate_table_in_spatialdata(v) + self.tables[k] = v self._query = QueryManager(self) @@ -1121,10 +1133,18 @@ def _validate_can_safely_write_to_path( ) def _validate_all_elements(self) -> None: - for element_type, element_name, element in self.gen_elements(): - check_valid_name(element_name) - if element_type == "tables": - validate_table_attr_keys(element) + with raise_validation_errors( + title="SpatialData contains elements with invalid names.\n" + "For renaming, please see the discussion here https://github.com/scverse/spatialdata/discussions/707 .", + exc_type=ValueError, + ) as collect_error: + for 
element_type, element_name, element in self.gen_elements(): + element_path = (element_type, element_name) + with collect_error(location=element_path): + check_valid_name(element_name) + if element_type == "tables": + with collect_error(location=element_path): + validate_table_attr_keys(element, location=element_path) def write( self, @@ -2001,7 +2021,7 @@ def _validate_element_names_are_unique(self) -> None: ValueError If the element names are not unique. """ - check_all_keys_case_insensitively_unique([name for _, name, _ in self.gen_elements()]) + check_all_keys_case_insensitively_unique([name for _, name, _ in self.gen_elements()], location=()) def _find_element(self, element_name: str) -> tuple[str, str, SpatialElement | AnnData]: """ diff --git a/src/spatialdata/_core/validation.py b/src/spatialdata/_core/validation.py index 71e867fa5..d10ba3572 100644 --- a/src/spatialdata/_core/validation.py +++ b/src/spatialdata/_core/validation.py @@ -1,12 +1,40 @@ from __future__ import annotations -from collections import defaultdict -from collections.abc import Callable, Collection +from collections.abc import Collection +from types import TracebackType +from typing import NamedTuple, cast import pandas as pd from anndata import AnnData +class ErrorDetails(NamedTuple): + location: tuple[str, ...] 
+ """Tuple of strings identifying the element for which the error occurred.""" + + message: str + """A human readable error message.""" + + +class ValidationError(ValueError): + def __init__(self, title: str, errors: list[ErrorDetails]): + self._errors: list[ErrorDetails] = list(errors) + super().__init__(title) + + @property + def title(self) -> str: + return str(self.args[0]) if self.args else "" + + @property + def errors(self) -> list[ErrorDetails]: + return list(self._errors) + + def __str__(self) -> str: + return f"{self.title}\n" + "\n".join( + f" {'/'.join(str(key) for key in details.location)}: {details.message}" for details in self.errors + ) + + def check_target_region_column_symmetry(table: AnnData, region_key: str, target: str | pd.Series) -> None: """ Check region and region_key column symmetry. @@ -84,7 +112,7 @@ def check_valid_name(name: str) -> None: raise ValueError("Name must contain only alphanumeric characters, underscores, dots and hyphens.") -def check_all_keys_case_insensitively_unique(keys: Collection[str]) -> None: +def check_all_keys_case_insensitively_unique(keys: Collection[str], location: tuple[str, ...] = ()) -> None: """ Check that all keys are unique when ignoring case. 
@@ -95,6 +123,8 @@ def check_all_keys_case_insensitively_unique(keys: Collection[str]) -> None: ---------- keys A collection of string keys + location + Tuple of strings identifying the parent element Raises ------ @@ -113,10 +143,16 @@ def check_all_keys_case_insensitively_unique(keys: Collection[str]) -> None: ``` """ seen: set[str | None] = set() - for key in keys: - normalized_key = key.lower() - check_key_is_case_insensitively_unique(key, seen) - seen.add(normalized_key) + with raise_validation_errors( + title="Element contains conflicting keys.\n" + "For renaming, please see the discussion here https://github.com/scverse/spatialdata/discussions/707 .", + exc_type=ValueError, + ) as collect_error: + for key in keys: + normalized_key = key.lower() + with collect_error(location=location + (key,)): + check_key_is_case_insensitively_unique(key, seen) + seen.add(normalized_key) def check_key_is_case_insensitively_unique(key: str, other_keys: set[str | None]) -> None: @@ -178,28 +214,7 @@ def check_valid_dataframe_column_name(name: str) -> None: raise ValueError("Name cannot be '_index'") -def _iter_anndata_attr_keys_collect_value_errors( - adata: AnnData, attr_visitor: Callable[[str], None], key_visitor: Callable[[str, str], None] -) -> None: - messages_per_attr: dict[str, list[str]] = defaultdict(list) - for attr in ("obs", "obsm", "obsp", "var", "varm", "varp", "uns", "layers"): - try: - attr_visitor(attr) - except ValueError as e: - messages_per_attr[attr].append(f" {e.args[0]}") - for key in getattr(adata, attr): - try: - key_visitor(attr, key) - except ValueError as e: - messages_per_attr[attr].append(f" '{key}': {e.args[0]}") - if messages_per_attr: - raise ValueError( - "Table contains invalid names:\n" - + "\n".join(f"{attr}:\n" + "\n".join(messages) for attr, messages in messages_per_attr.items()) - ) - - -def validate_table_attr_keys(data: AnnData) -> None: +def validate_table_attr_keys(data: AnnData, location: tuple[str, ...] 
= ()) -> None: """ Check that all keys of all AnnData attributes have valid names. @@ -210,20 +225,159 @@ def validate_table_attr_keys(data: AnnData) -> None: ---------- data The AnnData table + location + Tuple of strings identifying the parent element Raises ------ ValueError If the AnnData contains one or several invalid keys. """ + with raise_validation_errors( + title="Table contains invalid names.\n" + "For renaming, please see the discussion here https://github.com/scverse/spatialdata/discussions/707 .", + exc_type=ValueError, + ) as collect_error: + for attr in ("obs", "obsm", "obsp", "var", "varm", "varp", "uns", "layers"): + attr_path = location + (attr,) + with collect_error(location=attr_path): + check_all_keys_case_insensitively_unique(getattr(data, attr).keys(), location=attr_path) + for key in getattr(data, attr): + key_path = attr_path + (key,) + with collect_error(location=key_path): + if attr in ("obs", "var"): + check_valid_dataframe_column_name(key) + else: + check_valid_name(key) + + +class _ErrorDetailsCollector: + """ + Context manager to collect possible exceptions into a list. - def _check_valid_attr_keys(attr: str) -> None: - check_all_keys_case_insensitively_unique(getattr(data, attr).keys()) + This is syntactic sugar for shortening the try/except construction when the error handling is + the same. Only for internal use by `raise_validation_errors`. + + Parameters + ---------- + exc_type + The class of the exception to catch. Other exceptions are raised. + """ - def _check_valid_attr_key(attr: str, key: str) -> None: - if attr in ("obs", "var"): - check_valid_dataframe_column_name(key) + def __init__( + self, + exc_type: type[BaseException] | tuple[type[BaseException], ...], + ) -> None: + self.errors: list[ErrorDetails] = [] + self._location: tuple[str, ...] = () + self._exc_type = exc_type + self._exc_type_override: type[BaseException] | tuple[type[BaseException], ...] 
| None = None + + def __call__( + self, + location: str | tuple[str, ...] = (), + expected_exception: type[BaseException] | tuple[type[BaseException], ...] | None = None, + ) -> _ErrorDetailsCollector: + """ + Set or override error details in advance before an exception is raised. + + Parameters + ---------- + location + Tuple of strings identifying the parent element + expected_exception + The class of the exception to catch. Other exceptions are raised. + """ + if isinstance(location, str): + location = (location,) + self._location = location + if expected_exception is not None: + self._exc_type_override = expected_exception + return self + + def __enter__(self) -> None: + pass + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_val: BaseException | None, + exc_tb: TracebackType | None, + ) -> bool: + # No exception happened + if exc_type is None: + return True + # An exception that we cannot handle, let the interpreter raise it. + handled_exception_type = self._exc_type_override or self._exc_type + if not issubclass(exc_type, handled_exception_type): + return False + # One of the expected exceptions happened, or another ValidationError that we can merge. + assert exc_val is not None + if issubclass(exc_type, ValidationError): + exc_val = cast(ValidationError, exc_val) + self.errors += exc_val.errors else: - check_valid_name(key) + details = ErrorDetails(location=self._location, message=str(exc_val.args[0])) + self.errors.append(details) + # Reset temporary attributes + self._location = () + self._exc_type_override = None + return True + + +class raise_validation_errors: + """ + Context manager to raise collected exceptions together as one ValidationError. + + This is syntactic sugar for shortening the try/except construction when the error handling is + the same. + + Parameters + ---------- + title + A validation error summary to display above the individual errors + exc_type + The class of the exception to catch. Other exceptions are raised. 
+
+    Example
+    -------
+
+    ```pycon
+    >>> with raise_validation_errors(
+    ...     "Some errors happened", exc_type=ValueError
+    ... ) as collect_error:
+    ...     for key, value in {"first": 1, "second": 2, "third": 3}.items():
+    ...         with collect_error(location=key):
+    ...             if value % 2 != 0:
+    ...                 raise ValueError("Odd value encountered")
+    ...
+    spatialdata._core.validation.ValidationError: Some errors happened
+      first: Odd value encountered
+      third: Odd value encountered
+    ```
+    """
-    _iter_anndata_attr_keys_collect_value_errors(data, _check_valid_attr_keys, _check_valid_attr_key)
+    def __init__(
+        self,
+        title: str = "Validation errors happened",
+        exc_type: type[BaseException] | tuple[type[BaseException], ...] = ValueError,
+    ) -> None:
+        self._message = title
+        self._collector = _ErrorDetailsCollector(exc_type=exc_type)
+
+    def __enter__(self) -> _ErrorDetailsCollector:
+        return self._collector
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> bool:
+        # An unexpected exception happened, let the interpreter handle it.
+        if exc_type is not None:
+            return False
+        # Exceptions were collected that we want to raise as a combined validation error.
+        if self._collector.errors:
+            raise ValidationError(title=self._message, errors=self._collector.errors)
+        return True
diff --git a/tests/core/test_validation.py b/tests/core/test_validation.py
new file mode 100644
index 000000000..258c14711
--- /dev/null
+++ b/tests/core/test_validation.py
@@ -0,0 +1,29 @@
+import pytest
+
+from spatialdata._core.validation import ValidationError, raise_validation_errors
+
+
+def test_raise_validation_errors():
+    with pytest.raises(expected_exception=ValidationError, match="Some errors happened") as actual_exc_info:
+        ...
+ with raise_validation_errors("Some errors happened", exc_type=ValueError) as collect_error: + with collect_error(expected_exception=TypeError): + raise TypeError("Another error type") + for key, value in {"first": 1, "second": 2, "third": 3}.items(): + with collect_error(location=key): + if value % 2 != 0: + raise ValueError("Odd value encountered") + actual_message = str(actual_exc_info.value) + assert "Another error" in actual_message + assert "first" in actual_message + assert "second" not in actual_message + assert "third" in actual_message + + +def test_raise_validation_errors_does_not_catch_other_errors(): + with pytest.raises(expected_exception=RuntimeError, match="Not to be collected"): + ... + with raise_validation_errors(exc_type=ValueError) as collect_error: + ... + with collect_error: + raise RuntimeError("Not to be collected as ValidationError") diff --git a/tests/models/test_models.py b/tests/models/test_models.py index f83ed2296..7562b32ad 100644 --- a/tests/models/test_models.py +++ b/tests/models/test_models.py @@ -25,6 +25,7 @@ from xarray import DataArray from spatialdata._core.spatialdata import SpatialData +from spatialdata._core.validation import ValidationError from spatialdata._types import ArrayLike from spatialdata.models._utils import ( force_2d, @@ -393,7 +394,7 @@ def test_model_invalid_names(self, full_sdata, element_type: str, name: str): @pytest.mark.parametrize("element_type", ["images", "labels", "points", "shapes", "tables"]) def test_model_not_unique_names(self, full_sdata, element_type: str, names: list[str]): element = next(iter(getattr(full_sdata, element_type).values())) - with pytest.raises(KeyError, match="Key `.*` is not unique"): + with pytest.raises(ValidationError, match="Key `.*` is not unique"): SpatialData(**{element_type: {name: element for name in names}}) @pytest.mark.parametrize("model", [TableModel]) @@ -430,7 +431,7 @@ def test_table_model_invalid_names(self, key: str, attr: str, parse: bool): if attr in 
("obs", "var"): df = pd.DataFrame([[None]], columns=[key], index=["1"]) adata = AnnData(np.array([[0]]), **{attr: df}) - with pytest.raises(ValueError, match=f"Table contains invalid names:\n{attr}:\n '{re.escape(key)}'"): + with pytest.raises(ValueError, match=f"Table contains invalid names(.|\n)*\n {attr}/{re.escape(key)}"): if parse: TableModel.parse(adata) else: @@ -439,14 +440,14 @@ def test_table_model_invalid_names(self, key: str, attr: str, parse: bool): if attr in ("obsm", "varm", "obsp", "varp", "layers"): array = np.array([[0]]) adata = AnnData(np.array([[0]]), **{attr: {key: array}}) - with pytest.raises(ValueError, match=f"Table contains invalid names:\n{attr}:\n '{re.escape(key)}'"): + with pytest.raises(ValueError, match=f"Table contains invalid names(.|\n)*\n {attr}/{re.escape(key)}"): if parse: TableModel.parse(adata) else: TableModel().validate(adata) elif attr == "uns": adata = AnnData(np.array([[0]]), **{attr: {key: {}}}) - with pytest.raises(ValueError, match=f"Table contains invalid names:\n{attr}:\n '{re.escape(key)}'"): + with pytest.raises(ValueError, match=f"Table contains invalid names(.|\n)*\n {attr}/{re.escape(key)}"): if parse: TableModel.parse(adata) else: @@ -462,13 +463,14 @@ def test_table_model_invalid_names(self, key: str, attr: str, parse: bool): @pytest.mark.parametrize("attr", ["obs", "var"]) @pytest.mark.parametrize("parse", [True, False]) def test_table_model_not_unique_columns(self, keys: list[str], attr: str, parse: bool): - key_regex = re.escape(keys[1]) + invalid_key = keys[1] + key_regex = re.escape(invalid_key) df = pd.DataFrame([[None] * len(keys)], columns=keys, index=["1"]) adata = AnnData(np.array([[0]]), **{attr: df}) with pytest.raises( ValueError, - match=f"Table contains invalid names:\n{attr}:\n" - + f" Key `{key_regex}` is not unique, or another case-variant of it exists.", + match=f"Table contains invalid names(.|\n)*\n {attr}/{invalid_key}: " + + f"Key `{key_regex}` is not unique, or another case-variant of 
it exists.", ): if parse: TableModel.parse(adata) From a5cbae5b2b1beee3ce8def69413bf9dc6a321c35 Mon Sep 17 00:00:00 2001 From: Andreas Eisenbarth Date: Wed, 30 Oct 2024 17:01:33 +0100 Subject: [PATCH 15/17] Allow deleting element with invalid name --- src/spatialdata/_core/spatialdata.py | 2 -- tests/io/test_readwrite.py | 1 - 2 files changed, 3 deletions(-) diff --git a/src/spatialdata/_core/spatialdata.py b/src/spatialdata/_core/spatialdata.py index 865ef58c5..c98ed98b5 100644 --- a/src/spatialdata/_core/spatialdata.py +++ b/src/spatialdata/_core/spatialdata.py @@ -1350,8 +1350,6 @@ def delete_element_from_disk(self, element_name: str | list[str]) -> None: from spatialdata._io._utils import _backed_elements_contained_in_path - check_valid_name(element_name) - if self.path is None: raise ValueError("The SpatialData object is not backed by a Zarr store.") diff --git a/tests/io/test_readwrite.py b/tests/io/test_readwrite.py index 48576b041..bef99973e 100644 --- a/tests/io/test_readwrite.py +++ b/tests/io/test_readwrite.py @@ -604,7 +604,6 @@ def test_incremental_io_valid_name(full_sdata: SpatialData) -> None: _check_valid_name(full_sdata.write_element) _check_valid_name(full_sdata.write_metadata) _check_valid_name(full_sdata.write_transformations) - _check_valid_name(full_sdata.delete_element_from_disk) cached_sdata_blobs = blobs() From 3a837688a004da9b680e47e3d068ed39305eb011 Mon Sep 17 00:00:00 2001 From: Andreas Eisenbarth Date: Wed, 30 Oct 2024 17:47:02 +0100 Subject: [PATCH 16/17] Add test for reading invalid names --- tests/io/test_readwrite.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tests/io/test_readwrite.py b/tests/io/test_readwrite.py index bef99973e..ca4d413fb 100644 --- a/tests/io/test_readwrite.py +++ b/tests/io/test_readwrite.py @@ -10,6 +10,7 @@ from numpy.random import default_rng from spatialdata import SpatialData, deepcopy, read_zarr +from spatialdata._core.validation import ValidationError from 
spatialdata._io._utils import _are_directories_identical, get_dask_backing_files from spatialdata.datasets import blobs from spatialdata.models import Image2DModel @@ -746,3 +747,33 @@ def test_incremental_writing_valid_table_name_invalid_table(tmp_path: Path): invalid_sdata.tables.data["valid_name"] = AnnData(np.array([[0]]), layers={"invalid name": np.array([[0]])}) with pytest.raises(ValueError, match="Name (must|cannot)"): invalid_sdata.write_element("valid_name") + + +def test_reading_invalid_name(tmp_path: Path): + image_name, image = next(iter(_get_images().items())) + labels_name, labels = next(iter(_get_labels().items())) + points_name, points = next(iter(_get_points().items())) + shapes_name, shapes = next(iter(_get_shapes().items())) + table_name, table = "table", _get_table() + valid_sdata = SpatialData( + images={image_name: image}, + labels={labels_name: labels}, + points={points_name: points}, + shapes={shapes_name: shapes}, + tables={table_name: table}, + ) + valid_sdata.write(tmp_path / "data.zarr") + # Circumvent validation at construction time and check validation happens again at writing time. 
+ (tmp_path / "data.zarr/points" / points_name).rename(tmp_path / "data.zarr/points" / "has whitespace") + (tmp_path / "data.zarr/shapes" / shapes_name).rename(tmp_path / "data.zarr/shapes" / "non-alnum_#$%&()*+,?@") + + with pytest.raises(ValidationError, match="Cannot construct SpatialData") as exc_info: + read_zarr(tmp_path / "data.zarr") + + actual_message = str(exc_info.value) + assert "points/has whitespace" in actual_message + assert "shapes/non-alnum_#$%&()*+,?@" in actual_message + assert ( + "For renaming, please see the discussion here https://github.com/scverse/spatialdata/discussions/707" + in actual_message + ) From 1dd32a526b30af14669e839ab0a3a8b83613858d Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Wed, 15 Jan 2025 17:06:56 +0100 Subject: [PATCH 17/17] fix tests after merge --- src/spatialdata/_core/spatialdata.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/spatialdata/_core/spatialdata.py b/src/spatialdata/_core/spatialdata.py index 25fd636d5..a931839ba 100644 --- a/src/spatialdata/_core/spatialdata.py +++ b/src/spatialdata/_core/spatialdata.py @@ -1509,8 +1509,10 @@ def write_channel_names(self, element_name: str | None = None) -> None: The name of the element to write the channel names of. If None, write the channel names of all image elements. 
""" - if element_name is not None and element_name not in self: - raise ValueError(f"Element with name {element_name} not found in SpatialData object.") + if element_name is not None: + check_valid_name(element_name) + if element_name not in self: + raise ValueError(f"Element with name {element_name} not found in SpatialData object.") # recursively write the transformation for all the SpatialElement if element_name is None: @@ -1547,6 +1549,8 @@ def write_transformations(self, element_name: str | None = None) -> None: """ if element_name is not None: check_valid_name(element_name) + if element_name not in self: + raise ValueError(f"Element with name {element_name} not found in SpatialData object.") # recursively write the transformation for all the SpatialElement if element_name is None: @@ -1665,6 +1669,8 @@ def write_metadata( """ if element_name is not None: check_valid_name(element_name) + if element_name not in self: + raise ValueError(f"Element with name {element_name} not found in SpatialData object.") self.write_transformations(element_name) self.write_channel_names(element_name)