From f5301e615d259e400b1d511b968c86880c038fae Mon Sep 17 00:00:00 2001 From: Joseph Abbott Date: Tue, 26 Mar 2024 19:06:28 +0000 Subject: [PATCH 1/2] Port sample/feature selection from equisolve --- .../metatensor/learn/selection/_selection.py | 248 ++++++++++++++++++ .../selection/feature_selection/__init__.py | 4 + .../feature_selection/feature_selection.py | 93 +++++++ .../selection/sample_selection/__init__.py | 4 + .../sample_selection/sample_selection.py | 94 +++++++ .../tests/feature_selection.py | 129 +++++++++ .../tests/sample_selection.py | 129 +++++++++ .../metatensor-learn/tests/selection_utils.py | 172 ++++++++++++ tox.ini | 3 + 9 files changed, 876 insertions(+) create mode 100644 python/metatensor-learn/metatensor/learn/selection/_selection.py create mode 100644 python/metatensor-learn/metatensor/learn/selection/feature_selection/__init__.py create mode 100644 python/metatensor-learn/metatensor/learn/selection/feature_selection/feature_selection.py create mode 100644 python/metatensor-learn/metatensor/learn/selection/sample_selection/__init__.py create mode 100644 python/metatensor-learn/metatensor/learn/selection/sample_selection/sample_selection.py create mode 100644 python/metatensor-learn/tests/feature_selection.py create mode 100644 python/metatensor-learn/tests/sample_selection.py create mode 100644 python/metatensor-learn/tests/selection_utils.py diff --git a/python/metatensor-learn/metatensor/learn/selection/_selection.py b/python/metatensor-learn/metatensor/learn/selection/_selection.py new file mode 100644 index 000000000..670dd8964 --- /dev/null +++ b/python/metatensor-learn/metatensor/learn/selection/_selection.py @@ -0,0 +1,248 @@ +from typing import Type, Union + +import numpy as np +import skmatter._selection + +import metatensor + +from .._backend import Labels, TensorBlock, TensorMap + + +class GreedySelector: + """ + Wraps :py:class:`skmatter._selection.GreedySelector` for a TensorMap. 
+ + The class creates a selector for each block. The selection will be done based on the + values of each :py:class:`TensorBlock`. Gradients will not be considered for the + selection. + """ + + def __init__( + self, + selector_class: Type[skmatter._selection.GreedySelector], + selection_type: str, + n_to_select: Union[int, dict], + **selector_arguments, + ) -> None: + self._selector_class = selector_class + self._selection_type = selection_type + self._n_to_select = n_to_select + self._selector_arguments = selector_arguments + + self._selector_arguments["selection_type"] = self._selection_type + self._support = None + self._select_distance = None + + @property + def selector_class(self) -> Type[skmatter._selection.GreedySelector]: + """ + The class to perform the selection. Usually one of 'FPS' or 'CUR'. + """ + return self._selector_class + + @property + def selection_type(self) -> str: + """ + Whether to choose a subset of columns ('feature') or rows ('sample'). + """ + return self._selection_type + + @property + def selector_arguments(self) -> dict: + """ + Arguments passed to the ``selector_class``. + """ + return self._selector_arguments + + @property + def support(self) -> TensorMap: + """ + TensorMap containing the support. + """ + if self._support is None: + raise ValueError("No selections. Call fit method first.") + + return self._support + + @property + def get_select_distance(self) -> TensorMap: + """ + Returns a TensorMap containing the Hausdorff distances. + + For each block, the metadata of the relevant axis (i.e. samples or properties, + depending on whether sample or feature selection is being performed) is sorted + and returned according to the Hausdorff distance, in descending order. + """ + if self._selector_class == skmatter._selection._CUR: + raise ValueError("Hausdorff distances not available for CUR in skmatter.") + if self._select_distance is None: + raise ValueError("No Hausdorff distances. 
Call fit method first.") + + return self._select_distance + + def fit(self, X: TensorMap, warm_start: bool = False) -> None: + """ + Learn the features to select. + + :param X: the input training vectors to fit. + :param warm_start: bool, whether the fit should continue after having already + run, after increasing `n_to_select`. Assumes it is called with the same X. + """ + # Check that we have only 0 or 1 component axes + if len(X.component_names) == 0: + has_components = False + elif len(X.component_names) == 1: + has_components = True + else: + assert len(X.component_names) > 1 + raise ValueError("Can only handle TensorMaps with a single component axis.") + + support_blocks = [] + if self._selector_class == skmatter._selection._FPS: + hausdorff_blocks = [] + for key, block in X.items(): + # Parse the n_to_select argument + max_n = ( + len(block.properties) + if self._selection_type == "feature" + else len(block.samples) + ) + if isinstance(self._n_to_select, int): + if ( + self._n_to_select == -1 + ): # set to the number of samples/features for this block + tmp_n_to_select = max_n + else: + tmp_n_to_select = self._n_to_select + + elif isinstance(self._n_to_select, dict): + tmp_n_to_select = self._n_to_select[tuple(key.values)] + else: + raise ValueError("n_to_select must be an int or a dict.") + + if not (0 < tmp_n_to_select <= max_n): + raise ValueError( + f"n_to_select ({tmp_n_to_select}) must > 0 and <= the number of " + f"{self._selection_type} for the given block ({max_n})." + ) + + selector = self.selector_class( + n_to_select=tmp_n_to_select, **self.selector_arguments + ) + + # If the block has components, reshape to a 2D array such that the + # components expand along the dimension *not* being selected. 
+ block_vals = block.values + if has_components: + n_components = len(block.components[0]) + if self._selection_type == "feature": + # Move components into samples + block_vals = block_vals.reshape( + (block_vals.shape[0] * n_components, block_vals.shape[2]) + ) + else: + assert self._selection_type == "sample" + # Move components into features + block_vals = block.values.reshape( + (block_vals.shape[0], block_vals.shape[2] * n_components) + ) + + # Fit on the block values + selector.fit(block_vals, warm_start=warm_start) + + # Build the support TensorMap. In this case we want the mask to be a + # list of bools, such that the original order of the metadata is + # preserved. + supp_mask = selector.get_support() + if self._selection_type == "feature": + supp_samples = Labels.single() + supp_properties = Labels( + names=block.properties.names, + values=block.properties.values[supp_mask], + ) + elif self._selection_type == "sample": + supp_samples = Labels( + names=block.samples.names, values=block.samples.values[supp_mask] + ) + supp_properties = Labels.single() + + supp_vals = np.zeros( + [len(supp_samples), len(supp_properties)], dtype=np.int32 + ) + support_blocks.append( + TensorBlock( + values=supp_vals, + samples=supp_samples, + components=[], + properties=supp_properties, + ) + ) + + if self._selector_class == skmatter._selection._FPS: + # Build the Hausdorff TensorMap, only for FPS. In this case we want the + # mask to be a list of int such that the samples/properties are + # reordered according to the Hausdorff distance. 
+ haus_mask = selector.get_support(indices=True, ordered=True) + if self._selection_type == "feature": + haus_samples = Labels.single() + haus_properties = Labels( + names=block.properties.names, + values=block.properties.values[haus_mask], + ) + elif self._selection_type == "sample": + haus_samples = Labels( + names=block.samples.names, + values=block.samples.values[haus_mask], + ) + haus_properties = Labels.single() + + haus_vals = selector.hausdorff_at_select_[haus_mask].reshape( + len(haus_samples), len(haus_properties) + ) + hausdorff_blocks.append( + TensorBlock( + values=haus_vals, + samples=haus_samples, + components=[], + properties=haus_properties, + ) + ) + + self._support = TensorMap(X.keys, support_blocks) + if self._selector_class == skmatter._selection._FPS: + self._select_distance = TensorMap(X.keys, hausdorff_blocks) + + return self + + def transform(self, X: TensorMap) -> TensorMap: + """ + Reduce X to the selected features. + + :param X: the input tensor. + :returns: the selected subset of the input. + """ + blocks = [] + for key, block in X.items(): + block_support = self.support.block(key) + + if self._selection_type == "feature": + new_block = metatensor.slice_block( + block, "properties", block_support.properties + ) + elif self._selection_type == "sample": + new_block = metatensor.slice_block( + block, "samples", block_support.samples + ) + blocks.append(new_block) + + return TensorMap(X.keys, blocks) + + def fit_transform(self, X: TensorMap, warm_start: bool = False) -> TensorMap: + """ + Fit to data, then transform it. + + :param X: TensorMap of the training vectors. + :param warm_start: bool, whether the fit should continue after having already + run, after increasing `n_to_select`. Assumes it is called with the same X. 
+ """ + return self.fit(X, warm_start=warm_start).transform(X) diff --git a/python/metatensor-learn/metatensor/learn/selection/feature_selection/__init__.py b/python/metatensor-learn/metatensor/learn/selection/feature_selection/__init__.py new file mode 100644 index 000000000..5d981efb2 --- /dev/null +++ b/python/metatensor-learn/metatensor/learn/selection/feature_selection/__init__.py @@ -0,0 +1,4 @@ +from .feature_selection import CUR, FPS # noqa + + +___all__ = ["CUR", "FPS"] diff --git a/python/metatensor-learn/metatensor/learn/selection/feature_selection/feature_selection.py b/python/metatensor-learn/metatensor/learn/selection/feature_selection/feature_selection.py new file mode 100644 index 000000000..a82514fb9 --- /dev/null +++ b/python/metatensor-learn/metatensor/learn/selection/feature_selection/feature_selection.py @@ -0,0 +1,93 @@ +""" +Wrappers for the feature selectors of `scikit-matter`_. + +.. _`scikit-matter`: https://scikit-matter.readthedocs.io/en/latest/selection.html +""" + +from skmatter._selection import _CUR, _FPS + +from .._selection import GreedySelector + + +class FPS(GreedySelector): + """ + Transformer that performs Greedy Feature Selection using Farthest Point Sampling. + + If `n_to_select` is an `int`, all blocks will have this many features selected. In + this case, `n_to_select` must be <= than the fewest number of features in any block. + + If `n_to_select` is a dict, it must have keys that are tuples corresponding to the + key values of each block. In this case, the values of the `n_to_select` dict can be + int that specify different number of features to select for each block. + + If `n_to_select` is -1, all features for every block will be selected. This is + useful, for instance, for plotting Hausdorff distances, which can be accessed + through the selector.get_select_distance property after calling the fit() method. + + Refer to :py:class:`skmatter.feature_selection.FPS` for full documentation. 
+ """ + + def __init__( + self, + initialize=0, + n_to_select=None, + score_threshold=None, + score_threshold_type="absolute", + progress_bar=False, + full=False, + random_state=0, + ): + super().__init__( + selector_class=_FPS, + selection_type="feature", + initialize=initialize, + n_to_select=n_to_select, + score_threshold=score_threshold, + score_threshold_type=score_threshold_type, + progress_bar=progress_bar, + full=full, + random_state=random_state, + ) + + +class CUR(GreedySelector): + """ + Transformer that performs Greedy Feature Selection with CUR. + + If `n_to_select` is an `int`, all blocks will have this many features selected. In + this case, `n_to_select` must be <= than the fewest number of features in any block. + + If `n_to_select` is a dict, it must have keys that are tuples corresponding to the + key values of each block. In this case, the values of the `n_to_select` dict can be + int that specify different number of features to select for each block. + + If `n_to_select` is -1, all features for every block will be selected. + + Refer to :py:class:`skmatter.feature_selection.CUR` for full documentation. 
+ """ + + def __init__( + self, + recompute_every=1, + k=1, + tolerance=1e-12, + n_to_select=None, + score_threshold=None, + score_threshold_type="absolute", + progress_bar=False, + full=False, + random_state=0, + ): + super().__init__( + selector_class=_CUR, + selection_type="feature", + recompute_every=recompute_every, + k=k, + tolerance=tolerance, + n_to_select=n_to_select, + score_threshold=score_threshold, + score_threshold_type=score_threshold_type, + progress_bar=progress_bar, + full=full, + random_state=random_state, + ) diff --git a/python/metatensor-learn/metatensor/learn/selection/sample_selection/__init__.py b/python/metatensor-learn/metatensor/learn/selection/sample_selection/__init__.py new file mode 100644 index 000000000..8d65c776c --- /dev/null +++ b/python/metatensor-learn/metatensor/learn/selection/sample_selection/__init__.py @@ -0,0 +1,4 @@ +from .sample_selection import CUR, FPS # noqa + + +___all__ = ["CUR", "FPS"] diff --git a/python/metatensor-learn/metatensor/learn/selection/sample_selection/sample_selection.py b/python/metatensor-learn/metatensor/learn/selection/sample_selection/sample_selection.py new file mode 100644 index 000000000..8276ee7db --- /dev/null +++ b/python/metatensor-learn/metatensor/learn/selection/sample_selection/sample_selection.py @@ -0,0 +1,94 @@ +""" +Wrappers for the sample selectors of `scikit-matter`_. + +.. _`scikit-matter`: https://scikit-matter.readthedocs.io/en/latest/selection.html +""" + +from skmatter._selection import _CUR, _FPS + +from .._selection import GreedySelector + + +class FPS(GreedySelector): + """ + Transformer that performs Greedy Sample Selection using Farthest Point Sampling. + + If `n_to_select` is an `int`, all blocks will have this many samples selected. In + this case, `n_to_select` must be <= than the fewest number of samples in any block. + + If `n_to_select` is a dict, it must have keys that are tuples corresponding to the + key values of each block. 
In this case, the values of the `n_to_select` dict can be + int that specify different number of samples to select for each block. + + If `n_to_select` is -1, all samples for every block will be selected. This is + useful, for instance, for plotting Hausdorff distances, which can be accessed + through the selector.get_select_distance property after calling the fit() + method. + + Refer to :py:class:`skmatter.sample_selection.FPS` for full documentation. + """ + + def __init__( + self, + initialize=0, + n_to_select=None, + score_threshold=None, + score_threshold_type="absolute", + progress_bar=False, + full=False, + random_state=0, + ): + super().__init__( + selector_class=_FPS, + selection_type="sample", + initialize=initialize, + n_to_select=n_to_select, + score_threshold=score_threshold, + score_threshold_type=score_threshold_type, + progress_bar=progress_bar, + full=full, + random_state=random_state, + ) + + +class CUR(GreedySelector): + """ + Transformer that performs Greedy Sample Selection using CUR. + + If `n_to_select` is an `int`, all blocks will have this many samples selected. In + this case, `n_to_select` must be <= than the fewest number of samples in any block. + + If `n_to_select` is a dict, it must have keys that are tuples corresponding to the + key values of each block. In this case, the values of the `n_to_select` dict can be + int that specify different number of samples to select for each block. + + If `n_to_select` is -1, all samples for every block will be selected. + + Refer to :py:class:`skmatter.sample_selection.CUR` for full documentation. 
+ """ + + def __init__( + self, + recompute_every=1, + k=1, + tolerance=1e-12, + n_to_select=None, + score_threshold=None, + score_threshold_type="absolute", + progress_bar=False, + full=False, + random_state=0, + ): + super().__init__( + selector_class=_CUR, + selection_type="sample", + recompute_every=recompute_every, + k=k, + tolerance=tolerance, + n_to_select=n_to_select, + score_threshold=score_threshold, + score_threshold_type=score_threshold_type, + progress_bar=progress_bar, + full=full, + random_state=random_state, + ) diff --git a/python/metatensor-learn/tests/feature_selection.py b/python/metatensor-learn/tests/feature_selection.py new file mode 100644 index 000000000..cf8913cb5 --- /dev/null +++ b/python/metatensor-learn/tests/feature_selection.py @@ -0,0 +1,129 @@ +""" +Module to test FPS and CUR selectors in +metatensor.learn.selection.feature_selection +""" + +import numpy as np +import pytest +import skmatter.feature_selection +from numpy.testing import assert_equal, assert_raises + +import metatensor +from metatensor import Labels +from metatensor.learn.selection.feature_selection import CUR, FPS + +from .selection_utils import ( + random_single_block_no_components_tensor_map, + random_tensor_map_with_components, +) + + +@pytest.fixture +def X1(): + return random_single_block_no_components_tensor_map( + use_torch=False, use_metatensor_torch=False + ) + + +@pytest.fixture +def X2(): + return random_tensor_map_with_components( + use_torch=False, use_metatensor_torch=False + ) + + +@pytest.mark.parametrize( + "selector_class, skmatter_selector_class", + [(FPS, skmatter.feature_selection.FPS), (CUR, skmatter.feature_selection.CUR)], +) +def test_fit(X1, selector_class, skmatter_selector_class): + selector = selector_class(n_to_select=2) + selector.fit(X1) + support = selector.support[0].properties + + skmatter_selector = skmatter_selector_class(n_to_select=2) + skmatter_selector.fit(X1[0].values) + skmatter_support = 
skmatter_selector.get_support(indices=True) + skmatter_support_labels = Labels( + names=["properties"], + values=np.array( + [[support_i] for support_i in skmatter_support], dtype=np.int32 + ), + ) + + assert support == skmatter_support_labels + + +@pytest.mark.parametrize( + "selector_class, skmatter_selector_class", + [(FPS, skmatter.feature_selection.FPS), (CUR, skmatter.feature_selection.CUR)], +) +def test_transform(X1, selector_class, skmatter_selector_class): + selector = selector_class(n_to_select=2) + selector.fit(X1) + X_trans = selector.transform(X1) + + skmatter_selector = skmatter_selector_class(n_to_select=2) + skmatter_selector.fit(X1[0].values) + X_trans_skmatter = skmatter_selector.transform(X1[0].values) + + assert_equal(X_trans[0].values, X_trans_skmatter) + + +@pytest.mark.parametrize("selector_class", [FPS, CUR]) +def test_fit_transform(X1, selector_class): + selector = selector_class(n_to_select=2) + + X_ft = selector.fit(X1).transform(X1) + metatensor.equal_raise(selector.fit_transform(X1), X_ft) + + +@pytest.mark.parametrize("selector_class", [FPS]) +def test_get_select_distance(X2, selector_class): + selector = selector_class(n_to_select=3) + selector.fit(X2) + select_distance = selector.get_select_distance + + assert select_distance is not None + + # Check distances sorted in descending order, with an inf as the first + # entry + for block in select_distance: + assert block.values[0][0] == np.inf + for i, val in enumerate(block.values[0][1:], start=1): + assert val < block.values[0][i - 1] + + +@pytest.mark.parametrize("selector_class", [FPS]) +def test_get_select_distance_n_to_select(X2, selector_class): + # Case 1: select all features for every block (n_to_select = -1) + selector = selector_class(n_to_select=-1) + selector.fit(X2) + select_distance = selector.get_select_distance + for block in select_distance: + assert len(block.properties) == 5 + + # Case 2: select subset of features but same for each block + n = 2 + selector = 
selector_class(n_to_select=n) + selector.fit(X2) + select_distance = selector.get_select_distance + for block in select_distance: + assert len(block.properties) == n + + # Case 3: select subset of features but different for each block + keys = X2.keys + n = {tuple(key): 2 * i + 1 for i, key in enumerate(keys)} + selector = selector_class(n_to_select=n) + selector.fit(X2) + select_distance = selector.get_select_distance + for i, key in enumerate(keys): + assert len(select_distance[key].properties) == 2 * i + 1 + + +@pytest.mark.parametrize("selector_class", [CUR]) +def test_get_select_distance_raises(X2, selector_class): + selector = selector_class(n_to_select=3) + selector.fit(X2) + with assert_raises(ValueError): + selector.get_select_distance diff --git a/python/metatensor-learn/tests/sample_selection.py b/python/metatensor-learn/tests/sample_selection.py new file mode 100644 index 000000000..253439797 --- /dev/null +++ b/python/metatensor-learn/tests/sample_selection.py @@ -0,0 +1,129 @@ +""" +Module to test FPS and CUR selectors in +metatensor.learn.selection.sample_selection +""" + +import numpy as np +import pytest +import skmatter.sample_selection +from numpy.testing import assert_equal, assert_raises + +import metatensor +from metatensor import Labels +from metatensor.learn.selection.sample_selection import CUR, FPS + +from .selection_utils import ( + random_single_block_no_components_tensor_map, + random_tensor_map_with_components, +) + + +@pytest.fixture +def X1(): + return random_single_block_no_components_tensor_map( + use_torch=False, use_metatensor_torch=False + ) + + +@pytest.fixture +def X2(): + return random_tensor_map_with_components( + use_torch=False, use_metatensor_torch=False + ) + + +@pytest.mark.parametrize( + "selector_class, skmatter_selector_class", + [(FPS, skmatter.sample_selection.FPS), (CUR, skmatter.sample_selection.CUR)], +) +def test_fit(X1, selector_class, skmatter_selector_class): + selector = selector_class(n_to_select=2) + 
selector.fit(X1) + support = selector.support[0].samples + + skmatter_selector = skmatter_selector_class(n_to_select=2) + skmatter_selector.fit(X1[0].values) + skmatter_support = skmatter_selector.get_support(indices=True) + skmatter_support_labels = Labels( + names=["sample", "structure"], + values=np.array( + [[support_i, support_i] for support_i in skmatter_support], + dtype=np.int32, + ), + ) + + assert support == skmatter_support_labels + + +@pytest.mark.parametrize( + "selector_class, skmatter_selector_class", + [(FPS, skmatter.sample_selection.FPS), (CUR, skmatter.sample_selection.CUR)], +) +def test_transform(X1, selector_class, skmatter_selector_class): + selector = selector_class(n_to_select=2, random_state=0) + selector.fit(X1) + X_trans = selector.transform(X1) + + skmatter_selector = skmatter_selector_class(n_to_select=2, random_state=0) + skmatter_selector.fit(X1[0].values) + X_trans_skmatter = X1[0].values[skmatter_selector.get_support()] + assert_equal(X_trans[0].values, X_trans_skmatter) + + +@pytest.mark.parametrize("selector_class", [FPS, CUR]) +def test_fit_transform(X1, selector_class): + selector = selector_class(n_to_select=2) + + X_ft = selector.fit(X1).transform(X1) + metatensor.equal_raise(selector.fit_transform(X1), X_ft) + + +@pytest.mark.parametrize("selector_class", [FPS]) +def test_get_select_distance(X2, selector_class): + selector = selector_class(n_to_select=3) + selector.fit(X2) + select_distance = selector.get_select_distance + + assert select_distance is not None + + # Check distances sorted in descending order, with an inf as the first + # entry + for block in select_distance: + assert block.values[0][0] == np.inf + for i, val in enumerate(block.values[0][1:], start=1): + assert val < block.values[0][i - 1] + + +@pytest.mark.parametrize("selector_class", [FPS]) +def test_get_select_distance_n_to_select(X2, selector_class): + # Case 1: select all features for every block (n_to_select = -1) + selector = 
selector_class(n_to_select=-1) + selector.fit(X2) + select_distance = selector.get_select_distance + for block in select_distance: + assert len(block.samples) == 4 + + # Case 2: select subset of features but same for each block + n = 2 + selector = selector_class(n_to_select=n) + selector.fit(X2) + select_distance = selector.get_select_distance + for block in select_distance: + assert len(block.samples) == n + + # Case 3: select subset of features but different for each block + keys = X2.keys + n = {tuple(key): i for i, key in enumerate(keys, start=1)} + selector = selector_class(n_to_select=n) + selector.fit(X2) + select_distance = selector.get_select_distance + for i, key in enumerate(keys, start=1): + assert len(select_distance[key].samples) == i + + +@pytest.mark.parametrize("selector_class", [CUR]) +def test_get_select_distance_raises(X2, selector_class): + selector = selector_class(n_to_select=3) + selector.fit(X2) + with assert_raises(ValueError): + selector.get_select_distance diff --git a/python/metatensor-learn/tests/selection_utils.py b/python/metatensor-learn/tests/selection_utils.py new file mode 100644 index 000000000..47860d48d --- /dev/null +++ b/python/metatensor-learn/tests/selection_utils.py @@ -0,0 +1,172 @@ +import functools + + +def random_single_block_no_components_tensor_map(use_torch, use_metatensor_torch): + """ + Create a dummy tensor map to be used in tests. This is the same one as the + tensor map used in `tensor.rs` tests. + """ + if not use_torch and use_metatensor_torch: + raise ValueError( + "torch.TensorMap cannot be created without torch.Tensor block values." 
+ ) + if use_metatensor_torch: + import torch + + from metatensor.torch import Labels, TensorBlock, TensorMap + + create_int32_array = functools.partial(torch.tensor, dtype=torch.int32) + else: + import numpy as np + + from metatensor import Labels, TensorBlock, TensorMap + + create_int32_array = functools.partial(np.array, dtype=np.int32) + + if use_torch: + import torch + + create_random_array = torch.rand + else: + import numpy as np + + create_random_array = np.random.rand + + block_1 = TensorBlock( + values=create_random_array(4, 2), + samples=Labels( + ["sample", "structure"], + create_int32_array([[0, 0], [1, 1], [2, 2], [3, 3]]), + ), + components=[], + properties=Labels(["properties"], create_int32_array([[0], [1]])), + ) + positions_gradient = TensorBlock( + values=create_random_array(7, 3, 2), + samples=Labels( + ["sample", "structure", "center"], + create_int32_array( + [ + [0, 0, 1], + [0, 0, 2], + [1, 1, 0], + [1, 1, 1], + [1, 1, 2], + [2, 2, 0], + [3, 3, 0], + ], + ), + ), + components=[Labels(["direction"], create_int32_array([[0], [1], [2]]))], + properties=block_1.properties, + ) + block_1.add_gradient("positions", positions_gradient) + + cell_gradient = TensorBlock( + values=create_random_array(4, 6, 2), + samples=Labels( + ["sample", "structure"], + create_int32_array([[0, 0], [1, 1], [2, 2], [3, 3]]), + ), + components=[ + Labels( + ["direction_xx_yy_zz_yz_xz_xy"], + create_int32_array([[0], [1], [2], [3], [4], [5]]), + ) + ], + properties=block_1.properties, + ) + block_1.add_gradient("cell", cell_gradient) + + return TensorMap(Labels.single(), [block_1]) + + +def random_tensor_map_with_components(use_torch, use_metatensor_torch): + """ + Create a dummy tensor map to be used in tests. This is the same one as the + tensor map used in `tensor.rs` tests. + """ + if not use_torch and use_metatensor_torch: + raise ValueError( + "torch.TensorMap cannot be created without torch.Tensor block values." 
+ ) + if use_metatensor_torch: + import torch + + from metatensor.torch import Labels, TensorBlock, TensorMap + + create_int32_array = functools.partial(torch.tensor, dtype=torch.int32) + else: + import numpy as np + + from metatensor import Labels, TensorBlock, TensorMap + + create_int32_array = functools.partial(np.array, dtype=np.int32) + + if use_torch: + import torch + + create_random_array = torch.rand + else: + import numpy as np + + create_random_array = np.random.rand + + blocks = [] + for i in range(3): + block = TensorBlock( + values=create_random_array(4, 2 * i + 1, 5), + samples=Labels( + ["sample", "structure"], + create_int32_array([[0, 0], [1, 1], [2, 2], [3, 3]]), + ), + components=[ + Labels(names=["component"], values=np.arange(2 * i + 1).reshape(-1, 1)), + ], + properties=Labels( + ["properties"], create_int32_array([[0], [1], [2], [5], [10]]) + ), + ) + positions_gradient = TensorBlock( + values=create_random_array(7, 3, 2 * i + 1, 5), + samples=Labels( + ["sample", "structure", "center"], + create_int32_array( + [ + [0, 0, 1], + [0, 0, 2], + [1, 1, 0], + [1, 1, 1], + [1, 1, 2], + [2, 2, 0], + [3, 3, 0], + ], + ), + ), + components=[ + Labels(["direction"], create_int32_array([[0], [1], [2]])), + Labels(names=["component"], values=np.arange(2 * i + 1).reshape(-1, 1)), + ], + properties=block.properties, + ) + block.add_gradient("positions", positions_gradient) + + cell_gradient = TensorBlock( + values=create_random_array(4, 6, 2 * i + 1, 5), + samples=Labels( + ["sample", "structure"], + create_int32_array([[0, 0], [1, 1], [2, 2], [3, 3]]), + ), + components=[ + Labels( + ["direction_xx_yy_zz_yz_xz_xy"], + create_int32_array([[0], [1], [2], [3], [4], [5]]), + ), + Labels(names=["component"], values=np.arange(2 * i + 1).reshape(-1, 1)), + ], + properties=block.properties, + ) + block.add_gradient("cell", cell_gradient) + blocks.append(block) + + return TensorMap(Labels(names=["key"], values=np.arange(3).reshape(-1, 1)), blocks) diff --git 
a/tox.ini b/tox.ini index 888459668..4b4f6d7d2 100644 --- a/tox.ini +++ b/tox.ini @@ -117,6 +117,7 @@ deps = {[testenv]packaging_deps} {[testenv]testing_deps} numpy <2.0 + skmatter changedir = python/metatensor-learn commands = @@ -136,6 +137,7 @@ deps = {[testenv]packaging_deps} {[testenv]testing_deps} torch=={env:METATENSOR_TESTS_TORCH_VERSION:2.2.*} + skmatter changedir = python/metatensor-learn commands = @@ -185,6 +187,7 @@ deps = numpy <2.0 torch=={env:METATENSOR_TESTS_TORCH_VERSION:2.2.*} ase + skmatter setenv = # ignore the fact that metatensor.torch.operations was loaded from a file From f1dbb06d99b4dd2b4f858cdb000245ecf5360e60 Mon Sep 17 00:00:00 2001 From: Joseph Abbott Date: Wed, 3 Apr 2024 14:14:05 +0200 Subject: [PATCH 2/2] Fix docs tests --- .../metatensor-learn/metatensor/learn/selection/__init__.py | 0 .../selection/{feature_selection => }/feature_selection.py | 2 +- .../metatensor/learn/selection/feature_selection/__init__.py | 4 ---- .../selection/{sample_selection => }/sample_selection.py | 2 +- .../metatensor/learn/selection/sample_selection/__init__.py | 4 ---- 5 files changed, 2 insertions(+), 10 deletions(-) create mode 100644 python/metatensor-learn/metatensor/learn/selection/__init__.py rename python/metatensor-learn/metatensor/learn/selection/{feature_selection => }/feature_selection.py (98%) delete mode 100644 python/metatensor-learn/metatensor/learn/selection/feature_selection/__init__.py rename python/metatensor-learn/metatensor/learn/selection/{sample_selection => }/sample_selection.py (98%) delete mode 100644 python/metatensor-learn/metatensor/learn/selection/sample_selection/__init__.py diff --git a/python/metatensor-learn/metatensor/learn/selection/__init__.py b/python/metatensor-learn/metatensor/learn/selection/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/python/metatensor-learn/metatensor/learn/selection/feature_selection/feature_selection.py 
b/python/metatensor-learn/metatensor/learn/selection/feature_selection.py similarity index 98% rename from python/metatensor-learn/metatensor/learn/selection/feature_selection/feature_selection.py rename to python/metatensor-learn/metatensor/learn/selection/feature_selection.py index a82514fb9..2fdd5acad 100644 --- a/python/metatensor-learn/metatensor/learn/selection/feature_selection/feature_selection.py +++ b/python/metatensor-learn/metatensor/learn/selection/feature_selection.py @@ -6,7 +6,7 @@ from skmatter._selection import _CUR, _FPS -from .._selection import GreedySelector +from ._selection import GreedySelector class FPS(GreedySelector): diff --git a/python/metatensor-learn/metatensor/learn/selection/feature_selection/__init__.py b/python/metatensor-learn/metatensor/learn/selection/feature_selection/__init__.py deleted file mode 100644 index 5d981efb2..000000000 --- a/python/metatensor-learn/metatensor/learn/selection/feature_selection/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .feature_selection import CUR, FPS # noqa - - -___all__ = ["CUR", "FPS"] diff --git a/python/metatensor-learn/metatensor/learn/selection/sample_selection/sample_selection.py b/python/metatensor-learn/metatensor/learn/selection/sample_selection.py similarity index 98% rename from python/metatensor-learn/metatensor/learn/selection/sample_selection/sample_selection.py rename to python/metatensor-learn/metatensor/learn/selection/sample_selection.py index 8276ee7db..b1e72d3e1 100644 --- a/python/metatensor-learn/metatensor/learn/selection/sample_selection/sample_selection.py +++ b/python/metatensor-learn/metatensor/learn/selection/sample_selection.py @@ -6,7 +6,7 @@ from skmatter._selection import _CUR, _FPS -from .._selection import GreedySelector +from ._selection import GreedySelector class FPS(GreedySelector): diff --git a/python/metatensor-learn/metatensor/learn/selection/sample_selection/__init__.py 
b/python/metatensor-learn/metatensor/learn/selection/sample_selection/__init__.py deleted file mode 100644 index 8d65c776c..000000000 --- a/python/metatensor-learn/metatensor/learn/selection/sample_selection/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .sample_selection import CUR, FPS # noqa - - -___all__ = ["CUR", "FPS"]