From f7acd778287200c82de9015d77a54f8586e83a99 Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Wed, 25 Oct 2023 16:38:19 -0400 Subject: [PATCH 01/20] Change the default behaviour of the logger so that it doesn't display messages Fixes #186 --- src/f3dasm/_src/logger.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/f3dasm/_src/logger.py b/src/f3dasm/_src/logger.py index 92515338..ed30ee35 100644 --- a/src/f3dasm/_src/logger.py +++ b/src/f3dasm/_src/logger.py @@ -39,8 +39,8 @@ handler = logging.StreamHandler() handler.setFormatter(formatter) -# Set the level for the "f3dasm" logger -logger.setLevel(logging.INFO) +# Set the default level for the "f3dasm" logger +logger.setLevel(logging.WARNING) # Add the custom handler to the "f3dasm" logger logger.addHandler(handler) From 138d4a006ddadf7c66d39a1aaa0409beaf166bf9 Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Thu, 26 Oct 2023 13:28:59 -0400 Subject: [PATCH 02/20] Make DataGenerator.run method private Fixes #193 --- src/f3dasm/_src/datageneration/datagenerator.py | 2 +- src/f3dasm/_src/datageneration/functions/function.py | 2 +- src/f3dasm/_src/experimentdata/experimentdata.py | 6 +++--- .../_src/optimization/adapters/scipy_implementations.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/f3dasm/_src/datageneration/datagenerator.py b/src/f3dasm/_src/datageneration/datagenerator.py index 81a4742e..335f5465 100644 --- a/src/f3dasm/_src/datageneration/datagenerator.py +++ b/src/f3dasm/_src/datageneration/datagenerator.py @@ -55,7 +55,7 @@ def post_process(self, experiment_sample: ExperimentSample, **kwargs) -> None: ... @time_and_log - def run(self, experiment_sample: ExperimentSample, **kwargs) -> ExperimentSample: + def _run(self, experiment_sample: ExperimentSample, **kwargs) -> ExperimentSample: """Run the data generator Parameters diff --git a/src/f3dasm/_src/datageneration/functions/function.py b/src/f3dasm/_src/datageneration/functions/function.py index c5e376bd..2036e939 100644 --- a/src/f3dasm/_src/datageneration/functions/function.py +++ b/src/f3dasm/_src/datageneration/functions/function.py @@ -99,7 +99,7 @@ def execute(self, experiment_sample: ExperimentSample) -> ExperimentSample: experiment_sample["y"] = self(x).ravel().astype(np.float32) return experiment_sample - def run(self, experiment_sample: ExperimentSample, **kwargs) -> ExperimentSample: + def _run(self, experiment_sample: ExperimentSample, **kwargs) -> ExperimentSample: return self.execute(experiment_sample) def _retrieve_original_input(self, x: np.ndarray): diff --git a/src/f3dasm/_src/experimentdata/experimentdata.py b/src/f3dasm/_src/experimentdata/experimentdata.py index ade061cc..5bccf733 100644 --- a/src/f3dasm/_src/experimentdata/experimentdata.py +++ b/src/f3dasm/_src/experimentdata/experimentdata.py @@ -702,7 +702,7 @@ def _run_sequential(self, data_generator: DataGenerator, kwargs: dict): logger.debug( f"Running experiment_sample {experiment_sample._jobnumber} with kwargs {kwargs}") - _experiment_sample = data_generator.run(experiment_sample, **kwargs) # no *args! + _experiment_sample = data_generator._run(experiment_sample, **kwargs) # no *args! self._set_experiment_sample(_experiment_sample) except Exception as e: error_msg = f"Error in experiment_sample {experiment_sample._jobnumber}: {e}" @@ -737,7 +737,7 @@ def _run_multiprocessing(self, data_generator: DataGenerator, kwargs: dict): def f(options: Dict[str, Any]) -> Any: logger.debug(f"Running experiment_sample {options['experiment_sample'].job_number}") - return data_generator.run(**options) + return data_generator._run(**options) with mp.Pool() as pool: # maybe implement pool.starmap_async ? @@ -775,7 +775,7 @@ def _run_cluster(self, data_generator: DataGenerator, kwargs: dict): break try: - _experiment_sample = data_generator.run(experiment_sample, **kwargs) + _experiment_sample = data_generator._run(experiment_sample, **kwargs) self._write_experiment_sample(_experiment_sample) except Exception as e: error_msg = f"Error in experiment_sample {experiment_sample._jobnumber}: {e}" diff --git a/src/f3dasm/_src/optimization/adapters/scipy_implementations.py b/src/f3dasm/_src/optimization/adapters/scipy_implementations.py index 269fc7e8..fca44036 100644 --- a/src/f3dasm/_src/optimization/adapters/scipy_implementations.py +++ b/src/f3dasm/_src/optimization/adapters/scipy_implementations.py @@ -44,7 +44,7 @@ def run_algorithm(self, iterations: int, data_generator: DataGenerator): """ def fun(x): - sample: ExperimentSample = data_generator.run( + sample: ExperimentSample = data_generator._run( ExperimentSample.from_numpy(x)) _, y = sample.to_numpy() return float(y) From eaf66014cac273632ac33fb0c8be53735a133f20 Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Thu, 26 Oct 2023 13:43:05 -0400 Subject: [PATCH 03/20] Handle similar bounds on add_float to make a constant parameter Fixes #195 --- src/f3dasm/_src/design/domain.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/f3dasm/_src/design/domain.py b/src/f3dasm/_src/design/domain.py index e4ea5e43..2a911a5b 100644 --- a/src/f3dasm/_src/design/domain.py +++ b/src/f3dasm/_src/design/domain.py @@ -252,7 +252,14 @@ def add_int(self, name: str, low: int, high: int, step: int = 1): >>> domain.add_int('param1', 0, 10, 2) >>> domain.space {'param1': DiscreteParameter(lower_bound=0, upper_bound=10, step=2)} + + Note + ---- + If the lower and upper bound are equal, then then a constant parameter + will be added to the domain! """ + if low == high: + self.add_constant(name, low) self._add(name, DiscreteParameter(low, high, step)) def add_float(self, name: str, low: float, high: float, log: bool = False): @@ -267,7 +274,7 @@ def add_float(self, name: str, low: float, high: float, log: bool = False): high : float Upper bound of the input parameter. log : bool, optional - Whether to use a logarithmic scale, by default False. + Whether to u_add(name, ConstantParameter(low))se a logarithmic scale, by default False. Example ------- @@ -275,8 +282,16 @@ def add_float(self, name: str, low: float, high: float, log: bool = False): >>> domain.add_float('param1', 0., 10., log=True) >>> domain.space {'param1': ContinuousParameter(lower_bound=0., upper_bound=10., log=True)} + + Note + ---- + If the lower and upper bound are equal, then then a constant parameter + will be added to the domain! """ - self._add(name, ContinuousParameter(low, high, log)) + if low == high: + self.add_constant(name, low) + else: + self._add(name, ContinuousParameter(low, high, log)) def add_category(self, name: str, categories: Sequence[CategoricalType]): """Add a new categorical input parameter to the domain. From 80bf59de932ee8e3327376446b23b7e21d5fd703 Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Thu, 26 Oct 2023 14:05:14 -0400 Subject: [PATCH 04/20] Change iter-behaviour of ExperimentData object to provide ExperimentSample objects Fixes #189 --- .../_src/experimentdata/experimentdata.py | 26 ++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/src/f3dasm/_src/experimentdata/experimentdata.py b/src/f3dasm/_src/experimentdata/experimentdata.py index ade061cc..d11db887 100644 --- a/src/f3dasm/_src/experimentdata/experimentdata.py +++ b/src/f3dasm/_src/experimentdata/experimentdata.py @@ -119,10 +119,16 @@ def __len__(self): return len(self.input_data) def __iter__(self) -> Iterator[Tuple[Dict[str, Any]]]: - return self.input_data.__iter__() + self.current_index = 0 + return self - def __next__(self): - return self.input_data.__next__() + def __next__(self) -> ExperimentSample: + if self.current_index >= len(self): + raise StopIteration + else: + index = self.index[self.current_index] + self.current_index += 1 + return self._get_experiment_sample(index) def __add__(self, other: ExperimentData | ExperimentSample) -> ExperimentData: """The + operator combines two ExperimentData objects""" @@ -171,6 +177,20 @@ def wrapper_func(self, *args, **kwargs) -> None: return wrapper_func + # Properties + # ============================================================================= + + @property + def index(self) -> pd.Index: + """Returns the index of the ExperimentData + + Returns + ------- + pd.Index + The indices of the experiments + """ + return self.input_data.indices + # Alternative Constructors # ============================================================================= From c3198d1f17db42cc49ca3695e8625e0c82e6b061 Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Thu, 26 Oct 2023 14:05:20 -0400 Subject: [PATCH 05/20] add test --- tests/experimentdata/test_experimentdata.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/experimentdata/test_experimentdata.py b/tests/experimentdata/test_experimentdata.py index 19f53301..7c6d631c 100644 --- a/tests/experimentdata/test_experimentdata.py +++ b/tests/experimentdata/test_experimentdata.py @@ -10,7 +10,7 @@ import pytest import xarray as xr -from f3dasm import ExperimentData +from f3dasm import ExperimentData, ExperimentSample from f3dasm._src.experimentdata.experimentdata import DataTypes from f3dasm.design import (ContinuousParameter, Domain, Status, _Data, _JobQueue, make_nd_continuous_domain) @@ -667,5 +667,14 @@ def test_evaluate_mode(mode: str, experimentdata_continuous: ExperimentData, tmp "scale_bounds": np.array([[0., 1.], [0., 1.], [0., 1.]]), 'seed': SEED}) +def test_iter_behaviour(experimentdata_continuous: ExperimentData): + for i in experimentdata_continuous: + assert isinstance(i, ExperimentSample) + + selected_experimentdata = experimentdata_continuous.select([0, 2, 4]) + for i in selected_experimentdata: + assert isinstance(i, ExperimentSample) + + if __name__ == "__main__": # pragma: no cover pytest.main() From 4770610e891a6c96c543f0a6223a1dc467afec9f Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Thu, 26 Oct 2023 14:15:44 -0400 Subject: [PATCH 06/20] Fixes Get public access to ExperimentSample by providing index #188 --- .../classes/design/experimentsample.rst | 13 +++++++++++++ src/f3dasm/_src/experimentdata/experimentdata.py | 8 ++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/docs/source/rst_doc_files/classes/design/experimentsample.rst b/docs/source/rst_doc_files/classes/design/experimentsample.rst index bd928b30..00be3b85 100644 --- a/docs/source/rst_doc_files/classes/design/experimentsample.rst +++ b/docs/source/rst_doc_files/classes/design/experimentsample.rst @@ -54,6 +54,19 @@ An KeyError will be raised if the key is not found. >>> experiment_sample.get('param_1') 0.0249 +Manually iterating over ExperimentData +---------------------------------------- + +The :class:`~f3dasm.design.ExperimentData` object can be manually iterated over to get :class:`~f3dasm.design.ExperimentSample` objects for each experiment: + +.. code-block:: python + + >>> for experiment_sample in experiment_data: + ... print(experiment_sample) + ExperimentSample(0 : {'x0': 0.8184054141827567, 'x1': 0.937852542255321, 'x2': 0.7376563782762678} - {}) + ExperimentSample(1 : {'x0': 0.7203461491873061, 'x1': 0.7320604457665572, 'x2': 0.2524387342272223} - {}) + ExperimentSample(2 : {'x0': 0.35449352388104904, 'x1': 0.11413412225748525, 'x2': 0.1467895592274866} - {}) + Storing output parameters to the experiment sample -------------------------------------------------- diff --git a/src/f3dasm/_src/experimentdata/experimentdata.py b/src/f3dasm/_src/experimentdata/experimentdata.py index d11db887..6ec35fcd 100644 --- a/src/f3dasm/_src/experimentdata/experimentdata.py +++ b/src/f3dasm/_src/experimentdata/experimentdata.py @@ -128,7 +128,7 @@ def __next__(self) -> ExperimentSample: else: index = self.index[self.current_index] self.current_index += 1 - return self._get_experiment_sample(index) + return self.get_experiment_sample(index) def __add__(self, other: ExperimentData | ExperimentSample) -> ExperimentData: """The + operator combines two ExperimentData objects""" @@ -506,7 +506,7 @@ def _reset_index(self) -> None: # ExperimentSample # ============================================================================= - def _get_experiment_sample(self, index: int) -> ExperimentSample: + def get_experiment_sample(self, index: int) -> ExperimentSample: """ Gets the experiment_sample at the given index. @@ -560,7 +560,7 @@ def _access_open_job_data(self) -> ExperimentSample: """ job_index = self.jobs.get_open_job() self.jobs.mark(job_index, status=Status.IN_PROGRESS) - experiment_sample = self._get_experiment_sample(job_index) + experiment_sample = self.get_experiment_sample(job_index) return experiment_sample @_access_file @@ -922,7 +922,7 @@ def _iterate_scipy(self, optimizer: Optimizer, data_generator: DataGenerator, # Repeat last iteration to fill up total iteration if len(self) < n_data_before_iterate + iterations: - last_design = self._get_experiment_sample(len(self)-1) + last_design = self.get_experiment_sample(len(self)-1) for repetition in range(iterations - (len(self) - n_data_before_iterate)): self._add_experiments(last_design) From c4e6b56be164504adbde68b28efe73981c8c4e22 Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Thu, 26 Oct 2023 14:49:18 -0400 Subject: [PATCH 07/20] removed custom __getitem__ method ExperimentData --- src/f3dasm/_src/experimentdata/experimentdata.py | 14 ++++++++------ tests/experimentdata/test_experimentdata.py | 10 ---------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/src/f3dasm/_src/experimentdata/experimentdata.py b/src/f3dasm/_src/experimentdata/experimentdata.py index ade061cc..00a9ffbb 100644 --- a/src/f3dasm/_src/experimentdata/experimentdata.py +++ b/src/f3dasm/_src/experimentdata/experimentdata.py @@ -145,10 +145,10 @@ def __eq__(self, __o: ExperimentData) -> bool: self.jobs == __o.jobs, self.domain == __o.domain]) - def __getitem__(self, index: int | slice | Iterable[int]) -> _Data: - """The [] operator returns a single datapoint or a subset of datapoints""" - return ExperimentData(input_data=self.input_data[index], output_data=self.output_data[index], - jobs=self.jobs[index], domain=self.domain, filename=self.filename, path=self.path) + # def __getitem__(self, index: int | slice | Iterable[int]) -> _Data: + # """The [] operator returns a single datapoint or a subset of datapoints""" + # return ExperimentData(input_data=self.input_data[index], output_data=self.output_data[index], + # jobs=self.jobs[index], domain=self.domain, filename=self.filename, path=self.path) def _repr_html_(self) -> str: return self.input_data.combine_data_to_multiindex(self.output_data, self.jobs.to_dataframe())._repr_html_() @@ -296,7 +296,9 @@ def select(self, indices: int | slice | Iterable[int]) -> ExperimentData: ExperimentData The selected ExperimentData object with only the selected indices. """ - return self[indices] + + return ExperimentData(input_data=self.input_data[indices], output_data=self.output_data[indices], + jobs=self.jobs[indices], domain=self.domain, filename=self.filename, path=self.path) def store(self, filename: str = None): """Store the ExperimentData to disk, with checking for a lock @@ -362,7 +364,7 @@ def get_n_best_output(self, n_samples: int) -> ExperimentData: New experimentData object with a selection of the n best samples. """ df = self.output_data.n_best_samples(n_samples, self.output_data.names) - return self[df.index] + return self.select(df.index) # Append or remove data # ============================================================================= diff --git a/tests/experimentdata/test_experimentdata.py b/tests/experimentdata/test_experimentdata.py index 19f53301..a4316860 100644 --- a/tests/experimentdata/test_experimentdata.py +++ b/tests/experimentdata/test_experimentdata.py @@ -53,16 +53,6 @@ def test_experiment_data_len_equals_output_data(experimentdata: ExperimentData): assert len(experimentdata) == len(experimentdata.output_data) -@pytest.mark.parametrize("slice_type", [3, [0, 1, 3], slice(0, 3)]) -def test_experiment_data_getitem_(slice_type: int | Iterable[int], experimentdata: ExperimentData): - input_data = experimentdata.input_data[slice_type] - output_data = experimentdata.output_data[slice_type] - jobs = experimentdata.jobs[slice_type] - constructed_experimentdata = ExperimentData( - input_data=input_data, output_data=output_data, jobs=jobs, domain=experimentdata.domain) - assert constructed_experimentdata == experimentdata[slice_type] - - @pytest.mark.parametrize("slice_type", [3, [0, 1, 3], slice(0, 3)]) def test_experiment_data_select(slice_type: int | Iterable[int], experimentdata: ExperimentData): input_data = experimentdata.input_data[slice_type] From 8c5bbf87026e38a44e953d52ae1f59b5aed12554 Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Thu, 26 Oct 2023 14:50:46 -0400 Subject: [PATCH 08/20] removed commented code --- src/f3dasm/_src/experimentdata/experimentdata.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/f3dasm/_src/experimentdata/experimentdata.py b/src/f3dasm/_src/experimentdata/experimentdata.py index 00a9ffbb..808fc6bb 100644 --- a/src/f3dasm/_src/experimentdata/experimentdata.py +++ b/src/f3dasm/_src/experimentdata/experimentdata.py @@ -145,11 +145,6 @@ def __eq__(self, __o: ExperimentData) -> bool: self.jobs == __o.jobs, self.domain == __o.domain]) - # def __getitem__(self, index: int | slice | Iterable[int]) -> _Data: - # """The [] operator returns a single datapoint or a subset of datapoints""" - # return ExperimentData(input_data=self.input_data[index], output_data=self.output_data[index], - # jobs=self.jobs[index], domain=self.domain, filename=self.filename, path=self.path) - def _repr_html_(self) -> str: return self.input_data.combine_data_to_multiindex(self.output_data, self.jobs.to_dataframe())._repr_html_() From 9b29a803d54c5267038a0771653fddfb07efc068 Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Thu, 26 Oct 2023 16:30:16 -0400 Subject: [PATCH 09/20] Fixes Get a way to extract input and output data from ExperimentData object directly #192 --- src/f3dasm/_src/design/domain.py | 34 ++++++++- src/f3dasm/_src/experimentdata/_data.py | 17 +++++ .../_src/experimentdata/experimentdata.py | 72 +++++++++++++++++-- tests/experimentdata/test_experimentdata.py | 34 +++++++++ 4 files changed, 151 insertions(+), 6 deletions(-) diff --git a/src/f3dasm/_src/design/domain.py b/src/f3dasm/_src/design/domain.py index e4ea5e43..69b147e9 100644 --- a/src/f3dasm/_src/design/domain.py +++ b/src/f3dasm/_src/design/domain.py @@ -11,7 +11,7 @@ import pickle from dataclasses import dataclass, field from pathlib import Path -from typing import Any, Dict, Iterator, List, Sequence, Type +from typing import Any, Dict, Iterable, Iterator, List, Sequence, Type # Third-party core import numpy as np @@ -573,6 +573,38 @@ def _filter(self, type: Type[Parameter]) -> Domain: if isinstance(parameter, type)} ) + def select(self, names: str | Iterable[str]) -> Domain: + """Select a subset of parameters from the domain. + + Parameters + ---------- + + names : str or Iterable[str] + The names of the parameters to select. + + Returns + ------- + Domain + A new domain with the selected parameters. + + Example + ------- + >>> domain = Domain() + >>> domain.space = { + ... 'param1': ContinuousParameter(lower_bound=0., upper_bound=1.), + ... 'param2': DiscreteParameter(lower_bound=0, upper_bound=8), + ... 'param3': CategoricalParameter(categories=['cat1', 'cat2']) + ... } + >>> domain.select(['param1', 'param3']) + Domain({'param1': ContinuousParameter(lower_bound=0, upper_bound=1), + 'param3': CategoricalParameter(categories=['cat1', 'cat2'])}) + """ + + if isinstance(names, str): + names = [names] + + return Domain(space={key: self.space[key] for key in names}) + # Miscellaneous # ============================================================================= diff --git a/src/f3dasm/_src/experimentdata/_data.py b/src/f3dasm/_src/experimentdata/_data.py index 8faa63e6..abc85881 100644 --- a/src/f3dasm/_src/experimentdata/_data.py +++ b/src/f3dasm/_src/experimentdata/_data.py @@ -291,6 +291,23 @@ def n_best_samples(self, nosamples: int, column_name: List[str] | str) -> pd.Dat """ return self.data.nsmallest(n=nosamples, columns=column_name) + def select_columns(self, columns: Iterable[str] | str) -> _Data: + """Filter the data on the selected columns. + + Parameters + ---------- + columns : Iterable[str] | str + The columns to select. + + Returns + ------- + _Data + The data only with the selected columns + """ + # This is necessary otherwise self.data[columns] will be a Series + if isinstance(columns, str): + columns = [columns] + return _Data(self.data[columns]) # Append and remove data # ============================================================================= diff --git a/src/f3dasm/_src/experimentdata/experimentdata.py b/src/f3dasm/_src/experimentdata/experimentdata.py index 808fc6bb..48d8ae2e 100644 --- a/src/f3dasm/_src/experimentdata/experimentdata.py +++ b/src/f3dasm/_src/experimentdata/experimentdata.py @@ -104,7 +104,7 @@ def __init__(self, domain: Optional[Domain] = None, input_data: Optional[DataTyp if self.input_data.is_empty(): self.input_data = _Data.from_domain(self.domain) - self.jobs = jobs_factory(jobs, self.input_data, job_value) + self.jobs = jobs_factory(jobs, self.input_data, self.output_data, job_value) # Check if the columns of input_data are in the domain if not self.input_data.has_columnnames(self.domain.names): @@ -275,7 +275,7 @@ def _from_file_attempt(cls: Type[ExperimentData], filename: Path) -> ExperimentD except FileNotFoundError: raise FileNotFoundError(f"Cannot find the files from {filename}.") - # Export + # Selecting subsets # ============================================================================= def select(self, indices: int | slice | Iterable[int]) -> ExperimentData: @@ -295,6 +295,62 @@ def select(self, indices: int | slice | Iterable[int]) -> ExperimentData: return ExperimentData(input_data=self.input_data[indices], output_data=self.output_data[indices], jobs=self.jobs[indices], domain=self.domain, filename=self.filename, path=self.path) + def get_input_data(self, parameter_names: Optional[str | Iterable[str]] = None) -> ExperimentData: + """Retrieve a subset of the input data from the ExperimentData object + + Parameters + ---------- + parameter_names : str | Iterable[str], optional + The name(s) of the input parameters that you want to retrieve, + if None all input parameters are retrieved, by default None + + Returns + ------- + ExperimentData + The selected ExperimentData object with only the selected input data. + + Notes + ----- + If parameter_names is None, all input data is retrieved. + The returned ExperimentData object has the domain of the original ExperimentData object, + but only with the selected input parameters. + """ + if parameter_names is None: + return ExperimentData(input_data=self.input_data, jobs=self.jobs, + domain=self.domain, filename=self.filename, path=self.path) + else: + return ExperimentData(input_data=self.input_data.select_columns(parameter_names), jobs=self.jobs, + domain=self.domain.select(parameter_names), filename=self.filename, path=self.path) + + def get_output_data(self, parameter_names: Optional[str | Iterable[str]] = None) -> ExperimentData: + """Retrieve a subset of the output data from the ExperimentData object + + Parameters + ---------- + parameter_names : str | Iterable[str], optional + The name(s) of the output parameters that you want to retrieve, + if None all output parameters are retrieved, by default None + + Returns + ------- + ExperimentData + The selected ExperimentData object with only the selected output data. + + Notes + ----- + If parameter_names is None, all output data is retrieved. + The returned ExperimentData object has no domain object and no input data! + """ + if parameter_names is None: + return ExperimentData(output_data=self.output_data, jobs=self.jobs, + filename=self.filename, path=self.path) + else: + return ExperimentData(output_data=self.output_data.select_columns(parameter_names), jobs=self.jobs, + filename=self.filename, path=self.path) + + # Export + # ============================================================================= + def store(self, filename: str = None): """Store the ExperimentData to disk, with checking for a lock @@ -480,7 +536,7 @@ def _reset_index(self) -> None: self.output_data.reset_index() self.jobs.reset_index() - # ExperimentSample +# ExperimentSample # ============================================================================= def _get_experiment_sample(self, index: int) -> ExperimentSample: @@ -983,7 +1039,8 @@ def domain_factory(domain: Domain | None, input_data: _Data) -> Domain: raise TypeError(f"Domain must be of type Domain or None, not {type(domain)}") -def jobs_factory(jobs: Path | str | _JobQueue | None, input_data: _Data, job_value: Status) -> _JobQueue: +def jobs_factory(jobs: Path | str | _JobQueue | None, input_data: _Data, + output_data: _Data, job_value: Status) -> _JobQueue: """Creates a _JobQueue object from particular inpute Parameters @@ -991,7 +1048,9 @@ def jobs_factory(jobs: Path | str | _JobQueue | None, input_data: _Data, job_val jobs : Path | str | None input data for the jobs input_data : _Data - _Data object to extract indices from, if necessary + _Data object of input data to extract indices from, if necessary + output_data : _Data + _Data object of output data to extract indices from, if necessary job_value : Status initial value of all the jobs @@ -1006,4 +1065,7 @@ def jobs_factory(jobs: Path | str | _JobQueue | None, input_data: _Data, job_val if isinstance(jobs, (Path, str)): return _JobQueue.from_file(Path(jobs)) + if input_data.is_empty(): + return _JobQueue.from_data(output_data, value=job_value) + return _JobQueue.from_data(input_data, value=job_value) diff --git a/tests/experimentdata/test_experimentdata.py b/tests/experimentdata/test_experimentdata.py index a4316860..af879dae 100644 --- a/tests/experimentdata/test_experimentdata.py +++ b/tests/experimentdata/test_experimentdata.py @@ -657,5 +657,39 @@ def test_evaluate_mode(mode: str, experimentdata_continuous: ExperimentData, tmp "scale_bounds": np.array([[0., 1.], [0., 1.], [0., 1.]]), 'seed': SEED}) +def test_get_input_data(experimentdata_expected_no_output: ExperimentData): + input_data = experimentdata_expected_no_output.get_input_data() + df, _ = input_data.to_pandas() + pd.testing.assert_frame_equal(df, pd_input()) + assert experimentdata_expected_no_output.input_data == input_data.input_data + + +@pytest.mark.parametrize("selection", ["x0", ["x0"], ["x0", "x2"]]) +def test_get_input_data_selection(experimentdata_expected_no_output: ExperimentData, selection: Iterable[str] | str): + input_data = experimentdata_expected_no_output.get_input_data(selection) + df, _ = input_data.to_pandas() + if isinstance(selection, str): + selection = [selection] + selected_pd = pd_input()[selection] + pd.testing.assert_frame_equal(df, selected_pd) + + +def test_get_output_data(experimentdata_expected: ExperimentData): + output_data = experimentdata_expected.get_output_data() + _, df = output_data.to_pandas() + pd.testing.assert_frame_equal(df, pd_output()) + assert experimentdata_expected.output_data == output_data.output_data + + +@pytest.mark.parametrize("selection", ["y", ["y"]]) +def test_get_output_data_selection(experimentdata_expected: ExperimentData, selection: Iterable[str] | str): + output_data = experimentdata_expected.get_output_data(selection) + _, df = output_data.to_pandas() + if isinstance(selection, str): + selection = [selection] + selected_pd = pd_output()[selection] + pd.testing.assert_frame_equal(df, selected_pd) + + if __name__ == "__main__": # pragma: no cover pytest.main() From 15ceae7f9bfb2a60a0bbd718ef7771c8267fe572 Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Thu, 26 Oct 2023 16:52:50 -0400 Subject: [PATCH 10/20] ExperimentData.store documentation is not correct Fixes #190 --- src/f3dasm/_src/experimentdata/experimentdata.py | 2 +- .../_src/experimentdata/experimentsample.py | 15 +++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/f3dasm/_src/experimentdata/experimentdata.py b/src/f3dasm/_src/experimentdata/experimentdata.py index ade061cc..b49e297f 100644 --- a/src/f3dasm/_src/experimentdata/experimentdata.py +++ b/src/f3dasm/_src/experimentdata/experimentdata.py @@ -298,7 +298,7 @@ def select(self, indices: int | slice | Iterable[int]) -> ExperimentData: """ return self[indices] - def store(self, filename: str = None): + def store(self, filename: Optional[str] = None): """Store the ExperimentData to disk, with checking for a lock Parameters diff --git a/src/f3dasm/_src/experimentdata/experimentsample.py b/src/f3dasm/_src/experimentdata/experimentsample.py index 5d4f979b..0d8433b4 100644 --- a/src/f3dasm/_src/experimentdata/experimentsample.py +++ b/src/f3dasm/_src/experimentdata/experimentsample.py @@ -349,27 +349,26 @@ def to_dict(self) -> Dict[str, Any]: """ return {**self.input_data, **self.output_data_loaded, 'job_number': self.job_number} - def store(self, object: Any, name: str, to_disk: bool = False, + def store(self, name: str, object: Any, to_disk: bool = False, store_method: Optional[Type[_Store]] = None) -> None: """Store an object to disk. Parameters ---------- - object : Any - The object to store. name : str The name of the file to store the object in. + object : Any + The object to store. to_disk : bool, optional Whether to store the object to disk, by default False store_method : Store, optional The method to use to store the object, by default None - Raises - ------ - - TypeError - If the object type is not supported and no store_method is provided. + Notes + ----- + If to_disk is True and no store_method is provided, the default store method will be used. + The default store method is saving the object as a pickle file (.pkl). """ if to_disk: self._store_to_disk(object=object, name=name, store_method=store_method) From df3f5f1f2569e98dfea8a634b28fe63e308465e5 Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Thu, 26 Oct 2023 17:04:26 -0400 Subject: [PATCH 11/20] Fixes #194 --- .../_src/datageneration/datagenerator.py | 44 +++++++++++++++++-- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/src/f3dasm/_src/datageneration/datagenerator.py b/src/f3dasm/_src/datageneration/datagenerator.py index 81a4742e..ff663164 100644 --- a/src/f3dasm/_src/datageneration/datagenerator.py +++ b/src/f3dasm/_src/datageneration/datagenerator.py @@ -7,6 +7,7 @@ # Standard import sys +from abc import abstractmethod from functools import partial from typing import Any, Callable @@ -43,15 +44,32 @@ class DataGenerator: """Base class for a data generator""" def pre_process(self, experiment_sample: ExperimentSample, **kwargs) -> None: - """Function that handles the pre-processing""" + """Interface function that handles the pre-processing of the data generator + + Notes + ----- + If not implemented the function will be skipped + """ ... + @abstractmethod def execute(self, **kwargs) -> None: - """Function that calls the FEM simulator the pre-processing""" - raise NotImplementedError("No execute function implemented!") + """Interface function that handles the execution of the data generator + + Raises + ------ + NotImplementedError + If the function is not implemented by the user + """ + ... def post_process(self, experiment_sample: ExperimentSample, **kwargs) -> None: - """Function that handles the post-processing""" + """Interface function that handles the post-processing of the data generator + + Notes + ----- + If not implemented the function will be skipped + """ ... @time_and_log @@ -88,7 +106,25 @@ def _post_simulation(self) -> None: ... def add_pre_process(self, func: Callable, **kwargs): + """Add a pre-processing function to the data generator + + Parameters + ---------- + func : Callable + The function to add to the pre-processing + kwargs : dict + The keyword arguments to pass to the pre-processing function + """ self.pre_process = partial(func, **kwargs) def add_post_process(self, func: Callable, **kwargs): + """Add a post-processing function to the data generator + + Parameters + ---------- + func : Callable + The function to add to the post-processing + kwargs : dict + The keyword arguments to pass to the post-processing function + """ self.post_process = partial(func, **kwargs) From 0d74d03cf1e8b4d28a284fa2cd1ef1be70800bf2 Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Fri, 27 Oct 2023 11:25:25 -0400 Subject: [PATCH 12/20] Handle similar bounds on add_float to make a constant parameter Fixes #195 --- src/f3dasm/_src/design/domain.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/f3dasm/_src/design/domain.py b/src/f3dasm/_src/design/domain.py index 2a911a5b..b43dc834 100644 --- a/src/f3dasm/_src/design/domain.py +++ b/src/f3dasm/_src/design/domain.py @@ -8,6 +8,7 @@ from __future__ import annotations # Standard +import math import pickle from dataclasses import dataclass, field from pathlib import Path @@ -255,7 +256,7 @@ def add_int(self, name: str, low: int, high: int, step: int = 1): Note ---- - If the lower and upper bound are equal, then then a constant parameter + If the lower and upper bound are equal, then a constant parameter will be added to the domain! """ if low == high: @@ -274,7 +275,7 @@ def add_float(self, name: str, low: float, high: float, log: bool = False): high : float Upper bound of the input parameter. log : bool, optional - Whether to u_add(name, ConstantParameter(low))se a logarithmic scale, by default False. + Whether to use a logarithmic scale, by default False. Example ------- @@ -285,10 +286,10 @@ def add_float(self, name: str, low: float, high: float, log: bool = False): Note ---- - If the lower and upper bound are equal, then then a constant parameter + If the lower and upper bound are equal, then a constant parameter will be added to the domain! """ - if low == high: + if math.isclose(low, high): self.add_constant(name, low) else: self._add(name, ContinuousParameter(low, high, log)) From df9744a3517680ee928caa57d4b3bab20314890f Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Fri, 27 Oct 2023 11:40:12 -0400 Subject: [PATCH 13/20] Change iter-behaviour of ExperimentData object to provide ExperimentSample objects Fixes #189 --- src/f3dasm/_src/experimentdata/experimentdata.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/f3dasm/_src/experimentdata/experimentdata.py b/src/f3dasm/_src/experimentdata/experimentdata.py index 6ec35fcd..518af2b7 100644 --- a/src/f3dasm/_src/experimentdata/experimentdata.py +++ b/src/f3dasm/_src/experimentdata/experimentdata.py @@ -182,12 +182,12 @@ def wrapper_func(self, *args, **kwargs) -> None: @property def index(self) -> pd.Index: - """Returns the index of the ExperimentData + """Returns an iterable of the job number of the experiments Returns ------- pd.Index - The indices of the experiments + The job number of all the experiments in pandas Index format """ return self.input_data.indices @@ -503,7 +503,7 @@ def _reset_index(self) -> None: self.output_data.reset_index() self.jobs.reset_index() - # ExperimentSample +# ExperimentSample # ============================================================================= def get_experiment_sample(self, index: int) -> ExperimentSample: From 8ebd19382c23a83f3b687327f68f099375962ba7 Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Fri, 27 Oct 2023 11:50:45 -0400 Subject: [PATCH 14/20] ExperimentData.store documentation is not correct Fixes #190 --- src/f3dasm/_src/experimentdata/experimentdata.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/f3dasm/_src/experimentdata/experimentdata.py b/src/f3dasm/_src/experimentdata/experimentdata.py index b49e297f..29eada0d 100644 --- a/src/f3dasm/_src/experimentdata/experimentdata.py +++ b/src/f3dasm/_src/experimentdata/experimentdata.py @@ -305,6 +305,16 @@ def store(self, filename: Optional[str] = None): ---------- filename : str, optional filename of the files to store, without suffix + + Notes + ----- + The ExperimentData object is stored at the location provided by the `.path` attribute + that is set upon creation of the object. + The ExperimentData object is stored in four files. The name is used as a prefix for the four files: + - the input data (_input.csv) + - the output data (_output.csv) + - the jobs (_jobs.pkl) + - the domain (_domain.pkl) """ if filename is None: filename = self.filename From db5382718a50f04fd499c84872cbf83e2d89dbba Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Fri, 27 Oct 2023 11:51:43 -0400 Subject: [PATCH 15/20] added the reason why filename is optional --- src/f3dasm/_src/experimentdata/experimentdata.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/f3dasm/_src/experimentdata/experimentdata.py b/src/f3dasm/_src/experimentdata/experimentdata.py index 29eada0d..1ca0aa5d 100644 --- a/src/f3dasm/_src/experimentdata/experimentdata.py +++ b/src/f3dasm/_src/experimentdata/experimentdata.py @@ -308,6 +308,8 @@ def store(self, filename: Optional[str] = None): Notes ----- + If no filename is given, the filename of the ExperimentData object is used. + The ExperimentData object is stored at the location provided by the `.path` attribute that is set upon creation of the object. The ExperimentData object is stored in four files. The name is used as a prefix for the four files: From d7f9180b7eb56de44bda31b8425576b3deae336d Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Sat, 28 Oct 2023 18:02:33 -0400 Subject: [PATCH 16/20] increased version number to 1.4.4 --- VERSION | 2 +- src/f3dasm/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/VERSION b/VERSION index 3c80e4f0..e1df5de7 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.4.3 \ No newline at end of file +1.4.4 \ No newline at end of file diff --git a/src/f3dasm/__init__.py b/src/f3dasm/__init__.py index b4e60693..58852bc9 100644 --- a/src/f3dasm/__init__.py +++ b/src/f3dasm/__init__.py @@ -37,7 +37,7 @@ # ============================================================================= -__version__ = '1.4.3' +__version__ = '1.4.4' # Log welcome message and the version of f3dasm From dc6664ea1a1e9f26f7508bbbd0b111fd6e4f85e4 Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Sat, 28 Oct 2023 18:02:34 -0400 Subject: [PATCH 17/20] Increased version number to 1.4.4 --- docs/source/conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index cffb629f..7b6218b3 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -24,8 +24,8 @@ project = 'f3dasm' author = 'Martin van der Schelling' copyright = '2022, Martin van der Schelling' -version = '1.4.3' -release = '1.4.3' +version = '1.4.4' +release = '1.4.4' # -- General configuration ---------------------------------------------------- From 452c53b5729da5977610170a1027b270cdb05ce3 Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Sat, 28 Oct 2023 18:06:47 -0400 Subject: [PATCH 18/20] changed github workflows --- .github/workflows/pr_to_pr.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr_to_pr.yml b/.github/workflows/pr_to_pr.yml index 224f7909..1861700f 100644 --- a/.github/workflows/pr_to_pr.yml +++ b/.github/workflows/pr_to_pr.yml @@ -1,9 +1,12 @@ -name: Pull request to pr/** branches +name: Pull request and push to pr/** branches on: pull_request: branches: - "pr/**" + push: + branches: + - "pr/**" jobs: check-coding-style: From f47bc0f29ad9abc4b18cb9122d1c2a5940d3fdf1 Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Mon, 30 Oct 2023 10:27:19 -0400 Subject: [PATCH 19/20] added more descriptive docstrings --- .../_src/datageneration/datagenerator.py | 35 +++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/src/f3dasm/_src/datageneration/datagenerator.py b/src/f3dasm/_src/datageneration/datagenerator.py index ff663164..c0f501e0 100644 --- a/src/f3dasm/_src/datageneration/datagenerator.py +++ b/src/f3dasm/_src/datageneration/datagenerator.py @@ -49,6 +49,10 @@ def pre_process(self, experiment_sample: ExperimentSample, **kwargs) -> None: Notes ----- If not implemented the function will be skipped + + The experiment_sample is cached inside the data generator. This + allows the user to access the experiment_sample in the pre_process, execute + and post_process functions as a class variable called self.experiment_sample. """ ... @@ -60,7 +64,14 @@ def execute(self, **kwargs) -> None: ------ NotImplementedError If the function is not implemented by the user + + Notes + ----- + The experiment_sample is cached inside the data generator. This + allows the user to access the experiment_sample in the pre_process, execute + and post_process functions as a class variable called self.experiment_sample. """ + ... def post_process(self, experiment_sample: ExperimentSample, **kwargs) -> None: @@ -69,22 +80,42 @@ def post_process(self, experiment_sample: ExperimentSample, **kwargs) -> None: Notes ----- If not implemented the function will be skipped + + The experiment_sample is cached inside the data generator. This + allows the user to access the experiment_sample in the pre_process, execute + and post_process functions as a class variable called self.experiment_sample. """ ... @time_and_log def run(self, experiment_sample: ExperimentSample, **kwargs) -> ExperimentSample: - """Run the data generator + """This function chains the following methods together + + * pre_process(); to combine the experiment_sample and the parameters + of the data generator to an input file that can be used to run the data generator + + * execute(); to run the data generator and generate the response of the experiment + + * post_process(); to process the response of the experiment and store it back + in the experiment_sample + + The function also caches the experiment_sample in the data generator. This + allows the user to access the experiment_sample in the pre_process, execute + and post_process functions as a class variable called self.experiment_sample. Parameters ---------- ExperimentSample : ExperimentSample The design to run the data generator on + kwargs : dict + The keyword arguments to pass to the pre_process, execute and post_process + Returns ------- ExperimentSample - Processed design + Processed design with the response of the data generator saved in the + experiment_sample """ # Cache the design self.experiment_sample: ExperimentSample = experiment_sample From e11a00352fc4e0a0db7b5188df05ec4e02c41ab8 Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Mon, 30 Oct 2023 10:35:17 -0400 Subject: [PATCH 20/20] flake8 error resolved at docstring --- src/f3dasm/_src/datageneration/datagenerator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/f3dasm/_src/datageneration/datagenerator.py b/src/f3dasm/_src/datageneration/datagenerator.py index 71a8a30d..93d421f5 100644 --- a/src/f3dasm/_src/datageneration/datagenerator.py +++ b/src/f3dasm/_src/datageneration/datagenerator.py @@ -116,7 +116,7 @@ def _run(self, experiment_sample: ExperimentSample, **kwargs) -> ExperimentSampl Returns ------- ExperimentSample - Processed design with the response of the data generator saved in the + Processed design with the response of the data generator saved in the experiment_sample """ # Cache the design