diff --git a/VERSION b/VERSION
index c9929e36..3c80e4f0 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.4.2
\ No newline at end of file
+1.4.3
\ No newline at end of file
diff --git a/docs/source/conf.py b/docs/source/conf.py
index e4840430..cffb629f 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -24,8 +24,8 @@
 project = 'f3dasm'
 author = 'Martin van der Schelling'
 copyright = '2022, Martin van der Schelling'
-version = '1.4.2'
-release = '1.4.2'
+version = '1.4.3'
+release = '1.4.3'

 # -- General configuration ----------------------------------------------------

diff --git a/src/f3dasm/__init__.py b/src/f3dasm/__init__.py
index a2814250..b4e60693 100644
--- a/src/f3dasm/__init__.py
+++ b/src/f3dasm/__init__.py
@@ -37,7 +37,7 @@

 # =============================================================================

-__version__ = '1.4.2'
+__version__ = '1.4.3'

 # Log welcome message and the version of f3dasm

diff --git a/src/f3dasm/_src/experimentdata/_jobqueue.py b/src/f3dasm/_src/experimentdata/_jobqueue.py
index d5c3f1d8..98811936 100644
--- a/src/f3dasm/_src/experimentdata/_jobqueue.py
+++ b/src/f3dasm/_src/experimentdata/_jobqueue.py
@@ -248,6 +248,9 @@ def mark_all_in_progress_open(self) -> None:
         """Marks all jobs as 'open'."""
         self.jobs = self.jobs.replace(Status.IN_PROGRESS, Status.OPEN)

+    def mark_all_error_open(self) -> None:
+        """Marks all jobs with the 'error' status as 'open'."""
+        self.jobs = self.jobs.replace(Status.ERROR, Status.OPEN)

 # Miscellaneous
 # =============================================================================
diff --git a/src/f3dasm/_src/experimentdata/experimentdata.py b/src/f3dasm/_src/experimentdata/experimentdata.py
index f7734ead..ade061cc 100644
--- a/src/f3dasm/_src/experimentdata/experimentdata.py
+++ b/src/f3dasm/_src/experimentdata/experimentdata.py
@@ -13,7 +13,7 @@
 from functools import wraps
 from pathlib import Path
 from typing import (Any, Callable, Dict, Iterable, Iterator, List, Optional,
-                    Tuple, Type, Union)
+                    Tuple, Type)

 # Third-party
 import numpy as np
@@ -57,7 +57,7 @@ class ExperimentData:
     def __init__(self, domain: Optional[Domain] = None, input_data: Optional[DataTypes] = None,
                  output_data: Optional[DataTypes] = None, jobs: Optional[Path | str] = None,
-                 filename: Optional[str] = 'experimentdata'):
+                 filename: Optional[str] = 'experimentdata', path: Optional[Path] = None):
         """
         Initializes an instance of ExperimentData.
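Aside on the _jobqueue.py hunk above: the new mark_all_error_open() resets every job whose status is 'error' back to 'open', so failed experiments can be retried without touching finished ones. The job queue appears to be held as a pandas Series of Status values (the method manipulates it with Series.replace), which makes the reset a one-liner. A minimal standalone sketch of that pattern, using plain strings in place of the f3dasm Status enum:

    import pandas as pd

    # Toy job queue: one status per job index, as in _JobQueue.jobs
    jobs = pd.Series(['finished', 'error', 'open', 'error'])

    # Reset every 'error' job to 'open'; other statuses are untouched
    jobs = jobs.replace('error', 'open')
    print(jobs.tolist())  # ['finished', 'open', 'open', 'open']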
@@ -73,6 +73,8 @@ def __init__(self, domain: Optional[Domain] = None, input_data: Optional[DataTyp
             The path to the jobs file, by default None
         filename : str, optional
             The filename of the experiment, by default 'experimentdata'
+        path : Path, optional
+            The path of the directory where the experiment data is stored, by default the current working directory
         """

         if isinstance(input_data, np.ndarray) and domain is None:
@@ -80,6 +82,11 @@ def __init__(self, domain: Optional[Domain] = None, input_data: Optional[DataTyp

         self.filename = filename

+        if path is None:
+            path = Path.cwd()
+
+        self.path = path
+
         self.input_data = data_factory(input_data)
         self.output_data = data_factory(output_data)
@@ -141,7 +148,7 @@ def __eq__(self, __o: ExperimentData) -> bool:
     def __getitem__(self, index: int | slice | Iterable[int]) -> _Data:
         """The [] operator returns a single datapoint or a subset of datapoints"""
         return ExperimentData(input_data=self.input_data[index], output_data=self.output_data[index],
-                              jobs=self.jobs[index], domain=self.domain, filename=self.filename)
+                              jobs=self.jobs[index], domain=self.domain, filename=self.filename, path=self.path)

     def _repr_html_(self) -> str:
         return self.input_data.combine_data_to_multiindex(self.output_data, self.jobs.to_dataframe())._repr_html_()
@@ -269,7 +276,7 @@ def _from_file_attempt(cls: Type[ExperimentData], filename: Path) -> ExperimentD
         try:
             return cls(domain=Path(f"{filename}{DOMAIN_SUFFIX}"), input_data=Path(f"{filename}{INPUT_DATA_SUFFIX}"),
                        output_data=Path(f"{filename}{OUTPUT_DATA_SUFFIX}"), jobs=Path(f"{filename}{JOBS_SUFFIX}"),
-                       filename=filename.name)
+                       filename=filename.name, path=filename.parent)
         except FileNotFoundError:
             raise FileNotFoundError(f"Cannot find the files from {filename}.")

@@ -494,7 +501,8 @@ def _get_experiment_sample(self, index: int) -> ExperimentSample:
             The ExperimentSample at the given index.
         """
         return ExperimentSample(dict_input=self.input_data.get_data_dict(index),
-                                dict_output=self.output_data.get_data_dict(index), jobnumber=index)
+                                dict_output=self.output_data.get_data_dict(index), jobnumber=index,
+                                experimentdata_directory=self.path)

     def _set_experiment_sample(self, experiment_sample: ExperimentSample) -> None:
         """
@@ -508,7 +516,6 @@ def _set_experiment_sample(self, experiment_sample: ExperimentSample) -> None:
         for column, value in experiment_sample.output_data.items():
             self.output_data.set_data(index=experiment_sample.job_number, value=value, column=column)

-        # self.jobs.mark_as_finished(experiment_sample._jobnumber)
         self.jobs.mark(experiment_sample._jobnumber, status=Status.FINISHED)

     @_access_file
@@ -532,7 +539,6 @@ def _access_open_job_data(self) -> ExperimentSample:
             The ExperimentSample object of the first available open job.
""" job_index = self.jobs.get_open_job() - # self.jobs.mark_as_in_progress(job_index) self.jobs.mark(job_index, status=Status.IN_PROGRESS) experiment_sample = self._get_experiment_sample(job_index) return experiment_sample @@ -622,6 +628,12 @@ def mark_all(self, status: str) -> None: If the given status is not any of 'open', 'in progress', 'finished' or 'error' """ self.mark(self.jobs.indices, status) + + def mark_all_error_open(self) -> None: + """ + Mark all the experiments that have the status 'error' open + """ + self.jobs.mark_all_error_open() # Datageneration # ============================================================================= @@ -957,7 +969,7 @@ def data_factory(data: DataTypes) -> _Data: f"Data must be of type _Data, pd.DataFrame, np.ndarray, Path or str, not {type(data)}") -def domain_factory(domain: Union[None, Domain], input_data: _Data) -> Domain: +def domain_factory(domain: Domain | None, input_data: _Data) -> Domain: if isinstance(domain, Domain): return domain @@ -974,7 +986,7 @@ def domain_factory(domain: Union[None, Domain], input_data: _Data) -> Domain: raise TypeError(f"Domain must be of type Domain or None, not {type(domain)}") -def jobs_factory(jobs: Path | str | None, input_data: _Data, job_value: Status) -> _JobQueue: +def jobs_factory(jobs: Path | str | _JobQueue | None, input_data: _Data, job_value: Status) -> _JobQueue: """Creates a _JobQueue object from particular inpute Parameters @@ -991,6 +1003,9 @@ def jobs_factory(jobs: Path | str | None, input_data: _Data, job_value: Status) _JobQueue JobQueue object """ + if isinstance(jobs, _JobQueue): + return jobs + if isinstance(jobs, (Path, str)): return _JobQueue.from_file(Path(jobs)) diff --git a/src/f3dasm/_src/experimentdata/experimentsample.py b/src/f3dasm/_src/experimentdata/experimentsample.py index 778c70d8..5d4f979b 100644 --- a/src/f3dasm/_src/experimentdata/experimentsample.py +++ b/src/f3dasm/_src/experimentdata/experimentsample.py @@ -97,28 +97,32 @@ def load(self) -> xr.DataArray | xr.Dataset: } -def load_object(path: Path, store_method: Type[_Store] = PickleStore) -> Any: +def load_object(path: Path, experimentdata_directory: Path, store_method: Type[_Store] = PickleStore) -> Any: + + _path = experimentdata_directory / path + if store_method is not None: - return store_method(None, path).load() + return store_method(None, _path).load() - if not path.exists(): + if not _path.exists(): return None # Extract the suffix from the item's path - item_suffix = path.suffix + item_suffix = _path.suffix # Use a generator expression to find the first matching store type, or None if no match is found matched_store_type: _Store = next( (store_type for store_type in STORE_TYPE_MAPPING.values() if store_type.suffix == item_suffix), PickleStore) if matched_store_type: - return matched_store_type(None, path).load() + return matched_store_type(None, _path).load() else: # Handle the case when no matching suffix is found raise ValueError(f"No matching store type for item type: '{item_suffix}'") -def save_object(object: Any, path: Path, store_method: Optional[Type[_Store]] = None) -> str: +def save_object(object: Any, path: Path, experimentdata_directory: Path, + store_method: Optional[Type[_Store]] = None) -> str: """Function to save the object to path, with the appropriate storing method. Parameters @@ -140,20 +144,22 @@ def save_object(object: Any, path: Path, store_method: Optional[Type[_Store]] = TypeError Raises if the object type is not supported, and you haven't provided a custom store method. 
""" + _path = experimentdata_directory / path + if store_method is not None: - storage = store_method(object, path) + storage = store_method(object, _path) return # Check if object type is supported object_type = type(object) if object_type not in STORE_TYPE_MAPPING: - storage: _Store = PickleStore(object, path) + storage: _Store = PickleStore(object, _path) logger.debug(f"Object type {object_type} is not natively supported. " f"The default pickle storage method will be used.") else: - storage: _Store = STORE_TYPE_MAPPING[object_type](object, path) + storage: _Store = STORE_TYPE_MAPPING[object_type](object, _path) # Store object storage.store() return storage.suffix @@ -163,7 +169,8 @@ def save_object(object: Any, path: Path, store_method: Optional[Type[_Store]] = class ExperimentSample: - def __init__(self, dict_input: Dict[str, Any], dict_output: Dict[str, Any], jobnumber: int): + def __init__(self, dict_input: Dict[str, Any], dict_output: Dict[str, Any], + jobnumber: int, experimentdata_directory: Optional[Path] = None): """Single realization of a design of experiments. Parameters @@ -179,6 +186,11 @@ def __init__(self, dict_input: Dict[str, Any], dict_output: Dict[str, Any], jobn self._dict_output = dict_output self._jobnumber = jobnumber + if experimentdata_directory is None: + experimentdata_directory = Path.cwd() + + self._experimentdata_directory = experimentdata_directory + @classmethod def from_numpy(cls: Type[ExperimentSample], input_array: np.ndarray, output_value: Optional[float] = None, jobnumber: int = 0) -> ExperimentSample: @@ -233,7 +245,7 @@ class of defined type to load the data. By default None, return item # Load the object from the reference - return load_object(Path(value), load_method) + return load_object(Path(value), self._experimentdata_directory, load_method) else: # Return the literal value return value @@ -365,14 +377,15 @@ def store(self, object: Any, name: str, to_disk: bool = False, self._store_to_experimentdata(object=object, name=name) def _store_to_disk(self, object: Any, name: str, store_method: Optional[Type[_Store]] = None) -> None: - file_dir = Path().cwd() / name - file_path = file_dir / str(self.job_number) + file_path = Path(name) / str(self.job_number) # Check if the file_dir exists - file_dir.mkdir(parents=True, exist_ok=True) + (self._experimentdata_directory / Path(name)).mkdir(parents=True, exist_ok=True) # Save the object to disk - suffix = save_object(object=object, path=file_dir/str(self.job_number), store_method=store_method) + suffix = save_object(object=object, path=file_path, + experimentdata_directory=self._experimentdata_directory, + store_method=store_method) # Store the path to the object in the output_data self._dict_output[f"{PATH_PREFIX}{name}"] = str(file_path.with_suffix(suffix)) diff --git a/src/f3dasm/_src/optimization/randomsearch.py b/src/f3dasm/_src/optimization/randomsearch.py index a74fad7c..5bfb785a 100644 --- a/src/f3dasm/_src/optimization/randomsearch.py +++ b/src/f3dasm/_src/optimization/randomsearch.py @@ -42,7 +42,8 @@ def set_seed(self): np.random.seed(self.seed) def update_step(self, data_generator: DataGenerator) -> Tuple[np.ndarray, np.ndarray]: - self.set_seed() + # BUG: This setting of seed results in the same value being samples all the time! 
+        # self.set_seed()

         x_new = np.atleast_2d(
             [
diff --git a/src/f3dasm/_src/run_optimization.py b/src/f3dasm/_src/run_optimization.py
index 4c1cd3b8..586fce34 100644
--- a/src/f3dasm/_src/run_optimization.py
+++ b/src/f3dasm/_src/run_optimization.py
@@ -7,15 +7,17 @@
 from __future__ import annotations

 # Standard
+from time import perf_counter
 from typing import Any, Callable, Dict, List, Optional

 # Third-party
 import numpy as np
 import pandas as pd
 import xarray as xr
+from pathos.helpers import mp
+
 from f3dasm.design import Domain
 from f3dasm.optimization import Optimizer
-from pathos.helpers import mp

 # Locals
 from .datageneration.datagenerator import DataGenerator
@@ -37,7 +39,7 @@ class OptimizationResult:
     def __init__(self, data: List[ExperimentData], optimizer: Optimizer,
                  kwargs: Optional[Dict[str, Any]], data_generator: DataGenerator,
-                 number_of_samples: int, seeds: List[int]):
+                 number_of_samples: int, seeds: List[int], opt_time: float = 0.0):
         """Optimization results object

         Parameters
@@ -54,6 +56,8 @@ def __init__(self, data: List[ExperimentData], optimizer: Optimizer,
             number of initial samples, sampled by the sampling strategy
         seeds
             list of seeds that were used for each realization
+        opt_time
+            total wall-clock optimization time in seconds
         """
         self.data = data
         self.optimizer = optimizer_factory(optimizer=optimizer, domain=self.data[0].domain)
@@ -61,6 +65,7 @@ def __init__(self, data: List[ExperimentData], optimizer: Optimizer,
         self.kwargs = kwargs
         self.number_of_samples = number_of_samples
         self.seeds = seeds
+        self.opt_time = opt_time

         self.func = datagenerator_factory(data_generator=self.data_generator,
                                           domain=self.data[0].domain, kwargs=kwargs)
@@ -73,7 +78,7 @@ def _log(self):
                 f"dim={len(self.data[0].domain)}, "
                 f"noise={self.func.noise}) "
                 f"with {self.optimizer.get_name()} optimizer for "
-                f"{len(self.data)} realizations.")
+                f"{len(self.data)} realizations ({self.opt_time:.3f} s).")
         )

     def to_xarray(self) -> xr.Dataset:
@@ -228,6 +233,8 @@
         Object with the optimization data results
     """

+    start_timer = perf_counter()
+
     if kwargs is None:
         kwargs = {}

@@ -264,6 +271,8 @@
         }
         results.append(run_optimization(**args))

+    opt_time = perf_counter() - start_timer
+
     return OptimizationResult(
         data=results,
         optimizer=optimizer,
@@ -271,6 +280,7 @@
         kwargs=kwargs,
         number_of_samples=number_of_samples,
         seeds=[seed + i for i in range(realizations)],
+        opt_time=opt_time,
     )
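Aside on the timing change in run_optimization.py: the realization loop is bracketed with perf_counter() calls and the elapsed wall-clock seconds travel into OptimizationResult, where _log() reports them. A minimal standalone sketch of the same pattern (worker() is a hypothetical stand-in for run_optimization, not part of f3dasm):

    from time import perf_counter

    def worker(seed: int) -> int:
        # Hypothetical stand-in for one optimization realization
        return sum(i * i for i in range(100_000 + seed))

    start_timer = perf_counter()
    results = [worker(seed) for seed in range(5)]
    opt_time = perf_counter() - start_timer
    print(f"{len(results)} realizations ({opt_time:.3f} s)")

perf_counter() is the safer clock for durations: unlike time.time() it is monotonic and has the highest available resolution, so the measurement cannot be skewed by system clock adjustments.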