
Merge pull request #185 from bessagroup/pr/1.4.3
Pr/1.4.3
mpvanderschelling committed Oct 20, 2023
2 parents 40f2c1b + 9f7434a commit 322840c
Showing 8 changed files with 74 additions and 32 deletions.
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
-1.4.2
+1.4.3
4 changes: 2 additions & 2 deletions docs/source/conf.py
@@ -24,8 +24,8 @@
project = 'f3dasm'
author = 'Martin van der Schelling'
copyright = '2022, Martin van der Schelling'
-version = '1.4.2'
-release = '1.4.2'
+version = '1.4.3'
+release = '1.4.3'


# -- General configuration ----------------------------------------------------
2 changes: 1 addition & 1 deletion src/f3dasm/__init__.py
@@ -37,7 +37,7 @@
# =============================================================================


-__version__ = '1.4.2'
+__version__ = '1.4.3'


# Log welcome message and the version of f3dasm
3 changes: 3 additions & 0 deletions src/f3dasm/_src/experimentdata/_jobqueue.py
@@ -248,6 +248,9 @@ def mark_all_in_progress_open(self) -> None:
"""Marks all jobs as 'open'."""
self.jobs = self.jobs.replace(Status.IN_PROGRESS, Status.OPEN)

+def mark_all_error_open(self) -> None:
+    """Marks all jobs with status 'error' as 'open'."""
+    self.jobs = self.jobs.replace(Status.ERROR, Status.OPEN)
# Miscellaneous
# =============================================================================

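For context: the job queue evidently tracks statuses in a pandas Series, so reopening errored jobs is a single vectorized replace, mirroring mark_all_in_progress_open directly above. A minimal sketch of the pattern, with a hypothetical stand-in for f3dasm's Status enum:

    from enum import Enum

    import pandas as pd


    class Status(str, Enum):
        # Hypothetical stand-in for f3dasm's Status enum
        OPEN = 'open'
        IN_PROGRESS = 'in progress'
        FINISHED = 'finished'
        ERROR = 'error'


    jobs = pd.Series([Status.FINISHED, Status.ERROR, Status.OPEN])

    # Reopen every errored job in one call, as mark_all_error_open does:
    jobs = jobs.replace(Status.ERROR, Status.OPEN)
    print(jobs.tolist())  # [Status.FINISHED, Status.OPEN, Status.OPEN]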
33 changes: 24 additions & 9 deletions src/f3dasm/_src/experimentdata/experimentdata.py
@@ -13,7 +13,7 @@
from functools import wraps
from pathlib import Path
from typing import (Any, Callable, Dict, Iterable, Iterator, List, Optional,
-                    Tuple, Type, Union)
+                    Tuple, Type)

# Third-party
import numpy as np
@@ -57,7 +57,7 @@ class ExperimentData:

def __init__(self, domain: Optional[Domain] = None, input_data: Optional[DataTypes] = None,
output_data: Optional[DataTypes] = None, jobs: Optional[Path | str] = None,
-                 filename: Optional[str] = 'experimentdata'):
+                 filename: Optional[str] = 'experimentdata', path: Optional[Path] = None):
"""
Initializes an instance of ExperimentData.
@@ -73,13 +73,20 @@ def __init__(self, domain: Optional[Domain] = None, input_data: Optional[DataTyp
The path to the jobs file, by default None
filename : str, optional
The filename of the experiment, by default 'experimentdata'
+path : Path, optional
+    The path to the experimentdata file, by default the current working directory
"""

if isinstance(input_data, np.ndarray) and domain is None:
raise ValueError('If you provide a numpy array as input_data, you have to provide the domain!')

self.filename = filename

+if path is None:
+    path = Path().cwd()
+
+self.path = path

self.input_data = data_factory(input_data)
self.output_data = data_factory(output_data)

@@ -141,7 +148,7 @@ def __eq__(self, __o: ExperimentData) -> bool:
def __getitem__(self, index: int | slice | Iterable[int]) -> _Data:
"""The [] operator returns a single datapoint or a subset of datapoints"""
return ExperimentData(input_data=self.input_data[index], output_data=self.output_data[index],
-                      jobs=self.jobs[index], domain=self.domain, filename=self.filename)
+                      jobs=self.jobs[index], domain=self.domain, filename=self.filename, path=self.path)

def _repr_html_(self) -> str:
return self.input_data.combine_data_to_multiindex(self.output_data, self.jobs.to_dataframe())._repr_html_()
@@ -269,7 +276,7 @@ def _from_file_attempt(cls: Type[ExperimentData], filename: Path) -> ExperimentD
try:
return cls(domain=Path(f"{filename}{DOMAIN_SUFFIX}"), input_data=Path(f"{filename}{INPUT_DATA_SUFFIX}"),
output_data=Path(f"{filename}{OUTPUT_DATA_SUFFIX}"), jobs=Path(f"{filename}{JOBS_SUFFIX}"),
-                   filename=filename.name)
+                   filename=filename.name, path=filename.parent)
except FileNotFoundError:
raise FileNotFoundError(f"Cannot find the files from {filename}.")

@@ -494,7 +501,8 @@ def _get_experiment_sample(self, index: int) -> ExperimentSample:
The ExperimentSample at the given index.
"""
return ExperimentSample(dict_input=self.input_data.get_data_dict(index),
-                        dict_output=self.output_data.get_data_dict(index), jobnumber=index)
+                        dict_output=self.output_data.get_data_dict(index), jobnumber=index,
+                        experimentdata_directory=self.path)

def _set_experiment_sample(self, experiment_sample: ExperimentSample) -> None:
"""
@@ -508,7 +516,6 @@ def _set_experiment_sample(self, experiment_sample: ExperimentSample) -> None:
for column, value in experiment_sample.output_data.items():
self.output_data.set_data(index=experiment_sample.job_number, value=value, column=column)

-# self.jobs.mark_as_finished(experiment_sample._jobnumber)
self.jobs.mark(experiment_sample._jobnumber, status=Status.FINISHED)

@_access_file
@@ -532,7 +539,6 @@ def _access_open_job_data(self) -> ExperimentSample:
The ExperimentSample object of the first available open job.
"""
job_index = self.jobs.get_open_job()
-# self.jobs.mark_as_in_progress(job_index)
self.jobs.mark(job_index, status=Status.IN_PROGRESS)
experiment_sample = self._get_experiment_sample(job_index)
return experiment_sample
@@ -622,6 +628,12 @@ def mark_all(self, status: str) -> None:
If the given status is not any of 'open', 'in progress', 'finished' or 'error'
"""
self.mark(self.jobs.indices, status)

+def mark_all_error_open(self) -> None:
+    """
+    Mark all the experiments that have status 'error' as 'open'.
+    """
+    self.jobs.mark_all_error_open()
# Datageneration
# =============================================================================

@@ -957,7 +969,7 @@ def data_factory(data: DataTypes) -> _Data:
f"Data must be of type _Data, pd.DataFrame, np.ndarray, Path or str, not {type(data)}")


-def domain_factory(domain: Union[None, Domain], input_data: _Data) -> Domain:
+def domain_factory(domain: Domain | None, input_data: _Data) -> Domain:
if isinstance(domain, Domain):
return domain

@@ -974,7 +986,7 @@ def domain_factory(domain: Union[None, Domain], input_data: _Data) -> Domain:
raise TypeError(f"Domain must be of type Domain or None, not {type(domain)}")


-def jobs_factory(jobs: Path | str | None, input_data: _Data, job_value: Status) -> _JobQueue:
+def jobs_factory(jobs: Path | str | _JobQueue | None, input_data: _Data, job_value: Status) -> _JobQueue:
"""Creates a _JobQueue object from particular inpute
Parameters
Expand All @@ -991,6 +1003,9 @@ def jobs_factory(jobs: Path | str | None, input_data: _Data, job_value: Status)
_JobQueue
JobQueue object
"""
+if isinstance(jobs, _JobQueue):
+    return jobs
+
if isinstance(jobs, (Path, str)):
return _JobQueue.from_file(Path(jobs))

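Taken together, the experimentdata.py changes thread a storage path through construction, slicing and loading, and expose retrying of errored jobs on ExperimentData itself. A minimal usage sketch (the DataFrame columns and directory are made up; the domain is left for domain_factory to infer from the input data):

    from pathlib import Path

    import pandas as pd

    from f3dasm import ExperimentData

    # Hypothetical two-sample input set
    input_data = pd.DataFrame({'x0': [0.1, 0.9], 'x1': [0.4, 0.2]})

    # `path` is new here and defaults to the current working directory
    data = ExperimentData(input_data=input_data, path=Path('/tmp/my_experiment'))

    # Subsets produced by the [] operator now carry the path along
    subset = data[0]

    # After a crashed run, reopen all errored experiments for a retry
    data.mark_all_error_open()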
43 changes: 28 additions & 15 deletions src/f3dasm/_src/experimentdata/experimentsample.py
@@ -97,28 +97,32 @@ def load(self) -> xr.DataArray | xr.Dataset:
}


-def load_object(path: Path, store_method: Type[_Store] = PickleStore) -> Any:
+def load_object(path: Path, experimentdata_directory: Path, store_method: Type[_Store] = PickleStore) -> Any:

+_path = experimentdata_directory / path

if store_method is not None:
-    return store_method(None, path).load()
+    return store_method(None, _path).load()

-if not path.exists():
+if not _path.exists():
return None

# Extract the suffix from the item's path
-item_suffix = path.suffix
+item_suffix = _path.suffix

# Use a generator expression to find the first matching store type, or None if no match is found
matched_store_type: _Store = next(
(store_type for store_type in STORE_TYPE_MAPPING.values() if store_type.suffix == item_suffix), PickleStore)

if matched_store_type:
-    return matched_store_type(None, path).load()
+    return matched_store_type(None, _path).load()
else:
# Handle the case when no matching suffix is found
raise ValueError(f"No matching store type for item type: '{item_suffix}'")


-def save_object(object: Any, path: Path, store_method: Optional[Type[_Store]] = None) -> str:
+def save_object(object: Any, path: Path, experimentdata_directory: Path,
+                store_method: Optional[Type[_Store]] = None) -> str:
"""Function to save the object to path, with the appropriate storing method.
Parameters
@@ -140,20 +144,22 @@ def save_object(object: Any, path: Path, store_method: Optional[Type[_Store]] =
TypeError
Raises if the object type is not supported, and you haven't provided a custom store method.
"""
+_path = experimentdata_directory / path

if store_method is not None:
-    storage = store_method(object, path)
+    storage = store_method(object, _path)
return

# Check if object type is supported
object_type = type(object)

if object_type not in STORE_TYPE_MAPPING:
-    storage: _Store = PickleStore(object, path)
+    storage: _Store = PickleStore(object, _path)
logger.debug(f"Object type {object_type} is not natively supported. "
f"The default pickle storage method will be used.")

else:
-    storage: _Store = STORE_TYPE_MAPPING[object_type](object, path)
+    storage: _Store = STORE_TYPE_MAPPING[object_type](object, _path)
# Store object
storage.store()
return storage.suffix
@@ -163,7 +169,8 @@ def save_object(object: Any, path: Path, store_method: Optional[Type[_Store]] =


class ExperimentSample:
-def __init__(self, dict_input: Dict[str, Any], dict_output: Dict[str, Any], jobnumber: int):
+def __init__(self, dict_input: Dict[str, Any], dict_output: Dict[str, Any],
+             jobnumber: int, experimentdata_directory: Optional[Path] = None):
"""Single realization of a design of experiments.
Parameters
@@ -179,6 +186,11 @@ def __init__(self, dict_input: Dict[str, Any], dict_output: Dict[str, Any], jobn
self._dict_output = dict_output
self._jobnumber = jobnumber

+if experimentdata_directory is None:
+    experimentdata_directory = Path.cwd()
+
+self._experimentdata_directory = experimentdata_directory

@classmethod
def from_numpy(cls: Type[ExperimentSample], input_array: np.ndarray,
output_value: Optional[float] = None, jobnumber: int = 0) -> ExperimentSample:
@@ -233,7 +245,7 @@ class of defined type to load the data. By default None,
return item

# Load the object from the reference
-    return load_object(Path(value), load_method)
+    return load_object(Path(value), self._experimentdata_directory, load_method)
else:
# Return the literal value
return value
@@ -365,14 +377,15 @@ def store(self, object: Any, name: str, to_disk: bool = False,
self._store_to_experimentdata(object=object, name=name)

def _store_to_disk(self, object: Any, name: str, store_method: Optional[Type[_Store]] = None) -> None:
-file_dir = Path().cwd() / name
-file_path = file_dir / str(self.job_number)
+file_path = Path(name) / str(self.job_number)

# Check if the file_dir exists
-file_dir.mkdir(parents=True, exist_ok=True)
+(self._experimentdata_directory / Path(name)).mkdir(parents=True, exist_ok=True)

# Save the object to disk
-suffix = save_object(object=object, path=file_dir/str(self.job_number), store_method=store_method)
+suffix = save_object(object=object, path=file_path,
+                     experimentdata_directory=self._experimentdata_directory,
+                     store_method=store_method)

# Store the path to the object in the output_data
self._dict_output[f"{PATH_PREFIX}{name}"] = str(file_path.with_suffix(suffix))
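The net effect in experimentsample.py is that objects stored to disk now land under the sample's experimentdata_directory rather than the current working directory, while the reference recorded in the output data stays relative (name/job_number plus the store's suffix). A minimal sketch, assuming numpy arrays map to one of the natively supported store types:

    from pathlib import Path

    import numpy as np

    from f3dasm._src.experimentdata.experimentsample import ExperimentSample

    sample = ExperimentSample(dict_input={'x': 0.5}, dict_output={}, jobnumber=7,
                              experimentdata_directory=Path('/tmp/my_experiment'))

    # Writes /tmp/my_experiment/predictions/7.<suffix> and records the
    # relative reference 'predictions/7.<suffix>' in the output data
    sample.store(np.arange(10), name='predictions', to_disk=True)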
3 changes: 2 additions & 1 deletion src/f3dasm/_src/optimization/randomsearch.py
@@ -42,7 +42,8 @@ def set_seed(self):
np.random.seed(self.seed)

def update_step(self, data_generator: DataGenerator) -> Tuple[np.ndarray, np.ndarray]:
-    self.set_seed()
+    # BUG: This setting of seed results in the same value being sampled all the time!
+    # self.set_seed()

x_new = np.atleast_2d(
[
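The bug called out in the comment is the classic reseed-before-every-draw mistake: update_step reseeded the global RNG with the same seed on every call, so each "random" step replayed the identical sample. A standalone demonstration:

    import numpy as np

    # Re-seeding the global RNG before each draw resets its state, so the
    # "random" pair below is identical on every iteration:
    for _ in range(3):
        np.random.seed(42)
        print(np.random.uniform(low=0.0, high=1.0, size=2))

    # Seeding once, before the loop, keeps the run reproducible while
    # still producing distinct draws per iteration.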
16 changes: 13 additions & 3 deletions src/f3dasm/_src/run_optimization.py
@@ -7,15 +7,17 @@
from __future__ import annotations

# Standard
+from time import perf_counter
from typing import Any, Callable, Dict, List, Optional

# Third-party
import numpy as np
import pandas as pd
import xarray as xr
+from pathos.helpers import mp

from f3dasm.design import Domain
from f3dasm.optimization import Optimizer
-from pathos.helpers import mp

# Locals
from .datageneration.datagenerator import DataGenerator
@@ -37,7 +39,7 @@
class OptimizationResult:
def __init__(self, data: List[ExperimentData], optimizer: Optimizer,
kwargs: Optional[Dict[str, Any]], data_generator: DataGenerator,
-                 number_of_samples: int, seeds: List[int]):
+                 number_of_samples: int, seeds: List[int], opt_time: float = 0.0):
"""Optimization results object
Parameters
@@ -54,13 +56,16 @@ def __init__(self, data: List[ExperimentData], optimizer: Optimizer,
number of initial samples, sampled by the sampling strategy
seeds
list of seeds that were used for each realization
+opt_time
+    total optimization time in seconds
"""
self.data = data
self.optimizer = optimizer_factory(optimizer=optimizer, domain=self.data[0].domain)
self.data_generator = data_generator
self.kwargs = kwargs
self.number_of_samples = number_of_samples
self.seeds = seeds
+self.opt_time = opt_time

self.func = datagenerator_factory(data_generator=self.data_generator,
domain=self.data[0].domain, kwargs=kwargs)
@@ -73,7 +78,7 @@ def _log(self):
f"dim={len(self.data[0].domain)}, "
f"noise={self.func.noise}) "
f"with {self.optimizer.get_name()} optimizer for "
f"{len(self.data)} realizations.")
f"{len(self.data)} realizations ({self.opt_time:.3f} s).")
)

def to_xarray(self) -> xr.Dataset:
@@ -228,6 +233,8 @@ def run_multiple_realizations(
Object with the optimization data results
"""

+start_timer = perf_counter()

if kwargs is None:
kwargs = {}

@@ -264,13 +271,16 @@ def run_multiple_realizations(
}
results.append(run_optimization(**args))

+opt_time = perf_counter() - start_timer

return OptimizationResult(
data=results,
optimizer=optimizer,
data_generator=data_generator,
kwargs=kwargs,
number_of_samples=number_of_samples,
seeds=[seed + i for i in range(realizations)],
+opt_time=opt_time,
)


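The timing addition is a plain perf_counter bracket around the realization loop; the elapsed wall-clock seconds travel into OptimizationResult.opt_time and appear in the log line patched above. The pattern in isolation:

    from time import perf_counter

    start_timer = perf_counter()
    # ... run all realizations ...
    opt_time = perf_counter() - start_timer  # wall-clock seconds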
