update documentation

bessagroup · May 15, 2024 · 11671b8 · 11671b8
1 parent 0e3aa97
commit 11671b8
Show file tree

Hide file tree

Showing 43 changed files with 248 additions and 34 deletions.
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -7,6 +7,8 @@
 import os
 import sys
 
+from sphinx_gallery.sorting import FileNameSortKey
+
 # -- Search path for extensions and modules -----------------------------------
 # If extensions or Python modules are in a different directory than this file,
 # then add these directories to sys.path so that Sphinx can search for them
@@ -52,6 +54,8 @@
  'reference_url': {'sphinx_gallery': None, },
  'backreferences_dir': 'gen_modules/backreferences',
  'doc_module': ('f3dasm',),
+ "filename_pattern": r"/*\.py",
+ "within_subsection_order": FileNameSortKey,
 }
 
 # Source: https://www.sphinx-doc.org/en/master/usage/configuration.html#confval-source_suffix

diff --git a/docs/source/rst_doc_files/classes/datageneration/datagenerator.rst b/docs/source/rst_doc_files/classes/datageneration/datagenerator.rst
@@ -33,7 +33,7 @@ We provide the datagenerator to the :meth:`~f3dasm.ExperimentData.evaluate` func
  Any key-word arguments that need to be passed down to the :class:`~f3dasm.datageneration.DataGenerator` can be passed in the :code:`kwargs` argument of the :meth:`~f3dasm.ExperimentData.evaluate` function.
 
 
-There are three methods available of handeling the :class:`~f3dasm.ExperimentSample` objects:
+There are three methods available of handling the :class:`~f3dasm.ExperimentSample` objects:
 
 * :code:`sequential`: regular for-loop over each of the :class:`~f3dasm.ExperimentSample` objects in order
 * :code:`parallel`: utilizing the multiprocessing capabilities (with the `pathos <https://pathos.readthedocs.io/en/latest/pathos.html>`_ multiprocessing library), each :class:`~f3dasm.ExperimentSample` object is run in a separate core

diff --git a/docs/source/sg_execution_times.rst b/docs/source/sg_execution_times.rst
@@ -6,7 +6,7 @@
 
 Computation times
 =================
-**00:00.467** total execution time for 8 files **from all galleries**:
+**00:00.762** total execution time for 10 files **from all galleries**:
 
 .. container::
 
@@ -32,27 +32,33 @@ Computation times
  * - Example
  - Time
  - Mem (MB)
- * - :ref:`sphx_glr_auto_examples_datageneration_plot_own_datagenerator.py` (``../../examples/datageneration/plot_own_datagenerator.py``)
- - 00:00.467
+ * - :ref:`sphx_glr_auto_examples_006_hydra_001_hydra_usage.py` (``../../examples/006_hydra/001_hydra_usage.py``)
+ - 00:00.762
  - 0.0
- * - :ref:`sphx_glr_auto_examples_datageneration_plot_builtin_benchmarkfunctions.py` (``../../examples/datageneration/plot_builtin_benchmarkfunctions.py``)
+ * - :ref:`sphx_glr_auto_examples_001_domain_001_domain_creation.py` (``../../examples/001_domain/001_domain_creation.py``)
  - 00:00.000
  - 0.0
- * - :ref:`sphx_glr_auto_examples_domain_plot_builtin_sampler.py` (``../../examples/domain/plot_builtin_sampler.py``)
+ * - :ref:`sphx_glr_auto_examples_001_domain_002_own_sampler.py` (``../../examples/001_domain/002_own_sampler.py``)
  - 00:00.000
  - 0.0
- * - :ref:`sphx_glr_auto_examples_domain_plot_domain_creation.py` (``../../examples/domain/plot_domain_creation.py``)
+ * - :ref:`sphx_glr_auto_examples_001_domain_003_builtin_sampler.py` (``../../examples/001_domain/003_builtin_sampler.py``)
  - 00:00.000
  - 0.0
- * - :ref:`sphx_glr_auto_examples_domain_plot_own_sampler.py` (``../../examples/domain/plot_own_sampler.py``)
+ * - :ref:`sphx_glr_auto_examples_002_experimentdata_001_experimentdata.py` (``../../examples/002_experimentdata/001_experimentdata.py``)
  - 00:00.000
  - 0.0
- * - :ref:`sphx_glr_auto_examples_experimentdata_plot_experimentdata.py` (``../../examples/experimentdata/plot_experimentdata.py``)
+ * - :ref:`sphx_glr_auto_examples_002_experimentdata_002_experimentdata_storing.py` (``../../examples/002_experimentdata/002_experimentdata_storing.py``)
  - 00:00.000
  - 0.0
- * - :ref:`sphx_glr_auto_examples_experimentdata_plot_experimentdata_storing.py` (``../../examples/experimentdata/plot_experimentdata_storing.py``)
+ * - :ref:`sphx_glr_auto_examples_003_datageneration_001_own_datagenerator.py` (``../../examples/003_datageneration/001_own_datagenerator.py``)
  - 00:00.000
  - 0.0
- * - :ref:`sphx_glr_auto_examples_hydra_plot_hydra_usage.py` (``../../examples/hydra/plot_hydra_usage.py``)
+ * - :ref:`sphx_glr_auto_examples_003_datageneration_002_builtin_benchmarkfunctions.py` (``../../examples/003_datageneration/002_builtin_benchmarkfunctions.py``)
+ - 00:00.000
+ - 0.0
+ * - :ref:`sphx_glr_auto_examples_003_datageneration_003_storing.py` (``../../examples/003_datageneration/003_storing.py``)
+ - 00:00.000
+ - 0.0
+ * - :ref:`sphx_glr_auto_examples_004_optimization_001_builtin_optimizers.py` (``../../examples/004_optimization/001_builtin_optimizers.py``)
  - 00:00.000
  - 0.0
diff --git a/examples/domain/plot_domain_creation.py → examples/001_domain/001_domain_creation.py b/examples/domain/plot_domain_creation.py → examples/001_domain/001_domain_creation.py
diff --git a/examples/domain/plot_own_sampler.py → examples/001_domain/002_own_sampler.py b/examples/domain/plot_own_sampler.py → examples/001_domain/002_own_sampler.py
diff --git a/examples/domain/plot_builtin_sampler.py → examples/001_domain/003_builtin_sampler.py b/examples/domain/plot_builtin_sampler.py → examples/001_domain/003_builtin_sampler.py
diff --git a/examples/domain/README.rst → examples/001_domain/README.rst b/examples/domain/README.rst → examples/001_domain/README.rst
diff --git a/...les/experimentdata/plot_experimentdata.py → .../002_experimentdata/001_experimentdata.py b/...les/experimentdata/plot_experimentdata.py → .../002_experimentdata/001_experimentdata.py
diff --git a/...rimentdata/plot_experimentdata_storing.py → ...erimentdata/002_experimentdata_storing.py b/...rimentdata/plot_experimentdata_storing.py → ...erimentdata/002_experimentdata_storing.py
diff --git a/examples/experimentdata/README.rst → examples/002_experimentdata/README.rst b/examples/experimentdata/README.rst → examples/002_experimentdata/README.rst
diff --git a/...le_project_dir/experiment_data/domain.pkl → ...le_project_dir/experiment_data/domain.pkl b/...le_project_dir/experiment_data/domain.pkl → ...le_project_dir/experiment_data/domain.pkl
diff --git a/...ple_project_dir/experiment_data/input.csv → ...ple_project_dir/experiment_data/input.csv b/...ple_project_dir/experiment_data/input.csv → ...ple_project_dir/experiment_data/input.csv
diff --git a/...mple_project_dir/experiment_data/jobs.pkl → ...mple_project_dir/experiment_data/jobs.pkl b/...mple_project_dir/experiment_data/jobs.pkl → ...mple_project_dir/experiment_data/jobs.pkl
diff --git a/...le_project_dir/experiment_data/output.csv → ...le_project_dir/experiment_data/output.csv b/...le_project_dir/experiment_data/output.csv → ...le_project_dir/experiment_data/output.csv
diff --git a/.../datageneration/plot_own_datagenerator.py → ...3_datageneration/001_own_datagenerator.py b/.../datageneration/plot_own_datagenerator.py → ...3_datageneration/001_own_datagenerator.py
@@ -168,6 +168,16 @@ def execute(self):
 
 
 car_stopping_distance = CarStoppingDistance(mu_z=1.5, sigma_z=0.5)
-experiment_data_class.evaluate(data_generator=car_stopping_distance)
+experiment_data_class.evaluate(
+ data_generator=car_stopping_distance, mode='sequential')
 
 print(experiment_data_class)
+
+###############################################################################
+#
+# There are four modes available for evaluating the experiments:
+#
+# * :code:`sequential`: regular for-loop over each of the experiments in order
+# * :code:`parallel`: utilizing the multiprocessing capabilities (with the `pathos <https://pathos.readthedocs.io/en/latest/pathos.html>`_ multiprocessing library), each experiment is run in a separate core
+# * :code:`cluster`: each experiment is run in a separate node. This is especially useful on a high-performance computation cluster where you have multiple worker nodes and a commonly accessible resource folder. After completion of an experiment, the node will automatically pick the next available open experiment.
+# * :code:`cluster_parallel`: Combination of the :code:`cluster` and :code:`parallel` mode. Each node will run multiple samples in parallel.
diff --git a/...ration/plot_builtin_benchmarkfunctions.py → ...eration/002_builtin_benchmarkfunctions.py b/...ration/plot_builtin_benchmarkfunctions.py → ...eration/002_builtin_benchmarkfunctions.py
diff --git a/examples/003_datageneration/003_storing.py b/examples/003_datageneration/003_storing.py
@@ -0,0 +1,142 @@
+"""
+Storing data generation output to disk
+======================================
+
+After running your simulation, you can store the result back into the :class:`~f3dasm.ExperimentSample` with the :meth:`~f3dasm.ExperimentSample.store` method.
+There are two ways of storing your output:
+
+* Singular values can be stored directly to the :attr:`~f3dasm.ExperimentData.output_data`
+* Large objects can be stored to disk and a reference path will be stored to the :attr:`~f3dasm.ExperimentData.output_data`.
+"""
+
+import numpy as np
+
+from f3dasm import ExperimentData, StoreProtocol
+from f3dasm.datageneration import DataGenerator
+from f3dasm.design import make_nd_continuous_domain
+
+###############################################################################
+# For this example we create a 3 dimensional continuous domain and generate 10 random samples.
+
+domain = make_nd_continuous_domain([[0., 1.], [0., 1.], [0., 1.]])
+experiment_data = ExperimentData.from_sampling(
+ sampler='random', domain=domain, n_samples=10, seed=42)
+
+###############################################################################
+# Single values
+# -------------
+#
+# Single values or small lists can be stored to the :class:`~f3dasm.ExperimentData` using the ``to_disk=False`` argument, with the name of the parameter as the key.
+# This will create a new output parameter if the parameter name is not found in :attr:`~f3dasm.ExperimentData.output_data` of the :class:`~f3dasm.ExperimentData` object.
+# This is especially useful if you want to get a quick overview of some loss or design metric of your sample.
+#
+# We create a custom datagenerator that sums the input features and stores the result back to the :class:`~f3dasm.ExperimentData` object:
+
+
+class MyDataGenerator_SumInput(DataGenerator):  # example generator: stores the sum of the inputs as output 'y'
+ def execute(self):
+ input_, _ = self.experiment_sample.to_numpy()  # second returned value is not used here
+ y = float(sum(input_))  # presumably turns a numpy scalar into a built-in float -- TODO confirm
+ self.experiment_sample.store(object=y, name='y', to_disk=False)  # to_disk=False: value is kept in the output data itself
+
+###############################################################################
+# We pass the custom data generator to the :meth:`~f3dasm.ExperimentData.evaluate` method and inspect the experimentdata after completion:
+
+
+my_data_generator_single = MyDataGenerator_SumInput()
+
+experiment_data.evaluate(data_generator=my_data_generator_single)
+print(experiment_data)
+
+###############################################################################
+#
+# All built-in singular types are supported for storing to the :class:`~f3dasm.ExperimentData` this way. Array-like data such as numpy arrays and pandas dataframes are **not** supported and will raise an error.
+#
+# .. note::
+#
+#    Outputs stored directly to the :attr:`~f3dasm.ExperimentData.output_data` will be stored within the :class:`~f3dasm.ExperimentData` object.
+#    This means that the output will be loaded into memory every time this object is accessed. For large outputs, it is recommended to store the output to disk.
+#
+# Large objects and array-like data
+# ---------------------------------
+#
+# In order to store large objects or array-like data, the :meth:`~f3dasm.ExperimentSample.store` method can be used with the ``to_disk=True`` argument.
+# A reference (:code:`Path`) will be saved to the :attr:`~f3dasm.ExperimentData.output_data`.
+#
+# We create another custom datagenerator that doubles the input features, but leaves them as an array:
+
+experiment_data = ExperimentData.from_sampling(
+ sampler='random', domain=domain, n_samples=10, seed=42)
+
+
+class MyDataGenerator_DoubleInputs(DataGenerator):
+ def execute(self):
+ input_, output_ = self.experiment_sample.to_numpy()
+ y = input_ * 2
+ self.experiment_sample.store(
+ object=y, name='output_numpy', to_disk=True)
+
+
+my_data_generator = MyDataGenerator_DoubleInputs()
+
+experiment_data.evaluate(data_generator=my_data_generator)
+print(experiment_data)
+
+###############################################################################
+# :mod:`f3dasm` will automatically create a new directory in the project directory for each output parameter and store the object with a generated filename referencing the :attr:`~f3dasm.design.ExperimentSample.job_number` of the design.
+#
+# .. code-block:: none
+#    :caption: Directory Structure
+#
+#    project_dir/
+#    ├── output_numpy/
+#    │   ├── 0.npy
+#    │   ├── 1.npy
+#    │   ├── 2.npy
+#    │   └── 3.npy
+#    │
+#    └── experiment_data/
+#        ├── domain.pkl
+#        ├── input.csv
+#        ├── output.csv
+#        └── jobs.pkl
+#
+#
+# In the output data of the :class:`~f3dasm.ExperimentData` object, a reference path (e.g. :code:`/output_numpy/0.npy`) to the stored object will be saved.
+#
+# Create a custom storage method
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+#
+# :mod:`f3dasm` has built-in storing functions for numpy :class:`~numpy.ndarray`, pandas :class:`~pandas.DataFrame` and xarray :class:`~xarray.DataArray` and :class:`~xarray.Dataset` objects.
+# For any other type of object, the object will be stored in the `pickle <https://docs.python.org/3/library/pickle.html>`_ format.
+#
+# You can provide your own storing class to the :meth:`~f3dasm.ExperimentSample.store` method call:
+#
+# * a ``store`` method should store ``self.object`` to disk at the location of ``self.path``
+# * a ``load`` method should load the object from disk at the location of ``self.path`` and return it
+# * a class variable ``suffix`` should be defined, which is the file extension of the stored object as a string.
+# * the class should inherit from the :class:`~f3dasm.StoreProtocol` class
+#
+# You can take the following class for a :class:`~numpy.ndarray` object as an example:
+
+
+class NumpyStore(StoreProtocol):
+ suffix: int = '.npy'
+
+ def store(self) -> None:
+ np.save(file=self.path.with_suffix(self.suffix), arr=self.object)
+
+ def load(self) -> np.ndarray:
+ return np.load(file=self.path.with_suffix(self.suffix))
+
+###############################################################################
+# After defining the storing function, it can be used as an additional argument in the :meth:`~f3dasm.ExperimentSample.store` method:
+
+
+class MyDataGenerator_DoubleInputs(DataGenerator):
+ def execute(self):
+ input_, output_ = self.experiment_sample.to_numpy()
+ y = input_ * 2
+ self.experiment_sample.store(
+ object=y, name='output_numpy',
+ to_disk=True, store_method=NumpyStore)
diff --git a/examples/datageneration/README.rst → examples/003_datageneration/README.rst b/examples/datageneration/README.rst → examples/003_datageneration/README.rst
diff --git a/examples/003_datageneration/output_numpy/0.npy b/examples/003_datageneration/output_numpy/0.npy
diff --git a/examples/003_datageneration/output_numpy/1.npy b/examples/003_datageneration/output_numpy/1.npy
diff --git a/examples/003_datageneration/output_numpy/2.npy b/examples/003_datageneration/output_numpy/2.npy
diff --git a/examples/003_datageneration/output_numpy/3.npy b/examples/003_datageneration/output_numpy/3.npy
diff --git a/examples/003_datageneration/output_numpy/4.npy b/examples/003_datageneration/output_numpy/4.npy
diff --git a/examples/003_datageneration/output_numpy/5.npy b/examples/003_datageneration/output_numpy/5.npy
diff --git a/examples/003_datageneration/output_numpy/6.npy b/examples/003_datageneration/output_numpy/6.npy
diff --git a/examples/003_datageneration/output_numpy/7.npy b/examples/003_datageneration/output_numpy/7.npy
diff --git a/examples/003_datageneration/output_numpy/8.npy b/examples/003_datageneration/output_numpy/8.npy
diff --git a/examples/003_datageneration/output_numpy/9.npy b/examples/003_datageneration/output_numpy/9.npy
diff --git a/examples/004_optimization/001_builtin_optimizers.py b/examples/004_optimization/001_builtin_optimizers.py
@@ -0,0 +1,62 @@
+"""
+Use the built-in optimization algorithms
+========================================
+
+In this example, we will use the built-in optimization algorithms provided by the :mod:`f3dasm.optimization` submodule to optimize the Rosenbrock benchmark function.
+"""
+
+import matplotlib.pyplot as plt
+
+from f3dasm import ExperimentData
+from f3dasm.design import make_nd_continuous_domain
+from f3dasm.optimization import OPTIMIZERS
+
+###############################################################################
+# We create a 3D continuous domain and sample one point from it.
+
+domain = make_nd_continuous_domain([[-1., 1.], [-1., 1.], [-1., 1.]])
+
+experimentdata = ExperimentData.from_sampling(
+ domain=domain, sampler="random", seed=42, n_samples=1)
+
+print(experimentdata)
+
+###############################################################################
+# We evaluate the sample point on the Rosenbrock benchmark function:
+
+experimentdata.evaluate(data_generator='Rosenbrock', kwargs={
+ 'scale_bounds': domain.get_bounds(), 'offset': False})
+
+print(experimentdata)
+
+###############################################################################
+# We call the :meth:`~f3dasm.ExperimentData.optimize` method with ``optimizer='CG'``
+# and ``data_generator='Rosenbrock'`` to optimize the Rosenbrock benchmark function with the
+# Conjugate Gradient Optimizer:
+
+experimentdata.optimize(optimizer='CG', data_generator='Rosenbrock', kwargs={
+ 'scale_bounds': domain.get_bounds(), 'offset': False},
+ iterations=50)
+
+print(experimentdata)
+
+###############################################################################
+# We plot the convergence of the optimization process:
+
+_, df_output = experimentdata.to_pandas()
+
+fig, ax = plt.subplots()
+ax.plot(df_output)
+_ = ax.set_xlabel('number of function evaluations')
+_ = ax.set_ylabel('$f(x)$')
+ax.set_yscale('log')
+
+###############################################################################
+# Hyper-parameters of the optimizer can be passed as dictionary to the :meth:`~f3dasm.ExperimentData.optimize` method.
+# If none are provided, default hyper-parameters are used. The hyper-parameters are specific to the optimizer used, and can be found in the corresponding documentation.
+#
+# An overview of the available optimizers can be found in :ref:`this section <implemented optimizers>` of the documentation.
+# Access to more off-the-shelf optimizers requires the installation of the `f3dasm_optimize <https://bessagroup.github.io/f3dasm_optimize/>`_ package and its corresponding dependencies.
+# You can check which optimizers can be used by inspecting the ``f3dasm.optimization.OPTIMIZERS`` variable:
+
+print(OPTIMIZERS)
diff --git a/examples/004_optimization/README.rst b/examples/004_optimization/README.rst
@@ -0,0 +1,4 @@
+Optimization
+------------
+
+Examples that use the :mod:`f3dasm.optimization` module.
diff --git a/examples/005_workflow/README.rst b/examples/005_workflow/README.rst
@@ -0,0 +1,2 @@
+Combining everything in a data-driven workflow
+==============================================
diff --git a/examples/hydra/plot_hydra_usage.py → examples/006_hydra/001_hydra_usage.py b/examples/hydra/plot_hydra_usage.py → examples/006_hydra/001_hydra_usage.py
diff --git a/examples/hydra/README.rst → examples/006_hydra/README.rst b/examples/hydra/README.rst → examples/006_hydra/README.rst
diff --git a/examples/hydra/config.yaml → examples/006_hydra/config.yaml b/examples/hydra/config.yaml → examples/006_hydra/config.yaml
diff --git a/examples/hydra/config_combining.yaml → examples/006_hydra/config_combining.yaml b/examples/hydra/config_combining.yaml → examples/006_hydra/config_combining.yaml
diff --git a/examples/hydra/config_from_file.yaml → examples/006_hydra/config_from_file.yaml b/examples/hydra/config_from_file.yaml → examples/006_hydra/config_from_file.yaml
diff --git a/examples/hydra/config_from_sampling.yaml → examples/006_hydra/config_from_sampling.yaml b/examples/hydra/config_from_sampling.yaml → examples/006_hydra/config_from_sampling.yaml
diff --git a/...le_project_dir/experiment_data/domain.pkl → ...le_project_dir/experiment_data/domain.pkl b/...le_project_dir/experiment_data/domain.pkl → ...le_project_dir/experiment_data/domain.pkl
diff --git a/...ple_project_dir/experiment_data/input.csv → ...ple_project_dir/experiment_data/input.csv b/...ple_project_dir/experiment_data/input.csv → ...ple_project_dir/experiment_data/input.csv
diff --git a/...mple_project_dir/experiment_data/jobs.pkl → ...mple_project_dir/experiment_data/jobs.pkl b/...mple_project_dir/experiment_data/jobs.pkl → ...mple_project_dir/experiment_data/jobs.pkl
diff --git a/...le_project_dir/experiment_data/output.csv → ...le_project_dir/experiment_data/output.csv b/...le_project_dir/experiment_data/output.csv → ...le_project_dir/experiment_data/output.csv
diff --git a/src/f3dasm/_src/datageneration/datagenerator.py b/src/f3dasm/_src/datageneration/datagenerator.py
@@ -10,22 +10,18 @@
 
 # Standard
 import inspect
-import sys
 from abc import abstractmethod
 from functools import partial
 from typing import Any, Callable, Dict, List, Optional
 
-if sys.version_info < (3, 8): # NOQA
- from typing_extensions import Protocol # NOQA
-else:
- from typing import Protocol
-
 # Third-party
 import numpy as np
 
 # Local
 from ..design.domain import Domain
-from ..experimentdata.experimentsample import _experimentsample_factory
+# from ..experimentdata._io import StoreProtocol
+from ..experimentdata.experimentsample import (ExperimentSample,
+ _experimentsample_factory)
 from ..logger import time_and_log
 
 # Authorship & Credits
@@ -38,18 +34,6 @@
 # =============================================================================
 
 
-class ExperimentSample(Protocol):
- def get(self, key: str) -> Any:
- ...
-
- def store(self, object: Any, name: str, to_disk: bool) -> None:
- ...
-
- @property
- def job_number(self) -> int:
- ...
-
-
 class DataGenerator:
  """Base class for a data generator"""
 
@@ -231,7 +215,7 @@ def convert_function(f: Callable,
  class TempDataGenerator(DataGenerator):
  def execute(self, **_kwargs) -> None:
  _input = {input_name: self.experiment_sample.get(input_name)
- for input_name in input}
+ for input_name in input if input_name not in kwargs}
  _output = f(**_input, **kwargs)
 
  # check if output is empty

diff --git a/tests/datageneration/test_datagenerator.py b/tests/datageneration/test_datagenerator.py
@@ -10,7 +10,7 @@
 
 def test_convert_function(
  experiment_data: ExperimentData, function_1: Callable):
- data_generator = convert_function(f=function_1, input=['x'], output=[
+ data_generator = convert_function(f=function_1, output=[
  'y0', 'y1'], kwargs={'s': 103})
 
  assert isinstance(data_generator, DataGenerator)
@@ -20,7 +20,7 @@ def test_convert_function(
 
 def test_convert_function2(
  experiment_data: ExperimentData, function_2: Callable):
- data_generator = convert_function(f=function_2, input=['x'], output=[
+ data_generator = convert_function(f=function_2, output=[
  'y0', 'y1'])
 
  assert isinstance(data_generator, DataGenerator)