Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Separate data generation step #37

Merged
merged 4 commits into from
Oct 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/gen_experiments/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ class NoExperiment:
name = "No Experiment"
lookup_dict = {"arg": {"foo": 1}}

@staticmethod
def gen_data(*args: Any, **kwargs: Any) -> dict[str, Any]:
return {}

@staticmethod
def run(
*args: Any, return_all: bool = True, **kwargs: Any
Expand Down
157 changes: 129 additions & 28 deletions src/gen_experiments/data.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,35 @@
from collections.abc import Sequence
from math import ceil
from pathlib import Path
from typing import Callable, Optional, cast
from typing import Any, Callable, Optional, cast
from warnings import warn

import mitosis
import numpy as np
import scipy

from gen_experiments.gridsearch.typing import GridsearchResultDetails
from gen_experiments.utils import Float1D, Float2D
from .gridsearch.typing import GridsearchResultDetails
from .odes import ode_setup
from .pdes import pde_setup
from .typing import Float1D, Float2D, ProbData

INTEGRATOR_KEYWORDS = {"rtol": 1e-12, "method": "LSODA", "atol": 1e-12}
TRIALS_FOLDER = Path(__file__).parent.absolute() / "trials"


def gen_data(
rhs_func: Callable,
n_coord: int,
group: str,
seed: Optional[int] = None,
n_trajectories: int = 1,
x0_center: Optional[Float1D] = None,
ic_stdev: float = 3,
noise_abs: Optional[float] = None,
noise_rel: Optional[float] = None,
nonnegative: bool = False,
dt: float = 0.01,
t_end: float = 10,
) -> tuple[float, Float1D, list[Float2D], list[Float2D], list[Float2D], list[Float2D]]:
) -> dict[str, Any]:
"""Generate random training and test data

An Experiment step according to the mitosis experiment runner.
Note that test data has no noise.

Arguments:
Expand All @@ -44,20 +45,71 @@ def gen_data(
noise_rel (float): measurement noise-to-signal power ratio.
Either noise_abs or noise_rel must be None. Defaults to
None.
nonnegative (bool): Whether x0 must be nonnegative, such as for
population models. If so, a gamma distribution is
used, rather than a normal distribution.
dt: time step for sample
t_end: end time of simulation

Returns:
dt, t_train, x_train, x_test, x_dot_test, x_train_true
dictionary of data and descriptive information
"""
coeff_true = ode_setup[group]["coeff_true"]
input_features = ode_setup[group]["input_features"]
rhsfunc = ode_setup[group]["rhsfunc"]
try:
x0_center = ode_setup[group]["x0_center"]
except KeyError:
x0_center = np.zeros((len(input_features)), dtype=np.float_)
try:
nonnegative = ode_setup[group]["nonnegative"]
except KeyError:
nonnegative = False
if noise_abs is not None and noise_rel is not None:
raise ValueError("Cannot specify both noise_abs and noise_rel")
elif noise_abs is None and noise_rel is None:
noise_abs = 0.1

dt, t_train, x_train, x_test, x_dot_test, x_train_true = _gen_data(
rhsfunc,
len(input_features),
seed,
x0_center=x0_center,
nonnegative=nonnegative,
n_trajectories=n_trajectories,
ic_stdev=ic_stdev,
noise_abs=noise_abs,
noise_rel=noise_rel,
dt=dt,
t_end=t_end,
)
return {
"data": ProbData(
dt,
t_train,
x_train,
x_test,
x_dot_test,
x_train_true,
input_features,
coeff_true,
),
"main": f"{n_trajectories} trajectories of {rhsfunc.__qualname__}",
"metrics": {"rel_noise": noise_rel, "abs_noise": noise_abs},
}


def _gen_data(
rhs_func: Callable,
n_coord: int,
seed: Optional[int],
n_trajectories: int,
x0_center: Float1D,
ic_stdev: float,
noise_abs: Optional[float],
noise_rel: Optional[float],
nonnegative: bool,
dt: float,
t_end: float,
) -> tuple[float, Float1D, list[Float2D], list[Float2D], list[Float2D], list[Float2D]]:
rng = np.random.default_rng(seed)
if x0_center is None:
x0_center = np.zeros((n_coord), dtype=np.float_)
t_train = np.arange(0, t_end, dt, dtype=np.float_)
t_train_span = (t_train[0], t_train[-1])
if nonnegative:
Expand Down Expand Up @@ -135,39 +187,85 @@ def _alert_short(arr):


def gen_pde_data(
    group: str,
    init_cond: np.ndarray,
    seed: int | None = None,
    noise_abs: float | None = None,
    rel_noise: float | None = None,
) -> dict[str, Any]:
    """Generate PDE measurement data for training

    For simplicity, Trajectories have been removed,
    Test data is the same as Train data.

    The right-hand-side function, input features, spatial grid, number of
    spatial dimensions, true coefficients and (optionally) time arguments
    are all looked up in ``pde_setup[group]``.

    Arguments:
        group: name of the PDE; the key used to index ``pde_setup``.
        init_cond: Initial Conditions for the PDE
        seed (int): the random seed for number generation
        noise_abs (float): measurement noise standard deviation.
            Either noise_abs or rel_noise must be None.
        rel_noise (float): measurement noise relative to amplitude of
            true data.  Amplitude of data is calculated as the max value
            of the power spectrum.  Defaults to 0.1 only when noise_abs
            is also None.

    Returns:
        dictionary with three entries: ``"data"``, a ``ProbData`` holding
        dt, t_train, x_train, x_test, x_dot_test, x_train_true,
        input_features and coeff_true; ``"main"``, a one-line description
        string; and ``"metrics"``, the noise levels that were used.

    Raises:
        ValueError: if both noise_abs and rel_noise are given (raised by
            ``_gen_pde_data``).
    """
    rhsfunc = pde_setup[group]["rhsfunc"]["func"]
    input_features = pde_setup[group]["input_features"]
    # Only fall back to the default relative noise when the caller gave no
    # noise level at all.  Defaulting unconditionally would make every call
    # with noise_abs alone fail the "cannot specify both" check downstream.
    if rel_noise is None and noise_abs is None:
        rel_noise = 0.1
    spatial_grid = pde_setup[group]["spatial_grid"]
    # Two values forwarded to the integrator and to rhsfunc as extra
    # arguments: grid extent divided by point count, and the point count.
    spatial_args = [
        (spatial_grid[-1] - spatial_grid[0]) / len(spatial_grid),
        len(spatial_grid),
    ]
    dimension = pde_setup[group]["rhsfunc"]["dimension"]
    coeff_true = pde_setup[group]["coeff_true"]
    # "time_args" is optional; this guarded lookup is the only one, so the
    # fallback of dt=0.01, t_end=10 is actually reachable.
    try:
        time_args = pde_setup[group]["time_args"]
    except KeyError:
        time_args = [0.01, 10]
    dt, t_train, x_train, x_test, x_dot_test, x_train_true = _gen_pde_data(
        rhsfunc,
        init_cond,
        spatial_args,
        dimension,
        seed,
        noise_abs=noise_abs,
        noise_rel=rel_noise,
        dt=time_args[0],
        t_end=time_args[1],
    )
    return {
        "data": ProbData(
            dt,
            t_train,
            x_train,
            x_test,
            x_dot_test,
            x_train_true,
            input_features,
            coeff_true,
        ),
        "main": f"1 trajectories of {rhsfunc.__qualname__}",
        "metrics": {"rel_noise": rel_noise, "abs_noise": noise_abs},
    }


def _gen_pde_data(
rhs_func: Callable,
init_cond: np.ndarray,
spatial_args: Sequence,
dimension: int,
seed: int | None,
noise_abs: float | None,
noise_rel: float | None,
dt: float,
t_end: int,
):
if noise_abs is not None and noise_rel is not None:
raise ValueError("Cannot specify both noise_abs and noise_rel")
elif noise_abs is None and noise_rel is None:
Expand All @@ -182,7 +280,7 @@ def gen_pde_data(
t_train_span,
init_cond,
t_eval=t_train,
args=args,
args=spatial_args,
**INTEGRATOR_KEYWORDS,
).y.T
)
Expand All @@ -199,7 +297,10 @@ def gen_pde_data(
x_test = x_train
x_test = np.moveaxis(x_test, -1, 0)
x_dot_test = np.array(
[[rhs_func(0, xij, args[0], args[1]) for xij in xi] for xi in x_test]
[
[rhs_func(0, xij, spatial_args[0], spatial_args[1]) for xij in xi]
for xi in x_test
]
)
if dimension == 1:
x_dot_test = [np.moveaxis(x_dot_test, [0, 1], [-1, -2])]
Expand Down
10 changes: 9 additions & 1 deletion src/gen_experiments/gridsearch/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import gen_experiments

from .. import config
from ..data import gen_data, gen_pde_data
from ..odes import plot_ode_panel
from ..pdes import plot_pde_panel
from ..plotting import _PlotPrefs
Expand Down Expand Up @@ -175,8 +176,12 @@ def run(
base_ex, base_group = gen_experiments.experiments[group]
if base_ex.__name__ == "gen_experiments.odes":
plot_panel = plot_ode_panel
data_step = gen_data
elif base_ex.__name__ == "gen_experiments.pdes":
plot_panel = plot_pde_panel
data_step = gen_pde_data
elif base_ex.__name__ == "NoExperiment":
data_step = gen_experiments.NoExperiment.gen_data
if series_params is None:
series_params = SeriesList(None, None, [SeriesDef(group, {}, [], [])])
legends = False
Expand Down Expand Up @@ -217,9 +222,12 @@ def run(
start = process_time()
for axis_ind, key, val_list in zip(ind, new_grid_params, new_grid_vals):
curr_other_params[key] = val_list[axis_ind]
sim_params = curr_other_params.pop("sim_params", {})
data = data_step(seed=seed, **sim_params)
curr_results, grid_data = base_ex.run(
seed, **curr_other_params, display=False, return_all=True
data, **curr_other_params, display=False, return_all=True
)
curr_results["sim_params"] = sim_params
intermediate_data.append(
{"params": curr_other_params.flatten(), "pind": ind, "data": grid_data}
)
Expand Down
35 changes: 11 additions & 24 deletions src/gen_experiments/odes.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
import pysindy as ps

from . import config
from .data import gen_data
from .plotting import (
compare_coefficient_plots,
plot_test_trajectories,
plot_training_data,
)
from .typing import ProbData
from .utils import (
FullSINDyTrialData,
SINDyTrialData,
Expand Down Expand Up @@ -154,37 +154,24 @@ def forcing(t, x):


def run(
seed: int,
group: str,
sim_params: dict,
data: ProbData,
diff_params: dict,
feat_params: dict,
opt_params: dict,
display: bool = True,
return_all: bool = False,
) -> dict | tuple[dict, SINDyTrialData | FullSINDyTrialData]:
rhsfunc = ode_setup[group]["rhsfunc"]
input_features = ode_setup[group]["input_features"]
coeff_true = ode_setup[group]["coeff_true"]
try:
x0_center = ode_setup[group]["x0_center"]
except KeyError:
x0_center = None
try:
nonnegative = ode_setup[group]["nonnegative"]
except KeyError:
nonnegative = False
dt, t_train, x_train, x_test, x_dot_test, x_train_true = gen_data(
rhsfunc,
len(input_features),
seed,
x0_center=x0_center,
nonnegative=nonnegative,
**sim_params,
)
input_features = data.input_features
dt = data.dt
x_train = data.x_train
t_train = data.t_train
x_train_true = data.x_train_true
x_test = data.x_test
x_dot_test = data.x_dot_test
coeff_true = data.coeff_true
model = make_model(input_features, dt, diff_params, feat_params, opt_params)

model.fit(x_train)
model.fit(x_train, t=dt)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be t=t_train instead of t=dt, right?

coeff_true, coefficients, feature_names = unionize_coeff_matrices(model, coeff_true)

sim_ind = -1
Expand Down
Loading
Loading