diff --git a/src/f3dasm/_src/design/domain.py b/src/f3dasm/_src/design/domain.py index 172d9ce1..9c31b9df 100644 --- a/src/f3dasm/_src/design/domain.py +++ b/src/f3dasm/_src/design/domain.py @@ -14,7 +14,7 @@ from dataclasses import dataclass, field from pathlib import Path from typing import (Any, Dict, Iterable, Iterator, List, Literal, Optional, - Sequence, Type) + Protocol, Sequence, Type) # Third-party core import numpy as np @@ -36,6 +36,13 @@ # ============================================================================= +class _Data(Protocol): + def to_dataframe(self) -> pd.DataFrame: + ... + +# ============================================================================= + + @dataclass class Domain: """Main class for defining the domain of the design of experiments. @@ -238,6 +245,26 @@ def from_dataframe(cls, df_input: pd.DataFrame, return cls(space=input_space, output_space=output_space) + @classmethod + def from_data(cls: Type[Domain], + input_data: _Data, output_data: _Data) -> Domain: + """Initializes a Domain from input and output data. + + Parameters + ---------- + input_data : _Data + Input data. + output_data : _Data + Output data. + + Returns + ------- + Domain + Domain object + """ + return cls.from_dataframe( + input_data.to_dataframe(), output_data.to_dataframe()) + # Export # ============================================================================= @@ -645,9 +672,7 @@ def make_nd_continuous_domain(bounds: np.ndarray | List[List[float]], return Domain(space) -def _domain_factory(domain: Domain | DictConfig | None, - input_data: pd.DataFrame, - output_data: pd.DataFrame) -> Domain: +def _domain_factory(domain: Domain | DictConfig | str | Path) -> Domain: if isinstance(domain, Domain): return domain @@ -657,14 +682,14 @@ def _domain_factory(domain: Domain | DictConfig | None, elif isinstance(domain, DictConfig): return Domain.from_yaml(domain) - elif (input_data.empty and output_data.empty and domain is None): - return Domain() + # elif (input_data.empty and output_data.empty and domain is None): + # return Domain() - elif domain is None: - return Domain.from_dataframe( - input_data, output_data) + # elif domain is None: + # return Domain.from_dataframe( + # input_data, output_data) else: raise TypeError( - f"Domain must be of type Domain, DictConfig " - f"or None, not {type(domain)}") + f"Domain must be of type Domain, DictConfig, str or Path, " + f"not {type(domain)}") diff --git a/src/f3dasm/_src/experimentdata/_data.py b/src/f3dasm/_src/experimentdata/_data.py index 3817cda3..0721396b 100644 --- a/src/f3dasm/_src/experimentdata/_data.py +++ b/src/f3dasm/_src/experimentdata/_data.py @@ -186,7 +186,8 @@ def from_file(cls, filename: Path | str) -> _Data: return cls(df, columns=_Columns(_columns)) @classmethod - def from_numpy(cls: Type[_Data], array: np.ndarray) -> _Data: + def from_numpy(cls: Type[_Data], + array: np.ndarray, keys: Iterable[str]) -> _Data: """Loads the data from a numpy array. Parameters @@ -458,7 +459,8 @@ def _convert_dict_to_data(dictionary: Dict[str, Any]) -> _Data: return _Data(data=df, columns=_Columns(_columns)) -def _data_factory(data: DataTypes) -> _Data: +def _data_factory(data: DataTypes, + keys: Optional[Iterable[str]] = None) -> _Data: if data is None: return _Data() @@ -469,10 +471,10 @@ def _data_factory(data: DataTypes) -> _Data: return _Data.from_dataframe(data) elif isinstance(data, (Path, str)): - return _Data.from_file(data) + return _Data.from_file(Path(data)) elif isinstance(data, np.ndarray): - return _Data.from_numpy(data) + return _Data.from_numpy(data, keys=keys) else: raise TypeError( diff --git a/src/f3dasm/_src/experimentdata/_experimental/_jobqueue2.py b/src/f3dasm/_src/experimentdata/_experimental/_jobqueue2.py index 82721ace..8e10f4ac 100644 --- a/src/f3dasm/_src/experimentdata/_experimental/_jobqueue2.py +++ b/src/f3dasm/_src/experimentdata/_experimental/_jobqueue2.py @@ -35,6 +35,8 @@ class Status(str, Enum): def __str__(self) -> str: return self.value +# ============================================================================= + class NoOpenJobsError(Exception): """ @@ -52,6 +54,14 @@ def __init__(self, message): class Index: def __init__(self, jobs: pd.Series | None | str = None): + """ + Initializes the Index object. + + Parameters + ---------- + jobs : pd.Series, None, or str, optional + Series of jobs, None, or a single job as a string. + """ if isinstance(jobs, str): self.jobs = pd.Series(jobs, index=[0], dtype='string') @@ -62,9 +72,30 @@ def __init__(self, jobs: pd.Series | None | str = None): self.jobs = jobs def __len__(self) -> int: + """ + Returns the number of jobs. + + Returns + ------- + int + Number of jobs. + """ return len(self.jobs) def __add__(self, __o: Index | str) -> Index: + """ + Adds another Index or a string to this Index. + + Parameters + ---------- + __o : Index or str + Another Index object or a string representing a job. + + Returns + ------- + Index + A new Index object containing the combined jobs. + """ if isinstance(__o, str): __o = Index(__o) @@ -73,135 +104,212 @@ def __add__(self, __o: Index | str) -> Index: # Make a copy of other.jobs and modify its index other_jobs_copy = deepcopy(__o) - other_jobs_copy.jobs.index = pd.Index(range( - len(other_jobs_copy))) + self.jobs.index[-1] + 1 + other_jobs_copy.jobs.index = pd.Index( + range(len(other_jobs_copy))) + self.jobs.index[-1] + 1 return Index(pd.concat([self.jobs, other_jobs_copy.jobs])) def __getitem__(self, indices: int | slice | Iterable[int]) -> Index: + """ + Gets a subset of jobs by indices. + + Parameters + ---------- + indices : int, slice, or Iterable[int] + Indices to get. + + Returns + ------- + Index + A new Index object containing the selected jobs. + """ if isinstance(indices, int): indices = [indices] return Index(self.jobs[indices].copy()) def __eq__(self, __o: Index) -> bool: + """ + Checks if this Index is equal to another Index. + + Parameters + ---------- + __o : Index + Another Index object to compare. + + Returns + ------- + bool + True if the two Index objects are equal, False otherwise. + """ return self.jobs.equals(__o.jobs) def _repr_html_(self) -> str: + """ + Returns an HTML representation of the jobs. + + Returns + ------- + str + HTML representation of the jobs. + """ return self.jobs.__repr__() @property def indices(self) -> pd.Index: - """The indices of the jobs.""" + """ + The indices of the jobs. + + Returns + ------- + pd.Index + The indices of the jobs. + """ return self.jobs.index - def iloc(self, indices: Iterable[int]) -> Iterable[int]: + def iloc(self, indices: Iterable[int] | int) -> Iterable[int]: + """ + Gets the position of the given indices in the jobs. + + Parameters + ---------- + indices : Iterable[int] or int + Indices to locate. + + Returns + ------- + Iterable[int] + Positions of the given indices. + """ + if isinstance(indices, int): + indices = [indices] return self.indices.get_indexer(indices) - # Alternative Constructors - # ========================================================================= + def is_all_finished(self) -> bool: + """ + Checks if all jobs are finished. + + Returns + ------- + bool + True if all jobs are finished, False otherwise. + """ + return all(self.jobs.isin([Status.FINISHED, Status.ERROR])) @classmethod def from_data(cls: Type[Index], data: _Data, value: str = Status.OPEN) -> Index: - """Create a JobQueue object from a Data object. + """ + Create an Index object from a Data object. Parameters ---------- - data : Data + data : _Data Data object containing the data. - value : str + value : str, optional The value to assign to the jobs. Can be 'open', - 'in progress', 'finished', or 'error'. + 'in_progress', 'finished', or 'error'. Default is 'open'. Returns ------- - JobQueue - JobQueue object containing the loaded data. + Index + Index object containing the loaded data. """ return cls(pd.Series([value] * len(data), dtype='string')) @classmethod def from_file(cls: Type[Index], filename: Path | str) -> Index: - """Create a JobQueue object from a pickle file. + """ + Create an Index object from a pickle file. Parameters ---------- - filename : Path | str + filename : Path or str Name of the file. Returns ------- - JobQueue - JobQueue object containing the loaded data. + Index + Index object containing the loaded data. + + Raises + ------ + FileNotFoundError + If the specified file does not exist. """ - # Convert filename to Path if Path(filename).with_suffix('.csv').exists(): - return cls( - pd.read_csv(Path(filename).with_suffix('.csv'), - index_col=0)['0']) - + return cls(pd.read_csv(Path(filename).with_suffix('.csv'), + index_col=0)['0']) elif Path(filename).with_suffix('.pkl').exists(): - return cls( - pd.read_pickle(Path(filename).with_suffix('.pkl'))) - + return cls(pd.read_pickle(Path(filename).with_suffix('.pkl'))) else: raise FileNotFoundError(f"Jobfile {filename} does not exist.") - # Select - # ========================================================================= - def select_all(self, status: str) -> Index: - """Selects all jobs with a certain status. + """ + Selects all jobs with a certain status. Parameters ---------- status : str - Status of the jobs to select + Status of the jobs to select. Returns ------- - JobQueue - JobQueue object containing the selected jobs. + Index + Index object containing the selected jobs. """ return Index(self.jobs[self.jobs == status]) - # Export - # ========================================================================= - def store(self, filename: Path) -> None: - """Stores the jobs in a pickle file. + """ + Stores the jobs in a pickle file. Parameters ---------- filename : Path Path of the file. """ - self.jobs.to_csv(filename.with_suffix('.csv')) + self.jobs.to_pickle(filename.with_suffix('.pkl')) + # self.jobs.to_csv(filename.with_suffix('.csv')) def to_dataframe(self, name: str = "") -> pd.DataFrame: - """Converts the job queue to a DataFrame. + """ + Converts the job queue to a DataFrame. Parameters ---------- name : str, optional - Name of the column, by default "". - - Note - ---- - If the name is not specified, the column name will be an empty string + Name of the column. Default is an empty string. Returns ------- - DataFrame + pd.DataFrame DataFrame containing the jobs. """ - return self.jobs.to_frame("") + return self.jobs.to_frame(name) - # Append and remove jobs - # ========================================================================= + def get_open_job(self) -> int: + """ + Returns the index of an open job. - def remove(self, indices: List[int]): - """Removes a subset of the jobs. + Returns + ------- + int + Index of an open job. + + Raises + ------ + NoOpenJobsError + If no open jobs are found. + """ + try: + return int(self.jobs[self.jobs == Status.OPEN].index[0]) + except IndexError: + raise NoOpenJobsError("No open jobs found.") + + def remove(self, indices: List[int]) -> None: + """ + Removes a subset of the jobs. Parameters ---------- @@ -210,69 +318,56 @@ def remove(self, indices: List[int]): """ self.jobs = self.jobs.drop(indices) - def overwrite( - self, indices: Iterable[int], - other: Index | str) -> None: + def overwrite(self, indices: Iterable[int], other: Index | str) -> None: + """ + Overwrites the jobs at the specified indices with new jobs. + Parameters + ---------- + indices : Iterable[int] + Indices to overwrite. + other : Index or str + New jobs to overwrite with. + """ if isinstance(other, str): - other = Index( - pd.Series([other], index=[0], dtype='string')) + other = Index(pd.Series([other], index=[0], dtype='string')) self.jobs.update(other.jobs.set_axis(indices)) - # Mark - # ========================================================================= - def mark(self, index: int | slice | Iterable[int], status: Status) -> None: - """Marks a job with a certain status. + """ + Marks a job with a certain status. Parameters ---------- - index : int + index : int, slice, or Iterable[int] Index of the job to mark. - status : str + status : Status Status to mark the job with. """ self.jobs.loc[index] = status def mark_all_in_progress_open(self) -> None: - """Marks all jobs as 'open'.""" + """ + Marks all jobs as 'open'. + """ self.jobs = self.jobs.replace(Status.IN_PROGRESS, Status.OPEN) def mark_all_error_open(self) -> None: - """Marks all jobs as 'open'.""" - self.jobs = self.jobs.replace(Status.ERROR, Status.OPEN) - # Miscellanous - # ========================================================================= - - def is_all_finished(self) -> bool: - """Checks if all jobs are finished. - - Returns - ------- - bool - True if all jobs are finished, False otherwise. """ - return all(self.jobs.isin([Status.FINISHED, Status.ERROR])) - - def get_open_job(self) -> int: - """Returns the index of an open job. - - Returns - ------- - int - Index of an open job. + Marks all jobs as 'open'. """ - try: # try to find an open job - return int(self.jobs[self.jobs == Status.OPEN].index[0]) - except IndexError: - raise NoOpenJobsError("No open jobs found.") + self.jobs = self.jobs.replace(Status.ERROR, Status.OPEN) def reset_index(self) -> None: - """Resets the index of the jobs.""" + """ + Resets the index of the jobs. + """ self.jobs.reset_index(drop=True, inplace=True) +# ============================================================================= + def _jobs_factory(jobs: Path | str | Index | None, input_data: _Data, output_data: _Data, job_value: Status) -> Index: """Creates a Index object from particular inpute diff --git a/src/f3dasm/_src/experimentdata/_experimental/_newdata2.py b/src/f3dasm/_src/experimentdata/_experimental/_newdata2.py index 26df0982..c0cc9745 100644 --- a/src/f3dasm/_src/experimentdata/_experimental/_newdata2.py +++ b/src/f3dasm/_src/experimentdata/_experimental/_newdata2.py @@ -221,7 +221,8 @@ def from_file(cls, filename: Path) -> _Data: _Data The created _Data object. """ - ... + df = pd.read_csv(filename.with_suffix('.csv'), header=0, index_col=0) + return cls.from_dataframe(df) @classmethod def from_numpy(cls: Type[_Data], array: np.ndarray, @@ -322,7 +323,7 @@ def store(self, filename: Path): filename : Path The file to store the data in. """ - ... + self.to_dataframe().to_csv(filename.with_suffix('.csv')) def get_data_dict(self, row: int) -> Dict[str, Any]: """ diff --git a/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py b/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py index 7851f30f..57e151ae 100644 --- a/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py +++ b/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py @@ -110,12 +110,20 @@ def __init__(self, self.project_dir = _project_dir_factory(project_dir) - if isinstance(input_data, np.ndarray) and isinstance(domain, Domain): - self._input_data = _data_factory(input_data, domain.names) - self._output_data = _data_factory(output_data, domain.output_names) + # DOMAIN + if domain is None: + self.domain = Domain.from_data( + input_data=_data_factory(input_data), + output_data=_data_factory(output_data)) + else: - self._input_data = _data_factory(input_data) - self._output_data = _data_factory(output_data) + self.domain = _domain_factory(domain=domain) + + # INPUT AND OUTPUT DATAA + self._input_data = _data_factory( + data=input_data, keys=self.domain.names) + self._output_data = _data_factory( + data=output_data, keys=self.domain.output_names) # Create empty output_data from indices if output_data is empty if self._output_data.is_empty(): @@ -125,10 +133,6 @@ def __init__(self, else: job_value = Status.FINISHED - self.domain = _domain_factory( - domain=domain, input_data=self._input_data.to_dataframe(), - output_data=self._output_data.to_dataframe()) - # Create empty input_data from domain if input_data is empty if self._input_data.is_empty(): self._input_data = _Data() diff --git a/src/f3dasm/_src/experimentdata/experimentdata.py b/src/f3dasm/_src/experimentdata/experimentdata.py index f053fdc5..72f66d0e 100644 --- a/src/f3dasm/_src/experimentdata/experimentdata.py +++ b/src/f3dasm/_src/experimentdata/experimentdata.py @@ -110,8 +110,20 @@ def __init__(self, self.project_dir = _project_dir_factory(project_dir) - self._input_data = _data_factory(input_data) - self._output_data = _data_factory(output_data) + # DOMAIN + if domain is None: + self.domain = Domain.from_data( + input_data=_data_factory(input_data), + output_data=_data_factory(output_data)) + + else: + self.domain = _domain_factory(domain=domain) + + # INPUT AND OUTPUT DATAA + self._input_data = _data_factory( + data=input_data, keys=self.domain.names) + self._output_data = _data_factory( + data=output_data, keys=self.domain.output_names) # Create empty output_data from indices if output_data is empty if self._output_data.is_empty(): @@ -121,9 +133,9 @@ def __init__(self, else: job_value = Status.FINISHED - self.domain = _domain_factory( - domain=domain, input_data=self._input_data.to_dataframe(), - output_data=self._output_data.to_dataframe()) + # self.domain = _domain_factory( + # domain=domain, input_data=self._input_data.to_dataframe(), + # output_data=self._output_data.to_dataframe()) # Create empty input_data from domain if input_data is empty if self._input_data.is_empty(): diff --git a/tests/newdata/test_data.py b/tests/newdata/test_data.py index 6c5abe52..644496ac 100644 --- a/tests/newdata/test_data.py +++ b/tests/newdata/test_data.py @@ -115,7 +115,7 @@ def test_len(): def test_indices(): data = _Data({0: {"a": 1}, 1: {"a": 2}}) - assert data.indices == [0, 1] + assert data.indices.equals(pd.Index([0, 1])) def test_names():