From 546e1cd95370f1aa77342e69911f197874819ab5 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Wed, 7 Feb 2024 15:17:26 +0100 Subject: [PATCH 01/13] fix(uni): leftover kwargs not returned in assign The `assign_params()` method did not return any unused keyword arguments --- lymph/models/unilateral.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py index 93797b4..83ebf6e 100644 --- a/lymph/models/unilateral.py +++ b/lymph/models/unilateral.py @@ -214,27 +214,36 @@ def _assign_via_kwargs( new_params_kwargs: dict[str, float], ) -> dict[str, float]: """Assign parameters to egdes and to distributions via keyword arguments.""" - remaining_kwargs = {} + if self.is_trinary: + global_growth_param = new_params_kwargs.pop("growth", None) + global_micro_mod = new_params_kwargs.pop("micro", None) - global_growth_param = new_params_kwargs.pop("growth", None) if self.is_growth_shared and global_growth_param is not None: for growth_edge in self.graph.growth_edges.values(): growth_edge.set_spread_prob(global_growth_param) - global_micro_mod = new_params_kwargs.pop("micro", None) if self.is_micro_mod_shared and global_micro_mod is not None: for lnl_edge in self.graph.lnl_edges.values(): lnl_edge.set_micro_mod(global_micro_mod) edges_and_dists = self.graph.edges.copy() edges_and_dists.update(self.diag_time_dists) - for key, value in new_params_kwargs.items(): - edge_name_or_tstage, type_ = key.rsplit("_", maxsplit=1) + new_params_keys = list(new_params_kwargs.keys()) + for key in new_params_keys: + try: + edge_name_or_tstage, type_ = key.rsplit("_", maxsplit=1) + except ValueError as val_err: + raise KeyError( + "Keyword arguments must be of the form '_' " + "or '_' for the distributions over diagnose " + "times." + ) from val_err if edge_name_or_tstage in edges_and_dists: + value = new_params_kwargs.pop(key) edge_or_dist = edges_and_dists[edge_name_or_tstage] edge_or_dist.set_params(**{type_: value}) - return remaining_kwargs + return new_params_kwargs def assign_params( @@ -301,9 +310,13 @@ def assign_params( ... is_micro_mod_shared=True, ... is_growth_shared=True, ... ) - >>> _ = model.assign_params( - ... 0.7, 0.5, 0.3, 0.2, 0.1, 0.4 + >>> args, kwargs = model.assign_params( + ... 0.7, 0.5, 0.3, 0.2, 0.1, 0.4, 0.99, A_to_B_param="not_used" ... ) + >>> next(args) + 0.99 + >>> kwargs + {'A_to_B_param': 'not_used'} >>> model.get_params(as_dict=True) # doctest: +NORMALIZE_WHITESPACE {'T_to_II_spread': 0.7, 'T_to_III_spread': 0.5, From 764a39eebb99ef53437779fd9a096fb968f09be8 Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Mon, 12 Feb 2024 11:11:24 +0100 Subject: [PATCH 02/13] change(uni): trinary params shared by default The micro mod and growth parameters of the trinary unilateral model are now shared by default. Related: #72 --- lymph/models/unilateral.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py index 83ebf6e..f2a1c3e 100644 --- a/lymph/models/unilateral.py +++ b/lymph/models/unilateral.py @@ -43,8 +43,8 @@ def __init__( tumor_state: int | None = None, allowed_states: list[int] | None = None, max_time: int = 10, - is_micro_mod_shared: bool = False, - is_growth_shared: bool = False, + is_micro_mod_shared: bool = True, + is_growth_shared: bool = True, **_kwargs, ) -> None: """Create a new instance of the :py:class:`~Unilateral` class. From 9df1d5db0e31ffe80af9d9afac537de6811e834d Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Mon, 12 Feb 2024 11:25:50 +0100 Subject: [PATCH 03/13] feat(bi): delegate is__shared from uni to bi The unilateral attributes `is_micro_mod_shared` and `is_growth_shared` are now handled by the bilateral model as well. Fixes: #72 --- lymph/models/bilateral.py | 52 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/lymph/models/bilateral.py b/lymph/models/bilateral.py index 0a1fa7a..a8f02a4 100644 --- a/lymph/models/bilateral.py +++ b/lymph/models/bilateral.py @@ -282,6 +282,58 @@ def trinary(cls, *args, **kwargs) -> Bilateral: return cls(*args, unilateral_kwargs=unilateral_kwargs, **kwargs) + @property + def is_micro_mod_shared(self) -> bool: + """Whether the parameter for microscopic spread is shared among graph edges. + + This attribute only plays a role in trinary models. Since this is basically a + delegated value from the ``ipsi`` and ``contra`` side of the graph, the value + set there is returned. + + If the model is binary or value of the two sides differes, an error is raised. + """ + if self.ipsi.is_micro_mod_shared != self.contra.is_micro_mod_shared: + raise AttributeError( + "Attribute `is_micro_mod_shared` differs between `ipsi` and `contra` " + "model. This might be what you wanted, but then this delegated " + "attribute does not make sense anymore." + ) + + return self.ipsi.is_micro_mod_shared + + @is_micro_mod_shared.setter + def is_micro_mod_shared(self, new_value: bool) -> None: + """Setter of ipsi- and contralateral ``is_micro_mod_shared`` attribute.""" + self.ipsi.is_micro_mod_shared = new_value + self.contra.is_micro_mod_shared = new_value + + + @property + def is_growth_shared(self) -> bool: + """Whether the parameter for growth from micro- to macroscopic is shared. + + This attribute only plays a role in trinary models. Since this is basically a + delegated value from the ``ipsi`` and ``contra`` side of the graph, the value + set there is returned. + + If the model is binary or value of the two sides differes, an error is raised. + """ + if self.ipsi.is_growth_shared != self.contra.is_growth_shared: + raise AttributeError( + "Attribute `is_growth_shared` differs between `ipsi` and `contra` " + "model. This might be what you wanted, but then this delegated " + "attribute does not make sense anymore." + ) + + return self.ipsi.is_growth_shared + + @is_growth_shared.setter + def is_growth_shared(self, new_value: bool) -> None: + """Setter of ipsi- and contralateral ``is_growth_shared`` attribute.""" + self.ipsi.is_growth_shared = new_value + self.contra.is_growth_shared = new_value + + def get_params( self, param: str | None = None, From ed11dbffd076889205642fec4c78e6fc19e7d682 Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Mon, 12 Feb 2024 11:31:35 +0100 Subject: [PATCH 04/13] change(uni): prohibit setting `max_time` Previously, setting the `max_time` attribute did not change the diagnose time distributions and might have therefore created weird situations. Setting the `max_time` is therefore now forbidden. --- lymph/models/unilateral.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py index f2a1c3e..14b001c 100644 --- a/lymph/models/unilateral.py +++ b/lymph/models/unilateral.py @@ -100,7 +100,7 @@ def __init__( if 0 >= max_time: raise ValueError("Latest diagnosis time `max_time` must be positive int") - self.max_time = max_time + self._max_time = max_time self.is_micro_mod_shared = is_micro_mod_shared self.is_growth_shared = is_growth_shared @@ -130,6 +130,17 @@ def __str__(self) -> str: return f"Unilateral with {len(self.graph.tumors)} tumors and {len(self.graph.lnls)} LNLs" + @property + def max_time(self) -> int: + """The latest time(-step) to include in the model's evolution. + + This attribute cannot be changed (easily). Thus, we recommend creating a new + instance of the model when you feel like needing to change the initially set + value. + """ + return self._max_time + + def print_info(self): """Print detailed information about the instance.""" num_tumors = len(self.graph.tumors) From 85c1974b18e7588904b5b15864601debcbb40b6b Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Mon, 12 Feb 2024 11:42:14 +0100 Subject: [PATCH 05/13] fix(uni)!: remove `is__shared` entirely I noticed the `is_micro_mod_shared` and `is_growth_shared` attributes are only used when setting params via keyword arguments. And at that point, checking if the gobal parameters are present in the kwargs is sufficient. So, I removed them entirely. Fixes: #72 --- lymph/models/bilateral.py | 52 -------------------------------------- lymph/models/unilateral.py | 13 +++------- 2 files changed, 4 insertions(+), 61 deletions(-) diff --git a/lymph/models/bilateral.py b/lymph/models/bilateral.py index a8f02a4..0a1fa7a 100644 --- a/lymph/models/bilateral.py +++ b/lymph/models/bilateral.py @@ -282,58 +282,6 @@ def trinary(cls, *args, **kwargs) -> Bilateral: return cls(*args, unilateral_kwargs=unilateral_kwargs, **kwargs) - @property - def is_micro_mod_shared(self) -> bool: - """Whether the parameter for microscopic spread is shared among graph edges. - - This attribute only plays a role in trinary models. Since this is basically a - delegated value from the ``ipsi`` and ``contra`` side of the graph, the value - set there is returned. - - If the model is binary or value of the two sides differes, an error is raised. - """ - if self.ipsi.is_micro_mod_shared != self.contra.is_micro_mod_shared: - raise AttributeError( - "Attribute `is_micro_mod_shared` differs between `ipsi` and `contra` " - "model. This might be what you wanted, but then this delegated " - "attribute does not make sense anymore." - ) - - return self.ipsi.is_micro_mod_shared - - @is_micro_mod_shared.setter - def is_micro_mod_shared(self, new_value: bool) -> None: - """Setter of ipsi- and contralateral ``is_micro_mod_shared`` attribute.""" - self.ipsi.is_micro_mod_shared = new_value - self.contra.is_micro_mod_shared = new_value - - - @property - def is_growth_shared(self) -> bool: - """Whether the parameter for growth from micro- to macroscopic is shared. - - This attribute only plays a role in trinary models. Since this is basically a - delegated value from the ``ipsi`` and ``contra`` side of the graph, the value - set there is returned. - - If the model is binary or value of the two sides differes, an error is raised. - """ - if self.ipsi.is_growth_shared != self.contra.is_growth_shared: - raise AttributeError( - "Attribute `is_growth_shared` differs between `ipsi` and `contra` " - "model. This might be what you wanted, but then this delegated " - "attribute does not make sense anymore." - ) - - return self.ipsi.is_growth_shared - - @is_growth_shared.setter - def is_growth_shared(self, new_value: bool) -> None: - """Setter of ipsi- and contralateral ``is_growth_shared`` attribute.""" - self.ipsi.is_growth_shared = new_value - self.contra.is_growth_shared = new_value - - def get_params( self, param: str | None = None, diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py index 14b001c..6f16beb 100644 --- a/lymph/models/unilateral.py +++ b/lymph/models/unilateral.py @@ -43,8 +43,6 @@ def __init__( tumor_state: int | None = None, allowed_states: list[int] | None = None, max_time: int = 10, - is_micro_mod_shared: bool = True, - is_growth_shared: bool = True, **_kwargs, ) -> None: """Create a new instance of the :py:class:`~Unilateral` class. @@ -101,8 +99,6 @@ def __init__( raise ValueError("Latest diagnosis time `max_time` must be positive int") self._max_time = max_time - self.is_micro_mod_shared = is_micro_mod_shared - self.is_growth_shared = is_growth_shared self.init_delegation( graph=[ @@ -225,15 +221,14 @@ def _assign_via_kwargs( new_params_kwargs: dict[str, float], ) -> dict[str, float]: """Assign parameters to egdes and to distributions via keyword arguments.""" - if self.is_trinary: - global_growth_param = new_params_kwargs.pop("growth", None) - global_micro_mod = new_params_kwargs.pop("micro", None) + global_growth_param = new_params_kwargs.pop("growth", None) + global_micro_mod = new_params_kwargs.pop("micro", None) - if self.is_growth_shared and global_growth_param is not None: + if global_growth_param is not None: for growth_edge in self.graph.growth_edges.values(): growth_edge.set_spread_prob(global_growth_param) - if self.is_micro_mod_shared and global_micro_mod is not None: + if global_micro_mod is not None: for lnl_edge in self.graph.lnl_edges.values(): lnl_edge.set_micro_mod(global_micro_mod) From ce15432005857dbbb5d44efe20ff3740bf700e2a Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Mon, 12 Feb 2024 15:18:56 +0100 Subject: [PATCH 06/13] fix: T-stage mapping may be dict or callable When loading patient data, the `mapping` argument, to convert from the T-stages reported in the data to those the user wants to use in the model, may now be either a dictionary or a function. --- lymph/helper.py | 14 ++++++++++++++ lymph/models/bilateral.py | 2 +- lymph/models/unilateral.py | 15 ++++++++------- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/lymph/helper.py b/lymph/helper.py index d883b30..3ba6175 100644 --- a/lymph/helper.py +++ b/lymph/helper.py @@ -519,3 +519,17 @@ def wrapper(arg0, *args, **kwargs): return wrapper return decorator + + +def dict_to_func(mapping: dict[Any, Any]) -> callable: + """Transform a dictionary into a function. + + >>> char_map = {'a': 1, 'b': 2, 'c': 3} + >>> char_map = dict_to_func(char_map) + >>> char_map('a') + 1 + """ + def callable_mapping(key): + return mapping[key] + + return callable_mapping diff --git a/lymph/models/bilateral.py b/lymph/models/bilateral.py index 0a1fa7a..2db108b 100644 --- a/lymph/models/bilateral.py +++ b/lymph/models/bilateral.py @@ -411,7 +411,7 @@ def modalities(self, new_modalities) -> None: def load_patient_data( self, patient_data: pd.DataFrame, - mapping: callable = early_late_mapping, + mapping: callable | dict[int, Any] = early_late_mapping, ) -> None: """Load patient data into the model. diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py index 6f16beb..e68c240 100644 --- a/lymph/models/unilateral.py +++ b/lymph/models/unilateral.py @@ -14,6 +14,7 @@ DelegatorMixin, DiagnoseType, PatternType, + dict_to_func, early_late_mapping, smart_updating_dict_cached_property, ) @@ -572,7 +573,7 @@ def load_patient_data( self, patient_data: pd.DataFrame, side: str = "ipsi", - mapping: callable = early_late_mapping, + mapping: callable | dict[int, Any] = early_late_mapping, ) -> None: """Load patient data in `LyProX`_ format into the model. @@ -580,10 +581,10 @@ def load_patient_data( ipsi- and contralateral) of the neck, the ``side`` parameter is used to select the for which of the two to store the involvement data. - With the ``mapping`` function, the reported T-stages (usually 0, 1, 2, 3, and 4) - can be mapped to any keys also used to access the corresponding distribution - over diagnose times. The default mapping is to map 0, 1, and 2 to "early" and - 3 and 4 to "late". + With the ``mapping`` function or dictionary, the reported T-stages (usually 0, + 1, 2, 3, and 4) can be mapped to any keys also used to access the corresponding + distribution over diagnose times. The default mapping is to map 0, 1, and 2 to + "early" and 3 and 4 to "late". What this method essentially does is to copy the entire data frame, check all necessary information is present, and add a new top-level header ``"_model"`` to @@ -594,8 +595,8 @@ def load_patient_data( """ patient_data = patient_data.copy() - if mapping is None: - mapping = {"early": [0,1,2], "late": [3,4]} + if isinstance(mapping, dict): + mapping = dict_to_func(mapping) for modality_name in self.modalities.keys(): if modality_name not in patient_data: From 6ec1f21206382d2c5398dc0c71c6e716078f4620 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Tue, 13 Feb 2024 09:49:37 +0100 Subject: [PATCH 07/13] fix(uni): raise exc when no tumors/lnls in graph --- lymph/graph.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lymph/graph.py b/lymph/graph.py index d1a6cb7..cdbd3b0 100644 --- a/lymph/graph.py +++ b/lymph/graph.py @@ -471,6 +471,12 @@ def _init_nodes(self, graph, tumor_state, allowed_lnl_states): lnl = LymphNodeLevel(name=node_name, allowed_states=allowed_lnl_states) self._nodes[node_name] = lnl + if len(self.tumors) < 1: + raise ValueError("At least one tumor node must be present in the graph") + + if len(self.lnls) < 1: + raise ValueError("At least one LNL node must be present in the graph") + @property def nodes(self) -> dict[str, Tumor | LymphNodeLevel]: From bae445f655132663d7d2cff0c1247da2827ae04b Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Tue, 13 Feb 2024 09:50:00 +0100 Subject: [PATCH 08/13] test(uni): check constructor raises exceptions --- tests/binary_unilateral_test.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/binary_unilateral_test.py b/tests/binary_unilateral_test.py index 5e939ff..4799ff4 100644 --- a/tests/binary_unilateral_test.py +++ b/tests/binary_unilateral_test.py @@ -11,6 +11,20 @@ class InitTestCase(fixtures.BinaryUnilateralModelMixin, unittest.TestCase): """Test the initialization of a binary model.""" + def test_value_errors(self): + """Check that the model raises errors when the graph has issues.""" + empty_graph = {} + only_tumor = {("tumor", "T"): []} + only_lnl = {("lnl", "II"): []} + duplicate_lnls = { + ("tumor", "T"): ["II", "II"], + ("lnl", "II"): [], + } + self.assertRaises(ValueError, lambda: type(self.model)(empty_graph)) + self.assertRaises(ValueError, lambda: type(self.model)(only_tumor)) + self.assertRaises(ValueError, lambda: type(self.model)(only_lnl)) + self.assertRaises(ValueError, lambda: type(self.model)(duplicate_lnls)) + def test_num_nodes(self): """Check number of nodes initialized.""" num_nodes = len(self.graph_dict) From a67c1b3f0230723588ee787321f618abe7cf86c2 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Tue, 13 Feb 2024 17:43:06 +0100 Subject: [PATCH 09/13] docs: fix typo in modalities --- lymph/modalities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lymph/modalities.py b/lymph/modalities.py index 615cd13..2490787 100644 --- a/lymph/modalities.py +++ b/lymph/modalities.py @@ -224,7 +224,7 @@ def confusion_matrices_hash(self) -> int: 1. It may change over the lifetime of the object, whereas ``__hash__`` should be constant. - 2. It only takes into account the ``confusion_matric`` of the modality, + 2. It only takes into account the ``confusion_matrix`` of the modality, nothing else. """ confusion_mat_bytes = b"" From 708b24e365d37d3124c58440046663b162e24710 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Wed, 14 Feb 2024 10:11:27 +0100 Subject: [PATCH 10/13] change!: change llh API The `data` and `load_data_kwargs` are removed from the `likelihood()` method arguments. The most common use case is anyways to preload the patient data. This applies to both the uni- and bilateral llh. In turn, an argument `for_t_stage` was added. It may be used to specify one particular T-stage for which to compute the likelihood. If it is omitted, the llh is computed as normally. This commit also fixes an error in the Bayesian network implementation. Previously, it called the wrong diagnose matrices. Now, it correctly computes the BN llh if no T-stage is specified and only that of the given T-stage if it _is_ provided. BREAKING CHANGE: `data` and `load_data_kwargs` removed from `likelihood()` method. --- lymph/matrix.py | 26 +++++++++++++------- lymph/models/bilateral.py | 40 ++++++++++++++++-------------- lymph/models/unilateral.py | 50 +++++++++++++++++--------------------- 3 files changed, 61 insertions(+), 55 deletions(-) diff --git a/lymph/matrix.py b/lymph/matrix.py index b09a4d7..4f4d3ea 100644 --- a/lymph/matrix.py +++ b/lymph/matrix.py @@ -204,30 +204,38 @@ def compute_encoding( return encoding -def generate_data_encoding(model: models.Unilateral, t_stage: str) -> np.ndarray: +def generate_data_encoding( + model: models.Unilateral, + t_stage: str, +) -> np.ndarray: """Generate the data matrix for a specific T-stage from patient data. The :py:attr:`~lymph.models.Unilateral.patient_data` needs to contain the column ``"_model"``, which is constructed when loading the data into the model. From this, - a data matrix is constructed for the given ``t_stage``. + a data matrix is constructed for the given ``t_stage``. If ``"_BN"`` is selected, + as T-stage, the data matrix for all patients is returned. This is mainly used for + the computation of the Bayesian network likelihood. The returned matrix has the shape :math:`2^{N \\cdot \\mathcal{O}} \\times M`, where :math:`N` is the number of lymph node levels, :math:`\\mathcal{O}` is the number of diagnostic modalities and :math:`M` is the number of patients with the - given ``t_stage``. + given ``t_stage`` (or just all patients). """ - if not model.patient_data["_model", "#", "t_stage"].isin([t_stage]).any(): - raise ValueError(f"No patients with T-stage {t_stage} in patient data.") + if t_stage == "_BN": + has_t_stage = slice(None) + else: + has_t_stage = model.patient_data["_model", "#", "t_stage"] == t_stage - has_t_stage = model.patient_data["_model", "#", "t_stage"] == t_stage - patients_with_t_stage = model.patient_data[has_t_stage] + selected_patients = model.patient_data[has_t_stage] + if len(selected_patients) == 0: + raise ValueError(f"No patients with T-stage {t_stage}.") result = np.ones( - shape=(model.observation_matrix().shape[1], len(patients_with_t_stage)), + shape=(model.observation_matrix().shape[1], len(selected_patients)), dtype=bool, ) - for i, (_, patient_row) in enumerate(patients_with_t_stage["_model"].iterrows()): + for i, (_, patient_row) in enumerate(selected_patients["_model"].iterrows()): patient_encoding = np.ones(shape=1, dtype=bool) for modality_name in model.modalities.keys(): if modality_name not in patient_row: diff --git a/lymph/models/bilateral.py b/lymph/models/bilateral.py index 2db108b..4a12371 100644 --- a/lymph/models/bilateral.py +++ b/lymph/models/bilateral.py @@ -480,14 +480,17 @@ def comp_joint_obs_dist( ) - def _bn_likelihood(self, log: bool = True) -> float: + def _bn_likelihood(self, log: bool = True, t_stage: str | None = None) -> float: """Compute the BN likelihood of data, using the stored params.""" llh = 0. if log else 1. + if t_stage is None: + t_stage = "_BN" + joint_state_dist = self.comp_joint_state_dist(mode="BN") joint_diagnose_dist = np.sum( - self.ipsi.stacked_diagnose_matrix - * (joint_state_dist @ self.contra.stacked_diagnose_matrix), + self.ipsi.diagnose_matrices[t_stage] + * (joint_state_dist @ self.contra.diagnose_matrices[t_stage]), axis=0, ) @@ -498,14 +501,19 @@ def _bn_likelihood(self, log: bool = True) -> float: return llh - def _hmm_likelihood(self, log: bool = True) -> float: + def _hmm_likelihood(self, log: bool = True, t_stage: str | None = None) -> float: """Compute the HMM likelihood of data, using the stored params.""" llh = 0. if log else 1. ipsi_dist_evo = self.ipsi.comp_dist_evolution() contra_dist_evo = self.contra.comp_dist_evolution() - for stage in self.t_stages: + if t_stage is None: + t_stages = self.t_stages + else: + t_stages = [t_stage] + + for stage in t_stages: diag_time_matrix = np.diag(self.diag_time_dists[stage].distribution) # Note that I am not using the `comp_joint_state_dist` method here, since @@ -536,19 +544,14 @@ def _hmm_likelihood(self, log: bool = True) -> float: def likelihood( self, - data: pd.DataFrame | None = None, given_param_args: Iterable[float] | None = None, given_param_kwargs: dict[str, float] | None = None, - load_data_kwargs: dict[str, Any] | None = None, log: bool = True, - mode: str = "HMM" + mode: str = "HMM", + for_t_stage: str | None = None, ): """Compute the (log-)likelihood of the ``data`` given the model (and params). - If the ``data`` is not provided, the previously loaded data is used. One may - specify additional ``load_data_kwargs`` to pass to the - :py:meth:`~load_patient_data` method when loading the data. - The parameters of the model can be set via ``given_param_args`` and ``given_param_kwargs``. Both arguments are used to call the :py:meth:`~assign_params` method. If the parameters are not provided, the @@ -566,11 +569,6 @@ def likelihood( :py:meth:`lymph.models.Unilateral.likelihood` The corresponding unilateral function. """ - if data is not None: - if load_data_kwargs is None: - load_data_kwargs = {} - self.load_patient_data(data, **load_data_kwargs) - if given_param_args is None: given_param_args = [] @@ -584,7 +582,13 @@ def likelihood( except ValueError: return -np.inf if log else 0. - return self._hmm_likelihood(log) if mode == "HMM" else self._bn_likelihood(log) + if mode == "HMM": + return self._hmm_likelihood(log, for_t_stage) + + if mode == "BN": + return self._bn_likelihood(log, for_t_stage) + + raise ValueError("Invalid mode. Must be either 'HMM' or 'BN'.") def comp_posterior_joint_state_dist( diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py index e68c240..5c2089f 100644 --- a/lymph/models/unilateral.py +++ b/lymph/models/unilateral.py @@ -559,16 +559,6 @@ def t_stages(self) -> Generator[str, None, None]: yield t_stage - @property - def stacked_diagnose_matrix(self) -> np.ndarray: - """Stacked version of all T-stage's :py:attr:`~diagnose_matrices`. - - This is mainly used for the Bayesian network implementation of the model, which - cannot naturally incorporate the T-stage as a random variable. - """ - return np.hstack(list(self.diagnose_matrices.values())) - - def load_patient_data( self, patient_data: pd.DataFrame, @@ -744,10 +734,13 @@ def comp_obs_dist(self, t_stage: str = "early", mode: str = "HMM") -> np.ndarray return state_dist @ self.observation_matrix() - def _bn_likelihood(self, log: bool = True) -> float: + def _bn_likelihood(self, log: bool = True, t_stage: str | None = None) -> float: """Compute the BN likelihood, using the stored params.""" + if t_stage is None: + t_stage = "_BN" + state_dist = self.comp_state_dist(mode="BN") - patient_likelihoods = state_dist @ self.stacked_diagnose_matrix + patient_likelihoods = state_dist @ self.diagnose_matrices[t_stage] if log: llh = np.sum(np.log(patient_likelihoods)) @@ -756,12 +749,17 @@ def _bn_likelihood(self, log: bool = True) -> float: return llh - def _hmm_likelihood(self, log: bool = True) -> float: + def _hmm_likelihood(self, log: bool = True, t_stage: str | None = None) -> float: """Compute the HMM likelihood, using the stored params.""" evolved_model = self.comp_dist_evolution() llh = 0. if log else 1. - for t_stage in self.t_stages: + if t_stage is None: + t_stages = self.t_stages + else: + t_stages = [t_stage] + + for t_stage in t_stages: patient_likelihoods = ( self.diag_time_dists[t_stage].distribution @ evolved_model @@ -777,18 +775,13 @@ def _hmm_likelihood(self, log: bool = True) -> float: def likelihood( self, - data: pd.DataFrame | None = None, given_param_args: Iterable[float] | None = None, given_param_kwargs: dict[str, float] | None = None, - load_data_kwargs: dict[str, Any] | None = None, log: bool = True, - mode: str = "HMM" + mode: str = "HMM", + for_t_stage: str | None = None, ) -> float: - """Compute the (log-)likelihood of the ``data`` given the model (and params). - - If the ``data`` is not provided, the previously loaded data is used. One may - specify additional ``load_data_kwargs`` to pass to the - :py:meth:`~load_patient_data` method when loading the data. + """Compute the (log-)likelihood of the stored data given the model (and params). The parameters of the model can be set via ``given_param_args`` and ``given_param_kwargs``. Both arguments are used to call the @@ -799,11 +792,6 @@ def likelihood( determines whether the likelihood is computed for the hidden Markov model (``"HMM"``) or the Bayesian network (``"BN"``). """ - if data is not None: - if load_data_kwargs is None: - load_data_kwargs = {} - self.load_patient_data(data, **load_data_kwargs) - if given_param_args is None: given_param_args = [] @@ -817,7 +805,13 @@ def likelihood( except ValueError: return -np.inf if log else 0. - return self._hmm_likelihood(log) if mode == "HMM" else self._bn_likelihood(log) + if mode == "HMM": + return self._hmm_likelihood(log, for_t_stage) + + if mode == "BN": + return self._bn_likelihood(log, for_t_stage) + + raise ValueError("Invalid mode. Must be either 'HMM' or 'BN'.") def comp_diagnose_encoding( From a1fa4fca924c63c0763d829cc459c417cd403f3c Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Wed, 14 Feb 2024 10:11:54 +0100 Subject: [PATCH 11/13] test: check the Bayesian network likelihood --- tests/bayesian_unilateral_test.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/tests/bayesian_unilateral_test.py b/tests/bayesian_unilateral_test.py index 9f63b58..26c64a4 100644 --- a/tests/bayesian_unilateral_test.py +++ b/tests/bayesian_unilateral_test.py @@ -3,10 +3,9 @@ """ import unittest +import fixtures import numpy as np -from tests import fixtures - class BayesianUnilateralModelTestCase(fixtures.BinaryUnilateralModelMixin, unittest.TestCase): """Test the Bayesian Unilateral Model.""" @@ -14,6 +13,8 @@ class BayesianUnilateralModelTestCase(fixtures.BinaryUnilateralModelMixin, unitt def setUp(self): super().setUp() self.model.assign_params(**self.create_random_params()) + self.model.modalities = fixtures.MODALITIES + self.load_patient_data(filename="2021-usz-oropharynx.csv") def test_state_dist(self): """Test the state distribution.""" @@ -22,6 +23,22 @@ def test_state_dist(self): def test_obs_dist(self): """Test the observation distribution.""" - self.model.modalities = fixtures.MODALITIES bayes_obs_dist = self.model.comp_obs_dist(mode="BN") self.assertTrue(np.isclose(bayes_obs_dist.sum(), 1.0)) + + def test_log_likelihood_smaller_zero(self): + """Test the likelihood.""" + likelihood = self.model.likelihood(mode="BN") + self.assertLessEqual(likelihood, 0.) + + def test_likelihood_invalid_params_isinf(self): + """Make sure the likelihood is `-np.inf` for invalid parameters.""" + random_params = self.create_random_params() + for name in random_params: + random_params[name] += 1. + likelihood = self.model.likelihood( + given_param_kwargs=random_params, + log=True, + mode="BN", + ) + self.assertEqual(likelihood, -np.inf) From 196ddc6dbf0889d4ace8abdacebdb91ff73048e8 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Thu, 15 Feb 2024 14:52:05 +0100 Subject: [PATCH 12/13] chore: update changelog --- CHANGELOG.md | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a8c5efb..bd86fb9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,34 @@ All notable changes to this project will be documented in this file. + +## [1.0.0.a6] - 2024-02-15 + +With this (still alpha) release, we most notably fixed a long unnoticed bug in the computation of the Bayesian network likelihood. + +### Bug Fixes + +- (**uni**) Leftover `kwargs` now correctly returned in `assign_params()` +- ⚠ **BREAKING** (**uni**) Remove `is__shared` entirely, as it was unused anyways +- T-stage mapping may be dictionary or callable +- (**uni**) Raise exception when there are no tumors or LNLs in graph + +### Documentation + +- Fix typo in modalities + +### Testing + +- (**uni**) Check constructor raises exceptions +- Check the Bayesian network likelihood + +### Change + +- (**uni**) Trinary params are shared by default +- (**uni**) Prohibit setting `max_time` +- ⚠ **BREAKING** Change `likelihood()` API: We don't allow setting the data via the `likelihood()` anymore. It convoluted the method and setting it beforehand is more explicit anyways. + + ## [1.0.0.a5] - 2024-02-06 @@ -298,7 +326,8 @@ Almost the entire API has changed. I'd therefore recommend to have a look at the - add pre-commit hook to check commit msg -[Unreleased]: https://github.com/rmnldwg/lymph/compare/1.0.0.a5...HEAD +[Unreleased]: https://github.com/rmnldwg/lymph/compare/1.0.0.a6...HEAD +[1.0.0.a6]: https://github.com/rmnldwg/lymph/compare/1.0.0.a5...1.0.0.a6 [1.0.0.a5]: https://github.com/rmnldwg/lymph/compare/1.0.0.a4...1.0.0.a5 [1.0.0.a4]: https://github.com/rmnldwg/lymph/compare/1.0.0.a3...1.0.0.a4 [1.0.0.a3]: https://github.com/rmnldwg/lymph/compare/1.0.0.a2...1.0.0.a3 From aa90be883b804a218f93f02323eb9030c328db16 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Thu, 15 Feb 2024 14:54:58 +0100 Subject: [PATCH 13/13] chore: link to issue in changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bd86fb9..7737935 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ With this (still alpha) release, we most notably fixed a long unnoticed bug in t ### Bug Fixes - (**uni**) Leftover `kwargs` now correctly returned in `assign_params()` -- ⚠ **BREAKING** (**uni**) Remove `is__shared` entirely, as it was unused anyways +- ⚠ **BREAKING** (**uni**) Remove `is__shared` entirely, as it was unused anyways. Fixes [#72]. - T-stage mapping may be dictionary or callable - (**uni**) Raise exception when there are no tumors or LNLs in graph @@ -339,6 +339,7 @@ Almost the entire API has changed. I'd therefore recommend to have a look at the [0.4.1]: https://github.com/rmnldwg/lymph/compare/0.4.0...0.4.1 [0.4.0]: https://github.com/rmnldwg/lymph/compare/0.3.10...0.4.0 +[#72]: https://github.com/rmnldwg/lymph/issues/72 [#69]: https://github.com/rmnldwg/lymph/issues/69 [#68]: https://github.com/rmnldwg/lymph/issues/68 [#65]: https://github.com/rmnldwg/lymph/issues/65