Merge pull request #75 from rmnldwg/release-1.0.0.a6

Release 1.0.0.a6
rmnldwg · Feb 15, 2024 · c31ae8b · c31ae8b
2 parents b2e5c8a + aa90be8
commit c31ae8b
Show file tree

Hide file tree

Showing 9 changed files with 190 additions and 83 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,34 @@
 
 All notable changes to this project will be documented in this file.
 
+<a name="1.0.0.a6"></a>
+## [1.0.0.a6] - 2024-02-15
+
+With this (still alpha) release, we most notably fixed a long-unnoticed bug in the computation of the Bayesian network likelihood.
+
+### Bug Fixes
+
+- (**uni**) Leftover `kwargs` now correctly returned in `assign_params()`
+- ⚠ **BREAKING** (**uni**) Remove `is_<x>_shared` entirely, as it was unused anyway. Fixes [#72].
+- T-stage mapping may be dictionary or callable
+- (**uni**) Raise exception when there are no tumors or LNLs in graph
+
+### Documentation
+
+- Fix typo in modalities
+
+### Testing
+
+- (**uni**) Check constructor raises exceptions
+- Check the Bayesian network likelihood
+
+### Change
+
+- (**uni**) Trinary params are shared by default
+- (**uni**) Prohibit setting `max_time`
+- ⚠ **BREAKING** Change `likelihood()` API: Setting the data via `likelihood()` is no longer allowed. It convoluted the method, and loading the data beforehand is more explicit anyway.
+
+
 <a name="1.0.0.a5"></a>
 ## [1.0.0.a5] - 2024-02-06
 
@@ -298,7 +326,8 @@ Almost the entire API has changed. I'd therefore recommend to have a look at the
 - add pre-commit hook to check commit msg
 
 
-[Unreleased]: https://github.com/rmnldwg/lymph/compare/1.0.0.a5...HEAD
+[Unreleased]: https://github.com/rmnldwg/lymph/compare/1.0.0.a6...HEAD
+[1.0.0.a6]: https://github.com/rmnldwg/lymph/compare/1.0.0.a5...1.0.0.a6
 [1.0.0.a5]: https://github.com/rmnldwg/lymph/compare/1.0.0.a4...1.0.0.a5
 [1.0.0.a4]: https://github.com/rmnldwg/lymph/compare/1.0.0.a3...1.0.0.a4
 [1.0.0.a3]: https://github.com/rmnldwg/lymph/compare/1.0.0.a2...1.0.0.a3
@@ -310,6 +339,7 @@ Almost the entire API has changed. I'd therefore recommend to have a look at the
 [0.4.1]: https://github.com/rmnldwg/lymph/compare/0.4.0...0.4.1
 [0.4.0]: https://github.com/rmnldwg/lymph/compare/0.3.10...0.4.0
 
+[#72]: https://github.com/rmnldwg/lymph/issues/72
 [#69]: https://github.com/rmnldwg/lymph/issues/69
 [#68]: https://github.com/rmnldwg/lymph/issues/68
 [#65]: https://github.com/rmnldwg/lymph/issues/65

diff --git a/lymph/graph.py b/lymph/graph.py
@@ -471,6 +471,12 @@ def _init_nodes(self, graph, tumor_state, allowed_lnl_states):
                 lnl = LymphNodeLevel(name=node_name, allowed_states=allowed_lnl_states)
                 self._nodes[node_name] = lnl
 
+        if len(self.tumors) < 1:
+            raise ValueError("At least one tumor node must be present in the graph")
+
+        if len(self.lnls) < 1:
+            raise ValueError("At least one LNL node must be present in the graph")
+
 
     @property
     def nodes(self) -> dict[str, Tumor | LymphNodeLevel]:

diff --git a/lymph/helper.py b/lymph/helper.py
@@ -519,3 +519,17 @@ def wrapper(arg0, *args, **kwargs):
         return wrapper
 
     return decorator
+
+
+def dict_to_func(mapping: dict[Any, Any]) -> callable:
+    """Wrap ``mapping`` in a function; unknown keys raise ``KeyError``.
+
+    >>> char_map = {'a': 1, 'b': 2, 'c': 3}
+    >>> char_map = dict_to_func(char_map)
+    >>> char_map('a')
+    1
+    """
+    def callable_mapping(key):
+        return mapping[key]
+
+    return callable_mapping
diff --git a/lymph/matrix.py b/lymph/matrix.py
@@ -204,30 +204,38 @@ def compute_encoding(
     return encoding
 
 
-def generate_data_encoding(model: models.Unilateral, t_stage: str) -> np.ndarray:
+def generate_data_encoding(
+    model: models.Unilateral,
+    t_stage: str,
+) -> np.ndarray:
     """Generate the data matrix for a specific T-stage from patient data.
 
     The :py:attr:`~lymph.models.Unilateral.patient_data` needs to contain the column
     ``"_model"``, which is constructed when loading the data into the model. From this,
-    a data matrix is constructed for the given ``t_stage``.
+    a data matrix is constructed for the given ``t_stage``. If ``"_BN"`` is selected
+    as T-stage, the data matrix for all patients is returned. This is mainly used for
+    the computation of the Bayesian network likelihood.
 
     The returned matrix has the shape :math:`2^{N \\cdot \\mathcal{O}} \\times M`,
     where :math:`N` is the number of lymph node levels, :math:`\\mathcal{O}` is the
     number of diagnostic modalities and :math:`M` is the number of patients with the
-    given ``t_stage``.
+    given ``t_stage`` (or just all patients).
     """
-    if not model.patient_data["_model", "#", "t_stage"].isin([t_stage]).any():
-        raise ValueError(f"No patients with T-stage {t_stage} in patient data.")
+    if t_stage == "_BN":
+        has_t_stage = slice(None)
+    else:
+        has_t_stage = model.patient_data["_model", "#", "t_stage"] == t_stage
 
-    has_t_stage = model.patient_data["_model", "#", "t_stage"] == t_stage
-    patients_with_t_stage = model.patient_data[has_t_stage]
+    selected_patients = model.patient_data[has_t_stage]
+    if len(selected_patients) == 0:
+        raise ValueError(f"No patients with T-stage {t_stage}.")
 
     result = np.ones(
-        shape=(model.observation_matrix().shape[1], len(patients_with_t_stage)),
+        shape=(model.observation_matrix().shape[1], len(selected_patients)),
         dtype=bool,
     )
 
-    for i, (_, patient_row) in enumerate(patients_with_t_stage["_model"].iterrows()):
+    for i, (_, patient_row) in enumerate(selected_patients["_model"].iterrows()):
         patient_encoding = np.ones(shape=1, dtype=bool)
         for modality_name in model.modalities.keys():
             if modality_name not in patient_row:

diff --git a/lymph/modalities.py b/lymph/modalities.py
@@ -224,7 +224,7 @@ def confusion_matrices_hash(self) -> int:
 
             1. It may change over the lifetime of the object, whereas ``__hash__``
                 should be constant.
-            2. It only takes into account the ``confusion_matric`` of the modality,
+            2. It only takes into account the ``confusion_matrix`` of the modality,
                 nothing else.
         """
         confusion_mat_bytes = b""

diff --git a/lymph/models/bilateral.py b/lymph/models/bilateral.py
@@ -411,7 +411,7 @@ def modalities(self, new_modalities) -> None:
     def load_patient_data(
         self,
         patient_data: pd.DataFrame,
-        mapping: callable = early_late_mapping,
+        mapping: callable | dict[int, Any] = early_late_mapping,
     ) -> None:
         """Load patient data into the model.
 
@@ -480,14 +480,17 @@ def comp_joint_obs_dist(
         )
 
 
-    def _bn_likelihood(self, log: bool = True) -> float:
+    def _bn_likelihood(self, log: bool = True, t_stage: str | None = None) -> float:
         """Compute the BN likelihood of data, using the stored params."""
         llh = 0. if log else 1.
 
+        if t_stage is None:
+            t_stage = "_BN"
+
         joint_state_dist = self.comp_joint_state_dist(mode="BN")
         joint_diagnose_dist = np.sum(
-            self.ipsi.stacked_diagnose_matrix
-            * (joint_state_dist @ self.contra.stacked_diagnose_matrix),
+            self.ipsi.diagnose_matrices[t_stage]
+            * (joint_state_dist @ self.contra.diagnose_matrices[t_stage]),
             axis=0,
         )
 
@@ -498,14 +501,19 @@ def _bn_likelihood(self, log: bool = True) -> float:
         return llh
 
 
-    def _hmm_likelihood(self, log: bool = True) -> float:
+    def _hmm_likelihood(self, log: bool = True, t_stage: str | None = None) -> float:
         """Compute the HMM likelihood of data, using the stored params."""
         llh = 0. if log else 1.
 
         ipsi_dist_evo = self.ipsi.comp_dist_evolution()
         contra_dist_evo = self.contra.comp_dist_evolution()
 
-        for stage in self.t_stages:
+        if t_stage is None:
+            t_stages = self.t_stages
+        else:
+            t_stages = [t_stage]
+
+        for stage in t_stages:
             diag_time_matrix = np.diag(self.diag_time_dists[stage].distribution)
 
             # Note that I am not using the `comp_joint_state_dist` method here, since
@@ -536,19 +544,14 @@ def _hmm_likelihood(self, log: bool = True) -> float:
 
     def likelihood(
         self,
-        data: pd.DataFrame | None = None,
         given_param_args: Iterable[float] | None = None,
         given_param_kwargs: dict[str, float] | None = None,
-        load_data_kwargs: dict[str, Any] | None = None,
         log: bool = True,
-        mode: str = "HMM"
+        mode: str = "HMM",
+        for_t_stage: str | None = None,
     ):
         """Compute the (log-)likelihood of the ``data`` given the model (and params).
 
-        If the ``data`` is not provided, the previously loaded data is used. One may
-        specify additional ``load_data_kwargs`` to pass to the
-        :py:meth:`~load_patient_data` method when loading the data.
-
         The parameters of the model can be set via ``given_param_args`` and
         ``given_param_kwargs``. Both arguments are used to call the
         :py:meth:`~assign_params` method. If the parameters are not provided, the
@@ -566,11 +569,6 @@ def likelihood(
             :py:meth:`lymph.models.Unilateral.likelihood`
                 The corresponding unilateral function.
         """
-        if data is not None:
-            if load_data_kwargs is None:
-                load_data_kwargs = {}
-            self.load_patient_data(data, **load_data_kwargs)
-
         if given_param_args is None:
             given_param_args = []
 
@@ -584,7 +582,13 @@ def likelihood(
         except ValueError:
             return -np.inf if log else 0.
 
-        return self._hmm_likelihood(log) if mode == "HMM" else self._bn_likelihood(log)
+        if mode == "HMM":
+            return self._hmm_likelihood(log, for_t_stage)
+
+        if mode == "BN":
+            return self._bn_likelihood(log, for_t_stage)
+
+        raise ValueError("Invalid mode. Must be either 'HMM' or 'BN'.")
 
 
     def comp_posterior_joint_state_dist(