Skip to content

Commit

Permalink
Merge pull request #75 from rmnldwg/release-1.0.0.a6
Browse files Browse the repository at this point in the history
Release 1.0.0.a6
  • Loading branch information
rmnldwg authored Feb 15, 2024
2 parents b2e5c8a + aa90be8 commit c31ae8b
Show file tree
Hide file tree
Showing 9 changed files with 190 additions and 83 deletions.
32 changes: 31 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,34 @@

All notable changes to this project will be documented in this file.

<a name="1.0.0.a6"></a>
## [1.0.0.a6] - 2024-02-15

With this (still alpha) release, we most notably fixed a long-unnoticed bug in the computation of the Bayesian network likelihood.

### Bug Fixes

- (**uni**) Leftover `kwargs` now correctly returned in `assign_params()`
- **BREAKING** (**uni**) Remove `is_<x>_shared` entirely, as it was unused anyway. Fixes [#72].
- T-stage mapping may be dictionary or callable
- (**uni**) Raise exception when there are no tumors or LNLs in graph

### Documentation

- Fix typo in modalities

### Testing

- (**uni**) Check constructor raises exceptions
- Check the Bayesian network likelihood

### Change

- (**uni**) Trinary params are shared by default
- (**uni**) Prohibit setting `max_time`
- **BREAKING** Change `likelihood()` API: We don't allow setting the data via `likelihood()` anymore. It convoluted the method, and setting the data beforehand is more explicit anyway.


<a name="1.0.0.a5"></a>
## [1.0.0.a5] - 2024-02-06

Expand Down Expand Up @@ -298,7 +326,8 @@ Almost the entire API has changed. I'd therefore recommend to have a look at the
- add pre-commit hook to check commit msg


[Unreleased]: https://github.com/rmnldwg/lymph/compare/1.0.0.a5...HEAD
[Unreleased]: https://github.com/rmnldwg/lymph/compare/1.0.0.a6...HEAD
[1.0.0.a6]: https://github.com/rmnldwg/lymph/compare/1.0.0.a5...1.0.0.a6
[1.0.0.a5]: https://github.com/rmnldwg/lymph/compare/1.0.0.a4...1.0.0.a5
[1.0.0.a4]: https://github.com/rmnldwg/lymph/compare/1.0.0.a3...1.0.0.a4
[1.0.0.a3]: https://github.com/rmnldwg/lymph/compare/1.0.0.a2...1.0.0.a3
Expand All @@ -310,6 +339,7 @@ Almost the entire API has changed. I'd therefore recommend to have a look at the
[0.4.1]: https://github.com/rmnldwg/lymph/compare/0.4.0...0.4.1
[0.4.0]: https://github.com/rmnldwg/lymph/compare/0.3.10...0.4.0

[#72]: https://github.com/rmnldwg/lymph/issues/72
[#69]: https://github.com/rmnldwg/lymph/issues/69
[#68]: https://github.com/rmnldwg/lymph/issues/68
[#65]: https://github.com/rmnldwg/lymph/issues/65
Expand Down
6 changes: 6 additions & 0 deletions lymph/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,12 @@ def _init_nodes(self, graph, tumor_state, allowed_lnl_states):
lnl = LymphNodeLevel(name=node_name, allowed_states=allowed_lnl_states)
self._nodes[node_name] = lnl

if len(self.tumors) < 1:
raise ValueError("At least one tumor node must be present in the graph")

if len(self.lnls) < 1:
raise ValueError("At least one LNL node must be present in the graph")


@property
def nodes(self) -> dict[str, Tumor | LymphNodeLevel]:
Expand Down
14 changes: 14 additions & 0 deletions lymph/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,3 +519,17 @@ def wrapper(arg0, *args, **kwargs):
return wrapper

return decorator


def dict_to_func(mapping: dict[Any, Any]) -> callable:
    """Transform a dictionary into a function.

    The returned function takes a single ``key`` and returns ``mapping[key]``. It
    keeps a reference to ``mapping`` (no copy is made), so later changes to the
    dictionary are visible through the function. A missing key raises the same
    ``KeyError`` as the plain dictionary lookup would.

    >>> char_map = {'a': 1, 'b': 2, 'c': 3}
    >>> char_map = dict_to_func(char_map)
    >>> char_map('a')
    1
    """
    def callable_mapping(key):
        # Plain subscription: no default value, so unknown keys fail loudly.
        return mapping[key]

    return callable_mapping
26 changes: 17 additions & 9 deletions lymph/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,30 +204,38 @@ def compute_encoding(
return encoding


def generate_data_encoding(model: models.Unilateral, t_stage: str) -> np.ndarray:
def generate_data_encoding(
model: models.Unilateral,
t_stage: str,
) -> np.ndarray:
"""Generate the data matrix for a specific T-stage from patient data.
The :py:attr:`~lymph.models.Unilateral.patient_data` needs to contain the column
``"_model"``, which is constructed when loading the data into the model. From this,
a data matrix is constructed for the given ``t_stage``.
a data matrix is constructed for the given ``t_stage``. If ``"_BN"`` is selected,
as T-stage, the data matrix for all patients is returned. This is mainly used for
the computation of the Bayesian network likelihood.
The returned matrix has the shape :math:`2^{N \\cdot \\mathcal{O}} \\times M`,
where :math:`N` is the number of lymph node levels, :math:`\\mathcal{O}` is the
number of diagnostic modalities and :math:`M` is the number of patients with the
given ``t_stage``.
given ``t_stage`` (or just all patients).
"""
if not model.patient_data["_model", "#", "t_stage"].isin([t_stage]).any():
raise ValueError(f"No patients with T-stage {t_stage} in patient data.")
if t_stage == "_BN":
has_t_stage = slice(None)
else:
has_t_stage = model.patient_data["_model", "#", "t_stage"] == t_stage

has_t_stage = model.patient_data["_model", "#", "t_stage"] == t_stage
patients_with_t_stage = model.patient_data[has_t_stage]
selected_patients = model.patient_data[has_t_stage]
if len(selected_patients) == 0:
raise ValueError(f"No patients with T-stage {t_stage}.")

result = np.ones(
shape=(model.observation_matrix().shape[1], len(patients_with_t_stage)),
shape=(model.observation_matrix().shape[1], len(selected_patients)),
dtype=bool,
)

for i, (_, patient_row) in enumerate(patients_with_t_stage["_model"].iterrows()):
for i, (_, patient_row) in enumerate(selected_patients["_model"].iterrows()):
patient_encoding = np.ones(shape=1, dtype=bool)
for modality_name in model.modalities.keys():
if modality_name not in patient_row:
Expand Down
2 changes: 1 addition & 1 deletion lymph/modalities.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def confusion_matrices_hash(self) -> int:
1. It may change over the lifetime of the object, whereas ``__hash__``
should be constant.
2. It only takes into account the ``confusion_matric`` of the modality,
2. It only takes into account the ``confusion_matrix`` of the modality,
nothing else.
"""
confusion_mat_bytes = b""
Expand Down
42 changes: 23 additions & 19 deletions lymph/models/bilateral.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ def modalities(self, new_modalities) -> None:
def load_patient_data(
self,
patient_data: pd.DataFrame,
mapping: callable = early_late_mapping,
mapping: callable | dict[int, Any] = early_late_mapping,
) -> None:
"""Load patient data into the model.
Expand Down Expand Up @@ -480,14 +480,17 @@ def comp_joint_obs_dist(
)


def _bn_likelihood(self, log: bool = True) -> float:
def _bn_likelihood(self, log: bool = True, t_stage: str | None = None) -> float:
"""Compute the BN likelihood of data, using the stored params."""
llh = 0. if log else 1.

if t_stage is None:
t_stage = "_BN"

joint_state_dist = self.comp_joint_state_dist(mode="BN")
joint_diagnose_dist = np.sum(
self.ipsi.stacked_diagnose_matrix
* (joint_state_dist @ self.contra.stacked_diagnose_matrix),
self.ipsi.diagnose_matrices[t_stage]
* (joint_state_dist @ self.contra.diagnose_matrices[t_stage]),
axis=0,
)

Expand All @@ -498,14 +501,19 @@ def _bn_likelihood(self, log: bool = True) -> float:
return llh


def _hmm_likelihood(self, log: bool = True) -> float:
def _hmm_likelihood(self, log: bool = True, t_stage: str | None = None) -> float:
"""Compute the HMM likelihood of data, using the stored params."""
llh = 0. if log else 1.

ipsi_dist_evo = self.ipsi.comp_dist_evolution()
contra_dist_evo = self.contra.comp_dist_evolution()

for stage in self.t_stages:
if t_stage is None:
t_stages = self.t_stages
else:
t_stages = [t_stage]

for stage in t_stages:
diag_time_matrix = np.diag(self.diag_time_dists[stage].distribution)

# Note that I am not using the `comp_joint_state_dist` method here, since
Expand Down Expand Up @@ -536,19 +544,14 @@ def _hmm_likelihood(self, log: bool = True) -> float:

def likelihood(
self,
data: pd.DataFrame | None = None,
given_param_args: Iterable[float] | None = None,
given_param_kwargs: dict[str, float] | None = None,
load_data_kwargs: dict[str, Any] | None = None,
log: bool = True,
mode: str = "HMM"
mode: str = "HMM",
for_t_stage: str | None = None,
):
"""Compute the (log-)likelihood of the ``data`` given the model (and params).
If the ``data`` is not provided, the previously loaded data is used. One may
specify additional ``load_data_kwargs`` to pass to the
:py:meth:`~load_patient_data` method when loading the data.
The parameters of the model can be set via ``given_param_args`` and
``given_param_kwargs``. Both arguments are used to call the
:py:meth:`~assign_params` method. If the parameters are not provided, the
Expand All @@ -566,11 +569,6 @@ def likelihood(
:py:meth:`lymph.models.Unilateral.likelihood`
The corresponding unilateral function.
"""
if data is not None:
if load_data_kwargs is None:
load_data_kwargs = {}
self.load_patient_data(data, **load_data_kwargs)

if given_param_args is None:
given_param_args = []

Expand All @@ -584,7 +582,13 @@ def likelihood(
except ValueError:
return -np.inf if log else 0.

return self._hmm_likelihood(log) if mode == "HMM" else self._bn_likelihood(log)
if mode == "HMM":
return self._hmm_likelihood(log, for_t_stage)

if mode == "BN":
return self._bn_likelihood(log, for_t_stage)

raise ValueError("Invalid mode. Must be either 'HMM' or 'BN'.")


def comp_posterior_joint_state_dist(
Expand Down
Loading

0 comments on commit c31ae8b

Please sign in to comment.