Skip to content

Commit f0c9723

Browse files
authored
Merge pull request #16 from ghurault/release-v.0.2.0
Release v.0.2.0
2 parents 2776b4c + 9b0107c commit f0c9723

File tree

12 files changed

+226
-137
lines changed

12 files changed

+226
-137
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: EczemaPred
22
Title: Predicting the Evolution of Eczema Severity
3-
Version: 0.1.1
3+
Version: 0.2.0
44
Authors@R:
55
person(given = "Guillem",
66
family = "Hurault",

R/model_doc.R

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -60,53 +60,65 @@ NULL
6060
#' For more details see the BinRW [vignette](https://ghurault.github.io/EczemaPred/articles/BinRW.html).
6161
#'
6262
#' @param max_score Maximum value that the score can take
63-
#' @param prior Named list of the model's priors. If `NULL`, uses the default prior for the model (see [default_prior()]).
63+
#' @param prior Named list of the model's priors.
64+
#' If `NULL`, uses the default prior for the model (see [default_prior()]).
6465
#'
6566
#' @details Details of the model are available in the [paper](#).
6667
#'
6768
#' @section Parameters:
6869
#'
6970
#' ## Population parameters:
7071
#'
71-
#' - `sigma`: Standard deviation of the random walk
72+
#' - `sigma_lat`: Standard deviation of the random walk
73+
#' - `sigma_meas`: Standard deviation (not scale) of the logistic distribution (in `[0, max_score]` space)
74+
#' - `sigma_tot`: Total standard deviation for prediction one step ahead
75+
#' - `rho2`: Proportion of measurement variance to the total variance.
76+
#' It can be interpreted similarly to an R-squared, the proportion of the explained variance
77+
#' (the variance of the measurements) in the total variance.
7278
#' - `mu_y0`: Population mean of `y0` (initial condition).
7379
#' - `sigma_y0`: Population standard deviation of `y0` (initial condition).
74-
#' - `delta`: Difference between cutpoints (vector of length `max_score - 1`)
75-
#' - `ct`: Cutpoints (vector of length `max_score`)
76-
#' - `p0`: Probability distribution of the average patient at t0 (vector of length `max_score`)
80+
#' - `delta`: Relative difference between cutpoints (simplex of length `max_score - 1`)
81+
#' - `ct`: Cutpoints (vector of length `max_score`, in `[0, max_score]` space)
7782
#'
7883
#' ## Patient-dependent parameters:
7984
#'
80-
#' - `y0`: `y_lat` at t0.
85+
#' - `y0`: initial latent score (`y_lat` at t0).
8186
#'
8287
#' ## Observation-dependent (patient- and time-dependent) parameters:
8388
#'
84-
#' - `y_lat`: Latent score
89+
#' - `y_lat`: Latent score (in `[0, max_score]` space)
8590
#'
8691
#' See `list_parameters(model = "OrderedRW")` for more details.
8792
#'
8893
#' @section Priors:
89-
#' The priors are passed as a named list with elements `delta`, `sigma`, `mu_y0` and `sigma_y0`
94+
#' The priors are passed as a named list with elements `delta`, `sigma_lat`, `sigma_meas`, `mu_y0` and `sigma_y0`
9095
#' specifying priors for the corresponding parameters.
9196
#'
92-
#' The element `delta` should be a matrix with 2 rows and `max_score - 1` columns,
93-
#' such as the i-th column is a vector with values x1 and x2, where x2 > 0 and
94-
#' `delta[i] ~ normal+(x1, x2)`.
95-
#' The other parameters are normalised by the difference between the highest and lowest cutpoints (approx. the range of the score),
96-
#' and their priors are defined by a vector of length 2, containing values for x1 and x2, x2 > 0, such as:
97+
#' The element `delta` should be a vector X1 of length `max_score - 1`,
98+
#' such as all all elements of X1 are positive and
99+
#' `delta ~ dirichlet(X1)`.
97100
#'
98-
#' - `sigma ~ normal+(x1, x2)`
101+
#' The latent score can be interpreted in the original `[0, max_score]` space,
102+
#' the priors for the other parameters are specified normalised `max_score`.
103+
#' Their priors are defined by a vector of length 2, containing values for x1 and x2, x2 > 0, such as:
104+
#'
105+
#' - `sigma_meas / max_score ~ lognormal(x1, x2)`
106+
#' - `sigma_lat / max_score ~ lognormal(x1, x2)`
99107
#' - `mu_y0 ~ normal(x1, x2)`
100108
#' - `sigma_y0 ~ normal+(x1, x2)`
101109
#'
102-
#' NB: `delta`, `sigma` and `sigma_y0` are constrained to be positive so x1 are usually set to 0 to define a half-normal distribution.
110+
#' NB: for the lognormal distribution, x1 corresponds to the mean of the log and x2 to the sd of the log.
111+
#' NB: `sigma_y0` is constrained to be positive so x1 are usually set to 0 to define a half-normal distribution.
103112
#'
104113
#' @section Default priors:
105-
#' - The default prior for `delta` is set so that `delta` is less than the width of the logistic distribution.
106-
#' - The default prior for `sigma` assumes it would be to go to a state where `y = 0` is the most likely outcome to
107-
#' a state where `y = M` in two transitions.
108-
#' - The default priors for `mu_y0` and `sigma_y0` have reasonable ranges and translate to an approximately uniform prior
109-
#' over the range of the score for `y0`.
114+
#' - The default prior for `delta` is a uniform symmetric Dirichlet distribution with concentration 2.
115+
#' - The default priors for `sigma_meas` and `sigma_lat` are lognormal distribution which translate to
116+
#' a 95% CI that is approximately `[.02, 0.40] * M`.
117+
#' The prior for `sigma_lat` thus allows fast or slow transitions from a state where `y = 0`
118+
#' is the most likely outcome to a state where `y = M` is the most likely outcome.
119+
#' The prior for `sigma_meas` allows very precise or imprecise measurements.
120+
#' - The default priors for `mu_y0` and `sigma_y0` have reasonable ranges and translate to
121+
#' an approximately uniform prior over the range of the score for `y0`.
110122
#'
111123
#' @name OrderedRW
112124
#'

R/model_prior.R

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -81,34 +81,33 @@ validate_prior.OrderedRW <- function(model, ...) {
8181
prior <- model$prior
8282
stopifnot(
8383
is.list(prior),
84-
all(c("delta", "sigma", "mu_y0", "sigma_y0") %in% names(prior)),
84+
all(c("delta", "sigma_meas", "sigma_lat", "mu_y0", "sigma_y0") %in% names(prior)),
8585
all(vapply(prior, is.numeric, logical(1))),
86-
dim(prior$delta_sd) == c(2, model$max_score - 1),
87-
all(prior$delta[2, ] > 0),
88-
all(vapply(prior[c("sigma", "mu_y0", "sigma_y0")], function(x) {length(x) == 2}, logical(1))),
89-
all(vapply(prior[c("sigma", "mu_y0", "sigma_y0")], function(x) {x[2] > 0}, logical(1)))
86+
length(prior$delta) == model$max_score - 1,
87+
all(prior$delta > 0),
88+
all(vapply(prior[c("sigma_meas", "sigma_lat", "mu_y0", "sigma_y0")], function(x) {length(x) == 2}, logical(1))),
89+
all(vapply(prior[c("sigma_meas", "sigma_lat", "mu_y0", "sigma_y0")], function(x) {x[2] > 0}, logical(1)))
9090
)
9191
}
9292

9393
#' @export
9494
default_prior.OrderedRW <- function(model, ...) {
9595
list(
96-
delta = matrix(rep(c(0, pi / sqrt(3) * 2), model$max_score - 1),
97-
nrow = 2, byrow = FALSE),
98-
sigma = c(0, 0.1),
96+
delta = rep(2, model$max_score - 1),
97+
sigma_meas = c(-log(10), 0.5 * log(4)),
98+
sigma_lat = c(-log(10), 0.5 * log(4)),
9999
mu_y0 = c(0.5, 0.25),
100100
sigma_y0 = c(0, 0.125)
101101
)
102102
}
103103

104104
#' @export
105105
print_prior.OrderedRW <- function(model, digits = 2, ...) {
106-
for (i in 1:(model$max_score - 1)) {
107-
print_distribution(paste0("delta[", i, "]"), "normal+", model$prior$delta[, i])
108-
}
109-
print_distribution("sigma", "normal+", model$prior$sigma, digits = digits)
110-
print_distribution("mu_y0", "normal", model$prior$mu_y0, digits = digits)
111-
print_distribution("sigma_y0", "normal+", model$prior$sigma_y0, digits = digits)
106+
print_distribution("delta", "dirichlet", model$prior$delta, digits = digits)
107+
print_distribution("sigma_meas / max_score", "lognormal", model$prior$sigma_meas, digits = digits)
108+
print_distribution("sigma_lat / max_score", "lognormal", model$prior$sigma_lat, digits = digits)
109+
print_distribution("mu_y0 / max_score", "normal", model$prior$mu_y0, digits = digits)
110+
print_distribution("sigma_y0 / max_score", "normal+", model$prior$sigma_y0, digits = digits)
112111
}
113112

114113
# BinMC -------------------------------------------------------------------

R/parameters.R

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -30,26 +30,14 @@ list_parameters.BinRW <- function(model, main = TRUE, ...) {
3030

3131
}
3232

33-
#' @rdname list_parameters
34-
#' @importFrom HuraultMisc is_scalar
3533
#' @export
36-
#' @examples
37-
#' list_parameters(EczemaModel("OrderedRW", max_score = 100))
38-
list_parameters.OrderedRW <- function(model, main = TRUE, ...) {
39-
40-
stopifnot(is_scalar(main),
41-
is.logical(main))
42-
43-
out <- list(Population = c("sigma", "mu_y0", "sigma_y0", "p0", "ct", "delta"),
44-
Patient = "y0",
45-
PatientTime = c("y_lat", "y_rep"),
46-
Test = c("y_pred", "lpd", "cum_err"))
47-
if (main) {
48-
out$Population <- setdiff(out$Population, "p0")
49-
}
50-
51-
return(out)
52-
34+
list_parameters.OrderedRW <- function(model, ...) {
35+
list(
36+
Population = c("sigma_meas", "sigma_lat", "rho2", "sigma_tot", "ct", "delta", "mu_y0", "sigma_y0"),
37+
Patient = "y0",
38+
PatientTime = c("y_lat", "y_rep"),
39+
Test = c("y_pred", "lpd", "cum_err")
40+
)
5341
}
5442

5543
#' @rdname list_parameters

R/plot_fit.R

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
plot_latent_OrderedRW <- function(fit, id, patient_id) {
1717

1818
stopifnot(is_stanfit(fit),
19-
all(c("y_lat", "ct") %in% fit@model_pars),
19+
all(c("y_lat", "ct", "sigma_meas") %in% fit@model_pars),
2020
is.data.frame(id),
2121
nrow(id) == fit@par_dims[["y_lat"]],
2222
all(c("Patient", "Time", "Index") %in% colnames(id)),
@@ -28,19 +28,22 @@ plot_latent_OrderedRW <- function(fit, id, patient_id) {
2828
sapply(mean))
2929
ct <- rstan::extract(fit, pars = "ct")[[1]] %>%
3030
apply(2, mean)
31+
sigma_meas <- rstan::extract(fit, pars = "sigma_meas")[[1]] %>%
32+
mean()
33+
s <- sigma_meas * sqrt(3) / pi
3134

3235
max_score <- length(ct)
3336
lvl <- seq(0.1, 0.9, 0.1)
3437

3538
# Label location
3639
midpoint <- (ct - lag(ct))[-1] / 2
3740
midpoint <- ct[1:length(midpoint)] + midpoint
38-
midpoint <- c(ct[1] - 3.5, midpoint, ct[length(ct)] + 3.5)
41+
midpoint <- c(ct[1] - 1, midpoint, ct[length(ct)] + 1)
3942

4043
# Dataset containing CI of different levels
4144
ssi <- lapply(lvl,
4245
function(CI) {
43-
z <- stats::qlogis(0.5 + CI / 2)
46+
z <- stats::qlogis(0.5 + CI / 2, scale = s)
4447
out <- mutate(df, Lower = .data$Mean - z, Upper = .data$Mean + z, Level = CI)
4548
return(out)
4649
}) %>%

R/prepare_standata.R

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,16 @@ prepare_standata.AR1 <- function(model, train, test = NULL, ...) {
201201
add_prior(list(tau = numeric(0)))
202202
}
203203

204+
prepare_standata.OrderedRW <- function(model, train, test = NULL, ...) {
205+
# By default, logistic distribution with unknown delta
206+
NextMethod() %>%
207+
c(list(
208+
measurement_distribution = 0,
209+
delta_known = 0,
210+
delta_data = matrix(numeric(0), nrow = 0, ncol = model$max_score - 1)
211+
))
212+
}
213+
204214
# Prepare data for Markov Chain model -------------------------------------
205215

206216
#' Stop if the dataframe is not a correct input for the Markov Chain model.

0 commit comments

Comments
 (0)