ghurault
diff --git a/‎DESCRIPTION
Lines changed: 1 addition & 1 deletion b/‎DESCRIPTION
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/model_doc.R
Lines changed: 32 additions & 20 deletions b/‎R/model_doc.R
Lines changed: 32 additions & 20 deletions
diff --git a/‎R/model_prior.R
Lines changed: 13 additions & 14 deletions b/‎R/model_prior.R
Lines changed: 13 additions & 14 deletions
diff --git a/‎R/parameters.R
Lines changed: 7 additions & 19 deletions b/‎R/parameters.R
Lines changed: 7 additions & 19 deletions
diff --git a/‎R/plot_fit.R
Lines changed: 6 additions & 3 deletions b/‎R/plot_fit.R
Lines changed: 6 additions & 3 deletions
diff --git a/‎R/prepare_standata.R
Lines changed: 10 additions & 0 deletions b/‎R/prepare_standata.R
Lines changed: 10 additions & 0 deletions
@@ -1,6 +1,6 @@
 Package: EczemaPred
 Title: Predicting the Evolution of Eczema Severity
-Version: 0.1.1
+Version: 0.2.0
 Authors@R: 
     person(given = "Guillem",
            family = "Hurault",
 
@@ -60,53 +60,65 @@ NULL
 #' For more details see the BinRW [vignette](https://ghurault.github.io/EczemaPred/articles/BinRW.html).
 #'
 #' @param max_score Maximum value that the score can take
-#' @param prior Named list of the model's priors. If `NULL`, uses the default prior for the model (see [default_prior()]).
+#' @param prior Named list of the model's priors.
+#' If `NULL`, uses the default prior for the model (see [default_prior()]).
 #'
 #' @details Details of the model are available in the [paper](#).
 #'
 #' @section Parameters:
 #'
 #' ## Population parameters:
 #'
-#' - `sigma`: Standard deviation of the random walk
+#' - `sigma_lat`: Standard deviation of the random walk
+#' - `sigma_meas`: Standard deviation (not scale) of the logistic distribution (in `[0, max_score]` space)
+#' - `sigma_tot`: Total standard deviation for prediction one step ahead
+#' - `rho2`: Proportion of measurement variance to the total variance.
+#' It can be interpreted similarly to an R-squared, the proportion of the explained variance
+#' (the variance of the measurements) in the total variance.
 #' - `mu_y0`: Population mean of `y0` (initial condition).
 #' - `sigma_y0`: Population standard deviation of `y0` (initial condition).
-#' - `delta`: Difference between cutpoints (vector of length `max_score - 1`)
-#' - `ct`: Cutpoints (vector of length `max_score`)
-#' - `p0`: Probability distribution of the average patient at t0 (vector of length `max_score`)
+#' - `delta`: Relative difference between cutpoints (simplex of length `max_score - 1`)
+#' - `ct`: Cutpoints (vector of length `max_score`, in `[0, max_score]` space)
 #'
 #' ## Patient-dependent parameters:
 #'
-#' - `y0`: `y_lat` at t0.
+#' - `y0`: initial latent score (`y_lat` at t0).
 #'
 #' ## Observation-dependent (patient- and time-dependent) parameters:
 #'
-#' - `y_lat`: Latent score
+#' - `y_lat`: Latent score (in `[0, max_score]` space)
 #'
 #' See `list_parameters(model = "OrderedRW")` for more details.
 #'
 #' @section Priors:
-#' The priors are passed as a named list with elements `delta`, `sigma`, `mu_y0` and `sigma_y0`
+#' The priors are passed as a named list with elements `delta`, `sigma_lat`, `sigma_meas`, `mu_y0` and `sigma_y0`
 #' specifying priors for the corresponding parameters.
 #'
-#' The element `delta` should be a matrix with 2 rows and `max_score - 1` columns,
-#' such as the i-th column is a vector with values x1 and x2, where x2 > 0 and
-#' `delta[i] ~ normal+(x1, x2)`.
-#' The other parameters are normalised by the difference between the highest and lowest cutpoints (approx. the range of the score),
-#' and their priors are defined by a vector of length 2, containing values for x1 and x2, x2 > 0, such as:
+#' The element `delta` should be a vector X1 of length `max_score - 1`,
+#' such that all elements of X1 are positive and
+#' `delta ~ dirichlet(X1)`.
 #'
-#' - `sigma ~ normal+(x1, x2)`
+#' The latent score can be interpreted in the original `[0, max_score]` space,
+#' the priors for the other parameters are specified normalised by `max_score`.
+#' Their priors are defined by a vector of length 2, containing values for x1 and x2, x2 > 0, such that:
+#'
+#' - `sigma_meas / max_score ~ lognormal(x1, x2)`
+#' - `sigma_lat / max_score ~ lognormal(x1, x2)`
 #' - `mu_y0 ~ normal(x1, x2)`
 #' - `sigma_y0 ~ normal+(x1, x2)`
 #'
-#' NB: `delta`, `sigma` and `sigma_y0` are constrained to be positive so x1 are usually set to 0 to define a half-normal distribution.
+#' NB: for the lognormal distribution, x1 corresponds to the mean of the log and x2 to the sd of the log.
+#' NB: `sigma_y0` is constrained to be positive so x1 is usually set to 0 to define a half-normal distribution.
 #'
 #' @section Default priors:
-#' - The default prior for `delta` is set so that `delta` is less than the width of the logistic distribution.
-#' - The default prior for `sigma` assumes it would be to go to a state where `y = 0` is the most likely outcome to
-#' a state where `y = M` in two transitions.
-#' - The default priors for `mu_y0` and `sigma_y0` have reasonable ranges and translate to an approximately uniform prior
-#' over the range of the score for `y0`.
+#' - The default prior for `delta` is a symmetric Dirichlet distribution with all concentration parameters equal to 2.
+#' - The default priors for `sigma_meas` and `sigma_lat` are lognormal distributions which translate to
+#' a 95% CI that is approximately `[0.02, 0.40] * M`.
+#' The prior for `sigma_lat` thus allows fast or slow transitions from a state where `y = 0`
+#' is the most likely outcome to a state where `y = M` is the most likely outcome.
+#' The prior for `sigma_meas` allows very precise or imprecise measurements.
+#' - The default priors for `mu_y0` and `sigma_y0` have reasonable ranges and translate to
+#' an approximately uniform prior over the range of the score for `y0`.
 #'
 #' @name OrderedRW
 #'
 
@@ -81,34 +81,33 @@ validate_prior.OrderedRW <- function(model, ...) {
   prior <- model$prior
   stopifnot(
     is.list(prior),
-    all(c("delta", "sigma", "mu_y0", "sigma_y0") %in% names(prior)),
+    all(c("delta", "sigma_meas", "sigma_lat", "mu_y0", "sigma_y0") %in% names(prior)),
     all(vapply(prior, is.numeric, logical(1))),
-    dim(prior$delta_sd) == c(2, model$max_score - 1),
-    all(prior$delta[2, ] > 0),
-    all(vapply(prior[c("sigma", "mu_y0", "sigma_y0")], function(x) {length(x) == 2}, logical(1))),
-    all(vapply(prior[c("sigma", "mu_y0", "sigma_y0")], function(x) {x[2] > 0}, logical(1)))
+    length(prior$delta) == model$max_score - 1,
+    all(prior$delta > 0),
+    all(vapply(prior[c("sigma_meas", "sigma_lat", "mu_y0", "sigma_y0")], function(x) {length(x) == 2}, logical(1))),
+    all(vapply(prior[c("sigma_meas", "sigma_lat", "mu_y0", "sigma_y0")], function(x) {x[2] > 0}, logical(1)))
   )
 }
 
 #' @export
 default_prior.OrderedRW <- function(model, ...) {
   list(
-    delta = matrix(rep(c(0, pi / sqrt(3) * 2), model$max_score - 1),
-                   nrow = 2, byrow = FALSE),
-    sigma = c(0, 0.1),
+    delta = rep(2, model$max_score - 1),
+    sigma_meas = c(-log(10), 0.5 * log(4)),
+    sigma_lat = c(-log(10), 0.5 * log(4)),
     mu_y0 = c(0.5, 0.25),
     sigma_y0 = c(0, 0.125)
   )
 }
 
 #' @export
 print_prior.OrderedRW <- function(model, digits = 2, ...) {
-  for (i in 1:(model$max_score - 1)) {
-    print_distribution(paste0("delta[", i, "]"), "normal+", model$prior$delta[, i])
-  }
-  print_distribution("sigma", "normal+", model$prior$sigma, digits = digits)
-  print_distribution("mu_y0", "normal", model$prior$mu_y0, digits = digits)
-  print_distribution("sigma_y0", "normal+", model$prior$sigma_y0, digits = digits)
+  print_distribution("delta", "dirichlet", model$prior$delta, digits = digits)
+  print_distribution("sigma_meas / max_score", "lognormal", model$prior$sigma_meas, digits = digits)
+  print_distribution("sigma_lat / max_score", "lognormal", model$prior$sigma_lat, digits = digits)
+  print_distribution("mu_y0 / max_score", "normal", model$prior$mu_y0, digits = digits)
+  print_distribution("sigma_y0 / max_score", "normal+", model$prior$sigma_y0, digits = digits)
 }
 
 # BinMC -------------------------------------------------------------------
 
@@ -30,26 +30,14 @@ list_parameters.BinRW <- function(model, main = TRUE, ...) {
 
 }
 
-#' @rdname list_parameters
-#' @importFrom HuraultMisc is_scalar
 #' @export
-#' @examples
-#' list_parameters(EczemaModel("OrderedRW", max_score = 100))
-list_parameters.OrderedRW <- function(model, main = TRUE, ...) {
-
-  stopifnot(is_scalar(main),
-            is.logical(main))
-
-  out <- list(Population = c("sigma", "mu_y0", "sigma_y0", "p0", "ct", "delta"),
-              Patient = "y0",
-              PatientTime = c("y_lat", "y_rep"),
-              Test = c("y_pred", "lpd", "cum_err"))
-  if (main) {
-    out$Population <- setdiff(out$Population, "p0")
-  }
-
-  return(out)
-
+list_parameters.OrderedRW <- function(model, ...) {
+  list(
+    Population = c("sigma_meas", "sigma_lat", "rho2", "sigma_tot", "ct", "delta", "mu_y0", "sigma_y0"),
+    Patient = "y0",
+    PatientTime = c("y_lat", "y_rep"),
+    Test = c("y_pred", "lpd", "cum_err")
+  )
 }
 
 #' @rdname list_parameters
 
@@ -16,7 +16,7 @@
 plot_latent_OrderedRW <- function(fit, id, patient_id) {
 
   stopifnot(is_stanfit(fit),
-            all(c("y_lat", "ct") %in% fit@model_pars),
+            all(c("y_lat", "ct", "sigma_meas") %in% fit@model_pars),
             is.data.frame(id),
             nrow(id) == fit@par_dims[["y_lat"]],
             all(c("Patient", "Time", "Index") %in% colnames(id)),
@@ -28,19 +28,22 @@ plot_latent_OrderedRW <- function(fit, id, patient_id) {
                  sapply(mean))
   ct <- rstan::extract(fit, pars = "ct")[[1]] %>%
     apply(2, mean)
+  sigma_meas <- rstan::extract(fit, pars = "sigma_meas")[[1]] %>%
+    mean()
+  s <- sigma_meas * sqrt(3) / pi
 
   max_score <- length(ct)
   lvl <- seq(0.1, 0.9, 0.1)
 
   # Label location
   midpoint <- (ct - lag(ct))[-1] / 2
   midpoint <- ct[1:length(midpoint)] + midpoint
-  midpoint <- c(ct[1] - 3.5, midpoint, ct[length(ct)] + 3.5)
+  midpoint <- c(ct[1] - 1, midpoint, ct[length(ct)] + 1)
 
   # Dataset containing CI of different levels
   ssi <- lapply(lvl,
                 function(CI) {
-                  z <- stats::qlogis(0.5 + CI / 2)
+                  z <- stats::qlogis(0.5 + CI / 2, scale = s)
                   out <- mutate(df, Lower = .data$Mean - z, Upper = .data$Mean + z, Level = CI)
                   return(out)
                 }) %>%
 
@@ -201,6 +201,16 @@ prepare_standata.AR1 <- function(model, train, test = NULL, ...) {
     add_prior(list(tau = numeric(0)))
 }
 
+prepare_standata.OrderedRW <- function(model, train, test = NULL, ...) {
+  # By default, logistic distribution with unknown delta
+  NextMethod() %>%
+    c(list(
+      measurement_distribution = 0,
+      delta_known = 0,
+      delta_data = matrix(numeric(0), nrow = 0, ncol = model$max_score - 1)
+    ))
+}
+
 # Prepare data for Markov Chain model -------------------------------------
 
 #' Stop if the dataframe is not a correct input for the Markov Chain model.