Use nice illustration of the effect of increasing the mean on increasing average scores #930

nikosbosse · 2024-09-27T18:33:34Z

We have this visualisation:

generated by this code:

library(data.table)
library(dplyr)
library(scoringutils)
library(ggplot2)
library(tidyr)
library(patchwork)

## Real Data
ex <- example_sample_continuous |>
  filter(model == "EuroCOVIDhub-ensemble")

scores <- ex |>
  score()

setnames(scores, old = c("dss", "crps", "log_score"),
         new = c("DSS", "CRPS", "Log score"))

df <- ex[sample_id == 1] |>
  merge(scores) |>
  melt(measure.vars = c("DSS", "CRPS", "Log score"),
       variable.name = "Scoring rule", value.name = "Score")

df[, `Scoring rule` := factor(`Scoring rule`, levels = c("CRPS", "DSS", "Log score"))]

p_true <- df |>
  filter(horizon == 3, location == "DE") |>
  ggplot(aes(x = observed, y = Score, ,group = `Scoring rule`,
             colour = `Scoring rule`)) +
  geom_line() +
  scale_color_discrete(type = c("#E69F00", "#56B4E9", "#009E73")) +
  scale_y_log10() +
  scale_x_log10() +
  labs(x = "Observed value") +
  theme_scoringutils() +
  theme(legend.position = "bottom")


# ------------------------------------------------------------------------------
# illustration:
# in this we see that the mean as well as the variance of the scores scale
# for crps, while the variance stays constant for dss and log score

library(tidyr)

simulate <- function(n_samples = 5e3,
                     n_replicates = 1e3,
                     true_value = 100,
                     scale_mean = 1,
                     scale_sd = scale_mean) {
  pred <- rnorm(n_replicates * n_samples,
                mean = true_value * scale_mean,
                sd = true_value * scale_sd)

  df <- data.table(
    observed = true_value * scale_mean,
    predicted = pred,
    sample_id = 1:n_samples,
    id = paste0("id", rep(1:n_replicates, each = n_samples)),
    model = paste0("mean_", scale_mean, "_sd_", scale_sd)
  ) |>
    as_forecast_sample()

  scores <- score_simulation(df, scale_mean = scale_mean, scale_sd = scale_sd)
  return(scores)
}

score_simulation <- function(df, scale_mean = 1, scale_sd = scale_mean) {
  scores <- score(
    df,
    metrics = get_metrics(df, select = c("dss", "crps", "log_score"))
  )
  m <- summarise_scores(scores, by = "model", fun = mean) |>
    melt(id.vars = "model", value.name = "mean", variable.name = "score")

  s <- summarise_scores(scores, by = "model", fun = stats::sd) |>
    melt(id.vars = "model", value.name = "sd", variable.name = "score")

  out <- merge(m, s, by = c("model", "score")) |>
    melt(id.vars = c("model", "score"), variable.name = "type")

  return(out[])
}

scales_mean <- scales_sd <- c(1, 2, 5, 10)

grid <- expand.grid(
  scale_mean = scales_mean,
  scale_sd = scales_sd
) |>
  setDT()


if (!file.exists("inst/manuscript/output/relation-to-scale-example.rds")) {
  res <- grid |>
    rowwise() |>
    mutate(simulation := list(simulate(scale_mean = scale_mean, scale_sd = scale_sd)))

  saveRDS(res, file = "inst/manuscript/output/relation-to-scale-example.rds")
} else {
  res2 <- readRDS("inst/manuscript/output/relation-to-scale-example.rds")
}

df <- res |>
  tidyr::unnest(cols = "simulation")

df <- df |>
  rename(`Scoring rule` = score) |>
  mutate(type = ifelse(type == "mean", "Mean score", "Sd score")) |>
  mutate(`Scoring rule` = ifelse(`Scoring rule` == "dss",
                                 "DSS",
                                 ifelse(`Scoring rule` == "crps", "CRPS", "Log score")))

p1 <- df |>
  filter(scale_mean == 1,
         scale_sd < 20) |>
  ggplot(aes(y = value, x = scale_sd,
             group = `Scoring rule`, color = `Scoring rule`)) +
  geom_line() +
  facet_wrap(~ type, scales = "free") +
  scale_y_log10() +
  scale_color_discrete(type = c("#E69F00", "#56B4E9", "#009E73")) +
  scale_x_log10() +
  theme_scoringutils() +
  labs(y = "Score", x = "Sd of F and G (mean constant)")


p2 <- df |>
  filter(scale_sd == 1,
         scale_mean < 20) |>
  ggplot(aes(y = value, x = scale_mean,
             group = `Scoring rule`, color = `Scoring rule`)) +
  geom_line() +
  facet_wrap(~ type, scales = "free") +
  scale_y_log10() +
  scale_x_log10() +
  scale_color_discrete(type = c("#E69F00", "#56B4E9", "#009E73")) +
  theme_scoringutils() +
  labs(y = "Score", x = "Mean of F and G (sd constant)")

layout <- "
AAACC
BBBCC
"

p2 + p1 + p_true +
  plot_layout(guides = "collect", design = layout) &
  theme(legend.position = "bottom") &
  plot_annotation(tag_levels = 'A')

ggsave("inst/manuscript/output/illustration-effect-scale.png",
       height = 4.3, width = 8)

It was previously in the manuscript, but we're not using it anymore. But maybe we don't want it at all anymore...

Related: #929

The text was updated successfully, but these errors were encountered:

seabbs · 2024-09-30T20:59:38Z

#929 yes agree!

nikosbosse added the documentation Improvements or additions to documentation label Sep 27, 2024

nikosbosse added this to scoringutils future release Sep 27, 2024

nikosbosse added this to the scoringutils-2.x milestone Sep 27, 2024

This was referenced Sep 28, 2024

Use nice illustration of CRPS/DSS/Log score + the number of samples needed for scores to converge #933

Open

Use illustration of local and global scores #934

Open

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Use nice illustration of the effect of increasing the mean on increasing average scores #930

Use nice illustration of the effect of increasing the mean on increasing average scores #930

nikosbosse commented Sep 27, 2024

seabbs commented Sep 30, 2024

Use nice illustration of the effect of increasing the mean on increasing average scores #930

Use nice illustration of the effect of increasing the mean on increasing average scores #930

Comments

nikosbosse commented Sep 27, 2024

seabbs commented Sep 30, 2024