Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use nice illustration of the effect of increasing the mean on increasing average scores #930

Open
nikosbosse opened this issue Sep 27, 2024 · 1 comment
Labels
documentation Improvements or additions to documentation

Comments

@nikosbosse
Copy link
Contributor

We have this visualisation:

image

image

generated by this code:

library(data.table)
library(dplyr)
library(scoringutils)
library(ggplot2)
library(tidyr)
library(patchwork)

## Real Data
ex <- example_sample_continuous |>
  filter(model == "EuroCOVIDhub-ensemble")

scores <- ex |>
  score()

setnames(scores, old = c("dss", "crps", "log_score"),
         new = c("DSS", "CRPS", "Log score"))

df <- ex[sample_id == 1] |>
  merge(scores) |>
  melt(measure.vars = c("DSS", "CRPS", "Log score"),
       variable.name = "Scoring rule", value.name = "Score")

df[, `Scoring rule` := factor(`Scoring rule`, levels = c("CRPS", "DSS", "Log score"))]

p_true <- df |>
  filter(horizon == 3, location == "DE") |>
  ggplot(aes(x = observed, y = Score, ,group = `Scoring rule`,
             colour = `Scoring rule`)) +
  geom_line() +
  scale_color_discrete(type = c("#E69F00", "#56B4E9", "#009E73")) +
  scale_y_log10() +
  scale_x_log10() +
  labs(x = "Observed value") +
  theme_scoringutils() +
  theme(legend.position = "bottom")


# ------------------------------------------------------------------------------
# illustration:
# in this we see that the mean as well as the variance of the scores scale
# for crps, while the variance stays constant for dss and log score

library(tidyr)

simulate <- function(n_samples = 5e3,
                     n_replicates = 1e3,
                     true_value = 100,
                     scale_mean = 1,
                     scale_sd = scale_mean) {
  pred <- rnorm(n_replicates * n_samples,
                mean = true_value * scale_mean,
                sd = true_value * scale_sd)

  df <- data.table(
    observed = true_value * scale_mean,
    predicted = pred,
    sample_id = 1:n_samples,
    id = paste0("id", rep(1:n_replicates, each = n_samples)),
    model = paste0("mean_", scale_mean, "_sd_", scale_sd)
  ) |>
    as_forecast_sample()

  scores <- score_simulation(df, scale_mean = scale_mean, scale_sd = scale_sd)
  return(scores)
}

score_simulation <- function(df, scale_mean = 1, scale_sd = scale_mean) {
  scores <- score(
    df,
    metrics = get_metrics(df, select = c("dss", "crps", "log_score"))
  )
  m <- summarise_scores(scores, by = "model", fun = mean) |>
    melt(id.vars = "model", value.name = "mean", variable.name = "score")

  s <- summarise_scores(scores, by = "model", fun = stats::sd) |>
    melt(id.vars = "model", value.name = "sd", variable.name = "score")

  out <- merge(m, s, by = c("model", "score")) |>
    melt(id.vars = c("model", "score"), variable.name = "type")

  return(out[])
}

scales_mean <- scales_sd <- c(1, 2, 5, 10)

grid <- expand.grid(
  scale_mean = scales_mean,
  scale_sd = scales_sd
) |>
  setDT()


if (!file.exists("inst/manuscript/output/relation-to-scale-example.rds")) {
  res <- grid |>
    rowwise() |>
    mutate(simulation := list(simulate(scale_mean = scale_mean, scale_sd = scale_sd)))

  saveRDS(res, file = "inst/manuscript/output/relation-to-scale-example.rds")
} else {
  res2 <- readRDS("inst/manuscript/output/relation-to-scale-example.rds")
}

df <- res |>
  tidyr::unnest(cols = "simulation")

df <- df |>
  rename(`Scoring rule` = score) |>
  mutate(type = ifelse(type == "mean", "Mean score", "Sd score")) |>
  mutate(`Scoring rule` = ifelse(`Scoring rule` == "dss",
                                 "DSS",
                                 ifelse(`Scoring rule` == "crps", "CRPS", "Log score")))

p1 <- df |>
  filter(scale_mean == 1,
         scale_sd < 20) |>
  ggplot(aes(y = value, x = scale_sd,
             group = `Scoring rule`, color = `Scoring rule`)) +
  geom_line() +
  facet_wrap(~ type, scales = "free") +
  scale_y_log10() +
  scale_color_discrete(type = c("#E69F00", "#56B4E9", "#009E73")) +
  scale_x_log10() +
  theme_scoringutils() +
  labs(y = "Score", x = "Sd of F and G (mean constant)")


p2 <- df |>
  filter(scale_sd == 1,
         scale_mean < 20) |>
  ggplot(aes(y = value, x = scale_mean,
             group = `Scoring rule`, color = `Scoring rule`)) +
  geom_line() +
  facet_wrap(~ type, scales = "free") +
  scale_y_log10() +
  scale_x_log10() +
  scale_color_discrete(type = c("#E69F00", "#56B4E9", "#009E73")) +
  theme_scoringutils() +
  labs(y = "Score", x = "Mean of F and G (sd constant)")

layout <- "
AAACC
BBBCC
"

p2 + p1 + p_true +
  plot_layout(guides = "collect", design = layout) &
  theme(legend.position = "bottom") &
  plot_annotation(tag_levels = 'A')

ggsave("inst/manuscript/output/illustration-effect-scale.png",
       height = 4.3, width = 8)

It was previously in the manuscript, but we're not using it anymore. But maybe we don't want it at all anymore...

Related: #929

@seabbs
Copy link
Contributor

seabbs commented Sep 30, 2024

#929 yes agree!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
documentation Improvements or additions to documentation
Projects
Development

No branches or pull requests

2 participants