Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use nice illustration of CRPS/DSS/Log score + the number of samples needed for scores to converge #933

Open
nikosbosse opened this issue Sep 28, 2024 · 1 comment
Labels
documentation Improvements or additions to documentation

Comments

@nikosbosse
Copy link
Contributor

We have this visualisation, which among others makes the point that you need quite a lot of samples for the sample-based log score to be reliable.

image

image

It was previously used in the manuscript, but we're not using it anymore. I like it and think it makes an important point. It would be really nice to use this visualisation in a vignette explaining which score to use when.

This was the code that generated the plot:

library(data.table)
library(scoringutils)
library(ggplot2)

# ==============================================================================
# =================== Convergence of scores ====================================
# ==============================================================================

# Simulation parameters for the convergence experiment:
# log-spaced sample sizes from 10 to 100k, with n_rep repetitions each.
sample_sizes <- round(10^(seq(1, 5, 0.1)))
n_rep <- 500

true_value <- 0  # the observed value all forecasts are scored against
sd <- 3          # sd of the predictive distribution N(mu, sd)
mu <- 2          # mean of the predictive distribution

# Analytical (closed-form) scores for a N(mu, sd) forecast of `true_value`;
# these are the targets the sample-based scores should converge to.
true_crps <- scoringRules::crps(y = true_value, family = "normal", mean = mu, sd = sd)
true_logs <- scoringRules::logs(y = true_value, family = "normal", mean = mu, sd = sd)
true_dss <- scoringRules::dss_norm(y = true_value, mean = mu, sd = sd)

# Compute (or load from cache) sample-based scores for every sample size.
# For each size i we draw n_rep independent sets of i samples from the
# predictive distribution and score each set against `true_value`.
cache_path <- "inst/manuscript/output/sample-convergence.rds"
if (!file.exists(cache_path)) {
  results <- list()
  for (i in sample_sizes) {
    # matrix of i draws x n_rep repetitions, as a data.table
    samples <- as.data.table(
      replicate(n_rep,
                rnorm(n = i, mean = mu, sd = sd))
    )
    setnames(samples, as.character(seq_len(n_rep)))
    samples[, sample_id := seq_len(i)]
    # long format: one row per (sample_id, repetition)
    samples <- melt(samples, id.vars = "sample_id",
                    variable.name = "repetition",
                    value.name = "predicted")
    samples[, observed := true_value]
    samples <- as_forecast_sample(samples)
    results[[paste(i)]] <- score(
      samples, get_metrics(samples, select = c("crps", "log_score", "dss"))
    )[, n_samples := i]
  }
  saveRDS(results, cache_path)
} else {
  results <- readRDS(cache_path)
}

# Combine the per-sample-size score tables and reshape to long format:
# one row per (n_samples, repetition, score-type) combination.
results <- rbindlist(results)
results <- melt(results, id.vars = c("n_samples", "repetition"),
                 variable.name = "score")

# Format axis tick labels: values >= 1000 are abbreviated as "<x>k"
# (e.g. 10000 -> "10k"). Always returns a character vector — the
# original returned the numeric input unchanged when no element reached
# 1000, making the return type depend on the data.
label_fn <- function(x) {
  ifelse(x >= 1000,
         paste0(x / 1000, "k"),
         as.character(x))
}

# Summarise scores across repetitions for every (sample size, score):
# mean plus the central 50% and 90% intervals.
df <- results[, .(mean = mean(value),
                   quantile_0.05 = quantile(value, 0.05),
                   quantile_0.25 = quantile(value, 0.25),
                   quantile_0.75 = quantile(value, 0.75),
                   quantile_0.95 = quantile(value, 0.95)),
               by = c("n_samples", "score")]
# Attach the analytical score as the convergence target for each metric.
df[score == "crps", true_score := true_crps]
df[score == "log_score", true_score := true_logs]
df[score == "dss", true_score := true_dss]

# Human-readable labels for the plot facets
# (note: this converts the factor `score` column to character).
df[, score := ifelse(score == "dss", "DSS",
                     ifelse(score == "crps", "CRPS",
                            "Log score"))]

# Convergence plot: mean sample-based score with 50%/90% bands across
# repetitions, versus the number of samples (log10 x axis). The dashed
# horizontal line is the analytical score each metric should converge to.
p_convergence <- ggplot(df, aes(x = n_samples)) +
  geom_line(aes(y = mean)) +
  # 90% interval across repetitions
  geom_ribbon(aes(ymax = quantile_0.95, ymin = quantile_0.05),
              alpha = 0.1) +
  # 50% interval across repetitions
  geom_ribbon(aes(ymax = quantile_0.75, ymin = quantile_0.25),
              alpha = 0.3) +
  geom_hline(aes(yintercept = true_score), linetype = "dashed") +
  facet_wrap(~ score, scales = "free") +
  scale_x_continuous(trans = "log10", labels = label_fn) +
  theme_scoringutils() +
  labs(x = "Number of samples",
       y = "Score based on samples")


# ==============================================================================
# =================== scores and outliers, sd ==================================
# ==============================================================================

library(scoringRules)
library(dplyr)
library(patchwork)

# Simulation parameters for the sd/outlier experiment.
# NOTE(review): `n_rep`, `sd` and `mu` shadow the values used in the
# convergence section above — fine here because p_convergence is already
# built, but worth keeping in mind when reordering this script.
n_steps <- 500
n_rep <- 5000
true_mean <- 0
true_sd <- 5
# draws from the true data-generating distribution N(0, 5)
true_values <- rnorm(n = n_rep, mean = true_mean, sd = true_sd)
# candidate predictive sds (log-spaced from 0.1 to ~40) and means to sweep
sd <- 10^(seq(-1, 1.6, length.out = n_steps))
mu <- seq(0, 100, length.out = n_steps)


# look at effect of change in sd on score:
# mean score over all observations as a function of the predictive sd,
# with the predictive mean held at the true mean. `by = "sd"` evaluates
# the mean once per candidate sd value.
res_sd <- data.table(sd = sd,
                     mu = true_mean)

res_sd[, `:=`(CRPS = mean(scoringRules::crps(y = true_values, family = "normal", mean = mu, sd = sd)),
               `Log score` = mean(scoringRules::logs(y = true_values, family = "normal", mean = mu, sd = sd)),
               DSS = mean(scoringRules::dss_norm(y = true_values, mean = mu, sd = sd))),
       by = "sd"]

# Plot: score vs predictive sd, one line per metric; the dashed vertical
# line marks the sd of the true data-generating distribution.
deviation_sd <- res_sd |>
  melt(id.vars = c("sd", "mu"), value.name = "value", variable.name = "Score") |>
  ggplot(aes(x = sd, y = value, color = Score)) +
  geom_line() +
  # colour-blind-friendly palette (Okabe-Ito)
  scale_color_discrete(type = c("#E69F00", "#56B4E9", "#009E73")) +
  theme_scoringutils() +
  geom_vline(aes(xintercept = 5), linetype = "dashed") +
  # clip the y axis: the log score diverges for very small sds
  coord_cartesian(ylim=c(0, 20)) +
  annotate(geom="text", x=6, y=12, label="Sd of true \ndata-generating \ndistribution: 5",
           color="black", hjust = "left", size = 3) +
  labs(y = "Score", x = "Standard deviation of predictive distribution")



# define simulation parameters
# Observed values to sweep over (0 to 4 standard deviations out),
# scored against a fixed predictive distribution N(true_mu, true_sd).
true_values <- seq(0, 4, length.out = 1000)
true_sd <- 1
true_mu <- 0

# look at effect of the observed value (outliers) on the score, with a
# fixed predictive distribution N(0, 1). Scores are divided by 10 so they
# fit on the same axis as the density overlay in the plot below; the
# scaling is undone by the axis label function.
res_mu2 <- data.table(true_value = true_values)

res_mu2[, `:=`(CRPS = scoringRules::crps(y = true_value, family = "normal", mean = true_mu, sd = true_sd) / 10,
                `Log score` = scoringRules::logs(y = true_value, family = "normal", mean = true_mu, sd = true_sd) / 10,
                DSS = scoringRules::dss_norm(y = true_value, mean = true_mu, sd = true_sd) / 10)]

# Undo the /10 scaling applied to the scores above when printing axis
# tick labels (shadows the earlier `label_fn`; p_convergence already
# holds a reference to the old function, so it is unaffected).
label_fn <- function(x) {
  paste0(x * 10)
}

# Plot: score as a function of the observed value, with the predictive
# density N(0, 1) overlaid in grey. Shows how quickly each score blows
# up for outliers (the log score is the most sensitive).
outlier <- res_mu2 |>
  melt(id.vars = c("true_value"), value.name = "value", variable.name = "Score") |>
  ggplot(aes(x = true_value, y = value, color = Score)) +
  geom_line() +
  theme_scoringutils() +
  annotate(geom="text", x=0, y=.8, label="Predictive distribution: \nN(0,1)",
           color="black", hjust = "left", size = 3) +
  labs(y = "Score", x = "Observed value") +
  # geom_vline(aes(xintercept = 0), linetype = "dashed") +
  # same Okabe-Ito palette as the sd panel
  scale_color_discrete(type = c("#E69F00", "#56B4E9", "#009E73")) +
  geom_area(stat = "function", fun = dnorm, color = "grey", fill = "grey", alpha = 0.5, xlim = c(0, 4)) +
  # multiplies tick labels by 10 to undo the /10 scaling of the scores.
  # NOTE(review): `label` relies on partial matching — spell out `labels =`.
  scale_y_continuous(label = label_fn)


# Patchwork design: the convergence panel (A) spans the top row; the
# sd panel (B) and the outlier panel (C) share the bottom row.
layout <- "
AA
BC
"

plot <- p_convergence /
deviation_sd + outlier +
  plot_layout(guides = "collect",
              design = layout) &
  # NOTE(review): plot_annotation() is conventionally added with `+`
  # rather than `&` — confirm the A/B/C tags actually render.
  plot_annotation(tag_levels = "A") &
  theme(legend.position = "bottom")

# Write the combined figure to the manuscript output folder.
ggsave("inst/manuscript/output/score-convergence-outliers.png", plot = plot,
       height = 6, width = 8)

Related: #930, #929

@nikosbosse nikosbosse added the documentation Improvements or additions to documentation label Sep 28, 2024
@nikosbosse nikosbosse added this to the scoringutils-2.x milestone Sep 28, 2024
@seabbs
Copy link
Contributor

seabbs commented Sep 30, 2024

Again, same comment as in #929: this is nice and we should plan to use it.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
documentation Improvements or additions to documentation
Projects
Development

No branches or pull requests

2 participants