scoringutils
can work well with other existing packages.
This vignette shows some examples.
yardstick
yardstick
(belonging to the tidymodels
family of R packages) is a package designed to evaluate predictions
against observed values. It is similar to scoringutils
in
the sense that it makes a large variety of metrics available to users
through a consistent framework. However, it mostly focuses on binary /
multinomial forecasts and point forecasts (it also has a few functions
for analysis of survival data). It does not currently implement metrics
for full probabilistic forecasts.
library(yardstick)

# Combine the `accuracy` and `kap` metrics into one metric set.
class_metrics <- metric_set(accuracy, kap)

# Run the example binary forecasts through to_yardstick_binary_class() and
# evaluate the metric set once per model.
to_yardstick_binary_class(example_binary) |>
  group_by(model) |>
  class_metrics(truth = true_value, estimate = prediction)
#> # A tibble: 10 × 4
#> model .metric .estimator .estimate
#> <chr> <chr> <chr> <dbl>
#> 1 EuroCOVIDhub-baseline accuracy binary 0.824
#> 2 EuroCOVIDhub-ensemble accuracy binary 0.613
#> 3 UMass-MechBayes accuracy binary 0.531
#> 4 epiforecasts-EpiNow2 accuracy binary 0.563
#> 5 <NA> accuracy binary NaN
#> 6 EuroCOVIDhub-baseline kap binary 0.0585
#> 7 EuroCOVIDhub-ensemble kap binary 0.0703
#> 8 UMass-MechBayes kap binary 0.0320
#> 9 epiforecasts-EpiNow2 kap binary 0.00685
#> 10 <NA> kap binary NaN
# Average precision per model; rows with a missing prediction are dropped
# before the metric is computed.
to_yardstick_binary_class_prob(example_binary) |>
  group_by(model) |>
  filter(!is.na(prediction)) |>
  average_precision(
    truth = true_value,
    prediction,
    event_level = "first"
  )
#> # A tibble: 4 × 4
#> model .metric .estimator .estimate
#> <chr> <chr> <chr> <dbl>
#> 1 EuroCOVIDhub-baseline average_precision binary 0.196
#> 2 EuroCOVIDhub-ensemble average_precision binary 0.423
#> 3 UMass-MechBayes average_precision binary 0.529
#> 4 epiforecasts-EpiNow2 average_precision binary 0.411
# Mean absolute error of the continuous example forecasts, one row per model.
group_by(example_continuous, model) |>
  mae(truth = true_value, estimate = prediction)
#> # A tibble: 5 × 4
#> model .metric .estimator .estimate
#> <chr> <chr> <chr> <dbl>
#> 1 EuroCOVIDhub-baseline mae standard 22829.
#> 2 EuroCOVIDhub-ensemble mae standard 15125.
#> 3 UMass-MechBayes mae standard 133.
#> 4 epiforecasts-EpiNow2 mae standard 19295.
#> 5 <NA> mae standard NaN
probably
The probably
package (part of the tidymodels family) contains tools to facilitate the
assessment of calibration, conversion of probabilities to class
predictions and optimal probability thresholds.
Plots to assess the calibration of binary forecasts can be used directly with the output of `score()`.
# Score the example binary forecasts and keep only the two models to plot.
s <- score(example_binary)
s <- s[
  model %in% c("EuroCOVIDhub-baseline", "EuroCOVIDhub-ensemble")
]

library(probably)

# Draw the three calibration plots from probably, each split by model.
cal_plot_breaks(
  .data = s,
  truth = true_value,
  estimate = prediction,
  .by = model
)
cal_plot_windowed(
  .data = s,
  truth = true_value,
  estimate = prediction,
  .by = model
)
cal_plot_logistic(
  .data = s,
  truth = true_value,
  estimate = prediction,
  .by = model
)
predtools
library(predtools)

# Score the example binary forecasts; the messages produced by the input
# checks are shown directly below.
s <- score(example_binary)
#> The following messages were produced when checking inputs:
#> 1. 144 values for `true_value` are NA in the data provided and the corresponding rows were removed. This may indicate a problem if unexpected.
#> 2. 144 values for `prediction` are NA in the data provided and the corresponding rows were removed. This may indicate a problem if unexpected.

# Keep two models and draw a calibration plot grouped by model.
s <- s[
  model %in% c("EuroCOVIDhub-baseline", "EuroCOVIDhub-ensemble")
]
calibration_plot(
  data = s,
  obs = "true_value",
  pred = "prediction",
  group = "model"
)
#> $calibration_plot
library(scoring)

# Score every row of the example binary forecasts with calcscore(), using the
# "pow" family with param = 2 and scores bounded to [-1, 1].
calcscore(
  true_value ~ prediction,
  data = example_binary,
  fam = "pow",
  param = 2,
  bounds = c(-1, 1)
)
#> Warning in calcscore.default(object = c(NA, NA, NA, NA, NA, NA, NA, NA, : Some
#> scores are NA. This may be due to missing data in your forecasts or outcomes,
#> or an ill-defined param argument.
#> [1] NA NA NA NA NA NA NA NA
#> [9] NA NA NA NA NA NA NA NA
#> [17] NA NA NA NA NA NA NA NA
#> [25] NA NA NA NA NA NA NA NA
#> [33] NA NA NA NA -0.71875 -0.54875 -0.63875 -0.63875
#> [41] -0.50000 -0.39500 -0.54875 -0.50000 -0.44875 -0.50000 -0.50000 -0.44875
#> [49] -0.54875 -0.54875 -0.50000 -0.63875 -0.50000 -0.50000 -0.33875 -0.54875
#> [57] -0.44875 -0.59500 -0.59500 -0.54875 -0.54875 -0.33875 -0.50000 -0.50000
#> [65] -0.50000 -0.54875 -0.54875 -0.63875 -0.44875 -0.71875 -0.50000 -0.39500
#> [73] -0.59500 -0.33875 -0.39500 -0.54875 -0.54875 -0.33875 -0.59500 -0.63875
#> [81] -0.50000 -0.44875 -0.50000 -0.59500 -0.68000 -0.59500 -0.50000 -0.59500
#> [89] -0.39500 -0.50000 -0.59500 -0.63875 -0.68000 -0.28000 -0.33875 -0.28000
#> [97] -0.50000 -0.59500 -0.44875 -0.59500 -0.54875 -0.59500 -0.59500 -0.75500
#> [105] -0.71875 -0.71875 -0.50000 -0.50000 -0.50000 -0.39500 -0.54875 -0.63875
#> [113] -0.63875 -0.68000 -0.50000 -0.44875 -0.33875 -0.50000 -0.44875 -0.33875
#> [121] -0.59500 -0.71875 -0.63875 -0.50000 -0.59500 -0.75500 -0.59500 -0.68000
#> [129] -0.50000 -0.59500 -0.63875 -0.59500 -0.68000 -0.71875 -0.68000 -0.50000
#> [137] -0.54875 -0.50000 -0.44875 -0.33875 -0.39500 -0.59500 -0.68000 -0.68000
#> [145] -0.63875 -0.63875 -0.63875 -0.68000 -0.54875 -0.59500 -0.39500 -0.50000
#> [153] -0.50000 -0.68000 -0.68000 -0.44875 -0.44875 -0.54875 -0.50000 -0.28000
#> [161] -0.33875 -0.33875 -0.71875 -0.68000 -0.63875 -0.63875 -0.63875 -0.82000
#> [169] -0.63875 -0.59500 -0.44875 -0.59500 -0.33875 -0.59500 -0.75500 -0.63875
#> [177] -0.54875 -0.15500 -0.28000 -0.21875 -0.44875 -0.33875 -0.21875 -0.63875
#> [185] -0.63875 -0.28000 -0.75500 -0.84875 -0.82000 -0.59500 -0.39500 -0.21875
#> [193] -0.44875 -0.63875 -0.33875 -0.63875 -0.59500 -0.50000 -0.33875 -0.54875
#> [201] -0.39500 -0.39500 -0.44875 -0.28000 -0.39500 -0.28000 -0.39500 -0.82000
#> [209] -0.75500 -0.82000 -0.21875 -0.44875 -0.50000 -0.54875 -0.39500 -0.54875
#> [217] -0.50000 -0.63875 -0.68000 -0.50000 -0.33875 -0.39500 -0.44875 -0.39500
#> [225] -0.50000 -0.21875 -0.15500 -0.15500 -0.71875 -0.78875 0.05125 -0.21875
#> [233] -0.33875 -0.21875 -0.71875 -0.71875 -0.63875 -0.68000 -0.63875 -0.75500
#> [241] -0.54875 -0.21875 -0.59500 -0.39500 -0.50000 -0.63875 -0.15500 -0.08875
#> [249] -0.08875 -0.50000 -0.08875 -0.15500 -0.33875 -0.75500 -0.75500 -0.75500
#> [257] -0.44875 -0.50000 -0.15500 -0.71875 NA NA NA NA
#> [265] NA NA NA NA NA NA NA NA
#> [273] NA NA NA NA NA NA NA NA
#> [281] NA NA NA NA NA NA NA NA
#> [289] NA NA NA NA NA NA NA NA
#> [297] -0.54875 -0.50000 -0.50000 -0.68000 -0.68000 -0.63875 -0.54875 -0.50000
#> [305] -0.78875 -0.63875 -0.50000 -0.63875 -0.59500 -0.71875 -0.54875 -0.68000
#> [313] -0.54875 -0.59500 -0.50000 -0.59500 -0.39500 -0.68000 -0.71875 -0.39500
#> [321] -0.54875 -0.63875 -0.63875 -0.68000 -0.54875 -0.54875 -0.63875 -0.59500
#> [329] -0.54875 -0.59500 -0.59500 -0.71875 -0.63875 -0.75500 -0.33875 -0.50000
#> [337] -0.39500 -0.59500 -0.63875 -0.59500 -0.21875 -0.68000 -0.75500 1.00000
#> [345] -0.63875 -0.59500 -0.39500 -0.50000 -0.82000 -0.21875 -0.59500 -0.54875
#> [353] -0.68000 -0.68000 -0.54875 -0.44875 -0.63875 -0.68000 -0.39500 -0.71875
#> [361] -0.21875 -0.44875 -0.68000 0.90125 -0.59500 -0.68000 -0.28000 -0.68000
#> [369] -0.50000 -0.33875 -0.33875 -0.71875 -0.71875 -0.54875 -0.63875 -0.44875
#> [377] -0.21875 -0.68000 -0.44875 -0.08875 -0.54875 -0.44875 -0.99875 -0.71875
#> [385] -0.75500 -0.71875 -0.82000 -0.63875 -0.15500 -0.63875 -0.54875 -0.71875
#> [393] -0.75500 -0.63875 -0.33875 -0.54875 -0.54875 -0.68000 -0.68000 -0.68000
#> [401] -0.59500 -0.78875 -0.78875 -0.82000 -0.63875 -0.71875 -0.68000 -0.71875
#> [409] -0.54875 -0.50000 -0.75500 -0.63875 -0.68000 -0.50000 -0.44875 -0.59500
#> [417] -0.78875 -0.63875 -0.54875 -0.87500 -0.84875 -0.89875 -0.78875 -0.71875
#> [425] -0.54875 -0.75500 -0.50000 -0.59500 -0.75500 -0.78875 -0.78875 -0.59500
#> [433] -0.75500 -0.75500 -0.54875 -0.82000 -0.28000 -0.28000 -0.68000 -0.92000
#> [441] -0.92000 -0.82000 -0.39500 -0.39500 -0.68000 -0.15500 -0.54875 -0.63875
#> [449] -0.89875 -0.68000 -0.68000 -0.68000 -0.68000 -0.39500 -0.44875 -0.28000
#> [457] -0.44875 -0.33875 -0.87500 0.20125 0.20125 -0.28000 -0.44875 -0.44875
#> [465] -0.75500 -0.54875 -0.54875 -0.78875 -0.75500 -0.78875 -0.54875 -0.68000
#> [473] -0.59500 -0.15500 -0.39500 -0.21875 -0.39500 -0.21875 -0.33875 -0.87500
#> [481] 0.36125 -0.02000 -0.02000 -0.21875 -0.39500 -0.21875 -0.63875 -0.59500
#> [489] -0.75500 -0.71875 -0.87500 -0.59500 -0.54875 -0.50000 -0.08875 -0.21875
#> [497] -0.63875 0.28000 -0.21875 0.36125 0.20125 -0.02000 -0.21875 -0.78875
#> [505] -0.71875 -0.71875 -0.78875 -0.63875 -0.71875 -0.28000 -0.75500 NA
#> [513] NA NA NA NA NA NA NA NA
#> [521] NA NA NA NA NA NA NA NA
#> [529] NA NA NA NA NA NA NA NA
#> [537] NA NA NA NA NA NA NA NA
#> [545] NA NA NA -0.63875 -0.78875 -0.44875 -0.54875 -0.84875
#> [553] -0.71875 -0.75500 -0.21875 -0.54875 -0.71875 -0.75500 -0.15500 -0.59500
#> [561] -0.54875 -0.78875 -0.84875 -0.87500 -0.44875 -0.54875 -0.68000 -0.54875
#> [569] -0.39500 -0.54875 -0.68000 -0.82000 -0.75500 -0.63875 -0.33875 -0.63875
#> [577] -0.59500 -0.44875 -0.54875 -0.54875 -0.75500 -0.78875 -0.82000 -0.68000
#> [585] -0.54875 -0.50000 -0.59500 -0.63875 -0.59500 -0.39500 -0.44875 -0.33875
#> [593] -0.71875 -0.82000 -0.28000 -0.39500 -0.33875 -0.44875 -0.50000 -0.50000
#> [601] -0.15500 -0.78875 -0.95500 -0.87500 -0.63875 -0.44875 -0.50000 -0.50000
#> [609] -0.50000 -0.59500 -0.44875 -0.33875 -0.50000 -0.71875 -0.02000 -0.54875
#> [617] -0.33875 -0.39500 -0.50000 -0.78875 -0.39500 -0.63875 -0.78875 -0.68000
#> [625] -0.87500 -0.54875 -0.50000 -0.59500 -0.68000 -0.44875 -0.54875 -0.33875
#> [633] -0.44875 -0.44875 -0.28000 -0.08875 -0.44875 -0.21875 -0.44875 -0.08875
#> [641] -0.15500 -0.59500 -0.71875 -0.71875 -0.87500 -0.93875 -0.33875 -0.44875
#> [649] -0.59500 -0.68000 -0.50000 -0.63875 -0.28000 -0.50000 -0.54875 -0.28000
#> [657] -0.28000 -0.15500 -0.08875 -0.50000 -0.63875 -0.68000 -0.50000 -0.50000
#> [665] -0.78875 -0.71875 -0.93875 -0.59500 -0.63875 -0.59500 -0.78875 -0.68000
#> [673] -0.68000 -0.39500 -0.21875 -0.54875 -0.21875 -0.28000 -0.39500 -0.50000
#> [681] -0.63875 -0.59500 -0.63875 -0.63875 -0.39500 -0.78875 -0.78875 -0.87500
#> [689] -0.71875 -0.68000 -0.44875 -0.68000 -0.63875 -0.59500 -0.28000 -0.28000
#> [697] -0.28000 -0.15500 -0.39500 -0.39500 -0.54875 -0.39500 -0.44875 -0.68000
#> [705] -0.82000 -0.54875 -0.84875 -0.82000 -0.92000 -0.50000 -0.44875 -0.44875
#> [713] -0.68000 -0.63875 -0.54875 -0.44875 -0.50000 -0.59500 -0.39500 -0.54875
#> [721] -0.50000 -0.15500 -0.50000 -0.54875 -0.63875 -0.63875 -0.71875 -0.78875
#> [729] -0.75500 -0.84875 -0.59500 -0.63875 -0.68000 -0.82000 -0.54875 -0.50000
#> [737] -0.50000 -0.33875 -0.44875 -0.33875 -0.50000 -0.44875 -0.54875 -0.63875
#> [745] -0.44875 -0.63875 -0.75500 -0.44875 -0.84875 -0.75500 -0.75500 -0.75500
#> [753] -0.54875 -0.54875 -0.71875 -0.50000 -0.28000 -0.78875 -0.59500 -0.59500
#> [761] -0.50000 -0.54875 -0.63875 -0.75500 -0.50000 -0.82000 -0.87500 -0.44875
#> [769] -0.63875 -0.59500 -0.54875 NA NA NA NA NA
#> [777] NA NA NA NA NA NA NA NA
#> [785] NA NA NA NA NA NA NA NA
#> [793] NA NA NA NA NA NA NA NA
#> [801] NA NA NA NA NA NA NA -0.33875
#> [809] -0.50000 -0.68000 -0.50000 -0.63875 -0.54875 -0.54875 -0.54875 -0.44875
#> [817] -0.59500 -0.50000 -0.44875 -0.39500 -0.63875 -0.44875 -0.63875 -0.71875
#> [825] -0.54875 -0.33875 -0.59500 -0.44875 -0.44875 -0.39500 -0.63875 -0.39500
#> [833] -0.59500 -0.59500 -0.63875 -0.50000 -0.44875 -0.54875 -0.54875 -0.54875
#> [841] -0.44875 -0.63875 -0.59500 -0.63875 -0.28000 -0.50000 -0.44875 -0.39500
#> [849] -0.28000 -0.63875 -0.59500 -0.59500 -0.59500 -0.59500 -0.59500 -0.50000
#> [857] -0.59500 -0.59500 -0.59500 -0.59500 -0.68000 -0.54875 -0.63875 -0.50000
#> [865] -0.68000 -0.54875 -0.63875 -0.50000 -0.54875 -0.50000 -0.50000 -0.68000
#> [873] -0.44875 -0.50000 -0.68000 -0.78875 -0.44875 -0.44875 -0.63875 -0.44875
#> [881] -0.54875 -0.59500 -0.59500 -0.44875 -0.68000 -0.71875 -0.50000 -0.54875
#> [889] -0.50000 -0.59500 -0.59500 -0.59500 -0.50000 -0.59500 -0.59500 -0.63875
#> [897] -0.68000 -0.44875 -0.33875 -0.50000 -0.63875 -0.54875 -0.39500 -0.54875
#> [905] -0.44875 -0.78875 -0.50000 -0.33875 -0.33875 -0.15500 -0.50000 -0.54875
#> [913] -0.50000 -0.59500 -0.54875 -0.63875 -0.59500 -0.63875 -0.59500 -0.54875
#> [921] -0.44875 -0.68000 -0.44875 -0.71875 -0.68000 -0.78875 -0.71875 -0.63875
#> [929] -0.33875 -0.44875 -0.59500 -0.63875 -0.50000 -0.59500 -0.68000 -0.63875
#> [937] -0.78875 -0.75500 -0.78875 -0.50000 -0.54875 -0.59500 -0.39500 -0.59500
#> [945] -0.54875 -0.63875 -0.68000 -0.78875 -0.28000 -0.33875 -0.50000 -0.39500
#> [953] -0.44875 -0.71875 -0.78875 -0.39500 -0.39500 -0.71875 -0.82000 -0.75500
#> [961] -0.63875 -0.54875 -0.39500 -0.59500 -0.44875 -0.33875 -0.68000 -0.82000
#> [969] -0.82000 -0.39500 -0.28000 -0.50000 -0.54875 -0.33875 -0.39500 -0.44875
#> [977] -0.44875 -0.28000 -0.82000 -0.68000 -0.78875 -0.39500 -0.44875 -0.28000
#> [985] -0.59500 -0.44875 -0.59500 -0.68000 -0.84875 -0.75500 -0.33875 -0.44875
#> [993] -0.50000 -0.44875 -0.50000 -0.28000 -0.44875 -0.28000 -0.54875 -0.08875
#> [1001] 0.20125 -0.02000 -0.54875 -0.50000 -0.54875 -0.44875 -0.28000 -0.54875
#> [1009] -0.71875 -0.84875 -0.82000 -0.39500 -0.54875 -0.59500 -0.39500 -0.63875
#> [1017] -0.63875 -0.08875 -0.33875 -0.33875 -0.21875 -0.33875 -0.68000 -0.54875
#> [1025] -0.68000 -0.82000 -0.87500 -0.54875 -0.59500 -0.71875 -0.82000
library(verification)
#> Loading required package: fields
#> Loading required package: spam
#> Spam version 2.9-1 (2022-08-07) is loaded.
#> Type 'help( Spam)' or 'demo( spam)' for a short introduction
#> and overview of this package.
#> Help for individual functions is also obtained by adding the
#> suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
#>
#> Attaching package: 'spam'
#> The following objects are masked from 'package:base':
#>
#> backsolve, forwardsolve
#> Loading required package: viridisLite
#>
#> Try help(fields) to get started.
#> Loading required package: boot
#> Loading required package: CircStats
#> Loading required package: MASS
#>
#> Attaching package: 'MASS'
#> The following object is masked from 'package:dplyr':
#>
#> select
#> Loading required package: dtw
#> Loading required package: proxy
#>
#> Attaching package: 'proxy'
#> The following object is masked from 'package:spam':
#>
#> as.matrix
#> The following objects are masked from 'package:stats':
#>
#> as.dist, dist
#> The following object is masked from 'package:base':
#>
#> as.matrix
#> Loaded dtw v1.23-1. See ?dtw for help, citation("dtw") for use in publication.
#> Registered S3 method overwritten by 'verification':
#> method from
#> lines.roc pROC
# Discrimination plot for binary data: shows how often each model issued
# forecasts at different levels of confidence, so the forecasts can be
# assessed visually. Rows without a prediction are dropped first.
df <- example_binary[!is.na(prediction)]
discrimination.plot(df$model, df$prediction)

# Receiver operating characteristic curve for binary predictions.
roc.plot(x = df$true_value, pred = df$prediction)

# Scoring binary forecasts with verification (binary / probabilistic case),
# restricted to one model, horizon and target type.
df <- example_binary[
  (model == "EuroCOVIDhub-ensemble" & horizon == 2 & target_type == "Cases")
]
res <- verify(
  obs = df$true_value,
  pred = df$prediction
)
#> If baseline is not included, baseline values will be calculated from the sample obs.
# Print the summary of the verification object `res` created by verify();
# the captured console output follows.
summary(res)
#>
#> The forecasts are probabilistic, the observations are binary.
#> Sample baseline calculated from observations.
#> Brier Score (BS) = 0.2698
#> Brier Score - Baseline = 0.2495
#> Skill Score = -0.08133
#> Reliability = 0.03619
#> Resolution = 0.0159
#> Uncertainty = 0.2495
# attribute plot and reliability plot
# (both are drawn from the same `res` object; attribute() prints NULL here)
attribute(res)
#> NULL
reliability.plot(res)
# Scoring continuous point forecasts: for one model, horizon and target type,
# average the observed values and the predictions within each location and
# target end date, then verify the result as continuous forecasts against
# continuous observations.
df <- example_continuous[
  (model == "EuroCOVIDhub-ensemble" & horizon == 2 & target_type == "Cases")
][
  ,
  .(obs = mean(true_value), pred = mean(prediction)),
  by = c("location", "target_end_date")
]
res <- verify(
  obs = df$obs,
  pred = df$pred,
  obs.type = "cont",
  frcst.type = "cont"
)
summary(res)
#>
#> The forecasts are continuous, the observations are continous.
#> Sample baseline calcluated from observations.
#> MAE = 2.489e+04
#> ME = 8527
#> MSE = 3.808e+09
#> MSE - baseline = 6.81e+09
#> MSE - persistence = 7.955e+09
#> SS - baseline = 0.4407
# plot(res)
# Scoring quantile forecasts for one model, horizon and target type.
df <- example_quantile[
  (model == "EuroCOVIDhub-ensemble" & horizon == 2 & target_type == "Cases")
]

# Scores from scoringutils, summarised to one row per model.
res_scoringutils <- summarise_scores(score(df), by = "model")

# Quantile scores computed directly from the observed values, predictions and
# quantile levels.
qs <- quantile_score(
  true_values = df$true_value,
  predictions = df$prediction,
  quantiles = df$quantile
)

# Check that the mean quantile score matches the summarised interval score.
all.equal(mean(qs), res_scoringutils$interval_score)
#> [1] TRUE