scoringutils and other packages

Nikos Bosse

2023-09-27

scoringutils can work well with other existing packages. This vignette shows some examples.

yardstick

yardstick (belonging to the tidymodels family of R packages) is a package designed to evaluate predictions against observed values. It is similar to scoringutils in the sense that it makes a large variety of metrics available to users through a consistent framework. However, it mostly focuses on binary / multinomial forecasts and point forecasts (it also has a few functions for the analysis of survival data). It does not currently implement metrics for full probabilistic forecasts.

Binary class prediction

library(yardstick)

class_metrics <- metric_set(accuracy, kap)

example_binary |>
  to_yardstick_binary_class() |>
  group_by(model) |>
  class_metrics(truth = true_value, estimate = prediction)
#> # A tibble: 10 × 4
#>    model                 .metric  .estimator .estimate
#>    <chr>                 <chr>    <chr>          <dbl>
#>  1 EuroCOVIDhub-baseline accuracy binary       0.824  
#>  2 EuroCOVIDhub-ensemble accuracy binary       0.613  
#>  3 UMass-MechBayes       accuracy binary       0.531  
#>  4 epiforecasts-EpiNow2  accuracy binary       0.563  
#>  5 <NA>                  accuracy binary     NaN      
#>  6 EuroCOVIDhub-baseline kap      binary       0.0585 
#>  7 EuroCOVIDhub-ensemble kap      binary       0.0703 
#>  8 UMass-MechBayes       kap      binary       0.0320 
#>  9 epiforecasts-EpiNow2  kap      binary       0.00685
#> 10 <NA>                  kap      binary     NaN

Binary class probability prediction

example_binary |>
  to_yardstick_binary_class_prob() |>
  group_by(model) |>
  filter(!is.na(prediction)) |>
  average_precision(truth = true_value, prediction, event_level = "first")
#> # A tibble: 4 × 4
#>   model                 .metric           .estimator .estimate
#>   <chr>                 <chr>             <chr>          <dbl>
#> 1 EuroCOVIDhub-baseline average_precision binary         0.196
#> 2 EuroCOVIDhub-ensemble average_precision binary         0.423
#> 3 UMass-MechBayes       average_precision binary         0.529
#> 4 epiforecasts-EpiNow2  average_precision binary         0.411

Numeric predictions

example_continuous |>
  group_by(model) |>
  mae(truth = true_value, estimate = prediction)
#> # A tibble: 5 × 4
#>   model                 .metric .estimator .estimate
#>   <chr>                 <chr>   <chr>          <dbl>
#> 1 EuroCOVIDhub-baseline mae     standard      22829.
#> 2 EuroCOVIDhub-ensemble mae     standard      15125.
#> 3 UMass-MechBayes       mae     standard        133.
#> 4 epiforecasts-EpiNow2  mae     standard      19295.
#> 5 <NA>                  mae     standard        NaN

probably

The probably package (part of the tidymodels family) contains tools to facilitate the assessment of calibration, the conversion of probabilities to class predictions, and the selection of optimal probability thresholds.

Plots to assess the calibration of binary forecasts can be used directly with the output of `score()`.

s <- score(example_binary)
s <- s[model %in% c("EuroCOVIDhub-baseline", "EuroCOVIDhub-ensemble")]

library(probably)
cal_plot_breaks(.data = s, truth = true_value, estimate = prediction, .by = model)
cal_plot_windowed(.data = s, truth = true_value, estimate = prediction, .by = model)
cal_plot_logistic(.data = s, truth = true_value, estimate = prediction, .by = model)

predtools

library(predtools)
s <- score(example_binary)
#> The following messages were produced when checking inputs:
#> 1.  144 values for `true_value` are NA in the data provided and the corresponding rows were removed. This may indicate a problem if unexpected.
#> 2.  144 values for `prediction` are NA in the data provided and the corresponding rows were removed. This may indicate a problem if unexpected.
s <- s[model %in% c("EuroCOVIDhub-baseline", "EuroCOVIDhub-ensemble")]

calibration_plot(data = s, obs = "true_value",
                 pred = "prediction", group = "model")
#> $calibration_plot

library(scoring)
calcscore(true_value ~ prediction, data = example_binary, fam="pow", param=2, bounds=c(-1,1))
#> Warning in calcscore.default(object = c(NA, NA, NA, NA, NA, NA, NA, NA, : Some
#> scores are NA. This may be due to missing data in your forecasts or outcomes,
#> or an ill-defined param argument.
#>    [1]       NA       NA       NA       NA       NA       NA       NA       NA
#>    [9]       NA       NA       NA       NA       NA       NA       NA       NA
#>   [17]       NA       NA       NA       NA       NA       NA       NA       NA
#>   [25]       NA       NA       NA       NA       NA       NA       NA       NA
#>   [33]       NA       NA       NA       NA -0.71875 -0.54875 -0.63875 -0.63875
#>   [41] -0.50000 -0.39500 -0.54875 -0.50000 -0.44875 -0.50000 -0.50000 -0.44875
#>   [49] -0.54875 -0.54875 -0.50000 -0.63875 -0.50000 -0.50000 -0.33875 -0.54875
#>   [57] -0.44875 -0.59500 -0.59500 -0.54875 -0.54875 -0.33875 -0.50000 -0.50000
#>   [65] -0.50000 -0.54875 -0.54875 -0.63875 -0.44875 -0.71875 -0.50000 -0.39500
#>   [73] -0.59500 -0.33875 -0.39500 -0.54875 -0.54875 -0.33875 -0.59500 -0.63875
#>   [81] -0.50000 -0.44875 -0.50000 -0.59500 -0.68000 -0.59500 -0.50000 -0.59500
#>   [89] -0.39500 -0.50000 -0.59500 -0.63875 -0.68000 -0.28000 -0.33875 -0.28000
#>   [97] -0.50000 -0.59500 -0.44875 -0.59500 -0.54875 -0.59500 -0.59500 -0.75500
#>  [105] -0.71875 -0.71875 -0.50000 -0.50000 -0.50000 -0.39500 -0.54875 -0.63875
#>  [113] -0.63875 -0.68000 -0.50000 -0.44875 -0.33875 -0.50000 -0.44875 -0.33875
#>  [121] -0.59500 -0.71875 -0.63875 -0.50000 -0.59500 -0.75500 -0.59500 -0.68000
#>  [129] -0.50000 -0.59500 -0.63875 -0.59500 -0.68000 -0.71875 -0.68000 -0.50000
#>  [137] -0.54875 -0.50000 -0.44875 -0.33875 -0.39500 -0.59500 -0.68000 -0.68000
#>  [145] -0.63875 -0.63875 -0.63875 -0.68000 -0.54875 -0.59500 -0.39500 -0.50000
#>  [153] -0.50000 -0.68000 -0.68000 -0.44875 -0.44875 -0.54875 -0.50000 -0.28000
#>  [161] -0.33875 -0.33875 -0.71875 -0.68000 -0.63875 -0.63875 -0.63875 -0.82000
#>  [169] -0.63875 -0.59500 -0.44875 -0.59500 -0.33875 -0.59500 -0.75500 -0.63875
#>  [177] -0.54875 -0.15500 -0.28000 -0.21875 -0.44875 -0.33875 -0.21875 -0.63875
#>  [185] -0.63875 -0.28000 -0.75500 -0.84875 -0.82000 -0.59500 -0.39500 -0.21875
#>  [193] -0.44875 -0.63875 -0.33875 -0.63875 -0.59500 -0.50000 -0.33875 -0.54875
#>  [201] -0.39500 -0.39500 -0.44875 -0.28000 -0.39500 -0.28000 -0.39500 -0.82000
#>  [209] -0.75500 -0.82000 -0.21875 -0.44875 -0.50000 -0.54875 -0.39500 -0.54875
#>  [217] -0.50000 -0.63875 -0.68000 -0.50000 -0.33875 -0.39500 -0.44875 -0.39500
#>  [225] -0.50000 -0.21875 -0.15500 -0.15500 -0.71875 -0.78875  0.05125 -0.21875
#>  [233] -0.33875 -0.21875 -0.71875 -0.71875 -0.63875 -0.68000 -0.63875 -0.75500
#>  [241] -0.54875 -0.21875 -0.59500 -0.39500 -0.50000 -0.63875 -0.15500 -0.08875
#>  [249] -0.08875 -0.50000 -0.08875 -0.15500 -0.33875 -0.75500 -0.75500 -0.75500
#>  [257] -0.44875 -0.50000 -0.15500 -0.71875       NA       NA       NA       NA
#>  [265]       NA       NA       NA       NA       NA       NA       NA       NA
#>  [273]       NA       NA       NA       NA       NA       NA       NA       NA
#>  [281]       NA       NA       NA       NA       NA       NA       NA       NA
#>  [289]       NA       NA       NA       NA       NA       NA       NA       NA
#>  [297] -0.54875 -0.50000 -0.50000 -0.68000 -0.68000 -0.63875 -0.54875 -0.50000
#>  [305] -0.78875 -0.63875 -0.50000 -0.63875 -0.59500 -0.71875 -0.54875 -0.68000
#>  [313] -0.54875 -0.59500 -0.50000 -0.59500 -0.39500 -0.68000 -0.71875 -0.39500
#>  [321] -0.54875 -0.63875 -0.63875 -0.68000 -0.54875 -0.54875 -0.63875 -0.59500
#>  [329] -0.54875 -0.59500 -0.59500 -0.71875 -0.63875 -0.75500 -0.33875 -0.50000
#>  [337] -0.39500 -0.59500 -0.63875 -0.59500 -0.21875 -0.68000 -0.75500  1.00000
#>  [345] -0.63875 -0.59500 -0.39500 -0.50000 -0.82000 -0.21875 -0.59500 -0.54875
#>  [353] -0.68000 -0.68000 -0.54875 -0.44875 -0.63875 -0.68000 -0.39500 -0.71875
#>  [361] -0.21875 -0.44875 -0.68000  0.90125 -0.59500 -0.68000 -0.28000 -0.68000
#>  [369] -0.50000 -0.33875 -0.33875 -0.71875 -0.71875 -0.54875 -0.63875 -0.44875
#>  [377] -0.21875 -0.68000 -0.44875 -0.08875 -0.54875 -0.44875 -0.99875 -0.71875
#>  [385] -0.75500 -0.71875 -0.82000 -0.63875 -0.15500 -0.63875 -0.54875 -0.71875
#>  [393] -0.75500 -0.63875 -0.33875 -0.54875 -0.54875 -0.68000 -0.68000 -0.68000
#>  [401] -0.59500 -0.78875 -0.78875 -0.82000 -0.63875 -0.71875 -0.68000 -0.71875
#>  [409] -0.54875 -0.50000 -0.75500 -0.63875 -0.68000 -0.50000 -0.44875 -0.59500
#>  [417] -0.78875 -0.63875 -0.54875 -0.87500 -0.84875 -0.89875 -0.78875 -0.71875
#>  [425] -0.54875 -0.75500 -0.50000 -0.59500 -0.75500 -0.78875 -0.78875 -0.59500
#>  [433] -0.75500 -0.75500 -0.54875 -0.82000 -0.28000 -0.28000 -0.68000 -0.92000
#>  [441] -0.92000 -0.82000 -0.39500 -0.39500 -0.68000 -0.15500 -0.54875 -0.63875
#>  [449] -0.89875 -0.68000 -0.68000 -0.68000 -0.68000 -0.39500 -0.44875 -0.28000
#>  [457] -0.44875 -0.33875 -0.87500  0.20125  0.20125 -0.28000 -0.44875 -0.44875
#>  [465] -0.75500 -0.54875 -0.54875 -0.78875 -0.75500 -0.78875 -0.54875 -0.68000
#>  [473] -0.59500 -0.15500 -0.39500 -0.21875 -0.39500 -0.21875 -0.33875 -0.87500
#>  [481]  0.36125 -0.02000 -0.02000 -0.21875 -0.39500 -0.21875 -0.63875 -0.59500
#>  [489] -0.75500 -0.71875 -0.87500 -0.59500 -0.54875 -0.50000 -0.08875 -0.21875
#>  [497] -0.63875  0.28000 -0.21875  0.36125  0.20125 -0.02000 -0.21875 -0.78875
#>  [505] -0.71875 -0.71875 -0.78875 -0.63875 -0.71875 -0.28000 -0.75500       NA
#>  [513]       NA       NA       NA       NA       NA       NA       NA       NA
#>  [521]       NA       NA       NA       NA       NA       NA       NA       NA
#>  [529]       NA       NA       NA       NA       NA       NA       NA       NA
#>  [537]       NA       NA       NA       NA       NA       NA       NA       NA
#>  [545]       NA       NA       NA -0.63875 -0.78875 -0.44875 -0.54875 -0.84875
#>  [553] -0.71875 -0.75500 -0.21875 -0.54875 -0.71875 -0.75500 -0.15500 -0.59500
#>  [561] -0.54875 -0.78875 -0.84875 -0.87500 -0.44875 -0.54875 -0.68000 -0.54875
#>  [569] -0.39500 -0.54875 -0.68000 -0.82000 -0.75500 -0.63875 -0.33875 -0.63875
#>  [577] -0.59500 -0.44875 -0.54875 -0.54875 -0.75500 -0.78875 -0.82000 -0.68000
#>  [585] -0.54875 -0.50000 -0.59500 -0.63875 -0.59500 -0.39500 -0.44875 -0.33875
#>  [593] -0.71875 -0.82000 -0.28000 -0.39500 -0.33875 -0.44875 -0.50000 -0.50000
#>  [601] -0.15500 -0.78875 -0.95500 -0.87500 -0.63875 -0.44875 -0.50000 -0.50000
#>  [609] -0.50000 -0.59500 -0.44875 -0.33875 -0.50000 -0.71875 -0.02000 -0.54875
#>  [617] -0.33875 -0.39500 -0.50000 -0.78875 -0.39500 -0.63875 -0.78875 -0.68000
#>  [625] -0.87500 -0.54875 -0.50000 -0.59500 -0.68000 -0.44875 -0.54875 -0.33875
#>  [633] -0.44875 -0.44875 -0.28000 -0.08875 -0.44875 -0.21875 -0.44875 -0.08875
#>  [641] -0.15500 -0.59500 -0.71875 -0.71875 -0.87500 -0.93875 -0.33875 -0.44875
#>  [649] -0.59500 -0.68000 -0.50000 -0.63875 -0.28000 -0.50000 -0.54875 -0.28000
#>  [657] -0.28000 -0.15500 -0.08875 -0.50000 -0.63875 -0.68000 -0.50000 -0.50000
#>  [665] -0.78875 -0.71875 -0.93875 -0.59500 -0.63875 -0.59500 -0.78875 -0.68000
#>  [673] -0.68000 -0.39500 -0.21875 -0.54875 -0.21875 -0.28000 -0.39500 -0.50000
#>  [681] -0.63875 -0.59500 -0.63875 -0.63875 -0.39500 -0.78875 -0.78875 -0.87500
#>  [689] -0.71875 -0.68000 -0.44875 -0.68000 -0.63875 -0.59500 -0.28000 -0.28000
#>  [697] -0.28000 -0.15500 -0.39500 -0.39500 -0.54875 -0.39500 -0.44875 -0.68000
#>  [705] -0.82000 -0.54875 -0.84875 -0.82000 -0.92000 -0.50000 -0.44875 -0.44875
#>  [713] -0.68000 -0.63875 -0.54875 -0.44875 -0.50000 -0.59500 -0.39500 -0.54875
#>  [721] -0.50000 -0.15500 -0.50000 -0.54875 -0.63875 -0.63875 -0.71875 -0.78875
#>  [729] -0.75500 -0.84875 -0.59500 -0.63875 -0.68000 -0.82000 -0.54875 -0.50000
#>  [737] -0.50000 -0.33875 -0.44875 -0.33875 -0.50000 -0.44875 -0.54875 -0.63875
#>  [745] -0.44875 -0.63875 -0.75500 -0.44875 -0.84875 -0.75500 -0.75500 -0.75500
#>  [753] -0.54875 -0.54875 -0.71875 -0.50000 -0.28000 -0.78875 -0.59500 -0.59500
#>  [761] -0.50000 -0.54875 -0.63875 -0.75500 -0.50000 -0.82000 -0.87500 -0.44875
#>  [769] -0.63875 -0.59500 -0.54875       NA       NA       NA       NA       NA
#>  [777]       NA       NA       NA       NA       NA       NA       NA       NA
#>  [785]       NA       NA       NA       NA       NA       NA       NA       NA
#>  [793]       NA       NA       NA       NA       NA       NA       NA       NA
#>  [801]       NA       NA       NA       NA       NA       NA       NA -0.33875
#>  [809] -0.50000 -0.68000 -0.50000 -0.63875 -0.54875 -0.54875 -0.54875 -0.44875
#>  [817] -0.59500 -0.50000 -0.44875 -0.39500 -0.63875 -0.44875 -0.63875 -0.71875
#>  [825] -0.54875 -0.33875 -0.59500 -0.44875 -0.44875 -0.39500 -0.63875 -0.39500
#>  [833] -0.59500 -0.59500 -0.63875 -0.50000 -0.44875 -0.54875 -0.54875 -0.54875
#>  [841] -0.44875 -0.63875 -0.59500 -0.63875 -0.28000 -0.50000 -0.44875 -0.39500
#>  [849] -0.28000 -0.63875 -0.59500 -0.59500 -0.59500 -0.59500 -0.59500 -0.50000
#>  [857] -0.59500 -0.59500 -0.59500 -0.59500 -0.68000 -0.54875 -0.63875 -0.50000
#>  [865] -0.68000 -0.54875 -0.63875 -0.50000 -0.54875 -0.50000 -0.50000 -0.68000
#>  [873] -0.44875 -0.50000 -0.68000 -0.78875 -0.44875 -0.44875 -0.63875 -0.44875
#>  [881] -0.54875 -0.59500 -0.59500 -0.44875 -0.68000 -0.71875 -0.50000 -0.54875
#>  [889] -0.50000 -0.59500 -0.59500 -0.59500 -0.50000 -0.59500 -0.59500 -0.63875
#>  [897] -0.68000 -0.44875 -0.33875 -0.50000 -0.63875 -0.54875 -0.39500 -0.54875
#>  [905] -0.44875 -0.78875 -0.50000 -0.33875 -0.33875 -0.15500 -0.50000 -0.54875
#>  [913] -0.50000 -0.59500 -0.54875 -0.63875 -0.59500 -0.63875 -0.59500 -0.54875
#>  [921] -0.44875 -0.68000 -0.44875 -0.71875 -0.68000 -0.78875 -0.71875 -0.63875
#>  [929] -0.33875 -0.44875 -0.59500 -0.63875 -0.50000 -0.59500 -0.68000 -0.63875
#>  [937] -0.78875 -0.75500 -0.78875 -0.50000 -0.54875 -0.59500 -0.39500 -0.59500
#>  [945] -0.54875 -0.63875 -0.68000 -0.78875 -0.28000 -0.33875 -0.50000 -0.39500
#>  [953] -0.44875 -0.71875 -0.78875 -0.39500 -0.39500 -0.71875 -0.82000 -0.75500
#>  [961] -0.63875 -0.54875 -0.39500 -0.59500 -0.44875 -0.33875 -0.68000 -0.82000
#>  [969] -0.82000 -0.39500 -0.28000 -0.50000 -0.54875 -0.33875 -0.39500 -0.44875
#>  [977] -0.44875 -0.28000 -0.82000 -0.68000 -0.78875 -0.39500 -0.44875 -0.28000
#>  [985] -0.59500 -0.44875 -0.59500 -0.68000 -0.84875 -0.75500 -0.33875 -0.44875
#>  [993] -0.50000 -0.44875 -0.50000 -0.28000 -0.44875 -0.28000 -0.54875 -0.08875
#> [1001]  0.20125 -0.02000 -0.54875 -0.50000 -0.54875 -0.44875 -0.28000 -0.54875
#> [1009] -0.71875 -0.84875 -0.82000 -0.39500 -0.54875 -0.59500 -0.39500 -0.63875
#> [1017] -0.63875 -0.08875 -0.33875 -0.33875 -0.21875 -0.33875 -0.68000 -0.54875
#> [1025] -0.68000 -0.82000 -0.87500 -0.54875 -0.59500 -0.71875 -0.82000
library(verification)
#> Loading required package: fields
#> Loading required package: spam
#> Spam version 2.9-1 (2022-08-07) is loaded.
#> Type 'help( Spam)' or 'demo( spam)' for a short introduction 
#> and overview of this package.
#> Help for individual functions is also obtained by adding the
#> suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
#> 
#> Attaching package: 'spam'
#> The following objects are masked from 'package:base':
#> 
#>     backsolve, forwardsolve
#> Loading required package: viridisLite
#> 
#> Try help(fields) to get started.
#> Loading required package: boot
#> Loading required package: CircStats
#> Loading required package: MASS
#> 
#> Attaching package: 'MASS'
#> The following object is masked from 'package:dplyr':
#> 
#>     select
#> Loading required package: dtw
#> Loading required package: proxy
#> 
#> Attaching package: 'proxy'
#> The following object is masked from 'package:spam':
#> 
#>     as.matrix
#> The following objects are masked from 'package:stats':
#> 
#>     as.dist, dist
#> The following object is masked from 'package:base':
#> 
#>     as.matrix
#> Loaded dtw v1.23-1. See ?dtw for help, citation("dtw") for use in publication.
#> Registered S3 method overwritten by 'verification':
#>   method    from
#>   lines.roc pROC

# discrimination plot for binary data
# shows how often models made forecasts with different levels of confidence --> can visually assess the forecasts
df <- example_binary[!is.na(prediction)]
discrimination.plot(df$model, df$prediction)


# receiver operating characteristic curve for binary predictions
roc.plot(x = df$true_value, pred = df$prediction)



# scoring binary forecasts with verification - binary/probabilistic case
df <- example_binary[(model == "EuroCOVIDhub-ensemble" & horizon == 2 & target_type == "Cases")]
res <- verify(obs = df$true_value, pred = df$prediction)
#> If baseline is not included, baseline values  will be calculated from the  sample obs.
summary(res)
#> 
#> The forecasts are probabilistic, the observations are binary.
#> Sample baseline calculated from observations.
#> Brier Score (BS)           =  0.2698 
#> Brier Score - Baseline     =  0.2495 
#> Skill Score                =  -0.08133 
#> Reliability                =  0.03619 
#> Resolution                 =  0.0159 
#> Uncertainty              =  0.2495

# attribute plot and reliability plot
attribute(res)

#> NULL
reliability.plot(res)



# scoring continuous point forecasts
df <- example_continuous[(model == "EuroCOVIDhub-ensemble" & horizon == 2 & target_type == "Cases")][,
                         .('obs' = mean(true_value),
                           'pred' = mean(prediction)
                         ),
                         by = c("location", "target_end_date")
]

res <- verify(obs = df$obs, pred = df$pred, obs.type = "cont", frcst.type = "cont")
summary(res)
#> 
#> The forecasts are continuous, the observations are continous.
#> Sample baseline calcluated from observations.
#> MAE               =  2.489e+04 
#> ME                =   8527 
#> MSE               =  3.808e+09 
#> MSE - baseline    =  6.81e+09 
#> MSE - persistence =  7.955e+09 
#> SS  - baseline     =  0.4407
# plot(res)

# scoring quantile forecasts
df <- example_quantile[(model == "EuroCOVIDhub-ensemble" & horizon == 2 & target_type == "Cases")]

res_scoringutils <- score(df) |>
  summarise_scores(by = "model")

qs <- quantile_score(true_values = df$true_value, predictions = df$prediction, 
               quantiles = df$quantile)

all.equal(mean(qs), res_scoringutils$interval_score)
#> [1] TRUE
library(yardstick)