epiforecasts · nikosbosse · Jan 16, 2023 · Jan 12, 2023 · Jan 12, 2023 · Jan 12, 2023
diff --git a/NEWS.md b/NEWS.md
@@ -8,13 +8,13 @@ A minor update to the package with some bug fixes and minor changes.
 
 - Removed the on attach message which warned of breaking changes in `1.0.0`.
 - Renamed the `metric` argument of `summarise_scores()` to `relative_skill_metric`. This argument is now deprecated and will be removed in a future version of the package. Please use the new argument instead.
-- Updated the documentation for `score()` and related functions to make the soft requirement for a `model`
-column in the input data more explicit.
+- Updated the documentation for `score()` and related functions to make the soft requirement for a `model` column in the input data more explicit.
 
 ## Bug fixes
 
 - Missing baseline forecasts in `pairwise_comparison()` now trigger an explicit and informative error message.
 - The requirements table in the getting started vignette is now correct.
+- Added support for an optional `sample` column when using a quantile forecast format. Previously this resulted in an error.
 
 # scoringutils 1.0.0
 

diff --git a/R/check_forecasts.R b/R/check_forecasts.R
@@ -124,9 +124,10 @@ check_forecasts <- function(data) {
 
 
   # get information about the forecasts ----------------------------------------
-  forecast_unit <- get_forecast_unit(data)
-  target_type <- get_target_type(data)
   prediction_type <- get_prediction_type(data)
+  forecast_unit <- get_forecast_unit(data, prediction_type = prediction_type)
+  target_type <- get_target_type(data)
+
 
 
   # check whether a column called 'quantile' or 'sample' is present ------------
@@ -145,7 +146,7 @@ check_forecasts <- function(data) {
   # the length of prediction is greater 1 for a sample / quantile for
   # a single forecast
 
-  check_duplicates <- find_duplicates(data)
+  check_duplicates <- find_duplicates(data, forecast_unit = forecast_unit)
 
   if (nrow(check_duplicates) > 0) {
     errors <- c(
@@ -275,21 +276,25 @@ print.scoringutils_check <- function(x, ...) {
 #'
 #' @param data A data.frame as used for [score()]
 #'
+#' @param forecast_unit A character vector with the column names that define
+#' the unit of a single forecast. If missing the function tries to infer the
+#' unit of a single forecast using [get_forecast_unit()].
+#' @param ... Additional arguments passed to [get_forecast_unit()].
 #' @return A data.frame with all rows for which a duplicate forecast was found
 #' @export
 #' @keywords check-forecasts
 #' @examples
 #' example <- rbind(example_quantile, example_quantile[1000:1010])
 #' find_duplicates(example)
 
-find_duplicates <- function(data) {
+find_duplicates <- function(data, forecast_unit, ...) {
   type <- c("sample", "quantile")[c("sample", "quantile") %in% colnames(data)]
-  forecast_unit <- get_forecast_unit(data)
-
+  if (missing(forecast_unit)) {
+     forecast_unit <- get_forecast_unit(data, ...)
+  }
   data <- as.data.table(data)
   data[, InternalDuplicateCheck := .N, by = c(forecast_unit, type)]
   out <- data[InternalDuplicateCheck > 1]
   out[, InternalDuplicateCheck := NULL]
   return(out[])
 }
-
diff --git a/R/summarise_scores.R b/R/summarise_scores.R
@@ -78,7 +78,8 @@ summarise_scores <- function(scores,
   }
   # preparations ---------------------------------------------------------------
   # get unit of a single forecast
-  forecast_unit <- get_forecast_unit(scores)
+  prediction_type <- get_prediction_type(scores)
+  forecast_unit <- get_forecast_unit(scores, prediction_type = prediction_type)
 
   # if by is not provided, set to the unit of a single forecast
   if (is.null(by)) {

diff --git a/R/utils.R b/R/utils.R
@@ -230,20 +230,29 @@ get_target_type <- function(data) {
 #' @description Helper function to get the unit of a single forecast, i.e.
 #' the column names that define where a single forecast was made for
 #'
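+#' @param prediction_type The prediction type of the forecast. This is used to
+#' adjust the list of protected columns: if it is "quantile", `sample` is not
+#' treated as protected and can therefore be part of the forecast unit.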
 #' @inheritParams check_forecasts
 #'
 #' @return A character vector with the column names that define the unit of
 #' a single forecast
 #'
 #' @keywords internal
 
-get_forecast_unit <- function(data) {
+get_forecast_unit <- function(data, prediction_type) {
+
   protected_columns <- c(
     "prediction", "true_value", "sample", "quantile", "upper", "lower",
     "pit_value",
     "range", "boundary", available_metrics(),
     names(data)[grepl("coverage_", names(data))]
   )
+  if (!missing(prediction_type)) {
+    if (prediction_type == "quantile") {
+      protected_columns <- setdiff(protected_columns, "sample")
+    }
+  }
   forecast_unit <- setdiff(colnames(data), protected_columns)
   return(forecast_unit)
 }
diff --git a/man/find_duplicates.Rd b/man/find_duplicates.Rd
diff --git a/man/get_forecast_unit.Rd b/man/get_forecast_unit.Rd
diff --git a/tests/testthat/_snaps/score.md b/tests/testthat/_snaps/score.md
@@ -0,0 +1,20 @@
+# score() can support a sample column when a quantile forecast is
+ used
+
+    Code
+      summarise_scores(summarise_scores(scores, by = "model"), by = "model", fun = signif,
+      digits = 2)
+    Output
+                         model interval_score dispersion underprediction
+                        <char>          <num>      <num>           <num>
+      1: EuroCOVIDhub-baseline           8500        850               0
+      2: EuroCOVIDhub-ensemble             NA         NA              NA
+      3:  epiforecasts-EpiNow2          13000       4100               0
+      4:       UMass-MechBayes            120         77              39
+         overprediction coverage_deviation  bias ae_median
+                  <num>              <num> <num>     <num>
+      1:           7600             -0.081  0.62     13000
+      2:          11000                 NA  0.60     21000
+      3:           8600              0.050  0.50     22000
+      4:              0              0.050 -0.50       210
+
diff --git a/tests/testthat/test-score.R b/tests/testthat/test-score.R
@@ -171,3 +171,17 @@ test_that("function produces output for a continuous format case", {
     TRUE
   )
 })
+
+test_that("score() can support a sample column when a quantile forecast is
+ used", {
+  ex <- example_quantile[!is.na(quantile)][1:200, ]
+  ex <- rbind(
+    data.table::copy(ex)[, sample := 1],
+    ex[, sample := 2]
+  )
+  scores <- suppressWarnings(score(ex))
+  expect_snapshot(summarise_scores(
+    summarise_scores(scores, by = "model"), by = "model", 
+    fun = signif, digits = 2
+  ))
+ })