Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ A minor update to the package with some bug fixes and minor changes.

- Removed the on attach message which warned of breaking changes in `1.0.0`.
- Renamed the `metric` argument of `summarise_scores()` to `relative_skill_metric`. This argument is now deprecated and will be removed in a future version of the package. Please use the new argument instead.
- Updated the documentation for `score()` and related functions to make the soft requirement for a `model`
column in the input data more explicit.
- Updated the documentation for `score()` and related functions to make the soft requirement for a `model` column in the input data more explicit.

## Bug fixes

- Missing baseline forecasts in `pairwise_comparison()` now trigger an explicit and informative error message.
- The requirements table in the getting started vignette is now correct.
- Added support for an optional `sample` column when using a quantile forecast format. Previously this resulted in an error.

# scoringutils 1.0.0

Expand Down
19 changes: 12 additions & 7 deletions R/check_forecasts.R
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,10 @@ check_forecasts <- function(data) {


# get information about the forecasts ----------------------------------------
forecast_unit <- get_forecast_unit(data)
target_type <- get_target_type(data)
prediction_type <- get_prediction_type(data)
forecast_unit <- get_forecast_unit(data, prediction_type = prediction_type)
target_type <- get_target_type(data)



# check whether a column called 'quantile' or 'sample' is present ------------
Expand All @@ -145,7 +146,7 @@ check_forecasts <- function(data) {
# the length of prediction is greater than 1 for a sample / quantile for
# a single forecast

check_duplicates <- find_duplicates(data)
check_duplicates <- find_duplicates(data, forecast_unit = forecast_unit)

if (nrow(check_duplicates) > 0) {
errors <- c(
Expand Down Expand Up @@ -275,21 +276,25 @@ print.scoringutils_check <- function(x, ...) {
#'
#' @param data A data.frame as used for [score()]
#'
#' @param forecast_unit A character vector with the column names that define
#' the unit of a single forecast. If missing, the function tries to infer the
#' unit of a single forecast from `data` via [get_forecast_unit()].
#'
#' @param ... Additional arguments passed to [get_forecast_unit()].
#' @return A data.frame with all rows for which a duplicate forecast was found
#' @export
#' @keywords check-forecasts
#' @examples
#' example <- rbind(example_quantile, example_quantile[1000:1010])
#' find_duplicates(example)

find_duplicates <- function(data) {
find_duplicates <- function(data, forecast_unit, ...) {
type <- c("sample", "quantile")[c("sample", "quantile") %in% colnames(data)]
forecast_unit <- get_forecast_unit(data)

if (missing(forecast_unit)) {
forecast_unit <- get_forecast_unit(data, ...)
}
data <- as.data.table(data)
data[, InternalDuplicateCheck := .N, by = c(forecast_unit, type)]
out <- data[InternalDuplicateCheck > 1]
out[, InternalDuplicateCheck := NULL]
return(out[])
}

3 changes: 2 additions & 1 deletion R/summarise_scores.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ summarise_scores <- function(scores,
}
# preparations ---------------------------------------------------------------
# get unit of a single forecast
forecast_unit <- get_forecast_unit(scores)
prediction_type <- get_prediction_type(scores)
forecast_unit <- get_forecast_unit(scores, prediction_type = prediction_type)

# if by is not provided, set to the unit of a single forecast
if (is.null(by)) {
Expand Down
11 changes: 10 additions & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -230,20 +230,29 @@ get_target_type <- function(data) {
#' @description Helper function to get the unit of a single forecast, i.e.
#' the column names that define what a single forecast was made for.
#'
#' @param prediction_type The prediction type of the forecast. This is used to
#' adjust the list of protected columns.
#'
#' @inheritParams check_forecasts
#'
#' @return A character vector with the column names that define the unit of
#' a single forecast
#'
#' @keywords internal

get_forecast_unit <- function(data) {
get_forecast_unit <- function(data, prediction_type) {

protected_columns <- c(
"prediction", "true_value", "sample", "quantile", "upper", "lower",
"pit_value",
"range", "boundary", available_metrics(),
names(data)[grepl("coverage_", names(data))]
)
if (!missing(prediction_type)) {
if (prediction_type == "quantile") {
protected_columns <- setdiff(protected_columns, "sample")
}
}
forecast_unit <- setdiff(colnames(data), protected_columns)
return(forecast_unit)
}
7 changes: 6 additions & 1 deletion man/find_duplicates.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/get_forecast_unit.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions tests/testthat/_snaps/score.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# score() can support a sample column when a quantile forecast is
used

Code
summarise_scores(summarise_scores(scores, by = "model"), by = "model", fun = signif,
digits = 2)
Output
model interval_score dispersion underprediction
<char> <num> <num> <num>
1: EuroCOVIDhub-baseline 8500 850 0
2: EuroCOVIDhub-ensemble NA NA NA
3: epiforecasts-EpiNow2 13000 4100 0
4: UMass-MechBayes 120 77 39
overprediction coverage_deviation bias ae_median
<num> <num> <num> <num>
1: 7600 -0.081 0.62 13000
2: 11000 NA 0.60 21000
3: 8600 0.050 0.50 22000
4: 0 0.050 -0.50 210

14 changes: 14 additions & 0 deletions tests/testthat/test-score.R
Original file line number Diff line number Diff line change
Expand Up @@ -171,3 +171,17 @@ test_that("function produces output for a continuous format case", {
TRUE
)
})

# Regression test: quantile-format input that additionally carries a `sample`
# column is passed through score() and summarise_scores(), and the summarised
# result is pinned with a snapshot.
test_that("score() can support a sample column when a quantile forecast is
used", {
# Keep only rows with a non-missing quantile, then take the first 200 of them.
ex <- example_quantile[!is.na(quantile)][1:200, ]
# Stack two copies of those rows: one tagged sample = 1, one tagged sample = 2.
# The first copy is made explicitly via data.table::copy() because `:=`
# modifies a data.table in place; without it both halves would share the
# same `sample` value.
ex <- rbind(
data.table::copy(ex)[, sample := 1],
ex[, sample := 2]
)
# Warnings raised by score() are silenced here; only the scores are inspected.
scores <- suppressWarnings(score(ex))
# Summarise by model, then apply signif(digits = 2) in a second summarise
# step, and compare the result against the stored snapshot.
expect_snapshot(summarise_scores(
summarise_scores(scores, by = "model"), by = "model",
fun = signif, digits = 2
))
})