Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions R/fiphde-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,5 +136,6 @@ if(getRversion() >= "2.15.1") utils::globalVariables(c(".",
"target_name",
"target_prob",
"nnetar",
"mean_flu_admits",
"."))

24 changes: 23 additions & 1 deletion R/prep.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
#' @param trim Named list with elements for epiyear and epiweek corresponding to the minimum epidemiological week to retain; defaults to `list(epiyear=2020, epiweek=43)`, which is the first date of report in the healthdata.gov hospitalization data; if set to `NULL` the data will not be trimmed
#' @param remove_incomplete Logical as to whether or not to remove the last week if incomplete; defaults is `TRUE`.
#' @param min_per_week The minimum number of flu.admits per week needed to retain that state. Default removes states with less than 1 flu admission per week over the last 30 days.
#' @param augment Logical as to whether or not the data should be augmented with NHSN hospitalizations imputed backwards in time (see 'Details' for more); default is `FALSE`.
#' @param augment_stop Date at which the time series imputation data should stop; yyyy-mm-dd format; only used if "augment" is `TRUE` default is `"2020-10-18"`
#' @return A `tibble` with hospitalization data summarized to epiyear/epiweek with the following columns:
#'
#' - **abbreviation**: Abbreviation for the location
Expand All @@ -25,6 +27,10 @@
#' - **hosp_mean**: Estimate of historical flu hospitalization rate for the given epidemiological week
#' - **hosp_rank**: Rank of the given epidemiological week in terms of flu hospitalizations across season (1 being highest average activity)
#'
#' @details
#'
#' The preparation for the weekly flu hospitalization data includes an option to "augment" the input time series. The augmentation is based on an extended time series that was developed with an imputation approach. The extended time series estimates flu hospitalizations at the state-level in years before NHSN reporting became available. If the user decides to include the imputed data, then the time series is extended backwards in time from the "augment_stop" date (defaults to October 18, 2020). The prepended data augmentation is formatted to match the true NSHN reporting. For more details on the data augmentation approach, refer to the publication: <https://www.medrxiv.org/content/10.1101/2024.07.31.24311314v1>.
#'
#' @export
#' @examples
#' \dontrun{
Expand All @@ -38,12 +44,28 @@ prep_hdgov_hosp <- function(hdgov_hosp,
statesonly=TRUE,
trim=list(epiyear=2020, epiweek=43),
remove_incomplete=TRUE,
min_per_week=1) {
min_per_week=1,
augment = FALSE,
augment_stop = "2020-10-18") {
# What's the last date you have data on? You'll need this to chop the data later on.
last_date <- max(hdgov_hosp$date)

# Summarize to epiyear, epiweek
message("Summarizing to epiyear/epiweek")

## if the augment option is set then prepend with the imputed data
if(augment) {
hdgov_hosp <-
nhsn_imputed %>%
dplyr::select(state = location, date, flu.admits = mean_flu_admits) %>%
## NOTE: augmentation includes data that extends beyond the date we have used to trim HHS data previously
## this line (in combination with filter in the next line) ...
## ... will ensure that the data pulled from healthdata.gov is used for all dates after augment_stop date
dplyr::filter(date <= as.Date(augment_stop)) %>%
dplyr::bind_rows(dplyr::filter(hdgov_hosp, date > as.Date(augment_stop)),.) %>%
dplyr::arrange(state, date)
}

hweek <- hdgov_hosp %>%
dplyr::rename(location=state) %>%
dplyr::mutate(epiyear=lubridate::epiyear(date), epiweek=lubridate::epiweek(date), .after=date) %>%
Expand Down
Binary file modified R/sysdata.rda
Binary file not shown.
Binary file added data-raw/control_imputed_summaries.rds
Binary file not shown.
34 changes: 24 additions & 10 deletions data-raw/generate_sysdata.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,18 +75,23 @@ hosp <- fiphde:::hospitalizations(surveillance_area = "flusurv", region="all")
hosp
hospstats <-
hosp %>%
filter(age_label == "Overall") %>%
dplyr::filter(age_label=="Overall") %>%
dplyr::filter(race_label=="Overall") %>%
dplyr::filter(sexid == 0) %>%
dplyr::filter(name == "FluSurv-NET") %>%
## get the overall for influenza a/b
dplyr::filter(flutype == 0) %>%
filter(year %>% between(2010, 2021)) %>%
mutate("counts" = as.numeric(weeklyrate)*3300) %>%
dplyr::select(c("wk_start", "counts", "year_wk_num")) %>%
rename(epiweek = year_wk_num) %>%
dplyr::select(c("weekstart", "counts", "weeknumber")) %>%
rename(epiweek = weeknumber) %>%
group_by(epiweek) %>%
summarise(min = min(counts),
summarise(min = min(counts, na.rm = TRUE),
lowhinge = IQR(counts, 0.25),
med = median(counts),
med = median(counts, na.rm=TRUE),
uprhinge = IQR(counts, 0.75),
max = max(counts),
mean = mean(counts))
max = max(counts, na.rm=TRUE),
mean = mean(counts, na.rm = TRUE))
hospstats

# Get weighted and unweighted ILI (2010-2019), summarize by epiweek
Expand All @@ -107,8 +112,13 @@ ilisum
hospsum <-
hosp %>%
filter(year %>% between(2010, 2019)) %>%
filter(age_label=="Overall") %>%
rename(epiweek=year_wk_num) %>%
dplyr::filter(age_label=="Overall") %>%
dplyr::filter(race_label=="Overall") %>%
dplyr::filter(sexid == 0) %>%
dplyr::filter(name == "FluSurv-NET") %>%
## get the overall for influenza a/b
dplyr::filter(flutype == 0) %>%
rename(epiweek=weeknumber) %>%
group_by(epiweek) %>%
summarize(hosp_mean=mean(weeklyrate)) %>%
mutate(hosp_rank=rank(hosp_mean) %>% as.integer())
Expand Down Expand Up @@ -183,7 +193,7 @@ vd$hosp_fitfor <- ts_fit_forecast(vd$prepped_hosp_tsibble,
covariates=TRUE)

# Format for submission
vd$formatted_list <- format_for_submission(vd$hosp_fitfor$tsfor)
vd$formatted_list <- format_for_submission(vd$hosp_fitfor$tsfor, method = "ts", format = "legacy")

# CREG ILI data - stuff in vd$ is created here and saved
# # Original: no time limit and nowcast
Expand Down Expand Up @@ -289,9 +299,13 @@ if (!file.exists(here::here("data-raw/ilinearby.csv"))) {
ilinearby <- read_csv(here::here("data-raw/ilinearby.csv"), col_types="cciid")
}

## read in imputed data
nhsn_imputed <- readRDS(here::here("data-raw/control_imputed_summaries.rds"))

# Write package data ------------------------------------------------------

usethis::use_data(locations,
nhsn_imputed,
legacy_rate_change,
hubverse_rate_change,
q,
Expand Down
4 changes: 2 additions & 2 deletions inst/testdata/testdata-create.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ hosp_fitfor <- ts_fit_forecast(prepped_tsibble,
covariates=TRUE)
hosp_fitfor

prepped_forecast_ts <- format_for_submission(hosp_fitfor$tsfor, method = "ts")
prepped_forecast_ts <- format_for_submission(hosp_fitfor$tsfor, method = "ts", format = "legacy")
prepped_forecast_ts

prepped_forecast_ts_cat <- forecast_categorical(prepped_forecast_ts$ensemble, prepped_hosp)
prepped_forecast_ts_cat <- forecast_categorical(prepped_forecast_ts$ensemble, prepped_hosp, format = "legacy", method = "interpolation")
prepped_forecast_ts_cat

forcplot <- plot_forecast(prepped_tsibble, prepped_forecast_ts$ensemble)
Expand Down
Binary file modified inst/testdata/testdata.rd
Binary file not shown.
11 changes: 10 additions & 1 deletion man/prep_hdgov_hosp.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.