-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
c50373f
commit 55c32b7
Showing
15 changed files
with
391 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
|
||
###########################. | ||
### D&G UCPN Update Fix ### | ||
###########################. | ||
|
||
# Author: Charlie Smith | ||
# Date: 2024-09-13 | ||
|
||
# Issue: D&G PT moving to Morse from early October 2024, meaning that PT will adopt a UCPN. | ||
# This will mean that pre-existing records' UCPNs will no longer match. Phil will | ||
# provide the old-style UCPN in the UPI field. | ||
|
||
# Solution: for submissions from 2024-10-24 onwards, use the old UCPN (in UPI field) | ||
# for D&G pathways IF the pathway has info from prior to 2024-10-24. | ||
|
||
|
||
# Create a test dataset to test function on: | ||
# ~ | ||
|
||
# Small fixture for exercising the UCPN fix: three NHS Dumfries and Galloway
# PT rows (the first has a UPI that differs from its UCPN) and one NHS Fife row.
# Columns must be given as `name = value`; using `<-` inside the call would
# pass the vectors positionally and create stray variables instead of columns.
df_test_data <- data.frame(

  header_ref_date = c("", "", "", ""),

  hb_name = c("NHS Dumfries and Galloway", "NHS Dumfries and Galloway",
              "NHS Dumfries and Galloway", "NHS Fife"),

  dataset_type = c("PT", "PT", "PT", "PT"),

  ucpn = c("12345", "12345", "62891", "62891"),

  upi = c("2639123", "12345", "62891", "62891"),

  ref_rec_date = c("", "", "", ""),

  # keep every column as character regardless of R version
  stringsAsFactors = FALSE

)
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
61 changes: 61 additions & 0 deletions
61
07_publication/script/functions/compile_open_cases_summary.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
|
||
############################################. | ||
### Compile open cases excel summary ### | ||
############################################. | ||
|
||
# Author: Charlie Smith | ||
# Date: 2024-09-20 | ||
|
||
|
||
# Compile the open-cases parquet outputs into a single Excel workbook.
#
# Reads the twelve "open_cases_*.parquet" files found in `open_dir`, writes
# them (one sheet per file) to "open_cases_summary.xlsx" inside
# `shorewise_pub_measure_summaries_dir`, then reopens that workbook to
# auto-size columns 1 to 9 of every sheet and saves it over itself.
#
# Takes no arguments: `open_dir` and `shorewise_pub_measure_summaries_dir`
# are read from the calling environment.
# Returns whatever the final saveWorkbook() call returns.
compile_open_cases_summary <- function() {

  # sheet name -> parquet file stem; the order here is the sheet order
  tab_files <- c(
    all_hb = "all_hb",
    all_sex = "all_hb_sex",
    all_age = "all_hb_age",
    all_simd = "all_hb_simd",

    month_hb = "month_hb",
    month_sex = "month_hb_sex",
    month_age = "month_hb_age",
    month_simd = "month_hb_simd",

    quart_hb = "quarter_hb",
    quart_sex = "quarter_hb_sex",
    quart_age = "quarter_hb_age",
    quart_simd = "quarter_hb_simd"
  )

  # load parquet files (lapply keeps the names, so the list is already
  # labelled with the sheet names)
  list_tabs <- lapply(tab_files, function(file_stem) {
    read_parquet(paste0(open_dir, "open_cases_", file_stem, ".parquet"))
  })

  # save output as excel doc
  filepath <- paste0(shorewise_pub_measure_summaries_dir,
                     "/open_cases_summary.xlsx")
  export(list_tabs, file = filepath)

  # format report
  wb <- loadWorkbook(filepath)

  for (i in seq_along(list_tabs)) {
    setColWidths(wb, sheet = i, cols = 1:9, widths = "auto")
  }

  saveWorkbook(wb, filepath, overwrite = TRUE)

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,231 @@ | ||
|
||
############################. | ||
### Calculate open cases ### | ||
############################. | ||
|
||
# Author: Charlie Smith | ||
# Date: 2024-09-19 | ||
|
||
# This is more fiddly than I anticipated. It makes sense to report monthly open cases | ||
# as cumulative sums | ||
|
||
|
||
# Summarise open cases and save each breakdown as a parquet file in `open_dir`.
#
# "Open" is taken here to mean rows whose RTT evaluation is "seen - active"
# (the original author queried whether this is the same thing as open cases).
# One row is kept per `data_keys` group before counting.
#
# Files written (all prefixed "open_cases_"):
#   all_hb, all_hb_sex, all_hb_age, all_hb_simd
#     - counts of rows with referral month <= `month_end`
#   month_hb, month_hb_sex, month_hb_age, month_hb_simd
#     - running (cumulative) counts by referral month, kept for `date_range`
#   quarter_hb, quarter_hb_sex, quarter_hb_age, quarter_hb_simd
#     - the latest month within each quarter, passed to summarise_by_quarter()
# Every table also gets "NHS Scotland" rows that sum the health boards.
#
# Takes no arguments: directories, column-name strings (`*_o`), `data_keys`,
# `month_end`, `date_range` and `level_order_hb` are read from the calling
# environment.
# Returns (invisibly) the result of the final save_as_parquet() call.
summarise_open_cases <- function() {

  # only create the output folder when missing, so re-runs do not warn
  if (!dir.exists(open_dir)) {
    dir.create(open_dir)
  }
  measure_label <- "open_cases_"

  # read the source extract once; both base tables below derive from it
  df_rtt <- read_parquet(paste0(root_dir, '/swift_glob_completed_rtt.parquet'))

  # helpers ------------------------------------------------------------------

  # keep the first row per data_keys group, then apply the shared
  # add_sex_description() / tidy_age_group_order() steps
  keep_single_row <- function(df) {
    df |>
      group_by(!!!syms(data_keys)) |>
      slice(1) |>
      ungroup() |>
      add_sex_description() |>
      tidy_age_group_order() |>
      as.data.frame()
  }

  # append "NHS Scotland" rows: numeric columns summed within `vec_group`
  # (the result stays grouped by `vec_group`, as bind_rows keeps the grouping
  # of its first input)
  add_scotland_total <- function(df, vec_group) {
    df |>
      group_by(!!!syms(vec_group)) %>%
      bind_rows(summarise(.,
                          across(where(is.numeric), sum),
                          across(!!sym(hb_name_o), ~"NHS Scotland"),
                          .groups = "drop"))
  }

  # make health board a factor so it sorts in the reporting order
  set_hb_order <- function(df) {
    df |>
      mutate(!!sym(hb_name_o) := factor(!!sym(hb_name_o),
                                        levels = level_order_hb))
  }

  # overall counts by dataset type and health board, optionally split by
  # `breakdown_col`; saved as "<measure_label><file_stem>"
  summarise_overall <- function(breakdown_col = NULL, file_stem) {
    df_single_row |>
      group_by(!!!syms(c(dataset_type_o, hb_name_o, breakdown_col))) |>
      summarise(count = n(), .groups = "drop") |>
      add_scotland_total(vec_group = c(dataset_type_o, breakdown_col)) |>
      set_hb_order() |>
      # sym() is required: a bare `!!dataset_type_o` injects the name as a
      # constant string and the sort silently does nothing
      arrange(!!sym(dataset_type_o), !!sym(hb_name_o)) |>
      save_as_parquet(path = paste0(open_dir, measure_label, file_stem))
  }

  # cumulative monthly counts plus the quarterly roll-up, optionally split by
  # `breakdown_col`. `breakdown_label` is the column name handed to
  # summarise_by_quarter(); `file_stem` completes "month_" / "quarter_".
  summarise_monthly <- function(breakdown_col = NULL, breakdown_label = NULL,
                                file_stem) {

    vec_series <- c(dataset_type_o, hb_name_o, breakdown_col)

    # NOTE(review): a series with no referrals in a given month has no row for
    # that month, so it is missing from that month's NHS Scotland sum - confirm
    # whether months should be filled in before the cumulative sum.
    df_single_row_monthly |>
      group_by(!!!syms(c(referral_month_o, vec_series))) |>
      summarise(count = n(), .groups = "drop") |>
      # cumsum depends on row order, so sort by month explicitly
      arrange(!!!syms(vec_series), !!sym(referral_month_o)) |>
      group_by(!!!syms(vec_series)) |>
      mutate(count = cumsum(count)) |>
      # range filter is applied after cumsum so earlier months still count
      filter(!!sym(referral_month_o) %in% date_range) |>
      add_scotland_total(
        vec_group = c(referral_month_o, dataset_type_o, breakdown_col)
      ) |>
      set_hb_order() |>
      arrange(!!!syms(vec_series)) |>
      save_as_parquet(
        path = paste0(open_dir, measure_label, "month_", file_stem)
      ) |>
      append_quarter_ending(date_col = "referral_month") |>
      ungroup() |>
      group_by(quarter_ending, !!!syms(vec_series)) |>
      # need last value per quarter only
      filter(!!sym(referral_month_o) == max(!!sym(referral_month_o))) |>
      summarise_by_quarter(
        vec_group = c("quarter_ending", "dataset_type", "hb_name",
                      breakdown_label)
      ) |>
      arrange(!!!syms(vec_series)) |>
      save_as_parquet(
        path = paste0(open_dir, measure_label, "quarter_", file_stem)
      )
  }

  # base tables --------------------------------------------------------------

  # single row per individual, up to the latest month end
  df_single_row <- df_rtt |>
    lazy_dt() |>
    filter(!!sym(referral_month_o) <= month_end & # want total to latest month end
             !!sym(rtt_eval_o) == "seen - active") |> # the same as open cases?
    keep_single_row()

  # same, but all months kept: the date range is applied after the cumsum
  df_single_row_monthly <- df_rtt |>
    lazy_dt() |>
    filter(!!sym(rtt_eval_o) == "seen - active") |> # the same as open cases?
    keep_single_row()

  # overall ------------------------------------------------------------------

  summarise_overall(file_stem = "all_hb")                    # by hb
  summarise_overall(sex_reported_o, file_stem = "all_hb_sex")   # by sex
  summarise_overall(age_group_o, file_stem = "all_hb_age")      # by age
  summarise_overall(simd_quintile_o, file_stem = "all_hb_simd") # by simd

  # by month (and quarter) ---------------------------------------------------

  # by hb and month
  summarise_monthly(file_stem = "hb")

  # by hb, month, and sex
  summarise_monthly(sex_reported_o, "sex_reported", file_stem = "hb_sex")

  # by hb, month, and age
  summarise_monthly(age_group_o, "age_group", file_stem = "hb_age")

  # by hb, month, and simd
  invisible(
    summarise_monthly(simd_quintile_o, "simd2020_quintile",
                      file_stem = "hb_simd")
  )

}
|
||
|
Oops, something went wrong.