diff --git a/DESCRIPTION b/DESCRIPTION index 930b57f..6f44e9c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,11 +27,11 @@ Imports: strex, stringr, tictoc, + magrittr, tidyr, zip Suggests: fuimus, - magrittr, roxyglobals, testthat (>= 3.0.0) Remotes: diff --git a/NAMESPACE b/NAMESPACE index 7ef75d1..a01ab4f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ export("%>%") export(ask) +export(clean_credentials) export(create_zip_file_names) export(dispense) export(download_zips) diff --git a/R/globals.R b/R/globals.R index cc11461..6e4ce40 100644 --- a/R/globals.R +++ b/R/globals.R @@ -743,8 +743,6 @@ utils::globalVariables(c( # # "uncompressed_size", - # - "zip_paths", # "zipfile", NULL diff --git a/R/nber.R b/R/nber.R index 63cd19c..a7ea760 100644 --- a/R/nber.R +++ b/R/nber.R @@ -149,22 +149,33 @@ download_zips <- function(table, directory) { #' @export create_zip_file_names <- function(x){ - stopifnot( - !all( - fs::is_absolute_path(zip_paths) - ) == "All paths must be absolute paths." - ) - - basename(zip_paths) |> - stringr::str_remove_all(".zip|week") |> + basename(x) |> + stringr::str_remove_all(".zip|week|npidata_pfile_") |> strex::str_split_by_numbers() |> purrr::list_transpose() |> purrr::discard_at(2) |> purrr::set_names(c("start", "end")) |> - purrr::map(lubridate::mdy) |> + purrr::map(anytime::anydate) |> purrr::list_transpose() |> purrr::map(paste0, collapse = "|") |> purrr::map(yasp::wrap, left = "week:", right = "") |> unlist(use.names = FALSE) } + +#' Clean credentials +#' +#' Replaces periods with empty strings +#' +#' @param x a vector of provider credentials +#' +#' @returns vector +#' +#' @autoglobal +#' +#' @keywords internal +#' +#' @export +clean_credentials <- function(x) { + gsub("\\.", "", x) +} diff --git a/data-raw/nber_download.R b/data-raw/nber_download.R index cda15ba..a6379f0 100644 --- a/data-raw/nber_download.R +++ b/data-raw/nber_download.R @@ -13,3 +13,52 @@ # monthly = NULL, # byvar = NULL # ) + + +get_pin("nber_weekly_info")$unzipped[1] + +npi_2024_01_01 <- get_pin("npi_2024_01_01__2024_01_07") +npi_2024_01_22 <- get_pin("npi_2024_01_22__2024_01_28") + + +release <- create_zip_file_names(c(npi_2024_01_01$release, npi_2024_01_22$release)) + +npi_2024_01_01 <- vctrs::vec_cbind( + dplyr::tibble(release = release[1]), + npi_2024_01_01[["base"]] +) + +npi_2024_01_22 <- vctrs::vec_cbind( + dplyr::tibble(release = release[2]), + npi_2024_01_22[["base"]] +) + +# fuimus::create_vec(names(npi_2024_01_01)) + +clnm <- c( + "entity", + "enum_date", + "deact_date", + "react_date", + "sole_prop", + "org_sub", + "gender", + "credential") + +npi_2024_01_01 |> + dplyr::mutate( + credential = provider:::clean_credentials(credential)) |> + hacksaw::count_split( + entity, + enum_date, + deact_date, + react_date, + sole_prop, + org_sub, + gender, + credential + ) |> + purrr::map(\(df) dplyr::filter(df, !is.na(df[1]))) |> + purrr::map(\(df) dplyr::rename(df, val = names(df[1]))) |> + purrr::set_names(clnm) |> + purrr::list_rbind(names_to = "var") diff --git a/data-raw/nber_taxonomy_license.R b/data-raw/nber_taxonomy_license.R index fee48d3..6e656cd 100644 --- a/data-raw/nber_taxonomy_license.R +++ b/data-raw/nber_taxonomy_license.R @@ -5,11 +5,11 @@ get_pin("nber_weekly_info") #----------- NPIData Base #### npidata <- dplyr::filter(get_pin("nber_weekly_info")$unzipped, file == "npidata_pfile") -release_id <- tools::file_path_sans_ext(basename(npidata$path[1])) +release_id <- tools::file_path_sans_ext(basename(npidata$path[2])) release_id npi_raw <- tidytable::fread( - npidata$path[1], + npidata$path[2], colClasses = list(character = 1:330)) |> janitor::clean_names() |> fuimus::remove_quiet() |> @@ -38,7 +38,7 @@ npi_base <- npi_raw |> middle = provider_middle_name, last = provider_last_name_legal_name, suffix = provider_name_suffix_text, - credential = provider_credential_text + credential = clean_credentials(provider_credential_text) ) |> fuimus::remove_quiet() @@ -57,18 +57,12 @@ npi_other <- npi_raw |> other_last = provider_other_last_name, other_last_type = provider_other_last_name_type_code, other_suffix = provider_other_name_suffix_text, - other_credential = provider_other_credential_text - ) |> - dplyr::rowwise() |> - dplyr::mutate(na_count = list(sum(is.na(dplyr::c_across(other_org_name:other_credential))))) |> - tidyr::unnest(na_count) |> - dplyr::filter(na_count < 9) |> - dplyr::select(-na_count) |> - fuimus::remove_quiet() + other_credential = clean_credentials(provider_other_credential_text) + ) -npi_other +npi_other <- vctrs::vec_slice(npi_other, which(cheapr::row_na_counts(npi_other) < 9)) -npi_authorized_official <- npi_raw |> +npi_auth_ofc <- npi_raw |> dplyr::reframe( npi, ao_prefix = authorized_official_name_prefix_text, @@ -76,18 +70,12 @@ npi_authorized_official <- npi_raw |> ao_middle = authorized_official_middle_name, ao_last = authorized_official_last_name, ao_suffix = authorized_official_name_suffix_text, - ao_credential = authorized_official_credential_text, + ao_credential = clean_credentials(authorized_official_credential_text), ao_title = authorized_official_title_or_position, ao_phone = authorized_official_telephone_number - ) |> - dplyr::rowwise() |> - dplyr::mutate(na_count = list(sum(is.na(dplyr::c_across(ao_prefix:ao_phone))))) |> - tidyr::unnest(na_count) |> - dplyr::filter(na_count < 8) |> - dplyr::select(-na_count) |> - fuimus::remove_quiet() + ) -npi_authorized_official +npi_auth_ofc <- vctrs::vec_slice(npi_auth_ofc, which(cheapr::row_na_counts(npi_auth_ofc) < 8)) npi_address <- npi_raw |> dplyr::reframe( @@ -114,9 +102,8 @@ npi_address <- npi_raw |> purrr::map_dfr(fuimus::na_if_common) |> fuimus::remove_quiet() -npi_address - #----------- NPIData Taxonomy/License #### + cols_pattern <- fuimus::single_line_string(" healthcare_provider_taxonomy_code |provider_license_number @@ -125,7 +112,7 @@ healthcare_provider_taxonomy_code |healthcare_provider_taxonomy_group" ) -npi_taxonomy_license <- npi_raw |> +npi_tax_lis <- npi_raw |> dplyr::select(npi, dplyr::matches(rlang::as_string(cols_pattern))) |> fuimus::remove_quiet() |> dplyr::mutate(row_id = dplyr::row_number(), .before = 1) |> @@ -157,8 +144,6 @@ npi_taxonomy_license <- npi_raw |> license_state) |> dplyr::arrange(npi, taxonomy_primary) -npi_taxonomy_license |> - tidyr::nest(data = -npi) #----------- NPIData Other Identifiers #### npi_identifiers <- npi_raw |> @@ -191,21 +176,19 @@ npi_identifiers <- npi_raw |> other_id_issuer ) -npi_identifiers - #----------- Weekly Release pin #### npi_week <- list( - release = release_id, - base = npi_base, - addr = npi_address, + release = create_zip_file_names(release_id), + basic = npi_base, + address = npi_address, other = npi_other, - ao = npi_authorized_official, - tax = npi_taxonomy_license, - ids = npi_identifiers + authorized = npi_auth_ofc, + taxonomy = npi_tax_lis, + identifier = npi_identifiers ) pin_update( x = npi_week, - name = "npi_2024_01_01__2024_01_07", - title = "NBER NPI Weekly Release 01-01-24" + name = "2024-01-22_2024-01-28", + title = "NBER NPI Weekly Release 2024-01-22" ) diff --git a/inst/extdata/pins/2024-01-22_2024-01-28/20240802T063427Z-fa95b/2024-01-22_2024-01-28.qs b/inst/extdata/pins/2024-01-22_2024-01-28/20240802T063427Z-fa95b/2024-01-22_2024-01-28.qs new file mode 100644 index 0000000..7776a33 Binary files /dev/null and b/inst/extdata/pins/2024-01-22_2024-01-28/20240802T063427Z-fa95b/2024-01-22_2024-01-28.qs differ diff --git a/inst/extdata/pins/2024-01-22_2024-01-28/20240802T063427Z-fa95b/data.txt b/inst/extdata/pins/2024-01-22_2024-01-28/20240802T063427Z-fa95b/data.txt new file mode 100644 index 0000000..fa755a3 --- /dev/null +++ b/inst/extdata/pins/2024-01-22_2024-01-28/20240802T063427Z-fa95b/data.txt @@ -0,0 +1,10 @@ +file: 2024-01-22_2024-01-28.qs +file_size: 2247579 +pin_hash: fa95b74bd411aa78 +type: qs +title: NBER NPI Weekly Release 2024-01-22 +description: ~ +tags: ~ +urls: ~ +created: 20240802T063427Z +api_version: 1 diff --git a/inst/extdata/pins/_pins.yaml b/inst/extdata/pins/_pins.yaml index 6b0782d..acec2ec 100644 --- a/inst/extdata/pins/_pins.yaml +++ b/inst/extdata/pins/_pins.yaml @@ -1,8 +1,6 @@ +2024-01-22_2024-01-28: +- 2024-01-22_2024-01-28/20240802T063427Z-fa95b/ nber_weekly_info: - nber_weekly_info/20240801T132355Z-6664b/ -npi_2024_01_01__2024_01_07: -- npi_2024_01_01__2024_01_07/20240802T022458Z-f6dd7/ -npi_2024_01_22__2024_01_28: -- npi_2024_01_22__2024_01_28/20240802T021816Z-9c4a6/ -npi_wk_040124: -- npi_wk_040124/20240801T200711Z-f0069/ +week_2024-01-01_2024-01-07: +- week_2024-01-01_2024-01-07/20240802T063048Z-a6c21/ diff --git a/inst/extdata/pins/npi_2024_01_01__2024_01_07/20240802T022458Z-f6dd7/data.txt b/inst/extdata/pins/npi_2024_01_01__2024_01_07/20240802T022458Z-f6dd7/data.txt deleted file mode 100644 index 03c3320..0000000 --- a/inst/extdata/pins/npi_2024_01_01__2024_01_07/20240802T022458Z-f6dd7/data.txt +++ /dev/null @@ -1,10 +0,0 @@ -file: npi_2024_01_01__2024_01_07.qs -file_size: 1755660 -pin_hash: f6dd71214b1bca28 -type: qs -title: NBER NPI Weekly Release 01-01-24 -description: ~ -tags: ~ -urls: ~ -created: 20240802T022458Z -api_version: 1 diff --git a/inst/extdata/pins/npi_2024_01_01__2024_01_07/20240802T022458Z-f6dd7/npi_2024_01_01__2024_01_07.qs b/inst/extdata/pins/npi_2024_01_01__2024_01_07/20240802T022458Z-f6dd7/npi_2024_01_01__2024_01_07.qs deleted file mode 100644 index 2d9c3cc..0000000 Binary files a/inst/extdata/pins/npi_2024_01_01__2024_01_07/20240802T022458Z-f6dd7/npi_2024_01_01__2024_01_07.qs and /dev/null differ diff --git a/inst/extdata/pins/npi_2024_01_22__2024_01_28/20240802T021816Z-9c4a6/data.txt b/inst/extdata/pins/npi_2024_01_22__2024_01_28/20240802T021816Z-9c4a6/data.txt deleted file mode 100644 index 27f3216..0000000 --- a/inst/extdata/pins/npi_2024_01_22__2024_01_28/20240802T021816Z-9c4a6/data.txt +++ /dev/null @@ -1,10 +0,0 @@ -file: npi_2024_01_22__2024_01_28.qs -file_size: 2249680 -pin_hash: 9c4a6d619ed705c6 -type: qs -title: NBER NPI Weekly Release 01-22-24 -description: ~ -tags: ~ -urls: ~ -created: 20240802T021816Z -api_version: 1 diff --git a/inst/extdata/pins/npi_2024_01_22__2024_01_28/20240802T021816Z-9c4a6/npi_2024_01_22__2024_01_28.qs b/inst/extdata/pins/npi_2024_01_22__2024_01_28/20240802T021816Z-9c4a6/npi_2024_01_22__2024_01_28.qs deleted file mode 100644 index 11f39d7..0000000 Binary files a/inst/extdata/pins/npi_2024_01_22__2024_01_28/20240802T021816Z-9c4a6/npi_2024_01_22__2024_01_28.qs and /dev/null differ diff --git a/inst/extdata/pins/week_2024-01-01_2024-01-07/20240802T063048Z-a6c21/data.txt b/inst/extdata/pins/week_2024-01-01_2024-01-07/20240802T063048Z-a6c21/data.txt new file mode 100644 index 0000000..7497f31 --- /dev/null +++ b/inst/extdata/pins/week_2024-01-01_2024-01-07/20240802T063048Z-a6c21/data.txt @@ -0,0 +1,10 @@ +file: week_2024-01-01_2024-01-07.qs +file_size: 1754940 +pin_hash: a6c21a95965b6788 +type: qs +title: NBER NPI Weekly Release 2024-01-01 +description: ~ +tags: ~ +urls: ~ +created: 20240802T063048Z +api_version: 1 diff --git a/inst/extdata/pins/week_2024-01-01_2024-01-07/20240802T063048Z-a6c21/week_2024-01-01_2024-01-07.qs b/inst/extdata/pins/week_2024-01-01_2024-01-07/20240802T063048Z-a6c21/week_2024-01-01_2024-01-07.qs new file mode 100644 index 0000000..39a95be Binary files /dev/null and b/inst/extdata/pins/week_2024-01-01_2024-01-07/20240802T063048Z-a6c21/week_2024-01-01_2024-01-07.qs differ diff --git a/man/clean_credentials.Rd b/man/clean_credentials.Rd new file mode 100644 index 0000000..42e8f5a --- /dev/null +++ b/man/clean_credentials.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nber.R +\name{clean_credentials} +\alias{clean_credentials} +\title{Clean credentials} +\usage{ +clean_credentials(x) +} +\arguments{ +\item{x}{a vector of provider credentials} +} +\value{ +vector +} +\description{ +Replaces periods with empty strings +} +\keyword{internal}