Skip to content

Commit

Permalink
refactored ask and grab
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewallenbruce committed Mar 30, 2024
1 parent 149cbbc commit b7d687e
Show file tree
Hide file tree
Showing 18 changed files with 126 additions and 240 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ Imports:
tidyr,
vctrs,
zip,
here,
data.table,
tictoc
Suggests:
roxyglobals,
Expand Down
7 changes: 0 additions & 7 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,7 @@

export("%>%")
export(ask)
export(clean_deactivation)
export(clean_endpoints)
export(clean_locations)
export(clean_number)
export(clean_weekly)
export(create_address)
export(create_identifiers)
export(create_taxonomy)
export(dispense)
export(grab)
export(is_directory)
Expand Down
67 changes: 41 additions & 26 deletions R/browse.R
Original file line number Diff line number Diff line change
@@ -1,25 +1,28 @@
#' List the most recent NPPES Data Dissemination releases
#' NPPES Data Dissemination releases
#'
#' @param save `<chr>` write to disk using `readr::write_csv()` ; default is `FALSE`
#' @param save `<lgl>` write data to disk with `data.table::fwrite()`; default is `FALSE`
#'
#' @param path `<chr>` path to save to; default is `here::here()`
#' @param path `<chr>` path to save csv to; default is `fs::path_wd()`
#'
#' @return tibble with
#' @return A [tibble][tibble::tibble-package] of the search results.
#'
#' @examplesIf interactive()
#' nppez::ask()
#' tmp <- fs::dir_create(fs::path_temp("nppez"))
#'
#' nppez::ask(save = TRUE, path = tmp)
#'
#' fs::dir_delete(tmp)
#' @autoglobal
#' @export
ask <- function(save = FALSE,
path = here::here("./")) {
path = fs::path_wd()) {

tictoc::tic("Download Time")
tictoc::tic("Download Time") ########################

url <- "https://download.cms.gov/nppes/NPI_Files.html"
html <- rvest::read_html(url)

tictoc::toc()
tictoc::toc() ########################################

names <- html |>
rvest::html_elements("li") |>
Expand Down Expand Up @@ -56,42 +59,54 @@ ask <- function(save = FALSE,
) |>
dplyr::select(-date_wk1, -name)

class(results) <- c("ask", class(results))

if (save) {
readr::write_csv(
data.table::fwrite(
results,
file = paste0(
path,
stringr::str_replace(
results[[1]][[1]],
".zip",
".csv"
)
)
fs::path(path,
fs::path_ext_set(
results[[1]][[1]],
".csv"
)
)
)
}
return(results)
}

#' Download NPPES ZIP files to a local directory
#'
#' @param dir path to local directory to download ZIPs to
#' @param obj `<tbl_df>` object of class `ask`, returned from `nppez::ask()`
#' @param files `<chr>` vector of ZIP file names to download, each matching a value in `obj$file`; default is `NULL`, which downloads every file listed in `obj`
#' @param path `<chr>` path to download ZIPs to; default is `fs::path_wd()`
#'
#' @return A [tibble][tibble::tibble-package] of download results from `curl::multi_download()`, one row per file.
#'
#' @examplesIf interactive()
#' nppez::grab(dir = tempdir())
#' nppez::ask() |>
#' nppez::grab(files = "NPPES_Deactivated_NPI_Report_031124.zip")
#'
#' @autoglobal
#' @export
grab <- function(dir) {
grab <- function(obj,
files = NULL,
path = fs::path_wd()) {

stopifnot("`obj` must be of class 'ask'" = inherits(obj, "ask"))

if (!is.null(files)) {

stopifnot("No `files` in results" = files %in% obj$file)

obj <- vctrs::vec_slice(obj, vctrs::vec_in(obj$file, files))

zips <- nppez::ask()$url
}

curl::multi_download(
urls = zips,
destfiles = stringr::str_c(
dir,
basename(zips)
urls = obj$url,
destfiles = fs::path(
path,
basename(obj$url)
)
)
}
Expand Down
14 changes: 7 additions & 7 deletions R/clean.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#' clean_deactivation("D:/<directory name>")
#'
#' @autoglobal
#' @export
#' @noRd
clean_deactivation <- function(dir) {

path <- fs::dir_ls(dir, glob = "*.xlsx")
Expand Down Expand Up @@ -54,7 +54,7 @@ clean_deactivation <- function(dir) {
#' clean_endpoints("D:/<directory>/<filename>.csv")
#'
#' @autoglobal
#' @export
#' @noRd
clean_endpoints <- function(dir) {

path <- "D:/nppez_data/unzips/endpoint_pfile_20050523-20230409.csv"
Expand Down Expand Up @@ -89,7 +89,7 @@ clean_endpoints <- function(dir) {
#' clean_locations("D:/<directory>/<filename>.csv")
#' }
#' @autoglobal
#' @export
#' @noRd
clean_locations <- function(csv = NULL) {

## Non-Primary Practice Locations
Expand Down Expand Up @@ -127,7 +127,7 @@ clean_locations <- function(csv = NULL) {
#' clean_weekly("D:/<directory>/<filename>.csv")
#' }
#' @autoglobal
#' @export
#' @noRd
clean_weekly <- function(csv = NULL) {

week <- readr::read_csv(
Expand Down Expand Up @@ -515,7 +515,7 @@ clean_weekly <- function(csv = NULL) {
#' create_identifiers()
#' }
#' @autoglobal
#' @export
#' @noRd
create_identifiers <- function(df) {

identifier <- df |>
Expand Down Expand Up @@ -569,7 +569,7 @@ create_identifiers <- function(df) {
#' create_taxonomy()
#' }
#' @autoglobal
#' @export
#' @noRd
create_taxonomy <- function(df) {

code <- df |>
Expand Down Expand Up @@ -641,7 +641,7 @@ create_taxonomy <- function(df) {
#' create_address()
#' }
#' @autoglobal
#' @export
#' @noRd
create_address <- function(df) {

practice <- df |>
Expand Down
31 changes: 20 additions & 11 deletions R/utils.R
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
#' Generate a sequence of numbers with a new prefix
#'
#' @param n `<int>` Number of sequences to generate
#' @param n `<int>` Numeric sequence to generate
#' @param new `<chr>` New prefix
#' @param old `<chr>` Old prefix
#' @param between `<chr>` Separator between `new` and `old`, default `" = "`
#' @param enclose `<chr>` *(optional)* Vector `length(x) == 2` with which to enclose output
#' @param old `<chr>` Old prefix
#' @param collapse `<chr>` Separator between sequences, default `", "`
#' @param enclose `<chr>` *(optional)* Vector of `length(x) == 2` with which to enclose output
#' @param style `<lgl>` Apply `styler::style_text()` to output, default `TRUE`
#'
#' @return `<chr>` collapsed vector of `n` sequences
#'
#' @examples
#' x <- rename_seq(
#' n = 3, # can also be 25:300 etc.
#' rename_seq(
#' n = 10,
#' new = "id_issuer_",
#' old = "Other.ID.Issuer.",
#' between = " = ",
#' old = "Other.ID.Issuer.",
#' enclose = c("x = c(", ")"),
#' collapse = ",\n ",
#' style = TRUE)
Expand All @@ -24,10 +24,10 @@
#' @export
rename_seq <- function(n,
new,
old,
between = " = ",
enclose = NULL,
old,
collapse = ", ",
enclose = NULL,
style = TRUE) {

x <- stringr::str_c(
Expand All @@ -38,8 +38,17 @@ rename_seq <- function(n,
seq(n),
collapse = collapse)

if (!is.null(enclose)) {x <- stringr::str_c(enclose[1], x, enclose[2])}
if (style) {x <- styler::style_text(x)}
if (!is.null(enclose)) {
x <- stringr::str_c(
enclose[1],
x,
enclose[2]
)
}

if (style) {
x <- styler::style_text(x)
}
return(x)
}

Expand All @@ -50,7 +59,7 @@ rename_seq <- function(n,
#' @returns `<chr>` single line character vector
#'
#' @examplesIf interactive()
#' is_directory("D:/")
#' single_line_string("D:/")
#'
#' @autoglobal
#' @export
Expand Down
22 changes: 15 additions & 7 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -45,25 +45,33 @@ library(nppez)
List the most recent NPPES Data Dissemination releases

```{r ask}
nppez::ask()
x <- nppez::ask()
x
```

## Grab
### Grab

Download NPPES ZIP files to a local directory

```{r grab, eval=FALSE}
nppez::grab(dir = "C:/<folder-to-save-zip-files-to>")
```{r grab}
nppez::grab(x, files = "NPPES_Deactivated_NPI_Report_031124.zip")
```


```{r, echo=FALSE}
fs::file_delete("NPPES_Deactivated_NPI_Report_031124.zip")
```



|download_date | size|file |
|:-------------|-------:|:--------------------------------------------------------------------|
|2023-04-12 | 3.62M|D:/nppez_data/zips/NPPES_Data_Dissemination_040323_040923_Weekly.zip |
|2023-04-12 | 851.05M|D:/nppez_data/zips/NPPES_Data_Dissemination_April_2023.zip |
|2023-04-12 | 1.78M|D:/nppez_data/zips/NPPES_Deactivated_NPI_Report_041023.zip |


# Peek
### Peek

```{r peek, eval=FALSE}
nppez::peek(dir = "<path-to-downloaded-zip-files>")
Expand Down Expand Up @@ -114,13 +122,13 @@ dplyr::filter(parent_zip == "NPPES_Data_Dissemination_April_2023.zip") |>
|NPPES_Data_Dissemination_April_2023.zip |npidata_pfile_20050523-20230409.csv | 801.64M| 8.66G|


# Prune
### Prune

```{r prune, eval=FALSE}
nppez::prune(dir = "<path-to-downloaded-zip-files>")
```

# Dispense
### Dispense

```{r dispense, eval=FALSE}
nppez::dispense(from = "<path-to-downloaded-zip-files>",
Expand Down
20 changes: 13 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ library(nppez)
List the most recent NPPES Data Dissemination releases

``` r
nppez::ask()
#> Download Time: 0.42 sec elapsed
x <- nppez::ask()
#> Download Time: 0.34 sec elapsed
x
#> # A tibble: 5 × 4
#> file url date size
#> <chr> <chr> <date> <fs::b>
Expand All @@ -40,12 +41,17 @@ nppez::ask()
#> 5 NPPES_Data_Dissemination_031824_032424_Weekly.zip https://… 2024-03-18 4.31M
```

## Grab
### Grab

Download NPPES ZIP files to a local directory

``` r
nppez::grab(dir = "C:/<folder-to-save-zip-files-to>")
nppez::grab(x, files = "NPPES_Deactivated_NPI_Report_031124.zip")
#> # A tibble: 1 × 10
#> success status_code resumefrom url destfile error type modified
#> <lgl> <int> <dbl> <chr> <chr> <chr> <chr> <dttm>
#> 1 TRUE 200 0 https… "C:\\Us… <NA> appl… 2024-03-11 00:01:43
#> # ℹ 2 more variables: time <dbl>, headers <list>
```

| download_date | size | file |
Expand All @@ -54,7 +60,7 @@ nppez::grab(dir = "C:/<folder-to-save-zip-files-to>")
| 2023-04-12 | 851.05M | D:/nppez_data/zips/NPPES_Data_Dissemination_April_2023.zip |
| 2023-04-12 | 1.78M | D:/nppez_data/zips/NPPES_Deactivated_NPI_Report_041023.zip |

# Peek
### Peek

``` r
nppez::peek(dir = "<path-to-downloaded-zip-files>")
Expand All @@ -74,13 +80,13 @@ nppez::peek(dir = "<path-to-downloaded-zip-files>")
| NPPES_Data_Dissemination_April_2023.zip | pl_pfile_20050523-20230409.csv | 22M | 65.51M |
| NPPES_Data_Dissemination_April_2023.zip | npidata_pfile_20050523-20230409.csv | 801.64M | 8.66G |

# Prune
### Prune

``` r
nppez::prune(dir = "<path-to-downloaded-zip-files>")
```

# Dispense
### Dispense

``` r
nppez::dispense(from = "<path-to-downloaded-zip-files>",
Expand Down
Loading

0 comments on commit b7d687e

Please sign in to comment.