From 5e0fd8de18d2b89a02a9d59313a651ef95478258 Mon Sep 17 00:00:00 2001 From: Matthijs Berends Date: Mon, 18 Nov 2024 12:17:54 +0000 Subject: [PATCH] add `post_where` --- DESCRIPTION | 2 +- R/get_diver_data.R | 20 ++++++++++++++++++-- R/presets.R | 6 ++++-- man/get_diver_data.Rd | 3 ++- man/presets.Rd | 6 ++++-- 5 files changed, 29 insertions(+), 8 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4d23cc5..457cdba 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: certedb Title: A Certe R Package for Connecting to Databases -Version: 1.12.5 +Version: 1.12.6 Authors@R: c( person(given = c("Matthijs", "S."), family = "Berends", diff --git a/R/get_diver_data.R b/R/get_diver_data.R index 604a086..9bc84a1 100644 --- a/R/get_diver_data.R +++ b/R/get_diver_data.R @@ -21,7 +21,7 @@ #' #' Thes functions can be used to download local or remote database data, e.g. Spectre data from DiveLine on a Diver server (from [Dimensional Insight](https://www.dimins.com)). The [get_diver_data()] function sets up an ODBC connection (using [db_connect()]), which requires their quite limited [DI-ODBC driver](https://www.dimins.com/online-help/workbench_help/Content/ODBC/di-odbc.html). #' @param date_range date range, can be length 1 or 2 (or more to use the min/max) to filter on the column specified in the YAML file, see [presets]. Defaults to [this_year()]. Use `NULL` to set no date filter. Can also be years, or functions such as [`last_month()`][certetoolbox::last_month()]. Date-time ojects will be converted to dates, so using times as input is useless. It is supported to filter on a date-time column though. -#' @param where arguments to filter data on, will be passed on to [`filter()`][dplyr::filter()]. **Do not use `&&` or `||` but only `&` or `|` in filtering.** +#' @param where,post_where arguments to filter data on, will be passed on to [`filter()`][dplyr::filter()]. **Do not use `&&` or `||` but only `&` or `|` in filtering.** The `post_where` will be run after all downloading and post-processing, but before the auto-transform. #' @param diver_cbase,diver_project,diver_dsn,diver_testserver properties to set in [db_connect()]. The `diver_cbase` argument will be based on `preset`, but can also be set to blank `NULL` to manually select a cBase in a popup window. #' @param diver_tablename name of the database table to download data from. This is hard-coded by DI and should normally never be changed. #' @param review_qry a [logical] to indicate whether the query must be reviewed first, defaults to `TRUE` in interactive mode and `FALSE` otherwise. This will always be `FALSE` in Quarto / R Markdown, since the output of [knitr::pandoc_to()] must be `NULL`. @@ -111,6 +111,7 @@ #' } get_diver_data <- function(date_range = this_year(), where = NULL, + post_where = NULL, review_qry = interactive(), antibiogram_type = "sir", distinct = TRUE, @@ -284,7 +285,7 @@ get_diver_data <- function(date_range = this_year(), # set query ---- if (!is.null(tryCatch(where, error = function(e) 0))) { msg_init("Validating WHERE statement...", print = info) - # fill in columns from the 'di' object + # fill in columns from the 'di' or 'gl' object if (isTRUE(list(...)$where_as_character)) { where <- str2lang(where) } @@ -555,6 +556,21 @@ get_diver_data <- function(date_range = this_year(), } } + # post WHERE ---- + if (!is.null(tryCatch(post_where, error = function(e) 0))) { + msg_init("Validating post-WHERE statement...", print = info, prefix_time = TRUE) + # fill in columns from the 'di' or 'gl' object + post_where <- where_convert_di_gl(substitute(post_where)) + # convert objects, this will return msg "OK" + post_where <- where_convert_objects(deparse(substitute(post_where)), info = info) + out_new <- out |> filter(!!post_where) + if (nrow(out_new) < nrow(out)) { + msg_init("Removing ", nrow(out) - nrow(out_new), " rows from ", font_blue("post_where"), "...", print = info, prefix_time = TRUE) + out <- out_new + msg_ok(dimensions = dim(out), print = info) + } + } + # only real patients ---- if (isTRUE(only_real_patients)) { out_new <- out diff --git a/R/presets.R b/R/presets.R index 6a51e83..1df48ff 100644 --- a/R/presets.R +++ b/R/presets.R @@ -46,6 +46,7 @@ #' cbase: "location/to/another.cbase" #' by: ColumnName1, ColumnName2 #' type: "left" +#' filter: ColumnName1 == "abc" #' select: ColumnName1, col_name_2 = ColumnName2, ColumnName3, everything(), !starts_with("abc") #' wide_names_from: ColumnName3 #' join2: @@ -70,8 +71,9 @@ #' #' After this, the arguments in [get_diver_data()] will run: #' -#' 5. Distinct if `distinct = TRUE` using [`distinct()`][dplyr::distinct()] -#' 6. Auto-transform if `autotransform = TRUE` using [`auto_transform()`][certetoolbox::auto_transform()] +#' 5. Post-WHERE if `post_where` is set, using [`filter()`][dplyr::filter()] +#' 6. Distinct if `distinct = TRUE`, using [`distinct()`][dplyr::distinct()] +#' 7. Auto-transform if `autotransform = TRUE`, using [`auto_transform()`][certetoolbox::auto_transform()] #' #' ## cBase (`cbase`) #' diff --git a/man/get_diver_data.Rd b/man/get_diver_data.Rd index 2c84b0e..d242cd8 100644 --- a/man/get_diver_data.Rd +++ b/man/get_diver_data.Rd @@ -13,6 +13,7 @@ get_diver_data( date_range = this_year(), where = NULL, + post_where = NULL, review_qry = interactive(), antibiogram_type = "sir", distinct = TRUE, @@ -104,7 +105,7 @@ get_glims10_data( \arguments{ \item{date_range}{date range, can be length 1 or 2 (or more to use the min/max) to filter on the column specified in the YAML file, see \link{presets}. Defaults to \code{\link[=this_year]{this_year()}}. Use \code{NULL} to set no date filter. Can also be years, or functions such as \code{\link[certetoolbox:days_around_today]{last_month()}}. Date-time ojects will be converted to dates, so using times as input is useless. It is supported to filter on a date-time column though.} -\item{where}{arguments to filter data on, will be passed on to \code{\link[dplyr:filter]{filter()}}. \strong{Do not use \code{&&} or \code{||} but only \code{&} or \code{|} in filtering.}} +\item{where, post_where}{arguments to filter data on, will be passed on to \code{\link[dplyr:filter]{filter()}}. \strong{Do not use \code{&&} or \code{||} but only \code{&} or \code{|} in filtering.} The \code{post_where} will be run after all downloading and post-processing, but before the auto-transform.} \item{review_qry}{a \link{logical} to indicate whether the query must be reviewed first, defaults to \code{TRUE} in interactive mode and \code{FALSE} otherwise. This will always be \code{FALSE} in Quarto / R Markdown, since the output of \code{\link[knitr:output_type]{knitr::pandoc_to()}} must be \code{NULL}.} diff --git a/man/presets.Rd b/man/presets.Rd index b7e8903..bc2be80 100644 --- a/man/presets.Rd +++ b/man/presets.Rd @@ -43,6 +43,7 @@ The most extensive YAML form: cbase: "location/to/another.cbase" by: ColumnName1, ColumnName2 type: "left" + filter: ColumnName1 == "abc" select: ColumnName1, col_name_2 = ColumnName2, ColumnName3, everything(), !starts_with("abc") wide_names_from: ColumnName3 join2: @@ -67,8 +68,9 @@ The YAML keys run in this order: After this, the arguments in \code{\link[=get_diver_data]{get_diver_data()}} will run: \enumerate{ -\item Distinct if \code{distinct = TRUE} using \code{\link[dplyr:distinct]{distinct()}} -\item Auto-transform if \code{autotransform = TRUE} using \code{\link[certetoolbox:auto_transform]{auto_transform()}} +\item Post-WHERE if \code{post_where} is set, using \code{\link[dplyr:filter]{filter()}} +\item Distinct if \code{distinct = TRUE}, using \code{\link[dplyr:distinct]{distinct()}} +\item Auto-transform if \code{autotransform = TRUE}, using \code{\link[certetoolbox:auto_transform]{auto_transform()}} } }