|
| 1 | +#' Require c() to be applied before relatively expensive vectorized functions |
| 2 | +#' |
| 3 | +#' `as.Date(c(a, b))` is logically equivalent to `c(as.Date(a), as.Date(b))`; |
| 4 | +#' ditto for the equivalence of several other vectorized functions like |
| 5 | +#' [as.POSIXct()] and math functions like [sin()]. The former is to be |
| 6 | +#' preferred so that the most expensive part of the operation ([as.Date()]) |
| 7 | +#' is applied only once. |
| 8 | +#' |
| 9 | +#' @evalRd rd_tags("inner_combine_linter") |
| 10 | +#' @seealso [linters] for a complete list of linters available in lintr. |
| 11 | +#' @export |
| 12 | +inner_combine_linter <- function() { |
| 13 | + Linter(function(source_file) { |
| 14 | + if (length(source_file$xml_parsed_content) == 0L) { |
| 15 | + return(list()) |
| 16 | + } |
| 17 | + |
| 18 | + xml <- source_file$xml_parsed_content |
| 19 | + |
| 20 | + # these don't take any other arguments (except maybe by non-default |
| 21 | + # methods), so don't need to check equality of other arguments |
| 22 | + no_arg_vectorized_funs <- c( |
| 23 | + "sin", "cos", "tan", "sinpi", "cospi", "tanpi", "asin", "acos", "atan", |
| 24 | + "log2", "log10", "log1p", "exp", "expm1", |
| 25 | + "sqrt", "abs" |
| 26 | + ) |
| 27 | + |
| 28 | + # TODO(michaelchirico): the need to spell out specific arguments is pretty brittle, |
| 29 | + # but writing the xpath for the alternative case was proving too tricky. |
| 30 | + # It's messy enough as is -- it may make sense to take another pass at |
| 31 | + # writing the xpath from scratch to see if it can't be simplified. |
| 32 | + |
| 33 | + # See ?as.Date, ?as.POSIXct. tryFormats is not explicitly in any default |
| 34 | + # POSIXct method, but it is in as.Date.character and as.POSIXlt.character -- |
| 35 | + # the latter is what actually gets invoked when running as.POSIXct |
| 36 | + # on a character. So it is indeed an argument by pass-through. |
| 37 | + date_args <- c("format", "origin", "tz", "tryFormats") |
| 38 | + date_funs <- c("as.Date", "as.POSIXct", "as.POSIXlt") |
| 39 | + |
| 40 | + # See ?log. Only these two take a 'base' argument. |
| 41 | + log_funs <- c("log", "logb") |
| 42 | + log_args <- "base" |
| 43 | + |
| 44 | + # See ?lubridate::ymd and ?lubridate::ymd_hms |
| 45 | + lubridate_args <- c("quiet", "tz", "locale", "truncated") |
| 46 | + lubridate_funs <- c( |
| 47 | + "ymd", "ydm", "mdy", "myd", "dmy", "dym", |
| 48 | + "yq", "ym", "my", |
| 49 | + "ymd_hms", "ymd_hm", "ymd_h", "dmy_hms", "dmy_hm", "dmy_h", |
| 50 | + "mdy_hms", "mdy_hm", "mdy_h", "ydm_hms", "ydm_hm", "ydm_h" |
| 51 | + ) |
| 52 | + |
| 53 | + date_args_cond <- build_arg_condition(date_funs, date_args) |
| 54 | + log_args_cond <- build_arg_condition(log_funs, log_args) |
| 55 | + lubridate_args_cond <- build_arg_condition(lubridate_funs, lubridate_args) |
| 56 | + |
| 57 | + c_expr_cond <- xp_and( |
| 58 | + sprintf( |
| 59 | + "expr[SYMBOL_FUNCTION_CALL[%s]]", |
| 60 | + xp_text_in_table(c(no_arg_vectorized_funs, date_funs, log_funs, lubridate_funs)) |
| 61 | + ), |
| 62 | + "not(following-sibling::expr[not(expr[SYMBOL_FUNCTION_CALL])])", |
| 63 | + "not(expr/SYMBOL_FUNCTION_CALL != following-sibling::expr/expr/SYMBOL_FUNCTION_CALL)", |
| 64 | + date_args_cond, |
| 65 | + log_args_cond, |
| 66 | + lubridate_args_cond |
| 67 | + ) |
| 68 | + xpath <- glue::glue("//expr[ |
| 69 | + count(expr) > 2 |
| 70 | + and expr[ |
| 71 | + SYMBOL_FUNCTION_CALL[text() = 'c'] |
| 72 | + and following-sibling::expr[1][ {c_expr_cond} ] |
| 73 | + ] |
| 74 | + ]") |
| 75 | + |
| 76 | + bad_expr <- xml2::xml_find_all(xml, xpath) |
| 77 | + |
| 78 | + return(lapply( |
| 79 | + bad_expr, |
| 80 | + xml_nodes_to_lint, |
| 81 | + source_file = source_file, |
| 82 | + lint_message = function(expr) { |
| 83 | + matched_call <- xml2::xml_text(xml2::xml_find_first(expr, "expr/expr/SYMBOL_FUNCTION_CALL")) |
| 84 | + message <- sprintf( |
| 85 | + "%1$s(c(x, y)) only runs the more expensive %1$s() once as compared to c(%1$s(x), %1$s(y)).", |
| 86 | + matched_call |
| 87 | + ) |
| 88 | + paste("Combine inputs to vectorized functions first to take full advantage of vectorization, e.g.,", message) |
| 89 | + }, |
| 90 | + type = "warning" |
| 91 | + )) |
| 92 | + }) |
| 93 | +} |
| 94 | + |
| 95 | +#' Make the XPath condition ensuring an argument matches across calls |
| 96 | +#' |
| 97 | +#' @param arg Character scalar naming an argument |
| 98 | +#' @noRd |
| 99 | +arg_match_condition <- function(arg) { |
| 100 | + this_symbol <- sprintf("SYMBOL_SUB[text() = '%s']", arg) |
| 101 | + following_symbol <- sprintf("following-sibling::expr/%s", this_symbol) |
| 102 | + next_expr <- "following-sibling::expr[1]" |
| 103 | + return(xp_or( |
| 104 | + sprintf("not(%s) and not(%s)", this_symbol, following_symbol), |
| 105 | + xp_and( |
| 106 | + this_symbol, |
| 107 | + following_symbol, |
| 108 | + sprintf( |
| 109 | + "not(%1$s/%3$s != %2$s/%3$s)", |
| 110 | + this_symbol, following_symbol, next_expr |
| 111 | + ) |
| 112 | + ) |
| 113 | + )) |
| 114 | +} |
| 115 | + |
| 116 | +build_arg_condition <- function(calls, arguments) { |
| 117 | + xp_or( |
| 118 | + sprintf("not(expr[SYMBOL_FUNCTION_CALL[%s]])", xp_text_in_table(calls)), |
| 119 | + "not(SYMBOL_SUB) and not(following-sibling::expr/SYMBOL_SUB)", |
| 120 | + xp_and(vapply(arguments, arg_match_condition, character(1L))) |
| 121 | + ) |
| 122 | +} |
0 commit comments