-
Notifications
You must be signed in to change notification settings - Fork 187
New inner_combine_linter #1012
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
New inner_combine_linter #1012
Changes from all commits
7c2a44f
8b3318c
1ea25ef
76ed844
a3b6d5a
53acfa5
299fa9c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
#' Require c() to be applied before relatively expensive vectorized functions | ||
#' | ||
#' `as.Date(c(a, b))` is logically equivalent to `c(as.Date(a), as.Date(b))`; | ||
#' ditto for the equivalence of several other vectorized functions like | ||
#' [as.POSIXct()] and math functions like [sin()]. The former is to be | ||
#' preferred so that the most expensive part of the operation ([as.Date()]) | ||
#' is applied only once. | ||
#' | ||
#' @evalRd rd_tags("inner_combine_linter") | ||
#' @seealso [linters] for a complete list of linters available in lintr. | ||
#' @export | ||
inner_combine_linter <- function() { | ||
Linter(function(source_file) { | ||
if (length(source_file$xml_parsed_content) == 0L) { | ||
return(list()) | ||
} | ||
|
||
xml <- source_file$xml_parsed_content | ||
|
||
# these don't take any other arguments (except maybe by non-default | ||
# methods), so don't need to check equality of other arguments | ||
no_arg_vectorized_funs <- c( | ||
"sin", "cos", "tan", "sinpi", "cospi", "tanpi", "asin", "acos", "atan", | ||
"log2", "log10", "log1p", "exp", "expm1", | ||
"sqrt", "abs" | ||
) | ||
|
||
# TODO(michaelchirico): the need to spell out specific arguments is pretty brittle, | ||
# but writing the xpath for the alternative case was proving too tricky. | ||
# It's messy enough as is -- it may make sense to take another pass at | ||
# writing the xpath from scratch to see if it can't be simplified. | ||
|
||
# See ?as.Date, ?as.POSIXct. tryFormats is not explicitly in any default | ||
# POSIXct method, but it is in as.Date.character and as.POSIXlt.character -- | ||
# the latter is what actually gets invoked when running as.POSIXct | ||
# on a character. So it is indeed an argument by pass-through. | ||
date_args <- c("format", "origin", "tz", "tryFormats") | ||
date_funs <- c("as.Date", "as.POSIXct", "as.POSIXlt") | ||
|
||
# See ?log. Only these two take a 'base' argument. | ||
log_funs <- c("log", "logb") | ||
log_args <- "base" | ||
|
||
# See ?lubridate::ymd and ?lubridate::ymd_hms | ||
lubridate_args <- c("quiet", "tz", "locale", "truncated") | ||
lubridate_funs <- c( | ||
"ymd", "ydm", "mdy", "myd", "dmy", "dym", | ||
"yq", "ym", "my", | ||
"ymd_hms", "ymd_hm", "ymd_h", "dmy_hms", "dmy_hm", "dmy_h", | ||
"mdy_hms", "mdy_hm", "mdy_h", "ydm_hms", "ydm_hm", "ydm_h" | ||
) | ||
|
||
date_args_cond <- build_arg_condition(date_funs, date_args) | ||
log_args_cond <- build_arg_condition(log_funs, log_args) | ||
lubridate_args_cond <- build_arg_condition(lubridate_funs, lubridate_args) | ||
|
||
c_expr_cond <- xp_and( | ||
sprintf( | ||
"expr[SYMBOL_FUNCTION_CALL[%s]]", | ||
xp_text_in_table(c(no_arg_vectorized_funs, date_funs, log_funs, lubridate_funs)) | ||
), | ||
"not(following-sibling::expr[not(expr[SYMBOL_FUNCTION_CALL])])", | ||
"not(expr/SYMBOL_FUNCTION_CALL != following-sibling::expr/expr/SYMBOL_FUNCTION_CALL)", | ||
date_args_cond, | ||
log_args_cond, | ||
lubridate_args_cond | ||
) | ||
xpath <- glue::glue("//expr[ | ||
count(expr) > 2 | ||
and expr[ | ||
SYMBOL_FUNCTION_CALL[text() = 'c'] | ||
and following-sibling::expr[1][ {c_expr_cond} ] | ||
] | ||
]") | ||
|
||
bad_expr <- xml2::xml_find_all(xml, xpath) | ||
|
||
return(lapply( | ||
bad_expr, | ||
xml_nodes_to_lint, | ||
source_file = source_file, | ||
lint_message = function(expr) { | ||
matched_call <- xml2::xml_text(xml2::xml_find_first(expr, "expr/expr/SYMBOL_FUNCTION_CALL")) | ||
message <- sprintf( | ||
"%1$s(c(x, y)) only runs the more expensive %1$s() once as compared to c(%1$s(x), %1$s(y)).", | ||
matched_call | ||
) | ||
paste("Combine inputs to vectorized functions first to take full advantage of vectorization, e.g.,", message) | ||
}, | ||
type = "warning" | ||
)) | ||
}) | ||
} | ||
|
||
#' Make the XPath condition ensuring an argument matches across calls | ||
#' | ||
#' @param arg Character scalar naming an argument | ||
#' @noRd | ||
arg_match_condition <- function(arg) { | ||
this_symbol <- sprintf("SYMBOL_SUB[text() = '%s']", arg) | ||
following_symbol <- sprintf("following-sibling::expr/%s", this_symbol) | ||
next_expr <- "following-sibling::expr[1]" | ||
return(xp_or( | ||
sprintf("not(%s) and not(%s)", this_symbol, following_symbol), | ||
xp_and( | ||
this_symbol, | ||
following_symbol, | ||
sprintf( | ||
"not(%1$s/%3$s != %2$s/%3$s)", | ||
this_symbol, following_symbol, next_expr | ||
) | ||
) | ||
)) | ||
} | ||
|
||
build_arg_condition <- function(calls, arguments) { | ||
xp_or( | ||
sprintf("not(expr[SYMBOL_FUNCTION_CALL[%s]])", xp_text_in_table(calls)), | ||
"not(SYMBOL_SUB) and not(following-sibling::expr/SYMBOL_SUB)", | ||
xp_and(vapply(arguments, arg_match_condition, character(1L))) | ||
) | ||
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
test_that("inner_combine_linter lints a false positive-ish usage", { | ||
# By default as.POSIXct.character picks up the format to apply from | ||
# the first element and, since it succeeds, applies that to the remaining | ||
# timestamps. Whereas when run individually, it won't succeed until | ||
# the correct format is matched for each input. Nevertheless, it is | ||
# still preferable to vectorize the call, while being sure to use a | ||
# consistent format for the inputs. In this case, the correct equivalent | ||
# call is as.POSIXct(c("2021-01-01 00:00:00", "2021-01-01 01:00:00")). | ||
expect_lint( | ||
"c(as.POSIXct('2021-01-01'), as.POSIXct('2021-01-01 01:00:00'))", | ||
rex::rex("Combine inputs to vectorized functions first"), | ||
inner_combine_linter() | ||
) | ||
}) | ||
|
||
skip_if_not_installed("patrick") | ||
local({ | ||
vector_funs <- c( | ||
"as.Date", "as.POSIXct", "as.POSIXlt", | ||
"sin", "cos", "tan", "sinpi", "cospi", "tanpi", "asin", "acos", "atan", | ||
"log", "logb", "log2", "log10", "log1p", "exp", "expm1", | ||
"sqrt", "abs", | ||
"ymd", "ydm", "mdy", "myd", "dmy", "dym", | ||
"yq", "ym", "my", | ||
"ymd_hms", "ymd_hm", "ymd_h", "dmy_hms", "dmy_hm", "dmy_h", | ||
"mdy_hms", "mdy_hm", "mdy_h", "ydm_hms", "ydm_hm", "ydm_h", | ||
NULL | ||
) | ||
patrick::with_parameters_test_that( | ||
"inner_combine_linter blocks simple vectorized calls:", | ||
expect_lint( | ||
sprintf("c(%1$s(x), %1$s(y))", vector_fun), | ||
rex::rex("Combine inputs to vectorized functions first"), | ||
inner_combine_linter() | ||
), | ||
.test_name = vector_funs, | ||
vector_fun = vector_funs | ||
) | ||
}) | ||
|
||
patrick::with_parameters_test_that( | ||
"inner_combine_linter blocks as.Date with identical passed arguments:", | ||
expect_lint( | ||
sprintf("c(as.Date(x, %1$s), as.Date(y, %1$s))", arg), | ||
rex::rex("Combine inputs to vectorized functions first"), | ||
inner_combine_linter() | ||
), | ||
.test_name = c("format", "origin", "tz", "tryFormats", "non-literal"), | ||
arg = c("format = '%F'", "origin = '1900-01-01'", "tz = 'Asia/Jakarta'", "tryFormats = '%F'", "tz = tz") | ||
) | ||
|
||
patrick::with_parameters_test_that( | ||
"inner_combine_linter blocks as.POSIXct with identical passed arguments:", | ||
expect_lint( | ||
sprintf("c(as.POSIXct(x, %1$s), as.POSIXct(y, %1$s))", arg), | ||
rex::rex("Combine inputs to vectorized functions first"), | ||
inner_combine_linter() | ||
), | ||
.test_name = c("format", "origin", "tz", "non-literal"), | ||
arg = c("format = '%F'", "origin = '1900-01-01'", "tz = 'UTC'", "tz = tz") | ||
) | ||
|
||
test_that("inner_combine_linter is order-agnostic for matching arguments", { | ||
expect_lint( | ||
"c(as.Date(x, format = f, tz = t), as.Date(y, tz = t, format = f))", | ||
rex::rex("Combine inputs to vectorized functions first"), | ||
inner_combine_linter() | ||
) | ||
}) | ||
|
||
skip_if_not_installed("tibble") | ||
patrick::with_parameters_test_that( | ||
"inner_combine_linter skips allowed usages:", | ||
expect_lint(expr, NULL, inner_combine_linter()), | ||
.cases = tibble::tribble( | ||
~.test_name, ~expr, | ||
"simple sin()", "x <- sin(1:10)", | ||
"mixed sin()+cos()", "y <- c(sin(1:10), cos(2:20))", | ||
"present/absent vector function", "c(log(x), 0.5)", | ||
"absent/present vector function", "c(0.5, log(x))", | ||
"mismatched arg (Date)", "c(as.Date(x, format = '%y'), as.Date(y, format = '%m'))", | ||
"present/absent arg (Date)", "c(as.Date(x, format = '%y'), as.Date(y))", | ||
"absent/present arg (Date)", "c(as.Date(x), as.Date(y, format = '%y'))", | ||
"matched value, not arg (Date)", "c(as.Date(x, format = '%y'), as.Date(y, tz = '%y'))", | ||
"mismatched arg (POSIXct)", "c(as.POSIXct(x, format = '%y'), as.POSIXct(y, format = '%m'))", | ||
"present/absent arg (POSIXct)", "c(as.POSIXct(x, format = '%y'), as.POSIXct(y))", | ||
"mismatched arg (log)", "c(log(x, base = 4), log(y, base = 5))", | ||
"present/absent arg (log)", "c(log(x, base = 4), log(y))" | ||
# TODO(michaelchirico): fix the code so these edge cases are covered | ||
# "unknown Date method argument", "c(as.Date(x, zoo = zzz), as.Date(y, zoo = zzz))", | ||
# "known+unknown Date argument", "c(as.Date(x, format = '%y', zoo = zzz), as.Date(y, format = '%y', zoo = zzz))", | ||
# "unknown POSIXct method argument", "c(as.POSIXct(x, zoo = zzz), as.POSIXct(y, zoo = zzz))", | ||
) | ||
) |
Uh oh!
There was an error while loading. Please reload this page.