Skip to content

New inner_combine_linter #1012

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ Collate:
'ifelse_censor_linter.R'
'implicit_integer_linter.R'
'infix_spaces_linter.R'
'inner_combine_linter.R'
'line_length_linter.R'
'lint.R'
'linter_tag_docs.R'
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ export(if_else_match_braces_linter)
export(ifelse_censor_linter)
export(implicit_integer_linter)
export(infix_spaces_linter)
export(inner_combine_linter)
export(line_length_linter)
export(lint)
export(lint_dir)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ function calls. (#850, #851, @renkun-ken)
* `consecutive_stopifnot_linter()` Require consecutive calls to `stopifnot()` to be unified into one
* `ifelse_censor_linter()` Require usage of `pmax()` / `pmin()` where appropriate, e.g. `ifelse(x > y, x, y)` is `pmax(x, y)`
* `system_file_linter()` Require file paths to be constructed by `system.file()` instead of calling `file.path()` directly
* `inner_combine_linter` Require inputs to vectorized functions to be combined first rather than later, e.g. `as.Date(c(x, y))` over `c(as.Date(x), as.Date(y))`
* `assignment_linter()` now lints right assignment (`->` and `->>`) and gains two arguments. `allow_cascading_assign` (`TRUE` by default) toggles whether to lint `<<-` and `->>`; `allow_right_assign` toggles whether to lint `->` and `->>` (#915, @michaelchirico)
* `infix_spaces_linter()` gains argument `exclude_operators` to disable lints on selected infix operators. By default, all "low-precedence" operators throw lints; see `?infix_spaces_linter` for an enumeration of these. (#914 @michaelchirico)
* `infix_spaces_linter()` now throws a lint on `a~b` and `function(a=1) {}` (#930, @michaelchirico)
Expand Down
122 changes: 122 additions & 0 deletions R/inner_combine_linter.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#' Require c() to be applied before relatively expensive vectorized functions
#'
#' `as.Date(c(a, b))` is logically equivalent to `c(as.Date(a), as.Date(b))`;
#' ditto for the equivalence of several other vectorized functions like
#' [as.POSIXct()] and math functions like [sin()]. The former is to be
#' preferred so that the most expensive part of the operation ([as.Date()])
#' is applied only once.
#'
#' @evalRd rd_tags("inner_combine_linter")
#' @seealso [linters] for a complete list of linters available in lintr.
#' @export
inner_combine_linter <- function() {
Linter(function(source_file) {
if (length(source_file$xml_parsed_content) == 0L) {
return(list())
}

xml <- source_file$xml_parsed_content

# these don't take any other arguments (except maybe by non-default
# methods), so don't need to check equality of other arguments
no_arg_vectorized_funs <- c(
"sin", "cos", "tan", "sinpi", "cospi", "tanpi", "asin", "acos", "atan",
"log2", "log10", "log1p", "exp", "expm1",
"sqrt", "abs"
)

# TODO(michaelchirico): the need to spell out specific arguments is pretty brittle,
# but writing the xpath for the alternative case was proving too tricky.
# It's messy enough as is -- it may make sense to take another pass at
# writing the xpath from scratch to see if it can't be simplified.

# See ?as.Date, ?as.POSIXct. tryFormats is not explicitly in any default
# POSIXct method, but it is in as.Date.character and as.POSIXlt.character --
# the latter is what actually gets invoked when running as.POSIXct
# on a character. So it is indeed an argument by pass-through.
date_args <- c("format", "origin", "tz", "tryFormats")
date_funs <- c("as.Date", "as.POSIXct", "as.POSIXlt")

# See ?log. Only these two take a 'base' argument.
log_funs <- c("log", "logb")
log_args <- "base"

# See ?lubridate::ymd and ?lubridate::ymd_hms
lubridate_args <- c("quiet", "tz", "locale", "truncated")
lubridate_funs <- c(
"ymd", "ydm", "mdy", "myd", "dmy", "dym",
"yq", "ym", "my",
"ymd_hms", "ymd_hm", "ymd_h", "dmy_hms", "dmy_hm", "dmy_h",
"mdy_hms", "mdy_hm", "mdy_h", "ydm_hms", "ydm_hm", "ydm_h"
)

date_args_cond <- build_arg_condition(date_funs, date_args)
log_args_cond <- build_arg_condition(log_funs, log_args)
lubridate_args_cond <- build_arg_condition(lubridate_funs, lubridate_args)

c_expr_cond <- xp_and(
sprintf(
"expr[SYMBOL_FUNCTION_CALL[%s]]",
xp_text_in_table(c(no_arg_vectorized_funs, date_funs, log_funs, lubridate_funs))
),
"not(following-sibling::expr[not(expr[SYMBOL_FUNCTION_CALL])])",
"not(expr/SYMBOL_FUNCTION_CALL != following-sibling::expr/expr/SYMBOL_FUNCTION_CALL)",
date_args_cond,
log_args_cond,
lubridate_args_cond
)
xpath <- glue::glue("//expr[
count(expr) > 2
and expr[
SYMBOL_FUNCTION_CALL[text() = 'c']
and following-sibling::expr[1][ {c_expr_cond} ]
]
]")

bad_expr <- xml2::xml_find_all(xml, xpath)

return(lapply(
bad_expr,
xml_nodes_to_lint,
source_file = source_file,
lint_message = function(expr) {
matched_call <- xml2::xml_text(xml2::xml_find_first(expr, "expr/expr/SYMBOL_FUNCTION_CALL"))
message <- sprintf(
"%1$s(c(x, y)) only runs the more expensive %1$s() once as compared to c(%1$s(x), %1$s(y)).",
matched_call
)
paste("Combine inputs to vectorized functions first to take full advantage of vectorization, e.g.,", message)
},
type = "warning"
))
})
}

#' Make the XPath condition ensuring an argument matches across calls
#'
#' @param arg Character scalar naming an argument
#' @noRd
arg_match_condition <- function(arg) {
this_symbol <- sprintf("SYMBOL_SUB[text() = '%s']", arg)
following_symbol <- sprintf("following-sibling::expr/%s", this_symbol)
next_expr <- "following-sibling::expr[1]"
return(xp_or(
sprintf("not(%s) and not(%s)", this_symbol, following_symbol),
xp_and(
this_symbol,
following_symbol,
sprintf(
"not(%1$s/%3$s != %2$s/%3$s)",
this_symbol, following_symbol, next_expr
)
)
))
}

build_arg_condition <- function(calls, arguments) {
xp_or(
sprintf("not(expr[SYMBOL_FUNCTION_CALL[%s]])", xp_text_in_table(calls)),
"not(SYMBOL_SUB) and not(following-sibling::expr/SYMBOL_SUB)",
xp_and(vapply(arguments, arg_match_condition, character(1L)))
)
}
1 change: 1 addition & 0 deletions inst/lintr/linters.csv
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ if_else_match_braces_linter,default style readability
ifelse_censor_linter,best_practices efficiency
implicit_integer_linter,style consistency best_practices
infix_spaces_linter,style readability default
inner_combine_linter,efficiency consistency readability
line_length_linter,style readability default configurable
literal_coercion_linter,best_practices consistency efficiency
missing_argument_linter,correctness common_mistakes configurable
Expand Down
1 change: 1 addition & 0 deletions man/consistency_linters.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/efficiency_linters.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions man/inner_combine_linter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions man/linters.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/readability_linters.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

94 changes: 94 additions & 0 deletions tests/testthat/test-inner_combine_linter.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
test_that("inner_combine_linter lints a false positive-ish usage", {
# By default as.POSIXct.character picks up the format to apply from
# the first element and, since it succeeds, applies that to the remaining
# timestamps. Whereas when run individually, it won't succeed until
# the correct format is matched for each input. Nevertheless, it is
# still preferable to vectorize the call, while being sure to use a
# consistent format for the inputs. In this case, the correct equivalent
# call is as.POSIXct(c("2021-01-01 00:00:00", "2021-01-01 01:00:00")).
expect_lint(
"c(as.POSIXct('2021-01-01'), as.POSIXct('2021-01-01 01:00:00'))",
rex::rex("Combine inputs to vectorized functions first"),
inner_combine_linter()
)
})

skip_if_not_installed("patrick")
local({
vector_funs <- c(
"as.Date", "as.POSIXct", "as.POSIXlt",
"sin", "cos", "tan", "sinpi", "cospi", "tanpi", "asin", "acos", "atan",
"log", "logb", "log2", "log10", "log1p", "exp", "expm1",
"sqrt", "abs",
"ymd", "ydm", "mdy", "myd", "dmy", "dym",
"yq", "ym", "my",
"ymd_hms", "ymd_hm", "ymd_h", "dmy_hms", "dmy_hm", "dmy_h",
"mdy_hms", "mdy_hm", "mdy_h", "ydm_hms", "ydm_hm", "ydm_h",
NULL
)
patrick::with_parameters_test_that(
"inner_combine_linter blocks simple vectorized calls:",
expect_lint(
sprintf("c(%1$s(x), %1$s(y))", vector_fun),
rex::rex("Combine inputs to vectorized functions first"),
inner_combine_linter()
),
.test_name = vector_funs,
vector_fun = vector_funs
)
})

patrick::with_parameters_test_that(
"inner_combine_linter blocks as.Date with identical passed arguments:",
expect_lint(
sprintf("c(as.Date(x, %1$s), as.Date(y, %1$s))", arg),
rex::rex("Combine inputs to vectorized functions first"),
inner_combine_linter()
),
.test_name = c("format", "origin", "tz", "tryFormats", "non-literal"),
arg = c("format = '%F'", "origin = '1900-01-01'", "tz = 'Asia/Jakarta'", "tryFormats = '%F'", "tz = tz")
)

patrick::with_parameters_test_that(
"inner_combine_linter blocks as.POSIXct with identical passed arguments:",
expect_lint(
sprintf("c(as.POSIXct(x, %1$s), as.POSIXct(y, %1$s))", arg),
rex::rex("Combine inputs to vectorized functions first"),
inner_combine_linter()
),
.test_name = c("format", "origin", "tz", "non-literal"),
arg = c("format = '%F'", "origin = '1900-01-01'", "tz = 'UTC'", "tz = tz")
)

test_that("inner_combine_linter is order-agnostic for matching arguments", {
expect_lint(
"c(as.Date(x, format = f, tz = t), as.Date(y, tz = t, format = f))",
rex::rex("Combine inputs to vectorized functions first"),
inner_combine_linter()
)
})

skip_if_not_installed("tibble")
patrick::with_parameters_test_that(
"inner_combine_linter skips allowed usages:",
expect_lint(expr, NULL, inner_combine_linter()),
.cases = tibble::tribble(
~.test_name, ~expr,
"simple sin()", "x <- sin(1:10)",
"mixed sin()+cos()", "y <- c(sin(1:10), cos(2:20))",
"present/absent vector function", "c(log(x), 0.5)",
"absent/present vector function", "c(0.5, log(x))",
"mismatched arg (Date)", "c(as.Date(x, format = '%y'), as.Date(y, format = '%m'))",
"present/absent arg (Date)", "c(as.Date(x, format = '%y'), as.Date(y))",
"absent/present arg (Date)", "c(as.Date(x), as.Date(y, format = '%y'))",
"matched value, not arg (Date)", "c(as.Date(x, format = '%y'), as.Date(y, tz = '%y'))",
"mismatched arg (POSIXct)", "c(as.POSIXct(x, format = '%y'), as.POSIXct(y, format = '%m'))",
"present/absent arg (POSIXct)", "c(as.POSIXct(x, format = '%y'), as.POSIXct(y))",
"mismatched arg (log)", "c(log(x, base = 4), log(y, base = 5))",
"present/absent arg (log)", "c(log(x, base = 4), log(y))"
# TODO(michaelchirico): fix the code so these edge cases are covered
# "unknown Date method argument", "c(as.Date(x, zoo = zzz), as.Date(y, zoo = zzz))",
# "known+unknown Date argument", "c(as.Date(x, format = '%y', zoo = zzz), as.Date(y, format = '%y', zoo = zzz))",
# "unknown POSIXct method argument", "c(as.POSIXct(x, zoo = zzz), as.POSIXct(y, zoo = zzz))",
)
)