Skip to content

Commit f6bda23

Browse files
New regex_subset_linter (#1004)
* New regex_subset_linter * update comment to be more official
1 parent c0899af commit f6bda23

File tree

10 files changed

+173
-2
lines changed

10 files changed

+173
-2
lines changed

DESCRIPTION

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ Collate:
112112
'path_linters.R'
113113
'pipe_call_linter.R'
114114
'pipe_continuation_linter.R'
115+
'regex_subset_linter.R'
115116
'semicolon_terminator_linter.R'
116117
'seq_linter.R'
117118
'settings.R'

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ export(paren_brace_linter)
7676
export(paste_sep_linter)
7777
export(pipe_call_linter)
7878
export(pipe_continuation_linter)
79+
export(regex_subset_linter)
7980
export(semicolon_terminator_linter)
8081
export(seq_linter)
8182
export(single_quotes_linter)

NEWS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ function calls. (#850, #851, @renkun-ken)
107107
* `paste_sep_linter()` Require usage of `paste0()` over `paste(sep = "")`
108108
* `nested_ifelse_linter()` Prevent nested calls to `ifelse()` like `ifelse(A, x, ifelse(B, y, z))`, and similar
109109
* `unreachable_code_linter()` Prevent code after `return()` and `stop()` statements that will never be reached
110+
* `regex_subset_linter()` Require usage of `grep(ptn, x, value = TRUE)` over `x[grep(ptn, x)]` and similar
110111
* `consecutive_stopifnot_linter()` Require consecutive calls to `stopifnot()` to be unified into one
111112
* `assignment_linter()` now lints right assignment (`->` and `->>`) and gains two arguments. `allow_cascading_assign` (`TRUE` by default) toggles whether to lint `<<-` and `->>`; `allow_right_assign` toggles whether to lint `->` and `->>` (#915, @michaelchirico)
112113
* `infix_spaces_linter()` gains argument `exclude_operators` to disable lints on selected infix operators. By default, all "low-precedence" operators throw lints; see `?infix_spaces_linter` for an enumeration of these. (#914 @michaelchirico)

R/regex_subset_linter.R

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
#' Require usage of direct methods for subsetting strings via regex.
2+
#'
3+
#' Using `value = TRUE` in [grep()] returns the subset of the input that matches
4+
#' the pattern, e.g. `grep("[a-m]", letters, value = TRUE)` will return the
5+
#' first 13 elements (`a` through `m`).
6+
#'
7+
#' `letters[grep("[a-m]", letters)]` and `letters[grepl("[a-m]", letters)]`
8+
#' both return the same thing, but more circuitously and more verbosely.
9+
#'
10+
#' The `stringr` package also provides an even more readable alternative,
11+
#' namely `str_subset()`, which should be preferred to versions using
12+
#' `str_detect()` and `str_which()`.
13+
#'
14+
#' @section Exceptions:
15+
#' Note that `x[grep(pattern, x)]` and `grep(pattern, x, value = TRUE)`
16+
#' are not _completely_ interchangeable when `x` is not character
17+
#' (most commonly, when `x` is a factor), because the output of the
18+
#' latter will be a character vector while the former remains a factor.
19+
#' It still may be preferable to refactor such code, as it may be faster
20+
#' to match the pattern on `levels(x)` and use that to subset instead.
21+
#'
22+
#' @evalRd rd_tags("regex_subset_linter")
23+
#' @seealso [linters] for a complete list of linters available in lintr.
24+
#' @export
25+
regex_subset_linter <- function() {
26+
Linter(function(source_file) {
27+
if (length(source_file$xml_parsed_content) == 0L) {
28+
return(list())
29+
}
30+
31+
xml <- source_file$xml_parsed_content
32+
33+
parent_expr_cond <- xp_and(
34+
"OP-LEFT-BRACKET",
35+
# parent::expr for LEFT_ASSIGN and RIGHT_ASSIGN, but, strangely,
36+
# parent::equal_assign for EQ_ASSIGN. So just use * as a catchall.
37+
"not(parent::*[LEFT_ASSIGN or EQ_ASSIGN or RIGHT_ASSIGN])"
38+
)
39+
# See https://www.w3.org/TR/1999/REC-xpath-19991116/#booleans;
40+
# equality of nodes is based on the string value of the nodes, which
41+
# is basically what we need, i.e., whatever expression comes in
42+
# <expr>[grepl(pattern, <expr>)] matches exactly, e.g. names(x)[grepl(ptn, names(x))].
43+
subset_cond_fmt <- xp_and(
44+
"expr[SYMBOL_FUNCTION_CALL[%s]]",
45+
"expr[position() = %d] = parent::expr/expr[1]"
46+
)
47+
grep_xpath <- sprintf(
48+
"//expr[%s]/expr[%s]",
49+
parent_expr_cond,
50+
sprintf(subset_cond_fmt, xp_text_in_table(c("grep", "grepl")), 3L)
51+
)
52+
53+
grep_expr <- xml2::xml_find_all(xml, grep_xpath)
54+
55+
grep_lints <- lapply(
56+
grep_expr,
57+
xml_nodes_to_lint,
58+
source_file = source_file,
59+
lint_message = paste(
60+
"Prefer grep(pattern, x, ..., value = TRUE) over",
61+
"x[grep(pattern, x, ...)] and x[grepl(pattern, x, ...)]."
62+
),
63+
type = "warning"
64+
)
65+
66+
stringr_xpath <- sprintf(
67+
"//expr[%s]/expr[%s]",
68+
parent_expr_cond,
69+
sprintf(subset_cond_fmt, xp_text_in_table(c("str_detect", "str_which")), 2L)
70+
)
71+
72+
stringr_expr <- xml2::xml_find_all(xml, stringr_xpath)
73+
74+
stringr_lints <- lapply(
75+
stringr_expr,
76+
xml_nodes_to_lint,
77+
source_file = source_file,
78+
lint_message = paste(
79+
"Prefer stringr::str_subset(x, pattern) over",
80+
"x[str_detect(x, pattern)] and x[str_which(x, pattern)]."
81+
),
82+
type = "warning"
83+
)
84+
85+
return(c(grep_lints, stringr_lints))
86+
})
87+
}

inst/lintr/linters.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ paren_brace_linter,style readability default
4949
paste_sep_linter,best_practices consistency
5050
pipe_call_linter,style readability
5151
pipe_continuation_linter,style readability default
52+
regex_subset_linter,best_practices efficiency
5253
semicolon_terminator_linter,style readability default configurable
5354
seq_linter,robustness efficiency consistency best_practices default
5455
single_quotes_linter,style consistency readability default

man/best_practices_linters.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/efficiency_linters.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/linters.Rd

Lines changed: 3 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/regex_subset_linter.Rd

Lines changed: 27 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
test_that("regex_subset_linter skips allowed usages", {
2+
expect_lint("y[grepl(ptn, x)]", NULL, regex_subset_linter())
3+
expect_lint("x[grepl(ptn, foo(x))]", NULL, regex_subset_linter())
4+
})
5+
6+
test_that("regex_subset_linter blocks simple disallowed usages", {
7+
expect_lint(
8+
"x[grep(ptn, x)]",
9+
rex::rex("Prefer grep(pattern, x, ..., value = TRUE)"),
10+
regex_subset_linter()
11+
)
12+
13+
expect_lint(
14+
"names(y)[grepl(ptn, names(y), perl = TRUE)]",
15+
rex::rex("Prefer grep(pattern, x, ..., value = TRUE)"),
16+
regex_subset_linter()
17+
)
18+
19+
expect_lint(
20+
"names(foo(y))[grepl(ptn, names(foo(y)), fixed = TRUE)]",
21+
rex::rex("Prefer grep(pattern, x, ..., value = TRUE)"),
22+
regex_subset_linter()
23+
)
24+
})
25+
26+
test_that("regex_subset_linter skips grep/grepl subassignment", {
27+
expect_lint("x[grep(ptn, x)] <- ''", NULL, regex_subset_linter())
28+
expect_lint("x[grepl(ptn, x)] <- ''", NULL, regex_subset_linter())
29+
expect_lint("x[grep(ptn, x, perl = TRUE)] = ''", NULL, regex_subset_linter())
30+
expect_lint("'' -> x[grep(ptn, x, ignore.case = TRUE)] = ''", NULL, regex_subset_linter())
31+
})
32+
33+
test_that("regex_subset_linter works for stringr equivalents", {
34+
expect_lint("y[str_detect(x, ptn)]", NULL, regex_subset_linter())
35+
expect_lint("x[str_detect(foo(x), ptn)]", NULL, regex_subset_linter())
36+
37+
expect_lint(
38+
"x[str_which(x, ptn)]",
39+
rex::rex("Prefer stringr::str_subset(x, pattern) over"),
40+
regex_subset_linter()
41+
)
42+
43+
expect_lint(
44+
"names(y)[str_detect(names(y), ptn, negate = TRUE)]",
45+
rex::rex("Prefer stringr::str_subset(x, pattern) over"),
46+
regex_subset_linter()
47+
)
48+
expect_lint("x[str_detect(x, ptn)] <- ''", NULL, regex_subset_linter())
49+
expect_lint("x[str_detect(x, ptn)] <- ''", NULL, regex_subset_linter())
50+
})

0 commit comments

Comments
 (0)