Skip to content

Commit f3d85db

Browse files
Add --ignore-multiline-regex option. (#3476)
1 parent d2707c3 commit f3d85db

File tree

3 files changed

+97
-6
lines changed

3 files changed

+97
-6
lines changed

codespell_lib/_codespell.py

Lines changed: 58 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
Pattern,
3737
Sequence,
3838
Set,
39+
TextIO,
3940
Tuple,
4041
)
4142

@@ -201,11 +202,17 @@ def __str__(self) -> str:
201202

202203

203204
class FileOpener:
204-
def __init__(self, use_chardet: bool, quiet_level: int) -> None:
205+
def __init__(
    self,
    use_chardet: bool,
    quiet_level: int,
    ignore_multiline_regex: Optional[Pattern[str]],
) -> None:
    """Store the file-reading options and set up chardet when requested.

    Args:
        use_chardet: when true, ``init_chardet()`` is called right away.
        quiet_level: bit mask that is tested against ``QuietLevels`` flags
            before warnings are printed.
        ignore_multiline_regex: compiled pattern whose matches
            ``get_lines()`` blanks out, or ``None`` to disable that step.
    """
    self.use_chardet = use_chardet
    if use_chardet:
        self.init_chardet()
    self.quiet_level = quiet_level
    self.ignore_multiline_regex = ignore_multiline_regex
209216

210217
def init_chardet(self) -> None:
211218
try:
@@ -247,7 +254,7 @@ def open_with_chardet(self, filename: str) -> Tuple[List[str], str]:
247254
)
248255
raise
249256
else:
250-
lines = f.readlines()
257+
lines = self.get_lines(f)
251258
f.close()
252259

253260
return lines, f.encoding
@@ -262,7 +269,7 @@ def open_with_internal(self, filename: str) -> Tuple[List[str], str]:
262269
print(f'WARNING: Trying next encoding "{encoding}"', file=sys.stderr)
263270
with open(filename, encoding=encoding, newline="") as f:
264271
try:
265-
lines = f.readlines()
272+
lines = self.get_lines(f)
266273
except UnicodeDecodeError:
267274
if not self.quiet_level & QuietLevels.ENCODING:
268275
print(
@@ -279,6 +286,22 @@ def open_with_internal(self, filename: str) -> Tuple[List[str], str]:
279286

280287
return lines, encoding
281288

289+
def get_lines(self, f: TextIO) -> List[str]:
    """Return the lines of ``f``, blanking regions matched by the ignore regex.

    When ``self.ignore_multiline_regex`` is unset, this is ``f.readlines()``.
    Otherwise the whole file is read, every match of the pattern is replaced
    by as many newlines as the match contained (so later line numbers are
    unchanged), and the result is split back into lines.

    Both branches return lines in the same shape: each element keeps its
    line terminator, and no extra empty element is appended after a final
    newline.

    Args:
        f: open text stream positioned at the start of the content.

    Returns:
        The list of lines, terminators included.
    """
    # Local import: only this method needs ``io``.
    import io

    if not self.ignore_multiline_regex:
        return f.readlines()

    text = f.read()
    pieces: List[str] = []
    pos = 0
    for m in re.finditer(self.ignore_multiline_regex, text):
        pieces.append(text[pos : m.start()])
        # Replace with blank lines so line numbers are unchanged.
        pieces.append("\n" * m.group().count("\n"))
        pos = m.end()
    pieces.append(text[pos:])

    # Re-split exactly like readlines() on a stream opened with newline="":
    # break on "\n", "\r" and "\r\n" and keep the terminator on each line.
    return io.StringIO("".join(pieces), newline="").readlines()
304+
282305

283306
# -.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-
284307

@@ -411,6 +434,19 @@ def parse_options(
411434
'e.g., "\\bmatch\\b". Defaults to '
412435
"empty/disabled.",
413436
)
437+
parser.add_argument(
438+
"--ignore-multiline-regex",
439+
action="store",
440+
type=str,
441+
help="regular expression that is used to ignore "
442+
"text that may span multi-line regions. "
443+
"The regex is run with re.DOTALL. For example to "
444+
"allow skipping of regions of Python code using "
445+
"begin/end comments one could use: "
446+
"--ignore-multiline-regex "
447+
"'# codespell:ignore-begin *\\n.*# codespell:ignore-end *\\n'. "
448+
"Defaults to empty/disabled.",
449+
)
414450
parser.add_argument(
415451
"-I",
416452
"--ignore-words",
@@ -1115,6 +1151,20 @@ def main(*args: str) -> int:
11151151
else:
11161152
ignore_word_regex = None
11171153

1154+
if options.ignore_multiline_regex:
1155+
try:
1156+
ignore_multiline_regex = re.compile(
1157+
options.ignore_multiline_regex, re.DOTALL
1158+
)
1159+
except re.error as e:
1160+
return _usage_error(
1161+
parser,
1162+
f"ERROR: invalid --ignore-multiline-regex "
1163+
f'"{options.ignore_multiline_regex}" ({e})',
1164+
)
1165+
else:
1166+
ignore_multiline_regex = None
1167+
11181168
ignore_words, ignore_words_cased = parse_ignore_words_option(
11191169
options.ignore_words_list
11201170
)
@@ -1203,7 +1253,11 @@ def main(*args: str) -> int:
12031253
for exclude_file in exclude_files:
12041254
build_exclude_hashes(exclude_file, exclude_lines)
12051255

1206-
file_opener = FileOpener(options.hard_encoding_detection, options.quiet_level)
1256+
file_opener = FileOpener(
1257+
options.hard_encoding_detection,
1258+
options.quiet_level,
1259+
ignore_multiline_regex,
1260+
)
12071261

12081262
glob_match = GlobMatch(
12091263
flatten_clean_comma_separated_arguments(options.skip) if options.skip else []

codespell_lib/tests/test_basic.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -942,6 +942,43 @@ def test_ignore_regex_option(
942942
assert cs.main(fname, r"--ignore-regex=\bdonn\b") == 1
943943

944944

945+
def test_ignore_multiline_regex_option(
946+
tmp_path: Path,
947+
capsys: pytest.CaptureFixture[str],
948+
) -> None:
949+
"""Test ignore regex option functionality."""
950+
951+
# Invalid regex.
952+
result = cs.main("--ignore-multiline-regex=(", std=True)
953+
assert isinstance(result, tuple)
954+
code, stdout, _ = result
955+
assert code == EX_USAGE
956+
assert "usage:" in stdout
957+
958+
fname = tmp_path / "flag.txt"
959+
fname.write_text(
960+
"""
961+
Please see http://example.com/abandonned for info
962+
# codespell:ignore-begin
963+
'''
964+
abandonned
965+
abandonned
966+
'''
967+
# codespell:ignore-end
968+
abandonned
969+
"""
970+
)
971+
assert cs.main(fname) == 4
972+
assert (
973+
cs.main(
974+
fname,
975+
"--ignore-multiline-regex",
976+
"codespell:ignore-begin.*codespell:ignore-end",
977+
)
978+
== 2
979+
)
980+
981+
945982
def test_uri_regex_option(
946983
tmp_path: Path,
947984
capsys: pytest.CaptureFixture[str],

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,6 @@ max-complexity = 45
169169
[tool.ruff.lint.pylint]
170170
allow-magic-value-types = ["bytes", "int", "str",]
171171
max-args = 13
172-
max-branches = 46
173-
max-returns = 11
172+
max-branches = 47
173+
max-returns = 12
174174
max-statements = 119

0 commit comments

Comments
 (0)