36
36
Pattern ,
37
37
Sequence ,
38
38
Set ,
39
+ TextIO ,
39
40
Tuple ,
40
41
)
41
42
@@ -201,11 +202,17 @@ def __str__(self) -> str:
201
202
202
203
203
204
class FileOpener :
204
def __init__(
    self,
    use_chardet: bool,
    quiet_level: int,
    ignore_multiline_regex: Optional[Pattern[str]],
) -> None:
    """Store the file-opening configuration.

    Args:
        use_chardet: When true, ``init_chardet()`` is called during
            construction.
        quiet_level: Bit mask stored as-is; other methods test it against
            ``QuietLevels`` flags before printing warnings.
        ignore_multiline_regex: Compiled pattern, or ``None`` to disable.
            ``get_lines()`` uses it to blank out matching regions.
    """
    self.use_chardet = use_chardet
    # Detector set-up happens only when the caller asked for it.
    if use_chardet:
        self.init_chardet()
    self.quiet_level = quiet_level
    self.ignore_multiline_regex = ignore_multiline_regex
209
216
210
217
def init_chardet (self ) -> None :
211
218
try :
@@ -247,7 +254,7 @@ def open_with_chardet(self, filename: str) -> Tuple[List[str], str]:
247
254
)
248
255
raise
249
256
else :
250
- lines = f . readlines ( )
257
+ lines = self . get_lines ( f )
251
258
f .close ()
252
259
253
260
return lines , f .encoding
@@ -262,7 +269,7 @@ def open_with_internal(self, filename: str) -> Tuple[List[str], str]:
262
269
print (f'WARNING: Trying next encoding "{ encoding } "' , file = sys .stderr )
263
270
with open (filename , encoding = encoding , newline = "" ) as f :
264
271
try :
265
- lines = f . readlines ( )
272
+ lines = self . get_lines ( f )
266
273
except UnicodeDecodeError :
267
274
if not self .quiet_level & QuietLevels .ENCODING :
268
275
print (
@@ -279,6 +286,22 @@ def open_with_internal(self, filename: str) -> Tuple[List[str], str]:
279
286
280
287
return lines , encoding
281
288
289
def get_lines(self, f: TextIO) -> List[str]:
    """Return the lines of *f*, blanking regions matched by the ignore regex.

    When ``self.ignore_multiline_regex`` is unset, this is ``f.readlines()``.
    Otherwise the whole text is read, and every match of the pattern is
    replaced by as many newlines as the match contained, so the line
    numbers of the remaining text do not change.

    Both paths return lines that keep their original line terminators, so
    callers see the same list shape whether or not the option is used.

    Args:
        f: Open text stream, read from its current position to the end.

    Returns:
        The lines of the (possibly blanked) text, terminators included.
    """
    if not self.ignore_multiline_regex:
        return f.readlines()

    # Function-scope import keeps this method self-contained.
    import io

    text = f.read()
    pieces: List[str] = []
    pos = 0
    for m in re.finditer(self.ignore_multiline_regex, text):
        pieces.append(text[pos : m.start()])
        # Replace with blank lines so line numbers are unchanged.
        pieces.append("\n" * m.group().count("\n"))
        pos = m.end()
    pieces.append(text[pos:])

    # Join once (no repeated concatenation), then split the way readlines()
    # does on a stream opened with newline="": terminators are kept
    # untranslated and there is no trailing empty element.
    return io.StringIO("".join(pieces), newline="").readlines()
304
+
282
305
283
306
# -.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-
284
307
@@ -411,6 +434,19 @@ def parse_options(
411
434
'e.g., "\\ bmatch\\ b". Defaults to '
412
435
"empty/disabled." ,
413
436
)
437
+ parser .add_argument (
438
+ "--ignore-multiline-regex" ,
439
+ action = "store" ,
440
+ type = str ,
441
+ help = "regular expression that is used to ignore "
442
+ "text that may span multi-line regions. "
443
+ "The regex is run with re.DOTALL. For example to "
444
+ "allow skipping of regions of Python code using "
445
+ "begin/end comments one could use: "
446
+ "--ignore-multiline-regex "
447
+ "'# codespell:ignore-begin *\\ n.*# codespell:ignore-end *\\ n'. "
448
+ "Defaults to empty/disabled." ,
449
+ )
414
450
parser .add_argument (
415
451
"-I" ,
416
452
"--ignore-words" ,
@@ -1115,6 +1151,20 @@ def main(*args: str) -> int:
1115
1151
else :
1116
1152
ignore_word_regex = None
1117
1153
1154
+ if options .ignore_multiline_regex :
1155
+ try :
1156
+ ignore_multiline_regex = re .compile (
1157
+ options .ignore_multiline_regex , re .DOTALL
1158
+ )
1159
+ except re .error as e :
1160
+ return _usage_error (
1161
+ parser ,
1162
+ f"ERROR: invalid --ignore-multiline-regex "
1163
+ f'"{ options .ignore_multiline_regex } " ({ e } )' ,
1164
+ )
1165
+ else :
1166
+ ignore_multiline_regex = None
1167
+
1118
1168
ignore_words , ignore_words_cased = parse_ignore_words_option (
1119
1169
options .ignore_words_list
1120
1170
)
@@ -1203,7 +1253,11 @@ def main(*args: str) -> int:
1203
1253
for exclude_file in exclude_files :
1204
1254
build_exclude_hashes (exclude_file , exclude_lines )
1205
1255
1206
- file_opener = FileOpener (options .hard_encoding_detection , options .quiet_level )
1256
+ file_opener = FileOpener (
1257
+ options .hard_encoding_detection ,
1258
+ options .quiet_level ,
1259
+ ignore_multiline_regex ,
1260
+ )
1207
1261
1208
1262
glob_match = GlobMatch (
1209
1263
flatten_clean_comma_separated_arguments (options .skip ) if options .skip else []
0 commit comments