Skip to content

support --ignore-words-case-sensitive option. #3658

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,10 @@ You can select the optional dictionaries with the ``--builtin`` option.
Ignoring words
--------------

When ignoring false positives, note that spelling errors are *case-insensitive* but words to ignore are *case-sensitive*. For example, the dictionary entry ``wrod`` will also match the typo ``Wrod``, but to ignore it you must pass ``wrod`` (to match the case of the dictionary entry).
When ignoring false positives, note that spelling errors are *case-insensitive*.
By default, only lowercase words to ignore are *case-insensitive*. For example, the dictionary entry ``wrod`` will also match the typo ``Wrod``; to ignore both, you can pass ``wrod``.
Non-lowercase words to ignore are *case-sensitive*. For example, the dictionary entry ``wrod`` will also match the typo ``Wrod``. To ignore only that capitalization (while still reporting ``wrod``), pass ``Wrod``.
To handle all words to ignore in *case-sensitive* mode, including lowercase words, use the optional ``--ignore-words-case-sensitive`` flag.

The words to ignore can be passed in two ways:

Expand Down
46 changes: 38 additions & 8 deletions codespell_lib/_codespell.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,13 @@ def parse_options(
'the dictionary file. If set to "*", all '
"misspelling in URIs and emails will be ignored.",
)
parser.add_argument(
"--ignore-words-case-sensitive",
action="store_true",
default=False,
help="all ignore words in the ignore-words arguments in a case-sensitive way. "
"By default, lowercase words to ignore are handled in a case-insensitive way.",
)
parser.add_argument(
"-r",
"--regex",
Expand Down Expand Up @@ -697,18 +704,25 @@ def parse_options(


def process_ignore_words(
words: Iterable[str], ignore_words: Set[str], ignore_words_cased: Set[str]
words: Iterable[str],
ignore_words: Set[str],
ignore_words_cased: Set[str],
ignore_words_case_sensitive: bool = False,
) -> None:
for word in words:
word = word.strip()
if word == word.lower():
if ignore_words_case_sensitive:
# all ignore words are handled in a case-sensitive way
ignore_words_cased.add(word)
elif word == word.lower():
# lowercase words to ignore are handled in a case-insensitive way
ignore_words.add(word)
else:
ignore_words_cased.add(word)


def parse_ignore_words_option(
ignore_words_option: List[str],
ignore_words_option: List[str], ignore_words_case_sensitive: bool = False
) -> Tuple[Set[str], Set[str]]:
ignore_words: Set[str] = set()
ignore_words_cased: Set[str] = set()
Expand All @@ -718,6 +732,7 @@ def parse_ignore_words_option(
(word.strip() for word in comma_separated_words.split(",")),
ignore_words,
ignore_words_cased,
ignore_words_case_sensitive,
)
return (ignore_words, ignore_words_cased)

Expand All @@ -728,11 +743,17 @@ def build_exclude_hashes(filename: str, exclude_lines: Set[str]) -> None:


def build_ignore_words(
filename: str, ignore_words: Set[str], ignore_words_cased: Set[str]
filename: str,
ignore_words: Set[str],
ignore_words_cased: Set[str],
ignore_word_case_sensitive: bool = False,
) -> None:
with open(filename, encoding="utf-8") as f:
process_ignore_words(
(line.strip() for line in f), ignore_words, ignore_words_cased
(line.strip() for line in f),
ignore_words,
ignore_words_cased,
ignore_word_case_sensitive,
)


Expand Down Expand Up @@ -1173,7 +1194,7 @@ def main(*args: str) -> int:
ignore_multiline_regex = None

ignore_words, ignore_words_cased = parse_ignore_words_option(
options.ignore_words_list
options.ignore_words_list, options.ignore_words_case_sensitive
)
if options.ignore_words:
ignore_words_files = flatten_clean_comma_separated_arguments(
Expand All @@ -1185,7 +1206,12 @@ def main(*args: str) -> int:
parser,
f"ERROR: cannot find ignore-words file: {ignore_words_file}",
)
build_ignore_words(ignore_words_file, ignore_words, ignore_words_cased)
build_ignore_words(
ignore_words_file,
ignore_words,
ignore_words_cased,
options.ignore_words_case_sensitive,
)

uri_regex = options.uri_regex or uri_regex_def
try:
Expand All @@ -1197,7 +1223,11 @@ def main(*args: str) -> int:
)

uri_ignore_words = set(
itertools.chain(*parse_ignore_words_option(options.uri_ignore_words_list))
itertools.chain(
*parse_ignore_words_option(
options.uri_ignore_words_list, options.ignore_words_case_sensitive
)
)
)

dictionaries = flatten_clean_comma_separated_arguments(options.dictionary or ["-"])
Expand Down
37 changes: 37 additions & 0 deletions codespell_lib/tests/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,13 +389,50 @@ def test_ignore_words_with_cases(
assert cs.main("-LMIS,Mis", bad_name) == 1
assert cs.main("-I", fname, "-f", bad_name) == 1
assert cs.main("-LMIS,Mis", "-f", bad_name) == 1
# Only lowercase ignore words are matched in a case-insensitive manner
fname.write_text("mis")
assert cs.main("-I", fname, bad_name) == 0
assert cs.main("-Lmis", bad_name) == 0
assert cs.main("-I", fname, "-f", bad_name) == 0
assert cs.main("-Lmis", "-f", bad_name) == 0


def test_ignore_words_with_case_sensitive(
tmp_path: Path,
capsys: pytest.CaptureFixture[str],
) -> None:
"""Test --ignore-words-case-sensitive for -I and -L options."""
bad_name = tmp_path / "MIS.txt"
bad_name.write_text(
"1 MIS (Management Information System) 1\n2 Les Mis (1980 musical) 2\n3 mis 3\n"
)
assert cs.main(bad_name) == 3
assert cs.main(bad_name, "-f") == 4
fname = tmp_path / "ignore.txt"

fname.write_text("miS")
assert cs.main("--ignore-words-case-sensitive", "-I", fname, bad_name) == 3
assert cs.main("--ignore-words-case-sensitive", "-LmiS", bad_name) == 3
assert cs.main("--ignore-words-case-sensitive", "-I", fname, "-f", bad_name) == 4
assert cs.main("--ignore-words-case-sensitive", "-LmiS", "-f", bad_name) == 4
# With the flag, lowercase ignore words are also matched in a case-sensitive manner
fname.write_text("mis")
assert cs.main("--ignore-words-case-sensitive", "-I", fname, bad_name) == 2
assert cs.main("--ignore-words-case-sensitive", "-Lmis", bad_name) == 2
assert cs.main("--ignore-words-case-sensitive", "-I", fname, "-f", bad_name) == 3
assert cs.main("--ignore-words-case-sensitive", "-Lmis", "-f", bad_name) == 3
fname.write_text("MIS")
assert cs.main("--ignore-words-case-sensitive", "-I", fname, bad_name) == 2
assert cs.main("--ignore-words-case-sensitive", "-LMIS", bad_name) == 2
assert cs.main("--ignore-words-case-sensitive", "-I", fname, "-f", bad_name) == 2
assert cs.main("--ignore-words-case-sensitive", "-LMIS", "-f", bad_name) == 2
fname.write_text("MIS\nMis")
assert cs.main("--ignore-words-case-sensitive", "-I", fname, bad_name) == 1
assert cs.main("--ignore-words-case-sensitive", "-LMIS,Mis", bad_name) == 1
assert cs.main("--ignore-words-case-sensitive", "-I", fname, "-f", bad_name) == 1
assert cs.main("--ignore-words-case-sensitive", "-LMIS,Mis", "-f", bad_name) == 1


def test_ignore_word_list(
tmp_path: Path,
capsys: pytest.CaptureFixture[str],
Expand Down