Skip to content

Commit 05751d1

Browse files
committed
Fix #1897 #3300 Add --git-only
1 parent 47337d2 commit 05751d1

File tree

2 files changed

+282
-61
lines changed

2 files changed

+282
-61
lines changed

codespell_lib/_codespell.py

Lines changed: 108 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import itertools
2424
import os
2525
import re
26+
import subprocess
2627
import sys
2728
import textwrap
2829
from typing import (
@@ -620,6 +621,11 @@ def parse_options(
620621
action="store_true",
621622
help="output just a single line for each misspelling in stdin mode",
622623
)
624+
parser.add_argument(
625+
"--git-only",
626+
action="store_true",
627+
help="When selected, only check files under git control",
628+
)
623629
parser.add_argument("--config", type=str, help="path to config file.")
624630
parser.add_argument("--toml", type=str, help="path to a pyproject.toml file.")
625631
parser.add_argument("files", nargs="*", help="files or directories to check")
@@ -1096,6 +1102,82 @@ def flatten_clean_comma_separated_arguments(
10961102
]
10971103

10981104

1105+
def get_git_tracked_files(
    root: str, files: Iterable[str], glob_match: GlobMatch, check_hidden: bool
) -> Iterable[str]:
    """Return the git-tracked files selected by *files*.

    Runs ``git ls-files`` with *root* as the working directory, so the
    returned paths are the ones git prints relative to *root*.

    Args:
        root: Directory in which ``git ls-files`` is executed.
        files: Files or directories given by the user. A directory is
            expanded to everything beneath it.
        glob_match: Skip patterns; each entry of ``pattern_list`` is turned
            into a git ``:(exclude)`` pathspec.
        check_hidden: When false, paths with a dot-prefixed component are
            excluded as well.

    Returns:
        A set of path strings. The set is empty when git cannot be run or
        exits with an error (for example, *root* is not inside a work tree).
    """
    # A directory argument selects everything below it.
    file_args = [
        f"{filename}/**" if os.path.isdir(filename) else filename
        for filename in files
    ]

    # Translate the skip globs into git exclude pathspecs.
    exclude_patterns = [
        f":(exclude)**/{pattern}" for pattern in glob_match.pattern_list
    ]

    # Hidden files are excluded both at the top level and in subdirectories.
    if not check_hidden:
        exclude_patterns.append(":(exclude)**/.*")
        exclude_patterns.append(":(exclude).*")

    git_executable = "git"  # Could be future option

    try:
        result = subprocess.run(  # noqa: S603
            [
                git_executable,
                "ls-files",
                # NUL-terminated output disables git's C-style quoting of
                # paths containing non-ASCII or special characters, which
                # would otherwise yield names that do not exist on disk.
                "-z",
                # Everything after "--" is a pathspec, even if a user
                # supplied path happens to start with "-".
                "--",
                *file_args,
                *exclude_patterns,
            ],
            cwd=root,
            capture_output=True,
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # If the command fails or git cannot be started at all (e.g. it is
        # not installed), assume no files are tracked.
        return set()

    # Decode with the filesystem encoding so every listed name maps back to
    # the on-disk path; drop the empty chunk after the last NUL.
    return {os.fsdecode(entry) for entry in result.stdout.split(b"\0") if entry}
1141+
1142+
1143+
def build_file_list_with_os_walk(
    files: Iterable[str], glob_match: GlobMatch, check_hidden: bool
) -> Iterable[str]:
    """Collect the files to check by walking the given paths on disk.

    Args:
        files: Files or directories given by the user.
        glob_match: Skip patterns applied to directory paths, bare file
            names, and joined file paths.
        check_hidden: Forwarded to ``is_hidden`` for every candidate.

    Returns:
        A list of file paths, in discovery order.
    """
    collected = []
    for entry in files:
        # Hidden top-level arguments are dropped outright.
        if is_hidden(entry, check_hidden):
            continue

        if not os.path.isdir(entry):
            # Plain file: keep it unless a skip pattern matches.
            if not glob_match.match(entry) and not is_hidden(entry, check_hidden):
                collected.append(entry)
            continue

        for root, dirs, dirfiles in os.walk(entry):
            if glob_match.match(root):  # skip (absolute) directories
                dirs.clear()
                continue
            if is_hidden(root, check_hidden):  # dir itself hidden
                continue

            for name in dirfiles:
                # ignore hidden files in directories, then skipped names
                if is_hidden(name, check_hidden) or glob_match.match(name):
                    continue
                path = os.path.join(root, name)
                if not glob_match.match(path):  # skip paths
                    collected.append(path)

            # skip (relative) directories; pruning in place stops os.walk
            # from descending into them
            dirs[:] = [
                subdir
                for subdir in dirs
                if not (glob_match.match(subdir) or is_hidden(subdir, check_hidden))
            ]
    return collected
1179+
1180+
10991181
def _script_main() -> int:
11001182
"""Wrap to main() for setuptools."""
11011183
try:
@@ -1278,68 +1360,33 @@ def main(*args: str) -> int:
12781360
"try escaping special characters",
12791361
)
12801362

1281-
bad_count = 0
1282-
for filename in sorted(options.files):
1283-
# ignore hidden files
1284-
if is_hidden(filename, options.check_hidden):
1285-
continue
1286-
1287-
if os.path.isdir(filename):
1288-
for root, dirs, files in os.walk(filename):
1289-
if glob_match.match(root): # skip (absolute) directories
1290-
dirs.clear()
1291-
continue
1292-
if is_hidden(root, options.check_hidden): # dir itself hidden
1293-
continue
1294-
for file_ in sorted(files):
1295-
# ignore hidden files in directories
1296-
if is_hidden(file_, options.check_hidden):
1297-
continue
1298-
if glob_match.match(file_): # skip files
1299-
continue
1300-
fname = os.path.join(root, file_)
1301-
if glob_match.match(fname): # skip paths
1302-
continue
1303-
bad_count += parse_file(
1304-
fname,
1305-
colors,
1306-
summary,
1307-
misspellings,
1308-
ignore_words_cased,
1309-
exclude_lines,
1310-
file_opener,
1311-
word_regex,
1312-
ignore_word_regex,
1313-
uri_regex,
1314-
uri_ignore_words,
1315-
context,
1316-
options,
1317-
)
1318-
1319-
# skip (relative) directories
1320-
dirs[:] = [
1321-
dir_
1322-
for dir_ in dirs
1323-
if not glob_match.match(dir_)
1324-
and not is_hidden(dir_, options.check_hidden)
1325-
]
1363+
# Build the list of all files based on the git_only option
1364+
if options.git_only:
1365+
all_files = get_git_tracked_files(
1366+
os.getcwd(), options.files, glob_match, options.check_hidden
1367+
)
1368+
else:
1369+
all_files = build_file_list_with_os_walk(
1370+
options.files, glob_match, options.check_hidden
1371+
)
13261372

1327-
elif not glob_match.match(filename): # skip files
1328-
bad_count += parse_file(
1329-
filename,
1330-
colors,
1331-
summary,
1332-
misspellings,
1333-
ignore_words_cased,
1334-
exclude_lines,
1335-
file_opener,
1336-
word_regex,
1337-
ignore_word_regex,
1338-
uri_regex,
1339-
uri_ignore_words,
1340-
context,
1341-
options,
1342-
)
1373+
bad_count = 0
1374+
for filename in sorted(all_files):
1375+
bad_count += parse_file(
1376+
filename,
1377+
colors,
1378+
summary,
1379+
misspellings,
1380+
ignore_words_cased,
1381+
exclude_lines,
1382+
file_opener,
1383+
word_regex,
1384+
ignore_word_regex,
1385+
uri_regex,
1386+
uri_ignore_words,
1387+
context,
1388+
options,
1389+
)
13431390

13441391
if summary:
13451392
print("\n-------8<-------\nSUMMARY:")

codespell_lib/tests/test_basic.py

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,7 @@ def test_exclude_file(
499499
bad_name.write_bytes(
500500
(combinations + "5 abandonned 5\n6 abandonned 6").encode("utf-8")
501501
)
502+
502503
assert cs.main(bad_name) == 18
503504
fname = tmp_path / "tmp.txt"
504505
fname.write_bytes(
@@ -519,6 +520,77 @@ def test_exclude_file(
519520
assert cs.main("-x", f"{fname_dummy1},{fname},{fname_dummy2}", bad_name) == 1
520521

521522

523+
def run_git(path: Path, *args: Union[Path, str]) -> None:
    """Run ``git -C <path> <args...>`` and raise if git exits non-zero."""
    command = ["git", "-C", path, *args]  # noqa: S607
    subprocess.run(  # noqa: S603
        command,
        capture_output=False,
        check=True,
        text=True,
    )
530+
531+
532+
def test_git_only_exclude_file(
    tmp_path: Path, capsys: pytest.CaptureFixture[str], monkeypatch: pytest.MonkeyPatch
) -> None:
    """Test --git-only together with untracked files and -x exclude files."""
    # --git-only lists files relative to the current directory, so run the
    # whole test from inside the temporary repository.
    monkeypatch.chdir(tmp_path)
    bad_name = tmp_path / "bad.txt"
    # check all possible combinations of lines to ignore and ignores
    combinations = "".join(
        f"{n} abandonned {n}\n"
        f"{n} abandonned {n}\r\n"
        f"{n} abandonned {n} \n"
        f"{n} abandonned {n} \r\n"
        for n in range(1, 5)
    )
    bad_name.write_bytes(
        (combinations + "5 abandonned 5\n6 abandonned 6").encode("utf-8")
    )

    run_git(tmp_path, "init")
    run_git(tmp_path, "add", bad_name)

    assert cs.main(bad_name) == 18
    fname = tmp_path / "tmp.txt"
    fname.write_bytes(
        b"1 abandonned 1\n"
        b"2 abandonned 2\r\n"
        b"3 abandonned 3 \n"
        b"4 abandonned 4 \r\n"
        b"6 abandonned 6\n"
    )

    # Not adding fname to git to exclude it

    # Should have 23 total errors (bad_name + fname)
    assert cs.main(tmp_path) == 23

    # Before adding to git, should not report on fname, only the 18 errors
    # in bad.txt
    assert cs.main("--git-only", tmp_path) == 18
    run_git(tmp_path, "add", fname)
    assert cs.main(tmp_path) == 23
    # After adding to git, should report on fname
    assert cs.main("--git-only", tmp_path) == 23
    # After adding to git, should not report on excluded file
    assert cs.main("--git-only", "-x", fname, tmp_path) == 1
    # comma-separated list of files
    fname_dummy1 = tmp_path / "dummy1.txt"
    fname_dummy1.touch()
    fname_dummy2 = tmp_path / "dummy2.txt"
    fname_dummy2.touch()
    run_git(tmp_path, "add", fname_dummy1, fname_dummy2)
    assert (
        cs.main(
            "--git-only", "-x", fname_dummy1, "-x", fname, "-x", fname_dummy2, bad_name
        )
        == 1
    )
    assert (
        cs.main("--git-only", "-x", f"{fname_dummy1},{fname},{fname_dummy2}", bad_name)
        == 1
    )
592+
593+
522594
def test_encoding(
523595
tmp_path: Path,
524596
capsys: pytest.CaptureFixture[str],
@@ -636,6 +708,108 @@ def test_check_filename_irregular_file(
636708
assert cs.main("-f", tmp_path) == 1
637709

638710

711+
def test_check_hidden_git(
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Test ignoring of hidden files when --git-only is in effect."""
    hidden_flag = "--check-hidden"
    names_flag = "--check-filenames"

    def git_only(*extra: Union[Path, str]) -> int:
        # Every check in this test runs codespell restricted to git files.
        return cs.main("--git-only", *extra)

    def stage_all() -> None:
        # Bring the index in line with the current working tree.
        run_git(tmp_path, "add", ".")

    monkeypatch.chdir(tmp_path)
    run_git(tmp_path, "init")

    # visible file
    #
    #         tmp_path
    #         └── test.txt
    #
    visible = tmp_path / "test.txt"
    visible.write_text("erorr\n")
    stage_all()
    assert git_only(visible) == 1
    assert git_only(tmp_path) == 1

    # hidden file
    #
    #         tmp_path
    #         └── .test.txt
    #
    dot_file = tmp_path / ".test.txt"
    visible.rename(dot_file)
    stage_all()
    assert git_only(dot_file) == 0
    assert git_only(tmp_path) == 0
    assert git_only(hidden_flag, dot_file) == 1
    assert git_only(hidden_flag, tmp_path) == 1

    # hidden file with typo in name
    #
    #         tmp_path
    #         └── .abandonned.txt
    #
    typo_file = tmp_path / ".abandonned.txt"
    dot_file.rename(typo_file)
    stage_all()
    assert git_only(typo_file) == 0
    assert git_only(tmp_path) == 0
    assert git_only(hidden_flag, typo_file) == 1
    assert git_only(hidden_flag, tmp_path) == 1
    assert git_only(hidden_flag, names_flag, typo_file) == 2
    assert git_only(hidden_flag, names_flag, tmp_path) == 2

    # baseline before the hidden directory is created
    assert git_only(tmp_path) == 0
    assert git_only(hidden_flag, tmp_path) == 1
    assert git_only(hidden_flag, names_flag, tmp_path) == 2

    # hidden directory
    #
    #         tmp_path
    #         ├── .abandonned
    #         │   ├── .abandonned.txt
    #         │   └── subdir
    #         │       └── .abandonned.txt
    #         └── .abandonned.txt
    #
    hidden_dir = tmp_path / ".abandonned"
    hidden_dir.mkdir()
    copyfile(typo_file, hidden_dir / typo_file.name)
    nested_dir = hidden_dir / "subdir"
    nested_dir.mkdir()
    copyfile(typo_file, nested_dir / typo_file.name)
    stage_all()
    assert git_only(tmp_path) == 0
    assert git_only(hidden_flag, tmp_path) == 3
    assert git_only(hidden_flag, names_flag, tmp_path) == 8

    # check again with a relative path
    try:
        rel = op.relpath(tmp_path)
    except ValueError:
        # Windows: path is on mount 'C:', start on mount 'D:'
        pass
    else:
        assert git_only(rel) == 0
        assert git_only(hidden_flag, rel) == 3
        assert git_only(hidden_flag, names_flag, rel) == 8

    # hidden subdirectory
    #
    #         tmp_path
    #         ├── .abandonned
    #         │   ├── .abandonned.txt
    #         │   └── subdir
    #         │       └── .abandonned.txt
    #         ├── .abandonned.txt
    #         └── subdir
    #             └── .abandonned
    #                 └── .abandonned.txt
    plain_dir = tmp_path / "subdir"
    plain_dir.mkdir()
    inner_hidden = plain_dir / ".abandonned"
    inner_hidden.mkdir()
    copyfile(typo_file, inner_hidden / typo_file.name)
    stage_all()
    assert git_only(tmp_path) == 0
    assert git_only(hidden_flag, tmp_path) == 4
    assert git_only(hidden_flag, names_flag, tmp_path) == 11
811+
812+
639813
def test_check_hidden(
640814
tmp_path: Path,
641815
capsys: pytest.CaptureFixture[str],

0 commit comments

Comments
 (0)