Skip to content

NEW option --git-only - Implements #1897 #3300 #3676

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 108 additions & 61 deletions codespell_lib/_codespell.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import itertools
import os
import re
import subprocess
import sys
import textwrap
from typing import (
Expand Down Expand Up @@ -620,6 +621,11 @@
action="store_true",
help="output just a single line for each misspelling in stdin mode",
)
parser.add_argument(
"--git-only",
action="store_true",
help="When selected, only check files under git control",
)
parser.add_argument("--config", type=str, help="path to config file.")
parser.add_argument("--toml", type=str, help="path to a pyproject.toml file.")
parser.add_argument("files", nargs="*", help="files or directories to check")
Expand Down Expand Up @@ -1096,6 +1102,82 @@
]


def get_git_tracked_files(
    root: str, files: Iterable[str], glob_match: GlobMatch, check_hidden: bool
) -> Iterable[str]:
    """Return the git-tracked files selected by *files*.

    Runs ``git ls-files`` in *root* with one pathspec per entry of *files*
    plus ``:(exclude)`` pathspecs for the skip patterns and, unless
    *check_hidden* is set, for dot-files.

    Args:
        root: Directory used as the working directory of the git command.
        files: Files or directories to look up. Directories are expanded to
            everything below them.
        glob_match: Provides ``pattern_list``, the patterns to exclude.
        check_hidden: When false, names starting with ``.`` are excluded.

    Returns:
        A set with the paths printed by git. The set is empty when git cannot
        be run or exits with an error; a warning is printed to stderr in that
        case so that an empty result is not mistaken for a clean run.
    """
    # A directory is expanded to everything beneath it; any other argument is
    # handed to git unchanged.
    pathspecs = [
        f"{filename}/**" if os.path.isdir(filename) else filename
        for filename in files
    ]

    # Tolerate a pattern_list that is unset (None) as well as an empty one.
    # NOTE(review): without the "glob" pathspec magic, "**/<pattern>" appears
    # to require a "/" in the path, so top-level entries may not be excluded
    # (the hidden-file case below adds a second pattern for that) - confirm.
    exclusions = [
        f":(exclude)**/{pattern}" for pattern in (glob_match.pattern_list or [])
    ]
    if not check_hidden:
        exclusions += [":(exclude)**/.*", ":(exclude).*"]

    git_executable = "git"  # Could be future option

    # -z: NUL-terminated output, so unusual file names are printed verbatim
    #     instead of being quoted/escaped by git.
    # --: everything that follows is a pathspec, even if it starts with "-".
    command = [git_executable, "ls-files", "-z", "--", *pathspecs, *exclusions]

    try:
        result = subprocess.run(  # noqa: S603
            command,
            cwd=root,
            capture_output=True,
            check=True,
            text=True,
        )
    except subprocess.CalledProcessError as err:
        # git ran but failed, e.g. because root is not inside a repository
        reason = (err.stderr or "").strip() or f"git exited with {err.returncode}"
    except OSError as err:
        # git could not be started at all (not installed, invalid cwd, ...)
        reason = str(err)
    else:
        return {name for name in result.stdout.split("\0") if name}

    # If the command fails, assume no files are tracked
    print(
        f"WARNING: unable to list git-tracked files: {reason}",
        file=sys.stderr,
    )
    return set()

Check warning on line 1140 in codespell_lib/_codespell.py

View check run for this annotation

Codecov / codecov/patch

codespell_lib/_codespell.py#L1140

Added line #L1140 was not covered by tests


def build_file_list_with_os_walk(
    files: Iterable[str], glob_match: GlobMatch, check_hidden: bool
) -> Iterable[str]:
    """Collect the files to check by walking the given paths on disk.

    Args:
        files: Files or directories given by the caller.
        glob_match: Matcher for names and paths that must be skipped.
        check_hidden: Forwarded to ``is_hidden`` for every candidate.

    Returns:
        A list with every plain file from *files*, and every file found below
        a directory from *files*, that is neither hidden nor matched by
        *glob_match*.
    """
    collected = []
    for path in files:
        # ignore hidden files
        if is_hidden(path, check_hidden):
            continue

        if not os.path.isdir(path):
            # plain file: keep it unless it is skipped (the hidden check
            # mirrors the guard above)
            if not (glob_match.match(path) or is_hidden(path, check_hidden)):
                collected.append(path)
            continue

        for current_dir, subdirs, names in os.walk(path):
            if glob_match.match(current_dir):
                # skip (absolute) directories and do not descend into them
                subdirs.clear()
                continue
            if is_hidden(current_dir, check_hidden):
                # dir itself hidden: its files are ignored, but subdirs is
                # left untouched here
                continue

            for name in names:
                # ignore hidden files in directories, then skipped file names
                if is_hidden(name, check_hidden) or glob_match.match(name):
                    continue
                full_path = os.path.join(current_dir, name)
                # skip paths
                if not glob_match.match(full_path):
                    collected.append(full_path)

            # skip (relative) directories: prune in place so that os.walk
            # does not visit them
            subdirs[:] = [
                subdir
                for subdir in subdirs
                if not (glob_match.match(subdir) or is_hidden(subdir, check_hidden))
            ]
    return collected


def _script_main() -> int:
"""Wrap to main() for setuptools."""
try:
Expand Down Expand Up @@ -1278,68 +1360,33 @@
"try escaping special characters",
)

bad_count = 0
for filename in sorted(options.files):
# ignore hidden files
if is_hidden(filename, options.check_hidden):
continue

if os.path.isdir(filename):
for root, dirs, files in os.walk(filename):
if glob_match.match(root): # skip (absolute) directories
dirs.clear()
continue
if is_hidden(root, options.check_hidden): # dir itself hidden
continue
for file_ in sorted(files):
# ignore hidden files in directories
if is_hidden(file_, options.check_hidden):
continue
if glob_match.match(file_): # skip files
continue
fname = os.path.join(root, file_)
if glob_match.match(fname): # skip paths
continue
bad_count += parse_file(
fname,
colors,
summary,
misspellings,
ignore_words_cased,
exclude_lines,
file_opener,
word_regex,
ignore_word_regex,
uri_regex,
uri_ignore_words,
context,
options,
)

# skip (relative) directories
dirs[:] = [
dir_
for dir_ in dirs
if not glob_match.match(dir_)
and not is_hidden(dir_, options.check_hidden)
]
# Build the list of all files based on the git_only option
if options.git_only:
all_files = get_git_tracked_files(
os.getcwd(), options.files, glob_match, options.check_hidden
)
else:
all_files = build_file_list_with_os_walk(
options.files, glob_match, options.check_hidden
)

elif not glob_match.match(filename): # skip files
bad_count += parse_file(
filename,
colors,
summary,
misspellings,
ignore_words_cased,
exclude_lines,
file_opener,
word_regex,
ignore_word_regex,
uri_regex,
uri_ignore_words,
context,
options,
)
bad_count = 0
for filename in sorted(all_files):
bad_count += parse_file(
filename,
colors,
summary,
misspellings,
ignore_words_cased,
exclude_lines,
file_opener,
word_regex,
ignore_word_regex,
uri_regex,
uri_ignore_words,
context,
options,
)

if summary:
print("\n-------8<-------\nSUMMARY:")
Expand Down
174 changes: 174 additions & 0 deletions codespell_lib/tests/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,7 @@ def test_exclude_file(
bad_name.write_bytes(
(combinations + "5 abandonned 5\n6 abandonned 6").encode("utf-8")
)

assert cs.main(bad_name) == 18
fname = tmp_path / "tmp.txt"
fname.write_bytes(
Expand All @@ -519,6 +520,77 @@ def test_exclude_file(
assert cs.main("-x", f"{fname_dummy1},{fname},{fname_dummy2}", bad_name) == 1


def run_git(path: Path, *args: Union[Path, str]) -> None:
    """Run ``git -C <path> <args...>``, raising if git exits with an error."""
    subprocess.run(  # noqa: S603
        ["git", "-C", path, *args],  # noqa: S607
        check=True,
        text=True,
    )


def test_git_only_exclude_file(
    tmp_path: Path, capsys: pytest.CaptureFixture[str], monkeypatch: pytest.MonkeyPatch
) -> None:
    """Test exclude file functionality together with --git-only."""
    # The docstring must be the first statement of the function; the
    # directory change therefore comes right after it.
    monkeypatch.chdir(tmp_path)
    bad_name = tmp_path / "bad.txt"
    # check all possible combinations of lines to ignore and ignores
    combinations = "".join(
        f"{n} abandonned {n}\n"
        f"{n} abandonned {n}\r\n"
        f"{n} abandonned {n} \n"
        f"{n} abandonned {n} \r\n"
        for n in range(1, 5)
    )
    bad_name.write_bytes(
        (combinations + "5 abandonned 5\n6 abandonned 6").encode("utf-8")
    )

    run_git(tmp_path, "init")
    run_git(tmp_path, "add", bad_name)

    assert cs.main(bad_name) == 18
    fname = tmp_path / "tmp.txt"
    fname.write_bytes(
        b"1 abandonned 1\n"
        b"2 abandonned 2\r\n"
        b"3 abandonned 3 \n"
        b"4 abandonned 4 \r\n"
        b"6 abandonned 6\n"
    )

    # fname is deliberately not added to git yet, so --git-only ignores it

    # Should have 23 total errors (bad_name + fname)
    assert cs.main(tmp_path) == 23

    # Before adding to git, should not report on fname, only the 18 errors
    # in bad.txt
    assert cs.main("--git-only", tmp_path) == 18
    run_git(tmp_path, "add", fname)
    assert cs.main(tmp_path) == 23
    # After adding to git, should report on fname
    assert cs.main("--git-only", tmp_path) == 23
    # After adding to git, should not report on excluded file
    assert cs.main("--git-only", "-x", fname, tmp_path) == 1
    # comma-separated list of files
    fname_dummy1 = tmp_path / "dummy1.txt"
    fname_dummy1.touch()
    fname_dummy2 = tmp_path / "dummy2.txt"
    fname_dummy2.touch()
    run_git(tmp_path, "add", fname_dummy1, fname_dummy2)
    assert (
        cs.main(
            "--git-only", "-x", fname_dummy1, "-x", fname, "-x", fname_dummy2, bad_name
        )
        == 1
    )
    assert (
        cs.main("--git-only", "-x", f"{fname_dummy1},{fname},{fname_dummy2}", bad_name)
        == 1
    )


def test_encoding(
tmp_path: Path,
capsys: pytest.CaptureFixture[str],
Expand Down Expand Up @@ -636,6 +708,108 @@ def test_check_filename_irregular_file(
assert cs.main("-f", tmp_path) == 1


def test_check_hidden_git(
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Test ignoring of hidden files when --git-only is used."""
    monkeypatch.chdir(tmp_path)
    run_git(tmp_path, "init")

    # Command-line option sets used throughout, each extending the previous.
    git_only = ("--git-only",)
    with_hidden = (*git_only, "--check-hidden")
    with_filenames = (*with_hidden, "--check-filenames")
    option_sets = (git_only, with_hidden, with_filenames)

    # visible file
    #
    # tmp_path
    # └── test.txt
    #
    visible = tmp_path / "test.txt"
    visible.write_text("erorr\n")
    run_git(tmp_path, "add", ".")
    for target in (visible, tmp_path):
        assert cs.main(*git_only, target) == 1

    # hidden file
    #
    # tmp_path
    # └── .test.txt
    #
    dotfile = tmp_path / ".test.txt"
    visible.rename(dotfile)
    run_git(tmp_path, "add", ".")
    for options, expected in ((git_only, 0), (with_hidden, 1)):
        for target in (dotfile, tmp_path):
            assert cs.main(*options, target) == expected

    # hidden file with typo in name
    #
    # tmp_path
    # └── .abandonned.txt
    #
    typo_file = tmp_path / ".abandonned.txt"
    dotfile.rename(typo_file)
    run_git(tmp_path, "add", ".")
    for options, expected in zip(option_sets, (0, 1, 2)):
        for target in (typo_file, tmp_path):
            assert cs.main(*options, target) == expected

    # hidden directory
    #
    # tmp_path
    # ├── .abandonned
    # │   ├── .abandonned.txt
    # │   └── subdir
    # │       └── .abandonned.txt
    # └── .abandonned.txt
    #
    # counts before the hidden directory exists
    for options, expected in zip(option_sets, (0, 1, 2)):
        assert cs.main(*options, tmp_path) == expected
    hidden_dir = tmp_path / ".abandonned"
    hidden_dir.mkdir()
    copyfile(typo_file, hidden_dir / typo_file.name)
    nested_dir = hidden_dir / "subdir"
    nested_dir.mkdir()
    copyfile(typo_file, nested_dir / typo_file.name)
    run_git(tmp_path, "add", ".")
    for options, expected in zip(option_sets, (0, 3, 8)):
        assert cs.main(*options, tmp_path) == expected
    # check again with a relative path
    try:
        rel = op.relpath(tmp_path)
    except ValueError:
        # Windows: path is on mount 'C:', start on mount 'D:'
        pass
    else:
        for options, expected in zip(option_sets, (0, 3, 8)):
            assert cs.main(*options, rel) == expected

    # hidden subdirectory
    #
    # tmp_path
    # ├── .abandonned
    # │   ├── .abandonned.txt
    # │   └── subdir
    # │       └── .abandonned.txt
    # ├── .abandonned.txt
    # └── subdir
    #     └── .abandonned
    #         └── .abandonned.txt
    visible_dir = tmp_path / "subdir"
    visible_dir.mkdir()
    hidden_subdir = visible_dir / ".abandonned"
    hidden_subdir.mkdir()
    copyfile(typo_file, hidden_subdir / typo_file.name)
    run_git(tmp_path, "add", ".")
    for options, expected in zip(option_sets, (0, 4, 11)):
        assert cs.main(*options, tmp_path) == expected


def test_check_hidden(
tmp_path: Path,
capsys: pytest.CaptureFixture[str],
Expand Down
Loading