-
-
Notifications
You must be signed in to change notification settings - Fork 32.2k
gh-108095: Add re.grep module #108096
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Draft
serhiy-storchaka
wants to merge
3
commits into
python:main
Choose a base branch
from
serhiy-storchaka:re-grep
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Draft
gh-108095: Add re.grep module #108096
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,290 @@ | ||
import argparse | ||
from collections import deque | ||
import fnmatch | ||
import os | ||
import re | ||
import sys | ||
|
||
|
||
def find(patterns, line): | ||
for pat in patterns: | ||
if pat.search(line) is not None: | ||
return True | ||
return False | ||
|
||
def findall(patterns, line): | ||
if len(patterns) == 1: | ||
for m in patterns[0].finditer(line): | ||
yield m.group() | ||
return | ||
|
||
i = 0 | ||
while True: | ||
begin = len(line) + 1 | ||
end = -1 | ||
best_span = (len(line) + 1, 1) | ||
for pat in patterns: | ||
m = pat.search(line, i) | ||
if m is not None: | ||
# Find the longest of the first matches. | ||
best_span = min(best_span, (m.start(), -m.end())) | ||
if best_span == (i, -len(line)): | ||
# Matches the rest of line. | ||
break | ||
start = best_span[0] | ||
end = -best_span[1] | ||
if end < 0: | ||
break | ||
yield line[start:end] | ||
i = end | ||
if start == end: | ||
i += 1 | ||
|
||
def grep(opts, patterns, file, filename): | ||
def print_line(sep, ln, line): | ||
if opts.filename: | ||
print(filename, end=sep) | ||
if opts.line_number: | ||
print(ln, end=sep) | ||
print(line) | ||
|
||
fast_exit = opts.quiet or opts.filename_only is not None | ||
prev_lines = deque(maxlen=opts.before_context) | ||
after_lines = 0 | ||
next_group = None | ||
total_count = 0 | ||
found = False | ||
for ln, line in enumerate(file, 1): | ||
line = line.removesuffix('\n') | ||
if opts.only_matching: | ||
for match in findall(patterns, line): | ||
print_line(':', ln, match) | ||
found = True | ||
else: | ||
matches = find(patterns, line) | ||
if opts.invert_match: | ||
matches = not matches | ||
if matches: | ||
found = True | ||
if fast_exit: | ||
break | ||
elif opts.count: | ||
total_count += 1 | ||
else: | ||
if next_group and ln > next_group: | ||
print(opts.group_separator) | ||
for ln2, line2 in enumerate(prev_lines, ln - len(prev_lines)): | ||
print_line('-', ln2, line2) | ||
prev_lines.clear() | ||
print_line(':', ln, line) | ||
after_lines = opts.after_context | ||
next_group = (None if opts.group_separator is None else | ||
ln + after_lines + opts.before_context + 1) | ||
elif not (fast_exit or opts.count): | ||
if after_lines: | ||
print_line('-', ln, line) | ||
after_lines -= 1 | ||
else: | ||
prev_lines.append(line) | ||
if fast_exit: | ||
if opts.filename_only == found: | ||
print(filename) | ||
elif opts.count: | ||
if opts.filename: | ||
print(filename, end=':') | ||
print(total_count) | ||
return found | ||
|
||
def read_from_file(filename): | ||
with open(filename, encoding=sys.stdin.encoding, | ||
errors=sys.stdin.errors) as f: | ||
return [line.removesuffix('\n') for line in f.readlines()] | ||
|
||
def make_parser(): | ||
parser = argparse.ArgumentParser(add_help=False) | ||
# Add --help option explicitly to avoid conflict in the -h option. | ||
parser.add_argument('--help', | ||
action='help', default=argparse.SUPPRESS, | ||
help='show this help message and exit') | ||
|
||
parser.set_defaults(label='(standard input)', group_separator='--') | ||
|
||
parser.add_argument('files', | ||
nargs=argparse.REMAINDER, | ||
metavar='FILES', | ||
help='Files to search.') | ||
|
||
grp = parser.add_argument_group('Matching Control') | ||
grp.add_argument('-e', '--regexp', | ||
action='append', dest='patterns', | ||
metavar='PATTERN', | ||
help='Use PATTERN as the pattern.') | ||
grp.add_argument('-f', '--file', | ||
action='extend', dest='patterns', type=read_from_file, | ||
metavar='PATTERN_FILE', | ||
help='Obtain patterns from PATTERN_FILE, one per line.') | ||
grp.add_argument('-F', '--fixed-strings', | ||
action='store_true', | ||
help='Interpret patterns as fixed strings.') | ||
grp.add_argument('-i', '--ignore-case', | ||
action='store_true', | ||
help='Ignore case distinctions in patterns and input data.') | ||
grp.add_argument('--no-ignore-case', | ||
action='store_false', dest='ignore_case', | ||
help='Do not ignore case distinctions in patterns and input data. ' | ||
'This is the default.') | ||
grp.add_argument('-v', '--invert-match', | ||
action='store_true', | ||
help='Invert the sense of matching, to select non-matching lines.') | ||
grp.add_argument('-w', '--word-regexp', | ||
action='store_true', | ||
help='Select only those lines containing matches that form whole words.') | ||
grp.add_argument('-x', '--line-regexp', | ||
action='store_true', | ||
help='Select only those matches that exactly match the whole line.') | ||
|
||
grp = parser.add_argument_group('Output Control') | ||
grp.add_argument('-c', '--count', | ||
action='store_true', | ||
help='Suppress normal output; instead print a count of matching ' | ||
'lines for each input file.') | ||
grp.add_argument('-L', '--files-without-match', | ||
action='store_false', dest='filename_only', default=None, | ||
help='Suppress normal output; instead print the name of each input ' | ||
'file from which no output would normally have been printed.') | ||
grp.add_argument('-l', '--files-with-match', | ||
action='store_true', dest='filename_only', | ||
help='Suppress normal output; instead print the name of each input ' | ||
'file from which output would normally have been printed.') | ||
grp.add_argument('-o', '--only-matching', | ||
action='store_true', | ||
help='Print only the matched (non-empty) parts of a matching line, ' | ||
'with each such part on a separate output line.') | ||
grp.add_argument('-q', '--quiet', | ||
action='store_true', | ||
help='Quiet; do not write anything to standard output. ' | ||
'Exit immediately with zero status if any match is found.') | ||
|
||
grp = parser.add_argument_group('Output Line Prefix Control') | ||
grp.add_argument('-H', '--with-filename', | ||
action='store_true', dest='filename', default=None, | ||
help='Print the file name for each match. ' | ||
'This is the default when there is more than one file to search.') | ||
grp.add_argument('-h', '--no-filename', | ||
action='store_false', dest='filename', | ||
help='Suppress the prefixing of file names on output. ' | ||
'This is the default when there is only one file (or only standard ' | ||
'input) to search.') | ||
grp.add_argument('-n', '--line-number', | ||
action='store_true', | ||
help='Prefix each line of output with the 1-based line number ' | ||
'within its input file.') | ||
|
||
grp = parser.add_argument_group('Context Line Control') | ||
grp.add_argument('-A', '--after-context', | ||
type=int, metavar='NUM', | ||
help='Print NUM lines of trailing context after matching lines.') | ||
grp.add_argument('-B', '--before-context', | ||
type=int, metavar='NUM', | ||
help='Print NUM lines of leading context before matching lines.') | ||
grp.add_argument('-C', '--context', | ||
type=int, default=0, metavar='NUM', | ||
help='Print NUM lines of output context.') | ||
|
||
grp = parser.add_argument_group('File and Directory Selection') | ||
grp.add_argument('-r', '--recursive', | ||
action='store_true', | ||
help='Read all files under each directory, recursively.') | ||
grp.add_argument('--include', | ||
action='append', | ||
metavar='GLOB', | ||
help='Search only files whose base name matches GLOB (using wildcard matching).') | ||
return parser | ||
|
||
def parse_args(): | ||
parser = make_parser() | ||
opts = parser.parse_args() | ||
|
||
# By default print filenames only if more than one file is specified. | ||
if opts.filename is None: | ||
opts.filename = len(opts.files) > 1 | ||
# -C sets -A and -B if they are not specified explicitly. | ||
if opts.after_context is None: | ||
opts.after_context = opts.context | ||
if opts.before_context is None: | ||
opts.before_context = opts.context | ||
# -q, -l, -L and -c suppresses normal output. | ||
if opts.quiet or opts.filename_only is not None or opts.count: | ||
opts.only_matching = False | ||
opts.after_context = opts.before_context = 0 | ||
# -vo suppresses normal output. | ||
if opts.only_matching and opts.invert_match: | ||
opts.only_matching = False | ||
opts.quiet = True | ||
# -o suppresses context output. | ||
if opts.only_matching: | ||
opts.after_context = opts.before_context = 0 | ||
# Only print group separator for non-zero context. | ||
if not (opts.after_context or opts.before_context): | ||
opts.group_separator = None | ||
return opts | ||
|
||
def compile_patterns(opts): | ||
patterns = opts.patterns or [] | ||
if opts.fixed_strings: | ||
patterns = [re.escape(pat) for pat in patterns] | ||
if opts.line_regexp: | ||
patterns = [fr'\A(?:{pat})\Z' for pat in patterns] | ||
elif opts.word_regexp: | ||
patterns = [fr'\b(?:{pat})\b' for pat in patterns] | ||
flags = re.IGNORECASE if opts.ignore_case else 0 | ||
return [re.compile(pat, flags) for pat in patterns] | ||
|
||
def filter_file(opts, filename): | ||
return not opts.include or any(fnmatch.fnmatch(filename, pat) | ||
for pat in opts.include) | ||
|
||
def iter_files(opts): | ||
for arg in opts.files or ['-']: | ||
if arg == '-': | ||
yield None | ||
elif os.path.isfile(arg): | ||
if filter_file(opts, os.path.basename(arg)): | ||
yield arg | ||
elif os.path.isdir(arg): | ||
if not opts.recursive: | ||
print(f'error: {arg!r} is a directory', file=sys.stderr) | ||
continue | ||
for dirpath, dirnames, filenames in os.walk(arg): | ||
for filename in filenames: | ||
if filter_file(opts, filename): | ||
yield os.path.join(dirpath, filename) | ||
else: | ||
print(f'error: {arg!r} has unsupported type', file=sys.stderr) | ||
|
||
def main(): | ||
opts = parse_args() | ||
patterns = compile_patterns(opts) | ||
|
||
found = False | ||
for filename in iter_files(opts): | ||
if filename is None: | ||
found |= grep(opts, patterns, sys.stdin, opts.label) | ||
else: | ||
with open(filename, encoding=sys.stdin.encoding, | ||
errors=sys.stdin.errors) as f: | ||
found |= grep(opts, patterns, f, filename) | ||
if found and opts.quiet: | ||
break | ||
return found | ||
|
||
|
||
if __name__ == '__main__': | ||
try: | ||
found = main() | ||
except SystemExit: | ||
raise | ||
except BaseException as e: | ||
print(f'error: {e!r}', file=sys.stderr) | ||
sys.exit(2) | ||
sys.exit(0 if found else 1) |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is a lot of code for one function. I would put the parser code in a separate function, say 'grep_parser' and replace
opts = parser.parse_args()
below withopts = grep_parser()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Something went wrong in the quoting. I only wanted the last line included.