Skip to content

Commit

Permalink
cli: add --stop-on-nonmatch flag
Browse files Browse the repository at this point in the history
This causes ripgrep to stop searching an individual file after it has
found a non-matching line. But this only occurs after it has found a
matching line.

Fixes #1790, Closes #1930
  • Loading branch information
edoardopirovano authored and BurntSushi committed Jul 8, 2023
1 parent 2dfb7c9 commit 55acc88
Show file tree
Hide file tree
Showing 7 changed files with 99 additions and 12 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ Unreleased changes. Release notes have not yet been written.
Feature enhancements:

* Added or improved file type filtering for Ada, DITA, Elixir, Fuchsia, Gentoo, GraphQL, Markdown, Raku, TypeScript, USD, V
* [FEATURE #1790](https://github.com/BurntSushi/ripgrep/issues/1790):
Add new `--stop-on-nonmatch` flag.
* [FEATURE #2195](https://github.com/BurntSushi/ripgrep/issues/2195):
When `extra-verbose` mode is enabled in zsh, show extra file type info.
* [FEATURE #2409](https://github.com/BurntSushi/ripgrep/pull/2409):
Expand Down
1 change: 1 addition & 0 deletions complete/_rg
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,7 @@ _rg() {
'(-q --quiet)'{-q,--quiet}'[suppress normal output]'
'--regex-size-limit=[specify upper size limit of compiled regex]:regex size (bytes)'
'*'{-u,--unrestricted}'[reduce level of "smart" searching]'
'--stop-on-nonmatch[stop on first non-matching line after a matching one]'

+ operand # Operands
'(--files --type-list file regexp)1: :_guard "^-*" pattern'
Expand Down
25 changes: 24 additions & 1 deletion crates/core/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -632,6 +632,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
flag_sort(&mut args);
flag_sortr(&mut args);
flag_stats(&mut args);
flag_stop_on_nonmatch(&mut args);
flag_text(&mut args);
flag_threads(&mut args);
flag_trim(&mut args);
Expand Down Expand Up @@ -1926,13 +1927,16 @@ Nevertheless, if you only care about matches spanning at most one line, then it
is always better to disable multiline mode.
This flag can be disabled with --no-multiline.
This overrides the --stop-on-nonmatch flag.
"
);
let arg = RGArg::switch("multiline")
.short("U")
.help(SHORT)
.long_help(LONG)
.overrides("no-multiline");
.overrides("no-multiline")
.overrides("stop-on-nonmatch");
args.push(arg);

let arg = RGArg::switch("no-multiline").hidden().overrides("multiline");
Expand Down Expand Up @@ -2854,6 +2858,25 @@ This flag can be disabled with --no-stats.
args.push(arg);
}

/// Registers the `--stop-on-nonmatch` switch in the given argument list.
///
/// The switch is declared as overriding `multiline`, matching the last
/// sentence of its long help text.
fn flag_stop_on_nonmatch(args: &mut Vec<RGArg>) {
    const SHORT: &str = "Stop searching after a non-match.";
    const LONG: &str = long!(
        "\
Enabling this option will cause ripgrep to stop reading a file once it
encounters a non-matching line after it has encountered a matching line.
This is useful if it is expected that all matches in a given file will be on
sequential lines, for example due to the lines being sorted.
This overrides the -U/--multiline flag.
"
    );
    // Build the switch and register it in one expression.
    args.push(
        RGArg::switch("stop-on-nonmatch")
            .help(SHORT)
            .long_help(LONG)
            .overrides("multiline"),
    );
}

fn flag_text(args: &mut Vec<RGArg>) {
const SHORT: &str = "Search binary files as if they were text.";
const LONG: &str = long!(
Expand Down
3 changes: 2 additions & 1 deletion crates/core/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -821,7 +821,8 @@ impl ArgMatches {
.before_context(ctx_before)
.after_context(ctx_after)
.passthru(self.is_present("passthru"))
.memory_map(self.mmap_choice(paths));
.memory_map(self.mmap_choice(paths))
.stop_on_nonmatch(self.is_present("stop-on-nonmatch"));
match self.encoding()? {
EncodingMode::Some(enc) => {
builder.encoding(Some(enc));
Expand Down
49 changes: 39 additions & 10 deletions crates/searcher/src/searcher/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ use crate::sink::{
};
use grep_matcher::{LineMatchKind, Matcher};

/// The outcome of one pass of the fast line-by-line search over a buffer.
///
/// The fast path reports one of these to `match_by_line`, which translates
/// it into the boolean "keep searching" result or falls back to the slow
/// line-by-line path.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum FastMatchResult {
    /// The buffer was fully processed and searching should continue.
    Continue,
    /// Searching should stop; the caller reports `Ok(false)`.
    Stop,
    /// The fast path bailed out and the caller should process the same
    /// buffer with the slow line-by-line path. This is returned once a match
    /// has been seen while `stop_on_nonmatch` is enabled.
    SwitchToSlow,
}

#[derive(Debug)]
pub struct Core<'s, M: 's, S> {
config: &'s Config,
Expand All @@ -25,6 +31,7 @@ pub struct Core<'s, M: 's, S> {
last_line_visited: usize,
after_context_left: usize,
has_sunk: bool,
has_matched: bool,
}

impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
Expand All @@ -50,6 +57,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
last_line_visited: 0,
after_context_left: 0,
has_sunk: false,
has_matched: false,
};
if !core.searcher.multi_line_with_matcher(&core.matcher) {
if core.is_line_by_line_fast() {
Expand Down Expand Up @@ -109,7 +117,11 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {

pub fn match_by_line(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
if self.is_line_by_line_fast() {
self.match_by_line_fast(buf)
match self.match_by_line_fast(buf)? {
FastMatchResult::SwitchToSlow => self.match_by_line_slow(buf),
FastMatchResult::Continue => Ok(true),
FastMatchResult::Stop => Ok(false),
}
} else {
self.match_by_line_slow(buf)
}
Expand Down Expand Up @@ -270,7 +282,9 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
}
};
self.set_pos(line.end());
if matched != self.config.invert_match {
let success = matched != self.config.invert_match;
if success {
self.has_matched = true;
if !self.before_context_by_line(buf, line.start())? {
return Ok(false);
}
Expand All @@ -286,40 +300,51 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
return Ok(false);
}
}
if self.config.stop_on_nonmatch && !success && self.has_matched {
return Ok(false);
}
}
Ok(true)
}

fn match_by_line_fast(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
debug_assert!(!self.config.passthru);
fn match_by_line_fast(
&mut self,
buf: &[u8],
) -> Result<FastMatchResult, S::Error> {
use FastMatchResult::*;

debug_assert!(!self.config.passthru);
while !buf[self.pos()..].is_empty() {
if self.config.stop_on_nonmatch && self.has_matched {
return Ok(SwitchToSlow);
}
if self.config.invert_match {
if !self.match_by_line_fast_invert(buf)? {
return Ok(false);
return Ok(Stop);
}
} else if let Some(line) = self.find_by_line_fast(buf)? {
self.has_matched = true;
if self.config.max_context() > 0 {
if !self.after_context_by_line(buf, line.start())? {
return Ok(false);
return Ok(Stop);
}
if !self.before_context_by_line(buf, line.start())? {
return Ok(false);
return Ok(Stop);
}
}
self.set_pos(line.end());
if !self.sink_matched(buf, &line)? {
return Ok(false);
return Ok(Stop);
}
} else {
break;
}
}
if !self.after_context_by_line(buf, buf.len())? {
return Ok(false);
return Ok(Stop);
}
self.set_pos(buf.len());
Ok(true)
Ok(Continue)
}

#[inline(always)]
Expand All @@ -344,6 +369,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
if invert_match.is_empty() {
return Ok(true);
}
self.has_matched = true;
if !self.after_context_by_line(buf, invert_match.start())? {
return Ok(false);
}
Expand Down Expand Up @@ -577,6 +603,9 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
if self.config.passthru {
return false;
}
if self.config.stop_on_nonmatch && self.has_matched {
return false;
}
if let Some(line_term) = self.matcher.line_terminator() {
if line_term == self.config.line_term {
return true;
Expand Down
24 changes: 24 additions & 0 deletions crates/searcher/src/searcher/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,9 @@ pub struct Config {
encoding: Option<Encoding>,
/// Whether to do automatic transcoding based on a BOM or not.
bom_sniffing: bool,
/// Whether to stop searching when a non-matching line is found after a
/// matching line.
stop_on_nonmatch: bool,
}

impl Default for Config {
Expand All @@ -190,6 +193,7 @@ impl Default for Config {
multi_line: false,
encoding: None,
bom_sniffing: true,
stop_on_nonmatch: false,
}
}
}
Expand Down Expand Up @@ -555,6 +559,19 @@ impl SearcherBuilder {
self.config.bom_sniffing = yes;
self
}

/// Stop searching a file when a non-matching line is found after a
/// matching line.
///
/// This is useful for searching sorted files where it is expected that all
/// the matches will be on adjacent lines.
///
/// This is disabled by default.
pub fn stop_on_nonmatch(
&mut self,
stop_on_nonmatch: bool,
) -> &mut SearcherBuilder {
self.config.stop_on_nonmatch = stop_on_nonmatch;
self
}
}

/// A searcher executes searches over a haystack and writes results to a caller
Expand Down Expand Up @@ -838,6 +855,13 @@ impl Searcher {
self.config.multi_line
}

/// Returns true if and only if this searcher is configured to stop when it
/// finds a non-matching line after a matching one.
#[inline]
pub fn stop_on_nonmatch(&self) -> bool {
self.config.stop_on_nonmatch
}

/// Returns true if and only if this searcher will choose a multi-line
/// strategy given the provided matcher.
///
Expand Down
7 changes: 7 additions & 0 deletions tests/feature.rs
Original file line number Diff line number Diff line change
Expand Up @@ -992,3 +992,10 @@ rgtest!(no_unicode, |dir: Dir, mut cmd: TestCommand| {
dir.create("test", "δ");
cmd.arg("-i").arg("--no-unicode").arg("Δ").assert_err();
});

// See: https://github.com/BurntSushi/ripgrep/issues/1790
//
// line1 does not match `[235]` but precedes any match, so it must not stop
// the search. line2 and line3 match. line4 is the first non-match after a
// match, so the search stops there and line5 is never reported even though
// it would match.
rgtest!(stop_on_nonmatch, |dir: Dir, mut cmd: TestCommand| {
dir.create("test", "line1\nline2\nline3\nline4\nline5");
cmd.args(&["--stop-on-nonmatch", "[235]"]);
eqnice!("test:line2\ntest:line3\n", cmd.stdout());
});

0 comments on commit 55acc88

Please sign in to comment.