Skip to content

Commit 4c9ffb1

Browse files
committed
grep: avoid duplicated lines when we're coloring output
For the default uncolored output, we'll just output a line once and then move on. For colored output, we'll output multiple matches per line with context from the line interspersed and may end up writing out some match context multiple times as we don't persist which part of the lines have already been printed. Fix it by tracking the length of line printed thus far in printline() and retaining it across successive calls to printline() in the same line. printline() should indicate whether it terminated the line or not to avoid tracking the logic for that in multiple places: -o lines are always terminated, so it's generally only some --color contexts where we wouldn't have terminated. Add a test to make sure that we're only printing one line going forward. Reported and tested by: Jamie Landeg-Jones <jamie catflap org> Reviewed by: emaste Differential Revision: https://reviews.freebsd.org/D49324
1 parent 3658680 commit 4c9ffb1

File tree

2 files changed

+74
-13
lines changed

2 files changed

+74
-13
lines changed

usr.bin/grep/tests/grep_freebsd_test.sh

+15
Original file line numberDiff line numberDiff line change
@@ -103,10 +103,25 @@ zflag_body()
103103
atf_check grep -qz "foo.*bar" in
104104
}
105105

106+
atf_test_case color_dupe
107+
color_dupe_body()
108+
{
109+
110+
# This assumes a MAX_MATCHES of exactly 32. Previously buggy procline()
111+
# calls would terminate the line premature every MAX_MATCHES matches,
112+
# meaning we'd see the line be output again for the next MAX_MATCHES
113+
# number of matches.
114+
jot -nb 'A' -s '' 33 > in
115+
116+
atf_check -o save:color.out grep --color=always . in
117+
atf_check -o match:"^ +1 color.out" wc -l color.out
118+
}
119+
106120
atf_init_test_cases()
107121
{
108122
atf_add_test_case grep_r_implied
109123
atf_add_test_case rgrep
110124
atf_add_test_case gnuext
111125
atf_add_test_case zflag
126+
atf_add_test_case color_dupe
112127
}

usr.bin/grep/util.c

+59-13
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ static int litexec(const struct pat *pat, const char *string,
7272
size_t nmatch, regmatch_t pmatch[]);
7373
#endif
7474
static bool procline(struct parsec *pc);
75-
static void printline(struct parsec *pc, int sep);
75+
static bool printline(struct parsec *pc, int sep, size_t *last_out);
7676
static void printline_metadata(struct str *line, int sep);
7777

7878
bool
@@ -214,15 +214,29 @@ procmatch_match(struct mprintc *mc, struct parsec *pc)
214214

215215
/* Print the matching line, but only if not quiet/binary */
216216
if (mc->printmatch) {
217-
printline(pc, ':');
217+
size_t last_out;
218+
bool terminated;
219+
220+
last_out = 0;
221+
terminated = printline(pc, ':', &last_out);
218222
while (pc->matchidx >= MAX_MATCHES) {
219223
/* Reset matchidx and try again */
220224
pc->matchidx = 0;
221225
if (procline(pc) == !vflag)
222-
printline(pc, ':');
226+
terminated = printline(pc, ':', &last_out);
223227
else
224228
break;
225229
}
230+
231+
/*
232+
* The above loop processes the entire line as long as we keep
233+
* hitting the maximum match count. At this point, we know
234+
* that there's nothing left to be printed and can terminate the
235+
* line.
236+
*/
237+
if (!terminated)
238+
printline(pc, ':', &last_out);
239+
226240
first_match = false;
227241
mc->same_file = true;
228242
mc->last_outed = 0;
@@ -748,26 +762,39 @@ printline_metadata(struct str *line, int sep)
748762
}
749763

750764
/*
751-
* Prints a matching line according to the command line options.
765+
* Prints a matching line according to the command line options. We need
766+
* *last_out to be populated on entry in case this is just a continuation of
767+
* matches within the same line.
768+
*
769+
* Returns true if the line was terminated, false if it was not.
752770
*/
753-
static void
754-
printline(struct parsec *pc, int sep)
771+
static bool
772+
printline(struct parsec *pc, int sep, size_t *last_out)
755773
{
756-
size_t a = 0;
774+
size_t a = *last_out;
757775
size_t i, matchidx;
758776
regmatch_t match;
777+
bool terminated;
778+
779+
/*
780+
* Nearly all paths below will terminate the line by default, but it is
781+
* avoided in some circumstances in case we don't have the full context
782+
* available here.
783+
*/
784+
terminated = true;
759785

760786
/* If matchall, everything matches but don't actually print for -o */
761787
if (oflag && matchall)
762-
return;
788+
return (terminated);
763789

764790
matchidx = pc->matchidx;
765791

766792
/* --color and -o */
767-
if ((oflag || color) && matchidx > 0) {
793+
if ((oflag || color) && (pc->printed > 0 || matchidx > 0)) {
768794
/* Only print metadata once per line if --color */
769-
if (!oflag && pc->printed == 0)
795+
if (!oflag && pc->printed == 0) {
770796
printline_metadata(&pc->ln, sep);
797+
}
771798
for (i = 0; i < matchidx; i++) {
772799
match = pc->matches[i];
773800
/* Don't output zero length matches */
@@ -780,9 +807,10 @@ printline(struct parsec *pc, int sep)
780807
if (oflag) {
781808
pc->ln.boff = match.rm_so;
782809
printline_metadata(&pc->ln, sep);
783-
} else
810+
} else {
784811
fwrite(pc->ln.dat + a, match.rm_so - a, 1,
785812
stdout);
813+
}
786814
if (color)
787815
fprintf(stdout, "\33[%sm\33[K", color);
788816
fwrite(pc->ln.dat + match.rm_so,
@@ -793,13 +821,31 @@ printline(struct parsec *pc, int sep)
793821
if (oflag)
794822
putchar('\n');
795823
}
796-
if (!oflag) {
797-
if (pc->ln.len - a > 0)
824+
825+
/*
826+
* Don't terminate if we reached the match limit; we may have
827+
* other matches on this line to process.
828+
*/
829+
*last_out = a;
830+
if (!oflag && matchidx != MAX_MATCHES) {
831+
if (pc->ln.len - a > 0) {
798832
fwrite(pc->ln.dat + a, pc->ln.len - a, 1,
799833
stdout);
834+
*last_out = pc->ln.len;
835+
}
800836
putchar('\n');
837+
} else if (!oflag) {
838+
/*
839+
* -o is terminated on every match output, so this
840+
* branch is only designed to capture MAX_MATCHES in a
841+
* line which may be a signal to us for a lack of
842+
* context. The caller will know more and call us again
843+
* to terminate if it needs to.
844+
*/
845+
terminated = false;
801846
}
802847
} else
803848
grep_printline(&pc->ln, sep);
804849
pc->printed++;
850+
return (terminated);
805851
}

0 commit comments

Comments
 (0)