Skip to content

Commit

Permalink
git-grep: Learn PCRE
Browse files Browse the repository at this point in the history
This patch teaches git-grep the --perl-regexp/-P options (naming
borrowed from GNU grep) in order to allow specifying PCRE regexes on the
command line.

PCRE regexes have a number of features which make them handier to use
than POSIX regexes, like consistent escaping rules, extended character
classes, ungreedy matching etc.

git isn't built with PCRE support automatically. The USE_LIBPCRE
environment variable must be set (like `make USE_LIBPCRE=YesPlease`).

Signed-off-by: Michał Kiedrowicz <michal.kiedrowicz@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
  • Loading branch information
mkiedrowicz authored and gitster committed May 9, 2011
1 parent a30c148 commit 63e7e9d
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 1 deletion.
6 changes: 6 additions & 0 deletions Documentation/git-grep.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ SYNOPSIS
'git grep' [-a | --text] [-I] [-i | --ignore-case] [-w | --word-regexp]
[-v | --invert-match] [-h|-H] [--full-name]
[-E | --extended-regexp] [-G | --basic-regexp]
[-P | --perl-regexp]
[-F | --fixed-strings] [-n | --line-number]
[-l | --files-with-matches] [-L | --files-without-match]
[(-O | --open-files-in-pager) [<pager>]]
Expand Down Expand Up @@ -97,6 +98,11 @@ OPTIONS
Use POSIX extended/basic regexp for patterns. Default
is to use basic regexp.

-P::
--perl-regexp::
Use Perl-compatible regexp for patterns. Requires libpcre to be
compiled in.

-F::
--fixed-strings::
Use fixed strings for patterns (don't interpret pattern
Expand Down
15 changes: 15 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ all::
# Define NO_OPENSSL environment variable if you do not have OpenSSL.
# This also implies BLK_SHA1.
#
# Define USE_LIBPCRE if you have and want to use libpcre. git-grep will be
# able to use Perl-compatible regular expressions.
#
# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in
# /foo/bar/include and /foo/bar/lib directories.
#
# Define NO_CURL if you do not have libcurl installed. git-http-pull and
# git-http-push are not built, and you cannot use http:// and https://
# transports.
Expand Down Expand Up @@ -1248,6 +1254,15 @@ ifdef NO_LIBGEN_H
COMPAT_OBJS += compat/basename.o
endif

# Optional libpcre support: define USE_LIBPCRE for the C code and link
# against -lpcre.
ifdef USE_LIBPCRE
BASIC_CFLAGS += -DUSE_LIBPCRE
# A non-default install prefix contributes its include/ and library
# directories.
# NOTE(review): $(lib) and $(CC_LD_DYNPATH) are defined outside this hunk;
# presumably the library subdirectory name and the runtime-search-path
# linker flag -- confirm against the rest of the Makefile.
ifdef LIBPCREDIR
BASIC_CFLAGS += -I$(LIBPCREDIR)/include
EXTLIBS += -L$(LIBPCREDIR)/$(lib) $(CC_LD_DYNPATH)$(LIBPCREDIR)/$(lib)
endif
EXTLIBS += -lpcre
endif

ifdef NO_CURL
BASIC_CFLAGS += -DNO_CURL
REMOTE_CURL_PRIMARY =
Expand Down
2 changes: 2 additions & 0 deletions builtin/grep.c
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
REG_EXTENDED),
OPT_BOOLEAN('F', "fixed-strings", &opt.fixed,
"interpret patterns as fixed strings"),
OPT_BOOLEAN('P', "perl-regexp", &opt.pcre,
"use Perl-compatible regular expressions"),
OPT_GROUP(""),
OPT_BOOLEAN('n', "line-number", &opt.linenum, "show line numbers"),
OPT_NEGBIT('h', NULL, &opt.pathname, "don't show filenames", 1),
Expand Down
1 change: 1 addition & 0 deletions contrib/completion/git-completion.bash
Original file line number Diff line number Diff line change
Expand Up @@ -1487,6 +1487,7 @@ _git_grep ()
--text --ignore-case --word-regexp --invert-match
--full-name --line-number
--extended-regexp --basic-regexp --fixed-strings
--perl-regexp
--files-with-matches --name-only
--files-without-match
--max-depth
Expand Down
75 changes: 74 additions & 1 deletion grep.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,69 @@ static NORETURN void compile_regexp_failed(const struct grep_pat *p,
die("%s'%s': %s", where, p->pattern, error);
}

#ifdef USE_LIBPCRE
/*
 * Compile p->pattern as a Perl-compatible regular expression.
 *
 * The compiled pattern is stored in p->pcre_regexp and the result of
 * studying it in p->pcre_extra_info.  Matching is made case-insensitive
 * when opt->ignore_case is set.
 *
 * A compilation failure is reported through compile_regexp_failed().
 * pcre_study() returning NULL is only fatal when it also supplied an
 * error message; NULL without a message is accepted and stored as is.
 */
static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
{
	const char *errmsg;
	int erroff;
	int flags = opt->ignore_case ? PCRE_CASELESS : 0;

	p->pcre_regexp = pcre_compile(p->pattern, flags, &errmsg, &erroff,
			NULL);
	if (!p->pcre_regexp)
		compile_regexp_failed(p, errmsg);

	p->pcre_extra_info = pcre_study(p->pcre_regexp, 0, &errmsg);
	if (errmsg && !p->pcre_extra_info)
		die("%s", errmsg);
}

/*
 * Match the compiled PCRE pattern in p against the text [line, eol).
 *
 * eflags uses the regexec() flag namespace; only REG_NOTBOL is honoured
 * and is translated to PCRE_NOTBOL.
 *
 * Returns 0 on a match, with match->rm_so / match->rm_eo set to the
 * offsets of the whole match relative to line.  Returns a non-zero value
 * (PCRE_ERROR_NOMATCH) when there is no match; match is left untouched.
 * Any other pcre_exec() error is fatal.
 */
static int pcrematch(struct grep_pat *p, const char *line, const char *eol,
		regmatch_t *match, int eflags)
{
	int ovector[30], ret, flags = 0;

	if (eflags & REG_NOTBOL)
		flags |= PCRE_NOTBOL;

	ret = pcre_exec(p->pcre_regexp, p->pcre_extra_info, line, eol - line,
			0, flags, ovector, ARRAY_SIZE(ovector));
	if (ret < 0) {
		if (ret != PCRE_ERROR_NOMATCH)
			die("pcre_exec failed with error code %d", ret);
		return ret;
	}

	/*
	 * ret == 0 is also a successful match: it only means ovector was
	 * too small to hold every captured substring.  The first pair,
	 * which describes the whole match, is still filled in, so report
	 * the offsets instead of returning "match" with *match unset.
	 */
	match->rm_so = ovector[0];
	match->rm_eo = ovector[1];
	return 0;
}

/*
 * Release the compiled pattern and the study data held in p
 * (p->pcre_regexp and p->pcre_extra_info), both through pcre_free().
 */
static void free_pcre_regexp(struct grep_pat *p)
{
pcre_free(p->pcre_regexp);
pcre_free(p->pcre_extra_info);
}
#else /* !USE_LIBPCRE */
/*
 * Stand-in used when git is built without USE_LIBPCRE: asking for a
 * Perl-compatible pattern is reported as an error through die().
 */
static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
{
die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
}

/*
 * Stand-in used when git is built without USE_LIBPCRE: always returns 1,
 * which callers that test !pcrematch(...) treat as "no match".
 * NOTE(review): presumably never reached in this configuration, since
 * compile_pcre_regexp() dies before p->pcre_regexp can be set -- confirm.
 */
static int pcrematch(struct grep_pat *p, const char *line, const char *eol,
regmatch_t *match, int eflags)
{
return 1;
}

/*
 * Stand-in used when git is built without USE_LIBPCRE: there is no PCRE
 * state to release, so this does nothing.
 */
static void free_pcre_regexp(struct grep_pat *p)
{
}
#endif /* !USE_LIBPCRE */

static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
{
int err;
Expand All @@ -85,6 +148,11 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
if (p->fixed)
return;

if (opt->pcre) {
compile_pcre_regexp(p, opt);
return;
}

err = regcomp(&p->regexp, p->pattern, opt->regflags);
if (err) {
char errbuf[1024];
Expand Down Expand Up @@ -327,7 +395,10 @@ void free_grep_patterns(struct grep_opt *opt)
case GREP_PATTERN: /* atom */
case GREP_PATTERN_HEAD:
case GREP_PATTERN_BODY:
regfree(&p->regexp);
if (p->pcre_regexp)
free_pcre_regexp(p);
else
regfree(&p->regexp);
break;
default:
break;
Expand Down Expand Up @@ -426,6 +497,8 @@ static int patmatch(struct grep_pat *p, char *line, char *eol,

if (p->fixed)
hit = !fixmatch(p, line, eol, match);
else if (p->pcre_regexp)
hit = !pcrematch(p, line, eol, match, eflags);
else
hit = !regmatch(&p->regexp, line, eol, match, eflags);

Expand Down
9 changes: 9 additions & 0 deletions grep.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
#ifndef GREP_H
#define GREP_H
#include "color.h"
/*
 * With USE_LIBPCRE the real libpcre types come from <pcre.h>.  Without
 * it, placeholder typedefs are provided so that declarations using
 * "pcre *" and "pcre_extra *" still compile.
 */
#ifdef USE_LIBPCRE
#include <pcre.h>
#else
typedef int pcre;
typedef int pcre_extra;
#endif

enum grep_pat_token {
GREP_PATTERN,
Expand Down Expand Up @@ -33,6 +39,8 @@ struct grep_pat {
size_t patternlen;
enum grep_header_field field;
regex_t regexp;
pcre *pcre_regexp;
pcre_extra *pcre_extra_info;
unsigned fixed:1;
unsigned ignore_case:1;
unsigned word_regexp:1;
Expand Down Expand Up @@ -83,6 +91,7 @@ struct grep_opt {
#define GREP_BINARY_TEXT 2
int binary;
int extended;
int pcre;
int relative;
int pathname;
int null_following_name;
Expand Down

0 comments on commit 63e7e9d

Please sign in to comment.