Skip to content

Commit

Permalink
matchtree: disable word search optimization for symbol search (source…
Browse files Browse the repository at this point in the history
…graph#571)

Before this change case sensitive symbol searches of the form
\bLITERAL\b would result in the error message

  found *zoekt.andMatchTree inside query.Symbol

The root cause is the symbol search code is pretty janky. It constructs
a matchtree and then pulls out either the regex matchtree or the substr
matchtree, then creates a specific symbol searcher for those. It feels
like it could be more generic rather than a bunch of hard to follow copy
pasta. For now this was a more straightforward fix than creating a word
search for symbols.

Test Plan: expanded the unit tests to cover word search more often.
Additionally run zoekt-webserver and checked that a search like the
following works now:

  sym:\bnewMatchTree\b case:yes
  • Loading branch information
keegancsmith authored Mar 28, 2023
1 parent 704a5cc commit 2fdbd60
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 30 deletions.
2 changes: 1 addition & 1 deletion eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ func (d *indexData) Search(ctx context.Context, q query.Q, opts *SearchOptions)

q = query.Map(q, query.ExpandFileContent)

mt, err := d.newMatchTree(q)
mt, err := d.newMatchTree(q, matchTreeOpt{})
if err != nil {
return nil, err
}
Expand Down
29 changes: 21 additions & 8 deletions matchtree.go
Original file line number Diff line number Diff line change
Expand Up @@ -834,7 +834,13 @@ func (t *substrMatchTree) matches(cp *contentProvider, cost int, known map[match
return len(t.current) > 0, true
}

func (d *indexData) newMatchTree(q query.Q) (matchTree, error) {
type matchTreeOpt struct {
// DisableWordMatchOptimization is used to disable the use of wordMatchTree.
// This was added since we do not support wordMatchTree with symbol search.
DisableWordMatchOptimization bool
}

func (d *indexData) newMatchTree(q query.Q, opt matchTreeOpt) (matchTree, error) {
if q == nil {
return nil, fmt.Errorf("got nil (sub)query")
}
Expand All @@ -856,7 +862,7 @@ func (d *indexData) newMatchTree(q query.Q) (matchTree, error) {
}

var tr matchTree
if wmt, ok := regexpToWordMatchTree(s); ok {
if wmt, ok := regexpToWordMatchTree(s, opt); ok {
// A common search we get is "\bLITERAL\b". Avoid the regex engine and
// provide something faster.
tr = wmt
Expand All @@ -880,7 +886,7 @@ func (d *indexData) newMatchTree(q query.Q) (matchTree, error) {
case *query.And:
var r []matchTree
for _, ch := range s.Children {
ct, err := d.newMatchTree(ch)
ct, err := d.newMatchTree(ch, opt)
if err != nil {
return nil, err
}
Expand All @@ -890,15 +896,15 @@ func (d *indexData) newMatchTree(q query.Q) (matchTree, error) {
case *query.Or:
var r []matchTree
for _, ch := range s.Children {
ct, err := d.newMatchTree(ch)
ct, err := d.newMatchTree(ch, opt)
if err != nil {
return nil, err
}
r = append(r, ct)
}
return &orMatchTree{r}, nil
case *query.Not:
ct, err := d.newMatchTree(s.Child)
ct, err := d.newMatchTree(s.Child, opt)
return &notMatchTree{
child: ct,
}, err
Expand All @@ -908,7 +914,7 @@ func (d *indexData) newMatchTree(q query.Q) (matchTree, error) {
break
}

ct, err := d.newMatchTree(s.Child)
ct, err := d.newMatchTree(s.Child, opt)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -963,7 +969,11 @@ func (d *indexData) newMatchTree(q query.Q) (matchTree, error) {
}, nil

case *query.Symbol:
subMT, err := d.newMatchTree(s.Expr)
// Disable WordMatchTree since we don't support it in symbols yet.
optCopy := opt
optCopy.DisableWordMatchOptimization = true

subMT, err := d.newMatchTree(s.Expr, optCopy)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -1131,7 +1141,10 @@ func (d *indexData) newSubstringMatchTree(s *query.Substring) (matchTree, error)
return st, nil
}

func regexpToWordMatchTree(q *query.Regexp) (_ *wordMatchTree, ok bool) {
func regexpToWordMatchTree(q *query.Regexp, opt matchTreeOpt) (_ *wordMatchTree, ok bool) {
if opt.DisableWordMatchOptimization {
return nil, false
}
// Needs to be case sensitive
if !q.CaseSensitive || q.Regexp.Flags&syntax.FoldCase != 0 {
return nil, false
Expand Down
63 changes: 42 additions & 21 deletions matchtree_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ func TestEquivalentQuerySkipRegexpTree(t *testing.T) {
{query: "contain(er|ing)", skip: false},
{query: "thread (needle|haystack)", skip: true},
{query: "thread (needle|)", skip: false},
{query: `\bthread\b case:yes`, skip: true}, // word search
{query: `\bthread\b case:no`, skip: false},
}

for _, tt := range tests {
Expand All @@ -179,14 +181,15 @@ func TestEquivalentQuerySkipRegexpTree(t *testing.T) {
}

d := &indexData{}
mt, err := d.newMatchTree(q)
mt, err := d.newMatchTree(q, matchTreeOpt{})
if err != nil {
t.Errorf("Error creating match tree from query: %s", q)
continue
}

visitMatchTree(mt, func(m matchTree) {
if _, ok := m.(*regexpMatchTree); ok && tt.skip {
t.Log(mt)
t.Errorf("Expected regexpMatchTree to be skipped for query: %s", q)
}
})
Expand All @@ -203,7 +206,7 @@ func TestWordSearchSkipRegexpTree(t *testing.T) {
}

d := &indexData{}
mt, err := d.newMatchTree(q)
mt, err := d.newMatchTree(q, matchTreeOpt{})
if err != nil {
t.Fatalf("Error creating match tree from query: %s", q)
}
Expand All @@ -227,38 +230,56 @@ func TestWordSearchSkipRegexpTree(t *testing.T) {
}
}

func TestSymbolMatchRegexAll(t *testing.T) {
func TestSymbolMatchTree(t *testing.T) {
tests := []struct {
query string
all bool
query string
substr string
regex string
regexAll bool
}{
{query: ".*", all: true},
{query: "(a|b)", all: false},
{query: "b.r", all: false},
{query: "sym:.*", regex: "(?i)(?-s:.)*", regexAll: true},
{query: "sym:(ab|cd)", regex: "(?i)ab|cd"},
{query: "sym:b.r", regex: "(?i)b(?-s:.)r"},
{query: "sym:horse", substr: "horse"},
{query: `sym:\bthread\b case:yes`, regex: `\bthread\b`}, // check we disable word search opt
{query: `sym:\bthread\b case:no`, regex: `(?i)\bthread\b`},
}

for _, tt := range tests {
q, err := query.Parse("sym:" + tt.query)
q, err := query.Parse(tt.query)
if err != nil {
t.Errorf("Error parsing query: %s", "sym:"+tt.query)
t.Errorf("Error parsing query: %s", tt.query)
continue
}

d := &indexData{}
mt, err := d.newMatchTree(q)
mt, err := d.newMatchTree(q, matchTreeOpt{})
if err != nil {
t.Errorf("Error creating match tree from query: %s", q)
continue
}

regexMT, ok := mt.(*symbolRegexpMatchTree)
if !ok {
t.Errorf("Expected symbol regex match tree from query: %s, got %v", q, mt)
continue
var (
substr string
regex string
regexAll bool
)
if substrMT, ok := mt.(*symbolSubstrMatchTree); ok {
substr = substrMT.query.Pattern
}
if regexMT, ok := mt.(*symbolRegexpMatchTree); ok {
regex = regexMT.regexp.String()
regexAll = regexMT.all
}

if regexMT.all != tt.all {
t.Errorf("Expected property all: %t from query: %s", tt.all, q)
if substr != tt.substr {
t.Errorf("%s has unexpected substring:\nwant: %q\ngot: %q", tt.query, tt.substr, substr)
}
if regex != tt.regex {
t.Errorf("%s has unexpected regex:\nwant: %q\ngot: %q", tt.query, tt.regex, regex)
}
if regexAll != tt.regexAll {
t.Errorf("%s has unexpected regexAll: want=%t got=%t", tt.query, tt.regexAll, regexAll)
}
}
}
Expand All @@ -269,7 +290,7 @@ func TestRepoSet(t *testing.T) {
fileBranchMasks: []uint64{1, 1, 1, 1, 1, 1},
repos: []uint16{0, 0, 1, 2, 3, 3},
}
mt, err := d.newMatchTree(&query.RepoSet{Set: map[string]bool{"r1": true, "r3": true, "r99": true}})
mt, err := d.newMatchTree(&query.RepoSet{Set: map[string]bool{"r1": true, "r3": true, "r99": true}}, matchTreeOpt{})
if err != nil {
t.Fatal(err)
}
Expand All @@ -292,7 +313,7 @@ func TestRepo(t *testing.T) {
fileBranchMasks: []uint64{1, 1, 1, 1, 1},
repos: []uint16{0, 0, 1, 0, 1},
}
mt, err := d.newMatchTree(&query.Repo{Regexp: regexp.MustCompile("ar")})
mt, err := d.newMatchTree(&query.Repo{Regexp: regexp.MustCompile("ar")}, matchTreeOpt{})
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -323,7 +344,7 @@ func TestBranchesRepos(t *testing.T) {
mt, err := d.newMatchTree(&query.BranchesRepos{List: []query.BranchRepos{
{Branch: "b1", Repos: roaring.BitmapOf(hash("bar"))},
{Branch: "b2", Repos: roaring.BitmapOf(hash("bar"))},
}})
}}, matchTreeOpt{})
if err != nil {
t.Fatal(err)
}
Expand All @@ -348,7 +369,7 @@ func TestRepoIDs(t *testing.T) {
fileBranchMasks: []uint64{1, 1, 1, 1, 1, 1},
repos: []uint16{0, 0, 1, 2, 3, 3},
}
mt, err := d.newMatchTree(&query.RepoIDs{Repos: roaring.BitmapOf(1, 3, 99)})
mt, err := d.newMatchTree(&query.RepoIDs{Repos: roaring.BitmapOf(1, 3, 99)}, matchTreeOpt{})
if err != nil {
t.Fatal(err)
}
Expand Down

0 comments on commit 2fdbd60

Please sign in to comment.