Skip to content

Commit

Permalink
Merge pull request G-Research#49 from G-Research/regexpopt
Browse files Browse the repository at this point in the history
Regexp "optimiser", use to implement basic NRE support
  • Loading branch information
dgl authored Oct 15, 2019
2 parents d7b5a58 + b23b837 commit 1196e55
Show file tree
Hide file tree
Showing 4 changed files with 242 additions and 12 deletions.
103 changes: 103 additions & 0 deletions pkg/regexputil/opt.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Package regexputil implements some introspection on parsed regexps.
package regexputil

import (
"regexp/syntax"
)

// Regexp represents a parsed regexp. Use Parse to make one.
type Regexp struct {
pt *syntax.Regexp
}

// Parse takes a regexp in Perl syntax (as implemented by regexp/syntax) and
// returns a Regexp, for introspecting the regexp.
func Parse(regexp string) (Regexp, error) {
pt, err := syntax.Parse(regexp, syntax.Perl)
if err != nil {
return Regexp{}, err
}
pt = pt.Simplify()
return Regexp{pt: pt}, nil
}

// List returns a list of fixed matches if the regexp only matches a fixed set
// of alternate strings.
func (r Regexp) List() ([]string, bool) {
potential := r.recurse([]*syntax.Regexp{r.pt}, 0, 0)
if len(potential) == 0 {
return nil, false
}
items := make([]string, len(potential))
for i, p := range potential {
items[i] = string(p)
}
return items, true
}

func (r Regexp) recurse(p []*syntax.Regexp, parentOp syntax.Op, level int) [][]rune {
var potential [][]rune
// Concat, Capture, Alternate, (a leaf op) is the most we handle
if level > 3 {
return nil
}
for i, s := range p {
// Ignore (?i), etc.
if (s.Flags & (syntax.FoldCase | syntax.DotNL)) != 0 {
return nil
}
switch s.Op {
case syntax.OpConcat:
if len(potential) != 0 {
return nil
}
potential = r.recurse(s.Sub, s.Op, level+1)
case syntax.OpCapture:
if len(potential) != 0 {
return nil
}
potential = r.recurse(s.Sub, s.Op, level+1)
case syntax.OpAlternate:
if len(potential) != 0 {
return nil
}
potential = r.recurse(s.Sub, s.Op, level+1)
case syntax.OpCharClass:
if len(potential) > 0 && parentOp != syntax.OpAlternate {
return nil
}
// Rune is a list of pairs of character ranges in this case, we have to expand
for i := 0; i < len(s.Rune); i += 2 {
start, end := s.Rune[i], s.Rune[i+1]
for r := start; r <= end; r++ {
potential = append(potential, []rune{r})
}
}
case syntax.OpLiteral:
if len(potential) > 0 && parentOp != syntax.OpAlternate {
return nil
}
potential = append(potential, s.Rune)
case syntax.OpEmptyMatch:
if len(potential) > 0 && parentOp != syntax.OpAlternate {
return nil
}
potential = append(potential, []rune{})
// We only handle full matches on single lines as that's what Prometheus uses.
// ^ and $ are therefore meaningless, but people do use them, so ignore if in the correct place.
case syntax.OpBeginText:
if i != 0 {
// invalid, skip
return nil
}
case syntax.OpEndText:
if i != len(p)-1 {
// invalid, skip
return nil
}
default:
return nil // unknown op, can't do anything
}
}
return potential
}
60 changes: 60 additions & 0 deletions pkg/regexputil/opt_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package regexputil_test

import (
"testing"

"github.com/G-Research/geras/pkg/regexputil"
)

func TestList(t *testing.T) {
for _, r := range []struct {
re string
err bool
ok bool
expect []string
}{
// Normal cases we expect to handle
{"", false, true, []string{""}},
{"xx|yy", false, true, []string{"xx", "yy"}},
{"xx|yy", false, true, []string{"xx", "yy"}},
{"(xx|yy)", false, true, []string{"xx", "yy"}},
{"(?:xx|yy)", false, true, []string{"xx", "yy"}},
{"^(?:xx|yy)$", false, true, []string{"xx", "yy"}},
{"^(xx|yy)$", false, true, []string{"xx", "yy"}},
{"^(xx|yy)", false, true, []string{"xx", "yy"}},
{"^(xx|yy)", false, true, []string{"xx", "yy"}},
// Handled as CharClasses instead of Literals, so test explicitly.
{"x|y|z", false, true, []string{"x", "y", "z"}},
{"([ab])", false, true, []string{"a", "b"}},
{"[a-f]", false, true, []string{"a", "b", "c", "d", "e", "f"}},

// We don't handle some aspect
{"(^xx|^yy)", false, false, nil}, // Would be easy, but who writes regexps like that anyway.
{"^$", false, false, nil}, // Better BeginText/EndText handling could fix this too, probably not worth it.
{"^(?i:xx|yy)$", false, false, nil},
{"(xx|yy.)", false, false, nil},
{"(xx|yy.*)", false, false, nil},
{"(xx|yy).", false, false, nil},
{"(xx|yy).*", false, false, nil},
{"(xx|yy)*", false, false, nil},
{".", false, false, nil},
} {
p, err := regexputil.Parse(r.re)
if err != nil {
if !r.err {
t.Errorf("%q: got err, want !err", r.re)
}
continue
}
if r.err {
t.Errorf("%q: got !err, want err", r.re)
}
l, ok := p.List()
if ok != r.ok {
t.Errorf("%q: got %v, want %v", r.re, ok, r.ok)
}
if len(l) != len(r.expect) {
t.Errorf("%q: got %d items, want %d", r.re, len(l), len(r.expect))
}
}
}
19 changes: 15 additions & 4 deletions pkg/store/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@ import (
"github.com/thanos-io/thanos/pkg/store/storepb"
"golang.org/x/net/trace"
"google.golang.org/grpc/codes"
healthpb "google.golang.org/grpc/health/grpc_health_v1"
"google.golang.org/grpc/status"

opentsdb "github.com/G-Research/opentsdb-goclient/client"
healthpb "google.golang.org/grpc/health/grpc_health_v1"

"github.com/G-Research/geras/pkg/regexputil"
)

type OpenTSDBStore struct {
Expand Down Expand Up @@ -309,9 +311,9 @@ func (store *OpenTSDBStore) getMatchingMetricNames(matcher storepb.LabelMatcher)
return []string{value}, nil
} else if matcher.Type == storepb.LabelMatcher_NEQ {
// we can support this, but we should not.
return nil, errors.New("NEQ is not supported for __name__ label")
return nil, errors.New("NEQ (!=) is not supported for __name__")
} else if matcher.Type == storepb.LabelMatcher_NRE {
return nil, errors.New("NRE is not supported")
return nil, errors.New("NRE (!~) is not supported for __name__")
} else if matcher.Type == storepb.LabelMatcher_RE {
// TODO: Regexp matchers working on the actual name seems like the least
// surprising behaviour. Actually document this.
Expand Down Expand Up @@ -462,7 +464,16 @@ func convertPromQLMatcherToFilter(matcher storepb.LabelMatcher) (opentsdb.Filter
f.Type = "not_literal_or"
f.FilterExp = matcher.Value
case storepb.LabelMatcher_NRE:
return opentsdb.Filter{}, errors.New("LabelMatcher_NRE is not supported")
rx, err := regexputil.Parse(matcher.Value)
if err != nil {
return opentsdb.Filter{}, err
}
items, ok := rx.List()
if !ok {
return opentsdb.Filter{}, errors.New("NRE (!~) is not supported for general regexps, only fixed alternatives like '(a|b)'")
}
f.Type = "not_literal_or"
f.FilterExp = strings.Join(items, "|")
case storepb.LabelMatcher_RE:
f.Type = "regexp"
f.FilterExp = matcher.Value
Expand Down
72 changes: 64 additions & 8 deletions pkg/store/store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,11 +132,28 @@ func TestComposeOpenTSDBQuery(t *testing.T) {
MinTime: 0,
MaxTime: 100,
Matchers: []storepb.LabelMatcher{
{
Type: storepb.LabelMatcher_EQ,
Name: "__name__",
Value: "test.metric2",
},
{
Type: storepb.LabelMatcher_NRE,
Name: "host",
Value: ".*",
},
},
MaxResolutionWindow: 5,
Aggregates: []storepb.Aggr{storepb.Aggr_MIN},
PartialResponseDisabled: false,
},
err: errors.New("NRE (!~) is not supported for general regexps, only fixed alternatives like '(a|b)'"),
},
{
req: storepb.SeriesRequest{
MinTime: 0,
MaxTime: 100,
Matchers: []storepb.LabelMatcher{
{
Type: storepb.LabelMatcher_NRE,
Name: "__name__",
Expand All @@ -147,7 +164,7 @@ func TestComposeOpenTSDBQuery(t *testing.T) {
Aggregates: []storepb.Aggr{storepb.Aggr_MIN},
PartialResponseDisabled: false,
},
err: errors.New("LabelMatcher_NRE is not supported"),
err: errors.New("NRE (!~) is not supported for __name__"),
},
{
req: storepb.SeriesRequest{
Expand Down Expand Up @@ -199,6 +216,45 @@ func TestComposeOpenTSDBQuery(t *testing.T) {
},
},
},
{
req: storepb.SeriesRequest{
MinTime: 0,
MaxTime: 100,
Matchers: []storepb.LabelMatcher{
{
Type: storepb.LabelMatcher_NRE,
Name: "host",
Value: "(aa|bb)",
},
{
Type: storepb.LabelMatcher_EQ,
Name: "__name__",
Value: "test.metric2",
},
},
MaxResolutionWindow: 5,
Aggregates: []storepb.Aggr{storepb.Aggr_MIN},
PartialResponseDisabled: false,
},
tsdbQ: &opentsdb.QueryParam{
Start: 0,
End: 100,
Queries: []opentsdb.SubQuery{
{
Aggregator: "none",
Metric: "test.metric2",
Fiters: []opentsdb.Filter{
{
Type: "not_literal_or",
Tagk: "host",
FilterExp: "aa|bb",
GroupBy: true,
},
},
},
},
},
},
{
req: storepb.SeriesRequest{
MinTime: 0,
Expand Down Expand Up @@ -401,7 +457,7 @@ func TestComposeOpenTSDBQuery(t *testing.T) {
},
}

for _, test := range testCases {
for i, test := range testCases {
allowedMetrics := regexp.MustCompile(".*")
if test.allowedMetrics != nil {
allowedMetrics = test.allowedMetrics
Expand All @@ -416,27 +472,27 @@ func TestComposeOpenTSDBQuery(t *testing.T) {
p, _, err := store.composeOpenTSDBQuery(&test.req)
if test.err != nil {
if test.err.Error() != err.Error() {
t.Error("not expected error")
t.Errorf("%d: not expected error, got %v, want %v", i, err, test.err)
}
continue
}
if err != nil {
t.Error(err)
}
if len(p.Queries) != len(test.tsdbQ.Queries) {
t.Errorf("expected %d queries, got %d", len(test.tsdbQ.Queries), len(p.Queries))
t.Errorf("%d: expected %d queries, got %d", i, len(test.tsdbQ.Queries), len(p.Queries))
}
if len(test.tsdbQ.Queries) == 0 {
continue
}
// test the requested ranges
if test.tsdbQ.Start.(int) != int(p.Start.(int64)) ||
test.tsdbQ.End.(int) != int(p.End.(int64)) {
t.Errorf("requested range is not equal to sent range (%d - %d) != (%d - %d)",
p.Start, p.End, test.tsdbQ.Start, test.tsdbQ.End)
t.Errorf("%d: requested range is not equal to sent range (%d - %d) != (%d - %d)",
i, p.Start, p.End, test.tsdbQ.Start, test.tsdbQ.End)
}
if len(p.Queries) != len(test.tsdbQ.Queries) {
t.Errorf("number of subqueries does not match")
t.Errorf("%d: number of subqueries does not match", i)
}
for _, referenceQ := range test.tsdbQ.Queries {
match := false
Expand Down Expand Up @@ -472,7 +528,7 @@ func TestComposeOpenTSDBQuery(t *testing.T) {
break
}
if !match {
t.Errorf("there is no matching subquery for %v", referenceQ)
t.Errorf("%d: there is no matching subquery for %v", i, referenceQ)
}
}
}
Expand Down

0 comments on commit 1196e55

Please sign in to comment.