Skip to content

Commit 40a618f

Browse files
committed
feat: flatten elasticsearch queries
1 parent 519e1a3 commit 40a618f

File tree

6 files changed

+335
-116
lines changed

6 files changed

+335
-116
lines changed

v2/evalostic.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ type Evalostic struct {
1616
ahoCorasick ahocorasick.AhoCorasick
1717
strings map[string]int
1818
mapping map[int][]int // which string can be found in which condition
19+
orig []node // original conditions for export
1920
}
2021

2122
// New builds a new Evalostic matcher that compiles all conditions to one big rule set that can be applied to strings.
@@ -37,6 +38,7 @@ func New(conditions []string) (*Evalostic, error) {
3738
if err != nil {
3839
return nil, fmt.Errorf("condition %d: %s", i, err)
3940
}
41+
e.orig = append(e.orig, root)
4042
condStrings, _ := extractStrings(root)
4143
for _, str := range condStrings {
4244
strI, ok := e.strings[str]

v2/evalostic_test.go

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ func TestEvalosticAgainstStringContains(t *testing.T) {
271271
}
272272
}
273273

274-
func assert(t *testing.T, b bool) {
274+
func assertTrue(t *testing.T, b bool) {
275275
_, f, l, _ := runtime.Caller(1)
276276
if !b {
277277
t.Fatalf("assertion failed in %s:%d", f, l)
@@ -298,23 +298,23 @@ func TestEvalostic(t *testing.T) {
298298
`"1" AND NOT "2"`,
299299
`"1" AND NOT "2"`,
300300
})
301-
assert(t, err == nil)
302-
assert(t, sameIntegers(e.Match("foo"), []int{0}))
303-
assert(t, sameIntegers(e.Match("bar"), []int{0}))
304-
assert(t, sameIntegers(e.Match("foo bar"), []int{0}))
305-
assert(t, sameIntegers(e.Match("baz"), []int{}))
306-
assert(t, sameIntegers(e.Match("baz qux"), []int{1}))
307-
assert(t, sameIntegers(e.Match("qux baz"), []int{1}))
308-
assert(t, sameIntegers(e.Match("ab"), []int{}))
309-
assert(t, sameIntegers(e.Match("ac"), []int{2}))
310-
assert(t, sameIntegers(e.Match("ad"), []int{2}))
311-
assert(t, sameIntegers(e.Match("bc"), []int{2}))
312-
assert(t, sameIntegers(e.Match("bd"), []int{2}))
313-
assert(t, sameIntegers(e.Match("cd"), []int{}))
314-
assert(t, sameIntegers(e.Match("abcd"), []int{2}))
315-
assert(t, sameIntegers(e.Match("1"), []int{3, 4}))
316-
assert(t, sameIntegers(e.Match("2"), []int{}))
317-
assert(t, sameIntegers(e.Match("12"), []int{}))
301+
assertTrue(t, err == nil)
302+
assertTrue(t, sameIntegers(e.Match("foo"), []int{0}))
303+
assertTrue(t, sameIntegers(e.Match("bar"), []int{0}))
304+
assertTrue(t, sameIntegers(e.Match("foo bar"), []int{0}))
305+
assertTrue(t, sameIntegers(e.Match("baz"), []int{}))
306+
assertTrue(t, sameIntegers(e.Match("baz qux"), []int{1}))
307+
assertTrue(t, sameIntegers(e.Match("qux baz"), []int{1}))
308+
assertTrue(t, sameIntegers(e.Match("ab"), []int{}))
309+
assertTrue(t, sameIntegers(e.Match("ac"), []int{2}))
310+
assertTrue(t, sameIntegers(e.Match("ad"), []int{2}))
311+
assertTrue(t, sameIntegers(e.Match("bc"), []int{2}))
312+
assertTrue(t, sameIntegers(e.Match("bd"), []int{2}))
313+
assertTrue(t, sameIntegers(e.Match("cd"), []int{}))
314+
assertTrue(t, sameIntegers(e.Match("abcd"), []int{2}))
315+
assertTrue(t, sameIntegers(e.Match("1"), []int{3, 4}))
316+
assertTrue(t, sameIntegers(e.Match("2"), []int{}))
317+
assertTrue(t, sameIntegers(e.Match("12"), []int{}))
318318
}
319319

320320
func ExampleMatch() {

v2/export.go

Lines changed: 86 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -17,94 +17,114 @@ func (e *Evalostic) ExportElasticSearchQuery(wildcardField string, useMatchPhras
1717
// `"foo" OR "bar"` will be compiled to
1818
// {"bool":{"should":[{"wildcard":{"raw":{"case_insensitive":true,"value":"*foo*"}}},{"wildcard":{"raw":{"case_insensitive":true,"value":"*bar*"}}}]}}
1919
func (e *Evalostic) ExportElasticSearchQueryMap(wildcardField string, useMatchPhrase bool) map[string]interface{} {
20-
indexToStrings := make(map[int]string)
21-
for k, v := range e.strings {
22-
indexToStrings[v] = k
20+
var root node
21+
for _, n := range e.orig {
22+
if root == nil {
23+
root = n
24+
} else {
25+
root = nodeOR{twoSubNodes{root, n}}
26+
}
2327
}
24-
query := e.exportElasticSearchQuerySub(wildcardField, useMatchPhrase, indexToStrings, decisionTreeEntry{value: -1}, e.decisionTree, false)
25-
if query == nil {
26-
return make(map[string]interface{})
28+
return nodeToElasticSearchQuery(root, useMatchPhrase)
29+
}
30+
31+
func nodeToElasticSearchQuery(n node, useMatchPhrase bool) map[string]interface{} {
32+
switch v := n.(type) {
33+
case nodeVAL:
34+
return leafToElasticSearchQuery(v, useMatchPhrase)
35+
case nodeNOT:
36+
return notToElasticSearchQuery(v, useMatchPhrase)
37+
case nodeOR:
38+
return orToElasticSearchQuery(v, useMatchPhrase)
39+
case nodeAND:
40+
return andToElasticSearchQuery(v, useMatchPhrase)
41+
default:
42+
return nil
2743
}
28-
return query
2944
}
3045

3146
var wildcardReplacer = strings.NewReplacer("\\", "\\\\", "*", "\\*", "?", "\\?")
3247

33-
func (e *Evalostic) exportElasticSearchQuerySub(wildcardField string, useMatchPhrase bool, indexToStrings map[int]string, entry decisionTreeEntry, node *decisionTreeNode, not bool) map[string]interface{} {
34-
isLeaf := len(node.outputs) != 0
35-
wildcard := map[string]interface{}{
36-
"wildcard": map[string]interface{}{
37-
wildcardField: map[string]interface{}{
38-
"value": "*" + wildcardReplacer.Replace(indexToStrings[entry.value]) + "*",
39-
"case_insensitive": true,
48+
func notToElasticSearchQuery(n nodeNOT, useMatchPhrase bool) map[string]interface{} {
49+
if not, ok := n.node.(nodeNOT); ok { // check for double negation
50+
return nodeToElasticSearchQuery(not.node, useMatchPhrase)
51+
}
52+
return map[string]interface{}{
53+
"bool": map[string]interface{}{
54+
"must_not": []map[string]interface{}{
55+
nodeToElasticSearchQuery(n.node, useMatchPhrase),
4056
},
4157
},
4258
}
43-
if useMatchPhrase {
44-
wildcard = map[string]interface{}{
45-
"match_phrase": map[string]interface{}{
46-
wildcardField: indexToStrings[entry.value],
47-
},
48-
}
59+
}
60+
61+
func flattenOr(n nodeOR) []node {
62+
var nodes []node
63+
if or, ok := n.node1.(nodeOR); ok {
64+
nodes = append(nodes, flattenOr(or)...)
65+
} else {
66+
nodes = append(nodes, n.node1)
4967
}
50-
if not {
51-
wildcard = map[string]interface{}{
52-
"bool": map[string]interface{}{
53-
"must_not": []interface{}{wildcard},
54-
},
55-
}
68+
if or, ok := n.node2.(nodeOR); ok {
69+
nodes = append(nodes, flattenOr(or)...)
70+
} else {
71+
nodes = append(nodes, n.node2)
5672
}
57-
if entry.value == -1 {
58-
// special case: do not use root node as wildcard
59-
wildcard = nil
73+
return nodes
74+
}
75+
76+
func flattenAnd(n nodeAND) []node {
77+
var nodes []node
78+
if and, ok := n.node1.(nodeAND); ok {
79+
nodes = append(nodes, flattenAnd(and)...)
80+
} else {
81+
nodes = append(nodes, n.node1)
6082
}
61-
if isLeaf && wildcard != nil {
62-
// special case: if it's a leaf, we don't need to process the sub tree
63-
return wildcard
83+
if and, ok := n.node2.(nodeAND); ok {
84+
nodes = append(nodes, flattenAnd(and)...)
85+
} else {
86+
nodes = append(nodes, n.node2)
6487
}
88+
return nodes
89+
}
6590

91+
func orToElasticSearchQuery(n nodeOR, useMatchPhrase bool) map[string]interface{} {
6692
var should []map[string]interface{}
67-
68-
for subEntry, subNode := range node.children {
69-
if subQuery := e.exportElasticSearchQuerySub(wildcardField, useMatchPhrase, indexToStrings, subEntry, subNode, false); subQuery != nil {
70-
should = append(should, subQuery)
71-
}
93+
for _, node := range flattenOr(n) {
94+
should = append(should, nodeToElasticSearchQuery(node, useMatchPhrase))
7295
}
73-
for subEntry, subNode := range node.notChildren {
74-
if subQuery := e.exportElasticSearchQuerySub(wildcardField, useMatchPhrase, indexToStrings, subEntry, subNode, true); subQuery != nil {
75-
should = append(should, subQuery)
76-
}
96+
return map[string]interface{}{
97+
"bool": map[string]interface{}{
98+
"should": should,
99+
},
77100
}
101+
}
78102

79-
toQuery := func(should []map[string]interface{}) map[string]interface{} {
80-
if len(should) == 0 {
81-
return nil
82-
}
83-
var res map[string]interface{}
84-
if len(should) == 1 {
85-
res = should[0]
86-
} else {
87-
res = map[string]interface{}{
88-
"bool": map[string]interface{}{
89-
"should": should,
90-
},
91-
}
92-
}
93-
return res
103+
func andToElasticSearchQuery(n nodeAND, useMatchPhrase bool) map[string]interface{} {
104+
var must []map[string]interface{}
105+
for _, node := range flattenAnd(n) {
106+
must = append(must, nodeToElasticSearchQuery(node, useMatchPhrase))
94107
}
95-
96-
childQuery := toQuery(should)
97-
if childQuery == nil {
98-
return nil
108+
return map[string]interface{}{
109+
"bool": map[string]interface{}{
110+
"must": must,
111+
},
99112
}
100-
if wildcard == nil {
101-
return childQuery
113+
}
114+
115+
func leafToElasticSearchQuery(n nodeVAL, useMatchPhrase bool) map[string]interface{} {
116+
if useMatchPhrase {
117+
return map[string]interface{}{
118+
"match_phrase": map[string]interface{}{
119+
"raw": n.nodeValue,
120+
},
121+
}
102122
}
103123
return map[string]interface{}{
104-
"bool": map[string]interface{}{
105-
"must": []interface{}{
106-
wildcard,
107-
childQuery,
124+
"wildcard": map[string]interface{}{
125+
"raw": map[string]interface{}{
126+
"value": "*" + wildcardReplacer.Replace(n.nodeValue) + "*",
127+
"case_insensitive": true,
108128
},
109129
},
110130
}

0 commit comments

Comments
 (0)