Skip to content

Commit 99aa596

Browse files
Ability to differentiate between nested fields and those with .
+ Reference: https://issues.couchbase.com/browse/MB-55699 + bleve uses "." as the path separator for nested field names. This can conflict with those fields whose names contain "." within them - which is allowed. + The proposal here is to decorate field names under the hood within backticks to preserve their true meaning. So for example .. - ``` `a.b` ``` is a single unnested field name - ``` `a`.`b` ``` is a nested field name with ``` `b` ``` being a child field of ``` `a` ``` + Here are the ramifications of this approach: - While indexing, users can still specify field names as they appear in their JSON documents. Underneath the hood, however, these field names will now be registered with their decorated versions to avoid ambiguity. - While querying, users can still specify fields as they expect to see them within their JSON documents. Note that it will be the user's responsibility to differentiate between nested field names and others. For example, consider an index mapping over this kind of a document: ``` { "x": { "y": "1" }, "x.y": "2" } ``` The searches that'd work here are .. 1. ```{"field": "`x.y`", "match": 2}``` 2. ```{"field": "x.y", "match": 1}``` 3. ```{"field": "`x`.`y`", "match": 1}``` - Users will also be responsible for specifying sort keys, facet fields, highlight fields accordingly in their search requests. For example .. ``` x : interpreted as `x` `x` : interpreted as `x` x.y : interpreted as `x`.`y` `x.y` : interpreted as `x.y` `x`.`y` : interpreted as `x`.`y` ``` - In the search response, users will now see decorated names for fragments, locations and facets to avoid any ambiguous interpretation of the field names.
1 parent a8beab1 commit 99aa596

37 files changed

+337
-179
lines changed

examples_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ func ExampleNewHighlight() {
321321
panic(err)
322322
}
323323

324-
fmt.Println(searchResults.Hits[0].Fragments["Name"][0])
324+
fmt.Println(searchResults.Hits[0].Fragments["`Name`"][0])
325325
// Output:
326326
// great <mark>nameless</mark> one
327327
}
@@ -335,7 +335,7 @@ func ExampleNewHighlightWithStyle() {
335335
panic(err)
336336
}
337337

338-
fmt.Println(searchResults.Hits[0].Fragments["Name"][0])
338+
fmt.Println(searchResults.Hits[0].Fragments["`Name`"][0])
339339
// Output:
340340
// great nameless one
341341
}
@@ -446,7 +446,7 @@ func ExampleSearchRequest_SortByCustom() {
446446
searchRequest := NewSearchRequest(query)
447447
searchRequest.SortByCustom(search.SortOrder{
448448
&search.SortField{
449-
Field: "Age",
449+
Field: "`Age`",
450450
Missing: search.SortFieldMissingFirst,
451451
},
452452
&search.SortDocID{},

http/handlers_test.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -292,9 +292,9 @@ func TestHandlers(t *testing.T) {
292292
},
293293
Status: http.StatusOK,
294294
ResponseMatch: map[string]bool{
295-
`"id":"a"`: true,
296-
`"body":"test"`: true,
297-
`"name":"a"`: true,
295+
"\"id\":\"a\"": true,
296+
"\"`body`\":\"test\"": true,
297+
"\"`name`\":\"a\"": true,
298298
},
299299
},
300300
{
@@ -483,10 +483,10 @@ func TestHandlers(t *testing.T) {
483483
},
484484
Status: http.StatusOK,
485485
ResponseMatch: map[string]bool{
486-
`"fields":`: true,
487-
`"name"`: true,
488-
`"body"`: true,
489-
`"_all"`: true,
486+
"\"fields\"": true,
487+
"\"`name`\"": true,
488+
"\"`body`\"": true,
489+
"\"_all\"": true,
490490
},
491491
},
492492
{

index_impl.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ import (
3434
"github.com/blevesearch/bleve/v2/search/collector"
3535
"github.com/blevesearch/bleve/v2/search/facet"
3636
"github.com/blevesearch/bleve/v2/search/highlight"
37+
"github.com/blevesearch/bleve/v2/util"
3738
index "github.com/blevesearch/bleve_index_api"
3839
)
3940

@@ -631,7 +632,7 @@ func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest,
631632
fieldsToLoad := deDuplicate(req.Fields)
632633
for _, f := range fieldsToLoad {
633634
doc.VisitFields(func(docF index.Field) {
634-
if f == "*" || docF.Name() == f {
635+
if f == "*" || docF.Name() == util.CleansePath(f) {
635636
var value interface{}
636637
switch docF := docF.(type) {
637638
case index.TextField:
@@ -683,7 +684,7 @@ func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest,
683684
}
684685
}
685686
for _, hf := range highlightFields {
686-
highlighter.BestFragmentsInField(hit, doc, hf, 1)
687+
highlighter.BestFragmentsInField(hit, doc, util.CleansePath(hf), 1)
687688
}
688689
}
689690
} else if doc == nil {
@@ -737,6 +738,7 @@ func (i *indexImpl) FieldDict(field string) (index.FieldDict, error) {
737738
return nil, err
738739
}
739740

741+
field = util.CleansePath(field)
740742
fieldDict, err := indexReader.FieldDict(field)
741743
if err != nil {
742744
i.mutex.RUnlock()
@@ -764,6 +766,7 @@ func (i *indexImpl) FieldDictRange(field string, startTerm []byte, endTerm []byt
764766
return nil, err
765767
}
766768

769+
field = util.CleansePath(field)
767770
fieldDict, err := indexReader.FieldDictRange(field, startTerm, endTerm)
768771
if err != nil {
769772
i.mutex.RUnlock()
@@ -791,6 +794,7 @@ func (i *indexImpl) FieldDictPrefix(field string, termPrefix []byte) (index.Fiel
791794
return nil, err
792795
}
793796

797+
field = util.CleansePath(field)
794798
fieldDict, err := indexReader.FieldDictPrefix(field, termPrefix)
795799
if err != nil {
796800
i.mutex.RUnlock()

index_test.go

Lines changed: 62 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ import (
2424
"os"
2525
"path/filepath"
2626
"reflect"
27-
"sort"
2827
"strconv"
2928
"strings"
3029
"sync"
@@ -199,7 +198,7 @@ func TestCrud(t *testing.T) {
199198
}
200199
foundNameField := false
201200
doc.VisitFields(func(field index.Field) {
202-
if field.Name() == "name" && string(field.Value()) == "marty" {
201+
if field.Name() == "`name`" && string(field.Value()) == "marty" {
203202
foundNameField = true
204203
}
205204
})
@@ -212,9 +211,9 @@ func TestCrud(t *testing.T) {
212211
t.Fatal(err)
213212
}
214213
expectedFields := map[string]bool{
215-
"_all": false,
216-
"name": false,
217-
"desc": false,
214+
"_all": false,
215+
"`name`": false,
216+
"`desc`": false,
218217
}
219218
if len(fields) < len(expectedFields) {
220219
t.Fatalf("expected %d fields got %d", len(expectedFields), len(fields))
@@ -399,10 +398,11 @@ func TestBytesRead(t *testing.T) {
399398
if err != nil {
400399
t.Error(err)
401400
}
401+
402402
stats, _ := idx.StatsMap()["index"].(map[string]interface{})
403403
prevBytesRead, _ := stats["num_bytes_read_at_query_time"].(uint64)
404-
if prevBytesRead != 32349 && res.BytesRead == prevBytesRead {
405-
t.Fatalf("expected bytes read for query string 32349, got %v",
404+
if prevBytesRead != 32475 && res.BytesRead == prevBytesRead {
405+
t.Fatalf("expected bytes read for query string 32475, got %v",
406406
prevBytesRead)
407407
}
408408

@@ -580,8 +580,8 @@ func TestBytesReadStored(t *testing.T) {
580580

581581
stats, _ := idx.StatsMap()["index"].(map[string]interface{})
582582
bytesRead, _ := stats["num_bytes_read_at_query_time"].(uint64)
583-
if bytesRead != 25928 && bytesRead == res.BytesRead {
584-
t.Fatalf("expected the bytes read stat to be around 25928, got %v", bytesRead)
583+
if bytesRead != 26054 && bytesRead == res.BytesRead {
584+
t.Fatalf("expected the bytes read stat to be around 26054, got %v", bytesRead)
585585
}
586586
prevBytesRead := bytesRead
587587

@@ -651,8 +651,8 @@ func TestBytesReadStored(t *testing.T) {
651651

652652
stats, _ = idx1.StatsMap()["index"].(map[string]interface{})
653653
bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64)
654-
if bytesRead != 18114 && bytesRead == res.BytesRead {
655-
t.Fatalf("expected the bytes read stat to be around 18114, got %v", bytesRead)
654+
if bytesRead != 18240 && bytesRead == res.BytesRead {
655+
t.Fatalf("expected the bytes read stat to be around 18240, got %v", bytesRead)
656656
}
657657
prevBytesRead = bytesRead
658658

@@ -920,17 +920,17 @@ func TestStoredFieldPreserved(t *testing.T) {
920920
if len(res.Hits) != 1 {
921921
t.Fatalf("expected 1 hit, got %d", len(res.Hits))
922922
}
923-
if res.Hits[0].Fields["name"] != "Marty" {
924-
t.Errorf("expected 'Marty' got '%s'", res.Hits[0].Fields["name"])
923+
if res.Hits[0].Fields["`name`"] != "Marty" {
924+
t.Errorf("expected 'Marty' got '%s'", res.Hits[0].Fields["`name`"])
925925
}
926-
if res.Hits[0].Fields["desc"] != "GopherCON India" {
927-
t.Errorf("expected 'GopherCON India' got '%s'", res.Hits[0].Fields["desc"])
926+
if res.Hits[0].Fields["`desc`"] != "GopherCON India" {
927+
t.Errorf("expected 'GopherCON India' got '%s'", res.Hits[0].Fields["`desc`"])
928928
}
929-
if res.Hits[0].Fields["num"] != float64(1) {
930-
t.Errorf("expected '1' got '%v'", res.Hits[0].Fields["num"])
929+
if res.Hits[0].Fields["`num`"] != float64(1) {
930+
t.Errorf("expected '1' got '%v'", res.Hits[0].Fields["`num`"])
931931
}
932-
if res.Hits[0].Fields["bool"] != true {
933-
t.Errorf("expected 'true' got '%v'", res.Hits[0].Fields["bool"])
932+
if res.Hits[0].Fields["`bool`"] != true {
933+
t.Errorf("expected 'true' got '%v'", res.Hits[0].Fields["`bool`"])
934934
}
935935
}
936936

@@ -1185,7 +1185,7 @@ func TestSortMatchSearch(t *testing.T) {
11851185
}
11861186
prev := ""
11871187
for _, hit := range sr.Hits {
1188-
val := hit.Fields["Day"].(string)
1188+
val := hit.Fields["`Day`"].(string)
11891189
if prev > val {
11901190
t.Errorf("Hits must be sorted by 'Day'. Found '%s' before '%s'", prev, val)
11911191
}
@@ -1533,14 +1533,14 @@ func TestTermVectorArrayPositions(t *testing.T) {
15331533
if results.Total != 1 {
15341534
t.Fatalf("expected 1 result, got %d", results.Total)
15351535
}
1536-
if len(results.Hits[0].Locations["Messages"]["second"]) < 1 {
1536+
if len(results.Hits[0].Locations["`Messages`"]["second"]) < 1 {
15371537
t.Fatalf("expected at least one location")
15381538
}
1539-
if len(results.Hits[0].Locations["Messages"]["second"][0].ArrayPositions) < 1 {
1539+
if len(results.Hits[0].Locations["`Messages`"]["second"][0].ArrayPositions) < 1 {
15401540
t.Fatalf("expected at least one location array position")
15411541
}
1542-
if results.Hits[0].Locations["Messages"]["second"][0].ArrayPositions[0] != 1 {
1543-
t.Fatalf("expected array position 1, got %d", results.Hits[0].Locations["Messages"]["second"][0].ArrayPositions[0])
1542+
if results.Hits[0].Locations["`Messages`"]["second"][0].ArrayPositions[0] != 1 {
1543+
t.Fatalf("expected array position 1, got %d", results.Hits[0].Locations["`Messages`"]["second"][0].ArrayPositions[0])
15441544
}
15451545

15461546
// repeat search for this document in Messages field
@@ -1555,14 +1555,14 @@ func TestTermVectorArrayPositions(t *testing.T) {
15551555
if results.Total != 1 {
15561556
t.Fatalf("expected 1 result, got %d", results.Total)
15571557
}
1558-
if len(results.Hits[0].Locations["Messages"]["third"]) < 1 {
1558+
if len(results.Hits[0].Locations["`Messages`"]["third"]) < 1 {
15591559
t.Fatalf("expected at least one location")
15601560
}
1561-
if len(results.Hits[0].Locations["Messages"]["third"][0].ArrayPositions) < 1 {
1561+
if len(results.Hits[0].Locations["`Messages`"]["third"][0].ArrayPositions) < 1 {
15621562
t.Fatalf("expected at least one location array position")
15631563
}
1564-
if results.Hits[0].Locations["Messages"]["third"][0].ArrayPositions[0] != 2 {
1565-
t.Fatalf("expected array position 2, got %d", results.Hits[0].Locations["Messages"]["third"][0].ArrayPositions[0])
1564+
if results.Hits[0].Locations["`Messages`"]["third"][0].ArrayPositions[0] != 2 {
1565+
t.Fatalf("expected array position 2, got %d", results.Hits[0].Locations["`Messages`"]["third"][0].ArrayPositions[0])
15661566
}
15671567

15681568
err = index.Close()
@@ -1611,14 +1611,21 @@ func TestDocumentStaticMapping(t *testing.T) {
16111611
if err != nil {
16121612
t.Fatal(err)
16131613
}
1614-
sort.Strings(fields)
1615-
expectedFields := []string{"Date", "Numeric", "Text", "_all"}
1614+
expectedFields := map[string]bool{
1615+
"`Date`": false,
1616+
"`Numeric`": false,
1617+
"`Text`": false,
1618+
"_all": false,
1619+
}
16161620
if len(fields) < len(expectedFields) {
1617-
t.Fatalf("invalid field count: %d", len(fields))
1621+
t.Fatalf("expected %d fields got %d", len(expectedFields), len(fields))
1622+
}
1623+
for _, f := range fields {
1624+
expectedFields[f] = true
16181625
}
1619-
for i, expected := range expectedFields {
1620-
if expected != fields[i] {
1621-
t.Fatalf("unexpected field[%d]: %s", i, fields[i])
1626+
for ef, efp := range expectedFields {
1627+
if !efp {
1628+
t.Errorf("field %s is missing", ef)
16221629
}
16231630
}
16241631

@@ -1791,13 +1798,13 @@ func TestDocumentFieldArrayPositionsBug295(t *testing.T) {
17911798
if results.Total != 1 {
17921799
t.Fatalf("expected 1 result, got %d", results.Total)
17931800
}
1794-
if len(results.Hits[0].Locations["Messages"]["bleve"]) != 2 {
1795-
t.Fatalf("expected 2 locations of 'bleve', got %d", len(results.Hits[0].Locations["Messages"]["bleve"]))
1801+
if len(results.Hits[0].Locations["`Messages`"]["bleve"]) != 2 {
1802+
t.Fatalf("expected 2 locations of 'bleve', got %d", len(results.Hits[0].Locations["`Messages`"]["bleve"]))
17961803
}
1797-
if results.Hits[0].Locations["Messages"]["bleve"][0].ArrayPositions[0] != 0 {
1804+
if results.Hits[0].Locations["`Messages`"]["bleve"][0].ArrayPositions[0] != 0 {
17981805
t.Errorf("expected array position to be 0")
17991806
}
1800-
if results.Hits[0].Locations["Messages"]["bleve"][1].ArrayPositions[0] != 1 {
1807+
if results.Hits[0].Locations["`Messages`"]["bleve"][1].ArrayPositions[0] != 1 {
18011808
t.Errorf("expected array position to be 1")
18021809
}
18031810

@@ -1812,13 +1819,13 @@ func TestDocumentFieldArrayPositionsBug295(t *testing.T) {
18121819
if results.Total != 1 {
18131820
t.Fatalf("expected 1 result, got %d", results.Total)
18141821
}
1815-
if len(results.Hits[0].Locations["Messages"]["bleve"]) != 2 {
1816-
t.Fatalf("expected 2 locations of 'bleve', got %d", len(results.Hits[0].Locations["Messages"]["bleve"]))
1822+
if len(results.Hits[0].Locations["`Messages`"]["bleve"]) != 2 {
1823+
t.Fatalf("expected 2 locations of 'bleve', got %d", len(results.Hits[0].Locations["`Messages`"]["bleve"]))
18171824
}
1818-
if results.Hits[0].Locations["Messages"]["bleve"][0].ArrayPositions[0] != 0 {
1825+
if results.Hits[0].Locations["`Messages`"]["bleve"][0].ArrayPositions[0] != 0 {
18191826
t.Errorf("expected array position to be 0")
18201827
}
1821-
if results.Hits[0].Locations["Messages"]["bleve"][1].ArrayPositions[0] != 1 {
1828+
if results.Hits[0].Locations["`Messages`"]["bleve"][1].ArrayPositions[0] != 1 {
18221829
t.Errorf("expected array position to be 1")
18231830
}
18241831

@@ -2389,7 +2396,7 @@ func TestBatchMerge(t *testing.T) {
23892396

23902397
foundNameField := false
23912398
doc.VisitFields(func(field index.Field) {
2392-
if field.Name() == "name" && string(field.Value()) == "blahblah" {
2399+
if field.Name() == "`name`" && string(field.Value()) == "blahblah" {
23932400
foundNameField = true
23942401
}
23952402
})
@@ -2403,10 +2410,10 @@ func TestBatchMerge(t *testing.T) {
24032410
}
24042411

24052412
expectedFields := map[string]bool{
2406-
"_all": false,
2407-
"name": false,
2408-
"desc": false,
2409-
"country": false,
2413+
"_all": false,
2414+
"`name`": false,
2415+
"`desc`": false,
2416+
"`country`": false,
24102417
}
24112418
if len(fields) < len(expectedFields) {
24122419
t.Fatalf("expected %d fields got %d", len(expectedFields), len(fields))
@@ -2837,7 +2844,7 @@ func TestCopyIndex(t *testing.T) {
28372844
}
28382845
foundNameField := false
28392846
doc.VisitFields(func(field index.Field) {
2840-
if field.Name() == "name" && string(field.Value()) == "tester" {
2847+
if field.Name() == "`name`" && string(field.Value()) == "tester" {
28412848
foundNameField = true
28422849
}
28432850
})
@@ -2850,9 +2857,9 @@ func TestCopyIndex(t *testing.T) {
28502857
t.Fatal(err)
28512858
}
28522859
expectedFields := map[string]bool{
2853-
"_all": false,
2854-
"name": false,
2855-
"desc": false,
2860+
"_all": false,
2861+
"`name`": false,
2862+
"`desc`": false,
28562863
}
28572864
if len(fields) < len(expectedFields) {
28582865
t.Fatalf("expected %d fields got %d", len(expectedFields), len(fields))
@@ -2906,7 +2913,7 @@ func TestCopyIndex(t *testing.T) {
29062913
}
29072914
copyFoundNameField := false
29082915
copyDoc.VisitFields(func(field index.Field) {
2909-
if field.Name() == "name" && string(field.Value()) == "tester" {
2916+
if field.Name() == "`name`" && string(field.Value()) == "tester" {
29102917
copyFoundNameField = true
29112918
}
29122919
})
@@ -2919,9 +2926,9 @@ func TestCopyIndex(t *testing.T) {
29192926
t.Fatal(err)
29202927
}
29212928
copyExpectedFields := map[string]bool{
2922-
"_all": false,
2923-
"name": false,
2924-
"desc": false,
2929+
"_all": false,
2930+
"`name`": false,
2931+
"`desc`": false,
29252932
}
29262933
if len(copyFields) < len(copyExpectedFields) {
29272934
t.Fatalf("expected %d fields got %d", len(copyExpectedFields), len(copyFields))

0 commit comments

Comments
 (0)