-
Notifications
You must be signed in to change notification settings - Fork 1
/
invertedindex.go
76 lines (60 loc) · 1.28 KB
/
invertedindex.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
package invertedindex
import (
"sort"
"strings"
"github.com/deckarep/golang-set"
)
// SEP is the delimiter placed between tokens inside a document string; the
// package-private tokenizer splits documents on it. The value is the single
// rune U+2318.
const SEP = "\u2318"
// InvertedIndex maps a token to the set of IDs of the documents that contain
// that token. AddDoc populates it with uint64 document IDs, and Search looks
// a token up by exact match.
type InvertedIndex map[string]mapset.Set
// NewInvertedIndex allocates an empty, ready-to-use InvertedIndex and returns
// a pointer to it.
func NewInvertedIndex() *InvertedIndex {
	idx := InvertedIndex{}
	return &idx
}
// Size reports how many distinct tokens currently have an entry in the index.
func (x *InvertedIndex) Size() int {
	entries := *x
	return len(entries)
}
// AddDoc splits doc into tokens with tokenizer and records docID in the set
// of every token, creating the set the first time a token is seen.
//
// Adding the same (docID, token) pair more than once relies on the set to
// deduplicate. The receiver may point at a zero-value (nil) InvertedIndex;
// the underlying map is allocated on first use.
func (x *InvertedIndex) AddDoc(docID uint64, doc string) {
	if *x == nil {
		// Writing to a nil map panics, so make the zero value usable.
		*x = make(InvertedIndex)
	}
	for _, word := range tokenizer(doc) {
		// Look the token up once and reuse the result instead of indexing
		// the map again for the insert and for the Add call.
		postings, ok := (*x)[word]
		if !ok {
			postings = mapset.NewSet()
			(*x)[word] = postings
		}
		postings.Add(docID)
	}
}
// Search returns the set of document IDs stored under query, or nil when the
// index has no entry for it. The query is used as a single token and matched
// exactly; it is not passed through tokenizer. The value held by the index is
// returned as-is rather than cloned.
func (x *InvertedIndex) Search(query string) mapset.Set {
	// Indexing a missing key yields the zero value of mapset.Set, which is
	// nil, so no explicit existence check is needed.
	return (*x)[query]
}
// Intersect returns the intersection of x and y. A nil argument is treated as
// an empty set, so the result is then a freshly created empty set rather than
// nil.
func Intersect(x, y mapset.Set) mapset.Set {
	if x != nil && y != nil {
		return x.Intersect(y)
	}
	return mapset.NewSet()
}
// ForwardIndex maps a document ID to that document's counter string: its
// tokens sorted and joined with commas, as stored by AddDoc.
type ForwardIndex map[uint64]string

// NewForwardIndex allocates an empty, ready-to-use ForwardIndex and returns a
// pointer to it.
func NewForwardIndex() *ForwardIndex {
	idx := ForwardIndex{}
	return &idx
}

// Size reports how many documents currently have an entry in the index.
func (x *ForwardIndex) Size() int {
	entries := *x
	return len(entries)
}
// AddDoc stores the counter string for docID, replacing any previous entry
// for the same ID. The counter string is built by splitting doc with
// tokenizer, sorting the tokens lexically, and joining them with commas.
//
// The receiver may point at a zero-value (nil) ForwardIndex; the underlying
// map is allocated on first use.
func (x *ForwardIndex) AddDoc(docID uint64, doc string) {
	if *x == nil {
		// Writing to a nil map panics, so make the zero value usable.
		*x = make(ForwardIndex)
	}
	tokens := tokenizer(doc)
	// Sorting makes the stored string independent of token order in doc.
	sort.Strings(tokens)
	(*x)[docID] = strings.Join(tokens, ",")
}
// ToCounterString returns the counter string stored for docID, or the empty
// string when the index has no entry for that ID.
func (x *ForwardIndex) ToCounterString(docID uint64) string {
	// A missing key yields the zero value of string, which is "".
	return (*x)[docID]
}
// tokenizer breaks doc into tokens at every occurrence of SEP. No trimming or
// filtering is applied, so a doc that does not contain SEP (including the
// empty string) yields a one-element slice holding doc itself.
func tokenizer(doc string) []string {
	// A negative count means "no limit", which is what strings.Split does.
	tokens := strings.SplitN(doc, SEP, -1)
	return tokens
}