forked from muesli/regommend
-
Notifications
You must be signed in to change notification settings - Fork 0
/
regommendtable.go
282 lines (234 loc) · 6.22 KB
/
regommendtable.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
/*
* Simple recommendation engine
* Copyright (c) 2014, Christian Muehlhaeuser <muesli@gmail.com>
*
* For license see LICENSE.txt
*/
package regommend
import (
"errors"
"log"
_ "fmt"
"sort"
"sync"
_ "time"
)
// RegommendTable is a named collection of items held by the engine.
// The embedded RWMutex guards every field below.
type RegommendTable struct {
	sync.RWMutex

	// name identifies the table.
	name string
	// items holds every item of the table, indexed by the item's key.
	items map[interface{}]*RegommendItem

	// logger receives the table's log output; nil disables logging.
	logger *log.Logger

	// loadData, when set, is asked for an item whose key is not in the table.
	loadData func(interface{}) *RegommendItem
	// addedItem, when set, is called after an item has been added.
	addedItem func(*RegommendItem)
	// aboutToDeleteItem, when set, is called before an item is deleted.
	aboutToDeleteItem func(*RegommendItem)
}
// Count reports the number of items currently stored in the table.
func (table *RegommendTable) Count() int {
	table.RLock()
	n := len(table.items)
	table.RUnlock()

	return n
}
// SetDataLoader installs the data-loader callback, which is called when a
// key that does not exist in the table is requested.
func (table *RegommendTable) SetDataLoader(f func(interface{}) *RegommendItem) {
	table.Lock()
	table.loadData = f
	table.Unlock()
}
// SetAddedItemCallback installs the callback that is called every time a
// new item has been added to the table.
func (table *RegommendTable) SetAddedItemCallback(f func(*RegommendItem)) {
	table.Lock()
	table.addedItem = f
	table.Unlock()
}
// SetAboutToDeleteItemCallback installs the callback that is called every
// time an item is about to be removed from the table.
func (table *RegommendTable) SetAboutToDeleteItemCallback(f func(*RegommendItem)) {
	table.Lock()
	table.aboutToDeleteItem = f
	table.Unlock()
}
// SetLogger installs the logger this table writes its log output to.
func (table *RegommendTable) SetLogger(logger *log.Logger) {
	table.Lock()
	table.logger = logger
	table.Unlock()
}
// Add stores a new item under key, replacing whatever was stored under that
// key before, and returns a pointer to the stored item.
// Parameter key is the item's engine-key.
// Parameter data is the item's value.
func (table *RegommendTable) Add(key interface{}, data map[interface{}]float64) *RegommendItem {
	entry := CreateRegommendItem(key, data)
	stored := &entry

	table.Lock()
	table.items[key] = stored
	// Copy the callback while the lock is held so it can be invoked after
	// unlocking; the mutex is not held while user code runs.
	notify := table.addedItem
	table.Unlock()

	// Trigger callback after adding an item to engine.
	if notify == nil {
		return stored
	}
	notify(stored)
	return stored
}
// Delete removes the item stored under key from the table and returns it.
// An error is returned when no item is stored under key. The
// aboutToDeleteItem callback, if configured, is called with the item before
// it is removed.
func (table *RegommendTable) Delete(key interface{}) (*RegommendItem, error) {
	table.RLock()
	victim, found := table.items[key]
	// Copy the callback while the read lock is held so it can be invoked
	// after unlocking; the table mutex is not held while user code runs.
	onDelete := table.aboutToDeleteItem
	table.RUnlock()

	if !found {
		return nil, errors.New("Key not found in engine")
	}

	// Trigger callback before deleting the item from engine.
	if onDelete != nil {
		onDelete(victim)
	}

	// The item's read lock is taken first, then the table's write lock;
	// both are released (in reverse order) when the function returns.
	victim.RLock()
	defer victim.RUnlock()
	table.Lock()
	defer table.Unlock()

	delete(table.items, key)
	return victim, nil
}
// Exists reports whether an item is stored under key. It only inspects the
// table: unlike Value it never calls the loadData callback for a missing key.
func (table *RegommendTable) Exists(key interface{}) bool {
	table.RLock()
	defer table.RUnlock()

	_, present := table.items[key]
	return present
}
// Value returns the item stored under key.
//
// When no item is stored under key and a data-loader callback is configured,
// the loader is called with key. If it returns a non-nil item, that item's
// data is added to the table under key and the stored item is returned.
//
// An error is returned when the key is missing and either no data-loader is
// configured or the data-loader returned nil.
func (table *RegommendTable) Value(key interface{}) (*RegommendItem, error) {
	table.RLock()
	r, ok := table.items[key]
	// Copy the callback while the read lock is held so the loader runs
	// without the table mutex being held.
	loadData := table.loadData
	table.RUnlock()

	if ok {
		return r, nil
	}
	if loadData == nil {
		return nil, errors.New("Key not found in engine")
	}

	// Item doesn't exist in engine. Try and fetch it with the data-loader.
	item := loadData(key)
	if item == nil {
		return nil, errors.New("Key not found and could not be loaded into engine")
	}

	// Add wraps the loaded data in a new item and stores that one. Return the
	// stored instance rather than the loader's temporary, so the caller holds
	// the same item that later lookups of key will yield.
	return table.Add(key, item.data), nil
}
// Flush drops every item by replacing the table's item map with an empty
// one. The aboutToDeleteItem callback is not called for the dropped items.
func (table *RegommendTable) Flush() {
	table.Lock()
	defer table.Unlock()

	table.log("Flushing table", table.name)

	fresh := map[interface{}]*RegommendItem{}
	table.items = fresh
}
// DistancePair couples a key with a floating-point value computed for it.
type DistancePair struct {
	Key      interface{}
	Distance float64
}

// DistancePairList is a slice of DistancePair that implements sort.Interface
// and orders its elements by descending Distance.
type DistancePairList []DistancePair

// Len returns the number of pairs in the list.
func (l DistancePairList) Len() int {
	return len(l)
}

// Less reports whether the pair at index i has a greater Distance than the
// pair at index j, which makes sorting put the largest Distance first.
func (l DistancePairList) Less(i, j int) bool {
	return l[j].Distance < l[i].Distance
}

// Swap exchanges the pairs at indexes i and j.
func (l DistancePairList) Swap(i, j int) {
	held := l[i]
	l[i] = l[j]
	l[j] = held
}
// Recommend returns the entries found in the data of the other items of the
// table that the item stored under key does not contain itself.
//
// Every other item (as returned by Neighbors) contributes the values of its
// entries multiplied by a weight: its Distance divided by the sum of all
// Distances, capped at 1. Items whose weight is not positive contribute
// nothing. The contributions for the same entry are summed up into a score.
//
// The result holds one DistancePair per recommended entry, with the entry's
// key in Key and its score in Distance, sorted by descending score. When
// Neighbors or Value fail, their error is returned.
func (table *RegommendTable) Recommend(key interface{}) (DistancePairList, error) {
	dists, err := table.Neighbors(key)
	if err != nil {
		return dists, err
	}

	sitem, err := table.Value(key)
	if err != nil {
		return dists, err
	}
	smap := sitem.Data()

	totalDistance := 0.0
	for _, v := range dists {
		totalDistance += v.Distance
	}

	recs := make(map[interface{}]float64)
	for _, v := range dists {
		weight := v.Distance / totalDistance
		// Deliberately written as !(weight > 0) instead of weight <= 0: the
		// division yields NaN when totalDistance is 0 or NaN, every ordered
		// comparison with NaN is false, and a NaN weight would otherwise slip
		// through and turn all scores into NaN.
		if !(weight > 0) {
			continue
		}
		if weight > 1 {
			weight = 1
		}

		ditem, lookupErr := table.Value(v.Key)
		if lookupErr != nil {
			// The item is no longer available (e.g. it was deleted after
			// Neighbors ran); it cannot contribute anything.
			continue
		}

		for entry, x := range ditem.Data() {
			if _, known := smap[entry]; known {
				// key already knows this entry, don't recommend it
				continue
			}
			recs[entry] += x * weight
		}
	}

	recsList := make(DistancePairList, 0, len(recs))
	for entry, score := range recs {
		recsList = append(recsList, DistancePair{
			Key:      entry,
			Distance: score,
		})
	}

	sort.Sort(recsList)
	return recsList, nil
}
// Neighbors compares the item stored under key with every other item of the
// table and returns one DistancePair per other item: Key is the other item's
// key, Distance is the result of cosineSim for the two items' data. The list
// is sorted by descending Distance.
//
// When the item for key cannot be obtained through Value, an empty list and
// Value's error are returned.
func (table *RegommendTable) Neighbors(key interface{}) (DistancePairList, error) {
	// Value may take the table's write lock (when it loads a missing item),
	// so it has to run before the read lock below is acquired.
	sitem, err := table.Value(key)
	if err != nil {
		return DistancePairList{}, err
	}
	smap := sitem.Data()

	table.RLock()
	defer table.RUnlock()

	dists := make(DistancePairList, 0, len(table.items))
	for k, ditem := range table.items {
		if k == key {
			// Don't compare the item with itself.
			continue
		}

		dists = append(dists, DistancePair{
			Key:      k,
			Distance: cosineSim(smap, ditem.Data()),
		})
	}

	sort.Sort(dists)
	return dists, nil
}
// log is an internal convenience method that writes its arguments as one
// line to the table's logger. It does nothing when no logger is set.
func (table *RegommendTable) log(v ...interface{}) {
	if table.logger == nil {
		return
	}

	// Expand v so that Println receives the individual arguments. Passing the
	// slice itself makes Println format it as a single value, which wraps the
	// whole message in brackets ("[Flushing table name]").
	table.logger.Println(v...)
}