Skip to content

Commit 220b803

Browse files
authored
Merge branch 'master' into regression-bblfshd-mockups
2 parents 7a15422 + 000ef21 commit 220b803

File tree

4 files changed

+96
-61
lines changed

4 files changed

+96
-61
lines changed

internal/function/language.go

Lines changed: 29 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
package function
22

33
import (
4-
"encoding/binary"
54
"fmt"
65
"hash/crc32"
76
"os"
87
"strconv"
8+
"sync"
99

10-
lru "github.com/hashicorp/golang-lru"
1110
enry "github.com/src-d/enry/v2"
1211
"github.com/src-d/go-mysql-server/sql"
1312
)
@@ -27,14 +26,21 @@ func languageCacheSize() int {
2726
return size
2827
}
2928

30-
var languageCache *lru.TwoQueueCache
29+
var (
30+
languageMut sync.Mutex
31+
languageCache sql.KeyValueCache
32+
)
3133

32-
func init() {
33-
var err error
34-
languageCache, err = lru.New2Q(languageCacheSize())
35-
if err != nil {
36-
panic(fmt.Errorf("cannot initialize language cache: %s", err))
// getLanguageCache returns the package-level language cache, creating it on
// first use through ctx.Memory.NewLRUCache with languageCacheSize() as the
// size argument. languageMut is held for the whole call, so the nil check and
// the assignment happen under the same lock.
34+
func getLanguageCache(ctx *sql.Context) sql.KeyValueCache {
35+
languageMut.Lock()
36+
defer languageMut.Unlock()
37+
if languageCache == nil {
38+
// Dispose function is ignored because the cache will never be disposed
39+
// until the program dies.
40+
languageCache, _ = ctx.Memory.NewLRUCache(uint(languageCacheSize()))
3741
}
42+
43+
return languageCache
3844
}
3945

4046
// Language gets the language of a file given its path and
@@ -136,11 +142,13 @@ func (f *Language) Eval(ctx *sql.Context, row sql.Row) (interface{}, error) {
136142
blob = right.([]byte)
137143
}
138144

139-
var hash [8]byte
145+
languageCache := getLanguageCache(ctx)
146+
147+
var hash uint64
140148
if len(blob) > 0 {
141149
hash = languageHash(path, blob)
142-
value, ok := languageCache.Get(hash)
143-
if ok {
150+
value, err := languageCache.Get(hash)
151+
if err == nil {
144152
return value, nil
145153
}
146154
}
@@ -151,38 +159,31 @@ func (f *Language) Eval(ctx *sql.Context, row sql.Row) (interface{}, error) {
151159
}
152160

153161
if len(blob) > 0 {
154-
languageCache.Add(hash, lang)
162+
if err := languageCache.Put(hash, lang); err != nil {
163+
return nil, err
164+
}
155165
}
156166

157167
return lang, nil
158168
}
159169

// languageHash combines the filename checksum and the blob checksum into a
// single cache key. The removed version copied the two 4-byte checksums into
// an [8]byte; the added version returns a uint64 with the filename checksum
// in the upper 32 bits and the blob checksum in the lower 32 bits.
160-
func languageHash(filename string, blob []byte) [8]byte {
170+
func languageHash(filename string, blob []byte) uint64 {
161171
fh := filenameHash(filename)
162172
bh := blobHash(blob)
163173

164-
var result [8]byte
165-
copy(result[:], fh)
166-
copy(result[4:], bh)
167-
return result
174+
return uint64(fh)<<32 | uint64(bh)
168175
}
169176

// blobHash returns the CRC-32 (IEEE) checksum of blob. An empty blob is
// short-circuited: the removed version returned nil, the added version
// returns 0.
170-
func blobHash(blob []byte) []byte {
177+
func blobHash(blob []byte) uint32 {
171178
if len(blob) == 0 {
172-
return nil
179+
return 0
173180
}
174181

175-
n := crc32.ChecksumIEEE(blob)
176-
hash := make([]byte, 4)
177-
binary.LittleEndian.PutUint32(hash, n)
178-
return hash
182+
return crc32.ChecksumIEEE(blob)
179183
}
180184

// filenameHash returns the CRC-32 (IEEE) checksum of the filename's bytes.
// The removed version encoded it as 4 little-endian bytes; the added version
// returns the uint32 directly.
181-
func filenameHash(filename string) []byte {
182-
n := crc32.ChecksumIEEE([]byte(filename))
183-
hash := make([]byte, 4)
184-
binary.LittleEndian.PutUint32(hash, n)
185-
return hash
185+
func filenameHash(filename string) uint32 {
186+
return crc32.ChecksumIEEE([]byte(filename))
186187
}
187188

188189
// Children implements the Expression interface.

internal/function/loc.go

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ var languages = gocloc.NewDefinedLanguages()
1414

1515
var errEmptyInputValues = errors.New("empty input values")
1616

17+
// LOC is a function that returns the count of different types of lines of code.
1718
type LOC struct {
1819
Left sql.Expression
1920
Right sql.Expression
@@ -74,7 +75,11 @@ func (f *LOC) Eval(ctx *sql.Context, row sql.Row) (interface{}, error) {
7475
return nil, err
7576
}
7677

77-
lang := f.getLanguage(path, blob)
78+
lang, err := f.getLanguage(path, blob)
79+
if err != nil {
80+
return nil, err
81+
}
82+
7883
if lang == "" || languages.Langs[lang] == nil {
7984
return nil, nil
8085
}
@@ -137,20 +142,22 @@ func (f *LOC) getInputValues(ctx *sql.Context, row sql.Row) (string, []byte, err
137142
return path, blob, nil
138143
}
139144

// getLanguage returns the language detected for path/blob. It first looks up
// languageHash(path, blob) in the language cache and returns the cached
// string on a hit; otherwise it asks enry.GetLanguage and, only when blob is
// non-empty, stores the result in the cache. In the added version a failed
// Put is returned to the caller as an error.
140-
func (f *LOC) getLanguage(path string, blob []byte) string {
145+
func (f *LOC) getLanguage(path string, blob []byte) (string, error) {
141146
hash := languageHash(path, blob)
142147

143-
value, ok := languageCache.Get(hash)
144-
if ok {
145-
return value.(string)
// NOTE(review): the added lines use the package-level languageCache directly.
// The language.go hunk of this commit removes its init() and only assigns it
// inside getLanguageCache(ctx), so it may still be nil here if that helper has
// not run yet. TODO confirm; consider passing ctx in and calling
// getLanguageCache(ctx) instead.
148+
value, err := languageCache.Get(hash)
149+
if err == nil {
150+
return value.(string), nil
146151
}
147152

148153
lang := enry.GetLanguage(path, blob)
149154
if len(blob) > 0 {
150-
languageCache.Add(hash, lang)
155+
if err := languageCache.Put(hash, lang); err != nil {
156+
return "", err
157+
}
151158
}
152159

153-
return lang
160+
return lang, nil
154161
}
155162

156163
// Children implements the Expression interface.

internal/function/uast.go

Lines changed: 40 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package function
22

33
import (
4-
"crypto/sha1"
54
"encoding/json"
65
"fmt"
76
"hash"
@@ -16,7 +15,6 @@ import (
1615
"github.com/bblfsh/sdk/v3/uast"
1716
"github.com/bblfsh/sdk/v3/uast/nodes"
1817
"github.com/go-kit/kit/metrics/discard"
19-
lru "github.com/hashicorp/golang-lru"
2018
"github.com/sirupsen/logrus"
2119

2220
"github.com/src-d/go-mysql-server/sql"
@@ -53,8 +51,24 @@ func observeQuery(lang, xpath string, t time.Time) func(bool) {
5351
}
5452
}
5553

56-
var uastCache *lru.Cache
57-
var uastMaxBlobSize int
54+
var (
55+
uastmut sync.Mutex
56+
uastCache sql.KeyValueCache
57+
uastCacheSize int
58+
uastMaxBlobSize int
59+
)
60+
// getUASTCache returns the package-level UAST cache, creating it on first use
// through ctx.Memory.NewLRUCache with uastCacheSize as the size argument.
// uastmut is held for the whole call, so the nil check and the assignment
// happen under the same lock.
61+
func getUASTCache(ctx *sql.Context) sql.KeyValueCache {
62+
uastmut.Lock()
63+
defer uastmut.Unlock()
64+
if uastCache == nil {
65+
// Dispose function is ignored because the cache will never be disposed
66+
// until the program dies.
67+
uastCache, _ = ctx.Memory.NewLRUCache(uint(uastCacheSize))
68+
}
69+
70+
return uastCache
71+
}
5872

5973
func init() {
6074
s := os.Getenv(uastCacheSizeKey)
@@ -63,10 +77,7 @@ func init() {
6377
size = defaultUASTCacheSize
6478
}
6579

66-
uastCache, err = lru.New(size)
67-
if err != nil {
68-
panic(fmt.Errorf("cannot initialize UAST cache: %s", err))
69-
}
80+
uastCacheSize = size
7081

7182
uastMaxBlobSize, err = strconv.Atoi(os.Getenv(uastMaxBlobSizeKey))
7283
if err != nil {
@@ -83,7 +94,7 @@ type uastFunc struct {
8394
Lang sql.Expression
8495
XPath sql.Expression
8596

86-
h hash.Hash
97+
h hash.Hash64
8798
m sync.Mutex
8899
}
89100

@@ -151,7 +162,7 @@ func (u *uastFunc) WithChildren(children ...sql.Expression) (sql.Expression, err
151162
Blob: blob,
152163
XPath: xpath,
153164
Lang: lang,
154-
h: sha1.New(),
165+
h: newHash(),
155166
}, nil
156167
}
157168

@@ -234,6 +245,13 @@ func (u *uastFunc) Eval(ctx *sql.Context, row sql.Row) (out interface{}, err err
234245
return u.getUAST(ctx, bytes, lang, xpath, mode)
235246
}
236247

// computeKey derives the cache key for (mode, lang, blob) using the
// receiver's hasher u.h. u.m is held for the duration of the call, so the
// hasher handed to the package-level computeKey is used by one caller at a
// time.
248+
func (u *uastFunc) computeKey(mode, lang string, blob []byte) (uint64, error) {
249+
u.m.Lock()
250+
defer u.m.Unlock()
251+
252+
return computeKey(u.h, mode, lang, blob)
253+
}
254+
237255
func (u *uastFunc) getUAST(
238256
ctx *sql.Context,
239257
blob []byte,
@@ -242,17 +260,17 @@ func (u *uastFunc) getUAST(
242260
) (interface{}, error) {
243261
finish := observeQuery(lang, xpath, time.Now())
244262

245-
u.m.Lock()
246-
key, err := computeKey(u.h, mode.String(), lang, blob)
247-
u.m.Unlock()
248-
263+
key, err := u.computeKey(mode.String(), lang, blob)
249264
if err != nil {
250265
return nil, err
251266
}
252267

268+
uastCache := getUASTCache(ctx)
269+
253270
var node nodes.Node
254-
value, ok := uastCache.Get(key)
255-
if ok {
271+
value, err := uastCache.Get(key)
272+
cacheMiss := err != nil
273+
if !cacheMiss {
256274
node = value.(nodes.Node)
257275
} else {
258276
var err error
@@ -265,7 +283,9 @@ func (u *uastFunc) getUAST(
265283
return nil, err
266284
}
267285

268-
uastCache.Add(key, node)
286+
if err := uastCache.Put(key, node); err != nil {
287+
return nil, err
288+
}
269289
}
270290

271291
var nodeArray nodes.Array
@@ -288,7 +308,7 @@ func (u *uastFunc) getUAST(
288308
return nil, nil
289309
}
290310

291-
finish(ok)
311+
finish(!cacheMiss)
292312

293313
return result, nil
294314
}
@@ -321,7 +341,7 @@ func NewUAST(args ...sql.Expression) (sql.Expression, error) {
321341
Blob: blob,
322342
Lang: lang,
323343
XPath: xpath,
324-
h: sha1.New(),
344+
h: newHash(),
325345
}}, nil
326346
}
327347

@@ -380,7 +400,7 @@ func NewUASTMode(mode, blob, lang sql.Expression) sql.Expression {
380400
Blob: blob,
381401
Lang: lang,
382402
XPath: nil,
383-
h: sha1.New(),
403+
h: newHash(),
384404
}}
385405
}
386406

internal/function/uast_utils.go

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,17 @@ import (
44
"bytes"
55
"fmt"
66
"hash"
7+
"hash/crc64"
78

89
"github.com/bblfsh/go-client/v4/tools"
910
"github.com/bblfsh/sdk/v3/uast/nodes/nodesproto"
1011

11-
"github.com/sirupsen/logrus"
12-
"github.com/src-d/gitbase"
1312
bblfsh "github.com/bblfsh/go-client/v4"
1413
"github.com/bblfsh/sdk/v3/uast/nodes"
15-
errors "gopkg.in/src-d/go-errors.v1"
14+
"github.com/sirupsen/logrus"
15+
"github.com/src-d/gitbase"
1616
"github.com/src-d/go-mysql-server/sql"
17+
errors "gopkg.in/src-d/go-errors.v1"
1718
)
1819

1920
var (
@@ -53,17 +54,23 @@ func exprToString(
5354
return x.(string), nil
5455
}
5556

56-
func computeKey(h hash.Hash, mode, lang string, blob []byte) (string, error) {
// crcTable is the CRC-64 table for the ISO polynomial, built once at package
// level and passed to every hasher created by newHash.
57+
var crcTable = crc64.MakeTable(crc64.ISO)
58+
// newHash returns a new CRC-64 hasher backed by crcTable.
59+
func newHash() hash.Hash64 {
60+
return crc64.New(crcTable)
61+
}
62+
// computeKey resets h, feeds it mode, lang and blob (in that order) through
// writeToHash, and returns the resulting sum. The removed version returned the
// raw digest bytes as a string; the added version returns h.Sum64(). If
// writeToHash fails, the zero key and the error are returned.
63+
func computeKey(h hash.Hash64, mode, lang string, blob []byte) (uint64, error) {
5764
h.Reset()
5865
if err := writeToHash(h, [][]byte{
5966
[]byte(mode),
6067
[]byte(lang),
6168
blob,
6269
}); err != nil {
63-
return "", err
70+
return 0, err
6471
}
6572

66-
return string(h.Sum(nil)), nil
73+
return h.Sum64(), nil
6774
}
6875

6976
func writeToHash(h hash.Hash, elements [][]byte) error {

0 commit comments

Comments
 (0)