Skip to content

Commit

Permalink
MB-62230 - Avoiding unnecessary computations for un-filtered kNN. (#2076)
Browse files Browse the repository at this point in the history

Currently, every kNN query performs a number of additional computations
that are necessary only for pre-filtered kNN.
This PR adds checks so that these computations are performed only when at
least one of the kNN queries in the search request is a filtered kNN query.

Also added some commentary.

---------

Co-authored-by: Abhinav Dangeti <abhinav@couchbase.com>
  • Loading branch information
metonymic-smokey and abhinavdangeti authored Sep 19, 2024
1 parent cf4802e commit ea7f589
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 25 deletions.
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ require (
github.com/bits-and-blooms/bitset v1.12.0
github.com/blevesearch/bleve_index_api v1.1.12
github.com/blevesearch/geo v0.1.20
github.com/blevesearch/go-faiss v1.0.22-0.20240918182005-f19c1d446e92
github.com/blevesearch/go-faiss v1.0.22-0.20240919162919-05a9ee21155a
github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475
github.com/blevesearch/go-porterstemmer v1.0.3
github.com/blevesearch/goleveldb v1.0.1
Expand All @@ -24,7 +24,7 @@ require (
github.com/blevesearch/zapx/v13 v13.3.10
github.com/blevesearch/zapx/v14 v14.3.10
github.com/blevesearch/zapx/v15 v15.3.13
github.com/blevesearch/zapx/v16 v16.1.6-0.20240909182401-e148470cefbe
github.com/blevesearch/zapx/v16 v16.1.6-0.20240919163431-f2ee7670abd9
github.com/couchbase/moss v0.2.0
github.com/golang/protobuf v1.3.2
github.com/spf13/cobra v1.7.0
Expand Down
8 changes: 4 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ github.com/blevesearch/bleve_index_api v1.1.12 h1:P4bw9/G/5rulOF7SJ9l4FsDoo7UFJ+
github.com/blevesearch/bleve_index_api v1.1.12/go.mod h1:PbcwjIcRmjhGbkS/lJCpfgVSMROV6TRubGGAODaK1W8=
github.com/blevesearch/geo v0.1.20 h1:paaSpu2Ewh/tn5DKn/FB5SzvH0EWupxHEIwbCk/QPqM=
github.com/blevesearch/geo v0.1.20/go.mod h1:DVG2QjwHNMFmjo+ZgzrIq2sfCh6rIHzy9d9d0B59I6w=
github.com/blevesearch/go-faiss v1.0.22-0.20240918182005-f19c1d446e92 h1:pDbDTN8dgycpdp9eCzrNp9e6Z4C+UQhCUAZbaarQ6Bs=
github.com/blevesearch/go-faiss v1.0.22-0.20240918182005-f19c1d446e92/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk=
github.com/blevesearch/go-faiss v1.0.22-0.20240919162919-05a9ee21155a h1:mSUfDoOPOLt0OABjiyQq/kQxOzAJmsgIjlAWUPfUDfc=
github.com/blevesearch/go-faiss v1.0.22-0.20240919162919-05a9ee21155a/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk=
github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475 h1:kDy+zgJFJJoJYBvdfBSiZYBbdsUL0XcjHYWezpQBGPA=
github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475/go.mod h1:9eJDeqxJ3E7WnLebQUlPD7ZjSce7AnDb9vjGmMCbD0A=
github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo=
Expand Down Expand Up @@ -43,8 +43,8 @@ github.com/blevesearch/zapx/v14 v14.3.10 h1:SG6xlsL+W6YjhX5N3aEiL/2tcWh3DO75Bnz7
github.com/blevesearch/zapx/v14 v14.3.10/go.mod h1:qqyuR0u230jN1yMmE4FIAuCxmahRQEOehF78m6oTgns=
github.com/blevesearch/zapx/v15 v15.3.13 h1:6EkfaZiPlAxqXz0neniq35my6S48QI94W/wyhnpDHHQ=
github.com/blevesearch/zapx/v15 v15.3.13/go.mod h1:Turk/TNRKj9es7ZpKK95PS7f6D44Y7fAFy8F4LXQtGg=
github.com/blevesearch/zapx/v16 v16.1.6-0.20240909182401-e148470cefbe h1:S1rCvhrU2HqDrRtogYgM52rT5px7o2zFIB3Yo+JPFOU=
github.com/blevesearch/zapx/v16 v16.1.6-0.20240909182401-e148470cefbe/go.mod h1:x9Kg015zbkSXxmE7F+0qeGxpeHJBwkDuxosrrDxYltU=
github.com/blevesearch/zapx/v16 v16.1.6-0.20240919163431-f2ee7670abd9 h1:pSaAZuB/gu5cNhSXrpI6s6xyN3ysVdG+RMqEbHEDx+o=
github.com/blevesearch/zapx/v16 v16.1.6-0.20240919163431-f2ee7670abd9/go.mod h1:R6fi71sVKI+HnzchzfkomFQ5HvMvn3CWTmLBuuUqoTQ=
github.com/couchbase/ghistogram v0.1.0 h1:b95QcQTCzjTUocDXp/uMgSNQi8oj1tGwnJ4bODWZnps=
github.com/couchbase/ghistogram v0.1.0/go.mod h1:s1Jhy76zqfEecpNWJfWUiKZookAFaiGOEoyzgHt9i7k=
github.com/couchbase/moss v0.2.0 h1:VCYrMzFwEryyhRSeI+/b3tRBSeTpi/8gn5Kf6dxqn+o=
Expand Down
43 changes: 25 additions & 18 deletions index/scorch/optimize_knn.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"sync"
"sync/atomic"

"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
segment_api "github.com/blevesearch/scorch_segment_api/v2"
Expand Down Expand Up @@ -64,7 +65,10 @@ func (o *OptimizeVR) Finish() error {
var errorsM sync.Mutex
var errors []error

snapshotGlobalDocNums := o.snapshot.globalDocNums()
var snapshotGlobalDocNums map[int]*roaring.Bitmap
if o.requiresFiltering {
snapshotGlobalDocNums = o.snapshot.globalDocNums()
}

defer o.invokeSearcherEndCallback()

Expand Down Expand Up @@ -94,28 +98,31 @@ func (o *OptimizeVR) Finish() error {
vectorIndexSize := vecIndex.Size()
origSeg.cachedMeta.updateMeta(field, vectorIndexSize)
for _, vr := range vrs {
eligibleVectorInternalIDs := vr.getEligibleDocIDs()
if snapshotGlobalDocNums != nil {
// Only the eligible documents belonging to this segment
// will get filtered out.
// There is no way to determine which doc belongs to which segment
eligibleVectorInternalIDs.And(snapshotGlobalDocNums[index])
}

eligibleLocalDocNums := make([]uint64,
eligibleVectorInternalIDs.Stats().Cardinality)
// get the (segment-)local document numbers
for i, docNum := range eligibleVectorInternalIDs.ToArray() {
localDocNum := o.snapshot.localDocNumFromGlobal(index,
uint64(docNum))
eligibleLocalDocNums[i] = localDocNum
}

var pl segment_api.VecPostingsList
var err error

// for each VR, populate postings list and iterators
// by passing the obtained vector index and getting similar vectors.

// Only applies to filtered kNN.
if vr.eligibleDocIDs != nil && len(vr.eligibleDocIDs) > 0 {
eligibleVectorInternalIDs := vr.getEligibleDocIDs()
if snapshotGlobalDocNums != nil {
// Intersect with the doc numbers recorded for this segment so that
// only the eligible documents belonging to this segment are retained.
// The eligible IDs alone do not indicate which segment a doc belongs to.
eligibleVectorInternalIDs.And(snapshotGlobalDocNums[index])
}

eligibleLocalDocNums := make([]uint64,
eligibleVectorInternalIDs.Stats().Cardinality)
// get the (segment-)local document numbers
for i, docNum := range eligibleVectorInternalIDs.ToArray() {
localDocNum := o.snapshot.localDocNumFromGlobal(index,
uint64(docNum))
eligibleLocalDocNums[i] = localDocNum
}

pl, err = vecIndex.SearchWithFilter(vr.vector, vr.k,
eligibleLocalDocNums, vr.searchParams)
} else {
Expand Down
2 changes: 1 addition & 1 deletion index/scorch/snapshot_index_vr.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ type IndexSnapshotVectorReader struct {
searchParams json.RawMessage

// The following fields are only applicable for vector readers which will
// process kNN queries.
// process pre-filtered kNN queries.
eligibleDocIDs []index.IndexInternalID
}

Expand Down
1 change: 1 addition & 0 deletions search_knn.go
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,7 @@ func (i *indexImpl) runKnnCollector(ctx context.Context, req *SearchRequest, rea
}

if _, ok := filterQ.(*query.MatchAllQuery); ok {
// Equivalent to not having a filter query.
requiresFiltering[idx] = false
continue
}
Expand Down

0 comments on commit ea7f589

Please sign in to comment.