Skip to content

Commit

Permalink
add documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
ekzhu committed Jan 22, 2016
1 parent 6ed89ef commit 5a99d6d
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 17 deletions.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# Golang LSH
# LSH for Go

This library includes various Locality Sensitive Hashing (LSH) algorithms
for the approximate nearest neighbour search problem.
for the approximate nearest neighbour search problem in L2 metric space.
The family of LSH functions for L2 is the work of
[Mayur Datar et al.](http://www.cs.princeton.edu/courses/archive/spr05/cos598E/bib/p253-datar.pdf).

Currently includes:

Expand Down
20 changes: 14 additions & 6 deletions basic.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,25 @@ type basicHashTableKey string

type hashTable map[basicHashTableKey]hashTableBucket

// BasicLsh implements the original LSH algorithm for L2 distance.
type BasicLsh struct {
// Embedded LSH parameters; LshForest embeds the same type.
*lshParams
// Number of distinct hashes in the index.
count int
// Hash tables; NewBasicLsh allocates l of them.
tables []hashTable
}

// NewBasicLsh creates a basic LSH for L2 distance.
// dim is the dimensionality of the data, l is the number of hash
// tables to use, m is the number of hash values to concatenate to
// form the key to the hash tables, w is the slot size for the
// family of LSH functions.
func NewBasicLsh(dim, l, m int, w float64) *BasicLsh {
// Allocate all l hash tables up front; each one must be made
// explicitly because writing to a nil map panics.
tables := make([]hashTable, l)
for i := range tables {
tables[i] = make(hashTable)
}
return &BasicLsh{
lshParams: newLshParams(dim, l, m, w),
count: 0,
tables: tables,
}
}
Expand All @@ -41,7 +44,8 @@ func (index *BasicLsh) toBasicHashTableKeys(keys []hashTableKey) []basicHashTabl
return basicKeys
}

// Insert adds a new key to the LSH
// Insert adds a new data point to the LSH.
// id is the unique identifier for the data point.
func (index *BasicLsh) Insert(point Point, id int) {
// Apply hash functions
hvs := index.toBasicHashTableKeys(index.hash(point))
Expand All @@ -62,8 +66,12 @@ func (index *BasicLsh) Insert(point Point, id int) {
wg.Wait()
}

// Query searches for candidate keys given the signature
// and writes them to an output channel
// Query finds the ids of approximate nearest neighbour candidates
// for the query point q and writes them, in unsorted order,
// to the output channel, out.
// The basic LSH does not support k-NN query directly,
// however, it can be used as a part of a k-NN query function.
// Note: the function does not close the channel.
func (index *BasicLsh) Query(q Point, out chan int) {
// Apply hash functions
hvs := index.toBasicHashTableKeys(index.hash(q))
Expand Down
21 changes: 17 additions & 4 deletions forest.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,13 +106,20 @@ func (tree *prefixTree) lookup(maxLevel int, tableKey hashTableKey) []int {
return indices
}

// LshForest implements the LSH Forest algorithm by Mayank Bawa et al.
// It supports both nearest neighbour candidate query and k-NN query.
type LshForest struct {
// Embedded LSH parameters; BasicLsh embeds the same type.
*lshParams
// Prefix trees; NewLshForest allocates l of them.
trees []prefixTree
}

// NewLshForest creates a new LSH Forest for L2 distance.
// dim is the dimensionality of the data, l is the number of hash
// tables to use, m is the number of hash values to concatenate to
// form the key to the hash tables, w is the slot size for the
// family of LSH functions.
func NewLshForest(dim, l, m int, w float64) *LshForest {
trees := make([]prefixTree, l)
for i, _ := range trees {
Expand All @@ -129,7 +136,8 @@ func NewLshForest(dim, l, m int, w float64) *LshForest {
}
}

// Insert adds a point into the LSH Forest index.
// Insert adds a new data point to the LSH Forest.
// id is the unique identifier for the data point.
func (index *LshForest) Insert(point Point, id int) {
// Apply hash functions.
hvs := index.hash(point)
Expand Down Expand Up @@ -163,8 +171,10 @@ func (index *LshForest) queryHelper(maxLevel int, tableKeys []hashTableKey) []in
return indices
}

// Query searches for candidate keys given the signature
// and writes them to an output channel
// Query finds the ids of approximate nearest neighbour candidates
// for the query point q and writes them, in unsorted order,
// to the output channel, out.
// Note: the function does not close the channel.
func (index *LshForest) Query(q Point, out chan int) {
// Apply hash functions
hvs := index.hash(q)
Expand All @@ -173,7 +183,10 @@ func (index *LshForest) Query(q Point, out chan int) {
}
}

// QueryK queries for the top k approximate closest neighbours.
// QueryKnn finds the ids of the top-k approximate nearest neighbours
// of the query point q and writes them, in unsorted order,
// to the output channel, out.
// Note: the function does not close the channel.
func (index *LshForest) QueryKnn(q Point, k int, out chan int) {
// Apply hash functions
hvs := index.hash(q)
Expand Down
6 changes: 3 additions & 3 deletions metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ package lsh

import "math"

// Point is a vector that we are trying to index and query
// Point is a vector in the L2 metric space, stored as a slice of
// float64 coordinates.
type Point []float64

// Dot returns the dot product of two Point vectors
// Dot returns the dot product of two points.
func (p Point) Dot(q Point) float64 {
s := 0.0
for i := 0; i < len(p); i++ {
Expand All @@ -14,7 +14,7 @@ func (p Point) Dot(q Point) float64 {
return s
}

// L2 returns the L2 distance of two Point vectors
// L2 returns the L2 distance of two points.
func (p Point) L2(q Point) float64 {
s := 0.0
for i := 0; i < len(p); i++ {
Expand Down
18 changes: 17 additions & 1 deletion multiprobe.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ func (h *perturbSetHeap) Pop() interface{} {
return x
}

// MultiprobeLsh implements the Multi-probe LSH algorithm by Qin Lv et al.
// The Multi-probe LSH does not support k-NN query directly.
type MultiprobeLsh struct {
*BasicLsh
// The size of our probe sequence.
Expand All @@ -92,6 +94,14 @@ type MultiprobeLsh struct {
perturbVecs [][][]int
}

// NewMultiprobeLsh creates a new Multi-probe LSH for L2 distance.
// dim is the dimensionality of the data, l is the number of hash
// tables to use, m is the number of hash values to concatenate to
// form the key to the hash tables, and w is the slot size for the
// family of LSH functions.
// t is the number of perturbation vectors that will be applied to
// each query.
// Increasing t increases the running time of the Query function.
func NewMultiprobeLsh(dim, l, m int, w float64, t int) *MultiprobeLsh {
index := &MultiprobeLsh{
BasicLsh: NewBasicLsh(dim, l, m, w),
Expand Down Expand Up @@ -230,7 +240,13 @@ func (index *MultiprobeLsh) perturb(baseKey []hashTableKey, perturbation [][]int
return perturbedTableKeys
}

func (index *MultiprobeLsh) QueryKnn(q Point, k int, out chan int) {
// Query finds the ids of nearest neighbour candidates
// for the query point q and writes them
// to the output channel, out.
// Multi-probe LSH does not support k-NN query directly,
// however, it can be used as a part of a k-NN query function.
// Note: the function does not close the channel.
func (index *MultiprobeLsh) Query(q Point, out chan int) {
baseKey := index.hash(q)
seens := make(map[int]bool)
for i := 0; i < len(index.perturbVecs)+1; i++ {
Expand Down
2 changes: 1 addition & 1 deletion multiprobe_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ func Test_MultiprobeLshQueryKnn(t *testing.T) {
for i, key := range insertedKeys {
result := make(chan int)
go func() {
lsh.QueryKnn(points[i], 10, result)
lsh.Query(points[i], result)
close(result)
}()
found := false
Expand Down

0 comments on commit 5a99d6d

Please sign in to comment.