Skip to content

Commit 03b445e

Browse files
committed
Added support for SparseDoubleVector
1 parent a037ade commit 03b445e

File tree

4 files changed

+54
-28
lines changed

4 files changed

+54
-28
lines changed

pom.xml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
<url>http://www.opensource.org/licenses/mit-license.php</url>
2323
</license>
2424
</licenses>
25+
2526
<developers>
2627
<developer>
2728
<name>Thibault Debatty</name>
@@ -30,6 +31,7 @@
3031
<organizationUrl>http://debatty.info</organizationUrl>
3132
</developer>
3233
</developers>
34+
3335
<scm>
3436
<connection>scm:git:git@github.com:tdebatty/java-LSH.git</connection>
3537
<developerConnection>scm:git:git@github.com:tdebatty/java-LSH.git</developerConnection>
@@ -136,7 +138,7 @@
136138
<dependency>
137139
<groupId>${project.groupId}</groupId>
138140
<artifactId>java-string-similarity</artifactId>
139-
<version>0.11</version>
141+
<version>0.12</version>
140142
</dependency>
141143
</dependencies>
142144
</project>

src/main/java/info/debatty/java/lsh/LSHSuperBit.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
package info.debatty.java.lsh;
2626

27+
import info.debatty.java.utils.SparseDoubleVector;
2728
import info.debatty.java.utils.SparseIntegerVector;
2829
import java.io.Serializable;
2930

@@ -50,7 +51,7 @@ public class LSHSuperBit extends LSH implements Serializable {
5051
* @param b buckets (per stage)
5152
* @param n dimensionality
5253
*/
53-
public LSHSuperBit(int s, int b, int n) {
54+
public LSHSuperBit(int s, int b, int n) throws Exception {
5455
super(s, b, n);
5556

5657
// SuperBit code length
@@ -62,6 +63,10 @@ public LSHSuperBit(int s, int b, int n) {
6263
}
6364
}
6465

66+
if (superbit == 0) {
67+
throw new Exception("Superbit is 0 with parameters: s=" + s + " b=" + b + " n=" + n);
68+
}
69+
6570
this.sb = new SuperBit(n, superbit, K/superbit);
6671
}
6772

@@ -82,6 +87,10 @@ public int[] hash(SparseIntegerVector vector){
8287
return hashSignature(sb.signature(vector));
8388
}
8489

90+
public int[] hash(SparseDoubleVector vector) {
91+
return hashSignature(sb.signature(vector));
92+
}
93+
8594
/**
8695
* Hash (bin) a vector in s stages into b buckets
8796
* @param vector

src/main/java/info/debatty/java/lsh/SuperBit.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
package info.debatty.java.lsh;
2626

27+
import info.debatty.java.utils.SparseDoubleVector;
2728
import info.debatty.java.utils.SparseIntegerVector;
2829
import java.io.Serializable;
2930
import java.util.Random;
@@ -157,6 +158,14 @@ public boolean[] signature(SparseIntegerVector vector) {
157158
return sig;
158159
}
159160

161+
public boolean[] signature(SparseDoubleVector vector) {
162+
boolean[] sig = new boolean[this.hyperplanes.length];
163+
for (int i = 0; i < this.hyperplanes.length; i++) {
164+
sig[i] = (vector.dotProduct(this.hyperplanes[i]) >= 0);
165+
}
166+
return sig;
167+
}
168+
160169
/**
161170
* Compute the signature of this vector
162171
* @param vector

src/main/java/info/debatty/java/lsh/examples/LSHSuperBitExample.java

Lines changed: 32 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626

2727
import info.debatty.java.lsh.LSHSuperBit;
2828
import java.util.Random;
29+
import java.util.logging.Level;
30+
import java.util.logging.Logger;
2931

3032
/**
3133
*
@@ -37,36 +39,40 @@ public class LSHSuperBitExample {
3739
* @param args the command line arguments
3840
*/
3941
public static void main(String[] args) {
40-
int count = 100;
4142

42-
// R^n
43-
int n = 3;
44-
45-
int stages = 2;
46-
int buckets = 4;
47-
48-
// Produce some vectors in R^n
49-
Random r = new Random();
50-
double[][] vectors = new double[count][];
51-
for (int i = 0; i < count; i++) {
52-
vectors[i] = new double[n];
43+
int count = 100;
44+
45+
// R^n
46+
int n = 3;
47+
48+
int stages = 2;
49+
int buckets = 4;
5350

54-
for (int j = 0; j < n; j++) {
55-
vectors[i][j] = r.nextGaussian();
51+
// Produce some vectors in R^n
52+
Random r = new Random();
53+
double[][] vectors = new double[count][];
54+
for (int i = 0; i < count; i++) {
55+
vectors[i] = new double[n];
56+
57+
for (int j = 0; j < n; j++) {
58+
vectors[i][j] = r.nextGaussian();
59+
}
5660
}
57-
}
58-
59-
LSHSuperBit lsh = new LSHSuperBit(stages, buckets, n);
60-
61-
// Compute a SuperBit signature, and a LSH hash
62-
for (int i = 0; i < count; i++) {
63-
double[] vector = vectors[i];
64-
int[] hash = lsh.hash(vector);
65-
for (double v : vector) {
66-
System.out.printf("%6.2f\t", v);
61+
try {
62+
LSHSuperBit lsh = new LSHSuperBit(stages, buckets, n);
63+
64+
// Compute a SuperBit signature, and a LSH hash
65+
for (int i = 0; i < count; i++) {
66+
double[] vector = vectors[i];
67+
int[] hash = lsh.hash(vector);
68+
for (double v : vector) {
69+
System.out.printf("%6.2f\t", v);
70+
}
71+
System.out.print(hash[0]);
72+
System.out.print("\n");
6773
}
68-
System.out.print(hash[0]);
69-
System.out.print("\n");
74+
} catch (Exception ex) {
75+
Logger.getLogger(LSHSuperBitExample.class.getName()).log(Level.SEVERE, null, ex);
7076
}
7177
}
7278
}

0 commit comments

Comments
 (0)