Skip to content

Commit 28042bd

Browse files
Merge pull request #581 from apache/quotient-filter
constructor takes fingerprint length and load factor
2 parents ccdabc4 + 2064c3c commit 28042bd

File tree

3 files changed

+93
-116
lines changed

3 files changed

+93
-116
lines changed

src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java

Lines changed: 42 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,11 @@
3131

3232
public class QuotientFilter extends Filter {
3333

34-
public static final double LOAD_FACTOR = 0.9;
34+
public static final float DEFAULT_LOAD_FACTOR = 0.8f;
3535

36-
int numBitsPerEntry_;
37-
int powerOfTwoSize_;
36+
int lgQ_;
37+
int numFingerprintBits_;
38+
float loadFactor_;
3839
int numEntries_;
3940
int numExpansions_;
4041
BitArray bitArray_;
@@ -45,10 +46,15 @@ public class QuotientFilter extends Filter {
4546
public double avgRunLength_;
4647
public double avgClusterLength_;
4748

48-
public QuotientFilter(final int powerOfTwo, final int numBitsPerEntry) {
49-
powerOfTwoSize_ = powerOfTwo;
50-
numBitsPerEntry_ = numBitsPerEntry;
51-
bitArray_ = makeFilter(getNumSlots(), numBitsPerEntry);
49+
public QuotientFilter(final int lgQ, final int numFingerprintBits) {
50+
this(lgQ, numFingerprintBits, DEFAULT_LOAD_FACTOR);
51+
}
52+
53+
public QuotientFilter(final int lgQ, final int numFingerprintBits, final float loadFactor) {
54+
lgQ_ = lgQ;
55+
numFingerprintBits_ = numFingerprintBits;
56+
loadFactor_ = loadFactor;
57+
bitArray_ = makeFilter(getNumSlots(), getNumBitsPerEntry());
5258
numExpansions_ = 0;
5359
//hash_type = XxHash.hashLong ; //HashType.xxh;
5460
}
@@ -66,31 +72,31 @@ public int getNumExpansions() {
6672
}
6773

6874
public long getMaxEntriesBeforeExpansion() {
69-
return (long)(getNumSlots() * LOAD_FACTOR);
75+
return (long)(getNumSlots() * loadFactor_);
7076
}
7177

7278
BitArray makeFilter(final long initSize, final int bitsPerEntry) {
7379
return new HeapBitArray(initSize * bitsPerEntry);
7480
}
7581

7682
public int getFingerprintLength() {
77-
return numBitsPerEntry_ - 3;
83+
return numFingerprintBits_;
7884
}
7985

80-
QuotientFilter(final int powerOfTwo, final int numBitsPerEntry, final BitArray bitArray) {
81-
powerOfTwoSize_ = powerOfTwo;
82-
numBitsPerEntry_ = numBitsPerEntry;
83-
bitArray_ = bitArray;
84-
}
86+
// QuotientFilter(final int powerOfTwo, final int numBitsPerEntry, final BitArray bitArray) {
87+
// powerOfTwoSize_ = powerOfTwo;
88+
// numBitsPerEntry_ = numBitsPerEntry;
89+
// bitArray_ = bitArray;
90+
// }
8591

8692
void expand() {
8793
if (getFingerprintLength() < 2) throw new SketchesException("for expansion value must have at least 2 bits");
88-
QuotientFilter other = new QuotientFilter(powerOfTwoSize_ + 1, numBitsPerEntry_ - 1);
94+
final QuotientFilter other = new QuotientFilter(lgQ_ + 1, numFingerprintBits_ - 1, loadFactor_);
8995

9096
long i = 0;
9197
if (!isSlotEmpty(i)) { i = findClusterStart(i); }
9298

93-
Queue<Long> fifo = new LinkedList<Long>();
99+
final Queue<Long> fifo = new LinkedList<Long>();
94100
long count = 0;
95101
while (count < numEntries_) {
96102
if (!isSlotEmpty(i)) {
@@ -104,8 +110,8 @@ void expand() {
104110
i = (i + 1) & getSlotMask();
105111
if (!fifo.isEmpty() && ! isContinuation(i)) { fifo.remove(); }
106112
}
107-
powerOfTwoSize_++;
108-
numBitsPerEntry_--;
113+
lgQ_++;
114+
numFingerprintBits_--;
109115
bitArray_ = other.bitArray_;
110116
numExpansions_++;
111117
}
@@ -146,7 +152,7 @@ public double getUtilization() {
146152

147153
// returns the number of slots in the filter without the extension/buffer slots
148154
public long getNumSlots() {
149-
return 1L << powerOfTwoSize_;
155+
return 1L << lgQ_;
150156
}
151157

152158
long getSlotMask() {
@@ -166,18 +172,18 @@ void modifySlot(final boolean isOccupied, final boolean isContinuation, final bo
166172

167173
// sets the fingerprint for a given slot index
168174
void setFingerprint(final long index, final long fingerprint) {
169-
bitArray_.setBits(index * numBitsPerEntry_ + 3, getFingerprintLength(), fingerprint);
175+
bitArray_.setBits(index * getNumBitsPerEntry() + 3, getFingerprintLength(), fingerprint);
170176
}
171177

172178
// print a nice representation of the filter that can be understood.
173179
// if vertical is on, each line will represent a slot
174180
public String getPrettyStr(final boolean vertical) {
175181
final StringBuffer sbr = new StringBuffer();
176-
final long numBits = getNumSlots() * numBitsPerEntry_;
182+
final long numBits = getNumSlots() * getNumBitsPerEntry();
177183
for (long i = 0; i < numBits; i++) {
178-
final long remainder = i % numBitsPerEntry_;
184+
final long remainder = i % getNumBitsPerEntry();
179185
if (remainder == 0) {
180-
final long slot = i / numBitsPerEntry_;
186+
final long slot = i / getNumBitsPerEntry();
181187
sbr.append(" ");
182188
if (vertical) {
183189
sbr.append("\n" + String.format("%-10d", slot) + "\t");
@@ -199,12 +205,12 @@ public void prettyPrint() {
199205

200206
// return a fingerprint in a given slot index
201207
long getFingerprint(final long index) {
202-
return bitArray_.getBits(index * numBitsPerEntry_ + 3, getFingerprintLength());
208+
return bitArray_.getBits(index * getNumBitsPerEntry() + 3, getFingerprintLength());
203209
}
204210

205211
// return an entire slot representation, including metadata flags and fingerprint
206212
long getSlot(final long index) {
207-
return bitArray_.getBits(index * numBitsPerEntry_, numBitsPerEntry_);
213+
return bitArray_.getBits(index * getNumBitsPerEntry(), getNumBitsPerEntry());
208214
}
209215

210216
// compare a fingerprint input to the fingerprint in some slot index
@@ -222,7 +228,7 @@ void modifySlot(final boolean isOccupied, final boolean isContinuation, final bo
222228
// summarize some statistical measures about the filter
223229
public void printFilterSummary() {
224230
final long slots = getNumSlots();
225-
final long numBits = slots * numBitsPerEntry_;
231+
final long numBits = slots * getNumBitsPerEntry();
226232
System.out.println("slots: " + slots);
227233
System.out.println("bits: " + numBits);
228234
System.out.println("bits/entry: " + numBits / (double)numEntries_);
@@ -242,35 +248,35 @@ public void printFilterSummary() {
242248
*/
243249
@Override
244250
public long getSpaceUse() {
245-
return getNumSlots() * numBitsPerEntry_;
251+
return getNumSlots() * getNumBitsPerEntry();
246252
}
247253

248254
public int getNumBitsPerEntry() {
249-
return numBitsPerEntry_;
255+
return numFingerprintBits_ + 3;
250256
}
251257

252258
boolean isOccupied(final long index) {
253-
return bitArray_.getBit(index * numBitsPerEntry_);
259+
return bitArray_.getBit(index * getNumBitsPerEntry());
254260
}
255261

256262
boolean isContinuation(final long index) {
257-
return bitArray_.getBit(index * numBitsPerEntry_ + 1);
263+
return bitArray_.getBit(index * getNumBitsPerEntry() + 1);
258264
}
259265

260266
boolean isShifted(final long index) {
261-
return bitArray_.getBit(index * numBitsPerEntry_ + 2);
267+
return bitArray_.getBit(index * getNumBitsPerEntry() + 2);
262268
}
263269

264270
void setOccupied(final long index, final boolean val) {
265-
bitArray_.assignBit(index * numBitsPerEntry_, val);
271+
bitArray_.assignBit(index * getNumBitsPerEntry(), val);
266272
}
267273

268274
void setContinuation(final long index, final boolean val) {
269-
bitArray_.assignBit(index * numBitsPerEntry_ + 1, val);
275+
bitArray_.assignBit(index * getNumBitsPerEntry() + 1, val);
270276
}
271277

272278
void setShifted(final long index, final boolean val) {
273-
bitArray_.assignBit(index * numBitsPerEntry_ + 2, val);
279+
bitArray_.assignBit(index * getNumBitsPerEntry() + 2, val);
274280
}
275281

276282
boolean isSlotEmpty(final long index) {
@@ -432,7 +438,7 @@ void insertFingerprintAndPushAllElse(long fingerprint, long index, final long ca
432438
numEntries_++;
433439
}
434440

435-
boolean delete(final long fingerprint, final long canonicalSlot, long runStartIndex, long matchingFingerprintIndex) {
441+
boolean delete(final long canonicalSlot, long runStartIndex, long matchingFingerprintIndex) {
436442
long runEnd = findRunEnd(matchingFingerprintIndex);
437443

438444
// the run has only one entry, we need to disable its is_occupied flag
@@ -524,7 +530,7 @@ boolean delete(final long fingerprint, final long canonicalSlot) {
524530
// we didn't find a matching fingerprint
525531
return false;
526532
}
527-
return delete(fingerprint, canonicalSlot, runStartIndex, matchingFingerprintIndex);
533+
return delete(canonicalSlot, runStartIndex, matchingFingerprintIndex);
528534
}
529535

530536
long getSlotFromHash(final long largeHash) {

src/test/java/org/apache/datasketches/filters/quotientfilter/DeletionTests.java

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,10 @@ public class DeletionTests {
3535
*/
3636
@Test
3737
static public void BasicDeletions() {
38-
int bits_per_entry = 8;
38+
int fingerprint_len_bits = 5;
3939
int num_entries_power = 3;
40-
int num_entries = (int)Math.pow(2, num_entries_power);
41-
QuotientFilter qf = new QuotientFilter(num_entries_power, bits_per_entry);
40+
int num_entries = 1 << num_entries_power;
41+
QuotientFilter qf = new QuotientFilter(num_entries_power, fingerprint_len_bits);
4242

4343
long fp1 = 1 << 4;
4444
long fp2 = 1 << 3;
@@ -60,9 +60,9 @@ static public void BasicDeletions() {
6060
qf.delete(fp1, 1);
6161
qf.delete(fp1, 1);
6262

63-
BitSet result = new BitSet(num_entries * bits_per_entry);
64-
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 2, true, false, false, fp2);
65-
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 4, true, false, false, fp3);
63+
BitSet result = new BitSet(num_entries * qf.getNumBitsPerEntry());
64+
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 2, true, false, false, fp2);
65+
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 4, true, false, false, fp3);
6666
assertTrue(QuotientFilterTest.check_equality(qf, result, true));
6767
}
6868

@@ -76,10 +76,10 @@ static public void BasicDeletions() {
7676
*/
7777
@Test
7878
static public void Deletions() {
79-
int bits_per_entry = 8;
79+
int fingerprint_len_bits = 5;
8080
int num_entries_power = 3;
8181
int num_entries = (int)Math.pow(2, num_entries_power);
82-
QuotientFilter qf = new QuotientFilter(num_entries_power, bits_per_entry);
82+
QuotientFilter qf = new QuotientFilter(num_entries_power, fingerprint_len_bits);
8383

8484
qf.insert(1, 1);
8585
qf.insert(2, 1);
@@ -96,15 +96,15 @@ static public void Deletions() {
9696
qf.delete(3, 2);
9797
qf.delete(5, 3);
9898

99-
BitSet result = new BitSet(num_entries * bits_per_entry);
100-
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 0, false, false, false, 0);
101-
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 1, true, false, false, 1);
102-
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 2, true, true, true, 2);
103-
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 3, true, false, true, 4);
104-
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 4, false, false, true, 6);
105-
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 5, false, true, true, 7);
106-
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 6, true, false, false, 8);
107-
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 7, false, false, false, 0);
99+
BitSet result = new BitSet(num_entries * qf.getNumBitsPerEntry());
100+
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 0, false, false, false, 0);
101+
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 1, true, false, false, 1);
102+
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 2, true, true, true, 2);
103+
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 3, true, false, true, 4);
104+
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 4, false, false, true, 6);
105+
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 5, false, true, true, 7);
106+
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 6, true, false, false, 8);
107+
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 7, false, false, false, 0);
108108

109109
assertTrue(QuotientFilterTest.check_equality(qf, result, true));
110110
}
@@ -121,10 +121,10 @@ static public void Deletions() {
121121
* The expected outcome is that after deletion, the remaining keys should be in their canonical slots.
122122
*/
123123
static public void DeletionsWithWrap() {
124-
int bits_per_entry = 8;
124+
int fingerprint_len_bits = 5;
125125
int num_entries_power = 3;
126126
int num_entries = (int)Math.pow(2, num_entries_power);
127-
QuotientFilter qf = new QuotientFilter(num_entries_power, bits_per_entry);
127+
QuotientFilter qf = new QuotientFilter(num_entries_power, fingerprint_len_bits);
128128

129129
qf.insert(1, 1);
130130
qf.insert(2, 1);
@@ -139,15 +139,15 @@ static public void DeletionsWithWrap() {
139139
qf.delete(5, 3);
140140
//qf.pretty_print();
141141

142-
BitSet result = new BitSet(num_entries * bits_per_entry);
143-
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 0, false, false, false, 0);
144-
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 1, true, false, false, 1);
145-
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 2, true, true, true, 2);
146-
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 3, false, false, true, 3);
147-
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 4, true, true, true, 4);
148-
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 5, true, false, true, 6);
149-
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 6, false, true, true, 7);
150-
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 7, false, false, true, 8);
142+
BitSet result = new BitSet(num_entries * qf.getNumBitsPerEntry());
143+
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 0, false, false, false, 0);
144+
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 1, true, false, false, 1);
145+
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 2, true, true, true, 2);
146+
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 3, false, false, true, 3);
147+
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 4, true, true, true, 4);
148+
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 5, true, false, true, 6);
149+
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 6, false, true, true, 7);
150+
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 7, false, false, true, 8);
151151
assertTrue(QuotientFilterTest.check_equality(qf, result, true));
152152
}
153153
}

0 commit comments

Comments
 (0)