|
| 1 | +package bloom |
| 2 | + |
| 3 | +import ( |
| 4 | + "errors" |
| 5 | + "strconv" |
| 6 | + |
| 7 | + "zero/core/hash" |
| 8 | + "zero/core/stores/redis" |
| 9 | +) |
| 10 | + |
// maps is the number of hash locations computed for every element. It is the
// k column of the error rate table at
// http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
const maps = 14

// Lua scripts evaluated on the redis side. KEYS[1] is the key that holds the
// bit array, ARGV is the list of bit offsets.
const (
	// setScript sets the bit at every offset in ARGV to 1.
	setScript = `
local key = KEYS[1]
for _, offset in ipairs(ARGV) do
	redis.call("setbit", key, offset, 1)
end
`

	// testScript returns false as soon as it finds an offset whose bit is 0,
	// and true when every offset in ARGV has its bit set.
	testScript = `
local key = KEYS[1]
for _, offset in ipairs(ARGV) do
	if tonumber(redis.call("getbit", key, offset)) == 0 then
		return false
	end
end
return true
`
)
| 31 | + |
var (
	// ErrTooLargeOffset is returned when a bit offset is greater than or
	// equal to the number of bits of the bit set it is used with.
	ErrTooLargeOffset = errors.New("too large offset")
)
| 33 | + |
// BitSetProvider is the bit storage a BloomFilter works on. Its methods are
// unexported, so it can only be implemented inside this package.
type BitSetProvider interface {
	// check reports whether the bits at all given offsets are set.
	check([]uint) (bool, error)
	// set turns on the bits at all given offsets.
	set([]uint) error
}

// BloomFilter is a bloom filter whose bits live in a BitSetProvider.
type BloomFilter struct {
	// bits is the size of the bit array; locations are taken modulo bits.
	bits uint
	// maps is neither populated by New nor read anywhere in this file; the
	// number of hashes actually used is the package-level constant maps.
	maps uint
	// bitSet stores and queries the bits.
	bitSet BitSetProvider
}
| 46 | + |
| 47 | +// New create a BloomFilter, store is the backed redis, key is the key for the bloom filter, |
| 48 | +// bits is how many bits will be used, maps is how many hashes for each addition. |
| 49 | +// best practices: |
| 50 | +// elements - means how many actual elements |
| 51 | +// when maps = 14, formula: 0.7*(bits/maps), bits = 20*elements, the error rate is 0.000067 < 1e-4 |
| 52 | +// for detailed error rate table, see http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html |
| 53 | +func New(store *redis.Redis, key string, bits uint) *BloomFilter { |
| 54 | + return &BloomFilter{ |
| 55 | + bits: bits, |
| 56 | + bitSet: newRedisBitSet(store, key, bits), |
| 57 | + } |
| 58 | +} |
| 59 | + |
| 60 | +func (f *BloomFilter) Add(data []byte) error { |
| 61 | + locations := f.getLocations(data) |
| 62 | + err := f.bitSet.set(locations) |
| 63 | + if err != nil { |
| 64 | + return err |
| 65 | + } |
| 66 | + return nil |
| 67 | +} |
| 68 | + |
| 69 | +func (f *BloomFilter) Exists(data []byte) (bool, error) { |
| 70 | + locations := f.getLocations(data) |
| 71 | + isSet, err := f.bitSet.check(locations) |
| 72 | + if err != nil { |
| 73 | + return false, err |
| 74 | + } |
| 75 | + if !isSet { |
| 76 | + return false, nil |
| 77 | + } |
| 78 | + |
| 79 | + return true, nil |
| 80 | +} |
| 81 | + |
| 82 | +func (f *BloomFilter) getLocations(data []byte) []uint { |
| 83 | + locations := make([]uint, maps) |
| 84 | + for i := uint(0); i < maps; i++ { |
| 85 | + hashValue := hash.Hash(append(data, byte(i))) |
| 86 | + locations[i] = uint(hashValue % uint64(f.bits)) |
| 87 | + } |
| 88 | + |
| 89 | + return locations |
| 90 | +} |
| 91 | + |
| 92 | +type redisBitSet struct { |
| 93 | + store *redis.Redis |
| 94 | + key string |
| 95 | + bits uint |
| 96 | +} |
| 97 | + |
| 98 | +func newRedisBitSet(store *redis.Redis, key string, bits uint) *redisBitSet { |
| 99 | + return &redisBitSet{ |
| 100 | + store: store, |
| 101 | + key: key, |
| 102 | + bits: bits, |
| 103 | + } |
| 104 | +} |
| 105 | + |
| 106 | +func (r *redisBitSet) buildOffsetArgs(offsets []uint) ([]string, error) { |
| 107 | + var args []string |
| 108 | + |
| 109 | + for _, offset := range offsets { |
| 110 | + if offset >= r.bits { |
| 111 | + return nil, ErrTooLargeOffset |
| 112 | + } |
| 113 | + |
| 114 | + args = append(args, strconv.FormatUint(uint64(offset), 10)) |
| 115 | + } |
| 116 | + |
| 117 | + return args, nil |
| 118 | +} |
| 119 | + |
| 120 | +func (r *redisBitSet) check(offsets []uint) (bool, error) { |
| 121 | + args, err := r.buildOffsetArgs(offsets) |
| 122 | + if err != nil { |
| 123 | + return false, err |
| 124 | + } |
| 125 | + |
| 126 | + resp, err := r.store.Eval(testScript, []string{r.key}, args) |
| 127 | + if err == redis.Nil { |
| 128 | + return false, nil |
| 129 | + } else if err != nil { |
| 130 | + return false, err |
| 131 | + } |
| 132 | + |
| 133 | + if exists, ok := resp.(int64); !ok { |
| 134 | + return false, nil |
| 135 | + } else { |
| 136 | + return exists == 1, nil |
| 137 | + } |
| 138 | +} |
| 139 | + |
| 140 | +func (r *redisBitSet) del() error { |
| 141 | + _, err := r.store.Del(r.key) |
| 142 | + return err |
| 143 | +} |
| 144 | + |
| 145 | +func (r *redisBitSet) expire(seconds int) error { |
| 146 | + return r.store.Expire(r.key, seconds) |
| 147 | +} |
| 148 | + |
| 149 | +func (r *redisBitSet) set(offsets []uint) error { |
| 150 | + args, err := r.buildOffsetArgs(offsets) |
| 151 | + if err != nil { |
| 152 | + return err |
| 153 | + } |
| 154 | + |
| 155 | + _, err = r.store.Eval(setScript, []string{r.key}, args) |
| 156 | + if err == redis.Nil { |
| 157 | + return nil |
| 158 | + } else { |
| 159 | + return err |
| 160 | + } |
| 161 | +} |
0 commit comments