diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1841cd8..5a1bafe 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,5 +1,8 @@ -on: pull_request name: Test + +on: [push, pull_request] +permissions: + contents: read jobs: test: strategy: diff --git a/README.md b/README.md index 97e8307..4b82fd1 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,15 @@ [![PkgGoDev](https://pkg.go.dev/badge/github.com/bits-and-blooms/bitset?tab=doc)](https://pkg.go.dev/github.com/bits-and-blooms/bitset?tab=doc) +This library is part of the [awesome go collection](https://github.com/avelino/awesome-go). It is used in production by several important systems: + +* [beego](https://github.com/beego/beego) +* [CubeFS](https://github.com/cubefs/cubefs) +* [Amazon EKS Distro](https://github.com/aws/eks-distro) +* [sourcegraph](https://github.com/sourcegraph/sourcegraph) +* [torrent](https://github.com/anacrolix/torrent) + + ## Description Package bitset implements bitsets, a mapping between non-negative integers and boolean values. @@ -60,19 +69,54 @@ func main() { } ``` -As an alternative to BitSets, one should check out the 'big' package, which provides a (less set-theoretical) view of bitsets. 
Package documentation is at: https://pkg.go.dev/github.com/bits-and-blooms/bitset?tab=doc +## Serialization + + +You may serialize a bitset safely and portably to a stream +of bytes as follows: +```Go + const length = 9585 + const oneEvery = 97 + bs := bitset.New(length) + // Add some bits + for i := uint(0); i < length; i += oneEvery { + bs = bs.Set(i) + } + + var buf bytes.Buffer + n, err := bs.WriteTo(&buf) + if err != nil { + // failure + } + // Here n == buf.Len() +``` +You can later deserialize the result as follows: + +```Go + // Read back from buf + bs = bitset.New(0) + n, err = bs.ReadFrom(&buf) + if err != nil { + // error + } + // n is the number of bytes read +``` + +The `ReadFrom` function attempts to read the data into the existing +BitSet instance, to minimize memory allocations. + ## Memory Usage -The memory usage of a bitset using N bits is at least N/8 bytes. The number of bits in a bitset is at least as large as one plus the greatest bit index you have accessed. Thus it is possible to run out of memory while using a bitset. If you have lots of bits, you might prefer compressed bitsets, like the [Roaring bitmaps](http://roaringbitmap.org) and its [Go implementation](https://github.com/RoaringBitmap/roaring). +The memory usage of a bitset using `N` bits is at least `N/8` bytes. The number of bits in a bitset is at least as large as one plus the greatest bit index you have accessed. Thus it is possible to run out of memory while using a bitset. If you have lots of bits, you might prefer compressed bitsets, like the [Roaring bitmaps](http://roaringbitmap.org) and its [Go implementation](https://github.com/RoaringBitmap/roaring). ## Implementation Note Go 1.9 introduced a native `math/bits` library. We provide backward compatibility to Go 1.7, which might be removed. -It is possible that a later version will match the `math/bits` return signature for counts (which is `int`, rather than our library's `unit64`). If so, the version will be bumped. 
+It is possible that a later version will match the `math/bits` return signature for counts (which is `int`, rather than our library's `uint64`). If so, the version will be bumped. ## Installation diff --git a/bitset.go b/bitset.go index 5751b68..f417d4c 100644 --- a/bitset.go +++ b/bitset.go @@ -613,7 +613,7 @@ func (b *BitSet) Equal(c *BitSet) bool { return true } wn := b.wordCount() - for p:= 0; p < wn; p++ { + for p := 0; p < wn; p++ { if c.set[p] != b.set[p] { return false } @@ -901,13 +901,16 @@ func (b *BitSet) DumpAsBits() string { return buffer.String() } -// BinaryStorageSize returns the binary storage requirements +// BinaryStorageSize returns the binary storage requirements (see WriteTo) in bytes. func (b *BitSet) BinaryStorageSize() int { nWords := b.wordCount() return binary.Size(uint64(0)) + binary.Size(b.set[:nWords]) } -// WriteTo writes a BitSet to a stream +// WriteTo writes a BitSet to a stream. The format is: +// 1. uint64 length +// 2. []uint64 set +// Upon success, the number of bytes written is returned. func (b *BitSet) WriteTo(stream io.Writer) (int64, error) { length := uint64(b.length) @@ -935,6 +938,14 @@ func (b *BitSet) WriteTo(stream io.Writer) (int64, error) { } // ReadFrom reads a BitSet from a stream written using WriteTo +// The format is: +// 1. uint64 length +// 2. []uint64 set +// Upon success, the number of bytes read is returned. +// If the current BitSet is not large enough to hold the data, +// it is extended. In case of error, the BitSet is either +// left unchanged or made empty if the error occurs too late +// to preserve the content. 
func (b *BitSet) ReadFrom(stream io.Reader) (int64, error) { var length uint64 @@ -946,26 +957,36 @@ func (b *BitSet) ReadFrom(stream io.Reader) (int64, error) { } return 0, err } - newset := New(uint(length)) + newlength := uint(length) - if uint64(newset.length) != length { + if uint64(newlength) != length { return 0, errors.New("unmarshalling error: type mismatch") } + nWords := wordsNeeded(uint(newlength)) + if cap(b.set) >= nWords { + b.set = b.set[:nWords] + } else { + b.set = make([]uint64, nWords) + } + + b.length = newlength var item [8]byte - nWords := wordsNeeded(uint(length)) reader := bufio.NewReader(io.LimitReader(stream, 8*int64(nWords))) for i := 0; i < nWords; i++ { if _, err := io.ReadFull(reader, item[:]); err != nil { if err == io.EOF { err = io.ErrUnexpectedEOF } + // We do not want to leave the BitSet partially filled as + // it is error prone. + b.set = b.set[:0] + b.length = 0 return 0, err } - newset.set[i] = binaryOrder.Uint64(item[:]) + b.set[i] = binaryOrder.Uint64(item[:]) } - *b = *newset return int64(b.BinaryStorageSize()), nil } diff --git a/bitset_test.go b/bitset_test.go index 601c274..b8c682f 100644 --- a/bitset_test.go +++ b/bitset_test.go @@ -712,7 +712,7 @@ func TestShrink(t *testing.T) { b.Set(80) b.Shrink(70) for _, word := range b.set { - if (word != 0) { + if word != 0 { t.Error("word should be 0", word) } } @@ -1236,30 +1236,30 @@ func TestMarshalUnmarshalBinary(t *testing.T) { aSetBit := uint(128) a = New(256).Set(aSetBit) - aExpectedMarshaledSize := 8 /* length: uint64 */ + 4 * 8 /* set : [4]uint64 */ + aExpectedMarshaledSize := 8 /* length: uint64 */ + 4*8 /* set : [4]uint64 */ aMarshaled, err := a.MarshalBinary() if err != nil || aExpectedMarshaledSize != len(aMarshaled) || aExpectedMarshaledSize != a.BinaryStorageSize() { - t.Error("MarshalBinary failed to produce expected (", aExpectedMarshaledSize , ") number of bytes") + t.Error("MarshalBinary failed to produce expected (", aExpectedMarshaledSize, ") number of 
bytes") return } shiftAmount := uint(72) // https://github.com/bits-and-blooms/bitset/issues/114 - for i := uint(0) ; i < shiftAmount; i++ { + for i := uint(0); i < shiftAmount; i++ { a.DeleteAt(0) } - aExpectedMarshaledSize = 8 /* length: uint64 */ + 3 * 8 /* set : [3]uint64 */ + aExpectedMarshaledSize = 8 /* length: uint64 */ + 3*8 /* set : [3]uint64 */ aMarshaled, err = a.MarshalBinary() if err != nil || aExpectedMarshaledSize != len(aMarshaled) || aExpectedMarshaledSize != a.BinaryStorageSize() { - t.Error("MarshalBinary failed to produce expected (", aExpectedMarshaledSize , ") number of bytes") + t.Error("MarshalBinary failed to produce expected (", aExpectedMarshaledSize, ") number of bytes") return } copyBinary(t, a, b) - if b.Len() != 256 - shiftAmount || !b.Test(aSetBit - shiftAmount) { + if b.Len() != 256-shiftAmount || !b.Test(aSetBit-shiftAmount) { t.Error("Shifted bitset is not copied correctly") } } diff --git a/popcnt_19.go b/popcnt_19.go index fc8ff4f..9a3766a 100644 --- a/popcnt_19.go +++ b/popcnt_19.go @@ -1,3 +1,4 @@ +//go:build go1.9 // +build go1.9 package bitset diff --git a/popcnt_amd64.go b/popcnt_amd64.go index 4cf64f2..116e044 100644 --- a/popcnt_amd64.go +++ b/popcnt_amd64.go @@ -1,5 +1,5 @@ -// +build !go1.9 -// +build amd64,!appengine +//go:build !go1.9 && amd64 && !appengine +// +build !go1.9,amd64,!appengine package bitset diff --git a/popcnt_amd64_test.go b/popcnt_amd64_test.go index c79d009..8bcbf94 100644 --- a/popcnt_amd64_test.go +++ b/popcnt_amd64_test.go @@ -1,5 +1,5 @@ -// +build !go1.9 -// +build amd64,!appengine +//go:build !go1.9 && amd64 && !appengine +// +build !go1.9,amd64,!appengine // This file tests the popcnt funtions diff --git a/popcnt_cmp_test.go b/popcnt_cmp_test.go index 8a06dc9..363dc70 100644 --- a/popcnt_cmp_test.go +++ b/popcnt_cmp_test.go @@ -1,5 +1,5 @@ -// +build !go1.9 -// +build amd64,!appengine +//go:build !go1.9 && amd64 && !appengine +// +build !go1.9,amd64,!appengine // This file tests the popcnt 
funtions diff --git a/popcnt_generic.go b/popcnt_generic.go index 21e0ff7..9e0ad46 100644 --- a/popcnt_generic.go +++ b/popcnt_generic.go @@ -1,3 +1,4 @@ +//go:build !go1.9 && (!amd64 || appengine) // +build !go1.9 // +build !amd64 appengine diff --git a/trailing_zeros_18.go b/trailing_zeros_18.go index c52b61b..12336e7 100644 --- a/trailing_zeros_18.go +++ b/trailing_zeros_18.go @@ -1,3 +1,4 @@ +//go:build !go1.9 // +build !go1.9 package bitset diff --git a/trailing_zeros_19.go b/trailing_zeros_19.go index 36a988e..cfb0a84 100644 --- a/trailing_zeros_19.go +++ b/trailing_zeros_19.go @@ -1,3 +1,4 @@ +//go:build go1.9 // +build go1.9 package bitset