diff --git a/bwt/rsa_bitvector.go b/bwt/rsa_bitvector.go index aaa543ed..0cb9da46 100644 --- a/bwt/rsa_bitvector.go +++ b/bwt/rsa_bitvector.go @@ -4,6 +4,8 @@ import "math/bits" // rsaBitVector allows us to perform RSA: (R)ank, (S)elect, and (A)ccess // queries in a memory performant and memory compact way. +// To learn about how Rank, Select, and Access work, take a look at the +// examples in each respective method. type rsaBitVector struct { bv bitvector totalOnesRank int @@ -36,11 +38,14 @@ func newRSABitVectorFromBitVector(bv bitvector) rsaBitVector { } // Rank returns the rank of the given value up to, but not including -// the ith bit. We count Rank starting a 0. +// the ith bit. // For Example: // Given the bitvector 001000100001 -// Rank(true, 8) = 1 -// Rank(false, 8) = 5 +// Rank(true, 1) = 0 +// Rank(true, 2) = 0 +// Rank(true, 3) = 1 +// Rank(true, 8) = 2 +// Rank(false, 8) = 6 func (rsa rsaBitVector) Rank(val bool, i int) int { if i > rsa.bv.len()-1 { if val { @@ -70,11 +75,13 @@ func (rsa rsaBitVector) Rank(val bool, i int) int { return (chunkPos*rsa.jrBitsPerChunk - chunk.onesCumulativeRank) + (subChunkPos*rsa.jrBitsPerSubChunk - subChunk.onesCumulativeRank) + bits.OnesCount64(remaining) } -// Select returns the the position of the given value of a specified Rank +// Select returns the position of the given value with the provided Rank // For Example: // Given the bitvector 001000100001 -// Select(true, 1) = 6 -// Rank(false, 5) = 7 +// Select(true, 1) = 2 +// Select(false, 5) = 5 +// Select(false, 1) = 1 +// Select(false, 0) = 0 func (rsa rsaBitVector) Select(val bool, rank int) (i int, ok bool) { if val { i, ok := rsa.oneSelectMap[rank] @@ -103,24 +110,24 @@ type subChunk struct { buildJacobsonRank Jacobson rank is a succinct data structure. This allows us to represent something normally would require O(N) worth of memory with less that N memory. Jacobson Rank allows for sub linear growth. 
Jacobson rank also allows us to lookup rank for some value of a bitvector in O(1) -time. Theoretically, Jacobson Rank tells us to: -1. Create log(N) "Chunks" -2. Create 2log(N) "Sub Chunks" -3. Have "Sub Chunks" be 0.5log(N) in length +time. Theoretically, Jacobson Rank requires: +1. Creating log(N) "Chunks" +2. Creating 2log(N) "Sub Chunks" +3. Having "Sub Chunks" be 0.5log(N) in length 4. For each "Chunk", store the cumulative rank of set bits relative to the overall bitvector 5. For each "Sub Chunk", store the cumulative rank of set bits relative to the parent "Chunk" 6. We can One's count the N bit word if possible. We will only consider this possibility :) -For simplicity and all around good results, we just have "Sub Chunks" of size 64 bits. +For simplicity and all-around decent results, we just have "Sub Chunks" of size 64 bits. It is O(1) because given some offset i, all we have to do is calculate rank is: rank = CumulativeRank(ChunkOfi(i))) + CumulativeRank(SubChunkOfi(i))) + OnesCount(SubChunkOfi(i)) To understand why it is sub linear in space, you can refer to Ben Langmead and other literature that -describes this complexity. +describes the space complexity. +https://www.youtube.com/watch?v=M1sUZxXVjG8&list=PL2mpR0RYFQsADmYpW2YWBrXJZ_6EL_3nu&index=7 */ func buildJacobsonRank(inBv bitvector) (jacobsonRankChunks []chunk, numOfSubChunksPerChunk, numOfBitsPerSubChunk, totalRank int) { - // TODO: talk about why this is probably good enough, improves as n grows, gets worse as n gets smaller, and how this fits into a machine instruction, and how this is "simple" numOfSubChunksPerChunk = 4 totalRank = 0