Skip to content

Commit

Permalink
Enable ordinary-index building without a page count bound
Browse files Browse the repository at this point in the history
  • Loading branch information
jeltsch committed Oct 4, 2024
1 parent 869575f commit 1db7791
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 38 deletions.
1 change: 1 addition & 0 deletions lsm-tree.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ library
Database.LSMTree.Internal.UniqCounter
Database.LSMTree.Internal.Unsliced
Database.LSMTree.Internal.Vector
Database.LSMTree.Internal.Vector.Growing
Database.LSMTree.Internal.WriteBuffer
Database.LSMTree.Internal.WriteBufferBlobs
Database.LSMTree.Monoidal
Expand Down
51 changes: 16 additions & 35 deletions src/Database/LSMTree/Internal/IndexOrdinaryAcc.hs
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,6 @@ import Prelude hiding (take)

import Control.Exception (assert)
import Control.Monad.ST.Strict (ST)
import Data.Primitive.PrimVar (PrimVar, newPrimVar, readPrimVar,
writePrimVar)
import Data.Vector (force, take, unsafeFreeze)
import Data.Vector.Mutable (MVector)
import qualified Data.Vector.Mutable as Mutable (unsafeNew, write)
import qualified Data.Vector.Primitive as Primitive (Vector, length)
import Data.Word (Word16, Word8)
import Database.LSMTree.Internal.Chunk (Baler, Chunk, createBaler,
Expand All @@ -33,33 +28,28 @@ import Database.LSMTree.Internal.IndexOrdinary
import Database.LSMTree.Internal.Serialise
(SerialisedKey (SerialisedKey'))
import Database.LSMTree.Internal.Vector (byteVectorFromPrim)
import Database.LSMTree.Internal.Vector.Growing (GrowingVector)
import qualified Database.LSMTree.Internal.Vector.Growing as Growing (append,
new, unsafeFreeze)

{-|
A general-purpose fence pointer index under incremental construction.
A value @IndexOrdinaryAcc buffer keyCountRef baler@ denotes a partially
constructed index with the following properties:
* The keys that the index assigns to pages are stored as a prefix of the
mutable vector @buffer@.
* The reference @keyCountRef@ points to the number of those keys.
* The @baler@ object is used by the index for incremental output of the
serialised key list.
A value @IndexOrdinaryAcc lastKeys baler@ denotes a partially constructed
index that assigns keys to pages according to @lastKeys@ and uses @baler@
for incremental output of the serialised key list.
-}
data IndexOrdinaryAcc s = IndexOrdinaryAcc
!(MVector s SerialisedKey)
!(PrimVar s Int)
!(GrowingVector s SerialisedKey)
!(Baler s)

-- | Creates a new, initially empty, index.
new :: Int -- ^ Maximum number of keys
new :: Int -- ^ Initial size of the key buffer
-> Int -- ^ Minimum chunk size in bytes
-> ST s (IndexOrdinaryAcc s) -- ^ Construction of the index
new maxKeyCount minChunkSize = assert (maxKeyCount >= 0) $
IndexOrdinaryAcc <$>
Mutable.unsafeNew maxKeyCount <*>
newPrimVar 0 <*>
createBaler minChunkSize
new initialKeyBufferSize minChunkSize = IndexOrdinaryAcc <$>
Growing.new initialKeyBufferSize <*>
createBaler minChunkSize

{-|
Appends keys to the key list of an index and outputs newly available chunks
Expand All @@ -69,26 +59,18 @@ new maxKeyCount minChunkSize = assert (maxKeyCount >= 0) $
word may result in a corrupted serialised key list.
-}
append :: Append -> IndexOrdinaryAcc s -> ST s (Maybe Chunk)
append instruction (IndexOrdinaryAcc buffer keyCountRef baler)
append instruction (IndexOrdinaryAcc lastKeys baler)
= case instruction of
AppendSinglePage _ key -> do
keyCount <- readPrimVar keyCountRef
Mutable.write buffer keyCount key
writePrimVar keyCountRef (succ keyCount)
Growing.append lastKeys 1 key
feedBaler (keyListElem key) baler
AppendMultiPage key overflowPageCount -> do
keyCount <- readPrimVar keyCountRef
let

pageCount :: Int
!pageCount = succ (fromIntegral overflowPageCount)

keyCount' :: Int
!keyCount' = keyCount + pageCount

mapM_ (flip (Mutable.write buffer) key)
[keyCount .. pred keyCount']
writePrimVar keyCountRef keyCount'
Growing.append lastKeys pageCount key
feedBaler (concat (replicate pageCount (keyListElem key))) baler
where

Expand All @@ -112,8 +94,7 @@ append instruction (IndexOrdinaryAcc buffer keyCountRef baler)
@index@ is not used afterwards.
-}
unsafeEnd :: IndexOrdinaryAcc s -> ST s (Maybe Chunk, IndexOrdinary)
unsafeEnd (IndexOrdinaryAcc buffer keyCountRef baler) = do
keyCount <- readPrimVar keyCountRef
keys <- force <$> take keyCount <$> unsafeFreeze buffer
unsafeEnd (IndexOrdinaryAcc lastKeys baler) = do
keys <- Growing.unsafeFreeze lastKeys
remnant <- unsafeEndBaler baler
return (remnant, IndexOrdinary keys)
117 changes: 117 additions & 0 deletions src/Database/LSMTree/Internal/Vector/Growing.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}

{- HLINT ignore "Avoid restricted alias" -}

-- | Vectors with support for appending elements.
module Database.LSMTree.Internal.Vector.Growing
(
GrowingVector,
new,
append,
unsafeFreeze
)
where

import Prelude hiding (init, last, length)

import Control.Monad (when)
import Control.Monad.ST.Strict (ST)
import Data.Primitive.PrimVar (PrimVar, newPrimVar, readPrimVar,
writePrimVar)
import Data.STRef.Strict (STRef, newSTRef, readSTRef, writeSTRef)
import Data.Vector (Vector, force)
import qualified Data.Vector as Mutable (unsafeFreeze)
import Data.Vector.Mutable (MVector)
import qualified Data.Vector.Mutable as Mutable (grow, length, new, set, slice,
take)

{-|
A vector with support for appending elements.
Internally, the elements of a growing vector are stored in a buffer. This
buffer is automatically enlarged whenever this is needed for storing
additional elements. On each such enlargement, the size of the buffer is
multiplied by a power of 2, whose exponent is chosen just big enough to make
the final buffer size at least as high as the new vector length.
-}
data GrowingVector s a = GrowingVector
!(STRef s (MVector s a)) -- Reference to the buffer
!(PrimVar s Int) -- Reference to the length

-- | Creates a new, initially empty, vector.
new :: Int -- ^ Initial buffer size
-> ST s (GrowingVector s a) -- ^ Construction of the vector
new illegalInitialBufferSize | illegalInitialBufferSize <= 0
= error "Initial buffer size not positive"
new initialBufferSize
= do
buffer <- Mutable.new initialBufferSize
bufferRef <- newSTRef buffer
lengthRef <- newPrimVar 0
return (GrowingVector bufferRef lengthRef)

{-|
Appends a value a certain number of times to a vector. If a negative number
is provided as the count, the vector is not changed.
-}
append :: forall s a . GrowingVector s a -> Int -> a -> ST s ()
append _ pseudoCount _ | pseudoCount <= 0
= return ()
append (GrowingVector bufferRef lengthRef) count val
= do
length <- readPrimVar lengthRef
makeRoom
buffer' <- readSTRef bufferRef
Mutable.set (Mutable.slice length count buffer') val
where

makeRoom :: ST s ()
makeRoom = do
length <- readPrimVar lengthRef
when (count > maxBound - length) (error "New length too large")
buffer <- readSTRef bufferRef
let

bufferSize :: Int
!bufferSize = Mutable.length buffer

length' :: Int
!length' = length + count

when (bufferSize < length') $ do
let

higherBufferSizes :: [Int]
higherBufferSizes = tail (init ++ [last]) where

init :: [Int]
last :: Int
(init, last : _) = span (<= maxBound `div` 2) $
iterate (* 2) bufferSize
{-NOTE:
In order to prevent overflow, we have to start with the
current buffer size here, although we know that it is
not sufficient.
-}

sufficientBufferSizes :: [Int]
sufficientBufferSizes = dropWhile (< length') higherBufferSizes

case sufficientBufferSizes of
[]
-> error "No sufficient buffer size available"
bufferSize' : _
-> Mutable.grow buffer (bufferSize' - bufferSize) >>=
writeSTRef bufferRef
writePrimVar lengthRef length'

{-|
Turns a growing vector into an ordinary vector, thereby invalidating the
growing vector. Executing @unsafeFreeze vec@ is only safe when @vec@ is not
used afterwards.
-}
unsafeFreeze :: GrowingVector s a -> ST s (Vector a)
unsafeFreeze (GrowingVector bufferRef lengthRef) = do
buffer <- readSTRef bufferRef
length <- readPrimVar lengthRef
force <$> Mutable.unsafeFreeze (Mutable.take length buffer)
10 changes: 7 additions & 3 deletions test/Test/Database/LSMTree/Internal/IndexOrdinary.hs
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ lastKeysBlockFromAppends appends = lastKeysBlock where
-}
incrementalConstruction :: [Append] -> (IndexOrdinary, Primitive.Vector Word8)
incrementalConstruction appends = runST $ do
acc <- new keyCount minChunkSize
acc <- new initialKeyBufferSize minChunkSize
commonChunks <- mapM (flip append acc) appends
(remnant, unserialised) <- unsafeEnd acc
let
Expand All @@ -277,8 +277,12 @@ incrementalConstruction appends = runST $ do
return (unserialised, serialised)
where

keyCount :: Int
keyCount = sum (map appendedKeysCount appends)
{-
We do not need to vary the initial key buffer size, since we are not
testing growing vectors here.
-}
initialKeyBufferSize :: Int
initialKeyBufferSize = 0x100

{-
We do not need to vary the minimum chunk size, since we are not testing
Expand Down

0 comments on commit 1db7791

Please sign in to comment.