From 95e373276081b64746e07569fc8bf5fa80b83c72 Mon Sep 17 00:00:00 2001 From: Joris Dral Date: Fri, 12 Jul 2024 13:25:20 +0200 Subject: [PATCH] Add `hAdvise` and `hAllocate` to `HasBlockIO` Provides even more control over the OS page cache. --- .../src-linux/System/FS/BlockIO/Async.hs | 6 +++- .../src-linux/System/FS/BlockIO/Internal.hs | 24 +++++++++++-- .../src-macos/System/FS/BlockIO/Internal.hs | 14 ++++++-- .../src-windows/System/FS/BlockIO/Internal.hs | 11 ++++-- blockio-api/src/System/FS/BlockIO/API.hs | 35 +++++++++++++++++-- blockio-api/src/System/FS/BlockIO/Serial.hs | 6 +++- blockio-sim/src/System/FS/BlockIO/Sim.hs | 9 ++++- lsm-tree.cabal | 4 ++- 8 files changed, 95 insertions(+), 14 deletions(-) diff --git a/blockio-api/src-linux/System/FS/BlockIO/Async.hs b/blockio-api/src-linux/System/FS/BlockIO/Async.hs index 60221e8dc..ab755b153 100644 --- a/blockio-api/src-linux/System/FS/BlockIO/Async.hs +++ b/blockio-api/src-linux/System/FS/BlockIO/Async.hs @@ -28,15 +28,19 @@ import System.Posix.Types -- | IO instantiation of 'HasBlockIO', using @blockio-uring@. asyncHasBlockIO :: (Handle HandleIO -> Bool -> IO ()) + -> (Handle HandleIO -> FileOffset -> FileOffset -> API.Advice -> IO ()) + -> (Handle HandleIO -> FileOffset -> FileOffset -> IO ()) -> HasFS IO HandleIO -> API.IOCtxParams -> IO (API.HasBlockIO IO HandleIO) -asyncHasBlockIO hSetNoCache hasFS ctxParams = do +asyncHasBlockIO hSetNoCache hAdvise hAllocate hasFS ctxParams = do ctx <- I.initIOCtx (ctxParamsConv ctxParams) pure $ API.HasBlockIO { API.close = I.closeIOCtx ctx , API.submitIO = submitIO hasFS ctx , API.hSetNoCache + , API.hAdvise + , API.hAllocate } ctxParamsConv :: API.IOCtxParams -> I.IOCtxParams diff --git a/blockio-api/src-linux/System/FS/BlockIO/Internal.hs b/blockio-api/src-linux/System/FS/BlockIO/Internal.hs index d26a35668..d27d41331 100644 --- a/blockio-api/src-linux/System/FS/BlockIO/Internal.hs +++ b/blockio-api/src-linux/System/FS/BlockIO/Internal.hs @@ -5,7 +5,8 @@ module System.FS.BlockIO.Internal ( ) where import System.FS.API (Handle (handleRaw), HasFS) -import System.FS.BlockIO.API (HasBlockIO, IOCtxParams) +import System.FS.BlockIO.API (Advice (..), FileOffset, HasBlockIO, + IOCtxParams) #if SERIALBLOCKIO import qualified System.FS.BlockIO.Serial as Serial #else @@ -13,6 +14,7 @@ import qualified System.FS.BlockIO.Async as Async #endif import System.FS.IO (HandleIO) import System.FS.IO.Handle (withOpenHandle) +import qualified System.Posix.Fcntl as Fcntl import qualified System.Posix.Fcntl.NoCache as Unix ioHasBlockIO :: @@ -20,11 +22,27 @@ ioHasBlockIO :: -> IOCtxParams -> IO (HasBlockIO IO HandleIO) #if SERIALBLOCKIO -ioHasBlockIO hfs _params = Serial.serialHasBlockIO hSetNoCache hfs +ioHasBlockIO hfs _params = Serial.serialHasBlockIO hSetNoCache hAdvise hAllocate hfs #else -ioHasBlockIO hfs params = Async.asyncHasBlockIO hSetNoCache hfs params +ioHasBlockIO hfs params = Async.asyncHasBlockIO hSetNoCache hAdvise hAllocate hfs params #endif hSetNoCache :: Handle HandleIO -> Bool -> IO () hSetNoCache h b = withOpenHandle "hSetNoCache" (handleRaw h) (flip Unix.writeFcntlNoCache b) + +hAdvise :: Handle HandleIO -> FileOffset -> FileOffset -> Advice -> IO () +hAdvise h off len advice = withOpenHandle "hAdvise" (handleRaw h) $ \fd -> + Fcntl.fileAdvise fd off len advice' + where + advice' = case advice of + AdviceNormal -> Fcntl.AdviceNormal + AdviceRandom -> Fcntl.AdviceRandom + AdviceSequential -> Fcntl.AdviceSequential + AdviceWillNeed -> Fcntl.AdviceWillNeed + AdviceDontNeed -> Fcntl.AdviceDontNeed + AdviceNoReuse -> Fcntl.AdviceNoReuse + +hAllocate :: Handle HandleIO -> FileOffset -> FileOffset -> IO () +hAllocate h off len = withOpenHandle "hAllocate" (handleRaw h) $ \fd -> + Fcntl.fileAllocate fd off len diff --git a/blockio-api/src-macos/System/FS/BlockIO/Internal.hs b/blockio-api/src-macos/System/FS/BlockIO/Internal.hs index 9595e891d..6e85c61c3 100644 --- a/blockio-api/src-macos/System/FS/BlockIO/Internal.hs +++ b/blockio-api/src-macos/System/FS/BlockIO/Internal.hs @@ -3,7 +3,8 @@ module System.FS.BlockIO.Internal ( ) where import System.FS.API (Handle (handleRaw), HasFS) -import System.FS.BlockIO.API (HasBlockIO, IOCtxParams) +import System.FS.BlockIO.API (Advice (..), FileOffset, HasBlockIO, + IOCtxParams) import qualified System.FS.BlockIO.Serial as Serial import System.FS.IO (HandleIO) import System.FS.IO.Handle (withOpenHandle) @@ -18,8 +19,17 @@ ioHasBlockIO :: HasFS IO HandleIO -> IOCtxParams -> IO (HasBlockIO IO HandleIO) -ioHasBlockIO hfs _params = Serial.serialHasBlockIO hSetNoCache hfs +ioHasBlockIO hfs _params = Serial.serialHasBlockIO hSetNoCache hAdvise hAllocate hfs hSetNoCache :: Handle HandleIO -> Bool -> IO () hSetNoCache h b = withOpenHandle "hSetNoCache" (handleRaw h) (flip Unix.writeFcntlNoCache b) + +-- TODO: it is unclear if MacOS supports @posix_fadvise(2)@, and it's hard to +-- check because there are no manual pages online. For now, it's just hardcoded +-- to be a no-op. +hAdvise :: Handle HandleIO -> FileOffset -> FileOffset -> Advice -> IO () +hAdvise _h _off _len _advice = pure () + +hAllocate :: Handle HandleIO -> FileOffset -> FileOffset -> IO () +hAllocate _h _off _len = pure () diff --git a/blockio-api/src-windows/System/FS/BlockIO/Internal.hs b/blockio-api/src-windows/System/FS/BlockIO/Internal.hs index 458faa5fc..a7e3433a1 100644 --- a/blockio-api/src-windows/System/FS/BlockIO/Internal.hs +++ b/blockio-api/src-windows/System/FS/BlockIO/Internal.hs @@ -3,7 +3,8 @@ module System.FS.BlockIO.Internal ( ) where import System.FS.API (Handle, HasFS) -import System.FS.BlockIO.API (HasBlockIO, IOCtxParams) +import System.FS.BlockIO.API (Advice (..), FileOffset, HasBlockIO, + IOCtxParams) import qualified System.FS.BlockIO.Serial as Serial import System.FS.IO (HandleIO) @@ -16,7 +17,13 @@ ioHasBlockIO :: HasFS IO HandleIO -> IOCtxParams -> IO (HasBlockIO IO HandleIO) -ioHasBlockIO hfs _params = Serial.serialHasBlockIO hSetNoCache hfs +ioHasBlockIO hfs _params = Serial.serialHasBlockIO hSetNoCache hAdvise hAllocate hfs hSetNoCache :: Handle HandleIO -> Bool -> IO () hSetNoCache _h _b = pure () + +hAdvise :: Handle HandleIO -> FileOffset -> FileOffset -> Advice -> IO () +hAdvise _h _off _len _advice = pure () + +hAllocate :: Handle HandleIO -> FileOffset -> FileOffset -> IO () +hAllocate _h _off _len = pure () diff --git a/blockio-api/src/System/FS/BlockIO/API.hs b/blockio-api/src/System/FS/BlockIO/API.hs index efc9889ec..c746b1784 100644 --- a/blockio-api/src/System/FS/BlockIO/API.hs +++ b/blockio-api/src/System/FS/BlockIO/API.hs @@ -18,6 +18,7 @@ module System.FS.BlockIO.API ( , ioopBufferOffset , ioopByteCount , IOResult (..) + , Advice (..) -- * Re-exports , ByteCount , FileOffset @@ -31,7 +32,7 @@ import qualified Data.Vector.Generic as VG import qualified Data.Vector.Generic.Mutable as VGM import qualified Data.Vector.Primitive as VP import qualified Data.Vector.Unboxed as VU -import qualified Data.Vector.Unboxed as VUM +import qualified Data.Vector.Unboxed.Mutable as VUM import GHC.IO.Exception (IOErrorType (ResourceVanished)) import GHC.Stack (HasCallStack) import System.FS.API @@ -59,12 +60,30 @@ data HasBlockIO m h = HasBlockIO { -- * [Linux]: set the @O_DIRECT@ flag. -- * [MacOS]: set the @F_NOCACHE@ flag. -- * [Windows]: no-op. + -- + -- TODO: subsequent reads/writes with misaligned byte arrays should fail + -- both in simulation and real implementation. , hSetNoCache :: Handle h -> Bool -> m () + -- | Predeclare an access pattern for file data. + -- + -- This has different effects on different distributions. + -- * [Linux]: perform @posix_fadvise(2). + -- * [MacOS]: no-op. + -- * [Windows]: no-op. + , hAdvise :: Handle h -> FileOffset -> FileOffset -> Advice -> m () + -- | Allocate file space. + -- + -- This has different effects on different distributions. + -- * [Linux]: perform @posix_fallocate(2). + -- * [MacOS]: no-op. + -- * [Windows]: no-op. + , hAllocate :: Handle h -> FileOffset -> FileOffset -> m () } instance NFData (HasBlockIO m h) where - rnf (HasBlockIO a b c) = - rwhnf a `seq` rwhnf b `seq` rwhnf c + rnf (HasBlockIO a b c d e) = + rwhnf a `seq` rwhnf b `seq` rnf c `seq` + rwhnf d `seq` rwhnf e -- | Concurrency parameters for initialising a 'HasBlockIO. Can be ignored by -- serial implementations. @@ -128,3 +147,13 @@ deriving via (VU.UnboxViaPrim IOResult) instance VGM.MVector VU.MVector IOResult deriving via (VU.UnboxViaPrim IOResult) instance VG.Vector VU.Vector IOResult instance VUM.Unbox IOResult + +-- | Basically "System.Posix.Fcntl.Advice" from the @unix@ package +data Advice = + AdviceNormal + | AdviceRandom + | AdviceSequential + | AdviceWillNeed + | AdviceDontNeed + | AdviceNoReuse + deriving stock (Show, Eq) diff --git a/blockio-api/src/System/FS/BlockIO/Serial.hs b/blockio-api/src/System/FS/BlockIO/Serial.hs index 5e6b0f548..1a5baacbe 100644 --- a/blockio-api/src/System/FS/BlockIO/Serial.hs +++ b/blockio-api/src/System/FS/BlockIO/Serial.hs @@ -21,14 +21,18 @@ import System.FS.BlockIO.API (IOOp (..), IOResult (..)) serialHasBlockIO :: (MonadThrow m, MonadMVar m, PrimMonad m, Eq h) => (Handle h -> Bool -> m ()) + -> (Handle h -> API.FileOffset -> API.FileOffset -> API.Advice -> m ()) + -> (Handle h -> API.FileOffset -> API.FileOffset -> m ()) -> HasFS m h -> m (API.HasBlockIO m h) -serialHasBlockIO hSetNoCache hfs = do +serialHasBlockIO hSetNoCache hAdvise hAllocate hfs = do ctx <- initIOCtx (SomeHasFS hfs) pure $ API.HasBlockIO { API.close = close ctx , API.submitIO = submitIO hfs ctx , API.hSetNoCache + , API.hAdvise + , API.hAllocate } data IOCtx m = IOCtx { ctxFS :: SomeHasFS m, openVar :: MVar m Bool } diff --git a/blockio-sim/src/System/FS/BlockIO/Sim.hs b/blockio-sim/src/System/FS/BlockIO/Sim.hs index 8e63a7bbf..b54f55344 100644 --- a/blockio-sim/src/System/FS/BlockIO/Sim.hs +++ b/blockio-sim/src/System/FS/BlockIO/Sim.hs @@ -21,9 +21,16 @@ fromHasFS :: (MonadThrow m, MonadMVar m, PrimMonad m) => HasFS m HandleMock -> m (HasBlockIO m HandleMock) -fromHasFS = serialHasBlockIO hSetNoCache +fromHasFS = serialHasBlockIO hSetNoCache hAdvise hAllocate where + -- TODO: It should be possible for the implementations and simulation to + -- throw an FsError when doing file I/O with misaligned byte arrays after + -- hSetNoCache. Maybe they should? However, to do that we'd have to be able + -- to move hSetNoCache into fs-api and fs-sim because we'd need access to + -- the internals. hSetNoCache _h _b = pure () + hAdvise _ _ _ _ = pure () + hAllocate _ _ _ = pure () simHasBlockIO :: (MonadThrow m, MonadMVar m, PrimMonad m, MonadSTM m) diff --git a/lsm-tree.cabal b/lsm-tree.cabal index 44742a534..91f631491 100644 --- a/lsm-tree.cabal +++ b/lsm-tree.cabal @@ -609,7 +609,9 @@ library blockio-api if os(linux) hs-source-dirs: blockio-api/src-linux other-modules: System.FS.BlockIO.Internal - build-depends: lsm-tree:fcntl-nocache + build-depends: + , lsm-tree:fcntl-nocache + , unix ^>=2.8 if !flag(serialblockio) other-modules: System.FS.BlockIO.Async