Skip to content

Commit

Permalink
fix order for chunks, add newline as chunks separator
Browse files Browse the repository at this point in the history
  • Loading branch information
cyga committed May 22, 2012
1 parent ee9409b commit 1f050b0
Show file tree
Hide file tree
Showing 127 changed files with 3,866 additions and 2,714 deletions.
50 changes: 27 additions & 23 deletions BloomFilter/Easy.hs
Original file line number Diff line number Diff line change
@@ -1,23 +1,10 @@
-- file: BloomFilter/Easy.hs
suggestSizing
:: Integer -- expected maximum capacity
-> Double -- desired false positive rate
-> Either String (Word32,Int) -- (filter size, number of hashes)
suggestSizing capacity errRate
| capacity <= 0 = Left "capacity too small"
| errRate <= 0 || errRate >= 1 = Left "invalid error rate"
| null saneSizes = Left "capacity too large"
| otherwise = Right (minimum saneSizes)
where saneSizes = catMaybes . map sanitize $ sizings capacity errRate
sanitize (bits,hashes)
| bits > maxWord32 - 1 = Nothing
| otherwise = Just (ceiling bits, truncate hashes)
where maxWord32 = fromIntegral (maxBound :: Word32)
easyList :: (Hashable a)
=> Double -- false positive rate (between 0 and 1)
-> [a] -- values to populate the filter with
-> Either String (B.Bloom a)

sizings :: Integer -> Double -> [(Double, Double)]
sizings capacity errRate =
[(((-k) * cap / log (1 - (errRate ** (1 / k)))), k) | k <- [1..50]]
where cap = fromIntegral capacity-- file: BloomFilter/Easy.hs
-- file: BloomFilter/Easy.hs
module BloomFilter.Easy
(
suggestSizing
Expand All @@ -41,8 +28,25 @@ easyList errRate values =
case suggestSizing (genericLength values) errRate of
Left err -> Left err
Right (bits,hashes) -> Right filt
where filt = B.fromList (doubleHash hashes) bits values-- file: BloomFilter/Easy.hs
easyList :: (Hashable a)
=> Double -- false positive rate (between 0 and 1)
-> [a] -- values to populate the filter with
-> Either String (B.Bloom a)
where filt = B.fromList (doubleHash hashes) bits values

-- file: BloomFilter/Easy.hs
suggestSizing
:: Integer -- expected maximum capacity
-> Double -- desired false positive rate
-> Either String (Word32,Int) -- (filter size, number of hashes)
suggestSizing capacity errRate
| capacity <= 0 = Left "capacity too small"
| errRate <= 0 || errRate >= 1 = Left "invalid error rate"
| null saneSizes = Left "capacity too large"
| otherwise = Right (minimum saneSizes)
where saneSizes = catMaybes . map sanitize $ sizings capacity errRate
sanitize (bits,hashes)
| bits > maxWord32 - 1 = Nothing
| otherwise = Just (ceiling bits, truncate hashes)
where maxWord32 = fromIntegral (maxBound :: Word32)

sizings :: Integer -> Double -> [(Double, Double)]
sizings capacity errRate =
[(((-k) * cap / log (1 - (errRate ** (1 / k)))), k) | k <- [1..50]]
where cap = fromIntegral capacity
192 changes: 106 additions & 86 deletions BloomFilter/Hash.hs
Original file line number Diff line number Diff line change
@@ -1,90 +1,4 @@
-- file: BloomFilter/Hash.hs
doubleHash :: Hashable a => Int -> a -> [Word32]
doubleHash numHashes value = go 0
where go n | n == num = []
| otherwise = h1 + h2 * n : go (n + 1)

!h1 = fromIntegral (h `shiftR` 32) .&. maxBound
!h2 = fromIntegral h

h = hashSalt 0x9150a946c4a8966e value
num = fromIntegral numHashes-- file: BloomFilter/Hash.hs
doubleHash :: Hashable a => Int -> a -> [Word32]
doubleHash numHashes value = [h1 + h2 * i | i <- [0..num]]
where h = hashSalt 0x9150a946c4a8966e value
h1 = fromIntegral (h `shiftR` 32) .&. maxBound
h2 = fromIntegral h
num = fromIntegral numHashes-- file: BloomFilter/Hash.hs
doubleHash :: Hashable a => Int -> a -> [Word32]
doubleHash numHashes value = [h1 + h2 * i | i <- [0..num]]
where h = hashSalt 0x9150a946c4a8966e value
h1 = fromIntegral (h `shiftR` 32) .&. maxBound
h2 = fromIntegral h
num = fromIntegral numHashes-- file: BloomFilter/Hash.hs
hashByteString :: Word64 -> Strict.ByteString -> IO Word64
hashByteString salt bs = Strict.useAsCStringLen bs $ \(ptr, len) ->
hashIO ptr (fromIntegral len) salt

instance Hashable Strict.ByteString where
hashSalt salt bs = unsafePerformIO $ hashByteString salt bs

rechunk :: Lazy.ByteString -> [Strict.ByteString]
rechunk s
| Lazy.null s = []
| otherwise = let (pre,suf) = Lazy.splitAt chunkSize s
in repack pre : rechunk suf
where repack = Strict.concat . Lazy.toChunks
chunkSize = 64 * 1024

instance Hashable Lazy.ByteString where
hashSalt salt bs = unsafePerformIO $
foldM hashByteString salt (rechunk bs)-- file: BloomFilter/Hash.hs
hash2 :: (Hashable a) => a -> Word64 -> Word64
hash2 k salt = hashSalt salt k

instance (Hashable a, Hashable b) => Hashable (a,b) where
hashSalt salt (a,b) = hash2 b . hash2 a $ salt

instance (Hashable a, Hashable b, Hashable c) => Hashable (a,b,c) where
hashSalt salt (a,b,c) = hash2 c . hash2 b . hash2 a $ salt-- file: BloomFilter/Hash.hs
hashList :: (Storable a) => Word64 -> [a] -> IO Word64
hashList salt xs =
withArrayLen xs $ \len ptr ->
hashIO ptr (fromIntegral (len * sizeOf x)) salt
where x = head xs

instance (Storable a) => Hashable [a] where
hashSalt salt xs = unsafePerformIO $ hashList salt xs-- file: BloomFilter/Hash.hs
instance Storable a => Hashable a where
hashSalt = hashStorable-- file: BloomFilter/Hash.hs
hashStorable :: Storable a => Word64 -> a -> Word64
hashStorable salt k = unsafePerformIO . with k $ \ptr ->
hashIO ptr (fromIntegral (sizeOf k)) salt

instance Hashable Char where hashSalt = hashStorable
instance Hashable Int where hashSalt = hashStorable
instance Hashable Double where hashSalt = hashStorable-- file: BloomFilter/Hash.hs
class Hashable a where
hashSalt :: Word64 -- ^ salt
-> a -- ^ value to hash
-> Word64

hash :: Hashable a => a -> Word64
hash = hashSalt 0x106fc397cf62f64d3-- file: BloomFilter/Hash.hs
hashIO :: Ptr a -- value to hash
-> CSize -- number of bytes
-> Word64 -- salt
-> IO Word64
hashIO ptr bytes salt =
with (fromIntegral salt) $ \sp -> do
let p1 = castPtr sp
p2 = castPtr sp `plusPtr` 4
go p1 p2
peek sp
where go p1 p2
| bytes .&. 3 == 0 = hashWord2 (castPtr ptr) words p1 p2
| otherwise = hashLittle2 ptr bytes p1 p2
words = bytes `div` 4-- file: BloomFilter/Hash.hs
{-# LANGUAGE BangPatterns, ForeignFunctionInterface #-}
module BloomFilter.Hash
(
Expand All @@ -110,3 +24,109 @@ foreign import ccall unsafe "lookup3.h hashword2" hashWord2

foreign import ccall unsafe "lookup3.h hashlittle2" hashLittle2
:: Ptr a -> CSize -> Ptr Word32 -> Ptr Word32 -> IO ()

-- file: BloomFilter/Hash.hs
hashIO :: Ptr a -- value to hash
-> CSize -- number of bytes
-> Word64 -- salt
-> IO Word64
hashIO ptr bytes salt =
with (fromIntegral salt) $ \sp -> do
let p1 = castPtr sp
p2 = castPtr sp `plusPtr` 4
go p1 p2
peek sp
where go p1 p2
| bytes .&. 3 == 0 = hashWord2 (castPtr ptr) words p1 p2
| otherwise = hashLittle2 ptr bytes p1 p2
words = bytes `div` 4

-- file: BloomFilter/Hash.hs
class Hashable a where
hashSalt :: Word64 -- ^ salt
-> a -- ^ value to hash
-> Word64

hash :: Hashable a => a -> Word64
hash = hashSalt 0x106fc397cf62f64d3

-- file: BloomFilter/Hash.hs
hashStorable :: Storable a => Word64 -> a -> Word64
hashStorable salt k = unsafePerformIO . with k $ \ptr ->
hashIO ptr (fromIntegral (sizeOf k)) salt

instance Hashable Char where hashSalt = hashStorable
instance Hashable Int where hashSalt = hashStorable
instance Hashable Double where hashSalt = hashStorable

-- file: BloomFilter/Hash.hs
instance Storable a => Hashable a where
hashSalt = hashStorable

-- file: BloomFilter/Hash.hs
hashList :: (Storable a) => Word64 -> [a] -> IO Word64
hashList salt xs =
withArrayLen xs $ \len ptr ->
hashIO ptr (fromIntegral (len * sizeOf x)) salt
where x = head xs

instance (Storable a) => Hashable [a] where
hashSalt salt xs = unsafePerformIO $ hashList salt xs

-- file: BloomFilter/Hash.hs
hash2 :: (Hashable a) => a -> Word64 -> Word64
hash2 k salt = hashSalt salt k

instance (Hashable a, Hashable b) => Hashable (a,b) where
hashSalt salt (a,b) = hash2 b . hash2 a $ salt

instance (Hashable a, Hashable b, Hashable c) => Hashable (a,b,c) where
hashSalt salt (a,b,c) = hash2 c . hash2 b . hash2 a $ salt

-- file: BloomFilter/Hash.hs
hashByteString :: Word64 -> Strict.ByteString -> IO Word64
hashByteString salt bs = Strict.useAsCStringLen bs $ \(ptr, len) ->
hashIO ptr (fromIntegral len) salt

instance Hashable Strict.ByteString where
hashSalt salt bs = unsafePerformIO $ hashByteString salt bs

rechunk :: Lazy.ByteString -> [Strict.ByteString]
rechunk s
| Lazy.null s = []
| otherwise = let (pre,suf) = Lazy.splitAt chunkSize s
in repack pre : rechunk suf
where repack = Strict.concat . Lazy.toChunks
chunkSize = 64 * 1024

instance Hashable Lazy.ByteString where
hashSalt salt bs = unsafePerformIO $
foldM hashByteString salt (rechunk bs)

-- file: BloomFilter/Hash.hs
doubleHash :: Hashable a => Int -> a -> [Word32]
doubleHash numHashes value = [h1 + h2 * i | i <- [0..num]]
where h = hashSalt 0x9150a946c4a8966e value
h1 = fromIntegral (h `shiftR` 32) .&. maxBound
h2 = fromIntegral h
num = fromIntegral numHashes

-- file: BloomFilter/Hash.hs
doubleHash :: Hashable a => Int -> a -> [Word32]
doubleHash numHashes value = [h1 + h2 * i | i <- [0..num]]
where h = hashSalt 0x9150a946c4a8966e value
h1 = fromIntegral (h `shiftR` 32) .&. maxBound
h2 = fromIntegral h
num = fromIntegral numHashes

-- file: BloomFilter/Hash.hs
doubleHash :: Hashable a => Int -> a -> [Word32]
doubleHash numHashes value = go 0
where go n | n == num = []
| otherwise = h1 + h2 * n : go (n + 1)

!h1 = fromIntegral (h `shiftR` 32) .&. maxBound
!h2 = fromIntegral h

h = hashSalt 0x9150a946c4a8966e value
num = fromIntegral numHashes
10 changes: 6 additions & 4 deletions BloomFilter/Internal.hs
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
-- file: BloomFilter/Internal.hs
data MutBloom s a = MB {
mutHash :: (a -> [Word32])
, mutArray :: STUArray s Word32 Bool
}-- file: BloomFilter/Internal.hs
module BloomFilter.Internal
(
Bloom(..)
Expand All @@ -17,3 +13,9 @@ data Bloom a = B {
blmHash :: (a -> [Word32])
, blmArray :: UArray Word32 Bool
}

-- file: BloomFilter/Internal.hs
data MutBloom s a = MB {
mutHash :: (a -> [Word32])
, mutArray :: STUArray s Word32 Bool
}
60 changes: 35 additions & 25 deletions BloomFilter/Mutable.hs
Original file line number Diff line number Diff line change
@@ -1,29 +1,4 @@
-- file: BloomFilter/Mutable.hs
allM :: Monad m => (a -> m Bool) -> [a] -> m Bool
allM p (x:xs) = do
ok <- p x
if ok
then allM p xs
else return False
allM _ [] = return True-- file: BloomFilter/Mutable.hs
elem, notElem :: a -> MutBloom s a -> ST s Bool

elem elt filt = indices filt elt >>=
allM (readArray (mutArray filt))

notElem elt filt = not `liftM` elem elt filt-- file: BloomFilter/Mutable.hs
insert :: MutBloom s a -> a -> ST s ()
insert filt elt = indices filt elt >>=
mapM_ (\bit -> writeArray (mutArray filt) bit True)

indices :: MutBloom s a -> a -> ST s [Word32]
indices filt elt = do
modulus <- length filt
return $ map (`mod` modulus) (mutHash filt elt)-- file: BloomFilter/Mutable.hs
length :: MutBloom s a -> ST s Word32
length filt = (succ . snd) `liftM` getBounds (mutArray filt)-- file: BloomFilter/Mutable.hs
new :: (a -> [Word32]) -> Word32 -> ST s (MutBloom s a)
new hash numBits = MB hash `liftM` newArray (0,numBits-1) False-- file: BloomFilter/Mutable.hs
module BloomFilter.Mutable
(
MutBloom
Expand All @@ -41,3 +16,38 @@ import Data.Word (Word32)
import Prelude hiding (elem, length, notElem)

import BloomFilter.Internal (MutBloom(..))

-- file: BloomFilter/Mutable.hs
new :: (a -> [Word32]) -> Word32 -> ST s (MutBloom s a)
new hash numBits = MB hash `liftM` newArray (0,numBits-1) False

-- file: BloomFilter/Mutable.hs
length :: MutBloom s a -> ST s Word32
length filt = (succ . snd) `liftM` getBounds (mutArray filt)

-- file: BloomFilter/Mutable.hs
insert :: MutBloom s a -> a -> ST s ()
insert filt elt = indices filt elt >>=
mapM_ (\bit -> writeArray (mutArray filt) bit True)

indices :: MutBloom s a -> a -> ST s [Word32]
indices filt elt = do
modulus <- length filt
return $ map (`mod` modulus) (mutHash filt elt)

-- file: BloomFilter/Mutable.hs
elem, notElem :: a -> MutBloom s a -> ST s Bool

elem elt filt = indices filt elt >>=
allM (readArray (mutArray filt))

notElem elt filt = not `liftM` elem elt filt

-- file: BloomFilter/Mutable.hs
allM :: Monad m => (a -> m Bool) -> [a] -> m Bool
allM p (x:xs) = do
ok <- p x
if ok
then allM p xs
else return False
allM _ [] = return True
8 changes: 5 additions & 3 deletions ch02/myDrop.hs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
-- file: ch02/myDrop.hs
niceDrop n xs | n <= 0 = xs
niceDrop _ [] = []
niceDrop n (_:xs) = niceDrop (n - 1) xs-- file: ch02/myDrop.hs
myDrop n xs = if n <= 0 || null xs
then xs
else myDrop (n - 1) (tail xs)

-- file: ch02/myDrop.hs
niceDrop n xs | n <= 0 = xs
niceDrop _ [] = []
niceDrop n (_:xs) = niceDrop (n - 1) xs
6 changes: 4 additions & 2 deletions ch03/BadPattern.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
-- file: ch03/BadPattern.hs
goodExample (x:xs) = x + goodExample xs
goodExample _ = 0-- file: ch03/BadPattern.hs
badExample (x:xs) = x + badExample xs

-- file: ch03/BadPattern.hs
goodExample (x:xs) = x + goodExample xs
goodExample _ = 0
8 changes: 5 additions & 3 deletions ch03/BadTree.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
-- file: ch03/BadTree.hs
nodesAreSame (Node a _ _) (Node b _ _)
| a == b = Just a
nodesAreSame _ _ = Nothing-- file: ch03/BadTree.hs
bad_nodesAreSame (Node a _ _) (Node a _ _) = Just a
bad_nodesAreSame _ _ = Nothing

-- file: ch03/BadTree.hs
nodesAreSame (Node a _ _) (Node b _ _)
| a == b = Just a
nodesAreSame _ _ = Nothing
Loading

0 comments on commit 1f050b0

Please sign in to comment.