Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Serialise Texts without going through ByteString #617

Merged
merged 1 commit into from
Oct 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions src/Data/Text.hs
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ import qualified Data.Text.Array as A
import qualified Data.List as L hiding (head, tail)
import qualified Data.List.NonEmpty as NonEmptyList
import Data.Binary (Binary(get, put))
import Data.Binary.Put (putBuilder)
import Data.Monoid (Monoid(..))
import Data.Semigroup (Semigroup(..))
import Data.String (IsString(..))
Expand All @@ -245,7 +246,7 @@ import Data.Text.Internal.Measure (measure_off)
import Data.Text.Internal.Encoding.Utf8 (utf8Length, utf8LengthByLeader, chr3, ord2, ord3, ord4)
import qualified Data.Text.Internal.Fusion as S
import qualified Data.Text.Internal.Fusion.Common as S
import Data.Text.Encoding (decodeUtf8', encodeUtf8)
import Data.Text.Encoding (decodeUtf8', encodeUtf8Builder)
import Data.Text.Internal.Fusion (stream, reverseStream, unstream)
import Data.Text.Internal.Private (span_)
import Data.Text.Internal (Text(..), StrictText, empty, firstf, mul, safe, text, append, pack)
Expand Down Expand Up @@ -394,7 +395,11 @@ instance NFData Text where rnf !_ = ()

-- | @since 1.2.1.0
instance Binary Text where
put t = put (encodeUtf8 t)
put t = do
-- This needs to be in sync with the Binary instance for ByteString
-- in the binary package.
put (lengthWord8 t)
putBuilder (encodeUtf8Builder t)
get = do
bs <- get
case decodeUtf8' bs of
Expand Down Expand Up @@ -556,7 +561,7 @@ null (Text _arr _off len) =
len <= 0
{-# INLINE [1] null #-}

{-# RULES
{-# RULES
"TEXT null/empty -> True" null empty = True
#-}

Expand Down Expand Up @@ -1275,7 +1280,7 @@ take :: Int -> Text -> Text
take n t@(Text arr off len)
| n <= 0 = empty
| n >= len || m >= len || m < 0 = t
| otherwise = Text arr off m
| otherwise = Text arr off m
where
m = measureOff n t
{-# INLINE [1] take #-}
Expand Down Expand Up @@ -1325,7 +1330,7 @@ drop :: Int -> Text -> Text
drop n t@(Text arr off len)
| n <= 0 = t
| n >= len || m >= len || m < 0 = empty
| otherwise = Text arr (off+m) (len-m)
| otherwise = Text arr (off+m) (len-m)
where m = measureOff n t
{-# INLINE [1] drop #-}

Expand Down Expand Up @@ -1434,7 +1439,7 @@ splitAt :: Int -> Text -> (Text, Text)
splitAt n t@(Text arr off len)
| n <= 0 = (empty, t)
| n >= len || m >= len || m < 0 = (t, empty)
| otherwise = (Text arr off m, Text arr (off+m) (len-m))
| otherwise = (Text arr off m, Text arr (off+m) (len-m))
where
m = measureOff n t

Expand Down
9 changes: 7 additions & 2 deletions src/Data/Text/Lazy.hs
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ import Data.Char (isSpace)
import Data.Data (Data(gfoldl, toConstr, gunfold, dataTypeOf), constrIndex,
Constr, mkConstr, DataType, mkDataType, Fixity(Prefix))
import Data.Binary (Binary(get, put))
import Data.Binary.Put (putBuilder)
import Data.List.NonEmpty (NonEmpty(..))
import qualified Data.List.NonEmpty as NE
import Data.Monoid (Monoid(..))
Expand All @@ -241,7 +242,7 @@ import Data.Text.Internal.Lazy (Text(..), chunk, empty, foldlChunks,
import Data.Text.Internal (firstf, safe, text)
import Data.Text.Internal.Reverse (reverseNonEmpty)
import Data.Text.Internal.Transformation (mapNonEmpty, toCaseFoldNonEmpty, toLowerNonEmpty, toUpperNonEmpty, filter_)
import Data.Text.Lazy.Encoding (decodeUtf8', encodeUtf8)
import Data.Text.Lazy.Encoding (decodeUtf8', encodeUtf8Builder)
import Data.Text.Internal.Lazy.Search (indices)
import qualified GHC.CString as GHC
import qualified GHC.Exts as Exts
Expand Down Expand Up @@ -352,7 +353,11 @@ instance NFData Text where

-- | @since 1.2.1.0
instance Binary Text where
put t = put (encodeUtf8 t)
put t = do
-- This needs to be in sync with the Binary instance for ByteString
-- in the binary package.
put (foldlChunks (\n c -> n + T.lengthWord8 c) 0 t)
putBuilder (encodeUtf8Builder t)
get = do
bs <- get
case decodeUtf8' bs of
Expand Down
15 changes: 14 additions & 1 deletion tests/Tests/Properties/Instances.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ module Tests.Properties.Instances
( testInstances
) where

import Data.Binary (encode, decodeOrFail)
import Data.String (IsString(fromString))
import Test.QuickCheck
import Test.Tasty (TestTree, testGroup)
Expand Down Expand Up @@ -43,6 +44,16 @@ tl_mempty = mempty === (unpackS (mempty :: TL.Text))
t_IsString = fromString `eqP` (T.unpack . fromString)
tl_IsString = fromString `eqP` (TL.unpack . fromString)

t_Binary s =
case decodeOrFail . encode $ (s :: T.Text) of
Left _ -> counterexample (show (T.unpack s)) (property False)
Right (_, _, s') -> s === s'

tl_Binary s =
case decodeOrFail . encode $ (s :: TL.Text) of
Left _ -> counterexample (show (TL.unpack s)) (property False)
Right (_, _, s') -> s === s'

testInstances :: TestTree
testInstances =
testGroup "instances" [
Expand All @@ -65,5 +76,7 @@ testInstances =
testProperty "t_mempty" t_mempty,
testProperty "tl_mempty" tl_mempty,
testProperty "t_IsString" t_IsString,
testProperty "tl_IsString" tl_IsString
testProperty "tl_IsString" tl_IsString,
testProperty "t_Binary" t_Binary,
testProperty "tl_Binary" tl_Binary
]
1 change: 1 addition & 0 deletions text.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,7 @@ test-suite tests
build-depends:
QuickCheck >= 2.12.6 && < 2.16,
base <5,
binary,
bytestring,
deepseq,
directory,
Expand Down
Loading