Skip to content

Commit aff7f2b

Browse files
Remove CodePoints parsers and use CodeUnits parsers (#59)
* Remove CodePoints parser * Implement, test, and document `anyCodePoint` Implementation was designed by @hdgarrood * Add tests for lone surrogate pairs * Test the parsed UTF-16 string becomes invalid due to splitting surrogate pair
1 parent 0d873e9 commit aff7f2b

File tree

6 files changed

+34
-264
lines changed

6 files changed

+34
-264
lines changed

src/Text/Parsing/StringParser/CodePoints.purs

Lines changed: 0 additions & 155 deletions
This file was deleted.

src/Text/Parsing/StringParser/CodeUnits.purs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
module Text.Parsing.StringParser.CodeUnits
77
( eof
88
, anyChar
9+
, anyCodePoint
910
, anyDigit
1011
, string
1112
, satisfy
@@ -30,6 +31,7 @@ import Data.Char (toCharCode)
3031
import Data.Either (Either(..))
3132
import Data.Foldable (class Foldable, foldMap, elem, notElem)
3233
import Data.Maybe (Maybe(..))
34+
import Data.String as SCP
3335
import Data.String.CodeUnits (charAt, singleton)
3436
import Data.String.CodeUnits as SCU
3537
import Data.String.Pattern (Pattern(..))
@@ -45,13 +47,22 @@ eof = Parser \s ->
4547
{ str, pos } | pos < SCU.length str -> Left { pos, error: ParseError "Expected EOF" }
4648
_ -> Right { result: unit, suffix: s }
4749

48-
-- | Match any character.
50+
-- | Match any character. This is limited by `Char` to any code points
51+
-- | that are below `0xFFFF`. If you need to use higher code points
52+
-- | (e.g. emoji), see `anyCodePoint` and `string`.
4953
anyChar :: Parser Char
5054
anyChar = Parser \{ str, pos } ->
5155
case charAt pos str of
5256
Just chr -> Right { result: chr, suffix: { str, pos: pos + 1 } }
5357
Nothing -> Left { pos, error: ParseError "Unexpected EOF" }
5458

59+
-- | Match any code point, including those above `0xFFFF`
60+
anyCodePoint :: Parser SCP.CodePoint
61+
anyCodePoint = Parser \rec@{ str, pos } ->
62+
case SCP.codePointAt 0 (SCU.drop pos str) of
63+
Just cp -> Right { result: cp, suffix: { str, pos: pos + SCU.length (SCP.singleton cp) } }
64+
Nothing -> Left { pos, error: ParseError "Unexpected EOF" }
65+
5566
-- | Match any digit.
5667
anyDigit :: Parser Char
5768
anyDigit = try do

test/CodePoints.purs

Lines changed: 0 additions & 102 deletions
This file was deleted.

test/CodeUnits.purs

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,16 @@ import Data.List (List(Nil), (:))
99
import Data.List.Lazy (take, repeat)
1010
import Data.List.NonEmpty (NonEmptyList(..))
1111
import Data.NonEmpty ((:|))
12+
import Data.String.CodePoints as SCP
1213
import Data.String.CodeUnits (singleton)
1314
import Data.String.Common as SC
1415
import Data.Unfoldable (replicate)
1516
import Effect (Effect)
1617
import Test.Assert (assert', assert)
1718
import Text.Parsing.StringParser (Parser, runParser, try)
19+
import Text.Parsing.StringParser.CodeUnits (anyChar, anyCodePoint, anyDigit, eof, regex, string)
1820
import Text.Parsing.StringParser.Combinators (many1, endBy1, sepBy1, optionMaybe, many, manyTill, many1Till, chainl, fix, between)
1921
import Text.Parsing.StringParser.Expr (Assoc(..), Operator(..), buildExprParser)
20-
import Text.Parsing.StringParser.CodeUnits (anyDigit, eof, string, anyChar, regex)
2122

2223
parens :: forall a. Parser a -> Parser a
2324
parens = between (string "(") (string ")")
@@ -97,3 +98,22 @@ testCodeUnits = do
9798
assert $ canParse (many1Till (string "a") (string "and")) $ (fold <<< take 10000 $ repeat "a") <> "and"
9899
-- check correct order
99100
assert $ expectResult (NonEmptyList ('a' :| 'b':'c':Nil)) (many1Till anyChar (string "d")) "abcd"
101+
-- check anyCodePoint
102+
let anyCodePointStr = map SCP.singleton anyCodePoint
103+
let anyCharStr = map singleton anyChar
104+
assert $ expectResult (NonEmptyList ("🍔" :| "🍺":Nil)) (many1 $ anyCodePointStr) "🍔🍺"
105+
assert $ expectResult "🍔" (anyChar *> anyCodePointStr <* anyChar) "a🍔a"
106+
assert $ expectResult ({a: "🍔", b: "🍺"}) ({a:_, b:_} <$> (anyCodePointStr <* void anyChar) <*> anyCodePointStr) "🍔a🍺"
107+
assert $ expectResult ({a: "a", b: "b", c:"c"}) ({a:_, b:_, c:_} <$> anyCodePointStr <*> anyCodePointStr <*> anyCodePointStr) "abc"
108+
-- check string
109+
assert $ expectResult "🍔🍺" (string "🍔🍺") "🍔🍺"
110+
assert $ expectResult (NonEmptyList ("🍔🍺" :| "🍔🍺":"🍔🍺":Nil)) (many1 $ string "🍔🍺") "🍔🍺🍔🍺🍔🍺"
111+
assert $ expectResult (NonEmptyList ("a🍔🍺":|"a🍔🍺":"a🍔🍺":Nil)) (many1 $ string "a🍔🍺") "a🍔🍺a🍔🍺a🍔🍺"
112+
assert $ expectResult (NonEmptyList ("🍔a🍺":|"🍔a🍺":"🍔a🍺":Nil)) (many1 $ string "🍔a🍺") "🍔a🍺🍔a🍺🍔a🍺"
113+
assert $ expectResult (NonEmptyList ("🍔🍺a" :| "🍔🍺a":"🍔🍺a":Nil)) (many1 $ string "🍔🍺a") "🍔🍺a🍔🍺a🍔🍺a"
114+
assert $ expectResult (NonEmptyList ("a" :| "a":"a":Nil)) (many1 $ string "a") "aaa"
115+
assert $ expectResult (NonEmptyList ("abc" :| "abc":"abc":Nil)) (many1 $ string "abc") "abcabcabc"
116+
assert $ expectResult (NonEmptyList ("abc" :| "abc":"abc":Nil)) (many1 $ string "abc") "abcabcabc"
117+
assert $ expectResult (NonEmptyList ("abc�def" :| Nil)) (many1 $ string "abc�def") "abc�def"
118+
119+
assert $ expectResult "🍔\xd83c" (string "🍔\xd83c") "🍔🍺"

test/Examples.purs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import Data.List.Types (NonEmptyList)
99
import Effect (Effect)
1010
import Effect.Console (log, logShow)
1111
import Text.Parsing.StringParser (Parser, fail, runParser, unParser)
12-
import Text.Parsing.StringParser.CodePoints (anyChar, char, eof, regex, skipSpaces, string)
12+
import Text.Parsing.StringParser.CodeUnits (anyChar, char, eof, regex, skipSpaces, string)
1313
import Text.Parsing.StringParser.Combinators (between, endBy1, lookAhead, many, many1, sepBy1, (<?>))
1414

1515
-- Serves only to make this file runnable

test/Main.purs

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,9 @@ import Prelude
44

55
import Effect (Effect)
66
import Effect.Console (log)
7-
import Test.CodePoints (testCodePoints)
87
import Test.CodeUnits (testCodeUnits)
98

109
main :: Effect Unit
1110
main = do
12-
log "Testing CodePoint parsing\n"
13-
testCodePoints
14-
1511
log "\n\nTesting CodeUnit parsing\n"
1612
testCodeUnits

0 commit comments

Comments
 (0)