Skip to content

CodePoint versions of oneOf and noneOf #127

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ Breaking changes:

New features:

- Added primitive parsers `oneOfCodePoints` and `noneOfCodePoints` - `CodePoint`
versions of `oneOf` and `noneOf` respectively. (#127 by @fsoikin)

Bugfixes:

Other improvements:
Expand Down
16 changes: 14 additions & 2 deletions src/Text/Parsing/Parser/String.purs
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,23 @@ module Text.Parsing.Parser.String
, whiteSpace
, skipSpaces
, oneOf
, oneOfCodePoints
, noneOf
, noneOfCodePoints
, match
) where

import Prelude hiding (between)

import Control.Alt ((<|>))
import Control.Lazy (defer)
import Control.Monad.State (get, put)
import Data.Array (notElem)
import Data.Char (fromCharCode)
import Data.CodePoint.Unicode (isSpace)
import Data.Foldable (elem)
import Data.Maybe (Maybe(..))
import Data.String (CodePoint, Pattern(..), null, stripPrefix, uncons)
import Data.String (CodePoint, Pattern(..), null, singleton, stripPrefix, uncons)
import Data.String.CodeUnits as SCU
import Data.Tuple (Tuple(..), fst)
import Text.Parsing.Parser (ParseState(..), ParserT, fail)
Expand Down Expand Up @@ -121,6 +125,14 @@ oneOf ss = satisfy (flip elem ss) <?> ("one of " <> show ss)
noneOf :: forall m. Monad m => Array Char -> ParserT String m Char
noneOf ss = satisfy (flip notElem ss) <?> ("none of " <> show ss)

-- | Match one of the Unicode characters in the array.
oneOfCodePoints :: forall m. Monad m => Array CodePoint -> ParserT String m CodePoint
oneOfCodePoints ss = satisfyCodePoint (flip elem ss) <|> defer \_ -> fail $ "Expected one of " <> show (singleton <$> ss)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I chose to make error messages deferred to avoid the cost of generating the error messages until they're needed. Feels like a small thing, but the waste just bothers me too much.

Let me know if this is undesirable for some reason, I'll change it back to <?>

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, I think this is a great idea, and from the tests you added it clearly works. We should probably do this everywhere. I opened #128


-- | Match any Unicode character not in the array.
noneOfCodePoints :: forall m. Monad m => Array CodePoint -> ParserT String m CodePoint
noneOfCodePoints ss = satisfyCodePoint (flip notElem ss) <|> defer \_ -> fail $ "Expected none of " <> show (singleton <$> ss)

-- | Updates a `Position` by adding the columns and lines in `String`.
updatePosString :: Position -> String -> Position
updatePosString pos str = case uncons str of
Expand Down Expand Up @@ -154,4 +166,4 @@ match p = do
-- | This will break at runtime if the definition of CodePoint ever changes
-- | to something other than `newtype CodePoint = CodePoint Int`.
unCodePoint :: CodePoint -> Int
unCodePoint = unsafeCoerce
unCodePoint = unsafeCoerce
28 changes: 26 additions & 2 deletions test/Main.purs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import Prelude hiding (between, when)
import Control.Alt ((<|>))
import Control.Lazy (fix)
import Data.Array (some)
import Data.Array as Array
import Data.Either (Either(..))
import Data.List (List(..), fromFoldable, many)
import Data.List.NonEmpty (cons, cons')
Expand All @@ -16,12 +17,12 @@ import Data.Tuple (Tuple(..))
import Effect (Effect)
import Effect.Console (logShow)
import Test.Assert (assert')
import Text.Parsing.Parser (ParseError(..), Parser, ParserT, parseErrorPosition, region, runParser)
import Text.Parsing.Parser (ParseError(..), Parser, ParserT, parseErrorMessage, parseErrorPosition, region, runParser)
import Text.Parsing.Parser.Combinators (between, chainl, endBy1, optionMaybe, sepBy1, try)
import Text.Parsing.Parser.Expr (Assoc(..), Operator(..), buildExprParser)
import Text.Parsing.Parser.Language (haskellDef, haskellStyle, javaStyle)
import Text.Parsing.Parser.Pos (Position(..), initialPos)
import Text.Parsing.Parser.String (anyChar, anyCodePoint, char, eof, satisfy, string, whiteSpace)
import Text.Parsing.Parser.String (anyChar, anyCodePoint, char, eof, noneOfCodePoints, oneOfCodePoints, satisfy, string, whiteSpace)
import Text.Parsing.Parser.Token (TokenParser, letter, makeTokenParser, match, token, when)

parens :: forall m a. Monad m => ParserT String m a -> ParserT String m a
Expand Down Expand Up @@ -49,6 +50,14 @@ parseErrorTestPosition p input expected = case runParser input p of
assert' ("expected: " <> show expected <> ", pos: " <> show pos) (expected == pos)
logShow expected

parseErrorTestMessage :: forall s a. Show a => Parser s a -> s -> String -> Effect Unit
parseErrorTestMessage p input expected = case runParser input p of
Right x -> assert' ("ParseError expected '" <> expected <> "' but parsed " <> show x) false
Left err -> do
let msg = parseErrorMessage err
assert' ("expected: " <> expected <> ", message: " <> msg) (expected == msg)
logShow expected

opTest :: Parser String String
opTest = chainl (singleton <$> anyChar) (char '+' $> append) ""

Expand Down Expand Up @@ -465,6 +474,21 @@ main = do
sixteenth <- string "𝅘𝅥𝅯" <|> (singleton <$> char 'x')
pure $ [ SCP.singleton quarter, eighth, letterx, sixteenth ]

parseTest "🤔💯✅🤔💯" [ "🤔💯", "✅🤔💯" ] do
none <- Array.many $ noneOfCodePoints $ SCP.toCodePointArray "❓✅"
one <- Array.many $ oneOfCodePoints $ SCP.toCodePointArray "🤔💯✅"
pure $ SCP.fromCodePointArray <$> [ none, one ]

parseErrorTestMessage
(noneOfCodePoints $ SCP.toCodePointArray "❓✅")
"❓"
"Expected none of [\"❓\",\"✅\"]"

parseErrorTestMessage
(oneOfCodePoints $ SCP.toCodePointArray "❓✅")
"abc"
"Expected one of [\"❓\",\"✅\"]"

parseTest "aa bb" [ "aa", " ", "bb" ] do
aa <- SCU.fromCharArray <$> some letter
w <- whiteSpace
Expand Down