Skip to content

Commit 8cf6083

Browse files
committed
Delete regex, replace it with mkRegex
1 parent 9467576 commit 8cf6083

File tree

6 files changed

+80
-95
lines changed

6 files changed

+80
-95
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ Breaking changes:
2525
`<|>` was made right associative. Decreasing these two operators
2626
prevents a compiler error (i.e. `MixedAssociativityError`)
2727
without causing issues with `<$>`.
28+
- Delete the `regex` parser and replace it with `mkRegex`. ( by @jamesdbrock)
2829

2930
New features:
3031

bench/Json/Parsing.purs

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,17 @@ import Prelude hiding (between)
44

55
import Bench.Json.Common (Json(..))
66
import Control.Lazy (defer)
7+
import Data.Either (Either(..))
78
import Data.List (List)
89
import Data.Maybe (Maybe(..))
910
import Data.Number as Number
11+
import Data.String.Regex.Flags (noFlags)
1012
import Data.Tuple (Tuple(..))
13+
import Effect.Exception (throw)
14+
import Effect.Unsafe (unsafePerformEffect)
1115
import Text.Parsing.Parser (ParserT, fail)
1216
import Text.Parsing.Parser.Combinators (between, choice, sepBy, try)
13-
import Text.Parsing.Parser.String (regex, skipSpaces, string)
17+
import Text.Parsing.Parser.String (mkRegex, skipSpaces, string)
1418

1519
json :: forall m. Monad m => ParserT String m Json
1620
json = defer \_ ->
@@ -38,15 +42,18 @@ jsonArray = defer \_ ->
3842
json `sepBy` (try (skipSpaces *> string ","))
3943

4044
jsonString :: forall m. Monad m => ParserT String m String
41-
jsonString = between (string "\"") (string "\"") do
42-
regex {} """\\"|[^"]*"""
45+
jsonString = case mkRegex """\\"|[^"]*""" noFlags of
46+
Left err -> unsafePerformEffect $ throw err
47+
Right p -> between (string "\"") (string "\"") p
4348

4449
jsonNumber :: forall m. Monad m => ParserT String m Number
45-
jsonNumber = do
46-
n <- regex {} """(\+|-)?(\d+(\.\d*)?|\d*\.\d+)([eE](\+|-)?\d+)?"""
47-
case Number.fromString n of
48-
Just n' -> pure n'
49-
Nothing -> fail "Expected number"
50+
jsonNumber = case mkRegex """(\+|-)?(\d+(\.\d*)?|\d*\.\d+)([eE](\+|-)?\d+)?""" noFlags of
51+
Left err -> unsafePerformEffect $ throw err
52+
Right p -> do
53+
n <- p
54+
case Number.fromString n of
55+
Just n' -> pure n'
56+
Nothing -> fail "Expected number"
5057

5158
jsonBoolean :: forall m. Monad m => ParserT String m Boolean
5259
jsonBoolean = choice

packages.dhall

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
let upstream =
22
https://raw.githubusercontent.com/purescript/package-sets/prepare-0.15/src/packages.dhall
3+
sha256:5f32c078b014909642302b328bd9bebcdcedc301956a709b302f19521680a0aa
34

45
in upstream

spago.dhall

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
, "numbers"
1818
, "partial"
1919
, "prelude"
20-
, "record"
2120
, "strings"
2221
, "tailrec"
2322
, "transformers"

src/Text/Parsing/Parser/String.purs

Lines changed: 40 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,7 @@ module Text.Parsing.Parser.String
3636
, noneOf
3737
, noneOfCodePoints
3838
, match
39-
, regex
40-
, RegexFlagsRow
39+
, mkRegex
4140
, consumeWith
4241
) where
4342

@@ -55,12 +54,10 @@ import Data.String (CodePoint, Pattern(..), codePointAt, length, null, singleton
5554
import Data.String as String
5655
import Data.String.CodeUnits as SCU
5756
import Data.String.Regex as Regex
58-
import Data.String.Regex.Flags (RegexFlags(..), RegexFlagsRec)
57+
import Data.String.Regex.Flags (RegexFlags)
5958
import Data.Tuple (Tuple(..), fst)
6059
import Partial.Unsafe (unsafePartial)
61-
import Prim.Row (class Nub, class Union)
62-
import Record (merge)
63-
import Text.Parsing.Parser (ParseError(..), ParseState(..), ParserT(..), fail)
60+
import Text.Parsing.Parser (ParseError(..), ParseState(..), ParserT(..))
6461
import Text.Parsing.Parser.Combinators ((<?>), (<~?>))
6562
import Text.Parsing.Parser.Pos (Position(..))
6663

@@ -229,101 +226,72 @@ match p = do
229226
-- boundary.
230227
pure $ Tuple (SCU.take (SCU.length input1 - SCU.length input2) input1) x
231228

232-
-- | Parser which uses the `Data.String.Regex` module to match the regular
233-
-- | expression pattern passed as the `String`
234-
-- | argument to the parser.
229+
-- | Compile a regular expression string into a regular expression parser.
230+
-- |
231+
-- | This function will use the `Data.String.Regex.regex` function to compile and return a parser which can be used
232+
-- | in a `ParserT String m` monad.
235233
-- |
236234
-- | This parser will try to match the regular expression pattern starting
237235
-- | at the current parser position. On success, it will return the matched
238236
-- | substring.
239237
-- |
240-
-- | If the `Regex` pattern string fails to compile then this parser will fail.
241-
-- | (Note: It’s not possible to use a precompiled `Regex` because this parser
242-
-- | must set flags and make adjustments to the `Regex` pattern string.)
238+
-- | [*MDN Regular Expressions Cheatsheet*](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Cheatsheet)
239+
-- |
240+
-- | This function should be called outside the context of a `ParserT String m` monad, because this function might
241+
-- | fail with a `Left` RegExp compilation error message.
242+
-- | If you call this function inside of the `ParserT String m` monad and then `fail` the parse when the compilation fails,
243+
-- | then that could be confusing because a parser failure is supposed to indicate an invalid input string.
244+
-- | If the compilation failure occurs in an `alt` then the compilation failure might not be reported at all and instead
245+
-- | the input string would be parsed incorrectly.
243246
-- |
244247
-- | This parser may be useful for quickly consuming a large section of the
245248
-- | input `String`, because in a JavaScript runtime environment the `RegExp`
246249
-- | runtime is a lot faster than primitive parsers.
247250
-- |
248-
-- | [*MDN Regular Expressions Cheatsheet*](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Cheatsheet)
249-
-- |
250-
-- | #### Flags
251+
-- | #### Example
251252
-- |
252-
-- | The `Record flags` argument to the parser is for `Regex` flags. Here are
253-
-- | the default flags.
253+
-- | This example shows how to compile and run the `xMany` parser which will capture the regular expression pattern `x*`.
254254
-- |
255255
-- | ```purescript
256-
-- | { dotAll: true
257-
-- | ignoreCase: false
258-
-- | unicode: true
259-
-- | }
256+
-- | case mkRegex "x*" noFlags of
257+
-- | Left compileError -> unsafePerformEffect $ throw $ "xMany failed to compile: " <> compileError
258+
-- | Right xMany -> case runParser "xxxZ" xMany of
259+
-- | Left (ParseError parseError _) -> -- parse failed
260+
-- | Right capture -> -- capture should be "xxx"
260261
-- | ```
261262
-- |
262-
-- | To use the defaults, pass
263-
-- | `{}` as the flags argument. For case-insensitive pattern matching, pass
264-
-- | `{ignoreCase: true}` as the flags argument.
263+
-- | #### Flags
265264
-- |
266-
-- | The other `Data.String.Regex.Flags.RegexFlagsRec` fields are mostly
267-
-- | nonsense in the context of parsing
268-
-- | and use of the other flags may cause strange behavior in the parser.
265+
-- | Set `RegexFlags` with the `Semigroup` instance like this.
269266
-- |
270-
-- | [*MDN Advanced searching with flags*](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#advanced_searching_with_flags)
267+
-- | ```purescript
268+
-- | mkRegex "x*" (dotAll <> ignoreCase)
269+
-- | ```
271270
-- |
272-
-- | #### Example
271+
-- | The `dotAll`, `unicode`, and `ignoreCase` flags might make sense for a `mkRegex` parser. The other flags will
272+
-- | probably cause surprising behavior and you should avoid them.
273273
-- |
274-
-- | ```
275-
-- | runParser "ababXX" (regex {} "(ab)+")
276-
-- | ```
277-
-- | ```
278-
-- | (Right "abab")
279-
-- | ```
280-
regex
281-
:: forall m flags f_
282-
. Monad m
283-
=> Union flags RegexFlagsRow f_
284-
=> Nub f_ RegexFlagsRow
285-
=> Record flags
286-
-> String
287-
-> ParserT String m String
288-
regex flags pattern =
289-
-- Prefix a ^ to ensure the pattern only matches the current position in the parse
290-
case Regex.regex ("^(" <> pattern <> ")") flags' of
291-
Left paterr ->
292-
fail $ "Regex pattern error " <> paterr
293-
Right regexobj ->
274+
-- | [*MDN Advanced searching with flags*](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#advanced_searching_with_flags)
275+
mkRegex :: forall m. String -> RegexFlags -> Either String (ParserT String m String)
276+
mkRegex pattern flags =
277+
Regex.regex ("^(" <> pattern <> ")") flags <#> \regexobj ->
294278
consumeWith \input -> do
295279
case NonEmptyArray.head <$> Regex.match regexobj input of
296280
Just (Just consumed) -> do
297281
let remainder = SCU.drop (SCU.length consumed) input
298282
Right { value: consumed, consumed, remainder }
299283
_ ->
300284
Left "No Regex pattern match"
301-
where
302-
flags' = RegexFlags
303-
( merge flags
304-
{ dotAll: true
305-
, global: false
306-
, ignoreCase: false
307-
, multiline: false
308-
, sticky: false
309-
, unicode: true
310-
} :: RegexFlagsRec
311-
)
312285

313-
-- | The fields from `Data.String.Regex.Flags.RegexFlagsRec`.
314-
type RegexFlagsRow =
315-
( dotAll :: Boolean
316-
, global :: Boolean
317-
, ignoreCase :: Boolean
318-
, multiline :: Boolean
319-
, sticky :: Boolean
320-
, unicode :: Boolean
321-
)
322286

323-
-- | Consumes a portion of the input string while yielding a value.
287+
-- | Consume a portion of the input string while yielding a value.
288+
-- |
289+
-- | Takes a consumption function which takes the remaining input `String`
290+
-- | as its argument and returns three fields:
291+
-- |
324292
-- | * `value` is the value to return.
325-
-- | * `consumed` is the input that was consumed and is used to update the parser position.
326-
-- | * `remainder` is the new input state.
293+
-- | * `consumed` is the input `String` that was consumed. It is used to update the parser position.
294+
-- | * `remainder` is the new remaining input `String`.
327295
consumeWith
328296
:: forall m a
329297
. (String -> Either String { value :: a, consumed :: String, remainder :: String })

test/Main.purs

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
-- Run tests:
2+
--
3+
-- spago -x spago-dev.dhall test
4+
--
5+
16
module Test.Main where
27

38
import Prelude hiding (between, when)
@@ -16,6 +21,7 @@ import Data.Number (infinity, isNaN)
1621
import Data.String.CodePoints as SCP
1722
import Data.String.CodeUnits (fromCharArray, singleton)
1823
import Data.String.CodeUnits as SCU
24+
import Data.String.Regex.Flags (RegexFlags, ignoreCase, noFlags)
1925
import Data.Tuple (Tuple(..))
2026
import Effect (Effect)
2127
import Effect.Console (logShow)
@@ -26,7 +32,7 @@ import Text.Parsing.Parser.Combinators (between, chainl, chainl1Rec, chainlRec,
2632
import Text.Parsing.Parser.Expr (Assoc(..), Operator(..), buildExprParser)
2733
import Text.Parsing.Parser.Language (haskellDef, haskellStyle, javaStyle)
2834
import Text.Parsing.Parser.Pos (Position(..), initialPos)
29-
import Text.Parsing.Parser.String (anyChar, anyCodePoint, char, eof, noneOfCodePoints, oneOfCodePoints, regex, rest, satisfy, string, takeN, whiteSpace)
35+
import Text.Parsing.Parser.String (anyChar, anyCodePoint, char, eof, mkRegex, noneOfCodePoints, oneOfCodePoints, rest, satisfy, string, takeN, whiteSpace)
3036
import Text.Parsing.Parser.String.Basic (intDecimal, number, letter)
3137
import Text.Parsing.Parser.Token (TokenParser, makeTokenParser, match, token, when)
3238
import Text.Parsing.Parser.Token as Parser.Token
@@ -94,6 +100,15 @@ manySatisfyTest = do
94100
_ <- char '?'
95101
pure (fromCharArray r)
96102

103+
mkRegexTest :: String -> String -> String -> RegexFlags -> (Parser String String -> Parser String String) -> Effect Unit
104+
mkRegexTest input expected pattern flags pars =
105+
case mkRegex pattern flags of
106+
Left err -> assert' ("error: " <> show err) false
107+
Right p -> parseTest input expected $ pars p
108+
109+
110+
-- TODO everything is stack-safe now.
111+
--
97112
-- This test doesn't test the actual stack safety of these combinators, mainly
98113
-- because I don't know how to come up with an example guaranteed to be large
99114
-- enough to overflow the stack. But thankfully, their stack safety is more or
@@ -749,19 +764,13 @@ main = do
749764

750765
parseTest "-300" (-300) intDecimal
751766

752-
parseTest "regex-" "regex" (regex {} "regex" <* char '-' <* eof)
753-
parseTest "-regex" "regex" (char '-' *> regex {} "regex" <* eof)
754-
parseTest "regexregex" "regexregex" (regex {} "(regex)*")
755-
parseTest "regexregex" "regex" (regex {} "(^regex)*")
756-
parseTest "ReGeX" "ReGeX" (regex { ignoreCase: true } "regex")
757-
parseTest "regexcapregexcap" "regexcap" (regex {} "(?<CaptureGroupName>regexcap)")
758-
parseTest "regexcapregexcap" "regexcap" (regex {} "(((?<CaptureGroupName>(r)e(g)excap)))")
759-
760-
-- Maybe it is nonsense to allow multiline regex.
761-
-- Because an end-of-line regex pattern `$` will match but then the
762-
-- newline character will not be consumed.
763-
-- Also why does this test fail? I think it should succeed.
764-
-- parseTest "regex\nregex\n" "regex\nregex\n" (regex {dotAll: false, multiline: true} "(^regex$)+")
767+
mkRegexTest "regex-" "regex" "regex" noFlags (\p -> p <* char '-' <* eof)
768+
mkRegexTest "-regex" "regex" "regex" noFlags (\p -> char '-' *> p <* eof)
769+
mkRegexTest "regexregex" "regexregex" "(regex)*" noFlags identity
770+
mkRegexTest "regexregex" "regex" "(^regex)*" noFlags identity
771+
mkRegexTest "ReGeX" "ReGeX" "regex" ignoreCase identity
772+
mkRegexTest "regexcapregexcap" "regexcap" "(?<CaptureGroupName>regexcap)" noFlags identity
773+
mkRegexTest "regexcapregexcap" "regexcap" "(((?<CaptureGroupName>(r)e(g)excap)))" noFlags identity
765774

766775
stackSafeLoopsTest
767776

0 commit comments

Comments
 (0)