@@ -36,8 +36,7 @@ module Text.Parsing.Parser.String
36
36
, noneOf
37
37
, noneOfCodePoints
38
38
, match
39
- , regex
40
- , RegexFlagsRow
39
+ , mkRegex
41
40
, consumeWith
42
41
) where
43
42
@@ -55,12 +54,10 @@ import Data.String (CodePoint, Pattern(..), codePointAt, length, null, singleton
55
54
import Data.String as String
56
55
import Data.String.CodeUnits as SCU
57
56
import Data.String.Regex as Regex
58
- import Data.String.Regex.Flags (RegexFlags (..), RegexFlagsRec )
57
+ import Data.String.Regex.Flags (RegexFlags )
59
58
import Data.Tuple (Tuple (..), fst )
60
59
import Partial.Unsafe (unsafePartial )
61
- import Prim.Row (class Nub , class Union )
62
- import Record (merge )
63
- import Text.Parsing.Parser (ParseError (..), ParseState (..), ParserT (..), fail )
60
+ import Text.Parsing.Parser (ParseError (..), ParseState (..), ParserT (..))
64
61
import Text.Parsing.Parser.Combinators ((<?>), (<~?>))
65
62
import Text.Parsing.Parser.Pos (Position (..))
66
63
@@ -229,101 +226,72 @@ match p = do
229
226
-- boundary.
230
227
pure $ Tuple (SCU .take (SCU .length input1 - SCU .length input2) input1) x
231
228
232
- -- | Parser which uses the `Data.String.Regex` module to match the regular
233
- -- | expression pattern passed as the `String`
234
- -- | argument to the parser.
229
+ -- | Compile a regular expression string into a regular expression parser.
230
+ -- |
231
+ -- | This function will use the `Data.String.Regex.regex` function to compile and return a parser which can be used
232
+ -- | in a `ParserT String m` monad.
235
233
-- |
236
234
-- | This parser will try to match the regular expression pattern starting
237
235
-- | at the current parser position. On success, it will return the matched
238
236
-- | substring.
239
237
-- |
240
- -- | If the `Regex` pattern string fails to compile then this parser will fail.
241
- -- | (Note: It’s not possible to use a precompiled `Regex` because this parser
242
- -- | must set flags and make adjustments to the `Regex` pattern string.)
238
+ -- | [*MDN Regular Expressions Cheatsheet*](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Cheatsheet)
239
+ -- |
240
+ -- | This function should be called outside the context of a `ParserT String m` monad, because compilation might
241
+ -- | fail, in which case this function returns a `Left` containing the RegExp compilation error message.
242
+ -- | If you call this function inside of the `ParserT String m` monad and then `fail` the parse when the compilation fails,
243
+ -- | then that could be confusing because a parser failure is supposed to indicate an invalid input string.
244
+ -- | If the compilation failure occurs in an `alt` then the compilation failure might not be reported at all and instead
245
+ -- | the input string would be parsed incorrectly.
243
246
-- |
244
247
-- | This parser may be useful for quickly consuming a large section of the
245
248
-- | input `String`, because in a JavaScript runtime environment the `RegExp`
246
249
-- | runtime is a lot faster than primitive parsers.
247
250
-- |
248
- -- | [*MDN Regular Expressions Cheatsheet*](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Cheatsheet)
249
- -- |
250
- -- | #### Flags
251
+ -- | #### Example
251
252
-- |
252
- -- | The `Record flags` argument to the parser is for `Regex` flags. Here are
253
- -- | the default flags.
253
+ -- | This example shows how to compile and run the `xMany` parser which will capture the regular expression pattern `x*`.
254
254
-- |
255
255
-- | ```purescript
256
- -- | { dotAll: true
257
- -- | ignoreCase: false
258
- -- | unicode: true
259
- -- | }
256
+ -- | case mkRegex "x*" noFlags of
257
+ -- | Left compileError -> unsafePerformEffect $ throw $ "xMany failed to compile: " <> compileError
258
+ -- | Right xMany -> case runParser "xxxZ" xMany of
259
+ -- | Left (ParseError parseError _) -> -- parse failed
260
+ -- | Right capture -> -- capture should be "xxx"
260
261
-- | ```
261
262
-- |
262
- -- | To use the defaults, pass
263
- -- | `{}` as the flags argument. For case-insensitive pattern matching, pass
264
- -- | `{ignoreCase: true}` as the flags argument.
263
+ -- | #### Flags
265
264
-- |
266
- -- | The other `Data.String.Regex.Flags.RegexFlagsRec` fields are mostly
267
- -- | nonsense in the context of parsing
268
- -- | and use of the other flags may cause strange behavior in the parser.
265
+ -- | Combine `RegexFlags` values with the `Semigroup` append operator `<>`, like this:
269
266
-- |
270
- -- | [*MDN Advanced searching with flags*](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#advanced_searching_with_flags)
267
+ -- | ```purescript
268
+ -- | mkRegex "x*" (dotAll <> ignoreCase)
269
+ -- | ```
271
270
-- |
272
- -- | #### Example
271
+ -- | The `dotAll`, `unicode`, and `ignoreCase` flags might make sense for a `mkRegex` parser. The other flags will
272
+ -- | probably cause surprising behavior and you should avoid them.
273
273
-- |
274
- -- | ```
275
- -- | runParser "ababXX" (regex {} "(ab)+")
276
- -- | ```
277
- -- | ```
278
- -- | (Right "abab")
279
- -- | ```
280
- regex
281
- :: forall m flags f_
282
- . Monad m
283
- => Union flags RegexFlagsRow f_
284
- => Nub f_ RegexFlagsRow
285
- => Record flags
286
- -> String
287
- -> ParserT String m String
288
- regex flags pattern =
289
- -- Prefix a ^ to ensure the pattern only matches the current position in the parse
290
- case Regex .regex (" ^(" <> pattern <> " )" ) flags' of
291
- Left paterr ->
292
- fail $ " Regex pattern error " <> paterr
293
- Right regexobj ->
274
+ -- | [*MDN Advanced searching with flags*](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#advanced_searching_with_flags)
275
+ mkRegex :: forall m . String -> RegexFlags -> Either String (ParserT String m String )
276
+ mkRegex pattern flags =
277
-- The pattern is wrapped as `^(…)` so that it can only match at the start of
-- the remaining input, i.e. at the current parse position.
-- `<#>` maps over the `Either` returned by `Regex.regex`: a compilation error
-- is passed through as `Left`, and a parser is only built on success.
-- NOTE(review): the spaces shown inside the string literals below look like an
-- artifact of how this diff was extracted — confirm against the real file.
+ Regex .regex (" ^(" <> pattern <> " )" ) flags <#> \regexobj ->
294
278
consumeWith \input -> do
295
279
-- Only the first element of the match array is used; it becomes both the
-- parser's result value and the consumed text.
case NonEmptyArray .head <$> Regex .match regexobj input of
296
280
Just (Just consumed) -> do
297
281
-- Drop the matched prefix, measured in code units (`Data.String.CodeUnits`),
-- to obtain the input that is left for the following parsers.
let remainder = SCU .drop (SCU .length consumed) input
298
282
Right { value: consumed, consumed, remainder }
299
283
-- Covers both "no match at all" (`Nothing`) and a match array whose first
-- element is absent (`Just Nothing`); either way the parser fails.
_ ->
300
284
Left " No Regex pattern match"
301
- where
302
- flags' = RegexFlags
303
- ( merge flags
304
- { dotAll: true
305
- , global: false
306
- , ignoreCase: false
307
- , multiline: false
308
- , sticky: false
309
- , unicode: true
310
- } :: RegexFlagsRec
311
- )
312
285
313
- -- | The fields from `Data.String.Regex.Flags.RegexFlagsRec`.
314
- type RegexFlagsRow =
315
- ( dotAll :: Boolean
316
- , global :: Boolean
317
- , ignoreCase :: Boolean
318
- , multiline :: Boolean
319
- , sticky :: Boolean
320
- , unicode :: Boolean
321
- )
322
286
323
- -- | Consumes a portion of the input string while yielding a value.
287
+ -- | Consume a portion of the input string while yielding a value.
288
+ -- |
289
+ -- | Takes a consumption function which takes the remaining input `String`
290
+ -- | as its argument and returns three fields:
291
+ -- |
324
292
-- | * `value` is the value to return.
325
- -- | * `consumed` is the input that was consumed and is used to update the parser position.
326
- -- | * `remainder` is the new input state .
293
+ -- | * `consumed` is the input `String` that was consumed. It is used to update the parser position.
294
+ -- | * `remainder` is the new remaining input `String`.
327
295
consumeWith
328
296
:: forall m a
329
297
. (String -> Either String { value :: a , consumed :: String , remainder :: String } )
0 commit comments