Skip to content

New String function: parseErrorHuman #209

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Breaking changes:
New features:

- add `MonadAsk` and `MonadReader` instances (#208 by @bentongxyz)
- Add `Parsing.String.parseErrorHuman` (#209 by @jamesdbrock)

Other improvements:

Expand Down
2 changes: 2 additions & 0 deletions src/Parsing.purs
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,11 @@ import Data.Tuple (Tuple(..), fst)
-- | the position in the input stream at which the error occurred.
data ParseError = ParseError String Position

-- | Get the `Message` from a `ParseError`
parseErrorMessage :: ParseError -> String
parseErrorMessage (ParseError msg _) = msg

-- | Get the `Position` from a `ParseError`.
parseErrorPosition :: ParseError -> Position
parseErrorPosition (ParseError _ pos) = pos

Expand Down
103 changes: 101 additions & 2 deletions src/Parsing/String.purs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
-- | Primitive parsers for working with an input stream of type `String`.
-- | Primitive parsers, combinators and functions for working with an input
-- | stream of type `String`.
-- |
-- | All of these primitive parsers will consume when they succeed.
-- |
Expand Down Expand Up @@ -47,17 +48,21 @@ module Parsing.String
, regex
, anyTill
, consumeWith
, parseErrorHuman
) where

import Prelude hiding (between)

import Control.Monad.Rec.Class (Step(..), tailRecM)
import Data.Array (replicate)
import Data.Array.NonEmpty as NonEmptyArray
import Data.Either (Either(..))
import Data.Enum (fromEnum, toEnum)
import Data.Function.Uncurried (mkFn5, runFn2)
import Data.Int (odd)
import Data.Maybe (Maybe(..), fromJust)
import Data.String (CodePoint, Pattern(..), codePointAt, length, null, splitAt, stripPrefix, uncons)
import Data.String as CodePoint
import Data.String as String
import Data.String.CodeUnits as SCU
import Data.String.Regex as Regex
Expand Down Expand Up @@ -339,4 +344,98 @@ anyTill p = do
( do
_ <- anyCodePoint
pure $ Loop unit
)
)

-- | Returns three `String`s which, when printed line-by-line, will show
-- | a human-readable parsing error message with context.
-- |
-- | #### Input arguments
-- |
-- | * The first argument is the input `String` given to the parser which
-- | errored.
-- | * The second argument is a positive `Int` which indicates how many
-- | characters of input `String` context are wanted around the parsing error.
-- | * The third argument is the `ParseError` for the input `String`.
-- |
-- | #### Output `String`s
-- |
-- | 1. The parse error message and the parsing position.
-- | 2. A string with an arrow that points to the error position in the
-- | input context (in a fixed-width font).
-- | 3. The input context. A substring of the input which tries to center
-- | the error position and have the wanted length and not include
-- | any newlines or carriage returns.
-- |
-- | If the parse error occurred on a carriage return or newline character,
-- | then that character will be included at the end of the input context.
-- |
-- | #### Example
-- |
-- | ```
-- | let input = "12345six789"
-- | case runParser input (replicateA 9 String.Basic.digit) of
-- | Left err ->
-- | log $ String.joinWith "\n" $ parseErrorHuman input 20 err
-- | ```
-- | ---
-- | ```
-- | Expected digit at position index:5 (line:1, column:6)
-- | ▼
-- | 12345six789
-- | ```
parseErrorHuman :: String -> Int -> ParseError -> Array String
parseErrorHuman input contextSize (ParseError msg (Position { line, column, index })) =
-- inspired by
-- https://github.com/elm/parser/blob/master/README.md#tracking-context
[ msg <> " at position index:" <> show index
<> " (line:"
<> show line
<> ", column:"
<> show column
<> ")"
, (String.joinWith "" (replicate (lineIndex - minPosBefore) " ")) <> "▼" -- best way to construct string of spaces?
, inputContext
]
where
-- select the input line in which the error appears
-- sadly we can't use splitCap because of circular module dependency and we
-- don't feel like separating out an “Internal” module.
{ posBegin, posEnd, lineBegin } = go 0 input 0 input
where
go posBegin lineBegin posEnd lineEnd =
case String.uncons lineEnd of
Just { head, tail } | head == CodePoint.codePointFromChar '\n' ->
if posEnd == index -- uh-oh, error at the newline
-- so include the newline at the end of the selected line.
then { posBegin, posEnd: posEnd + 1, lineBegin }
else if posEnd > index then { posBegin, posEnd, lineBegin }
else go (posEnd + 1) tail (posEnd + 1) tail
Just { head, tail } | head == CodePoint.codePointFromChar '\r' ->
if posEnd == index -- uh-oh, error at the carriage return
-- so include the carriage return at the end of the selected line.
-- we don't need to add the possible following newline because
-- we're not printing a line break here, we're just making sure
-- to include the character at the position which errored.
then { posBegin, posEnd: posEnd + 1, lineBegin }
else if posEnd > index then { posBegin, posEnd, lineBegin }
else go (posEnd + 1) tail (posEnd + 1) tail
Just { tail } -> go posBegin lineBegin (posEnd + 1) tail
_ -> { posBegin, posEnd, lineBegin }
lineSelect = String.take (posEnd - posBegin) lineBegin
lineIndex = index - posBegin
lineLength = String.length lineSelect

-- position minus half of context
bestPosBefore = lineIndex - (contextSize / 2)
-- position plus half of context
bestPosAfter = lineIndex + (contextSize / 2) + if odd contextSize then 1 else 0

-- constrain the context window to selected line
-- grow the context window to contextSize if the error is at beginning or end of selected line
Tuple minPosBefore maxPosAfter =
if bestPosBefore >= 0 then
if bestPosAfter <= lineLength then Tuple bestPosBefore bestPosAfter
else Tuple (max 0 (lineLength - contextSize)) lineLength
else Tuple 0 (min lineLength contextSize)

inputContext = String.take (maxPosAfter - minPosBefore) $ String.drop minPosBefore lineSelect
42 changes: 39 additions & 3 deletions test/Main.purs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import Control.Monad.State (State, lift, modify, runState)
import Data.Array (some, toUnfoldable)
import Data.Array as Array
import Data.Bifunctor (lmap, rmap)
import Data.Either (Either(..), either, hush)
import Data.Either (Either(..), either, fromLeft, hush)
import Data.Foldable (oneOf)
import Data.List (List(..), fromFoldable, (:))
import Data.List.NonEmpty (NonEmptyList(..), catMaybes, cons, cons')
Expand All @@ -34,12 +34,13 @@ import Effect.Console (log, logShow)
import Effect.Unsafe (unsafePerformEffect)
import Node.Process (lookupEnv)
import Parsing (ParseError(..), ParseState(..), Parser, ParserT, Position(..), consume, fail, getParserT, initialPos, parseErrorMessage, parseErrorPosition, position, region, runParser)
import Parsing.Combinators (advance, between, chainl, chainl1, chainr, chainr1, choice, empty, endBy, endBy1, lookAhead, many, many1, many1Till, many1Till_, manyIndex, manyTill, manyTill_, notFollowedBy, optionMaybe, sepBy, sepBy1, sepEndBy, sepEndBy1, skipMany, skipMany1, try, (<?>), (<??>), (<~?>))
import Parsing.Combinators (advance, between, chainl, chainl1, chainr, chainr1, choice, empty, endBy, endBy1, lookAhead, many, many1, many1Till, many1Till_, manyIndex, manyTill, manyTill_, notFollowedBy, optionMaybe, replicateA, sepBy, sepBy1, sepEndBy, sepEndBy1, skipMany, skipMany1, try, (<?>), (<??>), (<~?>))
import Parsing.Combinators.Array as Combinators.Array
import Parsing.Expr (Assoc(..), Operator(..), buildExprParser)
import Parsing.Language (haskellDef, haskellStyle, javaStyle)
import Parsing.String (anyChar, anyCodePoint, anyTill, char, eof, match, regex, rest, satisfy, string, takeN)
import Parsing.String (anyChar, anyCodePoint, anyTill, char, eof, match, parseErrorHuman, regex, rest, satisfy, string, takeN)
import Parsing.String.Basic (intDecimal, letter, noneOfCodePoints, number, oneOfCodePoints, skipSpaces, whiteSpace)
import Parsing.String.Basic as String.Basic
import Parsing.String.Replace (breakCap, replace, replaceT, splitCap, splitCapT)
import Parsing.Token (TokenParser, makeTokenParser, token, when)
import Parsing.Token as Token
Expand Down Expand Up @@ -1070,3 +1071,38 @@ main = do
{ actual: lmap parseErrorPosition $ runParser "aa" $ advance consume
, expected: Left (Position { index: 0, line: 1, column: 1 })
}

log "\nTESTS error messages\n"
do
let input = "12345six789"
assertEqual' "parseErrorHuman 1"
{ actual: Array.drop 1 $ parseErrorHuman input 20 $ fromLeft (ParseError "" initialPos)
$ runParser input (replicateA 9 String.Basic.digit :: Parser String (List Char))
, expected: [ " ▼", "12345six789" ]
}

do
let input = "12345six789"
assertEqual' "parseErrorHuman 2"
{ actual: Array.drop 1 $ parseErrorHuman input 5 $ fromLeft (ParseError "" initialPos)
$ runParser input (replicateA 9 String.Basic.digit :: Parser String (List Char))
, expected: [ " ▼", "45six" ]
}

do
let input = "aaaa🍷\r\nbbbb"
assertEqual' "parseErrorHuman 3"
{ actual: parseErrorHuman input 20 $ fromLeft (ParseError "" initialPos)
$ runParser input
$ string "aaaa" *> (replicateA 7 letter :: Parser String (List Char))
, expected: [ "Expected letter at position index:4 (line:1, column:5)", " ▼", "aaaa🍷" ]
}

do
let input = "aaaa\r\n🍷bbbb"
assertEqual' "parseErrorHuman 4"
{ actual: parseErrorHuman input 20 $ fromLeft (ParseError "" initialPos)
$ runParser input
$ string "aaaa\r\n" *> (replicateA 5 letter :: Parser String (List Char))
, expected: [ "Expected letter at position index:6 (line:2, column:1)", "▼", "🍷bbbb" ]
}