Skip to content

Fix parsing of indented modules #63

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:
run: npm run install

- name: Build project
run: npm run build -- --pedantic-packages
run: npm run build -- --pedantic-packages --strict

- name: Run tests
run: npm run test -- --offline --quiet
Expand Down
4 changes: 2 additions & 2 deletions bench/src/ParseFile.purs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import Node.FS.Aff (readFile)
import Node.Process as Process
import PureScript.CST (RecoveredParserResult(..), parseModule)
import PureScript.CST.Errors (ParseError, printParseError)
import PureScript.CST.Lexer (lex)
import PureScript.CST.Lexer (lexModule)
import PureScript.CST.Parser.Monad (PositionedError)
import PureScript.CST.Print (TokenOption(..), printSourceTokenWithOption)
import PureScript.CST.TokenStream (TokenStep(..), TokenStream, step)
Expand All @@ -36,7 +36,7 @@ main = launchAff_ do
tokens =
map (foldMap (printSourceTokenWithOption ShowLayout))
$ tokenStreamToArray
$ lex contents
$ lexModule contents
for_ tokens Console.log
else
mempty
Expand Down
2 changes: 0 additions & 2 deletions spago.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
package:
name: language-cst-parser
description: A parser for the PureScript concrete syntax tree.
build:
strict: true
publish:
version: 0.14.1
license: MIT
Expand Down
7 changes: 3 additions & 4 deletions src/PureScript/CST/Parser.purs
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,14 @@ import Data.Array as Array
import Data.Array.NonEmpty (NonEmptyArray)
import Data.Array.NonEmpty as NonEmptyArray
import Data.Either (Either(..))
import Data.Maybe (Maybe(..), maybe)
import Data.Maybe (Maybe(..))
import Data.Set (Set)
import Data.Set as Set
import Data.Tuple (Tuple(..), uncurry)
import Prim as P
import PureScript.CST.Errors (ParseError(..), RecoveredError(..))
import PureScript.CST.Layout (currentIndent)
import PureScript.CST.Parser.Monad (Parser, eof, lookAhead, many, optional, recover, take, try)
import PureScript.CST.TokenStream (TokenStep(..), TokenStream, layoutStack)
import PureScript.CST.TokenStream (TokenStep(..), TokenStream, currentIndentColumn)
import PureScript.CST.TokenStream as TokenStream
import PureScript.CST.Types (AppSpine(..), Binder(..), ClassFundep(..), DataCtor(..), DataMembers(..), Declaration(..), Delimited, DoStatement(..), Export(..), Expr(..), Fixity(..), FixityOp(..), Foreign(..), Guarded(..), GuardedExpr(..), Ident(..), Import(..), ImportDecl(..), Instance(..), InstanceBinding(..), IntValue(..), Label(..), Labeled(..), LetBinding(..), Module(..), ModuleBody(..), ModuleHeader(..), ModuleName(..), Name(..), OneOrDelimited(..), Operator(..), PatternGuard(..), Prefixed(..), Proper(..), QualifiedName(..), RecordLabeled(..), RecordUpdate(..), Role(..), Row(..), Separated(..), SourceToken, Token(..), Type(..), TypeVarBinding(..), Where(..), Wrapped(..))

Expand Down Expand Up @@ -1188,7 +1187,7 @@ recoverTokensWhile :: (SourceToken -> Int -> Boolean) -> TokenStream -> Tuple (A
recoverTokensWhile p initStream = go [] initStream
where
indent :: Int
indent = maybe 0 _.column $ currentIndent $ layoutStack initStream
indent = currentIndentColumn initStream

go :: Array SourceToken -> TokenStream -> Tuple (Array SourceToken) TokenStream
go acc stream = case TokenStream.step stream of
Expand Down
11 changes: 8 additions & 3 deletions src/PureScript/CST/Parser/Monad.purs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ initialParserState stream =
, stream
}

-- Merges the `consumed` flag of an earlier parser state into a later one.
--
-- The result is always the later state; the only field that can change is
-- `consumed`, which is forced to `true` when the earlier state had it set and
-- the later state does not. When no change is needed the later state is
-- returned as-is rather than rebuilt.
appendConsumed :: ParserState -> ParserState -> ParserState
appendConsumed earlier later =
  if earlier.consumed then
    if later.consumed then later
    else later { consumed = true }
  else
    later

newtype Parser a = Parser
( forall r
. Fn4
Expand Down Expand Up @@ -78,7 +83,7 @@ instance Apply Parser where
more \_ ->
runFn4 p2 state2 more resume
( mkFn2 \state3 a ->
runFn2 done state3 (f a)
runFn2 done (state2 `appendConsumed` state3) (f a)
)
)
)
Expand All @@ -96,7 +101,7 @@ instance Bind Parser where
( mkFn2 \state2 a ->
more \_ -> do
let (Parser p2) = k a
runFn4 p2 state2 more resume done
runFn4 p2 (state1 `appendConsumed` state2) more resume done
)
)

Expand Down Expand Up @@ -222,7 +227,7 @@ many (Parser p) = Parser
runFn2 done state2 (Array.reverse (List.toUnfoldable acc))
)
( mkFn2 \state3 value ->
runFn2 go (List.Cons value acc) state3
runFn2 go (List.Cons value acc) (state2 `appendConsumed` state3)
)
runFn2 go List.Nil state1
)
Expand Down
23 changes: 21 additions & 2 deletions src/PureScript/CST/TokenStream.purs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ module PureScript.CST.TokenStream
, consTokens
, layoutStack
, unwindLayout
, currentIndentColumn
) where

import Prelude
Expand All @@ -13,11 +14,11 @@ import Data.Foldable (class Foldable, foldr)
import Data.Lazy (Lazy)
import Data.Lazy as Lazy
import Data.List (List(..), (:))
import Data.Maybe (Maybe)
import Data.Maybe (Maybe, maybe)
import Data.Newtype (class Newtype, unwrap)
import Data.Tuple (Tuple(..))
import PureScript.CST.Errors (ParseError)
import PureScript.CST.Layout (LayoutDelim(..), LayoutStack, isIndented, lytToken)
import PureScript.CST.Layout (LayoutDelim(..), LayoutStack, currentIndent, isIndented, lytToken)
import PureScript.CST.Types (Comment, LineFeed, SourcePos, SourceToken, Token(..))

newtype TokenStream = TokenStream (Lazy TokenStep)
Expand Down Expand Up @@ -64,3 +65,21 @@ unwindLayout pos eof = go
TokenCons (lytToken pos (TokLayoutEnd pos'.column)) pos (go tl) tl
| otherwise ->
step (go tl)

-- Returns the indentation column that applies at the head of the stream.
--
-- Each step of the token stream carries the layout stack as it stands *after*
-- that token. For most tokens the current indent can therefore be read
-- straight off the stack (defaulting to 0 when the stack yields no indent).
-- TokLayoutEnd is the exception: the stack already reflects the next indent,
-- but for the purposes of recovery the column recorded on the TokLayoutEnd
-- token itself should count as the current indent, so it is returned instead.
-- At end of input the indent is 0.
currentIndentColumn :: TokenStream -> Int
currentIndentColumn stream = case step stream of
  TokenEOF _ _ ->
    0
  TokenError _ _ _ stk ->
    columnOf stk
  TokenCons tok _ _ stk ->
    case tok.value of
      TokLayoutEnd col ->
        col
      _ ->
        columnOf stk
  where
  -- Column of the stack's current indent, or 0 when there is none.
  columnOf stk = maybe 0 _.column (currentIndent stk)
13 changes: 12 additions & 1 deletion test/Main.purs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
module Test.Main where
module Test.Main (main) where

import Prelude
import Prim hiding (Type)
Expand Down Expand Up @@ -370,3 +370,14 @@ main = do
true
_ ->
false

assertParse "Indented module"
"""
module Test where
test = 42
"""
case _ of
ParseSucceeded (Module _) ->
true
_ ->
false