diff --git a/src/Swarm/Language/Parse.hs b/src/Swarm/Language/Parse.hs index f966e55ad..1ed8b5055 100644 --- a/src/Swarm/Language/Parse.hs +++ b/src/Swarm/Language/Parse.hs @@ -128,8 +128,12 @@ reserved w = (lexeme . try) $ string' w *> notFollowedBy (alphaNumChar <|> char -- | Parse an identifier, i.e. any non-reserved string containing -- alphanumeric characters and underscores and not starting with a -- number. -identifier :: Parser Text -identifier = (lexeme . try) (p >>= check) "variable name" +identifier :: Parser Var +identifier = lvVar <$> locIdentifier + +-- | Parse an identifier together with its source location info. +locIdentifier :: Parser LocVar +locIdentifier = uncurry LV <$> parseLocG ((lexeme . try) (p >>= check) "variable name") where p = (:) <$> (letterChar <|> char '_') <*> many (alphaNumChar <|> char '_' <|> char '\'') check s @@ -261,16 +265,16 @@ parseTermAtom = ) ) <|> SLam - <$> (symbol "\\" *> identifier) + <$> (symbol "\\" *> locIdentifier) <*> optional (symbol ":" *> parseType) <*> (symbol "." *> parseTerm) <|> sLet - <$> (reserved "let" *> identifier) + <$> (reserved "let" *> locIdentifier) <*> optional (symbol ":" *> parsePolytype) <*> (symbol "=" *> parseTerm) <*> (reserved "in" *> parseTerm) <|> sDef - <$> (reserved "def" *> identifier) + <$> (reserved "def" *> locIdentifier) <*> optional (symbol ":" *> parsePolytype) <*> (symbol "=" *> parseTerm <* reserved "end") <|> parens (mkTuple <$> (parseTerm `sepBy` symbol ",")) @@ -292,13 +296,13 @@ mkTuple (x : xs) = SPair x (STerm (mkTuple xs)) -- | Construct an 'SLet', automatically filling in the Boolean field -- indicating whether it is recursive. -sLet :: Var -> Maybe Polytype -> Syntax -> Syntax -> Term -sLet x ty t1 = SLet (x `S.member` setOf fv (sTerm t1)) x ty t1 +sLet :: LocVar -> Maybe Polytype -> Syntax -> Syntax -> Term +sLet x ty t1 = SLet (lvVar x `S.member` setOf fv (sTerm t1)) x ty t1 -- | Construct an 'SDef', automatically filling in the Boolean field -- indicating whether it is recursive. -sDef :: Var -> Maybe Polytype -> Syntax -> Term -sDef x ty t = SDef (x `S.member` setOf fv (sTerm t)) x ty t +sDef :: LocVar -> Maybe Polytype -> Syntax -> Term +sDef x ty t = SDef (lvVar x `S.member` setOf fv (sTerm t)) x ty t parseAntiquotation :: Parser Term parseAntiquotation = @@ -311,7 +315,7 @@ parseTerm = sepEndBy1 parseStmt (symbol ";") >>= mkBindChain mkBindChain :: [Stmt] -> Parser Syntax mkBindChain stmts = case last stmts of - Binder x _ -> return $ foldr mkBind (STerm (TApp (TConst Return) (TVar x))) stmts + Binder x _ -> return $ foldr mkBind (STerm (TApp (TConst Return) (TVar (lvVar x)))) stmts BareTerm t -> return $ foldr mkBind t (init stmts) where mkBind (BareTerm t1) t2 = loc t1 t2 $ SBind Nothing t1 t2 @@ -320,14 +324,14 @@ mkBindChain stmts = case last stmts of data Stmt = BareTerm Syntax - | Binder Text Syntax + | Binder LocVar Syntax deriving (Show) parseStmt :: Parser Stmt parseStmt = - mkStmt <$> optional (try (identifier <* symbol "<-")) <*> parseExpr + mkStmt <$> optional (try (locIdentifier <* symbol "<-")) <*> parseExpr -mkStmt :: Maybe Text -> Syntax -> Stmt +mkStmt :: Maybe LocVar -> Syntax -> Stmt mkStmt Nothing = BareTerm mkStmt (Just x) = Binder x diff --git a/src/Swarm/Language/Syntax.hs b/src/Swarm/Language/Syntax.hs index 4fbdee5fe..2ec4d222f 100644 --- a/src/Swarm/Language/Syntax.hs +++ b/src/Swarm/Language/Syntax.hs @@ -3,6 +3,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE UndecidableInstances #-} +{-# LANGUAGE ViewPatterns #-} -- | -- Module : Swarm.Language.Syntax @@ -48,6 +49,7 @@ module Swarm.Language.Syntax ( -- * Syntax Syntax (..), + LocVar (..), SrcLoc (..), noLoc, pattern STerm, @@ -808,7 +810,9 @@ pattern TPair t1 t2 = SPair (STerm t1) (STerm t2) -- | Match a TLam without syntax pattern TLam :: Var -> Maybe Type -> Term -> Term -pattern TLam v ty t = SLam v ty (STerm t) +pattern TLam v ty t <- SLam (lvVar -> v) ty (STerm t) + where + TLam v ty t = SLam (LV NoLoc v) ty (STerm t) -- | Match a TApp without syntax pattern TApp :: Term -> Term -> Term @@ -822,15 +826,21 @@ pattern (:$:) t1 s2 = SApp (STerm t1) s2 -- | Match a TLet without syntax pattern TLet :: Bool -> Var -> Maybe Polytype -> Term -> Term -> Term -pattern TLet r v pt t1 t2 = SLet r v pt (STerm t1) (STerm t2) +pattern TLet r v pt t1 t2 <- SLet r (lvVar -> v) pt (STerm t1) (STerm t2) + where + TLet r v pt t1 t2 = SLet r (LV NoLoc v) pt (STerm t1) (STerm t2) -- | Match a TDef without syntax pattern TDef :: Bool -> Var -> Maybe Polytype -> Term -> Term -pattern TDef r v pt t = SDef r v pt (STerm t) +pattern TDef r v pt t <- SDef r (lvVar -> v) pt (STerm t) + where + TDef r v pt t = SDef r (LV NoLoc v) pt (STerm t) -- | Match a TBind without syntax pattern TBind :: Maybe Var -> Term -> Term -> Term -pattern TBind v t1 t2 = SBind v (STerm t1) (STerm t2) +pattern TBind mv t1 t2 <- SBind (fmap lvVar -> mv) (STerm t1) (STerm t2) + where + TBind mv t1 t2 = SBind (LV NoLoc <$> mv) (STerm t1) (STerm t2) -- | Match a TDelay without syntax pattern TDelay :: DelayType -> Term -> Term @@ -859,6 +869,12 @@ data DelayType MemoizedDelay (Maybe Var) deriving (Eq, Show, Data, Generic, FromJSON, ToJSON) +-- | A variable with associated source location, used for variable +-- binding sites. (Variable occurrences are a bare TVar which gets +-- wrapped in a Syntax node, so we don't need LocVar for those.) +data LocVar = LV {lvSrcLoc :: SrcLoc, lvVar :: Var} + deriving (Eq, Show, Data, Generic, FromJSON, ToJSON) + -- | Terms of the Swarm language. data Term = -- | The unit value. @@ -894,19 +910,19 @@ data Term SPair Syntax Syntax | -- | A lambda expression, with or without a type annotation on the -- binder. - SLam Var (Maybe Type) Syntax + SLam LocVar (Maybe Type) Syntax | -- | Function application. SApp Syntax Syntax | -- | A (recursive) let expression, with or without a type -- annotation on the variable. The @Bool@ indicates whether -- it is known to be recursive. - SLet Bool Var (Maybe Polytype) Syntax Syntax + SLet Bool LocVar (Maybe Polytype) Syntax Syntax | -- | A (recursive) definition command, which binds a variable to a -- value in subsequent commands. The @Bool@ indicates whether the -- definition is known to be recursive. - SDef Bool Var (Maybe Polytype) Syntax + SDef Bool LocVar (Maybe Polytype) Syntax | -- | A monadic bind for commands, of the form @c1 ; c2@ or @x <- c1; c2@. - SBind (Maybe Var) Syntax Syntax + SBind (Maybe LocVar) Syntax Syntax | -- | Delay evaluation of a term, written @{...}@. Swarm is an -- eager language, but in some cases (e.g. for @if@ statements -- and recursive bindings) we need to delay evaluation. The @@ -941,18 +957,18 @@ fvT f = go S.empty TVar x | x `S.member` bound -> pure t | otherwise -> f (TVar x) - SLam x ty (Syntax l1 t1) -> SLam x ty <$> (Syntax l1 <$> go (S.insert x bound) t1) + SLam x ty (Syntax l1 t1) -> SLam x ty <$> (Syntax l1 <$> go (S.insert (lvVar x) bound) t1) SApp (Syntax l1 t1) (Syntax l2 t2) -> SApp <$> (Syntax l1 <$> go bound t1) <*> (Syntax l2 <$> go bound t2) SLet r x ty (Syntax l1 t1) (Syntax l2 t2) -> - let bound' = S.insert x bound + let bound' = S.insert (lvVar x) bound in SLet r x ty <$> (Syntax l1 <$> go bound' t1) <*> (Syntax l2 <$> go bound' t2) SPair (Syntax l1 t1) (Syntax l2 t2) -> SPair <$> (Syntax l1 <$> go bound t1) <*> (Syntax l2 <$> go bound t2) SDef r x ty (Syntax l1 t1) -> - SDef r x ty <$> (Syntax l1 <$> go (S.insert x bound) t1) + SDef r x ty <$> (Syntax l1 <$> go (S.insert (lvVar x) bound) t1) SBind mx (Syntax l1 t1) (Syntax l2 t2) -> - SBind mx <$> (Syntax l1 <$> go bound t1) <*> (Syntax l2 <$> go (maybe id S.insert mx bound) t2) + SBind mx <$> (Syntax l1 <$> go bound t1) <*> (Syntax l2 <$> go (maybe id (S.insert . lvVar) mx bound) t2) SDelay m (Syntax l1 t1) -> SDelay m <$> (Syntax l1 <$> go bound t1) diff --git a/src/Swarm/Language/Typecheck.hs b/src/Swarm/Language/Typecheck.hs index 6feef2269..ee3139954 100644 --- a/src/Swarm/Language/Typecheck.hs +++ b/src/Swarm/Language/Typecheck.hs @@ -1,5 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE ViewPatterns #-} {-# OPTIONS_GHC -fno-warn-orphans #-} -- For 'Ord IntVar' instance @@ -273,7 +274,7 @@ inferModule s@(Syntax _ t) = (`catchError` addLocToTypeErr s) $ case t of -- variable for the body, infer the body under an extended context, -- and unify the two. Then generalize the type and return an -- appropriate context. - SDef _ x Nothing t1 -> do + SDef _ (lvVar -> x) Nothing t1 -> do xTy <- fresh ty <- withBinding x (Forall [] xTy) $ infer t1 xTy =:= ty @@ -282,7 +283,7 @@ inferModule s@(Syntax _ t) = (`catchError` addLocToTypeErr s) $ case t of -- If a (poly)type signature has been provided, skolemize it and -- check the definition. - SDef _ x (Just pty) t1 -> do + SDef _ (lvVar -> x) (Just pty) t1 -> do let upty = toU pty uty <- skolemize upty withBinding x upty $ check t1 uty @@ -304,7 +305,7 @@ inferModule s@(Syntax _ t) = (`catchError` addLocToTypeErr s) $ case t of -- case the bound x should shadow the defined one; hence, we apply -- that binding /after/ (i.e. /within/) the application of @ctx1@. withBindings ctx1 $ - maybe id (`withBinding` Forall [] a) mx $ do + maybe id ((`withBinding` Forall [] a) . lvVar) mx $ do Module cmdb ctx2 <- inferModule c2 -- We don't actually need the result type since we're just going @@ -318,7 +319,7 @@ inferModule s@(Syntax _ t) = (`catchError` addLocToTypeErr s) $ case t of -- (if any) as well, since binders are made available at the top -- level, just like definitions. e.g. if the user writes `r <- build {move}`, -- then they will be able to refer to r again later. - let ctxX = maybe Ctx.empty (`Ctx.singleton` Forall [] a) mx + let ctxX = maybe Ctx.empty ((`Ctx.singleton` Forall [] a) . lvVar) mx return $ Module cmdb (ctx1 `Ctx.union` ctxX `Ctx.union` ctx2) -- In all other cases, there can no longer be any definitions in the @@ -374,14 +375,14 @@ infer s@(Syntax l t) = (`catchError` addLocToTypeErr s) $ case t of -- To infer the type of a lambda if the type of the argument is -- provided, just infer the body under an extended context and return -- the appropriate function type. - SLam x (Just argTy) lt -> do + SLam (lvVar -> x) (Just argTy) lt -> do let uargTy = toU argTy resTy <- withBinding x (Forall [] uargTy) $ infer lt return $ UTyFun uargTy resTy -- If the type of the argument is not provided, create a fresh -- unification variable for it and proceed. - SLam x Nothing lt -> do + SLam (lvVar -> x) Nothing lt -> do argTy <- fresh resTy <- withBinding x (Forall [] argTy) $ infer lt return $ UTyFun argTy resTy @@ -398,13 +399,13 @@ infer s@(Syntax l t) = (`catchError` addLocToTypeErr s) $ case t of -- We can infer the type of a let whether a type has been provided for -- the variable or not. - SLet _ x Nothing t1 t2 -> do + SLet _ (lvVar -> x) Nothing t1 t2 -> do xTy <- fresh uty <- withBinding x (Forall [] xTy) $ infer t1 xTy =:= uty upty <- generalize uty withBinding x upty $ infer t2 - SLet _ x (Just pty) t1 t2 -> do + SLet _ (lvVar -> x) (Just pty) t1 t2 -> do let upty = toU pty -- If an explicit polytype has been provided, skolemize it and check -- definition and body under an extended context. @@ -419,7 +420,7 @@ infer s@(Syntax l t) = (`catchError` addLocToTypeErr s) $ case t of SBind mx c1 c2 -> do ty1 <- infer c1 a <- decomposeCmdTy ty1 - ty2 <- maybe id (`withBinding` Forall [] a) mx $ infer c2 + ty2 <- maybe id ((`withBinding` Forall [] a) . lvVar) mx $ infer c2 _ <- decomposeCmdTy ty2 return ty2 where @@ -623,7 +624,7 @@ analyzeAtomic locals (Syntax l t) = case t of SDelay _ s1 -> analyzeAtomic locals s1 -- Bind is similarly simple except that we have to keep track of a local variable -- bound in the RHS. - SBind mx s1 s2 -> (+) <$> analyzeAtomic locals s1 <*> analyzeAtomic (maybe id S.insert mx locals) s2 + SBind mx s1 s2 -> (+) <$> analyzeAtomic locals s1 <*> analyzeAtomic (maybe id (S.insert . lvVar) mx locals) s2 -- Variables are allowed if bound locally, or if they have a simple type. TVar x | x `S.member` locals -> return 0