{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TemplateHaskell #-} -- | Second pass of processing a LBNF file. -- module BNFC.Check.Pass2 where import BNFC.Prelude import Data.List (sort) import qualified Data.Map as Map -- import Debug.Trace import qualified BNFC.Utils.List1 as List1 import qualified BNFC.Abs as A import BNFC.Abs (HasPosition(..)) import BNFC.CF import BNFC.Types.Position import BNFC.Types.Regex import BNFC.Check.Expressions import BNFC.Check.Monad import BNFC.Check.Pass1 ( Pass1(..), stDefinedCats, stUsedCats , parseCat, parseCoerceCat ) import qualified BNFC.Check.Pass1 as Pass1 import BNFC.Check.Regex -- | Entry point for pass 2. checkLBNF :: A.Grammar -> Pass1 -> Check LBNF checkLBNF grammar pass1@Pass1{ _stDefinedCats, _stUsedCats, _stKeywords } = checkGrammar grammar `runReaderT` pass1 `execStateT` st where st = initLBNF { _lbnfASTBuiltins = Map.fromList $ map (second $ fmap wpPos) usedBuiltins , _lbnfParserBuiltins = Map.fromList $ [ (b, ps) | (b, occs) <- usedBuiltins , ps <- maybeToList $ filterParseable occs ] , _lbnfKeywords = keywords , _lbnfSymbols = Map.mapKeys (Symbol . theKeyword) symbols , _lbnfSymbolsKeywords = symbolsKeywords , _lbnfTokenDefs = case identPositions of Just ps -> Map.fromList [('I':|"dent", WithPosition (minimum ps) (TokenDef NoPositionToken identRegex True))] Nothing -> mempty } -- partion keywords and symbols. (symbols, keywords) = Map.partitionWithKey (\k _ -> notIdentifier k) _stKeywords symbolsKeywords = Map.fromList $ zip ( sort $ (theKeyword <$> Map.keys symbols) ++ (theKeyword <$> Map.keys keywords) ) [1..] notIdentifier :: Keyword -> Bool notIdentifier (Keyword k) = notIdent $ toList k where notIdent s = null s || not (isAlpha (head s)) || (not . all isIdentRest) s isIdentRest c = isAlphaNum c || c == '_' || c == '\'' identRegex :: Regex identRegex = RSeqs $ List2 letter (RStar letterDigitSpecial) [] letter :: Regex letter = RChar $ CC $ CAlt [CLower, CUpper] letterDigitSpecial :: Regex letterDigitSpecial = RChar $ CC $ CAlt [ CUpper, CLower, CDigit, CChar '_', CChar '\'' ] -- Note: we could add list categories which are used but not defined -- with their standard definition @terminator Cat ""@. -- See issue: BNFC/bnfc#336. -- These should be added before pass2 runs. -- Get all builtins mentioned in the grammar that are not overwritten -- by definitions. usedBuiltins :: [(BuiltinCat, List1 (WithPosition Parseable))] usedBuiltins = [ (b, occs) | (b, bx) <- builtinCats , let icat = Cat bx , Map.notMember icat _stDefinedCats -- b should not be overwritten by a definition , occs <- maybeToList $ Map.lookup icat _stUsedCats -- b should be used somewhere ] -- Positions of the uses of @Ident@ as category. identPositions :: Maybe (List1 Position) identPositions = -- No category or token named "Ident" has been defined. if Map.notMember icat _stDefinedCats then case Map.lookup icat _stUsedCats of Nothing -> Nothing Just p -> Just $ wpPos <$> p else Nothing where icat = Cat $ 'I':|"dent" -- Keep only the non-@internal@ uses. filterParseable :: List1 (WithPosition Parseable) -> Maybe (List1 Position) filterParseable = nonEmpty . mapMaybe isParseable . toList where isParseable (WithPosition pos p) = case p of Parseable -> Just pos Internal -> Nothing -- | The monad for pass2. type M = ReaderT Pass1 (StateT LBNF Check) checkGrammar :: A.Grammar -> M () checkGrammar (A.Grammar _ defs) = do mapM_ checkDef defs checkEntryPoints -- | If no entrypoints are given explicitly, take the first non-terminal. -- If no non-terminal is defined, raise an error checkEntryPoints :: M () checkEntryPoints = do eps <- use lbnfEntryPoints when (Map.null eps) $ do rules <- use lbnfParserRules -- traceShowM rules if Map.null rules then recoverableError EmptyGrammar else do -- Find non-terminal defined first (least Position value of RuleLabel). -- ParserRules = Map Cat (Map RHS (WithPosition RuleLabel)) -- EntryPoints = Map Cat (List1 Position) let WithPosition pos ep = Map.foldlWithKey f (WithPosition maxBound dummyCat) rules lbnfEntryPoints .= Map.singleton ep (singleton pos) where f :: WithPosition Cat -> Cat -> Map rhs (WithPosition label) -> WithPosition Cat f x cat = Map.foldl (\ y (WithPosition pos _) -> y `min` WithPosition pos cat) x dummyCat = Cat $ BaseCat $ "Internal error in checkEntryPoints: no non-terminals" checkDef :: A.Def -> M () checkDef def = case def of A.Rule (Just p) l cat rhs -> checkRule (toPosition p) Parseable l cat rhs A.Internal (Just p) l cat rhs -> checkRule (toPosition p) Internal l cat rhs A.Token (Just p) x re -> addTokenDef (toPosition p) x NoPositionToken re A.PosToken (Just p) x re -> addTokenDef (toPosition p) x PositionToken re A.Separator (Just p) nonempty cat s -> checkList (toPosition p) nonempty cat (Separator s) A.Terminator (Just p) nonempty cat s -> checkList (toPosition p) nonempty cat (Terminator s) A.Delimiters (Just _) _ _ _ _ _ -> panic "Delimiters should have been filtered in Pass1" A.Coercions (Just p) x n -> checkCoercions (toPosition p) x n A.Rules (Just p) x rhss -> checkRules (toPosition p) x rhss A.Entryp (Just _) cats -> mapM_ (addEntryPoint <=< checkCat) cats where addEntryPoint (WithPosition p c) = modifying lbnfEntryPoints $ Map.insertWith (<>) c $ singleton p A.Function (Just p) x args exp -> checkDefine (toPosition p) x args exp -- Lexer stuff: A.Comment (Just p) s -> addLineComment (toPosition p) s A.Comments (Just p) s1 s2 -> addBlockComment (toPosition p) s1 s2 A.Layout (Just p) ss -> mapM_ (addLayoutKeyword lbnfLayoutStart lbnfLayoutStop $ toPosition p) ss A.LayoutStop (Just p) ss -> mapM_ (addLayoutKeyword lbnfLayoutStop lbnfLayoutStart $ toPosition p) ss A.LayoutTop (Just p) -> use lbnfLayoutTop >>= \case Nothing -> assign lbnfLayoutTop $ Just $ toPosition p Just old -> atPosition p $ warn $ DuplicateLayoutTop old _ -> panicPositionNothing parseICat :: ICat -> M Cat parseICat cat = ReaderT $ \ Pass1{ _stDefinedCats } -> do lift $ Pass1.parseICat cat `runReaderT` _stDefinedCats -- | Check that a category is defined and convert it into internal representation checkCat :: A.Cat -> M (WithPosition Cat) checkCat c = atPosition p $ WithPosition p <$> do parseICat $ parseCat c where p = maybe panicPositionNothing toPosition $ hasPosition c -- | Convert a LBNF label into internal representation. parseLabel :: A.Label -> WithPosition Label parseLabel l0 = case l0 of A.Id _ (A.Identifier (_, x)) -> WithPosition p $ labelFromIdentifier $ fromMaybe panicEmptyIdentifier $ nonEmpty x A.Wild _ -> WithPosition p LWild A.ListEmpty _ -> WithPosition p LNil A.ListCons _ -> WithPosition p LCons A.ListOne _ -> WithPosition p LSg where p = maybe panicPositionNothing toPosition $ hasPosition l0 -- | Convert an LBNF item (terminal or non-terminal) to internal representation. checkItem :: A.Item -> M (Maybe (WithPosition AItem)) checkItem = \case -- Empty keywords are immediately dropped: A.Terminal _ [] -> return Nothing A.Terminal p0 (c:s) -> return . Just . WithPosition p . Terminal $ c :| s where p = maybe panicPositionNothing toPosition p0 A.NTerminal _ cat -> Just . fmap NTerminal <$> checkCat cat -- | Check that -- (1) ordinary labels define ordinary types (not list types), -- (2) coercions are have identity type, and -- (3) list constructors have their respective types. -- checkLabel :: WithPosition Label -> FunType -> M () checkLabel (WithPosition p l) ft@(FunType t ts) = atPosition p $ do case l of LId f -> notListType f $ addSig f $ WithPosition p ft LDef f -> notListType f $ addSig f $ WithPosition p ft LWild -> unless (ts == [t]) $ recoverableError $ InvalidLabelWild ft LNil -> elemType $ \ _ -> unless (ts == []) $ recoverableError $ InvalidLabelNil ft LCons -> elemType $ \ s -> unless (ts == [s,t]) $ recoverableError $ InvalidLabelCons ft LSg -> elemType $ \ s -> unless (ts == [s]) $ recoverableError $ InvalidLabelSg ft where elemType k = case t of ListType s -> k s BaseType{} -> recoverableError $ InvalidListLabel t notListType f k = case t of ListType{} -> recoverableError $ InvalidListRule f BaseType{} -> k -- | Check list rules for uniform indexing. -- This flags rules like @(:). [Exp] ::= Exp1 [Exp]@. -- Such rules make sense in the abstract syntax and the parser, -- but may lead to non-faithful printers. checkListLabelForUniformity :: WithPosition Label -- ^ Possibly a list label. -> Cat -- ^ Lhs cat. -> [Cat] -- ^ Rhs cats. -> M () checkListLabelForUniformity (WithPosition p l) c cs = atPosition p $ do case l of LCons -> tr $ elemCat $ \ b -> unless (cs == [b,c]) warning LSg -> tr $ elemCat $ \ b -> unless (cs == [b]) warning _ -> return () where tr = id -- tr = trace (unwords [ "checkListLabelForUniformity", show l, ":", show c, "<-", show cs ]) warning = warn $ NonUniformListRule c cs elemCat k = case c of ListCat b -> k b CoerceCat{} -> return () -- Error already reported in checkLabel. Cat{} -> return () -- Error already reported in checkLabel. -- | Add label to signature, if it does not exist there yet. -- Otherwise, throw error. addSig :: LabelName -> WithPosition FunType -> M () addSig f t = do (Map.lookup f <$> use lbnfSignature) >>= \case -- If not present yet, add to signature. Nothing -> modifying lbnfSignature $ Map.insert f t -- Otherwise complain about duplicate definition Just (WithPosition p _) -> recoverableError $ DuplicateLabel f p checkRHS :: A.RHS -> M ARHS checkRHS (A.RHS _ items0) = map wpThing . catMaybes <$> mapM checkItem items0 trimRHS :: ARHS -> RHS trimRHS = mapMaybe $ traverse $ parseKeyword . toList -- | Check a LBNF rule and convert it into internal form. checkRule :: Position -> Parseable -> A.Label -> A.Cat -> A.RHS -> M () checkRule p parseable l0 cat0 rhs0 = do -- Convert rule to internal format. let l = parseLabel l0 WithPosition _ cat <- checkCat cat0 items <- checkRHS rhs0 -- Check (list, wild) or store (constructor, definition) type. let cs = rhsCats items let ty = FunType (catToType cat) $ map catToType cs checkLabel l ty checkListLabelForUniformity l cat cs -- Add grammar rule with origin. addRule p FromOrdinary parseable cat (wpThing l) items -- | Add a well-typed rule to 'lnbfASTRules', 'lbnfASTRulesAP' -- and, if it is 'Parseable', to 'lbnfParserRules'. addRule :: Position -> RuleOrigin -> Parseable -> Cat -> Label -> ARHS -> M () addRule p origin parseable cat l items = atPosition p $ do -- Warn if label clashes with category. (Error in e.g. Java backend.) case l of LId x -> do (Map.lookup (Cat x) <$> view stDefinedCats) >>= \case Just (WithPosition p1 _) -> warn $ LabelClashesWithCategory x p1 Nothing -> do (Map.lookup (Cat x) <$> view stUsedCats) >>= \case Just (WithPosition p2 _ :| _) -> warn $ LabelClashesWithCategory x p2 Nothing -> return () _ -> return () -- Add AST flavor of rule. -- This cannot fail since we already ensured that the label is unique. -- (See 'DuplicateLabel'.) -- However, in case the user uses @--force@, we should keep the existing entry -- and ignore the new entry. modifying lbnfASTRules $ Map.insertWith (Map.unionWith (\ _new old -> old)) cat $ Map.singleton l $ WithPosition p $ ARuleRHS { aruleOrigin = origin , aruleParseable = parseable , aruleRHS = items } modifying lbnfASTRulesAP $ Map.insertWith (Map.unionWith (\ _new old -> old)) (catToType cat) $ Map.singleton l (rhsType items, (getCatPrec cat, WithPosition p items)) when (parseable == Parseable) $ do -- Trim keywords in RHS. let rhs = trimRHS items -- Add Parser rule of flavor. -- If the same RHS already exist, raise an error and skip this rule for the parser. (lookupRHS cat rhs <$> use lbnfParserRules) >>= \case Just (WithPosition p' _) -> recoverableError $ DuplicateRHS p' Nothing -> do modifying lbnfParserRules $ Map.insertWith (Map.unionWith $ panic panicRHS) cat $ Map.singleton rhs $ WithPosition p $ RuleLabel { ruleOrigin = origin , ruleLabel = l } where panicRHS = "this RHS cannot exist in the map since we just looked it up" -- addRule :: Position -> RuleOrigin -> Parseable -> Label -> Cat -> RHS -> M () -- addRule p origin parseable l cat items = do -- modifying lbnfRules $ Map.insertWith (<>) cat $ singleton $ WithPosition p $ RuleBody -- { ruleOrigin = origin -- , ruleParseable = parseable -- , ruleLabel = l -- , ruleRHS = items -- } -- | Add rules from list pragma. -- -- checkList :: Position -> A.MinimumSize -> A.Cat -> Separator' String -> M () checkList p size cat0 sep0 = atPosition p $ do WithPosition _ cat <- checkCat cat0 let list = ListCat cat -- @asep@ is the separator including whitespace (Nothing for "") asep = parseASeparator sep0 -- @sep@ is the separator with whitespace trimmed (Nothing for @all isSpace@) sep = trimSeparator =<< asep -- Only one of these two is not null: term = [ Terminal s | Terminator s <- maybeToList asep ] tsep = [ Terminal s | Separator s <- maybeToList asep ] arules = concat [ [ (LNil , [] ) | mayEmpty ] -- If either the list is required to be @nonempty@ -- or its is @separator@ which isn't just whitespace, -- then we need an extra rule for the singleton list (that may have a terminator): , [ (LSg , NTerminal cat : term) | not mayEmpty || isSep sep ] , [ (LCons, concat [ [NTerminal cat], tsep, term, [NTerminal list] ]) ] ] mapM_ (uncurry $ addRule p FromList Parseable list) arules where -- Are empty lists allowed? mayEmpty = case size of A.MEmpty _ -> True A.MNonEmpty _ -> False isSep = maybe False $ \case Separator{} -> True Terminator{} -> False -- | Add rules from @coercion@ pragma. -- -- E.g. @coercions Exp 3@ will add the following rules: -- -- @ -- _. Exp ::= Exp1; -- _. Exp1 ::= Exp2; -- _. Exp2 ::= Exp3; -- _. Exp3 ::= "(" Exp ")"; -- @ checkCoercions :: Position -> A.Identifier -> Integer -> M () -- @coercions _ 0@ is ignored. checkCoercions p _ n | n <= 0 = atPosition p $ warn IgnoringNullCoercions checkCoercions p (A.Identifier (_, x)) n = atPosition p $ do c0 <- parseICat $ parseCoerceCat x case c0 of ListCat{} -> panic "Identifier cannot resolve to ListCat" CoerceCat{} -> return () -- this has been flagged already in pass 1 Cat BuiltinCat{} -> recoverableError CoercionsOfBuiltinCat Cat (IdentCat _) -> recoverableError CoercionsOfIdentCat Cat TokenCat {} -> recoverableError CoercionsOfTokenCat Cat (BaseCat ident) -> do let c = CoerceCat ident mapM_ (\ (cat, rhs) -> addRule p FromCoercions Parseable cat LWild rhs) $ concat [ [ (c0 , [ NTerminal (c 1) ]) ] , [ (c (i-1), [ NTerminal (c i) ]) | i <- [2..n] ] , [ (c n , [ Terminal "(", NTerminal c0, Terminal ")" ]) ] ] -- | Add rules from @rules@ pragma. checkRules :: Position -> A.Identifier -> [A.RHS] -> M () checkRules p (A.Identifier (_, x0)) rhss = do let x = fromMaybe panicEmptyIdentifier $ nonEmpty x0 cat <- parseICat $ parseCoerceCat x0 arhss <- mapM checkRHS rhss forM_ (zip [1 :: Int ..] arhss) $ \ (k, arhs) -> do let l = case trimRHS arhs of -- If the rhs is a single keyword @kw@ which is a valid identifier, use @Cat_kw@ as label. [Terminal (Keyword s)] | all (\c -> isAlphaNum c || elem c ("_'" :: [Char])) s -> '_' <| s -- If the rhs is a single category @D@, use @CatD@ as label. [NTerminal c] -> catToIdentifier c -- Otherwise, the label is @Cat_k@ where @k@ is the number of the generated rule. _ -> '_' :| show k addRule p FromRules Parseable cat (LId $ x <> l) arhs checkDefine :: Position -> A.Identifier -> [A.Arg] -> A.Exp -> M () checkDefine p (A.Identifier (_, x0)) args exp = atPosition p $ do let x = fromMaybe panicEmptyIdentifier $ nonEmpty x0 sig <- use lbnfSignature case Map.lookup x sig of Nothing -> recoverableError IgnoringUndeclaredFunction Just (WithPosition _ ft) -> do fun <- lift . lift $ checkFunction p sig x args exp ft modifying lbnfFunctions $ Map.insert x $ WithPosition p fun -- | Add a token category (position carrying or not) defined by a regular expression. addTokenDef :: Position -> A.Identifier -> PositionToken -> A.Reg -> M () addTokenDef pos (A.Identifier (_, x0)) posTok reg = atPosition pos $ do let x = fromMaybe panicEmptyIdentifier $ nonEmpty x0 modifying lbnfTokenDefs $ Map.insert x $ WithPosition pos def addSig x (WithPosition pos (FunType (BaseType (TokenCat x)) [BaseType (BuiltinCat BString)])) unless (satisfiable regex) $ warn $ EmptyToken x regex when (nullable regex) $ recoverableError $ NullableToken x regex where regex = normRegex reg def = TokenDef { positionToken = posTok , regexToken = regex , isIdent = False } -- | Add a keyword that starts or stops layout. addLayoutKeyword :: Lens' LBNF LayoutKeywords -- ^ add here -> Lens' LBNF LayoutKeywords -- ^ shouldn't be in here -> Position -> String -- ^ shouldn't be empty -> M () addLayoutKeyword we others p s = atPosition p $ do case parseKeyword s of Nothing -> warn EmptyLayoutKeyword Just kw -> do -- Check that keyword isn't amoung the @others@. -- If it is there, ignore it with a recoverable error. (Map.lookup kw <$> use others) >>= \case Just pold -> recoverableError $ ConflictingUsesOfLayoutKeyword kw pold Nothing -> do -- Check that keyword is defined in the grammar, otherwise ignore. (Map.lookup kw <$> use lbnfKeywords) >>= \case Nothing -> warn $ UndefinedLayoutKeyword kw Just{} -> do -- Check that keyword isn't defined as layout yet. (Map.lookup kw <$> use we) >>= \case Just pold -> warn $ DuplicateLayoutKeyword kw pold -- Store layout keyword Nothing -> modifying we $ Map.insert kw p -- | Add line comment delimiter, unless empty. addLineComment :: Position -> String -> M () addLineComment p = \case [] -> atPosition p $ warn $ IgnoringEmptyLineComment c:cs -> modifying lbnfLineComments $ Map.insert p $ LineComment $ c :| cs -- NB: Map.insert is safe (not to overwrite) because we cannot have two -- definitions with the same position -- | Add block comment delimiters if both are non-empty. addBlockComment :: Position -> String -> String -> M () addBlockComment p s1 s2 = atPosition p $ case (List1.nonEmpty s1, List1.nonEmpty s2) of (Just n1, Just n2) -> modifying lbnfBlockComments $ Map.insert p $ BlockComment n1 n2 (Nothing, Nothing) -> warn $ IgnoringEmptyBlockComment -- If one of the delimiters is null, this is a more harmful situation, -- we opt for an error here instead of a warning. _ -> recoverableError $ IllformedBlockComment