{- BNF Converter: Happy Generator Copyright (C) 2004 Author: Markus Forsberg, Aarne Ranta -} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} module BNFC.Backend.Haskell.CFtoHappy (cf2Happy, convert) where import Prelude hiding ((<>)) import Data.Foldable (toList) import Data.List (intersperse) import BNFC.CF import BNFC.Backend.Common.StrUtils (escapeChars) import BNFC.Backend.Haskell.Utils import BNFC.Options (HappyMode(..), TokenText(..)) import BNFC.PrettyPrint import BNFC.Utils -- Type declarations type Rules = [(NonTerminal,[(Pattern,Action)])] type Pattern = String type Action = String type MetaVar = String -- default naming tokenName = "Token" -- | Generate a happy parser file from a grammar. cf2Happy :: ModuleName -- ^ This module's name. -> ModuleName -- ^ Abstract syntax module name. -> ModuleName -- ^ Lexer module name. -> HappyMode -- ^ Happy mode. -> TokenText -- ^ Use @ByteString@ or @Text@? -> Bool -- ^ AST is a functor? -> CF -- ^ Grammar. -> String -- ^ Generated code. cf2Happy name absName lexName mode tokenText functor cf = unlines [ header name absName lexName tokenText eps , render $ declarations mode functor eps , render $ tokens cf functor , delimiter , specialRules absName functor tokenText cf , render $ prRules absName functor (rulesForHappy absName functor cf) , "" , footer absName tokenText functor eps cf ] where eps = toList $ allEntryPoints cf -- | Construct the header. header :: ModuleName -> ModuleName -> ModuleName -> TokenText -> [Cat] -> String header modName absName lexName tokenText eps = unlines $ concat [ [ "-- This Happy file was machine-generated by the BNF converter" , "{" , "{-# OPTIONS_GHC -fno-warn-incomplete-patterns -fno-warn-overlapping-patterns #-}" , "{-# LANGUAGE PatternSynonyms #-}" , "" , "module " ++ modName , " ( happyError" , " , myLexer" ] , map ((" , " ++) . render . parserName) eps , [ " ) where" , "" , "import Prelude" , "" , "import qualified " ++ absName , "import " ++ lexName ] , tokenTextImport tokenText , [ "" , "}" ] ] -- | The declarations of a happy file. -- >>> declarations Standard False [Cat "A", Cat "B", ListCat (Cat "B")] -- %name pA A -- %name pB B -- %name pListB ListB -- -- no lexer declaration -- %monad { Err } { (>>=) } { return } -- %tokentype {Token} -- -- >>> declarations Standard True [Cat "A", Cat "B", ListCat (Cat "B")] -- %name pA_internal A -- %name pB_internal B -- %name pListB_internal ListB -- -- no lexer declaration -- %monad { Err } { (>>=) } { return } -- %tokentype {Token} declarations :: HappyMode -> Bool -> [Cat] -> Doc declarations mode functor ns = vcat [ vcat $ map generateP ns , case mode of Standard -> "-- no lexer declaration" GLR -> "%lexer { myLexer } { Err _ }", "%monad { Err } { (>>=) } { return }", "%tokentype" <+> braces (text tokenName) ] where generateP n = "%name" <+> parserName n <> (if functor then "_internal" else "") <+> text (identCat n) -- The useless delimiter symbol. delimiter :: String delimiter = "\n%%\n" -- | Generate the list of tokens and their identifiers. tokens :: CF -> Bool -> Doc tokens cf functor -- Andreas, 2019-01-02: "%token" followed by nothing is a Happy parse error. -- Thus, if we have no tokens, do not output anything. | null ts = empty | otherwise = "%token" $$ (nest 2 $ vcat $ map text $ table " " ts) where ts = map prToken (cfTokens cf) ++ specialToks cf functor prToken (t,k) = [ render (convert t), "{ PT _ (TS _ " ++ show k ++ ")", "}" ] -- Happy doesn't allow characters such as åäö to occur in the happy file. This -- is however not a restriction, just a naming paradigm in the happy source file. convert :: String -> Doc convert = quotes . text . escapeChars rulesForHappy :: ModuleName -> Bool -> CF -> Rules rulesForHappy absM functor cf = for (ruleGroups cf) $ \ (cat, rules) -> (cat, map (constructRule absM functor) rules) -- | For every non-terminal, we construct a set of rules. A rule is a sequence -- of terminals and non-terminals, and an action to be performed. -- -- >>> constructRule "Foo" False (npRule "EPlus" (Cat "Exp") [Left (Cat "Exp"), Right "+", Left (Cat "Exp")] Parsable) -- ("Exp '+' Exp","Foo.EPlus $1 $3") -- -- If we're using functors, it adds position value: -- -- >>> constructRule "Foo" True (npRule "EPlus" (Cat "Exp") [Left (Cat "Exp"), Right "+", Left (Cat "Exp")] Parsable) -- ("Exp '+' Exp","(fst $1, Foo.EPlus (fst $1) (snd $1) (snd $3))") -- -- List constructors should not be prefixed by the abstract module name: -- -- >>> constructRule "Foo" False (npRule "(:)" (ListCat (Cat "A")) [Left (Cat "A"), Right",", Left (ListCat (Cat "A"))] Parsable) -- ("A ',' ListA","(:) $1 $3") -- -- >>> constructRule "Foo" False (npRule "(:[])" (ListCat (Cat "A")) [Left (Cat "A")] Parsable) -- ("A","(:[]) $1") -- -- Coercion are much simpler: -- -- >>> constructRule "Foo" True (npRule "_" (Cat "Exp") [Right "(", Left (Cat "Exp"), Right ")"] Parsable) -- ("'(' Exp ')'","(uncurry Foo.BNFC'Position (tokenLineCol $1), (snd $2))") -- constructRule :: IsFun f => String -> Bool -> Rul f -> (Pattern, Action) constructRule absName functor (Rule fun0 _cat rhs Parsable) = (pat, action) where fun = funName fun0 (pat, metavars) = generatePatterns functor rhs action | functor = "(" ++ actionPos id ++ ", " ++ actionValue ++ ")" | otherwise = actionValue actionPos paren = case rhs of [] -> qualify noPosConstr (Left _:_) -> paren "fst $1" (Right _:_) -> paren $ unwords [ "uncurry", qualify posConstr , "(tokenLineCol $1)" ] actionValue | isCoercion fun = unwords metavars | isNilCons fun = unwords (qualify fun : metavars) | functor = unwords (qualify fun : actionPos (\ x -> "(" ++ x ++ ")") : metavars) | otherwise = unwords (qualify fun : metavars) qualify f | isConsFun f || isNilCons f = f | isDefinedRule f = absName ++ "." ++ mkDefName f | otherwise = absName ++ "." ++ f constructRule _ _ (Rule _ _ _ Internal) = undefined -- impossible -- | Generate patterns and a set of metavariables (de Bruijn indices) indicating -- where in the pattern the non-terminal are locate. -- -- >>> generatePatterns False [ Left (Cat "Exp"), Right "+", Left (Cat "Exp") ] -- ("Exp '+' Exp",["$1","$3"]) -- -- >>> generatePatterns True [ Left (Cat "Exp"), Right "+", Left (Cat "Exp") ] -- ("Exp '+' Exp",["(snd $1)","(snd $3)"]) -- generatePatterns :: Bool -> SentForm -> (Pattern, [MetaVar]) generatePatterns _ [] = ("{- empty -}", []) generatePatterns functor its = ( unwords $ for its $ either {-non-term:-} identCat {-term:-} (render . convert) , [ if functor then "(snd $" ++ show i ++ ")" else ('$' : show i) | (i, Left{}) <- zip [1 :: Int ..] its ] ) -- We have now constructed the patterns and actions, -- so the only thing left is to merge them into one string. -- | -- >>> prRules "Foo" False [(Cat "Expr", [("Integer", "Foo.EInt $1"), ("Expr '+' Expr", "Foo.EPlus $1 $3")])] -- Expr :: { Foo.Expr } -- Expr : Integer { Foo.EInt $1 } | Expr '+' Expr { Foo.EPlus $1 $3 } -- -- if there's a lot of cases, print on several lines: -- >>> prRules "" False [(Cat "Expr", [("Abcd", "Action"), ("P2", "A2"), ("P3", "A3"), ("P4", "A4"), ("P5","A5"), ("P6", "A6"), ("P7", "A7"), ("P8", "A8"), ("P9","A9")])] -- Expr :: { Expr } -- Expr -- : Abcd { Action } -- | P2 { A2 } -- | P3 { A3 } -- | P4 { A4 } -- | P5 { A5 } -- | P6 { A6 } -- | P7 { A7 } -- | P8 { A8 } -- | P9 { A9 } -- -- >>> prRules "" False [(Cat "Internal", [])] -- nt has only internal use -- -- -- The functor case: -- >>> prRules "" True [(Cat "Expr", [("Integer", "EInt () $1"), ("Expr '+' Expr", "EPlus () $1 $3")])] -- Expr :: { (BNFC'Position, Expr) } -- Expr : Integer { EInt () $1 } | Expr '+' Expr { EPlus () $1 $3 } -- -- A list with coercion: in the type signature we need to get rid of the -- coercion. -- -- >>> prRules "" True [(ListCat (CoercCat "Exp" 2), [("Exp2", "(:[]) $1"), ("Exp2 ',' ListExp2","(:) $1 $3")])] -- ListExp2 :: { (BNFC'Position, [Exp]) } -- ListExp2 : Exp2 { (:[]) $1 } | Exp2 ',' ListExp2 { (:) $1 $3 } -- prRules :: ModuleName -> Bool -> Rules -> Doc prRules absM functor = vsep . map prOne where prOne (_ , [] ) = empty -- nt has only internal use prOne (nt, (p,a):ls) = vcat [ hsep [ nt', "::", "{", if functor then functorType' nt else type' nt, "}" ] , hang nt' 2 $ sep (pr ":" (p, a) : map (pr "|") ls) ] where nt' = text (identCat nt) pr pre (p,a) = hsep [pre, text p, "{", text a , "}"] type' = catToType qualify empty functorType' nt = hcat ["(", qualify posType, ", ", type' nt, ")"] qualify | null absM = id | otherwise = ((text absM <> ".") <>) -- Finally, some haskell code. footer :: ModuleName -> TokenText -> Bool -> [Cat] -> CF -> String footer absName tokenText functor eps cf = unlines $ concat [ [ "{" , "" , "type Err = Either String" , "" , "happyError :: [" ++ tokenName ++ "] -> Err a" , "happyError ts = Left $" , " \"syntax error at \" ++ tokenPos ts ++ " , " case ts of" , " [] -> []" , " [Err _] -> \" due to lexer error\"" , unwords [ " t:_ -> \" before `\" ++" , "(prToken t)" -- , tokenTextUnpack tokenText "(prToken t)" , "++ \"'\"" ] , "" , "myLexer :: " ++ tokenTextType tokenText ++ " -> [" ++ tokenName ++ "]" , "myLexer = tokens" , "" ] , when functor [ "-- Entrypoints" , "" , render . vsep $ map mkParserFun eps ] , [ "}" ] ] where mkParserFun cat = vcat [ parserName cat <+> "::" <+> brackets (text tokenName) <+> "-> Err" <+> catToType qualify empty cat , parserName cat <+> "=" <+> "fmap snd" <+> "." <+> parserName cat <> "_internal" ] qualify | null absName = id | otherwise = ((text absName <> ".") <>) -- | GF literals. specialToks :: CF -> Bool -> [[String]] -- ^ A table with three columns (last is "}"). specialToks cf functor = (`map` literals cf) $ \t -> case t of "Ident" -> [ "L_Ident" , "{ PT _ (TV " ++ posn t ++ ")", "}" ] "String" -> [ "L_quoted", "{ PT _ (TL " ++ posn t ++ ")", "}" ] "Integer" -> [ "L_integ ", "{ PT _ (TI " ++ posn t ++ ")", "}" ] "Double" -> [ "L_doubl ", "{ PT _ (TD " ++ posn t ++ ")", "}" ] "Char" -> [ "L_charac", "{ PT _ (TC " ++ posn t ++ ")", "}" ] own -> [ "L_" ++ own,"{ PT _ (T_" ++ own ++ " " ++ posn own ++ ")", "}" ] where posn tokenCat = if isPositionCat cf tokenCat || functor then "_" else "$$" specialRules :: ModuleName -> Bool -> TokenText -> CF -> String specialRules absName functor tokenText cf = unlines . intersperse "" . (`map` literals cf) $ \t -> case t of -- "Ident" -> "Ident :: { Ident }" -- ++++ "Ident : L_ident { Ident $1 }" "String" -> "String :: { " ++ mkTypePart t ++ " }" ++++ "String : L_quoted { " ++ mkBodyPart t ++ " }" "Integer" -> "Integer :: { " ++ mkTypePart t ++ " }" ++++ "Integer : L_integ { " ++ mkBodyPart t ++ " }" "Double" -> "Double :: { " ++ mkTypePart t ++ " }" ++++ "Double : L_doubl { " ++ mkBodyPart t ++ " }" "Char" -> "Char :: { " ++ mkTypePart t ++ " }" ++++ "Char : L_charac { " ++ mkBodyPart t ++ " }" own -> own ++ " :: { " ++ mkTypePart (qualify own) ++ " }" ++++ own ++ " : L_" ++ own ++ " { " ++ mkBodyPart t ++ " }" where mkTypePart tokenCat = if functor then concat [ "(", qualify posType, ", ", tokenCat, ")" ] else tokenCat mkBodyPart tokenCat | functor = "(" ++ unwords ["uncurry", qualify posConstr, "(tokenLineCol $1)"] ++ ", " ++ mkValPart tokenCat ++ ")" | otherwise = mkValPart tokenCat mkValPart tokenCat = case tokenCat of "String" -> if functor then stringUnpack "((\\(PT _ (TL s)) -> s) $1)" else stringUnpack "$1" -- String never has pos "Integer" -> if functor then "(read " ++ stringUnpack "(tokenText $1)" ++ ") :: Integer" else "(read " ++ stringUnpack "$1" ++ ") :: Integer" -- Integer never has pos "Double" -> if functor then "(read " ++ stringUnpack "(tokenText $1)" ++ ") :: Double" else "(read " ++ stringUnpack "$1" ++ ") :: Double" -- Double never has pos "Char" -> if functor then "(read " ++ stringUnpack "(tokenText $1)" ++ ") :: Char" else "(read " ++ stringUnpack "$1" ++ ") :: Char" -- Char never has pos own -> case functor of False -> case isPositionCat cf tokenCat of False -> qualify own ++ " $1" True -> qualify own ++ " (mkPosToken $1)" True -> case isPositionCat cf tokenCat of False -> qualify own ++ " (tokenText $1)" True -> qualify own ++ " (mkPosToken $1)" stringUnpack = tokenTextUnpack tokenText qualify | null absName = id | otherwise = ((absName ++ ".") ++)