{- BNF Converter: Abstract syntax Copyright (C) 2004 Author: Markus Forsberg, Aarne Ranta This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} module BNFC.GetCF(parseCF, parseCFP) where import qualified AbsBNF as Abs import ParBNF import BNFC.CF import BNFC.Options import BNFC.TypeChecker import BNFC.Utils import Control.Arrow (left) import Control.Monad.State import Data.Char import Data.Either (partitionEithers) import Data.List(nub,partition) import Data.Maybe (mapMaybe) import ErrM -- $setup -- >>> import PrintBNF parseCF :: SharedOptions -> Target -> String -> IO CF parseCF opts t s = liftM cfp2cf (parseCFP opts t s) parseCFP :: SharedOptions -> Target -> String -> IO CFP parseCFP opts target content = do cfp <- runErr $ pGrammar (myLexer content) >>= expandRules >>= getCFP (cnf opts) >>= markTokenCategories let cf = cfp2cf cfp runErr $ checkDefinitions cf let msgs3 = checkTokens cf let reserved = [lang opts | target == TargetJava ] -- Warn of fail if the grammar use non unique names case filter (not . isDefinedRule) $ notUniqueNames reserved cf of [] -> return () ns| target `notElem` [TargetHaskell,TargetHaskellGadt,TargetOCaml] -> fail $ "ERROR: names not unique: " ++ unwords ns | otherwise -> do putStrLn $ "Warning: names not unique: " ++ unwords ns putStrLn "This can be an error in other back ends." -- Print msgs3 putStrLn $ unlines msgs3 -- Print the number of rules putStrLn $ show (length (rulesOfCF cf)) +++ "rules accepted\n" -- Print a warning if comment delimiter are bigger than 2 characters let c3s = [(b,e) | (b,e) <- fst (comments cf), length b > 2 || length e > 2] unless (null c3s) $do putStrLn "Warning: comment delimiters longer than 2 characters ignored in Haskell:" mapM_ putStrLn [b +++ "-" +++ e | (b,e) <- c3s] return cfp where runErr (Ok a) = return a runErr (Bad msg) = fail msg {- case filter (not . isDefinedRule) $ notUniqueFuns cf of [] -> case (badInheritence cf) of [] -> return (ret,True) xs -> do putStrLn "Warning :" putStrLn $ " Bad Label name in Category(s) :" ++ unwords xs putStrLn $ " These categories have more than one Label, yet one of these" putStrLn $ " Labels has the same name as the Category. This will almost" putStrLn $ " certainly cause problems in languages other than Haskell.\n" return (ret,True) xs -> do putStrLn $ "Warning :" putStrLn $ " Non-unique label name(s) : " ++ unwords xs putStrLn $ " There may be problems with the pretty-printer.\n" case (badInheritence cf) of [] -> return (ret,True) xs -> do putStrLn $ "Warning :" putStrLn $ " Bad Label name in Category(s) :" ++ unwords xs putStrLn $ " These categories have more than one Label, yet one of these" putStrLn $ " Labels has the same name as the Category. This will almost" putStrLn $ " certainly cause problems in languages other than Haskell.\n" return (ret,True) -} getCFP :: Bool -> Abs.Grammar -> Err CFP getCFP cnf (Abs.Grammar defs0) = do let rules = inlineDelims rules0 cf0 = revs srt srt = let literals = nub [lit | xs <- map rhsRule rules, Left (Cat lit) <- xs, lit `elem` specialCatsP] (symbols,keywords) = partition notIdent reservedWords notIdent s = null s || not (isAlpha (head s)) || any (not . isIdentRest) s isIdentRest c = isAlphaNum c || c == '_' || c == '\'' reservedWords = nub [t | r <- rules, Right t <- rhsRule r] in CFG((pragma,(literals,symbols,keywords,[])),rules) revs cf1@(CFG((pragma,(literals,symbols,keywords,_)),rules)) = CFG((pragma,(literals,symbols,keywords,findAllReversibleCats (cfp2cf cf1))),rules) case mapMaybe (checkRule (cfp2cf cf0)) (rulesOfCF cf0) of [] -> return () msgs -> fail (unlines msgs) return cf0 where (pragma,rules0) = partitionEithers $ concatMap transDef defs (defs,inlineDelims) = if cnf then (defs0,id) else removeDelims defs0 -- | This function goes through each rule of a grammar and replace Cat "X" with -- TokenCat "X" when "X" is a token type. markTokenCategories :: CFP -> Err CFP markTokenCategories (CFG (exts, rules)) = return $ CFG (exts, newRules) where newRules = [ Rule f (mark c) (map (left mark) rhs) | Rule f c rhs <- rules ] tokenCatNames = [ n | TokenReg n _ _ <- fst exts ] ++ specialCatsP mark = toTokenCat tokenCatNames -- | Change the constructor of categories with the given names from Cat to -- TokenCat -- >>> toTokenCat ["A"] (Cat "A") == TokenCat "A" -- True -- >>> toTokenCat ["A"] (ListCat (Cat "A")) == ListCat (TokenCat "A") -- True -- >>> toTokenCat ["A"] (Cat "B") == Cat "B" -- True toTokenCat :: [String] -> Cat -> Cat toTokenCat ns (Cat a) | a `elem` ns = TokenCat a toTokenCat ns (ListCat c) = ListCat (toTokenCat ns c) toTokenCat _ c = c removeDelims :: [Abs.Def] -> ([Abs.Def], [RuleP] -> [RuleP]) removeDelims xs = (ys ++ map delimToSep ds, foldr (.) id [map (inlineDelim' d) | d <- ds]) where (ds,ys) = partition isDelim xs isDelim (Abs.Delimiters{}) = True isDelim _ = False inlineDelim :: Abs.Def -> Either Cat String -> [Either Cat String] inlineDelim (Abs.Delimiters cat open close _ _) (Left c) | c == ListCat (transCat cat) = [Right open, Left c, Right close] inlineDelim _ x = [x] inlineDelim' :: Abs.Def -> RuleP -> RuleP inlineDelim' d@(Abs.Delimiters cat _ _ _ _) r@(Rule f c rhs) | c == ListCat (transCat cat) = r | otherwise = Rule f c (concatMap (inlineDelim d) rhs) inlineDelim' _ _ = error "Not a delimiters pragma" delimToSep (Abs.Delimiters cat _ _ (Abs.SepTerm s) sz) = Abs.Terminator sz cat s delimToSep (Abs.Delimiters cat _ _ (Abs.SepSepar s) sz) = Abs.Separator sz cat s delimToSep (Abs.Delimiters cat _ _ Abs.SepNone sz) = Abs.Terminator sz cat "" delimToSep x = x transDef :: Abs.Def -> [Either Pragma RuleP] transDef x = case x of Abs.Rule label cat items -> [Right $ Rule (transLabel label) (transCat cat) (map transItem items)] Abs.Comment str -> [Left $ CommentS str] Abs.Comments str0 str -> [Left $ CommentM (str0,str)] Abs.Token ident reg -> [Left $ TokenReg (transIdent ident) False reg] Abs.PosToken ident reg -> [Left $ TokenReg (transIdent ident) True reg] Abs.Entryp idents -> [Left $ EntryPoints (map (strToCat .transIdent) idents)] Abs.Internal label cat items -> [Right $ Rule (transLabel label) (transCat cat) (Left InternalCat:map transItem items)] Abs.Separator size ident str -> map (Right . cf2cfpRule) $ separatorRules size ident str Abs.Terminator size ident str -> map (Right . cf2cfpRule) $ terminatorRules size ident str Abs.Delimiters a b c d e -> map (Right . cf2cfpRule) $ delimiterRules a b c d e Abs.Coercions ident int -> map (Right . cf2cfpRule) $ coercionRules ident int Abs.Rules ident strs -> map (Right . cf2cfpRule) $ ebnfRules ident strs Abs.Layout ss -> [Left $ Layout ss] Abs.LayoutStop ss -> [Left $ LayoutStop ss] Abs.LayoutTop -> [Left LayoutTop] Abs.Function f xs e -> [Left $ FunDef (transIdent f) (map transArg xs) (transExp e)] delimiterRules :: Abs.Cat -> String -> String -> Abs.Separation -> Abs.MinimumSize -> [Rule] delimiterRules a0 l r (Abs.SepTerm "") size = delimiterRules a0 l r Abs.SepNone size delimiterRules a0 l r (Abs.SepSepar "") size = delimiterRules a0 l r Abs.SepNone size delimiterRules a0 l r sep size = [ -- recognizing a single element Rule "(:[])" (strToCat a') (Left a : termin), -- optional terminator/separator -- glueing two sublists Rule "(++)" (strToCat a') [Left (strToCat a'), Left (strToCat a')], -- starting on either side with a delimiter Rule "[]" (strToCat c) [Right l], Rule (if optFinal then "(:[])" else "[]") (strToCat d) ([Left a | optFinal] ++ [Right r]), -- gathering chains Rule "(++)" (strToCat c) [Left (strToCat c), Left (strToCat a')], Rule "(++)" (strToCat d) [Left (strToCat a'), Left (strToCat d)], -- finally, put together left and right chains Rule "(++)" as [Left (strToCat c),Left (strToCat d)]] ++ [ -- special rule for the empty list if necessary Rule "[]" as [Right l,Right r] | optEmpty] where a = transCat a0 as = ListCat a a' = '@':'@':show a c = '@':'{':show a d = '@':'}':show a -- optionally separated concat. of x and y categories. termin = case sep of Abs.SepSepar t -> [Right t] Abs.SepTerm t -> [Right t] _ -> [] optFinal = case (sep,size) of (Abs.SepSepar _,_) -> True (Abs.SepTerm _,Abs.MNonempty) -> True (Abs.SepNone,Abs.MNonempty) -> True _ -> False optEmpty = case sep of Abs.SepSepar _ -> size == Abs.MEmpty _ -> False separatorRules :: Abs.MinimumSize -> Abs.Cat -> String -> [Rule] separatorRules size c s = if null s then terminatorRules size c s else ifEmpty [ Rule "(:[])" cs [Left c'], Rule "(:)" cs [Left c', Right s, Left cs] ] where c' = transCat c cs = ListCat c' ifEmpty rs = if size == Abs.MNonempty then rs else Rule "[]" cs [] : rs terminatorRules :: Abs.MinimumSize -> Abs.Cat -> String -> [Rule] terminatorRules size c s = [ ifEmpty, Rule "(:)" cs (Left c' : s' [Left cs]) ] where c' = transCat c cs = ListCat c' s' its = if null s then its else Right s : its ifEmpty = if size == Abs.MNonempty then Rule "(:[])" cs (Left c' : if null s then [] else [Right s]) else Rule "[]" cs [] coercionRules :: Abs.Ident -> Integer -> [Rule] coercionRules (Abs.Ident c) n = Rule "_" (Cat c) [Left (CoercCat c 1)] : [Rule "_" (CoercCat c (i-1)) [Left (CoercCat c i)] | i <- [2..n]] ++ [Rule "_" (CoercCat c n) [Right "(", Left (Cat c), Right ")"]] ebnfRules :: Abs.Ident -> [Abs.RHS] -> [Rule] ebnfRules (Abs.Ident c) rhss = [Rule (mkFun k its) (strToCat c) (map transItem its) | (k, Abs.RHS its) <- zip [1 :: Int ..] rhss] where mkFun k i = case i of [Abs.Terminal s] -> c' ++ "_" ++ mkName k s [Abs.NTerminal n] -> c' ++ identCat (transCat n) _ -> c' ++ "_" ++ show k c' = c --- normCat c mkName k s = if all (\c -> isAlphaNum c || elem c ("_'" :: String)) s then s else show k transItem :: Abs.Item -> Either Cat String transItem x = case x of Abs.Terminal str -> Right str Abs.NTerminal cat -> Left (transCat cat) transCat :: Abs.Cat -> Cat transCat x = case x of Abs.ListCat cat -> ListCat (transCat cat) Abs.IdCat (Abs.Ident c) -> strToCat c transLabel :: Abs.Label -> (Fun,Prof) transLabel y = case y of Abs.LabNoP f -> let g = transLabelId f in (g,(g,[])) ---- should be Nothing Abs.LabP f p -> let g = transLabelId f in (g,(g, map transProf p)) Abs.LabPF f g p -> (transLabelId f,(transLabelId g, map transProf p)) Abs.LabF f g -> (transLabelId f,(transLabelId g, [])) where transLabelId x = case x of Abs.Id id -> transIdent id Abs.Wild -> "_" Abs.ListE -> "[]" Abs.ListCons -> "(:)" Abs.ListOne -> "(:[])" transProf (Abs.ProfIt bss as) = ([map fromInteger bs | Abs.Ints bs <- bss], map fromInteger as) transIdent :: Abs.Ident -> String transIdent x = case x of Abs.Ident str -> str transArg :: Abs.Arg -> String transArg (Abs.Arg x) = transIdent x transExp :: Abs.Exp -> Exp transExp e = case e of Abs.App x es -> App (transIdent x) (map transExp es) Abs.Var x -> App (transIdent x) [] Abs.Cons e1 e2 -> cons e1 (transExp e2) Abs.List es -> foldr cons nil es Abs.LitInt x -> LitInt x Abs.LitDouble x -> LitDouble x Abs.LitChar x -> LitChar x Abs.LitString x -> LitString x where cons e1 e2 = App "(:)" [transExp e1, e2] nil = App "[]" [] -------------------------------------------------------------------------------- --checkTokens :: CFG f -> [String] checkTokens cf = if null ns then [] else ["Warning : ", -- change to error in a future version " The following tokens accept the empty string: ", " "++unwords ns, " This is error-prone and will not be supported in the future."] where ns = map (show.fst) . filter (nullable.snd) $ tokenPragmas cf -- | Check if a regular expression is nullable (accepts the empty string) nullable :: Abs.Reg -> Bool nullable r = case r of Abs.RSeq r1 r2 -> nullable r1 && nullable r2 Abs.RAlt r1 r2 -> nullable r1 || nullable r2 Abs.RMinus r1 r2 -> nullable r1 && not (nullable r2) Abs.RStar _ -> True Abs.RPlus r1 -> nullable r1 Abs.ROpt _ -> True Abs.REps -> True Abs.RChar _ -> False Abs.RAlts _ -> False Abs.RSeqs s -> null s Abs.RDigit -> False Abs.RLetter -> False Abs.RUpper -> False Abs.RLower -> False Abs.RAny -> False -- we should actually check that -- (1) coercions are always between variants -- (2) no other digits are used checkRule :: CF -> RuleP -> Maybe String checkRule _ (Rule _ (Cat ('@':_)) _) = Nothing -- Generated by a pragma; it's a trusted category checkRule cf (Rule (f,_) cat rhs) | badCoercion = Just $ "Bad coercion in rule" +++ s | badNil = Just $ "Bad empty list rule" +++ s | badOne = Just $ "Bad one-element list rule" +++ s | badCons = Just $ "Bad list construction rule" +++ s | badList = Just $ "Bad list formation rule" +++ s | badSpecial = Just $ "Bad special category rule" +++ s | badTypeName = Just $ "Bad type name" +++ unwords (map show badtypes) +++ "in" +++ s | badFunName = Just $ "Bad constructor name" +++ f +++ "in" +++ s | badMissing = Just $ "No production for" +++ unwords missing ++ ", appearing in rule" +++ s +++ ". Defined categories:" +++ unwords defineds | otherwise = Nothing where s = f ++ "." +++ show cat +++ "::=" +++ unwords (map (either show show) rhs) -- Todo: consider using the show instance of Rule c = normCat cat cs = [normCat c | Left c <- rhs] badCoercion = isCoercion f && [c] /= cs badNil = isNilFun f && not (isList c && null cs) badOne = isOneFun f && not (isList c && cs == [catOfList c]) badCons = isConsFun f && not (isList c && cs == [catOfList c, c]) badList = isList c && not (isCoercion f || isNilCons f) badSpecial = elem c [ Cat x | x <- specialCatsP] && not (isCoercion f) badMissing = not (null missing) missing = filter nodef [show c | Left c <- rhs] nodef t = t `notElem` defineds defineds = show InternalCat : tokenNames cf ++ specialCatsP ++ map (show . valCat) (rulesOfCF cf) badTypeName = not (null badtypes) badtypes = filter isBadType $ cat : [c | Left c <- rhs] isBadType (ListCat c) = isBadType c isBadType InternalCat = False isBadType (CoercCat c _) = isBadCatName c isBadType (Cat s) = isBadCatName s isBadType (TokenCat s) = isBadCatName s isBadCatName s = not (isUpper (head s) || s == show InternalCat || (head s == '@')) badFunName = not (all (\c -> isAlphaNum c || c == '_') f {-isUpper (head f)-} || isCoercion f || isNilCons f) -- | Pre-processor that converts the `rules` macros to regular rules -- by creating unique function names for them. -- >>> :{ -- let rules1 = Abs.Rules (Abs.Ident "Foo") -- [ Abs.RHS [Abs.Terminal "abc"] -- , Abs.RHS [Abs.NTerminal (Abs.IdCat (Abs.Ident "A"))] -- , Abs.RHS [Abs.Terminal "foo", Abs.Terminal "bar"] -- , Abs.RHS [Abs.Terminal "++"] -- ] -- in -- let Ok tree = expandRules (Abs.Grammar [rules1]) -- in putStrLn (printTree tree) -- :} -- Foo_abc . Foo ::= "abc" ; -- FooA . Foo ::= A ; -- Foo1 . Foo ::= "foo" "bar" ; -- Foo2 . Foo ::= "++" -- -- Note that if there are two `rules` macro with the same category, the -- generated names should be uniques: -- >>> :{ -- let rules1 = Abs.Rules (Abs.Ident "Foo") -- [ Abs.RHS [Abs.Terminal "foo", Abs.Terminal "bar"] ] -- in -- let rules2 = Abs.Rules (Abs.Ident "Foo") -- [ Abs.RHS [Abs.Terminal "foo", Abs.Terminal "foo"] ] -- in -- let Ok tree = expandRules (Abs.Grammar [rules1, rules2]) -- in putStrLn (printTree tree) -- :} -- Foo1 . Foo ::= "foo" "bar" ; -- Foo2 . Foo ::= "foo" "foo" -- -- This is using a State monad to remember the last used index for a category. expandRules :: Abs.Grammar -> Err Abs.Grammar expandRules (Abs.Grammar defs) = return $ Abs.Grammar (concat (evalState (mapM expand defs) [])) where expand (Abs.Rules ident rhss) = mapM (mkRule ident) rhss expand other = return [other] mkRule :: Abs.Ident -> Abs.RHS -> State [(String, Int)] Abs.Def mkRule ident (Abs.RHS rhs) = do fun <- liftM (Abs.LabNoP . Abs.Id . Abs.Ident) (mkName ident rhs) return (Abs.Rule fun (Abs.IdCat ident) rhs) mkName :: Abs.Ident -> [Abs.Item] -> State [(String, Int)] String mkName (Abs.Ident cat) [Abs.Terminal s] | all (\c -> isAlphaNum c || elem c ("_'" :: String)) s = return (cat ++ "_" ++ s) mkName (Abs.Ident cat) [Abs.NTerminal (Abs.IdCat (Abs.Ident s))] = return (cat ++ s) mkName (Abs.Ident cat) _ = do i <- liftM (maybe 1 (+1) . lookup cat) get modify ((cat, i):) return (cat ++ show i)