{-
    BNF Converter: Abstract syntax
    Copyright (C) 2004  Author: Markus Forsberg, Aarne Ranta

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
-}


module BNFC.GetCF where

import Control.Arrow (left)
import Control.Monad.State (State, evalState, get, modify)

import Data.Char
import Data.Either  (partitionEithers)
import Data.Functor ((<$>))
import Data.List    (nub, partition)
import Data.Maybe   (mapMaybe)

import qualified Data.Foldable as Fold

import System.Exit (exitFailure)
import System.IO   (hPutStrLn, stderr)

-- Local imports:

import qualified AbsBNF as Abs
import ParBNF
import ErrM

import BNFC.CF
import BNFC.Options
import BNFC.TypeChecker
import BNFC.Utils


-- $setup
-- >>> import PrintBNF

parseCF :: SharedOptions -> Target -> String -> IO CF
parseCF opts t s = cfp2cf <$> parseCFP opts t s

parseCFP :: SharedOptions -> Target -> String -> IO CFP
parseCFP opts target content = do
  cfp <- runErr $ pGrammar (myLexer content)
                    >>= expandRules
                    >>= getCFP (cnf opts)
                    >>= markTokenCategories
  let cf = cfp2cf cfp
  runErr $ checkDefinitions cf

  -- Some backends do not allow the grammar name to coincide with
  -- one of the category or constructor names.
  let names    = allNames cf
  () <- when (target == TargetJava && lang opts `elem` names) $
      die $ unwords $
        [ "ERROR of backend", show target ++ ":"
        , "the language name"
        , lang opts
        , "conflicts with a name defined in the grammar."
        ]

  -- Warn of fail if the grammar uses non unique names
  case filter (not . isDefinedRule) $ filterNonUnique names of
    [] -> return ()
    ns | target `notElem` [TargetHaskell,TargetHaskellGadt,TargetOCaml]
       -> die $ unlines $
            [ "ERROR: names not unique: " ++ unwords ns
            , "This is an error in the backend " ++ show target
            ]
       | otherwise
       -> putStrLn $ unlines $
            [ "Warning: names not unique: " ++ unwords ns
            , "This can be an error in other back ends."
            ]

  -- Print warnings if user defined nullable tokens.
  Fold.mapM_ putStrLn $ checkTokens cf

  -- Print the number of rules
  let nRules = length (cfgRules cf)
  -- Note: the match against () is necessary for type class instance resolution.
  () <- when (nRules == 0) $ die $ "ERROR: the grammar contains no rules."
  putStrLn $ show nRules +++ "rules accepted\n"

  -- Print a warning if comment delimiter are bigger than 2 characters
  let c3s = [(b,e) | (b,e) <- fst (comments cf), length b > 2 || length e > 2]
  unless (null c3s) $ do
      putStrLn "Warning: comment delimiters longer than 2 characters ignored in Haskell:"
      mapM_ putStrLn [b +++ "-" +++ e | (b,e) <- c3s]
  return cfp

  where
    runErr (Ok a) = return a
    runErr (Bad msg) = die msg

die :: String -> IO a
die msg = hPutStrLn stderr msg >> exitFailure

{-
    case filter (not . isDefinedRule) $ notUniqueFuns cf of
     [] -> case (badInheritence cf) of
       [] -> return (ret,True)
       xs -> do
        putStrLn "Warning :"
        putStrLn $ "  Bad Label name in Category(s) :" ++ unwords xs
        putStrLn $ "  These categories have more than one Label, yet one of these"
        putStrLn $ "  Labels has the same name as the Category. This will almost"
        putStrLn $ "  certainly cause problems in languages other than Haskell.\n"
        return (ret,True)
     xs -> do
       putStrLn $ "Warning :"
       putStrLn $ "  Non-unique label name(s) : " ++ unwords xs
       putStrLn $ "  There may be problems with the pretty-printer.\n"
       case (badInheritence cf) of
         [] -> return (ret,True)
         xs -> do
          putStrLn $ "Warning :"
          putStrLn $ "  Bad Label name in Category(s) :" ++ unwords xs
          putStrLn $ "  These categories have more than one Label, yet one of these"
          putStrLn $ "  Labels has the same name as the Category. This will almost"
          putStrLn $ "  certainly cause problems in languages other than Haskell.\n"
          return (ret,True)
-}

getCFP :: Bool -> Abs.Grammar -> Err CFP
getCFP cnf (Abs.Grammar defs0) = do
    let rules = inlineDelims rules0
        cf0 = revs srt
        srt = let literals           = nub [lit | xs <- map rhsRule rules,
                                                     Left (Cat lit) <- xs,
                                                     lit `elem` specialCatsP]
                  (symbols,keywords) = partition notIdent reservedWords
                  notIdent s         = null s || not (isAlpha (head s)) || any (not . isIdentRest) s
                  isIdentRest c      = isAlphaNum c || c == '_' || c == '\''
                  reservedWords      = nub [t | r <- rules, isParsable r, Right t <- rhsRule r]
                     -- Issue #204: exclude keywords from internal rules
              in CFG pragma literals symbols keywords [] rules
    case mapMaybe (checkRule (cfp2cf cf0)) (cfgRules cf0) of
      [] -> return ()
      msgs -> fail (unlines msgs)
    return cf0
  where
    (pragma,rules0) = partitionEithers $ concatMap transDef defs
    (defs,inlineDelims) = if cnf then (defs0,id) else removeDelims defs0
    revs cf1@CFG{..} =
        cf1 { cfgReversibleCats = findAllReversibleCats (cfp2cf cf1) }

-- | This function goes through each rule of a grammar and replace Cat "X" with
-- TokenCat "X" when "X" is a token type.
markTokenCategories :: CFP -> Err CFP
markTokenCategories cf@CFG{..} = return $ cf { cfgRules = newRules }
  where
    newRules = [ Rule f (mark c) (map (left mark) rhs) | Rule f c rhs <- cfgRules ]
    tokenCatNames = [ n | TokenReg n _ _ <- cfgPragmas ] ++ specialCatsP
    mark = toTokenCat tokenCatNames


-- | Change the constructor of categories with the given names from Cat to
-- TokenCat
-- >>> toTokenCat ["A"] (Cat "A") == TokenCat "A"
-- True
-- >>> toTokenCat ["A"] (ListCat (Cat "A")) == ListCat (TokenCat "A")
-- True
-- >>> toTokenCat ["A"] (Cat "B") == Cat "B"
-- True
toTokenCat :: [String] -> Cat -> Cat
toTokenCat ns (Cat a) | a `elem` ns = TokenCat a
toTokenCat ns (ListCat c) = ListCat (toTokenCat ns c)
toTokenCat _ c = c


removeDelims :: [Abs.Def] -> ([Abs.Def], [RuleP] -> [RuleP])
removeDelims xs = (ys ++ map delimToSep ds,
                   foldr (.) id [map (inlineDelim' d) | d <- ds])
  where
    (ds,ys) = partition isDelim xs
    isDelim (Abs.Delimiters{}) = True
    isDelim _ = False

    inlineDelim :: Abs.Def -> Either Cat String ->  [Either Cat String]
    inlineDelim (Abs.Delimiters cat open close _ _) (Left c)
      | c == ListCat (transCat cat) = [Right open, Left c, Right close]
    inlineDelim _ x = [x]

    inlineDelim' :: Abs.Def -> RuleP -> RuleP
    inlineDelim' d@(Abs.Delimiters cat _ _ _ _) r@(Rule f c rhs)
      | c == ListCat (transCat cat) = r
      | otherwise = Rule f c (concatMap (inlineDelim d) rhs)
    inlineDelim' _ _ = error "Not a delimiters pragma"


    delimToSep (Abs.Delimiters cat _ _ (Abs.SepTerm  s) sz) = Abs.Terminator sz cat s
    delimToSep (Abs.Delimiters cat _ _ (Abs.SepSepar s) sz) = Abs.Separator  sz cat s
    delimToSep (Abs.Delimiters cat _ _  Abs.SepNone     sz) = Abs.Terminator sz cat ""
    delimToSep x = x

transDef :: Abs.Def -> [Either Pragma RuleP]
transDef x = case x of
 Abs.Rule label cat items ->
   [Right $ Rule (transLabel label) (transCat cat) (concatMap transItem items)]
 Abs.Comment str               -> [Left $ CommentS str]
 Abs.Comments str0 str         -> [Left $ CommentM (str0,str)]
 Abs.Token ident reg           -> [Left $ TokenReg (transIdent ident) False reg]
 Abs.PosToken ident reg        -> [Left $ TokenReg (transIdent ident) True reg]
 Abs.Entryp idents             -> [Left $ EntryPoints (map (strToCat .transIdent) idents)]
 Abs.Internal label cat items  ->
   [Right $ Rule (transLabel label) (transCat cat) (Left InternalCat:concatMap transItem items)]
 Abs.Separator size ident str -> map  (Right . cf2cfpRule) $ separatorRules size ident str
 Abs.Terminator size ident str -> map  (Right . cf2cfpRule) $ terminatorRules size ident str
 Abs.Delimiters a b c d e -> map  (Right . cf2cfpRule) $ delimiterRules a b c d e
 Abs.Coercions ident int -> map  (Right . cf2cfpRule) $ coercionRules ident int
 Abs.Rules ident strs -> map (Right . cf2cfpRule) $ ebnfRules ident strs
 Abs.Layout ss      -> [Left $ Layout ss]
 Abs.LayoutStop ss  -> [Left $ LayoutStop ss]
 Abs.LayoutTop      -> [Left LayoutTop]
 Abs.Function f xs e -> [Left $ FunDef (transIdent f) (map transArg xs) (transExp e)]

delimiterRules :: Abs.Cat -> String -> String -> Abs.Separation -> Abs.MinimumSize -> [Rule]
delimiterRules a0 l r (Abs.SepTerm  "") size = delimiterRules a0 l r Abs.SepNone size
delimiterRules a0 l r (Abs.SepSepar "") size = delimiterRules a0 l r Abs.SepNone size
delimiterRules a0 l r sep size = [
   -- recognizing a single element
  Rule "(:[])"  (strToCat a')  (Left a : termin), -- optional terminator/separator

  -- glueing two sublists
  Rule "(++)"   (strToCat a')  [Left (strToCat a'), Left (strToCat a')],

   -- starting on either side with a delimiter
  Rule "[]"     (strToCat c)   [Right l],
  Rule (if optFinal then "(:[])" else
                         "[]")
                (strToCat d)   ([Left a | optFinal] ++ [Right r]),

   -- gathering chains
  Rule "(++)"   (strToCat c)   [Left (strToCat c), Left (strToCat a')],
  Rule "(++)"   (strToCat d)   [Left (strToCat a'), Left (strToCat d)],

   -- finally, put together left and right chains
  Rule "(++)"   as  [Left (strToCat c),Left (strToCat d)]] ++ [
  -- special rule for the empty list if necessary
  Rule "[]"     as  [Right l,Right r] | optEmpty]
 where a = transCat a0
       as = ListCat a
       a' = '@':'@':show a
       c  = '@':'{':show a
       d  = '@':'}':show a
       -- optionally separated concat. of x and y categories.
       termin = case sep of
                  Abs.SepSepar t -> [Right t]
                  Abs.SepTerm  t -> [Right t]
                  _ -> []
       optFinal = case (sep,size) of
         (Abs.SepSepar _,_) -> True
         (Abs.SepTerm _,Abs.MNonempty) -> True
         (Abs.SepNone,Abs.MNonempty) -> True
         _ -> False
       optEmpty = case sep of
         Abs.SepSepar _ -> size == Abs.MEmpty
         _ -> False


separatorRules :: Abs.MinimumSize -> Abs.Cat -> String -> [Rule]
separatorRules size c s = if null s then terminatorRules size c s else ifEmpty [
  Rule "(:[])" cs [Left c'],
  Rule "(:)"   cs [Left c', Right s, Left cs]
  ]
 where
   c' = transCat c
   cs = ListCat c'
   ifEmpty rs = if size == Abs.MNonempty
                then rs
                else Rule "[]" cs [] : rs

terminatorRules :: Abs.MinimumSize -> Abs.Cat -> String -> [Rule]
terminatorRules size c s = [
  ifEmpty,
  Rule "(:)" cs (Left c' : s' [Left cs])
  ]
 where
   c' = transCat c
   cs = ListCat c'
   s' its = if null s then its else Right s : its
   ifEmpty = if size == Abs.MNonempty
                then Rule "(:[])" cs (Left c' : if null s then [] else [Right s])
                else Rule "[]" cs []

coercionRules :: Abs.Ident -> Integer -> [Rule]
coercionRules (Abs.Ident c) n =
   Rule "_" (Cat c)            [Left (CoercCat c 1)] :
  [Rule "_" (CoercCat c (i-1)) [Left (CoercCat c i)] | i <- [2..n]] ++
  [Rule "_" (CoercCat c n)     [Right "(", Left (Cat c), Right ")"]]

ebnfRules :: Abs.Ident -> [Abs.RHS] -> [Rule]
ebnfRules (Abs.Ident c) rhss =
  [Rule (mkFun k its) (strToCat c) (concatMap transItem its)
     | (k, Abs.RHS its) <- zip [1 :: Int ..] rhss]
 where
   mkFun k i = case i of
     [Abs.Terminal s]  -> c' ++ "_" ++ mkName k s
     [Abs.NTerminal n] -> c' ++ identCat (transCat n)
     _ -> c' ++ "_" ++ show k
   c' = c --- normCat c
   mkName k s = if all (\c -> isAlphaNum c || elem c ("_'" :: String)) s
                   then s else show k

-- | Translate a rule item (terminal or non terminal)
-- It also sanitizes the terminals a bit by skipping empty terminals
-- or splitting multiwords terminals.
-- This means that the following rule
--   Foo. S ::= "foo bar" ""
-- is equivalent to
--   Foo. S ::= "foo" "bar"
transItem :: Abs.Item -> [Either Cat String]
transItem (Abs.Terminal str) = [Right w | w <- words str]
transItem (Abs.NTerminal cat) = [Left (transCat cat)]

transCat :: Abs.Cat -> Cat
transCat x = case x of
 Abs.ListCat cat  -> ListCat (transCat cat)
 Abs.IdCat (Abs.Ident c)     -> strToCat c

transLabel :: Abs.Label -> (Fun,Prof)
transLabel y = case y of
   Abs.LabNoP f     -> let g = transLabelId f in (g,(g,[])) ---- should be Nothing
   Abs.LabP   f p   -> let g = transLabelId f in (g,(g, map transProf p))
   Abs.LabPF  f g p -> (transLabelId f,(transLabelId g, map transProf p))
   Abs.LabF   f g   -> (transLabelId f,(transLabelId g, []))
 where
   transLabelId x = case x of
     Abs.Id id     -> transIdent id
     Abs.Wild      -> "_"
     Abs.ListE     -> "[]"
     Abs.ListCons  -> "(:)"
     Abs.ListOne   -> "(:[])"
   transProf (Abs.ProfIt bss as) =
     ([map fromInteger bs | Abs.Ints bs <- bss], map fromInteger as)

transIdent :: Abs.Ident -> String
transIdent x = case x of
 Abs.Ident str  -> str

transArg :: Abs.Arg -> String
transArg (Abs.Arg x) = transIdent x

transExp :: Abs.Exp -> Exp
transExp e = case e of
    Abs.App x es    -> App (transIdent x) (map transExp es)
    Abs.Var x       -> App (transIdent x) []
    Abs.Cons e1 e2  -> cons e1 (transExp e2)
    Abs.List es     -> foldr cons nil es
    Abs.LitInt x    -> LitInt x
    Abs.LitDouble x -> LitDouble x
    Abs.LitChar x   -> LitChar x
    Abs.LitString x -> LitString x
  where
    cons e1 e2 = App "(:)" [transExp e1, e2]
    nil        = App "[]" []

--------------------------------------------------------------------------------

-- | Check if any of the user-defined terminal categories is nullable.
checkTokens :: CFG f -> Maybe String
checkTokens cf
  | null ns   = Nothing
  | otherwise = Just $ unlines
      [ "Warning : "  -- TODO: change to error in a future version
      , "  The following tokens accept the empty string: "
      , "    " ++ unwords ns
      , "  This is error-prone and will not be supported in the future."
      ]
  where
    ns = map (show . fst) . filter (nullable . snd) $ tokenPragmas cf

-- | Check if a regular expression is nullable (accepts the empty string)
nullable :: Abs.Reg -> Bool
nullable r =
    case r of
      Abs.RSeq r1 r2   -> nullable r1 && nullable r2
      Abs.RAlt r1 r2   -> nullable r1 || nullable r2
      Abs.RMinus r1 r2 -> nullable r1 && not (nullable r2)
      Abs.RStar _      -> True
      Abs.RPlus r1     -> nullable r1
      Abs.ROpt _       -> True
      Abs.REps         -> True
      Abs.RChar _      -> False
      Abs.RAlts _      -> False
      Abs.RSeqs s      -> null s
      Abs.RDigit       -> False
      Abs.RLetter      -> False
      Abs.RUpper       -> False
      Abs.RLower       -> False
      Abs.RAny         -> False


-- we should actually check that
-- (1) coercions are always between variants
-- (2) no other digits are used

checkRule :: CF -> RuleP -> Maybe String
checkRule _ (Rule _ (Cat ('@':_)) _) = Nothing -- Generated by a pragma; it's a trusted category
checkRule cf (Rule (f,_) cat rhs)
  | badCoercion    = Just $ "Bad coercion in rule" +++ s
  | badNil         = Just $ "Bad empty list rule" +++ s
  | badOne         = Just $ "Bad one-element list rule" +++ s
  | badCons        = Just $ "Bad list construction rule" +++ s
  | badList        = Just $ "Bad list formation rule" +++ s
  | badSpecial     = Just $ "Bad special category rule" +++ s
  | badTypeName    = Just $ "Bad type name" +++ unwords (map show badtypes) +++ "in" +++ s
  | badFunName     = Just $ "Bad constructor name" +++ f +++ "in" +++ s
  | badMissing     = Just $ "no production for" +++ unwords missing ++
                             ", appearing in rule\n    " ++ s
  | otherwise      = Nothing
 where
   s  = f ++ "." +++ show cat +++ "::=" +++ unwords (map (either show show) rhs) -- Todo: consider using the show instance of Rule
   c  = normCat cat
   cs = [normCat c | Left c <- rhs]
   badCoercion = isCoercion f && [c] /= cs
   badNil      = isNilFun f   && not (isList c && null cs)
   badOne      = isOneFun f   && not (isList c && cs == [catOfList c])
   badCons     = isConsFun f  && not (isList c && cs == [catOfList c, c])
   badList     = isList c     &&
                 not (isCoercion f || isNilCons f)
   badSpecial  = elem c [ Cat x | x <- specialCatsP] && not (isCoercion f)

   badMissing  = not (null missing)
   missing     = filter nodef [show c | Left c <- rhs]
   nodef t = t `notElem` defineds
   defineds =
    show InternalCat : tokenNames cf ++ specialCatsP ++ map (show . valCat) (cfgRules cf)
   badTypeName = not (null badtypes)
   badtypes = filter isBadType $ cat : [c | Left c <- rhs]
   isBadType (ListCat c) = isBadType c
   isBadType InternalCat = False
   isBadType (CoercCat c _) = isBadCatName c
   isBadType (Cat s) = isBadCatName s
   isBadType (TokenCat s) = isBadCatName s
   isBadCatName s = not (isUpper (head s) || s == show InternalCat || (head s == '@'))
   badFunName = not (all (\c -> isAlphaNum c || c == '_') f {-isUpper (head f)-}
                       || isCoercion f || isNilCons f)


-- | Pre-processor that converts the `rules` macros to regular rules
-- by creating unique function names for them.
-- >>> :{
-- let rules1 = Abs.Rules (Abs.Ident "Foo")
--         [ Abs.RHS [Abs.Terminal "abc"]
--         , Abs.RHS [Abs.NTerminal (Abs.IdCat (Abs.Ident "A"))]
--         , Abs.RHS [Abs.Terminal "foo", Abs.Terminal "bar"]
--         , Abs.RHS [Abs.Terminal "++"]
--         ]
-- in
-- let Ok tree = expandRules (Abs.Grammar [rules1])
-- in putStrLn (printTree tree)
-- :}
-- Foo_abc . Foo ::= "abc";
-- FooA . Foo ::= A;
-- Foo1 . Foo ::= "foo" "bar";
-- Foo2 . Foo ::= "++"
--
-- Note that if there are two `rules` macro with the same category, the
-- generated names should be uniques:
-- >>> :{
-- let rules1 = Abs.Rules (Abs.Ident "Foo")
--         [ Abs.RHS [Abs.Terminal "foo", Abs.Terminal "bar"] ]
-- in
-- let rules2 = Abs.Rules (Abs.Ident "Foo")
--         [ Abs.RHS [Abs.Terminal "foo", Abs.Terminal "foo"] ]
-- in
-- let Ok tree = expandRules (Abs.Grammar [rules1, rules2])
-- in putStrLn (printTree tree)
-- :}
-- Foo1 . Foo ::= "foo" "bar";
-- Foo2 . Foo ::= "foo" "foo"
--
-- This is using a State monad to remember the last used index for a category.
expandRules :: Abs.Grammar -> Err Abs.Grammar
expandRules (Abs.Grammar defs) =
    return $ Abs.Grammar (concat (evalState (mapM expand defs) []))
  where
    expand (Abs.Rules ident rhss) = mapM (mkRule ident) rhss
    expand other = return [other]
    mkRule :: Abs.Ident -> Abs.RHS -> State [(String, Int)] Abs.Def
    mkRule ident (Abs.RHS rhs) = do
      fun <- Abs.LabNoP . Abs.Id . Abs.Ident <$> mkName ident rhs
      return (Abs.Rule fun (Abs.IdCat ident) rhs)
    mkName :: Abs.Ident -> [Abs.Item] -> State [(String, Int)] String
    mkName (Abs.Ident cat) [Abs.Terminal s]
      | all (\c -> isAlphaNum c || elem c ("_'" :: String)) s = return (cat ++ "_" ++ s)
    mkName (Abs.Ident cat) [Abs.NTerminal (Abs.IdCat (Abs.Ident s))] =
        return (cat ++ s)
    mkName (Abs.Ident cat) _ = do
        i <- maybe 1 (+1) . lookup cat <$> get
        modify ((cat, i):)
        return (cat ++ show i)