{-
    BNF Converter: Happy Generator
    Copyright (C) 2004  Author:  Markus Forberg, Aarne Ranta

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-}

module BNFC.Backend.Haskell.CFtoHappy
       (
       cf2HappyS -- cf2HappyS :: CF -> CFCat -> String
       )
        where

import BNFC.CF
--import Lexer
import Data.List (intersperse, sort)
import Data.Char
import BNFC.Options (HappyMode(..))
-- Type declarations

type Rules       = [(NonTerminal,[(Pattern,Action)])]
type Pattern     = String
type Action      = String
type MetaVar     = String

-- default naming

moduleName  = "HappyParser"
tokenName   = "Token"

-- Happy mode


cf2HappyS :: String -> String -> String -> String -> HappyMode -> Bool -> CF -> String
---- cf2HappyS :: String -> CF -> String
cf2HappyS = cf2Happy

-- The main function, that given a CF and a CFCat to parse according to,
-- generates a happy module.
cf2Happy name absName lexName errName mode byteStrings cf
 = unlines
    [header name absName lexName errName mode byteStrings,
     declarations mode (allEntryPoints cf),
     tokens (cfTokens cf),
     specialToks cf,
     delimiter,
     specialRules byteStrings cf,
     prRules (rulesForHappy cf),
     finalize byteStrings cf]

-- construct the header.
header :: String -> String -> String -> String -> HappyMode -> Bool -> String
header modName absName lexName errName mode byteStrings = unlines
         ["-- This Happy file was machine-generated by the BNF converter",
	  "{",
	  "{-# OPTIONS_GHC -fno-warn-incomplete-patterns -fno-warn-overlapping-patterns #-}",
          case mode of
	    Standard -> "module " ++ modName ++ " where"
	    GLR      -> "-- module name filled in by Happy",
          "import " ++ absName,
          "import " ++ lexName,
          "import " ++ errName,
          if byteStrings then "import qualified Data.ByteString.Char8 as BS" else "",
          "}"
         ]

{- ----
cf2Happy :: String -> CF -> String
cf2Happy name cf
 = unlines
    [header name,
     declarations (allEntryPoints cf),
     tokens (cfTokens cf),
     specialToks cf,
     delimiter,
     specialRules cf,
     prRules (rulesForHappy cf),
     finalize cf]

-- construct the header.
header :: String -> String
header name = unlines
         ["-- This Happy file was machine-generated by the BNF converter",
	  "{",
          "module Par" ++ name ++ " where",
          "import Abs"++name,
          "import Lex"++name,
          "import ErrM",
          "}"
         ]
-}

-- The declarations of a happy file.
declarations :: HappyMode -> [Cat] -> String
declarations mode ns = unlines
                 [generateP ns,
          	  case mode of
                    Standard -> "-- no lexer declaration"
                    GLR      -> "%lexer { myLexer } { Err _ }",
                  "%monad { Err } { thenM } { returnM }",
                  "%tokentype { " ++ tokenName ++ " }"]
   where generateP []     = []
	 generateP (n:ns) = concat ["%name p",n'," ",n',"\n",generateP ns]
                               where n' = identCat n

-- The useless delimiter symbol.
delimiter :: String
delimiter = "\n%%\n"

-- Generate the list of tokens and their identifiers.
tokens :: [(String,Int)] -> String
tokens toks = "%token \n" ++ prTokens toks
 where prTokens []         = []
       prTokens ((t,k):tk) = " " ++ (convert t) ++
                             " { " ++ oneTok t k ++ " }\n" ++
                             prTokens tk
       oneTok t k = "PT _ (TS _ " ++ show k ++ ")"

-- Happy doesn't allow characters such as åäö to occur in the happy file. This
-- is however not a restriction, just a naming paradigm in the happy source file.
convert :: String -> String
convert "\\" = concat ['\'':"\\\\","\'"]
convert xs   = concat ['\'':(escape xs),"\'"]
  where escape [] = []
	escape ('\'':xs) = '\\':'\'':escape xs
	escape (x:xs) = x:escape xs

rulesForHappy :: CF -> Rules
rulesForHappy cf = map mkOne $ ruleGroups cf where
  mkOne (cat,rules) = constructRule cf rules cat

-- For every non-terminal, we construct a set of rules. A rule is a sequence of
-- terminals and non-terminals, and an action to be performed
-- As an optimization, a pair of list rules [C] ::= "" | C k [C]
-- is left-recursivized into [C] ::= "" | [C] C k.
-- This could be generalized to cover other forms of list rules.
constructRule :: CF -> [Rule] -> NonTerminal -> (NonTerminal,[(Pattern,Action)])
constructRule cf rules nt = (nt,[(p,generateAction nt (revF b r) m) |
     r0 <- rules,
     let (b,r) = if isConsFun (funRule r0) && elem (valCat r0) revs
                   then (True,revSepListRule r0)
                 else (False,r0),
     let (p,m) = generatePatterns cf r])
 where
   revF b r = if b then ("flip " ++ funRule r) else (underscore $ funRule r)
   revs = reversibleCats cf
   underscore f | isDefinedRule f   = f ++ "_"
		| otherwise	    = f

-- Generates a string containing the semantic action.
-- An action can for example be: Sum $1 $2, that is, construct an AST
-- with the constructor Sum applied to the two metavariables $1 and $2.
generateAction :: NonTerminal -> Fun -> [MetaVar] -> Action
generateAction nt f ms = unwords $ (if isCoercion f then [] else [f]) ++ ms

-- Generate patterns and a set of metavariables indicating
-- where in the pattern the non-terminal

generatePatterns :: CF -> Rule -> (Pattern,[MetaVar])
generatePatterns cf r = case rhsRule r of
  []  -> ("{- empty -}",[])
  its -> (unwords (map mkIt its), metas its)
 where
   mkIt i = case i of
     Left c -> identCat c
     Right s -> convert s
   metas its = [revIf c ('$': show i) | (i,Left c) <- zip [1 ::Int ..] its]
   revIf c m = if (not (isConsFun (funRule r)) && elem c revs)
                 then ("(reverse " ++ m ++ ")")
               else m  -- no reversal in the left-recursive Cons rule itself
   revs = reversibleCats cf

-- We have now constructed the patterns and actions,
-- so the only thing left is to merge them into one string.

prRules :: Rules -> String
prRules = unlines . map prOne
  where
    prOne (nt,[]) = [] -- nt has only internal use
    prOne (nt,((p,a):ls)) =
      unwords [nt', "::", "{", show $ normCat nt, "}\n" ++
               nt', ":" , p, "{", a, "}", "\n" ++ pr ls] ++ "\n"
     where
       nt' = identCat nt
       pr [] = []
       pr ((p,a):ls) =
         unlines [(concat $ intersperse " " ["  |", p, "{", a , "}"])] ++ pr ls

-- Finally, some haskell code.

finalize :: Bool -> CF -> String
finalize byteStrings cf = unlines $
   [
     "{",
     "\nreturnM :: a -> Err a",
     "returnM = return",
     "\nthenM :: Err a -> (a -> Err b) -> Err b",
     "thenM = (>>=)",
     "\nhappyError :: [" ++ tokenName ++ "] -> Err a",
     "happyError ts =",
     "  Bad $ \"syntax error at \" ++ tokenPos ts ++ ",
     "  case ts of",
     "    [] -> []",
     "    [Err _] -> \" due to lexer error\"",
     "    _ -> \" before \" ++ unwords (map ("++stringUnpack++" . prToken) (take 4 ts))",
     "",
     "myLexer = tokens"
   ] ++ definedRules cf ++ [ "}" ]
   where
     stringUnpack
       | byteStrings = "BS.unpack"
       | otherwise   = "id"


definedRules cf = [ mkDef f xs e | FunDef f xs e <- pragmasOfCF cf ]
    where
	mkDef f xs e = unwords $ (f ++ "_") : xs' ++ ["=", show e']
	    where
		xs' = map (++"_") xs
		e'  = underscore e
	underscore (App x es)
	    | isLower $ head x	= App (x ++ "_") $ map underscore es
	    | otherwise		= App x $ map underscore es
	underscore e	      = e

-- aarne's modifs 8/1/2002:
-- Markus's modifs 11/02/2002

-- GF literals
specialToks :: CF -> String
specialToks cf = unlines $
		 (map aux (literals cf))
		  ++ ["L_err    { _ }"]
 where aux cat =
        case show cat of
          "Ident"  -> "L_ident  { PT _ (TV $$) }"
          "String" -> "L_quoted { PT _ (TL $$) }"
          "Integer" -> "L_integ  { PT _ (TI $$) }"
          "Double" -> "L_doubl  { PT _ (TD $$) }"
          "Char"   -> "L_charac { PT _ (TC $$) }"
          own      -> "L_" ++ own ++ " { PT _ (T_" ++ own ++ " " ++ posn ++ ") }"
         where
           posn = if isPositionCat cf cat then "_" else "$$"

specialRules :: Bool -> CF -> String
specialRules byteStrings cf = unlines $
                  map aux (literals cf)
 where
   aux cat =
     case show cat of
         "Ident"   -> "Ident   :: { Ident }   : L_ident  { Ident $1 }"
	 "String"  -> "String  :: { String }  : L_quoted { "++stringUnpack++" $1 }"
	 "Integer" -> "Integer :: { Integer } : L_integ  { (read ("++stringUnpack++" $1)) :: Integer }"
	 "Double"  -> "Double  :: { Double }  : L_doubl  { (read ("++stringUnpack++" $1)) :: Double }"
	 "Char"    -> "Char    :: { Char }    : L_charac { (read ("++stringUnpack++" $1)) :: Char }"
	 own       -> own ++ "    :: { " ++ own ++ "} : L_" ++ own ++ " { " ++ own ++ " ("++ posn ++ "$1)}"
		-- PCC: take "own" as type name? (manual says newtype)
      where
         posn = if isPositionCat cf cat then "mkPosToken " else ""

   stringUnpack
     | byteStrings = "BS.unpack"
     | otherwise   = ""