-- | Template Haskell system for translating formal grammars into real
-- Haskell code based on ADPfusion.
--
-- If you want automatic algebra products, ADPfusion provides these.
-- @makeAlgebraProductH ['h] ''SigName@ where @SigName@ is the
-- auto-generted signature name, will generate the algebra product.
--
-- When will build the grammar, the types and variables are @newName@s
-- while attribute functions names are deterministic and potentially
-- non-unique.

module FormalLanguage.CFG.TH
  ( thCodeGen
  ) where

import           Control.Applicative
import           Control.Arrow ((&&&))
import           Control.Exception (assert)
import           Control.Lens hiding (Strict, (...), outside)
import           Control.Monad
import           Control.Monad.Reader
import           Control.Monad.State.Strict as M
import           Control.Monad.Trans.Class
import           Data.Char (toUpper,toLower)
import           Data.Default
import           Data.Function (on)
import           Data.List (intersperse,nub,nubBy,groupBy)
import           Data.Maybe
import           Data.Vector.Fusion.Stream.Monadic (Stream)
import           Debug.Trace
import           GHC.Exts (the)
import           Language.Haskell.TH hiding (dataD)
import           Language.Haskell.TH.Syntax hiding (lift)
import qualified Data.Map as M
import qualified Data.Set as S
import qualified Text.PrettyPrint.ANSI.Leijen as PP
import           Text.Printf
import qualified GHC.TypeLits as Kind

import           ADP.Fusion.Core ( (%), (|||), (...), (<<<) )
import           Data.PrimitiveArray (Z(..), (:.)(..))
import qualified ADP.Fusion.Core as ADP

import           FormalLanguage.CFG.Grammar
import           FormalLanguage.CFG.PrettyPrint.ANSI
import           FormalLanguage.CFG.TH.Internal



-- * @StateT CfgState Q@ monad and wrapper for TH-based grammar
-- construction.

-- | The state we carry around. Contains all the bound names, and lookup
-- tables for functions, terminals and syntactic variables.
--
-- NOTE the defaults all start out undefined, making sure anything invalid
-- explodes in our face.
--
-- TODO if we allow multiple different choice function, we'll have to
-- extend @_qChoiceFun@

data CfgState = CfgState
  -- external stuff
  { _qGrammar             :: Grammar                      -- ^ the input grammar
  -- some basic names
  , _qElemTyName          :: Name                         -- ^ stream type name, as in @Stream m qElemTyName@
  , _qGrammarName         :: Name                         -- ^ the name for the body of the grammar
  , _qMTyName             :: Name                         -- ^ monad type name, as in @h :: Stream MTyName ...@
  , _qRetTyName           :: Name                         -- ^ choice return type name, as in @h :: Stream m qElemTyName -> m qRetTyName@
  , _qSigName             :: Name                         -- ^ the name of the signature type and data constructor, both (signatures need to have a single data constructor)
  -- attribute functions and choice; for now we allow only one choice
  -- function
  , _qAttribFuns          :: M.Map [AttributeFunction] VarStrictType -- ^ map from the composed name to the template haskell attribute function @(Var,Strict,Type)@ (functions are currently stored as @[String]@ in @Grammar.hs@
  , _qChoiceFun           :: VarStrictType                -- ^ the choice function
  -- syntactic variables
  , _qPartialSyntVarNames :: M.Map Symbol Name            -- ^ syntactic-id to var name -- partially applied table / syntactic
  , _qInsideSyntVarNames  :: M.Map Symbol Name            -- ^ for outside grammars, these are the var-names for inside syn-vars
  , _qFullSyntVarNames    :: M.Map Symbol Name            -- ^ type variable names for the fully applied grammar body / where part
  -- everything on terminals
  , _qTermAtomVarNames    :: M.Map (String,Int) Name      -- ^ (Term-id,Dimension) to var name
  , _qTermAtomTyNames     :: M.Map (String,Int) Name      -- ^ the type name for each unique terminal symbol (that is: the scalar terminals in each dimension)
  , _qTermSymbExp         :: M.Map Symbol (Type,Exp)      -- ^ associate a terminal @Symb@ with a complete @Type@ and @Exp@
  , _qPrefix              :: String                       -- ^ prefix for attribute functions
  }

makeLenses ''CfgState

instance Default CfgState where
  def = CfgState
    { _qGrammar             = error "def / grammar"
    , _qGrammarName         = error "def / grammarname"
    , _qElemTyName          = error "def / elemty"
    , _qRetTyName           = error "def / retty"
    , _qMTyName             = error "def / mty"
    , _qSigName             = error "def / signame"
    , _qTermAtomTyNames     = error "def / termtynames"
    , _qFullSyntVarNames    = error "def / synbodynames"
    , _qAttribFuns          = error "def / attribfuns"
    , _qChoiceFun           = error "def / choicefun"
    , _qTermSymbExp         = error "def / termsymbexp"
    , _qTermAtomVarNames    = error "def / termsingvarnames"
    , _qPartialSyntVarNames = error "def / partsyntvarnames"
    , _qInsideSyntVarNames  = error "def / insidesyntvarnames"
    , _qPrefix              = error "def / prefix"
    }

-- | The type of our little stateful @Q@ computations

type TQ z = StateT CfgState Q z



-- * TH functions

-- | Entry point for generation of @Grammar@ and @Signature@ code. Will
-- also stuff the 'Grammar' into the state data. A bunch of TH names are
-- generated here and become part of the state, as they are used in
-- multiple places.

thCodeGen :: Int -> Grammar -> Q [Dec]
thCodeGen prefixLen g = do
  let _qGrammar = g
  _qMTyName             <- newName "m"
  _qElemTyName          <- newName "s"
  _qRetTyName           <- newName "r"
  _qTermAtomTyNames     <- M.fromList <$> (mapM (\(name,tape) -> ((name,tape),) <$> newNameTerm "t" name tape) $ terminalsWithTape g) -- g^..termvars.folded.name.getSteName)
  _qPartialSyntVarNames <- M.fromList <$> (mapM (\n -> (n,) <$> newName ("s_" ++ (n^..getSymbolList.folded.name.getSteName.folded))) $ uniqueSyntacticSymbols g)
  _qInsideSyntVarNames  <- M.fromList <$> (mapM (\n -> (n,) <$> newName ("i_" ++ (n^..getSymbolList.folded.name.getSteName.folded))) $ uniqueSynTermSymbols   g)
  let _qPrefix          =  over _head toLower $ take prefixLen (g^.grammarName)
  let ls = (nub . map _lhs . S.elems) $ g^.rules
  let synKeys  = (filter (`elem` ls) . M.keys) _qPartialSyntVarNames
  bodySynNames  <- sequence [ (n,) <$> (newName $ "ss_" ++ concat k) | n <- synKeys, let k = n^..getSymbolList.folded.name.getSteName ]
  let _qFullSyntVarNames = M.fromList bodySynNames
  -- TODO inside synvars in outside context
  evalStateT codeGen def{_qGrammar, _qMTyName, _qElemTyName, _qRetTyName, _qTermAtomTyNames, _qPartialSyntVarNames, _qInsideSyntVarNames, _qPrefix, _qFullSyntVarNames}

-- | Actually create signature, grammar, inline pragma.

codeGen :: TQ [Dec]
codeGen = do
  -- build up the terminal symbol lookup
  qTermAtomVarNames <~ M.fromList <$> dimensionalTermSymbNames
  qTermSymbExp      <~ M.fromList <$> (mapM grammarTermExpression =<< uniqueTerminalSymbols <$> use qGrammar)
  -- create attribute function bindings (needed by signature and grammar)
  qAttribFuns <~ (use (qGrammar.rules) >>= (fmap M.fromList . mapM attributeFunctionType . S.toList))
  -- create choice function
  qChoiceFun <~ choiceFunction
  -- create signature
  sig <- signature
  -- create grammar
  gra <- grammar
  -- create inlining code
  inl <- use qGrammarName >>= \gname -> lift $ pragInlD gname Inline FunLike AllPhases
  -- outside grammars use the inside signature?!
  g <- use qGrammar
  if False -- isOutside $ g^.outside -- considering to just use unsafeCoerce on inside algebras
    then return [gra,inl]
    else return [sig,gra,inl]

-- | Create the signature. Will also set the signature name.

signature :: TQ Dec
signature = do
  m         <- use qMTyName
  x         <- use qElemTyName
  r         <- use qRetTyName
  termNames <- use qTermAtomTyNames
  sigName   <- (mkName . ("Sig" ++)) <$> use (qGrammar.grammarName)
  fs        <- use qAttribFuns
  h         <- use qChoiceFun
  qSigName .= sigName
  lift $ dataD (cxt [])
               sigName
               (PlainTV m : PlainTV x : PlainTV r : (map PlainTV $ termNames^..folded))
               [recC sigName ((map return $ fs^..folded) ++ [return h])]

-- | The grammar requires three types of arguments. First we need to bind
-- an algebra. Then we bind a list of non-terminals. Finally we bind a list
-- of terminals.
--
-- Once this function is called, it will print out the order of arguments!
--
-- TODO how about we wrap the non-terminals and terminals each in a tuple?

grammarArguments :: TQ [PatQ]
grammarArguments = do
  g       <- use qGrammar
  signame <- use qSigName
  h       <- use qChoiceFun
  fs      <- use qAttribFuns
  tavn    <- use qTermAtomVarNames
  psyn    <- use qPartialSyntVarNames
  isyn    <- use qInsideSyntVarNames
  -- bind algebra
  let alg = recP signame [ fieldPat n (varP n) | (n,_,_) <- h:(fs^..folded) ]
  -- bind partially applied non-terminals
  let syn = [ bangP $ varP s | s <- psyn^..folded ]
  -- bind fully applied non-terminals
  let isn = [ bangP $ varP s | s <- isyn^..folded ]
  -- bind terminals
  let ter = [ bangP $ varP t | t <- tavn^..folded ]
  --
  gname <- showName <$> use qGrammarName
  let ppSynt [x] = PP.red $ PP.text x
      ppSynt xs  = PP.list $ map (ppSynt . (:[])) xs
      ppTerm (n,k) = PP.yellow . PP.text $ printf "%s,%d" n k
      pp = PP.dullgreen $ PP.text (printf "%s $ALGEBRA" gname)
      sy = PP.encloseSep (PP.text "   ") (PP.empty) (PP.text "  ") (runReader (mapM symbolDoc $ M.keys psyn) g)
      iy = if M.null isyn then PP.text "" else PP.encloseSep (PP.text "   ") (PP.empty) (PP.text "  ") (runReader (mapM symbolDoc $ M.keys isyn) g)
      te = PP.encloseSep (PP.text "   ") (PP.empty) (PP.text "  ") (map (\s -> ppTerm $ s)                      $ M.keys tavn)
  lift . runIO . printDoc $ pp PP.<> sy PP.<> iy PP.<> te PP.<> PP.hardline
  return $ alg : syn ++ isn ++ ter

-- | Fully apply each partial syntactic variable to the corresponding
-- right-hand side. First, build up the map of fully applied names, then
-- associate each one.
--
-- @
--  Z:.s:.t
--    where s = s' (f >>> t ... h)
--          t = t' (f >>> s ... h)
-- @

grammarBodyWhere :: TQ [DecQ]
grammarBodyWhere = do
  {-
  ls <- (nub . map _lhs . S.elems) <$> use (qGrammar.rules)
  synKeys       <- (filter (`elem` ls) . M.keys) <$> use qPartialSyntVarNames
  bodySynNames  <- lift $ sequence [ (n,) <$> (newName $ "ss_" ++ concat k) | n <- synKeys, let k = n^..getSymbolList.folded.name.getSteName ]
  qFullSyntVarNames .= M.fromList bodySynNames
  -}
  bodySynNames <- M.toList <$> use qFullSyntVarNames
  -- TODO now we actually need to *ALSO* add symbols for the inside stuff,
  -- if this is an outside grammar.
  mapM grammarBodySyn bodySynNames

-- | Fully bind each 'Symb' (which is partially applied, coming in as an
-- argument in the grammar) to the correct right-hand side.

grammarBodySyn :: (Symbol,Name) -> TQ DecQ
grammarBodySyn (s,n) = do
  hname <- use (qChoiceFun._1)
  partial <- use qPartialSyntVarNames
  ix <- lift $ newName "ix"
  -- all rules that have @s@ on the left-hand side
  fs <- (filter ((s==) . _lhs) . S.elems) <$> use (qGrammar.rules)
  rs <- mapM grammarBodyRHS fs
  let rhs = assert (not $ null rs) $
            appE ( uInfixE (foldl1 (\acc z -> uInfixE acc (varE '(|||)) z) rs)
                           (varE '(...))
                           (varE hname) )
                 (varE ix)
  let sname = M.findWithDefault (error $ "grammarBodySyn: name not found for: " ++ show s) s partial
  -- use @TW@ to combine the table @varE sname@ and the rule RHS @lamE...@
  let bdy = [| ADP.TW $(varE sname) $(lamE [varP ix] rhs) |]
  -- return $ valD (varP n) (normalB $ appE (varE $ M.findWithDefault (error "grammarBodySyn") s partial) (lamE [varP ix] rhs)) []
  return $ valD (varP n) (normalB bdy) []

-- | Build up the rhs for each rule.
--
-- Requires using the fully bound syntactic variable name!

grammarBodyRHS :: Rule -> TQ ExpQ
grammarBodyRHS (Rule _ f rs) = do
  -- bundle up terminals and non-terminals
  terms        <- use qTermSymbExp
  synNames     <- use qFullSyntVarNames -- just the name of the fully applied symbol
  synTermNames <- use qInsideSyntVarNames
  let fragmentSynVar :: Symbol -> Maybe Name
      fragmentSynVar s@(Symbol [SynVar _ _ n k]) | n>1 && k<n = M.lookup (splitToFull s) synNames
      fragmentSynVar _ = Nothing
  let finalSynVar :: Symbol -> Maybe Name
      finalSynVar s@(Symbol [SynVar _ _ n k]) | n>1 && k==n = M.lookup (splitToFull s) synNames
      finalSynVar _ = Nothing
  let genSymbol :: Symbol -> ExpQ
      -- | If, for whatever reason, we have an empty symbol
      genSymbol (Symbol []) = error "empty genSymbol"
      -- | if we deal with terminals
      genSymbol ((`M.lookup` terms) -> Just (_,v)) = return v
      -- | if we deal with usual syntactic vars
      genSymbol ((`M.lookup` synNames) -> Just n) = varE n
      -- | usual syntactic terminals
      genSymbol ((`M.lookup` synTermNames) -> Just n) = varE n
      -- | if we deal with split synvars and have a fragment
      genSymbol (fragmentSynVar -> Just n) = let p = show n in [| ADP.split (ADP.Proxy :: ADP.Proxy ($(litT $ strTyLit p) :: Kind.Symbol)) (ADP.Proxy :: ADP.Proxy ADP.Fragment) $(varE n) |]
      -- | if we deal with split synvars and have a final split
      genSymbol (finalSynVar    -> Just n) = let p = show n in [| ADP.split (ADP.Proxy :: ADP.Proxy ($(litT $ strTyLit p) :: Kind.Symbol)) (ADP.Proxy :: ADP.Proxy ADP.Final   ) $(varE n) |]
      -- | single-tape synvars in a multi-tape setting
      genSymbol s
        | isSynStacked s = foldl go [|ADP.M|] $ _getSymbolList s
        where go acc Deletion = [| $(acc) ADP.:| ADP.Deletion |]
              go acc sv
                | Just n <- M.lookup (Symbol [sv]) synNames = [| $(acc) ADP.:| $(varE n) |]
                | otherwise = error $ "genSymbol:stacked: " ++ show (s,synTermNames)
      -- | catch-all error
      genSymbol s = error $ "genSymbol: " ++ show s
  let rhs = assert (not $ null rs) $ foldl1 (\acc z -> uInfixE acc (varE '(%)) z) . map genSymbol $ rs
  -- apply evaluation function
  Just (fname,_,_) <- use (qAttribFuns . at f)
  return $ appE (appE (varE '(<<<)) (varE $ fname)) rhs

-- | Terminal symbols are usually compound types, built up from different
-- terminals a la @M :| t1 :| t2 :| t3@. We here build up the type of these
-- and their expression.

grammarTermExpression :: Symbol -> TQ (Symbol, (Type,Exp))
grammarTermExpression s = do
  ttypes <- use qTermAtomTyNames
  tavn <- use qTermAtomVarNames
  elemTyName <- use qElemTyName
  synNames     <- use qFullSyntVarNames -- just the name of the fully applied symbol
  g <- use qGrammar
  let genType :: Int -> [SynTermEps] -> TypeQ
      genType tape z
        | [Deletion]      <- z = [t| () |]
        | [Epsilon ]      <- z = [t| () |]
        | [Term tnm tidx] <- z
        , Just v <- M.lookup (tnm^.getSteName,tape) ttypes = varT v -- single dimension only, set dim to 0
        | [Term tnm tidx] <- z = varT elemTyName
        | xs              <- z = foldl (\acc (tape',z) -> [t| $acc :. $(genType tape' [z]) |]) [t| Z |] (zip [0..] xs)
  let genSingleExp :: Int -> SynTermEps -> ExpQ
      genSingleExp _ Deletion = [| ADP.Deletion |]
      genSingleExp _ Epsilon  = [| ADP.Epsilon  |]
      genSingleExp _ (((`M.lookup` synNames) . Symbol . (:[])) -> Just n) = error $ show n
      genSingleExp k (Term tnm tidx)
        | Just n <- M.lookup (tnm^.getSteName,k) tavn = varE n
        -- TODO this one is dangerous but currently necessary for split
        -- systems
        | Just n <- M.lookup (tnm^.getSteName,k) tavn = varE n
        | otherwise = error $ show ("genSingleExp:Term: ",k,tnm,tidx, tavn)
      genSingleExp _ err      = error $ "genSingleExp: " ++ show (s,err)
  let genExp :: [SynTermEps] -> ExpQ
      genExp z
        | [Deletion]      <- z = [| ADP.Deletion |] -- TODO ???
        | [Epsilon ]      <- z = [| ADP.Epsilon  |]
        | [Term tnm tidx] <- z
        , Just v <- M.lookup (tnm^.getSteName,0) tavn = varE v
        | xs              <- z = foldl (\acc (k,z) -> [| $acc ADP.:| $(genSingleExp k z) |])
                                        [| ADP.M |] $ zip [0..] xs
  ty <- lift . genType 0 $ s^.getSymbolList
  ex <- lift . genExp  $ s^.getSymbolList
  return (s, (ty,ex))

-- | Given a grammar, gives us each terminal on each tape.

terminalsWithTape :: Grammar -> [(String,Int)]
terminalsWithTape = map go . filter isTerm . uniqueTermsWithTape -- uniqueBindableTermsWithTape
  where go (t,d) = (t^.name.getSteName,d^.getTape)
        isTerm (Term{},_) = True
        isTerm _          = False

-- | Each terminal symbol is bound to some input. Since we might have the
-- same name in different dimensions, we now explicitly annotate with
-- a dimensional index. This means that each atomic terminal is annotated
-- with the corresponding dimension.

dimensionalTermSymbNames :: TQ [((String,Int),Name)]
dimensionalTermSymbNames = do
  g <- use qGrammar
  ys <- forM (terminalsWithTape g) $ \(name,tape) -> do
          ( (name,tape) , ) <$> (lift $ newNameTerm "term" name tape)
  return ys

--newNameTerm :: String -> Int -> TQ Name
newNameTerm prefix name tape = newName $ prefix ++ "_" ++ name ++ "_" ++ show tape ++ "_"

-- | Build the full grammar. Generate a name (the grammar name prefixed
-- with a @"g"@), the arguments, and the body of the grammar.

grammar :: TQ Dec
grammar = do
  gn <- (mkName . ("g" ++) . _grammarName) <$> use qGrammar
  qGrammarName .= gn
  args         <- grammarArguments
  bodyWhere    <- grammarBodyWhere
  bodyNames    <- use qFullSyntVarNames
  let body     =  normalB . foldl (\acc z -> [| $acc :. $z |]) [|Z|] . map varE $ bodyNames^..folded
  lift $ funD gn [clause args body bodyWhere]

-- | Given a rule, create the name and type for the attribute function
-- being used.
--
-- TODO we currently assume that should we ever have @f <<< a b@ and also
-- @f <<< c d@ then the types match. We should actually group up rules by
-- function name, then take the set of rules with same name and check if
-- the types will actually match! However, one could argue that this should
-- way earlier in the grammar parser and not here.
--
-- TODO currently using @mkName@ instead of @newName@. This allows us to
-- share the signature between grammars, but might be problematic if names
-- overlap... We should combine the two generators for @g@ and @gO@ into
-- one. Then, we should be able to re-use names.

attributeFunctionType :: Rule -> TQ ([AttributeFunction],VarStrictType)
attributeFunctionType r = do
  let (f:fs) = r^..attr.folded
  elemTyName <- use qElemTyName
  terminal   <- use qTermSymbExp
  let argument :: Symbol -> TypeQ
      argument s
        -- split stuff has @()@ arg type
        | isSyntactic s
        , (Symbol [SynVar _ _ n k]) <- s
        , n>1 && k<n   = [t| () |]
        | isSyntactic s  = varT elemTyName
        | isSynTerm   s  = varT elemTyName
        | isTerminal  s  = return . fst $ terminal  M.! s
        | isSynStacked s = let go :: TypeQ -> SynTermEps -> TypeQ
                               go t Deletion = [t| $(t) :. () |]
                               go t SynVar{} = [t| $(t) :. $(varT elemTyName) |]
                               go t sv = error $ show sv
                           in  foldl go [t|Z|] $ _getSymbolList s
        | otherwise     = error $ "argument: " ++ show s
  prefix <- use qPrefix
  let attrFun = over _head toLower (f^.getAttr) ++ concatMap (over _head toUpper) (fs^..folded.getAttr) -- TODO mkName ???
  nm <- lift $ (return . mkName) $ if null prefix
                                      then attrFun
                                      else prefix ++ over _head toUpper attrFun
  tp <- lift $ foldr appT (varT elemTyName) $ map (appT arrowT . argument) $ r^.rhs
  ns <- lift notStrict
  return (f:fs, (nm,ns,tp))

-- | Build the choice function. Basically @Stream m s -> m r@.

choiceFunction :: TQ VarStrictType
choiceFunction = do
  elemTyName <- use qElemTyName
  retTyName  <- use qRetTyName
  mTyName    <- use qMTyName
  let args = AppT ArrowT $ AppT (AppT (ConT ''Stream) (VarT mTyName)) (VarT elemTyName)
  let rtrn = AppT (VarT mTyName) (VarT retTyName)
  prefix <- use qPrefix
  let hFun = if null prefix then "h" else prefix ++ "H"
  ns <- lift notStrict
  return (mkName hFun, ns, AppT args rtrn)