{-# OPTIONS_HADDOCK not-home #-}
{-# LANGUAGE TemplateHaskell #-}
module Pinchot.Terminalize where

import Control.Monad (join)
import Data.List.NonEmpty (NonEmpty((:|)), toList)
import qualified Data.List.NonEmpty as NonEmpty
import Data.Foldable (foldlM)
import Data.Map (Map)
import qualified Data.Map as Map
import qualified Language.Haskell.TH as T

import Pinchot.Names
import Pinchot.Types
import Pinchot.Rules

-- | For all the given rules and their ancestors, creates
-- declarations that reduce the rule and all its ancestors to
-- terminal symbols.  Each rule gets a declaration named
-- @t'RULE_NAME@ where @RULE_NAME@ is the name of the rule.  The
-- type of the declaration is either
--
-- Production a -> [(t, a)]
--
-- or
--
-- Production a -> NonEmpty (t, a)
--
-- where @Production@ is the production corresponding to the given
-- 'Rule', @t@ is the terminal token type (often 'Char'), and @a@ is
-- arbitrary metadata about each token (often 'Loc').  'NonEmpty' is
-- returned for productions that must always contain at least one
-- terminal symbol; for those that can be empty, 'Seq' is returned.
--
-- Example: "Pinchot.Examples.Terminalize".
terminalizers
  :: Qualifier
  -- ^ Qualifier for the module containing the data types created
  -- from the 'Rule's
 
  -> [Rule t]
  -> T.Q [T.Dec]

terminalizers qual
  = fmap concat
  . traverse (terminalizer qual)
  . families

-- | For the given rule, creates declarations that reduce the rule
-- to terminal symbols.  No ancestors are handled.  Each rule gets a
-- declaration named @t'RULE_NAME@ where @RULE_NAME@ is the name of
-- the rule.  The
-- type of the declaration is either
--
-- Production a -> [(t, a)]
--
-- or
--
-- Production a -> NonEmpty (t, a)
--
-- where @Production@ is the production corresponding to the given
-- 'Rule', @t@ is the terminal token type (often 'Char'), and @a@ is
-- arbitrary metadata about each token (often 'Loc').  'NonEmpty' is
-- returned for productions that must always contain at least one
-- terminal symbol; for those that can be empty, 'Seq' is returned.

terminalizer
  :: Qualifier
  -- ^ Qualifier for the module containing the data types created
  -- from the 'Rule's
 
  -> Rule t
  -> T.Q [T.Dec]

terminalizer qual rule@(Rule nm _ _) = sequence [sig, expn]
  where
    declName = "t'" ++ nm
    anyType = T.varT (T.mkName "a")
    charType = T.varT (T.mkName "t")
    sig
      | atLeastOne rule = do
          ctorName <- lookupTypeName (quald qual nm)
          T.sigD (T.mkName declName)
            . T.forallT [tyVarBndrT , tyVarBndrA ] (return [])
            $ [t| $(T.conT ctorName) $(charType) $(anyType)
              -> NonEmpty ($(charType), $(anyType)) |]
      | otherwise = do
          ctorName <- lookupTypeName (quald qual nm)
          T.sigD (T.mkName declName)
            . T.forallT [ tyVarBndrT , tyVarBndrA ] (return [])
            $ [t| $(T.conT ctorName) $(charType) $(anyType)
                -> [($(charType), $(anyType))] |]
    expn = T.valD (T.varP $ T.mkName declName)
      (T.normalB (terminalizeRuleExp qual rule)) []

-- | For the given rule, returns an expression that has type of
-- either
--
-- Production a -> [(t, a)]
--
-- or
--
-- Production a -> NonEmpty (t, a)
--
-- where @Production@ is the production corresponding to the given
-- 'Rule', and @t@ is the terminal token type.  'NonEmpty' is
-- returned for productions that must always contain at least one
-- terminal symbol; for those that can be empty, 'Seq' is returned.
--
-- Example:  'Pinchot.Examples.Terminalize.terminalizeAddress'.
terminalizeRuleExp
  :: Qualifier
  -> Rule t
  -> T.Q T.Exp
terminalizeRuleExp qual rule@(Rule nm _ _) = do
  let allRules = family rule 
  lkp <- ruleLookupMap allRules
  let mkDec r@(Rule rn _ _) =
        let expn = terminalizeSingleRule qual lkp r
            decName = lookupName lkp rn
        in T.valD (T.varP decName) (T.normalB expn) []
  T.letE (fmap mkDec $ allRules) (T.varE (lookupName lkp nm))

-- | Creates a 'Map' where each key is the name of the 'Rule' and
-- each value is a name corresponding to that 'Rule'.  No
-- ancestors are used.
ruleLookupMap
  :: Foldable c
  => c (Rule t)
  -> T.Q (Map RuleName (T.Name))
ruleLookupMap = foldlM f Map.empty
  where
    f mp (Rule nm _ _) = do
      name <- T.newName $ "rule" ++ nm
      return $ Map.insert nm name mp

lookupName
  :: Map RuleName T.Name
  -> RuleName
  -> T.Name
lookupName lkp n = case Map.lookup n lkp of
  Nothing -> error $ "lookupName: name not found: " ++ n
  Just r -> r

-- | For the given rule, returns an expression that has type
-- of either
--
-- Production a -> [(t, a)]
--
-- or
--
-- Production a -> NonEmpty (t, a)
--
-- where @Production@ is the production corresponding to the given
-- 'Rule', and @t@ is the terminal token type.  'NonEmpty' is
-- returned for productions that must always contain at least one
-- terminal symbol; for those that can be empty, 'Seq' is returned.
-- Gets no ancestors.
terminalizeSingleRule
  :: Qualifier
  -- ^ Module qualifier for module containing the generated types
  -- corresponding to all 'Rule's
  -> Map RuleName T.Name
  -- ^ For a given Rule, looks up the name of the expression that
  -- will terminalize that rule.
  -> Rule t
  -> T.Q T.Exp
terminalizeSingleRule qual lkp rule@(Rule nm _ ty) = case ty of
  Terminal _ -> do
    x <- T.newName "x"
    ctorName <- lookupValueName (quald qual nm)
    let pat = T.conP ctorName [T.varP x]
    [| \ $(pat) -> ( $(T.varE x) :| [] ) |]

  NonTerminal bs -> do
    x <- T.newName "x"
    let fTzn | atLeastOne rule = terminalizeProductAtLeastOne
             | otherwise = terminalizeProductAllowsZero
        tzr (Branch name sq)
          = fmap (\(pat, expn) -> T.match pat (T.normalB expn) [])
          (fTzn qual lkp name sq)
    ms <- traverse tzr . toList $ bs
    T.lamE [T.varP x] (T.caseE (T.varE x) ms)

  Wrap (Rule inner _ _) -> do
    x <- T.newName "x"
    ctorName <- lookupValueName (quald qual nm)
    let pat = T.conP ctorName [T.varP x]
    [| \ $(pat) -> $(T.varE (lookupName lkp inner)) $(T.varE x) |]

  Record rs -> do
    (pat, expn) <- fTzr qual lkp nm rs
    [| \ $(pat) -> $(expn) |]
    where
      fTzr | atLeastOne rule = terminalizeProductAtLeastOne
           | otherwise = terminalizeProductAllowsZero

  Opt r@(Rule inner _ _) -> do
    x <- T.newName "x"
    ctorName <- lookupValueName (quald qual nm)
    let pat = T.conP ctorName [T.varP x]
    [| \ $(pat) -> maybe [] 
          $(convert (T.varE (lookupName lkp inner))) $(T.varE x) |]
    where
      convert expn | atLeastOne r = [| NonEmpty.toList . $(expn) |]
                   | otherwise = expn

  Star r@(Rule inner _ _) -> do
    x <- T.newName "x"
    ctorName <- lookupValueName (quald qual nm)
    let pat = T.conP ctorName [T.varP x]
        convert e | atLeastOne r = [| NonEmpty.toList . $(e) |]
                  | otherwise = e
    [| \ $(pat) -> join . fmap $(convert (T.varE (lookupName lkp inner)))
          $ $(T.varE x) |]

  Plus r@(Rule inner _ _)
    | atLeastOne r -> do
        x <- T.newName "x"
        ctorName <- lookupValueName (quald qual nm)
        let pat = T.conP ctorName [T.varP x]
        [| \ $(pat) ->
            let getTermNonEmpty = $(T.varE (lookupName lkp inner))
                getTerms (e1 :| es)
                  = join . fmap getTermNonEmpty
                  $ (e1 :| es)
           in getTerms $(T.varE x)
          |]

    | otherwise -> do
        x <- T.newName "x"
        [| let getTermSeq = $(T.varE (lookupName lkp inner))
               getTerms (e1 :| es) = getTermSeq e1
                `mappend` (join (fmap getTermSeq es))
           in getTerms $(T.varE x)
          |]

  Series _ -> do
    x <- T.newName "x"
    ctorName <- lookupValueName (quald qual nm)
    let pat = T.conP ctorName [T.varP x]
    [| \ $(pat) -> $(T.varE x) |]

terminalizeProductAllowsZero
  :: Qualifier
  -> Map RuleName T.Name
  -> String
  -- ^ Rule name or branch name, as applicable
  -> [Rule t]
  -> T.Q (T.PatQ, T.ExpQ)
terminalizeProductAllowsZero qual lkp name bs = do
  pairs <- traverse (terminalizeProductRule lkp) $ bs
  ctorName <- lookupValueName (quald qual name)
  let pat = T.conP ctorName (fmap (fst . snd) pairs)
      body = case pairs of
        [] -> [| [] |]
        x:xs -> foldl f start xs
          where
            f acc trip = [| $(acc) `mappend` $(procTrip trip) |]
            start = procTrip x
            procTrip (rule, (_, expn))
              | atLeastOne rule = [| NonEmpty.toList $(expn) |]
              | otherwise = expn
  return (pat, body)

prependList :: [a] -> NonEmpty a -> NonEmpty a
prependList [] ne = ne
prependList (x:xs) (a :| as) = (x :| (xs ++ (a : as)))

appendList :: NonEmpty a -> [a] -> NonEmpty a
appendList (a :| as) xs = a :| (as ++ xs)

terminalizeProductAtLeastOne
  :: Qualifier
  -> Map RuleName T.Name
  -> String
  -- ^ Rule name or branch name, as applicable
  -> [Rule t]
  -> T.Q (T.PatQ, T.ExpQ)
terminalizeProductAtLeastOne qual lkp name bs = do
  pairs <- traverse (terminalizeProductRule lkp) $ bs
  ctorName <- lookupValueName (quald qual name)
  let pat = T.conP ctorName (fmap (fst . snd) pairs)
      body = [| ( $(leadSeq) `prependList` $(firstNonEmpty))
                `appendList` $(trailSeq) |]
        where
          (leadRules, lastRules) = span (not . atLeastOne . fst) pairs
          (firstNonEmptyRule, trailRules) = case lastRules of
            [] -> error $ "terminalizeProductAtLeastOne: failure 1: " ++ name
            x:xs -> (x, xs)
          leadSeq = case fmap (snd . snd) leadRules of
            [] -> [| [] |]
            x:xs -> foldl f x xs
              where
                f acc expn = [| $(acc) `mappend` $(expn) |]
          firstNonEmpty = [| $(snd . snd $ firstNonEmptyRule) |]

          trailSeq = foldl f [| [] |] trailRules
            where
              f acc (rule, (_, expn))
                | atLeastOne rule =
                    [| $(acc) `mappend` NonEmpty.toList $(expn) |]
                | otherwise =
                    [| $(acc) `mappend` $(expn) |]
  return (pat, body)

terminalizeProductRule
  :: Map RuleName T.Name
  -> Rule t
  -> T.Q (Rule t, (T.Q T.Pat, T.Q T.Exp))
terminalizeProductRule lkp r@(Rule nm _ _) = do
  x <- T.newName $ "terminalizeProductRule'" ++ nm
  let getTerms = [| $(T.varE (lookupName lkp nm)) $(T.varE x) |]
  return (r, (T.varP x, getTerms))

-- | Examines a rule to determine whether when terminalizing it will
-- always return at least one terminal symbol.
atLeastOne
  :: Rule t
  -> Bool
  -- ^ True if the rule will always have at least one terminal
  -- symbol.
atLeastOne (Rule _ _ ty) = case ty of
  Terminal _ -> True
  NonTerminal bs -> all branchAtLeastOne bs
    where
      branchAtLeastOne (Branch _ rs) = any atLeastOne rs
  Wrap r -> atLeastOne r
  Record rs -> any atLeastOne rs
  Opt _ -> False
  Star _ -> False
  Plus r -> atLeastOne r
  Series _ -> True