{-# LANGUAGE DeriveGeneric, DeriveAnyClass, FlexibleContexts, InstanceSigs
           , UndecidableInstances, StandaloneDeriving, TypeFamilies
           , ScopedTypeVariables, FlexibleInstances, MultiParamTypeClasses
           , OverloadedStrings, DeriveDataTypeable, ConstraintKinds #-}
{-|
  Module      : Text.ANTLR.Parser
  Description : Parsing API for constructing Haskell data types from lists of tokens
  Copyright   : (c) Karl Cronburg, 2018
  License     : BSD3
  Maintainer  : karl@cs.tufts.edu
  Stability   : experimental
  Portability : POSIX

-}
module Text.ANTLR.Parser where
import Text.ANTLR.Grammar hiding (Action)
import Text.ANTLR.Pretty
import Text.ANTLR.Set (Generic(..))
import Text.ANTLR.Lex.Tokenizer (Token(..))
import Data.Data (Data(..))
import Language.Haskell.TH.Lift (Lift(..))
import Text.ANTLR.Set (Hashable)

-- | Nonterminals in a grammar are tabular, terminal symbols are tabular (as are
--   the EOF-stripped version), terminals are referenceable (can be symbolized),
--   and terminals are also tabular.
type CanParse nts t =
  ( Tabular nts
  , Tabular (Sym t)
  , Tabular (StripEOF (Sym t))
  , HasEOF (Sym t)
  , Ref t
  , Tabular t)

-- | Same as 'CanParse' but with second formal parameter representing (StripEOF (Sym t))
--   aka "sts" (stripped terminal symbol).
type CanParse' nts sts = ( Tabular nts, Tabular sts )

type IsAST ast = ( Ord ast, Eq ast, Hashable ast )

type IsState st  = ( Ord st, Hashable st, Prettify st )
type Tabular sym = ( Ord sym, Hashable sym, Prettify sym, Eq sym )

-- | Action functions triggered during parsing are given the nonterminal we just matched on, the
--   corresponding list of production elements (grammar symbols) in the RHS of the matched production
--   alternative, and the result of recursively.
--
--   A 'ParseEvent' may also be just a terminal matched on, or an epsilon event
--   based heavily on which parsing algorithm is being run.
--
--   __This__ data type is one of the data types that tie together terminal (token) types
--   and terminal symbol types. When the parser produces a terminal event, you're
--   seeing a __token__, but when the parser produces a nonterminal event, you're
--   seeing a production in the grammar firing which contains terminal __symbols__,
--   not tokens.
data ParseEvent ast nts t =
    TermE t -- ^ A terminal was seen in the input
  | NonTE (nts, ProdElems nts (StripEOF (Sym t)), [ast]) -- ^ A non-terminal was seen in the input
  | EpsE -- ^ Epsilon event

deriving instance (Show ast, Show nts, Show (StripEOF (Sym t)), Show t) => Show (ParseEvent ast nts t)

instance (Prettify ast, Prettify nts, Prettify (StripEOF (Sym t)), Prettify t) => Prettify (ParseEvent ast nts t) where
  prettify e = do
    pStr "Terminal Event: "
    incrIndent 2
    prettify e
    incrIndent (-2)

-- | An Action as seen by the host language (Haskell) is a function from parse
--   events to an abstract-syntax tree that the function constructs based on which
--   non-terminal or terminal symbol was seen.
type Action ast nts t = ParseEvent ast nts t -> ast

-- | An Icon (as used in first and follow sets of the LL1 parser and the
--   shift-reduce table of the LR1 parser) is just a terminal symbol taken from
--   the grammar, or it's an epsilon or EOF.
data Icon ts =
    Icon ts  -- ^ Terminal symbol icon
  | IconEps  -- ^ Epsilon icon
  | IconEOF  -- ^ EOF (end of file / input) icon
  deriving (Generic, Hashable, Show, Eq, Ord, Data, Lift)

-- | __This__ is the function defining the (n == Sym t == ts) relationship between
--   the __name__ type of a token, the __symbol__ type of a terminal token (as
--   constructed by the tokenizer), and the __terminal symbol__ type as used by the
--   parser. When a parser wants to compare the symbol of an input token to a
--   terminal symbol found in the grammar, it should convert the token to an icon
--   using this function and then compare icons using Eq because icons throw away
--   the value of a token, leaving only the Eq-able piece that we care about.
token2symbol :: Token n v -> TokenSymbol n
token2symbol (Token n v _) = TokenSymbol n
token2symbol EOF = EOFSymbol
token2symbol (Error s) = EOFSymbol

-- | Tokens are symbolized by an icon containing their name.
instance Ref (Token n v) where
  type Sym (Token n v) = TokenSymbol n
  getSymbol = token2symbol

-- | The symbol for some tokenize is either just it's name @n@ or the special EOF symbol.
data TokenSymbol n =
    TokenSymbol n  -- ^ Named symbol
  | EOFSymbol      -- ^ End-of-file symbol
  deriving (Eq, Ord, Show, Hashable, Generic)

instance (Prettify n) => Prettify (TokenSymbol n) where
  prettify (TokenSymbol n) = do
    pStr "TokenSymbol "
    prettify n
  prettify EOFSymbol = pStr "EOFSymbol"

-- | A data type with an EOF constructor. There are two things you can do with a
--   data type that has an EOF:
--
-- > Ask for the type *without* the EOF at compile time
-- > Ask whether or not an instance is the EOF symbol at runtime
--
class HasEOF t where
  -- | The unwrapped type (without the EOF data constructor alternative)
  type StripEOF t :: *
  -- | Whether or not the given value of type t is the EOF value
  isEOF :: t -> Bool
  -- | Take a token and try to unwrap its name (an EOF should result in Nothing)
  stripEOF :: t -> Maybe (StripEOF t)

instance HasEOF (TokenSymbol n) where
  type StripEOF (TokenSymbol n) = n

  isEOF EOFSymbol = True
  isEOF     _     = False

  stripEOF EOFSymbol       = Nothing
  stripEOF (TokenSymbol n) = Just n

instance HasEOF String where
  type StripEOF String = String

  isEOF "" = True
  isEOF _  = False

  stripEOF "" = Nothing
  stripEOF x  = Just x

instance (Prettify ts) => Prettify (Icon ts) where
  prettify IconEps  = pStr "iϵ"
  prettify IconEOF  = pStr "iEOF"
  prettify (Icon ts) = do
    pStr "Icon "
    prettify ts

-- | Is this a terminal-symbol icon?
isIcon Icon{} = True
isIcon _ = False

-- | Is this an epsilon icon?
isIconEps IconEps = True
isIconEps _    = False

-- | Is this the EOF icon?
isIconEOF IconEOF = True
isIconEOF _   = False

-- | Universal Abstract Syntax Tree data type. All internal AST "nodes" have a
--   nonterminal, the grammar production symbols it reduced from, and the
--   resulting recursively defined AST nodes acquired from the parser. Leaf AST
--   nodes can be either an epsilon (when explicit epsilons are used in the
--   grammar) or more importantly a terminal symbol.
--   __This__ is another type that defines the relationship between the terminal
--   token type @t@ and the terminal symbol type @(ts == Sym t)@ where the AST tells
--   you the production rule that fired containing @ts@ as well as the tokens @t@
--   contained in leaves of the AST.
data AST nts t =
    LeafEps -- ^ Epsilon leaf AST node
  | Leaf t  -- ^ Terminal token leaf in the AST
  | AST nts (ProdElems nts (StripEOF (Sym t))) [AST nts t] -- ^ Internal AST node
  deriving (Generic)

deriving instance (Eq (StripEOF (Sym t)), Eq nts, Eq t) => Eq (AST nts t)
deriving instance (Ord (StripEOF (Sym t)), Ord nts, Ord t) => Ord (AST nts t)
deriving instance (Show (StripEOF (Sym t)), Show nts, Show t) => Show (AST nts t)
deriving instance (Hashable (StripEOF (Sym t)), Hashable nts, Hashable t) => Hashable (AST nts t)

instance (Prettify nts, Prettify t) => Prettify (AST nts t) where
  prettify LeafEps  = pStr "ϵ"
  prettify (Leaf t) = prettify t
  prettify (AST nts ps asts) = do
    prettify nts
    pStr "{"
    prettify asts
    pStr "}"

-- | Default AST-constructor function which just copies over the contents of
--   some parse event into an 'AST'.
event2ast :: ParseEvent (AST nts t) nts t -> AST nts t
event2ast (TermE t)               = Leaf t
event2ast (NonTE (nts, ss, asts)) = AST nts ss asts