{-# LANGUAGE DeriveLift, DeriveAnyClass, DeriveGeneric #-}
{-|
  Module      : Language.ANTLR4.Boot.Syntax
  Description : Both the boot and core syntax data types for G4
  Copyright   : (c) Karl Cronburg, 2018
  License     : BSD3
  Maintainer  : karl@cs.tufts.edu
  Stability   : experimental
  Portability : POSIX
-}
module Language.ANTLR4.Boot.Syntax
  ( G4(..), PRHS(..), ProdElem(..), GAnnot(..)
  , Directive(..)
  , LRHS(..), Regex(..), isGTerm, isGNonTerm
  , TermAnnot(..), isMaybeAnnot, isNoAnnot, annot
  ) where
import Text.ANTLR.Grammar ()
import Language.Haskell.TH.Lift (Lift(..))

import Language.Haskell.TH.Syntax (Exp)
import qualified Language.Haskell.TH.Syntax as S

import Text.ANTLR.Set ( Hashable(..), Generic(..) )

-- | Syntax representation of the top-level declarations found in a @.g4@ file.
data G4
  -- | Grammar name declaration in g4
  = Grammar
      { gName :: String -- ^ Name of the grammar
      }
  -- | A named g4 production together with its list of right-hand sides
  | Prod
      { pName    :: String -- ^ Name of the production
      , patterns :: [PRHS] -- ^ List of right-hand sides ('PRHS') to match on
      }
  -- | A single g4 lexical rule, which may carry an annotation
  | Lex
      { annotation :: Maybe GAnnot -- ^ Lexical annotation (@fragment@), if any
      , lName      :: String       -- ^ Name of the lexical rule
      , pattern    :: LRHS         -- ^ Regex (plus optional directive) to match on
      }
  deriving (Show, Eq, Lift, Generic, Hashable)

-- | Orphan 'Lift' instance for Template Haskell expressions. 'PRHS' derives
--   'Lift' and stores @Maybe Exp@ fields, which requires 'Exp' to be liftable.
--
--   NOTE(review): no methods are defined here, so the instance relies entirely
--   on the class's default method definitions -- confirm these defaults are
--   suitable for 'Exp' with the library versions in use.
instance Lift Exp

-- | Right-hand side of one G4 production rule.
--
--   Note that the @pred@ field shares its name with the Prelude's @pred@, so
--   modules using both need to qualify or hide one of them.
data PRHS = PRHS
  { alphas     :: [ProdElem]      -- ^ Elements defining this rule, in order
  , pred       :: Maybe Exp       -- ^ Optional boolean predicate testing whether this rule should fire
  , mutator    :: Maybe Exp       -- ^ Optional mutator to run when this rule fires
  , pDirective :: Maybe Directive -- ^ How to construct a Haskell type when this rule fires
  }
  deriving (Show, Eq, Lift, Generic)

-- | The antiquoted (g4-embedded) string written to the right of an arrow in a
--   g4 production rule; it says how to construct a Haskell type.
data Directive
  = UpperD   String -- ^ Probably a Haskell data constructor
  | LowerD   String -- ^ Probably just a Haskell function to call
  | HaskellD String -- ^ Arbitrary antiquoted Haskell code embedded in the G4 grammar
  deriving (Show, Eq, Lift, Generic, Hashable)

-- | A 'PRHS' is hashed through its 'alphas' only; the predicate, mutator and
--   directive fields do not contribute to the hash.
instance Hashable PRHS where
  hashWithSalt salt = hashWithSalt salt . alphas

-- | Annotation attached to a term (terminal or nonterminal), extending the
--   BNF-like G4 syntax with regular expression modifiers.
data TermAnnot
  = Regular Char -- ^ A regular expression modifier character (e.g. @+@, @?@, @*@)
  | NoAnnot      -- ^ The term carries no annotation
  deriving (Show, Eq, Ord, Lift, Generic, Hashable)

-- | Extract the 'TermAnnot' carried by a 'ProdElem', whether it is a terminal
--   or a nonterminal.
annot :: ProdElem -> TermAnnot
annot pe = case pe of
  GTerm    ta _ -> ta
  GNonTerm ta _ -> ta

-- | 'True' exactly when the annotation is the @?@ regular expression modifier.
isMaybeAnnot :: TermAnnot -> Bool
isMaybeAnnot ta = ta == Regular '?'

-- | 'True' exactly when the term is not annotated, i.e. the value is 'NoAnnot'.
isNoAnnot :: TermAnnot -> Bool
isNoAnnot ta = case ta of
  NoAnnot   -> True
  Regular _ -> False

-- | One element of a production, paired with its regex annotation (which may
--   be 'NoAnnot').
data ProdElem
  = GTerm    TermAnnot String -- ^ G4 terminal
  | GNonTerm TermAnnot String -- ^ G4 nonterminal
  deriving (Show, Eq, Ord, Lift, Generic, Hashable)

-- | Is this a terminal G4 element? Returns 'True' for 'GTerm' and 'False' for
--   any other 'ProdElem'.
isGTerm :: ProdElem -> Bool
isGTerm (GTerm _ _) = True
isGTerm _           = False

-- | Is this a nonterminal G4 element? Returns 'True' for 'GNonTerm' and
--   'False' for any other 'ProdElem'.
isGNonTerm :: ProdElem -> Bool
isGNonTerm (GNonTerm _ _) = True
isGNonTerm _              = False

-- | Annotations that may be placed on a lexical production rule.
data GAnnot
  = Fragment -- ^ The @fragment@ annotation, currently the only one available.
  deriving (Show, Eq, Lift, Generic, Hashable)

-- | The right-hand side of a lexical G4 rule: a regex plus an optional directive.
data LRHS = LRHS
  { regex     :: Regex Char
    -- ^ Regular expression whose tokens are characters.
  , directive :: Maybe Directive
    -- ^ Optional directive: @Nothing@ is equivalent to @(Just "String")@.
  }
  deriving (Show, Eq, Lift, Generic, Hashable)

-- | G4 representation of a regex over symbols of type @s@. This is the G4 regex
--   syntax, not the regexes used by the tokenizer.
data Regex s
  = Epsilon             -- ^ Consume no input
  | Literal  [s]        -- ^ Match a literal string (a sequence of symbols)
  | Union    [Regex s]  -- ^ Match any one of the given regexes
  | Concat   [Regex s]  -- ^ Match the given regexes in sequence
  | Kleene   (Regex s)  -- ^ Match zero or more times
  | PosClos  (Regex s)  -- ^ Match one or more times
  | Question (Regex s)  -- ^ Match zero or one time
  | CharSet  [s]        -- ^ Match once on any one of the given symbols
  | Negation (Regex s)  -- ^ Match anything that the given regex does not match
  | Named    String     -- ^ Reference to some other regex by name (need to track an environment)
  deriving (Lift, Eq, Show, Generic, Hashable)
-- TODO: Lex regexes (e.g. complement sets, escape chars, ...)
-- TODO: Set s, and ranges of characters