{-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes
    , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances
    , FlexibleInstances, UndecidableInstances, DeriveDataTypeable
    , TemplateHaskell #-}
{-|
  Module      : Language.ANTLR4.G4
  Description : Core G4 quasiquoter for antlr-haskell
  Copyright   : (c) Karl Cronburg, 2018
  License     : BSD3
  Maintainer  : karl@cs.tufts.edu
  Stability   : experimental
  Portability : POSIX

  Until better haddock integration is developed, you'll need to look
  at the source for this module to see the G4 grammar for G4.
-}
module Language.ANTLR4.G4 where

import Control.Arrow ( (&&&) )
import Data.Char (isUpper)

import Text.ANTLR.Common
import Text.ANTLR.Grammar
import Text.ANTLR.Parser
import qualified Text.ANTLR.LR as LR
import Text.ANTLR.Lex.Tokenizer as T
import qualified Text.ANTLR.Set as S
import Text.ANTLR.Set (Hashable(..), Generic(..))
import Text.ANTLR.Pretty
import Text.ANTLR.Lex.Regex (regex2dfa)
import Data.Data (Data(..))
import Language.Haskell.TH.Lift (Lift(..))

import Language.Haskell.TH.Quote (QuasiQuoter(..))
import qualified Language.Haskell.TH as TH
import Language.ANTLR4.Boot.Quote (antlr4)
import Language.ANTLR4.Syntax
import qualified Language.ANTLR4.Boot.Syntax  as G4S
import qualified Language.ANTLR4.Boot.Quote   as G4Q

import Debug.Trace as D

char :: String -> Char
char = head

append :: String -> String -> String
append = (++)

list a = [a]
cons = (:)
lexemeDirective r d = G4S.LRHS r (Just d)
lexemeNoDir     r   = G4S.LRHS r Nothing
lexDecl = G4S.Lex Nothing
lexFragment = G4S.Lex (Just G4S.Fragment)

literalRegex :: String -> G4S.Regex Char
literalRegex = G4S.Literal

prodDirective as d = G4S.PRHS as Nothing Nothing (Just d)
prodNoDir     as   = G4S.PRHS as Nothing Nothing Nothing
prodNoAlphas     d = G4S.PRHS [] Nothing Nothing (Just d)
prodNothing        = G4S.PRHS [] Nothing Nothing Nothing

list2 a b = [a,b]
range a b = [a .. b]

gterm         = G4S.GTerm    G4S.NoAnnot
gnonTerm      = G4S.GNonTerm G4S.NoAnnot

maybeGTerm    = G4S.GTerm    (G4S.Regular '?')
maybeGNonTerm = G4S.GNonTerm (G4S.Regular '?')

starGTerm    = G4S.GTerm    (G4S.Regular '*')
starGNonTerm = G4S.GNonTerm (G4S.Regular '*')

plusGTerm    = G4S.GTerm    (G4S.Regular '+')
plusGNonTerm = G4S.GNonTerm (G4S.Regular '+')

regexAnyChar = G4S.Negation (G4S.CharSet [])

dQual [] = G4S.UpperD []
dQual xs = case last xs of
  [] -> G4S.UpperD $ concatWith "." xs
  (a:as)
    | isUpper a -> G4S.UpperD $ concatWith "." xs
    | otherwise -> G4S.LowerD $ concatWith "." xs

qDir l u = [l,u]

haskellD = G4S.HaskellD

-- Force the above declarations (and their types) into scope:
$( return [] )

[antlr4|
  grammar G4;

  decls : decl1 ';'                 -> list
        | decl1 ';' decls           -> cons
        ;

  decl1 : 'grammar' UpperID                 -> G4S.Grammar
        | LowerID ':' prods                 -> G4S.Prod
        | UpperID ':' lexemeRHS             -> lexDecl
        | 'fragment' UpperID ':' lexemeRHS  -> lexFragment
        ;

  prods : prodRHS                   -> list
        | prodRHS '|' prods         -> cons
        ;

  lexemeRHS : regexes1 '->' directive  -> lexemeDirective
            | regexes1                 -> lexemeNoDir
            ;

  prodRHS : alphas '->' directive   -> prodDirective
          | alphas                  -> prodNoDir
          |        '->' directive   -> prodNoAlphas
          |                         -> prodNothing
          ;

  directive : qDirective          -> dQual
            | UpperID             -> G4S.UpperD
            | LowerID             -> G4S.LowerD
            | '${' HaskellExp '}' -> haskellD
            ;

  qDirective  : UpperID '.' qDot -> qDir
              ;

  qDot  : UpperID
        | LowerID
        ;

  alphas : alpha                    -> list
         | alpha alphas             -> cons
         | '(' alphas ')'
         | '(' alphas ')' '?'
         | '(' alphas ')' '*'
         | '(' alphas ')' '+'
         ;

  alpha : Literal '?'               -> maybeGTerm
        | LowerID '?'               -> maybeGNonTerm
        | UpperID '?'               -> maybeGNonTerm
        | Literal '*'               -> starGTerm
        | LowerID '*'               -> starGNonTerm
        | UpperID '*'               -> starGNonTerm
        | Literal '+'               -> plusGTerm
        | LowerID '+'               -> plusGNonTerm
        | UpperID '+'               -> plusGNonTerm
        | Literal                   -> gterm
        | LowerID                   -> gnonTerm
        | UpperID                   -> gnonTerm
        ;

  // Regex Stuff:

  regexes1 : regexes                -> G4S.Concat
           ;

  regexes : regex                   -> list
          | regex regexes           -> cons
          ;

  regex   :     regex1 '?'          -> G4S.Question
          |     regex1 '*'          -> G4S.Kleene
          |     regex1 '+'          -> G4S.PosClos
          | '~' regex1              -> G4S.Negation
          |     regex1              -> id
          ;

  regex1  : '[' charSet ']'           -> G4S.CharSet
          | Literal                   -> literalRegex
          | UpperID                   -> G4S.Named
          | '(' regexes1 ')'
          | unionR                    -> G4S.Union
          | '.'                       -> regexAnyChar
          ;

  unionR  : regex '|' regex         -> list2
          | regex '|' unionR        -> cons
          ;

  charSet : charSet1                -> id
          | charSet1 charSet        -> append
          ;

  charSet1 : SetChar '-' SetChar    -> range
           | SetChar                -> list
           | EscapedChar            -> list
           ;

  UpperID : [A-Z][a-zA-Z0-9_]*      -> String;
  LowerID : [a-z][a-zA-Z0-9_]*      -> String;
  Literal     : '\'' ( ( '\\\'' ) | (~ ( '\'' ) ) )+ '\'' -> stripQuotesReadEscape;
  LineComment : '//' (~ '\n')* '\n' -> String;

  HaskellExp : ( ~ '}' )+ -> String;

  SetChar     : ~ ']'               -> char ;
  WS          : [ \t\n\r\f\v]+      -> String;
  EscapedChar : '\\' [tnrfv]        -> readEscape ;

|]