{-|
Module      : Text.LParse.Metaparser
Description : Underlying data structure for sequential parsing
Copyright   : (c) Marcus Völker, 2018
License     : MIT
Maintainer  : marcus.voelker@rwth-aachen.de

This module contains the Metaparser, which is a parser that parses a grammar in EBNF and returns a parser that parses that grammar into an AST
-}
module Text.LParse.Metaparser (specParse,metaParser,AST) where

import Control.Applicative
import Control.Arrow
import Control.Monad.Fix

import Data.Char
import Data.List
import qualified Data.Map.Strict as M
import Data.Maybe

import Text.LParse.Parser
import Text.LParse.Prebuilt

data Token = Literal Char | CharClass String | RuleName String | Integer | Digit | Word | Star | Plus | May | Or | Is | Eoi | LParen | RParen | Sep | Whitespace deriving (Show,Eq)

-- | Abstract Syntax Tree generated by the metaparser, suitable for postprocessing
data AST = Node String [AST] | ILeaf Integer | SLeaf String | EOI deriving Eq

instance Show AST where
    show (Node s cs) = s ++ "(" ++ intercalate "," (map show cs) ++ ")"
    show (ILeaf i) = show i
    show (SLeaf s) = s
    show EOI = "$"

escaped :: Parser r String Token
escaped = consumeSReturn 'i' Integer
    <|> consumeSReturn 'd' Digit
    <|> consumeSReturn 'w' Word
    <|> consumeSReturn '\\' (Literal '\\')
    <|> consumeSReturn '*' (Literal '*')
    <|> consumeSReturn '+' (Literal '+')
    <|> consumeSReturn '?' (Literal '?')
    <|> consumeSReturn '[' (Literal '[')
    <|> consumeSReturn ']' (Literal ']')
    <|> consumeSReturn '(' (Literal '(')
    <|> consumeSReturn ')' (Literal ')')
    <|> consumeSReturn '$' (Literal '$')

charclass :: Parser r String Token
charclass = CharClass <$> some (nParse (/=']') tokenReturn "Expected character")

simpleSpecial :: Parser r String Token
simpleSpecial = consumeSReturn '*' Star
    <|> consumeSReturn '+' Plus
    <|> consumeSReturn '?' May
    <|> consumeSReturn '|' Or
    <|> consumeSReturn '$' Eoi
    <|> consumeSReturn '(' LParen
    <|> consumeSReturn ')' RParen
    <|> consumeReturn "::=" Is
    <|> consumeSReturn ';' Sep

whitespace :: Parser r String Token
whitespace = some (nParse isSpace tokenReturn "Expected Space") >> return Whitespace

ruleName :: Parser r String Token
ruleName = consumeSingle '%' >> (RuleName <$> word)

metaTokenizer :: Parser r String [Token]
metaTokenizer = many (
    (consumeSingle '\\' >> escaped)
    <|> surround "[]" charclass
    <|> simpleSpecial
    <|> ruleName
    <|> whitespace
    <|> (Literal <$> tokenReturn)
    )

iLeafParser :: Parser r [Token] (Parser r' String AST)
iLeafParser = consumeSReturn Integer (ILeaf <$> integer)

dLeafParser :: Parser r [Token] (Parser r' String AST)
dLeafParser = consumeSReturn Digit (ILeaf <$> digit)

sLeafParser :: Parser r [Token] (Parser r' String AST)
sLeafParser = consumeSReturn Word (SLeaf <$> word)

eoiParser :: Parser r [Token] (Parser r' String AST)
eoiParser = consumeSReturn Eoi (eoi >> return EOI)

isRuleName :: Token -> Bool
isRuleName (RuleName _) = True
isRuleName _ = False

getRuleName :: Token -> String
getRuleName (RuleName s) = s

subRuleParser :: M.Map String (Parser r' String AST) -> Parser r [Token] (Parser r' String AST)
subRuleParser m = nParse isRuleName (tokenParse ((m M.!) . getRuleName)) "Expected RuleName"

charClassCharParser :: String -> Parser r String Char
charClassCharParser (c:s) | c == '^' = nParse (not . (`elem` s)) tokenReturn ("Expected not [" ++ s ++ "]")
charClassCharParser s = nParse (`elem` s) tokenReturn ("Expected [" ++ s ++ "]")

isCharClass :: Token -> Bool
isCharClass (CharClass _) = True
isCharClass _ = False

getCharClass :: Token -> String
getCharClass (CharClass s) = s

charClassParser :: Parser r [Token] (Parser r' String AST)
charClassParser = nParse isCharClass (tokenParse (fmap (SLeaf . return) . charClassCharParser . getCharClass)) "Expected character class"

isLiteral :: Token -> Bool
isLiteral (Literal _) = True
isLiteral _ = False

getLiteral :: Token -> Char
getLiteral (Literal s) = s

parseLiteral :: Parser r [Token] Char
parseLiteral = nParse isLiteral (tokenParse getLiteral) "Expected Literal"

charParser :: Parser r [Token] (Parser r' String AST)
charParser = fmap (SLeaf . return) . (\c -> consumeSReturn c c) <$> parseLiteral

atomParser :: M.Map String (Parser r' String AST) -> Parser r [Token] (Parser r' String AST)
atomParser m = iLeafParser
    <|> dLeafParser
    <|> sLeafParser
    <|> eoiParser
    <|> charClassParser
    <|> subRuleParser m
    <|> charParser

starFreeParser :: M.Map String (Parser r' String AST) -> Parser r [Token] (Parser r' String [AST])
starFreeParser m = surround [LParen,RParen] (cfexParser m) <|> (fmap return <$> atomParser m)

concatFreeParser :: M.Map String (Parser r' String AST) -> Parser r [Token] (Parser r' String [AST])
concatFreeParser m = do
    sf <- starFreeParser m
    (consumeSingle Star >> return (concat <$> many sf))
        <|> (consumeSingle Plus >> return (concat <$> some sf))
        <|> (consumeSingle May >> return (concat . maybeToList <$> try sf))
        <|> return sf

orFreeParser :: M.Map String (Parser r' String AST) -> Parser r [Token] (Parser r' String [AST])
orFreeParser m = fmap concat . sequenceA <$> some (concatFreeParser m)

cfexParser :: M.Map String (Parser r' String AST) -> Parser r [Token] (Parser r' String [AST])
cfexParser m = foldl1 (<|>) <$> sepSome (consumeSingle Or) (orFreeParser m)

ruleParser :: M.Map String (Parser r' String AST) -> Parser r [Token] (String,Parser r' String AST)
ruleParser m = (\s as-> (s,Node s <$> as)) <$> (many (nParse isLiteral (tokenParse getLiteral) "Expected Literal") << consumeSingle Is) <*> cfexParser m

rulesetParser :: M.Map String (Parser r' String AST) -> Parser r [Token] (M.Map String (Parser r' String AST))
rulesetParser m = M.fromList<$> sepMany (consumeSingle Sep) (ruleParser m)

rulesetLoop :: Parser r [Token] (M.Map String (Parser r' String AST))
rulesetLoop = pfix rulesetParser

combine :: Maybe (M.Map String (Parser r' String AST)) -> Parser r [Token] (Parser r' String [AST])
combine Nothing = cfexParser M.empty
combine (Just rs) = cfexParser rs

fullParser :: Parser r [Token] (Parser r' String AST)
fullParser = try (rulesetLoop << consumeSingle Sep) >>= (fmap (fmap (Node "_")) . combine)

parserParser :: Parser r [Token] (Parser r' String AST)
parserParser = fullParser << eoi

-- | Parser that takes a grammar and returns a parser that parses that grammar into an @AST@
metaParser :: Parser r String (Parser r' String AST)
metaParser = metaTokenizer >>> skip [Whitespace] parserParser

-- | Convenience function chaining creation and usage of the @metaParser@ into a single invocation.
specParse :: String -> String -> Either String AST
specParse g i = doParse metaParser g >>= (`doParse` i)