{-# LANGUAGE QuasiQuotes, TemplateHaskell, ScopedTypeVariables, DataKinds,
LambdaCase, FlexibleContexts #-}
module Language.ANTLR4.Boot.Quote
( antlr4
, g4_decls
, g4_parsers
, mkLRParser
) where
import Prelude hiding (exp, init)
import System.IO.Unsafe (unsafePerformIO)
import Data.List (nub, elemIndex, groupBy, sortBy, sort, intersperse)
import Data.Ord (comparing)
import Data.Char (toLower, toUpper, isLower, isUpper)
import Data.Maybe (fromJust, catMaybes)
import qualified Debug.Trace as D
import qualified Language.Haskell.TH as TH
import Language.Haskell.TH
import Language.Haskell.TH.Syntax (lift, Exp(..))
import Language.Haskell.TH.Quote (QuasiQuoter(..))
import qualified Language.Haskell.Meta as LHM
import Control.Monad (mapM)
import qualified Language.ANTLR4.Boot.Syntax as G4S
import qualified Language.ANTLR4.Boot.SplicedParser as G4P
import Text.ANTLR.Grammar hiding (getNTs, getProds, s0)
import Text.ANTLR.Parser (AST(..), StripEOF(..))
import Text.ANTLR.Pretty
import Text.ANTLR.Lex.Tokenizer as T
import Text.ANTLR.LR as LR
import qualified Text.ANTLR.Allstar as ALL
import qualified Text.ANTLR.LL1 as LL
import qualified Text.ANTLR.Set as S
import qualified Text.ANTLR.MultiMap as M
import qualified Data.Map as M1
import Text.ANTLR.Set (Set(..))
import qualified Text.ANTLR.Set as Set
import qualified Text.ANTLR.Lex.Regex as R
trace s x = x
traceM s x = x
haskellParseExp :: (Monad m) => String -> m TH.Exp
haskellParseExp s =
case LHM.parseExp s of
Left err -> error err
Right expTH -> return expTH
haskellParseType :: (Monad m) => String -> m TH.Type
haskellParseType s = case LHM.parseType s of
Left err -> trace s (error err)
Right tyTH -> return tyTH
type2returnType :: TH.Type -> TH.Type
type2returnType = let
t2rT :: TH.Type -> TH.Type
t2rT (ForallT xs ys t) = t2rT t
t2rT ((AppT (AppT ArrowT from) to)) = t2rT to
t2rT t@(VarT _) = t
t2rT t@(AppT ListT as) = t
t2rT t@(ConT _) = t
t2rT t@(AppT (ConT _) _) = t
t2rT x = error (show x)
in t2rT
info2returnType :: Info -> TH.Type
info2returnType i = let
in case i of
(VarI _ t _) -> type2returnType t
_ -> error (show i)
antlr4 :: QuasiQuoter
antlr4 = QuasiQuoter
(error "parse exp")
(error "parse pattern")
(error "parse type")
aparse
data LexemeType =
Literal Int
| AString
| Named String TH.TypeQ
aparse :: String -> TH.Q [TH.Dec]
aparse input = do
loc <- TH.location
let fileName = TH.loc_filename loc
let (line,column) = TH.loc_start loc
case G4P.parseANTLR input of
r@(LR.ResultAccept ast) -> codeGen r
LR.ResultSet s ->
if S.size s == 1
then codeGen (S.findMin s)
else error $ pshow' s
err -> error $ pshow' err
codeGen (LR.ResultAccept ast) = g4_decls $ G4P.ast2decls ast
data BaseType = List | Mybe
deriving (Eq, Ord, Show)
baseType (G4S.Regular '?') = Mybe
baseType (G4S.Regular '*') = List
grammarName :: [G4S.G4] -> String
grammarName [] = error "Grammar missing a name"
grammarName (G4S.Grammar{G4S.gName = gName}:_) = gName
grammarName (_:xs) = grammarName xs
mkLower [] = []
mkLower (a:as) = toLower a : as
mkUpper [] = []
mkUpper (a:as) = toUpper a : as
justGrammarTy ast s = [t| Grammar $(s) $(ntConT ast) $(tConT ast) G4S.Directive |]
justGrammarTy' ast s = [t| Grammar $(s) $(ntConT ast) (StripEOF (Sym $(tConT ast))) G4S.Directive |]
ntConT ast = conT $ mkName $ ntDataName ast
tConT ast = conT $ mkName $ tDataName ast
ntDataName ast = gName ast ++ "NTSymbol"
tDataName ast = gName ast ++ "TSymbol"
gName ast = grammarName ast
type G4AST = [G4S.G4]
terminalLiterals :: G4AST -> [String]
terminalLiterals ast = (nub $ concatMap getTerminals ast)
getTerminals :: G4S.G4 -> [String]
getTerminals G4S.Prod{G4S.patterns = ps} = concatMap (justTerms . G4S.alphas) ps
getTerminals _ = []
lexemeTypes :: G4AST -> [(String, LexemeType)]
lexemeTypes ast = let
nullID (G4S.UpperD xs) = null xs
nullID (G4S.LowerD xs) = null xs
nullID (G4S.HaskellD _) = False
lN :: G4S.G4 -> [(String, LexemeType)]
lN (G4S.Lex{G4S.annotation = Nothing, G4S.lName = lName, G4S.pattern = G4S.LRHS{G4S.directive = Nothing}}) = [(lName, AString)]
lN (G4S.Lex{G4S.annotation = Nothing, G4S.lName = lName, G4S.pattern = G4S.LRHS{G4S.directive = Just s}})
| s == (G4S.UpperD "String") = [(lName, AString)]
| nullID s = [(lName, AString)]
| otherwise = case s of
(G4S.UpperD s) -> [(lName, Named s (conT $ mkName s))]
(G4S.LowerD s) -> [(lName, Named s (info2returnType <$> reify (mkName s)))]
(G4S.HaskellD s) -> []
lN _ = []
in concatMap lN ast
lexemeNames :: G4AST -> [String]
lexemeNames ast = map fst (lexemeTypes ast)
justTerms :: [G4S.ProdElem] -> [String]
justTerms [] = []
justTerms ((G4S.GTerm _ s) : as) = s : justTerms as
justTerms (_:as) = justTerms as
terminals :: G4AST -> [String]
terminals ast = terminalLiterals ast ++ (lexemeNames ast)
nonterms :: G4AST -> [String]
nonterms ast = nub $ concatMap getNTs ast
justNonTerms :: [G4S.ProdElem] -> [String]
justNonTerms [] = []
justNonTerms (G4S.GNonTerm _ s:as)
| (not . null) s && isLower (head s) = s : justNonTerms as
| otherwise = justNonTerms as
justNonTerms (_:as) = justNonTerms as
getNTs :: G4S.G4 -> [String]
getNTs G4S.Prod{G4S.pName = pName, G4S.patterns = ps} = pName : concatMap (justNonTerms . G4S.alphas) ps
getNTs _ = []
symbolDerives = derivClause Nothing $ map (conT . mkName)
[ "Eq", "Ord", "Show", "Hashable", "Generic", "Bounded", "Enum", "Data", "Lift"]
ntDataDeclQ :: G4AST -> DecQ
ntDataDeclQ ast =
dataD (cxt [])
(mkName $ ntDataName ast)
[]
Nothing
(map (\s -> normalC (mkName $ "NT_" ++ s) []) $ (nonterms ast) ++ (regexNonTermSymbols ast))
[symbolDerives]
allLexicalSymbols :: G4AST -> [String]
allLexicalSymbols ast = map (lookupTName ast "") (terminalLiterals ast) ++ (lexemeNames ast)
allLexicalTypes :: G4AST -> [(String, LexemeType)]
allLexicalTypes ast = (map (lookupLiteralType ast) (terminalLiterals ast)) ++ (lexemeTypes ast)
lookupLiteralType :: G4AST -> String -> (String, LexemeType)
lookupLiteralType ast s =
case s `elemIndex` (terminalLiterals ast) of
Nothing -> undefined
Just i -> (s, Literal i)
tDataDeclQ :: G4AST -> DecQ
tDataDeclQ ast =
dataD (cxt [])
(mkName $ tDataName ast)
[]
Nothing
(map (\s -> normalC (mkName s) []) (map ("T_" ++) (allLexicalSymbols ast)))
[symbolDerives]
lookupTName :: G4AST -> String -> String -> String
lookupTName ast pfx s = pfx ++
(case s `elemIndex` (terminalLiterals ast) of
Nothing -> s
Just i -> show i)
defBang = bang noSourceUnpackedness noSourceStrictness
strBangType = (defBang, conT $ mkName "String")
mkCon = conE . mkName . mkUpper
mkConNT = conE . mkName . ("NT_" ++)
genTermAnnotProds :: [G4S.G4] -> [G4S.G4]
genTermAnnotProds [] = []
genTermAnnotProds (G4S.Prod {G4S.pName = n, G4S.patterns = ps}:xs) = let
withAlphas newName d a = G4S.Prod {G4S.pName = newName, G4S.patterns =
[ G4S.PRHS
{ G4S.pred = Nothing
, G4S.alphas = a
, G4S.mutator = Nothing
, G4S.pDirective = Just d
}
]}
gTAP :: G4S.ProdElem -> [G4S.G4]
gTAP (G4S.GNonTerm (G4S.Regular '?') nt) = trace (show nt)
[ withAlphas (nt ++ "_quest") (G4S.UpperD "Just") [G4S.GNonTerm G4S.NoAnnot nt]
, withAlphas (nt ++ "_quest") (G4S.UpperD "Nothing") []
]
gTAP (G4S.GNonTerm (G4S.Regular '*') nt) =
[ withAlphas (nt ++ "_star") (G4S.HaskellD "(:)") [G4S.GNonTerm G4S.NoAnnot nt, G4S.GNonTerm G4S.NoAnnot (nt ++ "_star")]
, withAlphas (nt ++ "_star") (G4S.HaskellD "(\\x -> [x])") [G4S.GNonTerm G4S.NoAnnot nt]
, withAlphas (nt ++ "_star") (G4S.HaskellD "[]") []
]
gTAP (G4S.GNonTerm (G4S.Regular '+') nt) =
[ withAlphas (nt ++ "_plus") (G4S.HaskellD "(:)") [G4S.GNonTerm G4S.NoAnnot nt, G4S.GNonTerm G4S.NoAnnot (nt ++ "_plus")]
, withAlphas (nt ++ "_plus") (G4S.HaskellD "(\\x -> [x])") [G4S.GNonTerm G4S.NoAnnot nt]
]
gTAP (G4S.GNonTerm G4S.NoAnnot nt) = []
gTAP (G4S.GTerm _ t) = []
gTAP term = error $ show term
in concat (concatMap (map gTAP) (map G4S.alphas ps)) ++ genTermAnnotProds xs
genTermAnnotProds (_:xs) = genTermAnnotProds xs
annotName G4S.NoAnnot s = s
annotName (G4S.Regular '?') s = s ++ "_quest"
annotName (G4S.Regular '*') s = s ++ "_star"
annotName (G4S.Regular '+') s = s ++ "_plus"
annotName (G4S.Regular c) s = s ++ [c]
annotName' (G4S.GTerm annot s) = annotName annot s
annotName' (G4S.GNonTerm annot s) = annotName annot s
regexNonTermSymbols ast = let
rNTS (G4S.Prod {G4S.patterns = ps}) = Just $ map G4S.alphas ps
rNTS _ = Nothing
in nub $ map annotName' $ filter (not . G4S.isNoAnnot . G4S.annot) (concat $ concat $ catMaybes $ map rNTS ast)
getProds :: G4AST -> [G4S.G4] -> [TH.ExpQ]
getProds ast [] = []
getProds ast (G4S.Prod {G4S.pName = n, G4S.patterns = ps}:xs) = let
toElem :: G4S.ProdElem -> TH.ExpQ
toElem (G4S.GTerm annot s) = [| $(mkCon "T") $(mkCon $ lookupTName ast "T_" (annotName annot s)) |]
toElem (G4S.GNonTerm annot s)
| (not . null) s && isLower (head s) = [| $(mkCon "NT") $(mkConNT (annotName annot s)) |]
| otherwise = toElem (G4S.GTerm G4S.NoAnnot s)
mkProd :: Maybe G4S.Directive -> [TH.ExpQ] -> TH.ExpQ
mkProd dir [] = [| $(mkCon "Production") $(conE $ mkName $ "NT_" ++ n) ($(mkCon "Prod") $(mkCon "Pass") []) $(lift dir) |]
mkProd dir es = [| $(mkCon "Production") $(conE $ mkName $ "NT_" ++ n) ($(mkCon "Prod") $(mkCon "Pass") $(listE es)) $(lift dir) |]
in map (\p -> mkProd (G4S.pDirective p) (map toElem $ G4S.alphas p)) ps ++ ((getProds ast) xs)
getProds ast (_:xs) = (getProds ast) xs
s0 :: G4AST -> TH.ExpQ
s0 ast = conE $ mkName $ "NT_" ++ head (nonterms ast)
grammarProds ast = getProds ast (ast ++ ( (genTermAnnotProds ast)))
grammar ast gTy = [| (defaultGrammar $(s0 ast) :: $(return gTy))
{ ns = Set.fromList [minBound .. maxBound :: $(ntConT ast)]
, ts = Set.fromList [minBound .. maxBound :: $(tConT ast)]
, ps = $(listE $ grammarProds ast)
} |]
grammarTy ast s = [t| (Prettify $(s)) => $(justGrammarTy ast s) |]
tokenNameTypeQ ast = tySynD (mkName "TokenName") [] (conT $ mkName $ tDataName ast)
lexemeValueDerives = derivClause Nothing $ map (conT . mkName)
["Show", "Ord", "Eq", "Generic", "Hashable", "Data"]
lexemeTypeConstructors ast = let
nullD (G4S.UpperD s) = null s
nullD (G4S.LowerD s) = null s
nullD (G4S.HaskellD s) = null s
lTC (i, lex@(G4S.Lex{G4S.annotation = Nothing, G4S.lName = lName, G4S.pattern = G4S.LRHS{G4S.directive = Just d}}))
| null lName = error $ "null lexeme name: " ++ show lex
| nullD d = Just $ normalC (mkName $ "V_" ++ lName) [bangType defBang (conT $ mkName "String")]
| otherwise = case d of
(G4S.UpperD d) -> Just $ normalC (mkName $ "V_" ++ lName) [bangType defBang (conT $ mkName d)]
(G4S.LowerD d) -> Just $ do
info <- reify $ mkName d
normalC (mkName $ "V_" ++ lName) [bangType defBang (return $ info2returnType info)]
(G4S.HaskellD s) -> Nothing
lTC _ = Nothing
in ((catMaybes $ map lTC (zip [0 .. length ast - 1] ast))
++ (map (\s -> normalC (mkName $ lookupTName ast "V_" s) []) (terminalLiterals ast)))
tokenValueTypeQ ast =
dataD (cxt []) (mkName "TokenValue") [] Nothing
(lexemeTypeConstructors ast)
[lexemeValueDerives]
mkTyVar s f = return $ f $ mkName s
lookupTokenFncnDecl ast = let
lTFD t = clause [litP $ stringL t]
(normalB $ [| Token $(conE $ mkName $ lookupTName ast "T_" t)
$(conE $ mkName $ lookupTName ast "V_" t)
$(litE $ integerL $ fromIntegral $ length t) |])
[]
in funD (mkName "lookupToken")
( map lTFD (terminalLiterals ast)
++ [clause [varP $ mkName "s"]
(normalB $ [| error ("Error: '" ++ s ++ "' is not a token") |])
[]]
)
lexeme2ValueQ ast lName = let
l2VQ (_, Literal i) =
clause [varP lName, conP (mkName $ "T_" ++ show i) []]
(normalB [| $(conE $ mkName $ "V_" ++ show i) |]) []
l2VQ (s, AString) =
clause [varP lName, conP (mkName $ "T_" ++ s) []]
(normalB [| $(conE $ mkName $ "V_" ++ s) $(varE lName) |]) []
l2VQ (s, Named n t)
| isLower (head n) =
clause [varP lName, conP (mkName $ "T_" ++ s) []]
(normalB [| $(conE $ mkName $ "V_" ++ s) (trace $(varE lName) ($(varE $ mkName n) $(varE lName) :: $t)) |]) []
| otherwise =
clause [varP lName, conP (mkName $ "T_" ++ s) []]
(normalB [| $(conE $ mkName $ "V_" ++ s) (trace $(varE lName) (read $(varE lName) :: $t)) |]) []
in funD (mkName "lexeme2value") (map l2VQ (allLexicalTypes ast))
convertRegex :: (Show c) => (String -> G4S.Regex c) -> G4S.Regex c -> R.Regex c
convertRegex getNamedR = let
cR G4S.Epsilon = R.Epsilon
cR (G4S.Literal []) = R.Epsilon
cR (G4S.Literal [c]) = R.Symbol c
cR (G4S.Literal cs) = R.Literal cs
cR (G4S.Union rs) = R.MultiUnion $ map cR rs
cR (G4S.Concat rs) = R.Concat $ map cR rs
cR (G4S.Kleene r) = R.Kleene $ cR r
cR (G4S.PosClos r) = R.PosClos $ cR r
cR (G4S.Question r) = R.Question $ cR r
cR (G4S.CharSet cs) = R.Class cs
cR (G4S.Named n) = convertRegex getNamedR $ getNamedR n
cR (G4S.Negation (G4S.CharSet cs)) = R.NotClass cs
cR (G4S.Negation (G4S.Literal s)) = R.NotClass s
cR (G4S.Negation (G4S.Concat [G4S.Literal s])) = R.NotClass s
cR r@(G4S.Negation _) = error $ "unimplemented: " ++ show r
in cR
getNamedRegex :: G4AST -> String -> G4S.Regex Char
getNamedRegex ast n = let
gNR (G4S.Lex{G4S.annotation = Just G4S.Fragment, G4S.lName = lName}) = lName == n
gNR _ = False
in case filter gNR ast of
[] -> error $ "No fragment named '" ++ n ++ "'"
[(G4S.Lex{G4S.pattern = G4S.LRHS{G4S.regex = r}})] -> r
xs -> error $ "Too many fragments named '" ++ n ++ "', i.e.: " ++ show xs
mkRegexesQ ast = let
mkLitR :: String -> ExpQ
mkLitR s = [| ($( conE $ mkName $ lookupTName ast "T_" s)
, $(lift $ convertRegex (getNamedRegex ast) $ G4S.Literal s)) |]
mkLexR :: G4S.G4 -> Maybe ExpQ
mkLexR (G4S.Lex{G4S.annotation = Nothing, G4S.lName = lName, G4S.pattern = G4S.LRHS{G4S.regex = r}}) = Just
[| ($(conE $ mkName $ lookupTName ast "T_" lName), $(lift $ convertRegex (getNamedRegex ast) r)) |]
mkLexR _ = Nothing
in valD (varP $ mkName $ mkLower $ gName ast ++ "Regexes")
(normalB $ listE (map mkLitR (terminalLiterals ast) ++ (catMaybes $ map mkLexR ast)))
[]
prettyTFncnQ ast fncnName = let
pTFLit lexeme =
clause [conP (mkName $ lookupTName ast "T_" lexeme) []]
(normalB [| pStr $(litE $ stringL $ "'" ++ lexeme ++ "'") |])
[]
pTFName lexeme =
clause [conP (mkName $ lookupTName ast "T_" lexeme) []]
(normalB [| pStr $(litE $ stringL $ lexeme) |])
[]
in funD fncnName (map pTFLit (terminalLiterals ast) ++ map pTFName (lexemeNames ast))
prettyVFncnQ ast fncnName = let
pVFLit lexeme =
clause [conP (mkName $ lookupTName ast "V_" lexeme) []]
(normalB [| pStr $(litE $ stringL $ "'" ++ lexeme ++ "'") |])
[]
pVFName lexeme =
clause [conP (mkName $ lookupTName ast "V_" lexeme) [varP (mkName "v")]]
(normalB [| pChr '\'' >> prettify v >> pChr '\'' |])
[]
in funD fncnName (map pVFLit (terminalLiterals ast) ++ map pVFName (lexemeNames ast))
astFncnName s = mkName $ "ast2" ++ s
a2d ast nameAST G4S.Lex{G4S.annotation = Nothing, G4S.lName = _A, G4S.pattern = G4S.LRHS{G4S.directive = dir}}
= Just [(mkName $ "ast2" ++ _A
,[ clause [ conP (mkName "Leaf")
[ conP (mkName $ "Token")
[ wildP
, conP (mkName $ lookupTName ast "V_" _A)
[ varP $ mkName "t"]
, wildP]]]
(normalB (varE $ mkName "t"))
[]
]
)]
a2d ast nameAST G4S.Prod{G4S.pName = _A, G4S.patterns = ps} = let
mkConP (G4S.GNonTerm annot nt)
| isUpper (head nt) = conP (mkName "T") [conP (mkName $ lookupTName ast "T_" $ annotName annot nt) []]
| otherwise = conP (mkName "NT") [conP (mkName $ "NT_" ++ annotName annot nt) []]
mkConP (G4S.GTerm annot t) = conP (mkName "T") [conP (mkName $ lookupTName ast "T_" $ annotName annot t) []]
justStr (G4S.GNonTerm annot s) = annotName annot s
justStr (G4S.GTerm _ s) = s
vars as = catMaybes
[ if G4S.isGNonTerm a
then Just (a, mkName $ "v" ++ show i ++ "_" ++ justStr a, varE $ mkName $ "ast2" ++ justStr a)
else Nothing
| (i, a) <- zip [0 .. length as] as
]
astListPattern as = listP $
[ if G4S.isGNonTerm a
then varP $ mkName $ "v" ++ show i ++ "_" ++ justStr a
else wildP
| (i, a) <- zip [0 .. length as] as
]
astAppRec b (alpha, varName, recName) = appE b (appE recName $ varE varName)
clauses = [ clause [ [p| AST $(conP (mkName $ "NT_" ++ _A) [])
$(listP $ map mkConP as)
$(astListPattern as)
|]
]
(case (dir, vars as) of
(Just (G4S.UpperD d), vs) -> normalB $ foldl astAppRec (conE $ mkName d) vs
(Just (G4S.LowerD d), vs) -> normalB $ foldl astAppRec (varE $ mkName d) vs
(Just (G4S.HaskellD d), vs) -> normalB $ foldl astAppRec (haskellParseExp d) vs
(Nothing, []) -> normalB $ tupE []
(Nothing, [(a,v0,rec)]) -> normalB $ appE rec (varE v0)
(Nothing, vs) -> normalB $ tupE $ map (\(a,vN,rN) -> appE rN $ varE vN) vs
) []
| G4S.PRHS{G4S.alphas = as, G4S.pDirective = dir} <- ps
]
retType = let
rT G4S.PRHS{G4S.alphas = as, G4S.pDirective = dir}
= case (dir, vars as) of
(Just (G4S.UpperD d), vs) ->
(do i <- reify $ mkName d
(case i of
DataConI _ t n -> return $ type2returnType t
VarI n t _ -> return t
TyConI (DataD _ n _ _ _ _) -> conT n
other -> error $ show other))
(Just (G4S.LowerD d), vs) -> info2returnType <$> reify (mkName d)
(Just (G4S.HaskellD d), vs) -> error "unimplemented"
(Nothing, []) -> tupleT 0
(Nothing, [(a,v0,rec)]) -> tupleT 0
(Nothing, vs) -> tupleT $ length vs
in rT (head ps)
fncnSig
= do rT <- retType
(case rT of
ForallT vs c t -> forallT vs (cxt []) [t| $(conT nameAST) -> $(return t) |]
t -> forallT [] (cxt []) [t| $(conT nameAST) -> $(return t) |])
in Just $ [
(astFncnName _A, clauses)
]
a2d ast nameAST _ = Nothing
a2d_error_clauses G4S.Prod{G4S.pName = _A} =
[(astFncnName _A, [ clause [ [p| ast2 |] ] (normalB [| error $ "Failed pattern match on " ++ (show ast2) |]) [] ])]
a2d_error_clauses _ = []
mkTupler n = let
xs = ["p" ++ show i | i <- [0 .. n - 1]]
xs_comma = intersperse "," xs
in "(\\" ++ concat (intersperse " " xs) ++ " -> (" ++ concat xs_comma ++ "))"
wipeOutAnnots p@(G4S.Prod{G4S.pName = _A, G4S.patterns = ps}) = let
wOA prhs@(G4S.PRHS { G4S.alphas = as0, G4S.pDirective = dir }) = let
repAnnots pe@(G4S.GTerm G4S.NoAnnot _) = pe
repAnnots pe@(G4S.GNonTerm G4S.NoAnnot _) = pe
repAnnots (G4S.GTerm a s) = G4S.GTerm G4S.NoAnnot (annotName a s)
repAnnots (G4S.GNonTerm a s) = G4S.GNonTerm G4S.NoAnnot (annotName a s)
dir' = let
as0' = filter G4S.isGNonTerm as0
in case dir of
Just x -> Just x
Nothing
| length as0' == 0 -> Just $ G4S.HaskellD "()"
| length as0' == 1 -> Just $ G4S.HaskellD "(\\x -> x)"
| otherwise -> Just $ G4S.HaskellD $ mkTupler (length as0')
in prhs { G4S.alphas = map repAnnots as0, G4S.pDirective = dir' }
in p { G4S.patterns = map wOA ps }
wipeOutAnnots x = x
allClauses gr ast' nameAST = let
ast = genTermAnnotProds ast' ++ ast'
in
(concat . catMaybes . map (a2d ast nameAST)) ast
++ (concatMap a2d_error_clauses) ast
funDecls lst@((name, _):_) = Just $ funD name $ concatMap snd lst
funDecls [] = error "groupBy can't return an empty list"
ast2DTFncnsQ gr ast nameAST =
(catMaybes . map funDecls . groupBy (\a b -> fst a == fst b) . sortBy (comparing fst)) (allClauses gr ast nameAST)
unitTy = [t| () |]
removeEpsilonsAST :: [G4S.G4] -> [G4S.G4]
removeEpsilonsAST ast = let
getPRHS (G4S.Prod { G4S.pName = s, G4S.patterns = ps }) = map (\as -> (s, as)) ps
getPRHS _ = []
epsNT (_A, G4S.PRHS { G4S.alphas = [], G4S.pDirective = dir}) = (:) (_A, dir)
epsNT _ = id
epsNTs = foldr epsNT [] (concatMap getPRHS ast)
orderNub ast0 asts
| ast0 `elem` asts = asts
| otherwise = ast0 : asts
replicateDeclFor (nts0, dflt) (G4S.Prod { G4S.pName = nt1, G4S.patterns = ps }) = let
dropOne ys' xs' dir =
let ys = filter G4S.isGNonTerm ys'
xs = filter G4S.isGNonTerm xs'
params_ys = map (\i -> " p" ++ show i ++ " ") [0 .. length ys - 1]
params_xs = map (\i -> " p" ++ show i ++ " ") [length ys .. length ys + length xs - 1]
both = concat (intersperse "," $ params_ys ++ params_xs)
ifNull s
| null s = "id"
| otherwise = s
s_dir = case dir of
Just (G4S.UpperD s) -> "(" ++ ifNull s ++ ")"
Just (G4S.LowerD s) -> "(" ++ ifNull s ++ ")"
Just (G4S.HaskellD s) -> "(" ++ ifNull s ++ ")"
Nothing
| length (params_ys ++ params_xs) == 0 -> "()"
| length (params_ys ++ params_xs) == 1 -> "(\\x -> x)"
| otherwise -> "(\\" ++ concat params_ys ++ concat params_xs ++ " -> ("
++ both ++ "))"
s_dflt = case dflt of
Just (G4S.UpperD s) -> s
Just (G4S.LowerD s) -> s
Just (G4S.HaskellD s) -> s
Nothing -> " () "
ret
| length params_ys + length params_xs == 0 = Just $ G4S.HaskellD $ "(" ++ s_dir ++ " " ++ s_dflt ++ ")"
| otherwise = Just $ G4S.HaskellD $ "(\\" ++ concat params_ys ++ concat params_xs ++ " -> " ++ s_dir
++ " " ++ concat params_ys ++ " " ++ s_dflt ++ " " ++ concat params_xs ++ ")"
in ret
rDF prhs ys [] = [ updatePRHS prhs $ reverse ys ]
rDF prhs ys (x:xs) = let
newPRHS = prhs { G4S.pDirective = dropOne ys xs (G4S.pDirective prhs) }
result
| G4S.prodElemSymbol x == nts0
= updatePRHS newPRHS (reverse ys ++ xs)
: updatePRHS prhs (reverse ys ++ x:xs)
: ( rDF newPRHS ys xs
++ rDF prhs (x:ys) xs)
| otherwise = rDF prhs (x:ys) xs
in result
updatePRHS prhs xs = prhs { G4S.alphas = xs }
in ( G4S.Prod
{ G4S.pName = nt1
, G4S.patterns = nub $ concatMap
(\prhs -> rDF prhs [] (G4S.alphas prhs))
ps
}
)
replicateDeclFor _ p = p
eliminate nts prod@(G4S.Prod { G4S.pName = _A, G4S.patterns = ps }) =
if _A == nts
then prod { G4S.patterns = filter (not . null . G4S.alphas) ps }
else prod
eliminate nts prod = prod
ast' = case epsNTs of
[] -> ast
((nts, dflt):ntss) -> removeEpsilonsAST $
map (eliminate nts) (foldr orderNub [] (map (replicateDeclFor (nts, dflt)) ast))
in foldr orderNub [] ast'
g4_decls :: [G4S.G4] -> TH.Q [TH.Dec]
g4_decls ast' =
do let ast = removeEpsilonsAST $ map wipeOutAnnots (ast' ++ genTermAnnotProds ast')
tokVal = mkName "TokenValue"
tokName = mkName "TokenName"
ntSym = mkName $ ntDataName ast
tSym = mkName $ tDataName ast
nameAST = mkName (mkUpper $ gName ast ++ "AST")
nameToken = mkName (mkUpper $ gName ast ++ "Token")
nameDFAs = mkName (mkLower $ gName ast ++ "DFAs")
name = mkName $ mkLower (gName ast ++ "Grammar'")
nameUnit = mkName $ mkLower (gName ast ++ "Grammar")
lowerASTName = mkName (mkLower $ gName ast ++ "AST")
prettyTFncnName <- newName "prettifyT"
prettyValueFncnName <- newName "prettifyValue"
stateTypeName <- newName "s"
let stateType = varT stateTypeName
gTyUnit <- justGrammarTy ast unitTy
gUnitFunD <- funD nameUnit [clause [] (normalB $ [| $(varE name) |]) []]
gTySigUnit <- sigD nameUnit (return gTyUnit)
ntDataDecl <- ntDataDeclQ ast
tDataDecl <- tDataDeclQ ast
gTy <- grammarTy ast stateType
gTy' <- justGrammarTy ast stateType
gTySig <- sigD name (return gTy)
g <- grammar ast gTy'
gFunD <- funD name [clause [] (normalB (return g)) []]
prettyNT:_ <- [d| instance Prettify $(ntConT ast) where prettify = rshow |]
prettyT:_ <- [d| instance Prettify $(tConT ast) where prettify = $(varE prettyTFncnName) |]
prettyValue:_ <- [d| instance Prettify $(conT tokVal) where prettify = $(varE prettyValueFncnName) |]
lookupTokenD <- lookupTokenFncnDecl ast
tokenNameType <- tokenNameTypeQ ast
tokenValueType <- tokenValueTypeQ ast
let lName = mkName "l"
lexeme2Value <- lexeme2ValueQ ast lName
regexes <- mkRegexesQ ast
let dfasName = mkName $ mkLower (gName ast) ++ "DFAs"
let regexesE = varE $ mkName $ mkLower (gName ast) ++ "Regexes"
dfas <- funD dfasName [clause [] (normalB [| map (fst &&& regex2dfa . snd) $(regexesE) |]) []]
astDecl <-tySynD nameAST [] [t| AST $(conT ntSym) $(conT nameToken) |]
tokDecl <- tySynD nameToken [] [t| Token $(conT tSym) $(conT tokVal) |]
prettyTFncn <- prettyTFncnQ ast prettyTFncnName
prettyVFncn <- prettyVFncnQ ast prettyValueFncnName
the_ast <- funD lowerASTName [clause [] (normalB $ lift ast) []]
return $
[ ntDataDecl, tDataDecl
, gTySig, gFunD
, gTySigUnit, gUnitFunD
, tokenNameType, tokenValueType
, prettyTFncn, prettyVFncn
, prettyNT, prettyT, prettyValue
, lookupTokenD
, lexeme2Value
, regexes
, dfas, astDecl, tokDecl
, the_ast
]
g4_parsers ast gr = do
let tokVal = mkName "TokenValue"
tokName = mkName "TokenName"
ntSym = mkName $ ntDataName ast
tSym = mkName $ tDataName ast
nameAST = mkName (mkUpper $ gName ast ++ "AST")
nameToken = mkName (mkUpper $ gName ast ++ "Token")
nameDFAs = mkName (mkLower $ gName ast ++ "DFAs")
name = mkName $ mkLower (gName ast ++ "Grammar'")
nameUnit = mkName $ mkLower (gName ast ++ "Grammar")
ast2DTFncns <- sequence $ ast2DTFncnsQ gr ast nameAST
decls <- [d|
instance Ref $(conT ntSym) where
type Sym $(conT ntSym) = $(conT ntSym)
getSymbol = id
tokenize :: String -> [$(conT nameToken)]
tokenize = T.tokenize $(varE nameDFAs) lexeme2value
slrParse :: [$(conT nameToken)]
-> LR.LRResult
(LR.CoreSLRState $(conT ntSym) (StripEOF (Sym $(conT nameToken))))
$(conT nameToken)
$(conT nameToken)
$(conT nameAST)
slrParse = (LR.slrParse $(varE nameUnit) event2ast)
glrParse :: ($(conT tokName) -> Bool) -> [Char]
-> LR.GLRResult
Int
Char
$(conT nameToken)
$(conT nameAST)
glrParse filterF = (LR.glrParseInc2 $(varE nameUnit) event2ast (T.tokenizeInc filterF $(varE nameDFAs) lexeme2value))
allstarParse :: ($(conT tokName) -> Bool) -> String
-> Either String $(conT nameAST)
allstarParse filterF inp =
ALL.parse'
(T.tokenizeIncAll filterF $(varE nameDFAs) lexeme2value (Set.fromList $ map fst $(varE nameDFAs)))
inp
(ALL.NT $(s0 ast))
(ALL.atnOf ($(varE nameUnit) :: $(justGrammarTy ast unitTy)))
True
|]
return $ decls ++ ast2DTFncns
mkLRParser ast g =
let
nameDFAs = mkName (mkLower $ gName ast ++ "DFAs")
tokName = mkName "TokenName"
nameAST = mkName (mkUpper $ grammarName ast ++ "AST")
nameToken = mkName (mkUpper $ gName ast ++ "Token")
name = mkName $ mkLower (grammarName ast ++ "Grammar")
is = sort $ S.toList $ LR.lr1Items g
tbl = LR.lr1Table g
tblInt = LR.convTableInt tbl is
(_lr1Table', errs) = LR.disambiguate tblInt
lr1Table' = M.toList tblInt
lr1S0' = LR.convStateInt is $ LR.lr1Closure g $ LR.lr1S0 g
unitTy = [t| () |]
name' = [e| $(varE name) |]
in do
D.traceM $ "lr1S0 = " ++ (pshow' $ LR.lr1S0 g)
D.traceM $ "lr1S0' = " ++ (pshow' lr1S0')
D.traceM $ "lr1Table' = " ++ (pshow' lr1Table')
D.traceM $ "Total LR1 conflicts: " ++ (pshow' errs)
[d| lr1ItemsList = sort $ S.toList $ LR.lr1Items $(name')
lr1Table = $(lift lr1Table')
lr1Goto = LR.convGotoStatesInt (LR.convGoto $(name') (LR.lr1Goto $(name')) lr1ItemsList) lr1ItemsList
lr1Closure = convState $ LR.lr1Closure $(name') (LR.lr1S0 $(name'))
lr1S0 = $(lift lr1S0')
convState = LR.convStateInt lr1ItemsList
glrParseFast :: ($(conT tokName) -> Bool) -> [Char]
-> LR.GLRResult
Int
Char
$(conT nameToken)
$(conT nameAST)
glrParseFast filterF =
LR.glrParseInc'
$(name')
(M.fromList' lr1Table)
lr1Goto
lr1S0
(LR.tokenizerFirstSets convState $(name'))
event2ast
(T.tokenizeInc filterF $(varE nameDFAs) lexeme2value)
|]