{-# LANGUAGE DeriveAnyClass, DeriveGeneric, TypeFamilies, QuasiQuotes , DataKinds, ScopedTypeVariables, OverloadedStrings, TypeSynonymInstances , FlexibleInstances, UndecidableInstances, DeriveDataTypeable , TemplateHaskell #-} {-| Module : Language.ANTLR4.G4 Description : Core G4 quasiquoter for antlr-haskell Copyright : (c) Karl Cronburg, 2018 License : BSD3 Maintainer : karl@cs.tufts.edu Stability : experimental Portability : POSIX Until better haddock integration is developed, you'll need to look at the source for this module to see the G4 grammar for G4. -} module Language.ANTLR4.G4 (g4) where import Control.Arrow ( (&&&) ) import Data.Char (isUpper) import Text.ANTLR.Common import Text.ANTLR.Grammar import Text.ANTLR.Parser import qualified Text.ANTLR.LR as LR import Text.ANTLR.Lex.Tokenizer as T import qualified Text.ANTLR.Set as S import Text.ANTLR.Set (Hashable(..), Generic(..)) import Text.ANTLR.Pretty import Text.ANTLR.Lex.Regex (regex2dfa) import Data.Data (Data(..)) import Language.Haskell.TH.Lift (Lift(..)) import Language.Haskell.TH.Quote (QuasiQuoter(..)) import qualified Language.Haskell.TH as TH import Language.ANTLR4.Boot.Quote (antlr4) import Language.ANTLR4.Syntax import qualified Language.ANTLR4.Boot.Syntax as G4S import qualified Language.ANTLR4.Boot.Quote as G4Q import Debug.Trace as D char :: String -> Char char = head append :: String -> String -> String append = (++) list a = [a] cons = (:) lexemeDirective r d = G4S.LRHS r (Just d) lexemeNoDir r = G4S.LRHS r Nothing lexDecl = G4S.Lex Nothing lexFragment = G4S.Lex (Just G4S.Fragment) literalRegex :: String -> G4S.Regex Char literalRegex = G4S.Literal prodDirective as d = G4S.PRHS as Nothing Nothing (Just d) prodNoDir as = G4S.PRHS as Nothing Nothing Nothing list2 a b = [a,b] range a b = [a .. b] gterm = G4S.GTerm G4S.NoAnnot gnonTerm = G4S.GNonTerm G4S.NoAnnot maybeGTerm = G4S.GTerm (G4S.Regular '?') maybeGNonTerm = G4S.GNonTerm (G4S.Regular '?') starGTerm = G4S.GTerm (G4S.Regular '*') starGNonTerm = G4S.GNonTerm (G4S.Regular '*') plusGTerm = G4S.GTerm (G4S.Regular '+') plusGNonTerm = G4S.GNonTerm (G4S.Regular '+') regexAnyChar = G4S.Negation (G4S.CharSet []) dQual [] = G4S.UpperD [] dQual xs = case last xs of [] -> G4S.UpperD $ concatWith "." xs (a:as) | isUpper a -> G4S.UpperD $ concatWith "." xs | otherwise -> G4S.LowerD $ concatWith "." xs qDir l u = [l,u] -- Force the above declarations (and their types) into scope: $( return [] ) [antlr4| grammar G4; decls : decl1 ';' -> list | decl1 ';' decls -> cons ; decl1 : 'grammar' UpperID -> G4S.Grammar | LowerID ':' prods -> G4S.Prod | UpperID ':' lexemeRHS -> lexDecl | 'fragment' UpperID ':' lexemeRHS -> lexFragment ; prods : prodRHS -> list | prodRHS '|' prods -> cons ; lexemeRHS : regexes1 '->' directive -> lexemeDirective | regexes1 -> lexemeNoDir ; prodRHS : alphas '->' directive -> prodDirective | alphas -> prodNoDir ; directive : qDirective -> dQual | UpperID -> G4S.UpperD | LowerID -> G4S.LowerD | '${' HaskellExp '}' -> G4S.HaskellD ; qDirective : UpperID '.' qDot -> qDir ; qDot : UpperID | LowerID ; HaskellExp : ( ~ '}' )+ -> String; alphas : alpha -> list | alpha alphas -> cons | '(' alphas ')' | '(' alphas ')' '?' | '(' alphas ')' '*' | '(' alphas ')' '+' ; alpha : Literal '?' -> maybeGTerm | LowerID '?' -> maybeGNonTerm | UpperID '?' -> maybeGNonTerm | Literal '*' -> starGTerm | LowerID '*' -> starGNonTerm | UpperID '*' -> starGNonTerm | Literal '+' -> plusGTerm | LowerID '+' -> plusGNonTerm | UpperID '+' -> plusGNonTerm | Literal -> gterm | LowerID -> gnonTerm | UpperID -> gnonTerm ; // Regex Stuff: regexes1 : regexes -> G4S.Concat ; regexes : regex -> list | regex regexes -> cons ; regex : regex1 '?' -> G4S.Question | regex1 '*' -> G4S.Kleene | regex1 '+' -> G4S.PosClos | '~' regex1 -> G4S.Negation | regex1 -> id ; regex1 : '[' charSet ']' -> G4S.CharSet | Literal -> literalRegex | UpperID -> G4S.Named | '(' regexes1 ')' | unionR -> G4S.Union | '.' -> regexAnyChar ; unionR : regex '|' regex -> list2 | regex '|' unionR -> cons ; charSet : charSet1 -> id | charSet1 charSet -> append ; charSet1 : SetChar '-' SetChar -> range | SetChar -> list | EscapedChar -> list ; UpperID : [A-Z][a-zA-Z0-9_]* -> String; LowerID : [a-z][a-zA-Z0-9_]* -> String; Literal : '\'' (~ '\'')+ '\'' -> stripQuotesReadEscape; LineComment : '//' (~ '\n')* '\n' -> String; SetChar : ~ ']' -> char ; WS : [ \t\n\r\f\v]+ -> String; EscapedChar : '\\' [tnrfv] -> readEscape ; |] isWhitespace T_LineComment = True isWhitespace T_WS = True isWhitespace _ = False g4_codeGen :: String -> TH.Q [TH.Dec] g4_codeGen input = do loc <- TH.location let fileName = TH.loc_filename loc let (line,column) = TH.loc_start loc case glrParse isWhitespace input of r@(LR.ResultAccept ast) -> codeGen r LR.ResultSet s -> if S.size s == 1 then codeGen (S.findMin s) else D.trace (pshow' s) $ codeGen (S.findMin s) err -> error $ pshow' err -- TODO: Convert a Universal AST into a [G4S.G4] codeGen (LR.ResultAccept ast) = G4Q.g4_decls $ ast2decls ast -- | Entrypoint to the G4 quasiquoer. Currently only supports declaration-level -- Haskell generation of G4 grammars using a GLR parser. The output grammars -- need not use a GLR parser themselves. g4 :: QuasiQuoter g4 = QuasiQuoter (error "parse exp") (error "parse pattern") (error "parse type") g4_codeGen