{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module BNFC.Backend.Java.CFtoAntlr4Lexer ( cf2AntlrLex ) where
import Prelude hiding ((<>))
import Text.PrettyPrint
import BNFC.CF
import BNFC.Backend.Java.RegToAntlrLexer
import BNFC.Backend.Java.Utils
import BNFC.Backend.Common.NamedVariables
cf2AntlrLex :: String -> CF -> (Doc, KeywordEnv)
cf2AntlrLex :: String -> CF -> (Doc, KeywordEnv)
cf2AntlrLex String
packageBase CF
cf = ([Doc] -> Doc
vcat
[ String -> Doc
prelude String
packageBase
, Doc
cMacros
, KeywordEnv -> Doc
lexSymbols KeywordEnv
env
, CF -> Doc
restOfLexerGrammar CF
cf
], KeywordEnv
env)
where
env :: KeywordEnv
env = [String] -> Int -> KeywordEnv
forall t a. (Show t, Num t) => [a] -> t -> [(a, String)]
makeSymEnv (CF -> [String]
forall function. CFG function -> [String]
cfgSymbols CF
cf [String] -> [String] -> [String]
forall a. [a] -> [a] -> [a]
++ CF -> [String]
forall function. CFG function -> [String]
reservedWords CF
cf)
(Int
0 :: Int)
makeSymEnv :: [a] -> t -> [(a, String)]
makeSymEnv [] t
_ = []
makeSymEnv (a
s:[a]
symbs) t
n = (a
s, String
"Surrogate_id_SYMB_" String -> String -> String
forall a. [a] -> [a] -> [a]
++ t -> String
forall a. Show a => a -> String
show t
n)
(a, String) -> [(a, String)] -> [(a, String)]
forall a. a -> [a] -> [a]
: [a] -> t -> [(a, String)]
makeSymEnv [a]
symbs (t
nt -> t -> t
forall a. Num a => a -> a -> a
+t
1)
prelude :: String -> Doc
prelude :: String -> Doc
prelude String
packageBase = [Doc] -> Doc
vcat
[ Doc
"// Lexer definition for use with Antlr4"
, Doc
"lexer grammar" Doc -> Doc -> Doc
<+> String -> Doc
text String
name Doc -> Doc -> Doc
<> Doc
"Lexer;"
]
where name :: String
name = String -> String
getLastInPackage String
packageBase
cMacros :: Doc
cMacros :: Doc
cMacros = [Doc] -> Doc
vcat
[ Doc
"// Predefined regular expressions in BNFC"
, Doc -> Doc
frg Doc
"LETTER : CAPITAL | SMALL"
, Doc -> Doc
frg Doc
"CAPITAL : [A-Z\\u00C0-\\u00D6\\u00D8-\\u00DE]"
, Doc -> Doc
frg Doc
"SMALL : [a-z\\u00DF-\\u00F6\\u00F8-\\u00FF]"
, Doc -> Doc
frg Doc
"DIGIT : [0-9]"
]
where frg :: Doc -> Doc
frg Doc
a = Doc
"fragment" Doc -> Doc -> Doc
<+> Doc
a Doc -> Doc -> Doc
<+> Doc
";"
escapeChars :: String -> String
escapeChars :: String -> String
escapeChars = (Char -> String) -> String -> String
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Char -> String
escapeCharInSingleQuotes
lexSymbols :: KeywordEnv -> Doc
lexSymbols :: KeywordEnv -> Doc
lexSymbols KeywordEnv
ss = [Doc] -> Doc
vcat ([Doc] -> Doc) -> [Doc] -> Doc
forall a b. (a -> b) -> a -> b
$ ((String, String) -> Doc) -> KeywordEnv -> [Doc]
forall a b. (a -> b) -> [a] -> [b]
map (String, String) -> Doc
transSym KeywordEnv
ss
where
transSym :: (String, String) -> Doc
transSym (String
s,String
r) = String -> Doc
text String
r Doc -> Doc -> Doc
<> Doc
" : '" Doc -> Doc -> Doc
<> String -> Doc
text (String -> String
escapeChars String
s) Doc -> Doc -> Doc
<> Doc
"' ;"
restOfLexerGrammar :: CF -> Doc
restOfLexerGrammar :: CF -> Doc
restOfLexerGrammar CF
cf = [Doc] -> Doc
vcat
[ (KeywordEnv, [String]) -> Doc
lexComments (CF -> (KeywordEnv, [String])
comments CF
cf)
, Doc
""
, Doc
userDefTokens
, [Doc] -> Doc
ifString [Doc]
strdec
, [Doc] -> Doc
ifChar [Doc]
chardec
, String -> [Doc] -> Doc
ifC String
catDouble [
Doc
"// Double predefined token type",
Doc
"DOUBLE : DIGIT+ '.' DIGIT+ ('e' '-'? DIGIT+)?;"
]
, String -> [Doc] -> Doc
ifC String
catInteger [
Doc
"//Integer predefined token type",
Doc
"INTEGER : DIGIT+;"
]
, String -> [Doc] -> Doc
ifC String
catIdent [
Doc
"// Identifier token type" ,
Doc
"fragment" ,
Doc
"IDENTIFIER_FIRST : LETTER | '_';",
Doc
"IDENT : IDENTIFIER_FIRST (IDENTIFIER_FIRST | DIGIT)*;"
]
, Doc
"// Whitespace"
, Doc
"WS : (' ' | '\\r' | '\\t' | '\\n' | '\\f')+ -> skip;"
, Doc
"// Escapable sequences"
, Doc
"fragment"
, Doc
"Escapable : ('\"' | '\\\\' | 'n' | 't' | 'r' | 'f');"
, Doc
"ErrorToken : . ;"
, [Doc] -> Doc
ifString [Doc]
stringmodes
, [Doc] -> Doc
ifChar [Doc]
charmodes
]
where
ifC :: String -> [Doc] -> Doc
ifC String
cat [Doc]
s = if CF -> Cat -> Bool
forall f. CFG f -> Cat -> Bool
isUsedCat CF
cf (String -> Cat
TokenCat String
cat) then [Doc] -> Doc
vcat [Doc]
s else Doc
""
ifString :: [Doc] -> Doc
ifString = String -> [Doc] -> Doc
ifC String
catString
ifChar :: [Doc] -> Doc
ifChar = String -> [Doc] -> Doc
ifC String
catChar
strdec :: [Doc]
strdec = [ Doc
"// String token type"
, Doc
"STRING : '\"' -> more, mode(STRINGMODE);"
]
chardec :: [Doc]
chardec = [Doc
"CHAR : '\\'' -> more, mode(CHARMODE);"]
userDefTokens :: Doc
userDefTokens = [Doc] -> Doc
vcat
[ String -> Doc
text String
name Doc -> Doc -> Doc
<> Doc
" : " Doc -> Doc -> Doc
<> String -> Doc
text (Reg -> String
printRegJLex Reg
exp) Doc -> Doc -> Doc
<> Doc
";"
| (String
name, Reg
exp) <- CF -> [(String, Reg)]
forall f. CFG f -> [(String, Reg)]
tokenPragmas CF
cf ]
stringmodes :: [Doc]
stringmodes = [ Doc
"mode STRESCAPE;"
, Doc
"STRESCAPED : Escapable -> more, popMode ;"
, Doc
"mode STRINGMODE;"
, Doc
"STRINGESC : '\\\\' -> more , pushMode(STRESCAPE);"
, Doc
"STRINGEND : '\"' -> type(STRING), mode(DEFAULT_MODE);"
, Doc
"STRINGTEXT : ~[\"\\\\] -> more;"
]
charmodes :: [Doc]
charmodes = [ Doc
"mode CHARMODE;"
, Doc
"CHARANY : ~['\\\\] -> more, mode(CHAREND);"
, Doc
"CHARESC : '\\\\' -> more, pushMode(CHAREND),pushMode(ESCAPE);"
, Doc
"mode ESCAPE;"
, Doc
"ESCAPED : (Escapable | '\\'') -> more, popMode ;"
, Doc
"mode CHAREND;"
, Doc
"CHARENDC : '\\'' -> type(CHAR), mode(DEFAULT_MODE);"
]
lexComments :: ([(String, String)], [String]) -> Doc
([],[]) = Doc
""
lexComments (KeywordEnv
m,[String]
s) = [Doc] -> Doc
vcat
(Doc -> (String -> Doc) -> [String] -> [Doc]
forall a. Doc -> (a -> Doc) -> [a] -> [Doc]
prod Doc
"COMMENT_antlr_builtin" String -> Doc
lexSingleComment [String]
s [Doc] -> [Doc] -> [Doc]
forall a. [a] -> [a] -> [a]
++
Doc -> ((String, String) -> Doc) -> KeywordEnv -> [Doc]
forall a. Doc -> (a -> Doc) -> [a] -> [Doc]
prod Doc
"MULTICOMMENT_antlr_builtin" (String, String) -> Doc
lexMultiComment KeywordEnv
m )
where
prod :: Doc -> (a -> Doc) -> [a] -> [Doc]
prod Doc
bg a -> Doc
lc [a]
ty = [Doc
bg, Doc
": ("] [Doc] -> [Doc] -> [Doc]
forall a. [a] -> [a] -> [a]
++ Doc -> [Doc] -> [Doc]
punctuate Doc
"|" ((a -> Doc) -> [a] -> [Doc]
forall a b. (a -> b) -> [a] -> [b]
map a -> Doc
lc [a]
ty) [Doc] -> [Doc] -> [Doc]
forall a. [a] -> [a] -> [a]
++ [Doc]
skiplex
skiplex :: [Doc]
skiplex = [Doc
") -> skip;"]
lexSingleComment :: String -> Doc
String
c =
Doc
"'" Doc -> Doc -> Doc
<>String -> Doc
text (String -> String
escapeChars String
c) Doc -> Doc -> Doc
<> Doc
"' ~[\\r\\n]* (('\\r'? '\\n')|EOF)"
lexMultiComment :: (String, String) -> Doc
(String
b,String
e) =
Doc
"'" Doc -> Doc -> Doc
<> String -> Doc
text (String -> String
escapeChars String
b)
Doc -> Doc -> Doc
<>Doc
"' (.)*? '"Doc -> Doc -> Doc
<> String -> Doc
text (String -> String
escapeChars String
e)
Doc -> Doc -> Doc
<> Doc
"'"