{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE TupleSections #-} {- BNF Converter: Flex generator Copyright (C) 2004 Author: Michael Pellauer Copyright (C) 2020 Andreas Abel Description : This module generates the Flex file. It is similar to JLex but with a few peculiarities. Author : Michael Pellauer Created : 5 August, 2003 Modified : 22 August, 2006 by Aarne Ranta -} module BNFC.Backend.CPP.NoSTL.CFtoFlex (cf2flex) where import Prelude hiding ((<>)) import Data.Bifunctor (first) import Data.Maybe (fromMaybe) import qualified Data.Map as Map import BNFC.CF import BNFC.Backend.C.CFtoFlexC (preludeForBuffer, cMacros, commentStates, lexChars, lexStrings) import BNFC.Backend.C.RegToFlex import BNFC.Backend.Common.NamedVariables import BNFC.Backend.CPP.STL.STLUtils import BNFC.PrettyPrint import BNFC.Utils (cstring, when) --The environment must be returned for the parser to use. cf2flex :: Maybe String -> String -> CF -> (String, SymMap) cf2flex inPackage _name cf = (, env) $ unlines [ prelude stringLiterals inPackage , cMacros cf , lexSymbols env0 , restOfFlex inPackage cf env ] where env = Map.fromList env1 env0 = makeSymEnv (cfgSymbols cf ++ reservedWords cf) [0 :: Int ..] env1 = map (first Keyword) env0 ++ makeSymEnv (map Tokentype $ tokenNames cf) [length env0 ..] makeSymEnv = zipWith $ \ s n -> (s, "_SYMB_" ++ show n) stringLiterals = isUsedCat cf (TokenCat catString) prelude :: Bool -> Maybe String -> String prelude stringLiterals inPackage = unlines $ concat [ [ "/* This FLex file was machine-generated by the BNF converter */" ] , maybe [] (\ ns -> [ "%option prefix=\"" ++ ns ++ "yy\"" ]) inPackage , [ "%{" , "#include " , "#include \"Parser.H\"" , "extern int " ++ nsString inPackage ++ "yy_mylinenumber ;" --- hack to get line number. AR 2006 , "" ] , when stringLiterals $ preludeForBuffer "Buffer.H" , [ "%}" ] ] lexSymbols :: SymEnv -> String lexSymbols ss = concatMap transSym ss where transSym (s,r) = "\"" ++ s' ++ "\" \t return " ++ r ++ ";\n" where s' = escapeChars s restOfFlex :: Maybe String -> CF -> SymMap -> String restOfFlex inPackage cf env = unlines $ concat [ [ render $ lexComments inPackage (comments cf) , "" ] , userDefTokens , ifC catString $ lexStrings (ns ++ "yylval") (nsDefine inPackage "_STRING_") (nsDefine inPackage "_ERROR_") , ifC catChar $ lexChars (ns ++ "yylval") (nsDefine inPackage "_CHAR_") , ifC catDouble [ "{DIGIT}+\".\"{DIGIT}+(\"e\"(\\-)?{DIGIT}+)? \t " ++ ns ++ "yylval._double = atof(yytext); return " ++ nsDefine inPackage "_DOUBLE_" ++ ";" ] , ifC catInteger [ "{DIGIT}+ \t " ++ ns ++ "yylval._int = atoi(yytext); return " ++ nsDefine inPackage "_INTEGER_" ++ ";" ] , ifC catIdent [ "{LETTER}{IDENT}* \t " ++ ns ++ "yylval._string = strdup(yytext); return " ++ nsDefine inPackage "_IDENT_" ++ ";" ] , [ "\\n ++" ++ ns ++ "yy_mylinenumber ;" , "[ \\t\\r\\n\\f] \t /* ignore white space. */;" , ". \t return " ++ nsDefine inPackage "_ERROR_" ++ ";" , "%%" ] , footer ] where ifC cat s = if isUsedCat cf (TokenCat cat) then s else [] ns = nsString inPackage userDefTokens = [ "" ++ printRegFlex exp ++ " \t " ++ ns ++ "yylval._string = strdup(yytext); return " ++ sName name ++ ";" | (name, exp) <- tokenPragmas cf ] where sName n = fromMaybe n $ Map.lookup (Tokentype n) env footer = [ "void " ++ ns ++ "initialize_lexer(FILE *inp) { yyrestart(inp); BEGIN YYINITIAL; }" , "int yywrap(void) { return 1; }" ] -- --------------------------------------------------------------------------- -- Comments -- | Create flex rules for single-line and multi-lines comments. -- The first argument is an optional namespace (for C++); the second -- argument is the set of comment delimiters as returned by BNFC.CF.comments. -- -- This function is only compiling the results of applying either -- lexSingleComment or lexMultiComment on each comment delimiter or pair of -- delimiters. -- -- >>> lexComments (Just "myns.") ([("{-","-}")],["--"]) -- "--"[^\n]* ; // BNFC: comment "--"; -- "{-" BEGIN COMMENT; // BNFC: block comment "{-" "-}"; -- "-}" BEGIN YYINITIAL; -- . /* skip */; -- [\n] ++myns.yy_mylinenumber; lexComments :: Maybe String -> ([(String, String)], [String]) -> Doc lexComments ns (m,s) = vcat $ concat [ map (lexSingleComment ns) s , zipWith (lexMultiComment ns) m commentStates ] -- | Create a lexer rule for single-line comments. -- The first argument is -- an optional c++ namespace -- The second argument is the delimiter that marks the beginning of the -- comment. -- -- >>> lexSingleComment (Just "mypackage.") "--" -- "--"[^\n]* ; // BNFC: comment "--"; -- -- >>> lexSingleComment Nothing "--" -- "--"[^\n]* ; // BNFC: comment "--"; -- -- >>> lexSingleComment Nothing "\"" -- "\""[^\n]* ; // BNFC: comment "\""; lexSingleComment :: Maybe String -> String -> Doc lexSingleComment _ c = "" <> cstring c <> "[^\\n]*" <+> ";" <+> "// BNFC: comment" <+> cstring c <> ";" -- -- | Create a lexer rule for single-line comments. -- -- The first argument is -- an optional c++ namespace -- -- The second argument is the delimiter that marks the beginning of the -- -- comment. -- -- -- -- >>> lexSingleComment (Just "mypackage.") "--" -- -- "--"[^\n]*\n ++mypackage.yy_mylinenumber; // BNFC: comment "--"; -- -- -- -- >>> lexSingleComment Nothing "--" -- -- "--"[^\n]*\n ++yy_mylinenumber; // BNFC: comment "--"; -- -- -- -- >>> lexSingleComment Nothing "\"" -- -- "\""[^\n]*\n ++yy_mylinenumber; // BNFC: comment "\""; -- lexSingleComment :: Maybe String -> String -> Doc -- lexSingleComment ns c = -- "" <> cstring c <> "[^\\n]*\\n" -- <+> "++"<> text (fromMaybe "" ns)<>"yy_mylinenumber;" -- <+> "// BNFC: comment" <+> cstring c <> ";" -- | Create a lexer rule for multi-lines comments. -- The first argument is -- an optional c++ namespace -- The second arguments is the pair of delimiter for the multi-lines comment: -- start deleminiter and end delimiter. -- There might be a possible bug here if a language includes 2 multi-line -- comments. They could possibly start a comment with one character and end it -- with another. However this seems rare. -- -- >>> lexMultiComment Nothing ("{-", "-}") "COMMENT" -- "{-" BEGIN COMMENT; // BNFC: block comment "{-" "-}"; -- "-}" BEGIN YYINITIAL; -- . /* skip */; -- [\n] ++yy_mylinenumber; -- -- >>> lexMultiComment (Just "foo.") ("{-", "-}") "COMMENT" -- "{-" BEGIN COMMENT; // BNFC: block comment "{-" "-}"; -- "-}" BEGIN YYINITIAL; -- . /* skip */; -- [\n] ++foo.yy_mylinenumber; -- -- >>> lexMultiComment Nothing ("\"'", "'\"") "COMMENT" -- "\"'" BEGIN COMMENT; // BNFC: block comment "\"'" "'\""; -- "'\"" BEGIN YYINITIAL; -- . /* skip */; -- [\n] ++yy_mylinenumber; lexMultiComment :: Maybe String -> (String, String) -> String -> Doc lexMultiComment ns (b,e) comment = vcat [ "" <> cstring b <+> "BEGIN" <+> text comment <> ";" <+> "// BNFC: block comment" <+> cstring b <+> cstring e <> ";" , commentTag <> cstring e <+> "BEGIN YYINITIAL;" , commentTag <> ". /* skip */;" , commentTag <> "[\\n] ++" <> text (fromMaybe "" ns) <> "yy_mylinenumber;" ] where commentTag = text $ "<" ++ comment ++ ">" -- | Helper function that escapes characters in strings. escapeChars :: String -> String escapeChars [] = [] escapeChars ('\\':xs) = '\\' : ('\\' : (escapeChars xs)) escapeChars ('\"':xs) = '\\' : ('\"' : (escapeChars xs)) escapeChars (x:xs) = x : (escapeChars xs)