{-# LANGUAGE NoImplicitPrelude #-} {- BNF Converter: C flex generator Copyright (C) 2004 Author: Michael Pellauer This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA -} {- ************************************************************** BNF Converter Module Description : This module generates the Flex file. It is similar to JLex but with a few peculiarities. Author : Michael Pellauer (pellauer@cs.chalmers.se) License : GPL (GNU General Public License) Created : 5 August, 2003 Modified : 10 August, 2003 ************************************************************** -} module BNFC.Backend.C.CFtoFlexC (cf2flex, lexComments, cMacros) where import Prelude' import Data.Maybe (fromMaybe) import BNFC.CF import BNFC.Backend.CPP.NoSTL.RegToFlex import BNFC.Backend.Common.NamedVariables import BNFC.PrettyPrint import BNFC.Utils (cstring) --The environment must be returned for the parser to use. cf2flex :: String -> CF -> (String, SymEnv) cf2flex name cf = (unlines [ prelude name, cMacros, lexSymbols env, restOfFlex cf env' ], env') where env = makeSymEnv (cfgSymbols cf ++ reservedWords cf) (0 :: Int) env' = env ++ (makeSymEnv (tokenNames cf) (length env)) makeSymEnv [] _ = [] makeSymEnv (s:symbs) n = (s, "_SYMB_" ++ (show n)) : (makeSymEnv symbs (n+1)) prelude :: String -> String prelude name = unlines [ "/* -*- c -*- This FLex file was machine-generated by the BNF converter */", "%option noyywrap", "%{", "#define yylval " ++ name ++ "lval", "#define YY_BUFFER_APPEND " ++ name ++ "_BUFFER_APPEND", "#define YY_BUFFER_RESET " ++ name ++ "_BUFFER_RESET", "#define initialize_lexer " ++ name ++ "_initialize_lexer", "#include ", "#include \"Parser.h\"", "#define YY_BUFFER_LENGTH 4096", "extern int yy_mylinenumber ;", "char YY_PARSED_STRING[YY_BUFFER_LENGTH];", "void YY_BUFFER_APPEND(char *s)", "{", " strcat(YY_PARSED_STRING, s); //Do something better here!", "}", "void YY_BUFFER_RESET(void)", "{", " int x;", " for(x = 0; x < YY_BUFFER_LENGTH; x++)", " YY_PARSED_STRING[x] = 0;", "}", "", "%}" ] -- For now all categories are included. -- Optimally only the ones that are used should be generated. cMacros :: String cMacros = unlines [ "LETTER [a-zA-Z]", "CAPITAL [A-Z]", "SMALL [a-z]", "DIGIT [0-9]", "IDENT [a-zA-Z0-9'_]", "%START YYINITIAL COMMENT CHAR CHARESC CHAREND STRING ESCAPED", "%%" ] lexSymbols :: SymEnv -> String lexSymbols ss = concatMap transSym ss where transSym (s,r) = "\"" ++ s' ++ "\" \t return " ++ r ++ ";\n" where s' = escapeChars s restOfFlex :: CF -> SymEnv -> String restOfFlex cf env = concat [ render $ lexComments Nothing (comments cf), "\n\n", userDefTokens, ifC catString strStates, ifC catChar chStates, ifC catDouble "{DIGIT}+\".\"{DIGIT}+(\"e\"(\\-)?{DIGIT}+)? \t yylval.double_ = atof(yytext); return _DOUBLE_;\n", ifC catInteger "{DIGIT}+ \t yylval.int_ = atoi(yytext); return _INTEGER_;\n", ifC catIdent "{LETTER}{IDENT}* \t yylval.string_ = strdup(yytext); return _IDENT_;\n", "\\n ++yy_mylinenumber ;\n", "[ \\t\\r\\n\\f] \t /* ignore white space. */;\n", ". \t return _ERROR_;\n", "%%\n", footer ] where ifC cat s = if isUsedCat cf cat then s else "" userDefTokens = unlines $ ["" ++ printRegFlex exp ++ " \t yylval.string_ = strdup(yytext); return " ++ sName name ++ ";" | (name, exp) <- tokenPragmas cf] where sName n = case lookup (show n) env of Just x -> x Nothing -> show n strStates = unlines --These handle escaped characters in Strings. [ "\"\\\"\" \t BEGIN STRING;", "\\\\ \t BEGIN ESCAPED;", "\\\" \t yylval.string_ = strdup(YY_PARSED_STRING); YY_BUFFER_RESET(); BEGIN YYINITIAL; return _STRING_;", ". \t YY_BUFFER_APPEND(yytext);", "n \t YY_BUFFER_APPEND(\"\\n\"); BEGIN STRING;", "\\\" \t YY_BUFFER_APPEND(\"\\\"\"); BEGIN STRING ;", "\\\\ \t YY_BUFFER_APPEND(\"\\\\\"); BEGIN STRING;", "t \t YY_BUFFER_APPEND(\"\\t\"); BEGIN STRING;", ". \t YY_BUFFER_APPEND(yytext); BEGIN STRING;" ] chStates = unlines --These handle escaped characters in Chars. [ "\"'\" \tBEGIN CHAR;", "\\\\ \t BEGIN CHARESC;", "[^'] \t BEGIN CHAREND; yylval.char_ = yytext[0]; return _CHAR_;", "n \t BEGIN CHAREND; yylval.char_ = '\\n'; return _CHAR_;", "t \t BEGIN CHAREND; yylval.char_ = '\\t'; return _CHAR_;", ". \t BEGIN CHAREND; yylval.char_ = yytext[0]; return _CHAR_;", "\"'\" \t BEGIN YYINITIAL;" ] footer = "void initialize_lexer(FILE *inp) { yyrestart(inp); BEGIN YYINITIAL; }" -- --------------------------------------------------------------------------- -- Comments -- | Create flex rules for single-line and multi-lines comments. -- The first argument is an optional namespace (for C++); the second -- argument is the set of comment delimiters as returned by BNFC.CF.comments. -- -- This function is only compiling the results of applying either -- lexSingleComment or lexMultiComment on each comment delimiter or pair of -- delimiters. -- -- >>> lexComments (Just "myns.") ([("{-","-}")],["--"]) -- "--"[^\n]*\n ++myns.yy_mylinenumber; // BNFC: comment "--"; -- "{-" BEGIN COMMENT; // BNFC: comment "{-" "-}"; -- "-}" BEGIN YYINITIAL; -- . /* skip */; -- [\n] ++myns.yy_mylinenumber; lexComments :: Maybe String -> ([(String, String)], [String]) -> Doc lexComments ns (m,s) = vcat (map (lexSingleComment ns) s ++ map (lexMultiComment ns) m) -- | Create a lexer rule for single-line comments. -- The first argument is -- an optional c++ namespace -- The second argument is the delimiter that marks the beginning of the -- comment. -- -- >>> lexSingleComment (Just "mypackage.") "--" -- "--"[^\n]*\n ++mypackage.yy_mylinenumber; // BNFC: comment "--"; -- -- >>> lexSingleComment Nothing "--" -- "--"[^\n]*\n ++yy_mylinenumber; // BNFC: comment "--"; -- -- >>> lexSingleComment Nothing "\"" -- "\""[^\n]*\n ++yy_mylinenumber; // BNFC: comment "\""; lexSingleComment :: Maybe String -> String -> Doc lexSingleComment ns c = "" <> cstring c <> "[^\\n]*\\n" <+> "++"<> text (fromMaybe "" ns)<>"yy_mylinenumber;" <+> "// BNFC: comment" <+> cstring c <> ";" -- | Create a lexer rule for multi-lines comments. -- The first argument is -- an optional c++ namespace -- The second arguments is the pair of delimiter for the multi-lines comment: -- start deleminiter and end delimiter. -- There might be a possible bug here if a language includes 2 multi-line -- comments. They could possibly start a comment with one character and end it -- with another. However this seems rare. -- -- >>> lexMultiComment Nothing ("{-", "-}") -- "{-" BEGIN COMMENT; // BNFC: comment "{-" "-}"; -- "-}" BEGIN YYINITIAL; -- . /* skip */; -- [\n] ++yy_mylinenumber; -- -- >>> lexMultiComment (Just "foo.") ("{-", "-}") -- "{-" BEGIN COMMENT; // BNFC: comment "{-" "-}"; -- "-}" BEGIN YYINITIAL; -- . /* skip */; -- [\n] ++foo.yy_mylinenumber; -- -- >>> lexMultiComment Nothing ("\"'", "'\"") -- "\"'" BEGIN COMMENT; // BNFC: comment "\"'" "'\""; -- "'\"" BEGIN YYINITIAL; -- . /* skip */; -- [\n] ++yy_mylinenumber; lexMultiComment :: Maybe String -> (String, String) -> Doc lexMultiComment ns (b,e) = vcat [ "" <> cstring b <+> "BEGIN COMMENT;" <+> "// BNFC: comment" <+> cstring b <+> cstring e <> ";" , "" <> cstring e <+> "BEGIN YYINITIAL;" , ". /* skip */;" , "[\\n] ++"<> text (fromMaybe "" ns) <>"yy_mylinenumber;" ] -- --There might be a possible bug here if a language includes 2 multi-line comments. -- --They could possibly start a comment with one character and end it with another. -- --However this seems rare. -- -- -- lexMultiComment :: Maybe String -> (String, String) -> String -- lexMultiComment inPackage (b,e) = unlines [ -- "\"" ++ b ++ "\" \t BEGIN COMMENT;", -- "\"" ++ e ++ "\" \t BEGIN YYINITIAL;", -- ". \t /* BNFC multi-line comment */;", -- "[\\n] ++" ++ nsString inPackage ++ "yy_mylinenumber ; \t /* BNFC multi-line comment */;" -- ---- "\\n ++yy_mylinenumber ;" -- ] --Helper function that escapes characters in strings escapeChars :: String -> String escapeChars [] = [] escapeChars ('\\':xs) = '\\' : ('\\' : (escapeChars xs)) escapeChars ('\"':xs) = '\\' : ('\"' : (escapeChars xs)) escapeChars (x:xs) = x : (escapeChars xs)