{- BNF Converter: Java JLex generator Copyright (C) 2004 Author: Michael Pellauer This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- ************************************************************** BNF Converter Module Description : This module generates the JLex input file. This file is quite different than Alex or Flex. Author : Michael Pellauer (pellauer@cs.chalmers.se), Bjorn Bringert (bringert@cs.chalmers.se) License : GPL (GNU General Public License) Created : 25 April, 2003 Modified : 4 Nov, 2004 ************************************************************** -} module BNFC.Backend.Java.CFtoJLex15 ( cf2jlex ) where import BNFC.CF import BNFC.Backend.Java.RegToJLex import BNFC.Utils ( (+++) ) import BNFC.Backend.Common.NamedVariables import Text.PrettyPrint --The environment must be returned for the parser to use. 
-- | Build the complete JLex/JFlex specification for a grammar.
--
-- Arguments: the Java package name to emit, the grammar, and a flag that is
-- 'True' when the output targets JFlex rather than JLex.
--
-- Returns the specification together with the symbol environment (each
-- symbol / reserved word paired with a generated @_SYMB_n@ terminal name),
-- which has to be handed back because the parser generator needs it.
cf2jlex :: String -> CF -> Bool -> (Doc, SymEnv)
cf2jlex packageBase cf jflex = (spec, env)
  where
    spec = vcat
      [ prelude jflex packageBase
      , cMacros
      , lexSymbols jflex env
      , text $ unlines $ restOfJLex cf
      ]
    -- Number the symbols and reserved words consecutively from 0.
    env = zipWith mkEntry (symbols cf ++ reservedWords cf) [0 :: Int ..]
    mkEntry s n = (s, "_SYMB_" ++ show n)

-- | File prelude: package declaration, CUP import, scanner directives and
-- the user-code section (string buffer plus two small accessors).
prelude :: Bool -> String -> Doc
prelude jflex packageBase = vcat
  [ "// This JLex file was machine-generated by the BNF converter"
  , "package" <+> text packageBase <> ";"
  , ""
  , "import java_cup.runtime.*;"
  , "%%"
  , "%cup"
  , "%unicode"
  , "%line"
  , "%public"
  , "%{"
  , nest 2 $ vcat
      [ "String pstring = new String();"
      , "public int line_num() { return (yyline+1); }"
        -- The buffer and position fields are named differently in the
        -- JFlex and JLex output, hence the switch.
      , "public String buff()" <+> braces
          (if jflex
            then "return new String(zzBuffer,zzCurrentPos,10).trim();"
            else "return new String(yy_buffer,yy_buffer_index,10).trim();")
      ]
  , "%}"
  ]

-- | Character-class macros and lexer state declarations, terminated by the
-- @%%@ separator that opens the rules section.
--
-- For now all categories are included.
-- Optimally only the ones that are used should be generated.
cMacros :: Doc
cMacros = vcat
  [ "LETTER = ({CAPITAL}|{SMALL})"
  , "CAPITAL = [A-Z\\xC0-\\xD6\\xD8-\\xDE]"
  , "SMALL = [a-z\\xDF-\\xF6\\xF8-\\xFF]"
  , "DIGIT = [0-9]"
  , "IDENT = ({LETTER}|{DIGIT}|['_])"
  , "%state COMMENT"
  , "%state CHAR"
  , "%state CHARESC"
  , "%state CHAREND"
  , "%state STRING"
  , "%state ESCAPED"
  , "%%"
  ]

-- | One rule per symbol / reserved word, active only in the initial state.
--
-- >>> lexSymbols False [("foo","bar")]
-- <YYINITIAL>foo { return new Symbol(sym.bar); }
-- >>> lexSymbols False [("\\","bar")]
-- <YYINITIAL>\\ { return new Symbol(sym.bar); }
-- >>> lexSymbols False [("/","bar")]
-- <YYINITIAL>/ { return new Symbol(sym.bar); }
-- >>> lexSymbols True [("/","bar")]
-- <YYINITIAL>\/ { return new Symbol(sym.bar); }
-- >>> lexSymbols True [("~","bar")]
-- <YYINITIAL>\~ { return new Symbol(sym.bar); }
lexSymbols :: Bool -> SymEnv -> Doc
lexSymbols jflex ss = vcat $ map transSym ss
  where
    -- The state list matters: without it the rule would also fire inside
    -- strings, chars and comments.
    transSym (s, r) =
      "<YYINITIAL>" <> text (escapeChars s) <> " { return new Symbol(sym."
        <> text r <> "); }"
    -- Helper function that escapes characters in strings
    escapeChars :: String -> String
    escapeChars = concatMap (escapeChar jflex)

-- | Rules for comments, user-defined tokens, the built-in token categories
-- and white space.  Rules for a built-in category are only emitted when the
-- grammar uses that category; otherwise an empty line is emitted instead.
--
-- Every rule carries an explicit state list.  The states declared in
-- 'cMacros' only have an effect if the rules are restricted to them; an
-- unqualified rule is matched regardless of the current state, which would
-- let e.g. the string-body rule @.@ compete with ordinary tokens.
restOfJLex :: CF -> [String]
restOfJLex cf =
  [ lexComments (comments cf)
  , userDefTokens
  , ifC catString strStates
  , ifC catChar chStates
  , ifC catDouble "<YYINITIAL>{DIGIT}+\".\"{DIGIT}+(\"e\"(\\-)?{DIGIT}+)? { return new Symbol(sym._DOUBLE_, new Double(yytext())); }"
  , ifC catInteger "<YYINITIAL>{DIGIT}+ { return new Symbol(sym._INTEGER_, new Integer(yytext())); }"
  , ifC catIdent "<YYINITIAL>{LETTER}{IDENT}* { return new Symbol(sym._IDENT_, yytext().intern()); }"
  , "<YYINITIAL>[ \\t\\r\\n\\f] { /* ignore white space. */ }"
  ]
  where
    ifC cat s = if isUsedCat cf cat then s else ""
    userDefTokens = unlines
      [ "<YYINITIAL>" ++ printRegJLex regex +++ "{ return new Symbol(sym." ++ show name ++ ", yytext().intern()); }"
      | (name, regex) <- tokenPragmas cf
      ]
    strStates = unlines --These handle escaped characters in Strings.
      [ "<YYINITIAL>\"\\\"\" { yybegin(STRING); }"
      , "<STRING>\\\\ { yybegin(ESCAPED); }"
      , "<STRING>\\\" { String foo = pstring; pstring = new String(); yybegin(YYINITIAL); return new Symbol(sym._STRING_, foo.intern()); }"
      , "<STRING>. { pstring += yytext(); }"
      , "<ESCAPED>n { pstring += \"\\n\"; yybegin(STRING); }"
      , "<ESCAPED>\\\" { pstring += \"\\\"\"; yybegin(STRING); }"
      , "<ESCAPED>\\\\ { pstring += \"\\\\\"; yybegin(STRING); }"
      , "<ESCAPED>t { pstring += \"\\t\"; yybegin(STRING); }"
      , "<ESCAPED>. { pstring += yytext(); yybegin(STRING); }"
      ]
    chStates = unlines --These handle escaped characters in Chars.
      [ "<YYINITIAL>\"'\" { yybegin(CHAR); }"
      , "<CHAR>\\\\ { yybegin(CHARESC); }"
      , "<CHAR>[^'] { yybegin(CHAREND); return new Symbol(sym._CHAR_, new Character(yytext().charAt(0))); }"
      , "<CHARESC>n { yybegin(CHAREND); return new Symbol(sym._CHAR_, new Character('\\n')); }"
      , "<CHARESC>t { yybegin(CHAREND); return new Symbol(sym._CHAR_, new Character('\\t')); }"
      , "<CHARESC>. { yybegin(CHAREND); return new Symbol(sym._CHAR_, new Character(yytext().charAt(0))); }"
      , "<CHAREND>\"'\" {yybegin(YYINITIAL);}"
      ]

-- | Rules for all comment forms of the grammar.  The argument pairs the
-- multi-line delimiters (begin, end) with the single-line comment starters;
-- single-line rules are emitted first.
lexComments :: ([(String, String)], [String]) -> String
lexComments (multi, single) =
  unlines (map lexSingleComment single) ++ unlines (map lexMultiComment multi)

-- | Rule skipping a single-line comment: the given starter, followed by
-- everything up to and including the next newline.
lexSingleComment :: String -> String
lexSingleComment c =
  "<YYINITIAL>\"" ++ c ++ "\"[^\\n]*\\n { /* BNFC single-line comment */ }"

-- | Rules skipping a multi-line comment by switching into the COMMENT state
-- at the opening delimiter and back to the initial state at the closing one.
--
--There might be a possible bug here if a language includes 2 multi-line comments.
--They could possibly start a comment with one character and end it with another.
--However this seems rare.
lexMultiComment :: (String, String) -> String
lexMultiComment (b, e) = unlines
  [ "<YYINITIAL>\"" ++ b ++ "\" { yybegin(COMMENT); }"
  , "<COMMENT>\"" ++ e ++ "\" { yybegin(YYINITIAL); }"
  , "<COMMENT>. { }"
  , "<COMMENT>[\\n] { }"
  ]

-- Currently unused:
-- lexReserved :: String -> String
-- lexReserved s = "<YYINITIAL>\"" ++ s ++ "\" { return new Symbol(sym.TS, yytext()); }"