{- BNF Converter: Alex 3.x Generator Copyright (C) 2012 Author: Antti-Juhani Kaijanaho Copyright (C) 2004 Author: Peter Gammie (C)opyright 2003, {aarne,markus,peteg} at cs dot chalmers dot se -} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} module BNFC.Backend.Haskell.CFtoAlex3 (cf2alex3) where import Data.Char import qualified Data.List as List import BNFC.Abs import BNFC.CF import BNFC.Lexing ( mkRegMultilineComment ) import BNFC.Options ( TokenText(..) ) import BNFC.PrettyPrint import BNFC.Utils ( table, when, unless ) import BNFC.Backend.Common ( unicodeAndSymbols ) import BNFC.Backend.Haskell.Utils cf2alex3 :: String -> TokenText -> CF -> String cf2alex3 name tokenText cf = unlines $ List.intercalate [""] $ -- equivalent to vsep: intersperse empty lines [ prelude name tokenText , cMacros , rMacros cf , restOfAlex tokenText cf ] prelude :: String -> TokenText -> [String] prelude name tokenText = concat [ [ "-- Lexer definition for use with Alex 3" , "{" , "{-# OPTIONS -fno-warn-incomplete-patterns #-}" , "{-# OPTIONS_GHC -w #-}" , "" , "{-# LANGUAGE PatternSynonyms #-}" , "" , "module " ++ name ++ " where" , "" , "import Prelude" , "" ] , tokenTextImport tokenText , [ "import qualified Data.Bits" , "import Data.Char (ord)" , "import Data.Function (on)" , "import Data.Word (Word8)" , "}" ] ] -- | Character class definitions. cMacros :: [String] cMacros = [ "-- Predefined character classes" , "" , "$c = [A-Z\\192-\\221] # [\\215] -- capital isolatin1 letter (215 = \\times) FIXME" , "$s = [a-z\\222-\\255] # [\\247] -- small isolatin1 letter (247 = \\div ) FIXME" , "$l = [$c $s] -- letter" , "$d = [0-9] -- digit" , "$i = [$l $d _ '] -- identifier character" , "$u = [. \\n] -- universal: any character" ] -- | Regular expressions and lex actions. rMacros :: CF -> [String] rMacros cf = unless (null symbs) [ "-- Symbols and non-identifier-like reserved words" , "" , "@rsyms = " ++ List.intercalate " | " (map (unwords . esc) symbs) ] where symbs = unicodeAndSymbols cf esc :: String -> [String] esc s = if null a then rest else show a : rest where (a, r) = span (\ c -> isAscii c && isAlphaNum c) s rest = case r of [] -> [] c : xs -> (if isPrint c then ['\\',c] else '\\' : show (ord c)) : esc xs restOfAlex :: TokenText -> CF -> [String] restOfAlex tokenText cf = concat [ [ ":-" , "" ] , lexComments $ comments cf , [ "-- Whitespace (skipped)" , "$white+ ;" , "" ] , unless (null $ unicodeAndSymbols cf) [ "-- Symbols" , "@rsyms" , " { tok (eitherResIdent TV) }" , "" ] , userDefTokenTypes , [ "-- Keywords and Ident" , "$l $i*" , " { tok (eitherResIdent TV) }" , "" ] , ifC catString [ "-- String" , "\\\" ([$u # [\\\" \\\\ \\n]] | (\\\\ (\\\" | \\\\ | \\' | n | t | r | f)))* \\\"" , " { tok (TL . unescapeInitTail) }" , "" ] , ifC catChar [ "-- Char" , "\\\' ($u # [\\\' \\\\] | \\\\ [\\\\ \\\' n t r f]) \\'" , " { tok TC }" , "" ] , ifC catInteger [ "-- Integer" , "$d+" , " { tok TI }" , "" ] , ifC catDouble [ "-- Double" , "$d+ \\. $d+ (e (\\-)? $d+)?" , " { tok TD }" , "" ] , [ "{" , "-- | Create a token with position." , "tok :: (" ++ stringType ++ " -> Tok) -> (Posn -> " ++ stringType ++ " -> Token)" , "tok f p = PT p . f" , "" , "-- | Token without position." , "data Tok" ] , map (" " ++) $ table " " $ [ [ "= TK {-# UNPACK #-} !TokSymbol", "-- ^ Reserved word or symbol." ] , [ "| TL !" ++ stringType , "-- ^ String literal." ] , [ "| TI !" ++ stringType , "-- ^ Integer literal." ] , [ "| TV !" ++ stringType , "-- ^ Identifier." ] , [ "| TD !" ++ stringType , "-- ^ Float literal." ] , [ "| TC !" ++ stringType , "-- ^ Character literal." ] ] , [ " | T_" ++ name ++ " !" ++ stringType | name <- tokenNames cf ] , [ " deriving (Eq, Show, Ord)" , "" , "-- | Smart constructor for 'Tok' for the sake of backwards compatibility." , "pattern TS :: " ++ stringType ++ " -> Int -> Tok" , "pattern TS t i = TK (TokSymbol t i)" , "" , "-- | Keyword or symbol tokens have a unique ID." , "data TokSymbol = TokSymbol" , " { tsText :: " ++ stringType , " -- ^ Keyword or symbol text." , " , tsID :: !Int" , " -- ^ Unique ID." , " } deriving (Show)" , "" , "-- | Keyword/symbol equality is determined by the unique ID." , "instance Eq TokSymbol where (==) = (==) `on` tsID" , "" , "-- | Keyword/symbol ordering is determined by the unique ID." , "instance Ord TokSymbol where compare = compare `on` tsID" , "" , "-- | Token with position." , "data Token" , " = PT Posn Tok" , " | Err Posn" , " deriving (Eq, Show, Ord)" , "" , "-- | Pretty print a position." , "printPosn :: Posn -> String" , "printPosn (Pn _ l c) = \"line \" ++ show l ++ \", column \" ++ show c" , "" , "-- | Pretty print the position of the first token in the list." , "tokenPos :: [Token] -> String" , "tokenPos (t:_) = printPosn (tokenPosn t)" , "tokenPos [] = \"end of file\"" , "" , "-- | Get the position of a token." , "tokenPosn :: Token -> Posn" , "tokenPosn (PT p _) = p" , "tokenPosn (Err p) = p" , "" , "-- | Get line and column of a token." , "tokenLineCol :: Token -> (Int, Int)" , "tokenLineCol = posLineCol . tokenPosn" , "" , "-- | Get line and column of a position." , "posLineCol :: Posn -> (Int, Int)" , "posLineCol (Pn _ l c) = (l,c)" , "" , "-- | Convert a token into \"position token\" form." , "mkPosToken :: Token -> ((Int, Int), " ++ stringType ++ ")" , "mkPosToken t = (tokenLineCol t, tokenText t)" , "" , "-- | Convert a token to its text." , "tokenText :: Token -> " ++ stringType , "tokenText t = case t of" , " PT _ (TS s _) -> s" , " PT _ (TL s) -> " ++ applyP stringPack "show s" , " PT _ (TI s) -> s" , " PT _ (TV s) -> s" , " PT _ (TD s) -> s" , " PT _ (TC s) -> s" , " Err _ -> " ++ apply stringPack "\"#error\"" ] , [ " PT _ (T_" ++ name ++ " s) -> s" | name <- tokenNames cf ] , [ "" , "-- | Convert a token to a string." , "prToken :: Token -> String" , "prToken t = " ++ applyP stringUnpack "tokenText t" , "" , "-- | Finite map from text to token organized as binary search tree." , "data BTree" , " = N -- ^ Nil (leaf)." , " | B " ++ stringType ++ " Tok BTree BTree" , " -- ^ Binary node." , " deriving (Show)" , "" , "-- | Convert potential keyword into token or use fallback conversion." , "eitherResIdent :: (" ++ stringType ++ " -> Tok) -> " ++ stringType ++ " -> Tok" , "eitherResIdent tv s = treeFind resWords" , " where" , " treeFind N = tv s" , " treeFind (B a t left right) =" , " case compare s a of" , " LT -> treeFind left" , " GT -> treeFind right" , " EQ -> t" , "" , "-- | The keywords and symbols of the language organized as binary search tree." , "resWords :: BTree" , render $ hang "resWords =" 2 $ pretty $ sorted2tree tokens ] , unless (null tokens) [ " where" , " b s n = B bs (TS bs n)" , " where" , " bs = "++ apply stringPack "s" ] , [ "" , "-- | Unquote string literal." , "unescapeInitTail :: " ++ stringType ++ " -> " ++ stringType ++ "" , "unescapeInitTail = " ++ stringPack ++ " . unesc . tail . " ++ stringUnpack , " where" , " unesc s = case s of" , " '\\\\':c:cs | elem c ['\\\"', '\\\\', '\\\''] -> c : unesc cs" , " '\\\\':'n':cs -> '\\n' : unesc cs" , " '\\\\':'t':cs -> '\\t' : unesc cs" , " '\\\\':'r':cs -> '\\r' : unesc cs" , " '\\\\':'f':cs -> '\\f' : unesc cs" , " '\"':[] -> []" , " c:cs -> c : unesc cs" , " _ -> []" , "" , "-------------------------------------------------------------------" , "-- Alex wrapper code." , "-- A modified \"posn\" wrapper." , "-------------------------------------------------------------------" , "" , "data Posn = Pn !Int !Int !Int" , " deriving (Eq, Show, Ord)" , "" , "alexStartPos :: Posn" , "alexStartPos = Pn 0 1 1" , "" , "alexMove :: Posn -> Char -> Posn" , "alexMove (Pn a l c) '\\t' = Pn (a+1) l (((c+7) `div` 8)*8+1)" , "alexMove (Pn a l c) '\\n' = Pn (a+1) (l+1) 1" , "alexMove (Pn a l c) _ = Pn (a+1) l (c+1)" , "" , "type Byte = Word8" , "" , "type AlexInput = (Posn, -- current position," , " Char, -- previous char" , " [Byte], -- pending bytes on the current char" , " " ++ stringType ++ ") -- current input string" , "" , "tokens :: " ++ stringType ++ " -> [Token]" , "tokens str = go (alexStartPos, '\\n', [], str)" , " where" , " go :: AlexInput -> [Token]" , " go inp@(pos, _, _, str) =" , " case alexScan inp 0 of" , " AlexEOF -> []" , " AlexError (pos, _, _, _) -> [Err pos]" , " AlexSkip inp' len -> go inp'" , " AlexToken inp' len act -> act pos (" ++ stringTake ++ " len str) : (go inp')" , "" , "alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)" , "alexGetByte (p, c, (b:bs), s) = Just (b, (p, c, bs, s))" , "alexGetByte (p, _, [], s) =" , " case " ++ apply stringUncons "s" ++ " of" , " " ++ stringNilP ++ " -> Nothing" , " " ++ stringConsP ++ " ->" , " let p' = alexMove p c" , " (b:bs) = utf8Encode c" , " in p' `seq` Just (b, (p', c, bs, s))" , "" , "alexInputPrevChar :: AlexInput -> Char" , "alexInputPrevChar (p, c, bs, s) = c" , "" , "-- | Encode a Haskell String to a list of Word8 values, in UTF8 format." , "utf8Encode :: Char -> [Word8]" , "utf8Encode = map fromIntegral . go . ord" , " where" , " go oc" , " | oc <= 0x7f = [oc]" , "" , " | oc <= 0x7ff = [ 0xc0 + (oc `Data.Bits.shiftR` 6)" , " , 0x80 + oc Data.Bits..&. 0x3f" , " ]" , "" , " | oc <= 0xffff = [ 0xe0 + (oc `Data.Bits.shiftR` 12)" , " , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f)" , " , 0x80 + oc Data.Bits..&. 0x3f" , " ]" , " | otherwise = [ 0xf0 + (oc `Data.Bits.shiftR` 18)" , " , 0x80 + ((oc `Data.Bits.shiftR` 12) Data.Bits..&. 0x3f)" , " , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f)" , " , 0x80 + oc Data.Bits..&. 0x3f" , " ]" , "}" ] ] where (stringType, stringTake, stringUncons, stringPack, stringUnpack, stringNilP, stringConsP) = case tokenText of StringToken -> ("String", "take", "", "id", "id", "[]", "(c:s)" ) ByteStringToken -> ("BS.ByteString", "BS.take", "BS.uncons", "BS.pack", "BS.unpack", "Nothing", "Just (c,s)") TextToken -> ("Data.Text.Text", "Data.Text.take", "Data.Text.uncons", "Data.Text.pack", "Data.Text.unpack", "Nothing", "Just (c,s)") apply :: String -> String -> String apply "" s = s apply "id" s = s apply f s = f ++ " " ++ s applyP :: String -> String -> String applyP "" s = s applyP "id" s = s applyP f s = f ++ " (" ++ s ++ ")" ifC :: Monoid m => TokenCat -> m -> m ifC = when . isUsedCat cf . TokenCat lexComments :: ( [(String, String)] -- block comment delimiters , [String] -- line comment initiators ) -> [String] -- Alex declarations lexComments (block, line) = concat $ [ concatMap lexLineComment line , concatMap (uncurry lexBlockComment) block ] lexLineComment :: String -- ^ Line comment start. -> [String] -- ^ Alex declaration. lexLineComment s = [ unwords [ "-- Line comment", show s ] , concat [ "\"", s, "\" [.]* ;" ] , "" ] lexBlockComment :: String -- ^ Start of block comment. -> String -- ^ End of block comment. -> [String] -- ^ Alex declaration. lexBlockComment start end = [ unwords [ "-- Block comment", show start, show end ] , printRegAlex (mkRegMultilineComment start end) ++ " ;" , "" ] userDefTokenTypes :: [String] userDefTokenTypes = concat [ [ "-- token " ++ name , printRegAlex exp , " { tok (eitherResIdent T_" ++ name ++ ") }" , "" ] | (name, exp) <- tokenPragmas cf ] tokens = cfTokens cf -- | Binary search tree. data BTree a = N | B String a (BTree a) (BTree a) instance Pretty a => Pretty (BTree a) where prettyPrec _ N = text "N" prettyPrec n (B k v l r) = parensIf (n > 0) $ hang ("b" <+> text (show k) <+> pretty v) 2 $ sep [ prettyPrec 1 l , prettyPrec 1 r ] -- | Create a balanced search tree from a sorted list. sorted2tree :: [(String,a)] -> BTree a sorted2tree [] = N sorted2tree xs = B x n (sorted2tree t1) (sorted2tree t2) where (t1, (x,n) : t2) = splitAt (length xs `div` 2) xs ------------------------------------------------------------------- -- Inlined version of former @BNFC.Backend.Haskell.RegToAlex@. -- Syntax has changed... ------------------------------------------------------------------- -- modified from pretty-printer generated by the BNF converter -- the top-level printing method printRegAlex :: Reg -> String printRegAlex = render' . prt 0 render' :: [String] -> String render' = \case "[" : ts -> cons "[" $ render' ts "(" : ts -> cons "(" $ render' ts t : "," : ts -> cons t $ space "," $ render' ts t : ")" : ts -> cons t $ cons ")" $ render' ts t : "]" : ts -> cons t $ cons "]" $ render' ts t : ts -> space t $ render' ts _ -> "" where cons s t = s ++ t space t s = if null s then t else t ++ " " ++ s parenth :: [String] -> [String] parenth ss = ["("] ++ ss ++ [")"] -- the printer class does the job class Print a where prt :: Int -> a -> [String] prtList :: [a] -> [String] prtList = concatMap (prt 0) instance Print a => Print [a] where prt _ = prtList instance Print Char where prt _ = \case '\n' -> ["\\n"] '\t' -> ["\\t"] '\r' -> ["\\r"] '\f' -> ["\\f"] c | isAlphaNum c -> [[c]] c | isPrint c -> ['\\':[c]] -- ['\'':c:'\'':[]] -- Does not work for ) c -> ['\\':show (ord c)] prtList = map (concat . prt 0) prPrec :: Int -> Int -> [String] -> [String] prPrec i j = if j prPrec i 2 $ prt 2 reg0 ++ prt 3 reg RAlt reg0 reg -> prPrec i 1 $ concat [prt 1 reg0 , ["|"] , prt 2 reg] RStar reg -> prPrec i 3 $ prt 3 reg ++ ["*"] RPlus reg -> prPrec i 3 $ prt 3 reg ++ ["+"] ROpt reg -> prPrec i 3 $ prt 3 reg ++ ["?"] -- Atomic/parenthesized expressions RMinus reg0 reg -> concat [ ["["], prt 2 reg0 , ["#"] , prt 2 reg, ["]"] ] REps -> ["()"] RChar c -> prt 0 c RAlts str -> concat [["["],prt 0 str,["]"]] RSeqs str -> prPrec i 2 $ prt 0 str RDigit -> ["$d"] RLetter -> ["$l"] RUpper -> ["$c"] RLower -> ["$s"] RAny -> ["$u"]