{ module Lexer where import Data.Char (chr) } %wrapper "monad" $digit = 0-9 $lower = [a-z] $upper = [A-Z] $space = [\ \t\n\r] $backslash = \\ $dquote = \" $notdquote = [^ \" ] tokens :- <0> $space+ { skip } "{-" { nested_comment } <0> "data" { mkL L_DATA } <0> "deriving" { mkL L_DERIVING } <0> "let" { mkL L_LET } <0> "in" { mkL L_IN } <0> "if" { mkL L_IF } <0> "then" { mkL L_THEN } <0> "else" { mkL L_ELSE } <0> "prev" { mkL L_PREV } <0> "curr" { mkL L_CURR } <0> "val" { mkL L_VAL } <0> "is" { mkL L_IS } <0> "rs" { mkL L_RS } <0> "gof" { mkL L_GOF } <0> "=" { mkL L_EQUAL } <0> "::" { mkL L_DBLCOLON } <0> "," { mkL L_COMMA } <0> ";" { mkL L_SEMICOLON } <0> $backslash { mkL L_BACKSLASH } <0> "->" { mkL L_RARROW } <0> "<-" { mkL L_LARROW } <0> ".^" { mkL L_DOTHAT } <0> "|" { mkL L_PIPE } <0> "&&" { mkL L_DBLAND } <0> "||" { mkL L_DBLOR } <0> "==" { mkL L_EQ } <0> "!=" { mkL L_NE } <0> "/=" { mkL L_NE } <0> "<=" { mkL L_LE } <0> ">=" { mkL L_GE } <0> "<" { mkL L_LT } <0> ">" { mkL L_GT } <0> "+" { mkL L_PLUS } <0> "-" { mkL L_MINUS } <0> "*" { mkL L_AST } <0> "/" { mkL L_SLASH } <0> "`" { mkL L_BACKQUOTE } <0> "(" { mkL L_LPAREN } <0> ")" { mkL L_RPAREN } <0> "{" { mkL L_LBRACE } <0> "}" { mkL L_RBRACE } <0> "[" { mkL L_LBRACKET } <0> "]" { mkL L_RBRACKET } <0> "True" { mkL L_BOOL } <0> "False" { mkL L_BOOL } <0> $digit+ { mkL L_INT } <0> $digit+ ("." $digit*)? ([eE] [\-\+]? $digit+)? { mkL L_FLOAT } <0> $dquote $notdquote* $dquote { mkL L_STRING } <0> ($lower|[_]) ($digit|$lower|$upper|[_'])* { mkL L_IDENT } <0> $upper ($digit|$lower|$upper|[_])* { mkL L_CONSTRUCTOR } { mkL :: LexemeClass -> AlexInput -> Int -> Alex Lexeme mkL c (p,_,_,str) len = return (L p c (take len str)) data Lexeme = L AlexPosn LexemeClass String deriving (Eq, Show) data LexemeClass = L_DATA | L_DERIVING | L_LET | L_IN | L_IF | L_THEN | L_ELSE | L_PREV | L_CURR | L_VAL | L_IS | L_RS | L_GOF | L_EQUAL | L_DBLCOLON | L_COMMA | L_SEMICOLON | L_BACKSLASH | L_RARROW | L_LARROW | L_DOTHAT | L_PIPE | L_DBLAND | L_DBLOR | L_EQ | L_NE | L_LE | L_GE | L_LT | L_GT | L_PLUS | L_MINUS | L_AST | L_SLASH | L_BACKQUOTE | L_LPAREN | L_RPAREN | L_LBRACE | L_RBRACE | L_LBRACKET | L_RBRACKET | L_IDENT | L_CONSTRUCTOR | L_BOOL | L_INT | L_FLOAT | L_STRING | LEOF deriving (Eq, Show) -- the idea of nested comments is borrowed from an Alex example for Haskell 98: -- https://github.com/simonmar/alex/blob/master/examples/haskell.x -- nested_comment :: AlexInput -> Int -> Alex Lexeme nested_comment _ _ = do input <- alexGetInput go 1 input where go 0 input = do alexSetInput input; alexMonadScan go n input = do case alexGetByte input of Nothing -> err input Just (c,input) -> do case chr (fromIntegral c) of '-' -> do -- checking the end of the commnet ? case alexGetByte input of Nothing -> err input Just (125,input) -> go (n-1) input Just (c,input) -> go n input '\123' -> do -- checking the beginnig of a new comment ? case alexGetByte input of Nothing -> err input Just (c,input) | c == fromIntegral (ord '-') -> go (n+1) input Just (c,input) -> go n input c -> go n input err input = do alexSetInput input; lexError "error in nested comment" lexError s = do (p,c,_,input) <- alexGetInput alexError (showPosn p ++ ": " ++ s ++ (if (not (null input)) then " before " ++ show (head input) else " at end of file")) scanner str = runAlex str $ do let loop i ts = do tok@(L _ cl _) <- alexMonadScan; if cl == LEOF then return (i, reverse ts) else do loop (i+1) (tok:ts) loop 0 [] alexEOF = return (L undefined LEOF "") showPosn (AlexPn _ line col) = show line ++ ':': show col main = getContents >>= print . scanner }