{ {- CAO Compiler Copyright (C) 2014 Cryptography and Information Security Group, HASLab — INESC TEC and Universidade do Minho This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . -} {-# OPTIONS_GHC -w #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE FlexibleInstances #-} {- Module : $Header$ Description : CAO language lexer. Copyright : (C) 2014 Cryptography and Information Security Group, HASLab - INESC TEC and Universidade do Minho License : GPL Maintainer : Paulo Silva Stability : experimental Portability : non-portable -} module Language.CAO.Parser.Lexer where import Control.Monad.Error import Control.Monad.State import Language.CAO.Common.Error import Language.CAO.Common.Outputable import Language.CAO.Common.SrcLoc import Language.CAO.Common.Utils (ifM, split) import Language.CAO.Parser.Tokens import Language.CAO.Semantics.Bits (stringToBits) } %wrapper "monadUserState" $digit = [0-9] $hexdig = [0-9A-Fa-f] $bindig = [01] $alpha = [A-Za-z] $alphaext = [0-9A-Za-z'_] @identifier = $alpha$alphaext* @number = $digit+ @hexnumber = 0x$hexdig+ @binnumber = 0b$bindig+ @signbinnumber = 1b$bindig+ tokens :- <0> def { lexerTokenInfo TokenDef } <0> typedef { lexerTokenInfo TokenTypedef } <0> const { lexerTokenInfo TokenConst } <0> \: { lexerTokenInfo TokenOfType } <0> of { lexerTokenInfo TokenOf } <0> \:= { lexerTokenInfo TokenAssign } <0> return { lexerTokenInfo TokenReturn } <0> \[ { lexerTokenInfo TokenOSB } <0> \] { lexerTokenInfo TokenCSB } <0> \{ { lexerTokenInfo TokenOCB } <0> \} { lexerTokenInfo TokenCCB } <0> \; { lexerTokenInfo TokenSemiColon } <0> \, { lexerTokenInfo TokenComma } <0> \.\. { lexerTokenInfo TokenDoublePeriod } <0> \. { lexerTokenInfo TokenPeriod } <0> true { lexerTokenInfo TokenTrue } <0> false { lexerTokenInfo TokenFalse } <0> void { lexerTokenInfo TokenVoid } <0> unsigned { lexerTokenInfo TokenUnsigned } <0> signed { lexerTokenInfo TokenSigned } <0> register { lexerTokenInfo TokenRegister } <0> int { lexerTokenInfo TokenInt } <0> bits { lexerTokenInfo TokenBits } <0> bool { lexerTokenInfo TokenBool } <0> vector { lexerTokenInfo TokenVector } <0> matrix { lexerTokenInfo TokenMatrix } <0> mod { lexerTokenInfo TokenMod } <0> struct { lexerTokenInfo TokenStruct } <0> if { lexerTokenInfo TokenIf } <0> else { lexerTokenInfo TokenElse } <0> while { lexerTokenInfo TokenWhile } <0> seq { lexerTokenInfo TokenSeq } <0> by { lexerTokenInfo TokenBy } <0> to { lexerTokenInfo TokenTo } <0> == { lexerTokenInfo TokenEq } <0> \&\& { lexerTokenInfo TokenAnd } <0> \|\| { lexerTokenInfo TokenOr } <0> \>= { lexerTokenInfo TokenGET } <0> \<= { lexerTokenInfo TokenLET } <0> \> { lexerTokenInfo TokenGT } <0> \< { lexerTokenInfo TokenLT } <0> ! { lexerTokenInfo TokenNot } <0> != { lexerTokenInfo TokenNotEqual } <0> \^\^ { lexerTokenInfo TokenXor } <0> \+ { lexerTokenInfo TokenPlus } <0> \- { lexerTokenInfo TokenMinus } <0> \* { lexerTokenInfo TokenTimes } <0> \/ { lexerTokenInfo TokenDiv } <0> \*\* { lexerTokenInfo TokenPower } <0> \% { lexerTokenInfo TokenRemainder } <0> \~ { lexerTokenInfo TokenBitNot } <0> \& { lexerTokenInfo TokenBitAnd } <0> \| { lexerTokenInfo TokenBitOr } <0> \^ { lexerTokenInfo TokenBitXor } <0> \<\< { lexerTokenInfo TokenShiftUp } <0> \>\> { lexerTokenInfo TokenShiftDown } <0> \<\| { lexerTokenInfo TokenRotUp } <0> \|\> { lexerTokenInfo TokenRotDown } <0> @ { lexerTokenInfo TokenConcat } <0> \( { lexerTokenInfo TokenOB } <0> \) { lexerTokenInfo TokenCB } <0> @number { lexerTokenInfoFunc handleIntValue } <0> @hexnumber { lexerTokenInfoFunc handleIntValue } <0> @identifier { lexerTokenInfoFunc handleStr } <0> @binnumber { lexerTokenInfoFunc (handleBitsValue TokenUnsignedBitsValue) } <0> @signbinnumber { lexerTokenInfoFunc (handleBitsValue TokenSignBitsValue) } <0> "//".* ; <0> \/\* { enterNewComment } \/\* { embedComment } \*\/ { unembedComment } <0> $white+ ; $white+ ; <0> . { lexerTokenInfoFunc handleError } . ; { -- Token Functions ------------------------------------------------------------- lexerTokenInfo :: Token -> AlexInput -> Int -> Alex TokenInfo lexerTokenInfo t (AlexPn a ln c, _, _, s) l = return $ TokenInfo t (take l $ s) (srcLoc ln c a) lexerTokenInfoFunc :: (String -> Alex Token) -> AlexInput -> Int -> Alex TokenInfo lexerTokenInfoFunc f (AlexPn a ln c, _, _, s) l = do r <- f (take (fromIntegral l) s) return $ TokenInfo r (take (fromIntegral l) s) (srcLoc ln c a) handleIntValue :: String -> Alex Token handleIntValue = return . TokenIntValue . read handleBitsValue :: ([Bool] -> Token) -> String -> Alex Token handleBitsValue cnstr = return . cnstr . stringToBits . drop 2 handleStr :: String -> Alex Token handleStr s = do aus <- get return $ ifM (`elem` types aus) TokenTypeAlias TokenStr s handleError :: String -> Alex Token handleError _ = return TokenError enterNewComment :: AlexInput -> Int -> Alex TokenInfo enterNewComment input len = do modify (\ aus -> aus { commentDepth = 1 } ) alexSetStartCode comment skip input len embedComment :: AlexInput -> Int -> Alex TokenInfo embedComment input len = do modify (\ aus -> aus { commentDepth = commentDepth aus + 1 }) skip input len unembedComment :: AlexInput -> Int -> Alex TokenInfo unembedComment input len = do aus <- get let cd = commentDepth aus put (aus { commentDepth = cd - 1 }) when (cd == 1) $ alexSetStartCode 0 skip input len -- Alex Functions -------------------------------------------------------------- data AlexUserState = AlexUserState { filename :: !String , types :: [String] , commentDepth :: Integer } alexInitUserState :: AlexUserState alexInitUserState = AlexUserState "" [] 0 instance MonadState AlexUserState Alex where get = alexGetUserState put = alexSetUserState instance MonadError CaoError Alex where throwError e = Alex $ \ s -> Left (show e) catchError (Alex un) f = Alex $ \ s -> either (catchMe s) Right (un s) where catchMe s = fmap (split (const s) id) . runAlex "" . f . read alexSetUserState :: AlexUserState -> Alex () alexSetUserState ust = Alex $ \ s -> Right (s {alex_ust = ust}, ()) alexGetUserState :: Alex AlexUserState alexGetUserState = Alex $ \ s -> Right (s, alex_ust s) alexEOF :: Alex TokenInfo alexEOF = do (AlexPn a ln c, _, _, _) <- alexGetInput return $ TokenInfo TokenEOF "" (srcLoc ln c a) -- Processing Functions -------------------------------------------------------- getTokens :: Alex [TokenInfo] getTokens = do tok <- alexMonadScan case tSymb tok of TokenEOF -> return [tok] _ -> liftM (tok:) getTokens flushLexer :: Alex () flushLexer = getTokens >> return () }