{
{- CAO Compiler
Copyright (C) 2014 Cryptography and Information Security Group, HASLab — INESC TEC and Universidade do Minho
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see . -}
{-# OPTIONS_GHC -w #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE FlexibleInstances #-}
{-
Module : $Header$
Description : CAO language lexer.
Copyright : (C) 2014 Cryptography and Information Security Group, HASLab - INESC TEC and Universidade do Minho
License : GPL
Maintainer : Paulo Silva
Stability : experimental
Portability : non-portable
-}
module Language.CAO.Parser.Lexer where
import Control.Monad.Error
import Control.Monad.State
import Language.CAO.Common.Error
import Language.CAO.Common.Outputable
import Language.CAO.Common.SrcLoc
import Language.CAO.Common.Utils (ifM, split)
import Language.CAO.Parser.Tokens
import Language.CAO.Semantics.Bits (stringToBits)
}
%wrapper "monadUserState"
$digit = [0-9]
$hexdig = [0-9A-Fa-f]
$bindig = [01]
$alpha = [A-Za-z]
$alphaext = [0-9A-Za-z'_]
@identifier = $alpha$alphaext*
@number = $digit+
@hexnumber = 0x$hexdig+
@binnumber = 0b$bindig+
@signbinnumber = 1b$bindig+
tokens :-
<0> def { lexerTokenInfo TokenDef }
<0> typedef { lexerTokenInfo TokenTypedef }
<0> const { lexerTokenInfo TokenConst }
<0> \: { lexerTokenInfo TokenOfType }
<0> of { lexerTokenInfo TokenOf }
<0> \:= { lexerTokenInfo TokenAssign }
<0> return { lexerTokenInfo TokenReturn }
<0> \[ { lexerTokenInfo TokenOSB }
<0> \] { lexerTokenInfo TokenCSB }
<0> \{ { lexerTokenInfo TokenOCB }
<0> \} { lexerTokenInfo TokenCCB }
<0> \; { lexerTokenInfo TokenSemiColon }
<0> \, { lexerTokenInfo TokenComma }
<0> \.\. { lexerTokenInfo TokenDoublePeriod }
<0> \. { lexerTokenInfo TokenPeriod }
<0> true { lexerTokenInfo TokenTrue }
<0> false { lexerTokenInfo TokenFalse }
<0> void { lexerTokenInfo TokenVoid }
<0> unsigned { lexerTokenInfo TokenUnsigned }
<0> signed { lexerTokenInfo TokenSigned }
<0> register { lexerTokenInfo TokenRegister }
<0> int { lexerTokenInfo TokenInt }
<0> bits { lexerTokenInfo TokenBits }
<0> bool { lexerTokenInfo TokenBool }
<0> vector { lexerTokenInfo TokenVector }
<0> matrix { lexerTokenInfo TokenMatrix }
<0> mod { lexerTokenInfo TokenMod }
<0> struct { lexerTokenInfo TokenStruct }
<0> if { lexerTokenInfo TokenIf }
<0> else { lexerTokenInfo TokenElse }
<0> while { lexerTokenInfo TokenWhile }
<0> seq { lexerTokenInfo TokenSeq }
<0> by { lexerTokenInfo TokenBy }
<0> to { lexerTokenInfo TokenTo }
<0> == { lexerTokenInfo TokenEq }
<0> \&\& { lexerTokenInfo TokenAnd }
<0> \|\| { lexerTokenInfo TokenOr }
<0> \>= { lexerTokenInfo TokenGET }
<0> \<= { lexerTokenInfo TokenLET }
<0> \> { lexerTokenInfo TokenGT }
<0> \< { lexerTokenInfo TokenLT }
<0> ! { lexerTokenInfo TokenNot }
<0> != { lexerTokenInfo TokenNotEqual }
<0> \^\^ { lexerTokenInfo TokenXor }
<0> \+ { lexerTokenInfo TokenPlus }
<0> \- { lexerTokenInfo TokenMinus }
<0> \* { lexerTokenInfo TokenTimes }
<0> \/ { lexerTokenInfo TokenDiv }
<0> \*\* { lexerTokenInfo TokenPower }
<0> \% { lexerTokenInfo TokenRemainder }
<0> \~ { lexerTokenInfo TokenBitNot }
<0> \& { lexerTokenInfo TokenBitAnd }
<0> \| { lexerTokenInfo TokenBitOr }
<0> \^ { lexerTokenInfo TokenBitXor }
<0> \<\< { lexerTokenInfo TokenShiftUp }
<0> \>\> { lexerTokenInfo TokenShiftDown }
<0> \<\| { lexerTokenInfo TokenRotUp }
<0> \|\> { lexerTokenInfo TokenRotDown }
<0> @ { lexerTokenInfo TokenConcat }
<0> \( { lexerTokenInfo TokenOB }
<0> \) { lexerTokenInfo TokenCB }
<0> @number { lexerTokenInfoFunc handleIntValue }
<0> @hexnumber { lexerTokenInfoFunc handleIntValue }
<0> @identifier { lexerTokenInfoFunc handleStr }
<0> @binnumber { lexerTokenInfoFunc (handleBitsValue
TokenUnsignedBitsValue) }
<0> @signbinnumber { lexerTokenInfoFunc (handleBitsValue
TokenSignBitsValue) }
<0> "//".* ;
<0> \/\* { enterNewComment }
\/\* { embedComment }
\*\/ { unembedComment }
<0> $white+ ;
$white+ ;
<0> . { lexerTokenInfoFunc handleError }
. ;
{
-- Token Functions -------------------------------------------------------------
lexerTokenInfo :: Token -> AlexInput -> Int -> Alex TokenInfo
lexerTokenInfo t (AlexPn a ln c, _, _, s) l =
return $ TokenInfo t (take l $ s) (srcLoc ln c a)
lexerTokenInfoFunc :: (String -> Alex Token) -> AlexInput -> Int -> Alex TokenInfo
lexerTokenInfoFunc f (AlexPn a ln c, _, _, s) l = do
r <- f (take (fromIntegral l) s)
return $ TokenInfo r (take (fromIntegral l) s) (srcLoc ln c a)
handleIntValue :: String -> Alex Token
handleIntValue = return . TokenIntValue . read
handleBitsValue :: ([Bool] -> Token) -> String -> Alex Token
handleBitsValue cnstr = return . cnstr . stringToBits . drop 2
handleStr :: String -> Alex Token
handleStr s = do
aus <- get
return $ ifM (`elem` types aus) TokenTypeAlias TokenStr s
handleError :: String -> Alex Token
handleError _ = return TokenError
enterNewComment :: AlexInput -> Int -> Alex TokenInfo
enterNewComment input len = do
modify (\ aus -> aus { commentDepth = 1 } )
alexSetStartCode comment
skip input len
embedComment :: AlexInput -> Int -> Alex TokenInfo
embedComment input len = do
modify (\ aus -> aus { commentDepth = commentDepth aus + 1 })
skip input len
unembedComment :: AlexInput -> Int -> Alex TokenInfo
unembedComment input len = do
aus <- get
let cd = commentDepth aus
put (aus { commentDepth = cd - 1 })
when (cd == 1) $ alexSetStartCode 0
skip input len
-- Alex Functions --------------------------------------------------------------
data AlexUserState = AlexUserState
{ filename :: !String
, types :: [String]
, commentDepth :: Integer
}
alexInitUserState :: AlexUserState
alexInitUserState = AlexUserState "" [] 0
instance MonadState AlexUserState Alex where
get = alexGetUserState
put = alexSetUserState
instance MonadError CaoError Alex where
throwError e = Alex $ \ s -> Left (show e)
catchError (Alex un) f = Alex $ \ s -> either (catchMe s) Right (un s)
where
catchMe s = fmap (split (const s) id) . runAlex "" . f . read
alexSetUserState :: AlexUserState -> Alex ()
alexSetUserState ust = Alex $ \ s -> Right (s {alex_ust = ust}, ())
alexGetUserState :: Alex AlexUserState
alexGetUserState = Alex $ \ s -> Right (s, alex_ust s)
alexEOF :: Alex TokenInfo
alexEOF = do
(AlexPn a ln c, _, _, _) <- alexGetInput
return $ TokenInfo TokenEOF "" (srcLoc ln c a)
-- Processing Functions --------------------------------------------------------
getTokens :: Alex [TokenInfo]
getTokens = do
tok <- alexMonadScan
case tSymb tok of
TokenEOF -> return [tok]
_ -> liftM (tok:) getTokens
flushLexer :: Alex ()
flushLexer = getTokens >> return ()
}