{
{- CAO Compiler
Copyright (C) 2014 Cryptography and Information Security Group, HASLab - INESC TEC and Universidade do Minho
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see . -}
{-
Module : $Header$
Description : CAO language parser.
Copyright : (C) 2014 Cryptography and Information Security Group, HASLab - INESC TEC and Universidade do Minho
License : GPL
Maintainer : Paulo Silva
Stability : experimental
Portability : non-portable
-}
module Language.CAO.Parser.Parser (
parseFile,
parseCao,
parseCommand,
Command(..),
) where
import Control.Monad.Error
import Control.Monad.State
import Language.CAO.Common.Error
import Language.CAO.Common.Literal
import Language.CAO.Common.Monad
import Language.CAO.Common.Polynomial
import Language.CAO.Common.SrcLoc
import Language.CAO.Common.Var
import Language.CAO.Parser.Lexer
import Language.CAO.Parser.Tokens
import Language.CAO.Syntax
import Language.CAO.Type
}
%name parse
%name parseDef Definition
%name parseStmt Statement
%name parseExpr Expression
%tokentype { TokenInfo }
%error { parseError }
%monad { Alex }
%lexer { lexer }{ TokenInfo TokenEOF _ _ }
%token
def { TokenInfo TokenDef _ _ }
typedef { TokenInfo TokenTypedef _ _ }
const { TokenInfo TokenConst _ _ }
':' { TokenInfo TokenOfType _ _ }
of { TokenInfo TokenOf _ _ }
':=' { TokenInfo TokenAssign _ _ }
return { TokenInfo TokenReturn _ _ }
'[' { TokenInfo TokenOSB _ _ }
']' { TokenInfo TokenCSB _ _ }
'{' { TokenInfo TokenOCB _ _ }
'}' { TokenInfo TokenCCB _ _ }
';' { TokenInfo TokenSemiColon _ _ }
',' { TokenInfo TokenComma _ _ }
'..' { TokenInfo TokenDoublePeriod _ _ }
'.' { TokenInfo TokenPeriod _ _ }
true { TokenInfo TokenTrue _ _ }
false { TokenInfo TokenFalse _ _ }
intLit { TokenInfo (TokenIntValue _) _ _ }
bitsLit { TokenInfo (TokenUnsignedBitsValue _) _ _ }
signbitsLit { TokenInfo (TokenSignBitsValue _) _ _ }
str { TokenInfo (TokenStr _) _ _ }
type_alias { TokenInfo (TokenTypeAlias _) _ _ }
void { TokenInfo TokenVoid _ _ }
unsigned { TokenInfo TokenUnsigned _ _ }
signed { TokenInfo TokenSigned _ _ }
register { TokenInfo TokenRegister _ _ }
int { TokenInfo TokenInt _ _ }
bits { TokenInfo TokenBits _ _ }
bool { TokenInfo TokenBool _ _ }
vector { TokenInfo TokenVector _ _ }
matrix { TokenInfo TokenMatrix _ _ }
mod { TokenInfo TokenMod _ _ }
struct { TokenInfo TokenStruct _ _ }
if { TokenInfo TokenIf _ _ }
else { TokenInfo TokenElse _ _ }
while { TokenInfo TokenWhile _ _ }
seq { TokenInfo TokenSeq _ _ }
to { TokenInfo TokenTo _ _ }
by { TokenInfo TokenBy _ _ }
'==' { TokenInfo TokenEq _ _ }
'&&' { TokenInfo TokenAnd _ _ }
'||' { TokenInfo TokenOr _ _ }
'>=' { TokenInfo TokenGET _ _ }
'<=' { TokenInfo TokenLET _ _ }
'>' { TokenInfo TokenGT _ _ }
'<' { TokenInfo TokenLT _ _ }
'!' { TokenInfo TokenNot _ _ }
'!=' { TokenInfo TokenNotEqual _ _ }
'^^' { TokenInfo TokenXor _ _ }
'+' { TokenInfo TokenPlus _ _ }
'-' { TokenInfo TokenMinus _ _ }
'*' { TokenInfo TokenTimes _ _ }
'/' { TokenInfo TokenDiv _ _ }
'**' { TokenInfo TokenPower _ _ }
'%' { TokenInfo TokenRemainder _ _ }
'~' { TokenInfo TokenBitNot _ _ }
'&' { TokenInfo TokenBitAnd _ _ }
'|' { TokenInfo TokenBitOr _ _ }
'^' { TokenInfo TokenBitXor _ _ }
'<<' { TokenInfo TokenShiftUp _ _ }
'>>' { TokenInfo TokenShiftDown _ _ }
'<|' { TokenInfo TokenRotUp _ _ }
'|>' { TokenInfo TokenRotDown _ _ }
'@' { TokenInfo TokenConcat _ _ }
'(' { TokenInfo TokenOB _ _ }
')' { TokenInfo TokenCB _ _ }
-- precedences from the CAO Language manual
%right ':='
%left ','
%left '..'
%left '||'
%left '^^'
%left '&&'
%left '|'
%left '^'
%left '&'
%left '==' '!='
%left '<' '<=' '>' '>='
%left '>>' '<<' '|>' '<|'
%left '+' '-'
%left '*' '/' '%' '@'
%left '**'
%right CAST
%right '!' '~' UNARY_MINUS
%left '.' '[' ']'
%%
Prog :: { Prog Name }
Prog : Definitions { Prog $1 Nothing }
-- Definitions -----------------------------------------------------------------
Definitions :: { [LDef Name] }
Definitions
: Definition { [$1] }
| Definition Definitions { $1:$2 }
Definition :: { LDef Name }
Definition
: VarDecl ';' { fmap VarDef $1 }
| ConstDecl ';' { fmap ConstDef $1 }
| TypeDef { fmap TyDef $1 }
| Func { fmap FunDef $1 }
VarDecl :: { Located (VarDecl Name) }
VarDecl
: def str ':' TypeDecl { la $1 (VarD (str2Name $2) (unLoc $4) Nothing) }
| def Strings2 ':' TypeDecl { la $1 (MultiD (map str2Name $2) (unLoc $4)) }
| def str ':' TypeDecl ':=' Expression { la $1 (VarD (str2Name $2) (unLoc $4) (Just (nullTyp $6))) }
| def str ':' TypeDecl ':=' '{' Expressions '}' { la $1 (ContD (str2Name $2) (unLoc $4) (map nullTyp $7)) }
ConstDecl :: { Located (ConstDecl Name) }
ConstDecl
: def const str ':' TypeDecl { la $1 (ConstD (str2Name $3) (unLoc $5) None) }
| def const str ':' TypeDecl ':=' Expression { la $1 (ConstD (str2Name $3) (unLoc $5) (ConstInit $7)) }
| def const str ':' TypeDecl '{' Expression '}' { la $1 (ConstD (str2Name $3) (unLoc $5) (ConstCond $7)) }
| def const Strings2 ':' TypeDecl { la $1 (MultiConstD (map str2Name $3) (unLoc $5) Nothing) }
| def const Strings2 ':' TypeDecl '{' Expression '}' { la $1 (MultiConstD (map str2Name $3) (unLoc $5) (Just $7) ) }
Strings2 :: { [TokenInfo] }
Strings2
: str ',' str { [$1, $3] }
| str ',' Strings2 { $1:$3 }
-- TypeDecls -------------------------------------------------------------------
TypeDecls :: { [LTyDecl Name] }
TypeDecls
: TypeDecl { [$1] }
| TypeDecls ',' TypeDecl { $1 ++ [$3] }
TypeDecl :: { LTyDecl Name }
TypeDecl
: int { la $1 IntD }
| register int { la $1 RIntD }
| bool { la $1 BoolD }
| unsigned bits '[' Expression ']' { la $1 (BitsD U $4) }
| signed bits '[' Expression ']' { la $1 (BitsD S $4) }
| mod '[' Expression ']' { la $1 (ModD (ModNum $3)) }
| mod '[' TypeDecl '<' str '>' '/' Polynomial ']' { la $1 (ModD (ModPol (unLoc $3) (unLoc $ str2PolInd $5) (unLoc $8))) }
| vector '[' Expression ']' of TypeDecl { la $1 (VectorD $3 (unLoc $6)) }
| matrix '[' Expression ',' Expression ']' of TypeDecl { la $1 (MatrixD $3 $5 (unLoc $8)) }
| type_alias { la $1 (TySynD (str2Tv $1)) }
-- Polynomials -----------------------------------------------------------------
Polynomial :: { Located (Pol Name) }
Polynomial
: Monomial {% checkPol (getLoc $1) (mon (unLoc $1)) }
| '-' Monomial {% checkPol (tLoc $1) (mon (neg (unLoc $2))) }
| Polynomial '+' Monomial {% checkPol (getLoc $1) ((unLoc $3) .+. (unLoc $1)) }
| Polynomial '-' Monomial {% checkPol (getLoc $1) ((neg (unLoc $3)) .+. (unLoc $1)) }
Monomial :: { Located (Mon Name) }
Monomial
: str { la $1 (intC 1 .*. (unLoc $ str2PolInd $1) .^. 1 )}
| str '**' intLit { la $1 (intC 1 .*. (unLoc $ str2PolInd $1) .^. (int_value $ tSymb $3) )}
| intLit { la $1 (intC (int_value $ tSymb $1) .*. EZero )}
| intLit '*' str { la $1 (intC (int_value $ tSymb $1) .*. (unLoc $ str2PolInd $3) .^. 1 )}
| intLit '*' str '**' intLit { la $1 (intC (int_value $ tSymb $1) .*. (unLoc $ str2PolInd $3) .^. (int_value $ tSymb $5) )}
| '(' Polynomial ')' { la $1 (polC (unLoc $2) .*. EZero )}
| '(' Polynomial ')' '*' str { la $1 (polC (unLoc $2) .*. (unLoc $ str2PolInd $5) .^. 1 )}
| '(' Polynomial ')' '*' str '**' intLit { la $1 (polC (unLoc $2) .*. (unLoc $ str2PolInd $5) .^. (int_value $ tSymb $7) )}
-- Expressions -----------------------------------------------------------------
Expressions0 :: { [LExpr Name]}
Expressions0
: {- empty -} { [] }
| Expressions { $1 }
Expressions :: { [LExpr Name]}
Expressions
: Expression { [$1] }
| Expression ',' Expressions { ($1:$3) }
Expression :: { LExpr Name }
Expression
: Literal { lg $1 (Lit (unLoc $1)) }
| str { la $1 (Var (unLoc $ str2Name $1)) }
| str '(' Expressions0 ')' { la $1 (FunCall (str2FName $1) (map nullTyp $3)) }
| Expression '.' str { lg $1 (StructProj (nullTyp $1) (unLoc (str2SFldName $3))) }
| '-' Expression %prec UNARY_MINUS { la $1 (UnaryOp Sym (nullTyp $2)) }
| '!' Expression { la $1 (UnaryOp Not (nullTyp $2)) }
| '~' Expression { la $1 (UnaryOp BNot (nullTyp $2)) }
| Expression '+' Expression { lg $1 (BinaryOp (ArithOp Plus) (nullTyp $1) (nullTyp $3)) }
| Expression '-' Expression { lg $1 (BinaryOp (ArithOp Minus) (nullTyp $1) (nullTyp $3)) }
| Expression '*' Expression { lg $1 (BinaryOp (ArithOp Times) (nullTyp $1) (nullTyp $3)) }
| Expression '**' Expression { lg $1 (BinaryOp (ArithOp Power) (nullTyp $1) (nullTyp $3)) }
| Expression '/' Expression { lg $1 (BinaryOp (ArithOp Div) (nullTyp $1) (nullTyp $3)) }
| Expression '%' Expression { lg $1 (BinaryOp (ArithOp ModOp) (nullTyp $1) (nullTyp $3)) }
| Expression '==' Expression { lg $1 (BinaryOp (CmpOp Bullet Eq) (nullTyp $1) (nullTyp $3)) }
| Expression '!=' Expression { lg $1 (BinaryOp (CmpOp Bullet Neq) (nullTyp $1) (nullTyp $3)) }
| Expression '<' Expression { lg $1 (BinaryOp (CmpOp Bullet Lt) (nullTyp $1) (nullTyp $3)) }
| Expression '<=' Expression { lg $1 (BinaryOp (CmpOp Bullet Leq) (nullTyp $1) (nullTyp $3)) }
| Expression '>' Expression { lg $1 (BinaryOp (CmpOp Bullet Gt) (nullTyp $1) (nullTyp $3)) }
| Expression '>=' Expression { lg $1 (BinaryOp (CmpOp Bullet Geq) (nullTyp $1) (nullTyp $3)) }
| Expression '||' Expression { lg $1 (BinaryOp (BoolOp Or) (nullTyp $1) (nullTyp $3)) }
| Expression '&&' Expression { lg $1 (BinaryOp (BoolOp And) (nullTyp $1) (nullTyp $3)) }
| Expression '^^' Expression { lg $1 (BinaryOp (BoolOp Xor) (nullTyp $1) (nullTyp $3)) }
| Expression '|' Expression { lg $1 (BinaryOp (BitOp BWOr) (nullTyp $1) (nullTyp $3)) }
| Expression '&' Expression { lg $1 (BinaryOp (BitOp BWAnd) (nullTyp $1) (nullTyp $3)) }
| Expression '^' Expression { lg $1 (BinaryOp (BitOp BWXor) (nullTyp $1) (nullTyp $3)) }
| Expression '<<' Expression { lg $1 (BinaryOp (BitsSROp SUp) (nullTyp $1) (nullTyp $3)) }
| Expression '>>' Expression { lg $1 (BinaryOp (BitsSROp SDown) (nullTyp $1) (nullTyp $3)) }
| Expression '<|' Expression { lg $1 (BinaryOp (BitsSROp RUp) (nullTyp $1) (nullTyp $3)) }
| Expression '|>' Expression { lg $1 (BinaryOp (BitsSROp RDown) (nullTyp $1) (nullTyp $3)) }
| Expression '@' Expression { lg $1 (BinaryOp Concat (nullTyp $1) (nullTyp $3)) }
| Expression APat { lg $1 (Access (nullTyp $1) $2) }
| '(' TypeDecls ')' Expression %prec CAST { la $1 (Cast True $2 (nullTyp $4)) }
| '(' Expression ')' { $2 }
APat :: { APat Name }
APat
: '[' Expression ']' { VectP (CElem (nullTyp $2)) }
| '[' Expression '..' Expression ']' { VectP (CRange (nullTyp $2) (nullTyp $4)) }
| '[' Expression ',' Expression ']' { MatP (CElem (nullTyp $2)) (CElem (nullTyp $4)) }
| '[' Expression '..' Expression ','
Expression '..' Expression ']' { MatP (CRange (nullTyp $2) (nullTyp $4)) (CRange (nullTyp $6) (nullTyp $8)) }
| '[' Expression ','
Expression '..' Expression ']' { MatP (CElem (nullTyp $2)) (CRange (nullTyp $4) (nullTyp $6)) }
| '[' Expression '..' Expression ','
Expression ']' { MatP (CRange (nullTyp $2) (nullTyp $4)) (CElem (nullTyp $6)) }
-- Literal ---------------------------------------------------------------------
Literal :: { Located (Literal Name) }
Literal
: intLit { la $1 (ILit (int_value $ tSymb $1)) }
| bitsLit { la $1 (BSLit U (bit_value $ tSymb $1)) }
| signbitsLit { la $1 (BSLit S (bit_value $ tSymb $1)) }
| '[' Polynomial ']' { la $1 (PLit (unLoc $2)) }
| true { la $1 (BLit True) }
| false { la $1 (BLit False) }
-- TypeDef ---------------------------------------------------------------------
TypeDef :: { Located (TyDef Name) }
TypeDef
: typedef str ':=' TypeDecl ';' {% doTypeSynonymDecl (tLoc $1) $2 (unLoc $4) }
| typedef str ':=' struct '[' StructFieldDecls ']' ';' {% doStructDecl (tLoc $1) $2 $6 }
StructFieldDecls :: { [(Located Name, TyDecl Name)] }
StructFieldDecls
: StructFieldDecl { [$1] }
| StructFieldDecls StructFieldDecl { $1 ++ [$2] }
StructFieldDecl :: { (Located Name, TyDecl Name) }
StructFieldDecl
: def str ':' TypeDecl ';' { (str2SFldName $2, unLoc $4) }
-- Function --------------------------------------------------------------------
Func :: { Located (Fun Name) }
Func
: def str '(' Args0 ')' ':' ReturnTypes '{' Statements '}' { la $1 $ Fun (str2FName $2) $4 $7 $9 }
ReturnTypes :: { [TyDecl Name] }
ReturnTypes
: TypeDecls { map unLoc $1 }
| void { [] }
Args0 :: { [Arg Name]}
Args0
: {- empty -} { [] }
| Args { $1 }
Args :: { [Arg Name]}
Args
: Arg { [$1] }
| ArgList { $1 }
| Args ',' Arg { $1 ++ [$3] }
| Args ',' ArgList { $1 ++ $3 }
Arg :: { Arg Name }
Arg : str ':' TypeDecl { Arg (str2Name $1) (unLoc $3) }
| const str ':' TypeDecl { ArgConst (str2Name $2) (unLoc $4) Nothing }
| const str ':' TypeDecl '{' Expression '}' { ArgConst (str2Name $2) (unLoc $4) (Just $6) }
ArgList :: { [Arg Name] }
ArgList
: Strings2 ':' TypeDecl { map (flip Arg (unLoc $3) . str2Name) $1 }
| const Strings2 ':' TypeDecl { map (\c -> ArgConst (str2Name c) (unLoc $4) Nothing) $2 }
-- Statements ------------------------------------------------------------------
Statements :: { [LStmt Name] }
Statements
: Statement { [$1] }
| Statements Statement { $1 ++ [$2] }
Statement :: { LStmt Name }
Statement
: VarDecl ';' { fmap VDecl $1 }
| ConstDecl ';' { fmap CDecl $1 }
| Assignment ';' { $1 }
| FunctionCallStatement ';' { $1 }
| return Expressions0 ';' { la $1 (Ret (map nullTyp $2)) }
| IfThenElseStatement { $1 }
| WhileStatement { $1 }
| SeqStatement { $1 }
Assignment :: { LStmt Name }
Assignment
: LValues ':=' Expressions { lg $1 (Assign (unLoc $1) (map nullTyp $3)) }
LValues :: { Located [LVal Name] }
LValues
: LValue { lg $1 [unLoc $1] }
| LValues ',' LValue { lg $1 (unLoc $1 ++ [unLoc $3]) }
LValue :: { Located (LVal Name) }
LValue
: str { la $1 (LVVar (str2Name $1)) }
| LValue '.' str { lg $1 (LVStruct (unLoc $1) (unLoc $ str2SFldName $3)) }
| LValue APat { lg $1 (LVCont Bullet (unLoc $1) $2) }
FunctionCallStatement :: { LStmt Name }
FunctionCallStatement
: str '(' Expressions0 ')' { la $1 (FCallS (unLoc $ str2FName $1) (map nullTyp $3)) }
-- Control Statements ----------------------------------------------------------
IfThenElseStatement :: { LStmt Name }
IfThenElseStatement
: if '(' Expression ')' '{' Statements '}' { la $1 (Ite (nullTyp $3) $6 Nothing) }
| if '(' Expression ')' '{' Statements '}'
else '{' Statements '}' { la $1 (Ite (nullTyp $3) $6 (Just $10)) }
WhileStatement :: { LStmt Name }
WhileStatement
: while '(' Expression ')' '{' Statements '}' { la $1 (While (nullTyp $3) $6) }
SeqStatement :: { LStmt Name }
SeqStatement
: seq str ':=' Expression to Expression '{' Statements '}' { la $1 (Seq (SeqIter (unLoc $ str2Name $2) $4 $6 Nothing (SimpleRng [])) $8) }
| seq str ':=' Expression to Expression by Expression '{' Statements '}' { la $1 (Seq (SeqIter (unLoc $ str2Name $2) $4 $6 (Just $8) (SimpleRng [])) $10) }
{
data Command a
= CmdDefinition (LDef a)
| CmdStatement (LStmt a)
| CmdExp (LExpr a)
-- Parser Functions ------------------------------------------------------------
parseFile :: CaoMonad m
=> String -> m (Prog Name)
parseFile fn = liftIO (readFile fn) >>= parseCao
parseCao :: CaoMonad m
=> String -> m (Prog Name)
parseCao str = do
file <- getFileName
injectResult $ runAlex str (put (aus file) >> parse)
where
aus fln = alexInitUserState { filename = fln }
parseCommand :: CaoMonad m
=> [Name] -> String -> m (Command Name)
parseCommand types str = do
file <- getFileName
injectResult $ runAlex str (put (aus file) >> parseCmd)
where
aus fln = alexInitUserState { filename = fln, types = map nameStr types }
parseCmd :: Alex (Command Name)
parseCmd = do { inp <- alexGetInput;
ust <- get;
fnm <- gets filename;
liftM CmdDefinition parseDef
`catchError` \ md ->
do alexSetInput inp
put ust
liftM CmdStatement parseStmt
`catchError` \ ms ->
do alexSetInput inp
put ust
liftM CmdExp parseExpr
`catchError` \ me ->
throwError $ mkCaoError defSrcLoc fnm
(MultipleErr [md, ms, me]::ErrorCode String)
}
lexer :: (TokenInfo -> Alex a) -> Alex a
lexer cont = alexMonadScan >>= cont
parseError :: TokenInfo -> Alex a
parseError info = do
flushLexer
f <- gets filename
let e = case tSymb info of
TokenError -> LexicalException (tText info)
TokenEOF -> EOFException
_ -> ParsingException (tText info)
throwError $ mkCaoError (tLoc info) f $ (ParserException e::ErrorCode Name)
{-
let throw :: ParserException -> Alex a
--let throw :: ErrorCode String -> Alex a
throw = throwError . mkCaoError (tLoc info) f . ParserException
case tSymb info of
TokenError -> throw $ LexicalException (tText info)
TokenEOF -> throw EOFException
_ -> throw $ ParsingException (tText info)
-}
-- Auxiliary Functions ---------------------------------------------------------
-- All vars are marked as local vars. Before returning this must be fixed
{-# INLINE str2Name #-}
str2Name :: TokenInfo -> Located Name
str2Name = str2X mkVarName
{-# INLINE str2FName #-}
str2FName :: TokenInfo -> Located Name
str2FName = str2X mkFunName
{-# INLINE str2SFldName #-}
str2SFldName :: TokenInfo -> Located Name
str2SFldName = str2X mkStructFldName
{-# INLINE str2PolInd #-}
str2PolInd :: TokenInfo -> Located Name
str2PolInd = str2X mkPolIndName
{-# INLINE str2Tv #-}
str2Tv :: TokenInfo -> Located Name
str2Tv = str2X mkTvName
str2X :: (String -> Name) -> TokenInfo -> Located Name
str2X f t = L (tLoc t) (f $ str_value $ tSymb t)
doTypeSynonymDecl :: SrcLoc
-> TokenInfo
-> TyDecl Name
-> Alex (Located (TyDef Name))
doTypeSynonymDecl loc syn t = do
checkDeclType loc (str_value $ tSymb syn)
return (L loc (TySynDef (str2Tv syn) t))
doStructDecl :: SrcLoc
-> TokenInfo
-> [(Located Name, TyDecl Name)]
-> Alex (Located (TyDef Name))
doStructDecl loc sid flds = do
checkDeclType loc (str_value $ tSymb sid)
return (L loc (StructDecl (str2Tv sid) flds))
checkDeclType :: SrcLoc -> String -> Alex ()
checkDeclType loc nm = do
aus <- get
let fnm = filename aus
tys = types aus
when (nm `elem` tys) $ throwError (mkCaoError loc fnm (DeclException $ MultipleDeclException nm))
put aus { types = nm:tys }
checkPol :: SrcLoc -> Pol Name -> Alex (Located (Pol Name))
checkPol s p = if isValid (monomials p)
then return (L s p)
else do fnm <- gets filename
throwError $ mkCaoError s fnm (PolynomialErr p)
{-# INLINE nullTyp #-}
nullTyp :: LExpr id -> TLExpr id
nullTyp = annL Bullet
{-# INLINE la #-}
la :: TokenInfo -> a -> Located a
la l = L (tLoc l)
{-# INLINE lg #-}
lg :: Located b -> a -> Located a
lg l = L (getLoc l)
}