{ {- CAO Compiler Copyright (C) 2014 Cryptography and Information Security Group, HASLab - INESC TEC and Universidade do Minho This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . -} {- Module : $Header$ Description : CAO language parser. Copyright : (C) 2014 Cryptography and Information Security Group, HASLab - INESC TEC and Universidade do Minho License : GPL Maintainer : Paulo Silva Stability : experimental Portability : non-portable -} module Language.CAO.Parser.Parser ( parseFile, parseCao, parseCommand, Command(..), ) where import Control.Monad.Error import Control.Monad.State import Language.CAO.Common.Error import Language.CAO.Common.Literal import Language.CAO.Common.Monad import Language.CAO.Common.Polynomial import Language.CAO.Common.SrcLoc import Language.CAO.Common.Var import Language.CAO.Parser.Lexer import Language.CAO.Parser.Tokens import Language.CAO.Syntax import Language.CAO.Type } %name parse %name parseDef Definition %name parseStmt Statement %name parseExpr Expression %tokentype { TokenInfo } %error { parseError } %monad { Alex } %lexer { lexer }{ TokenInfo TokenEOF _ _ } %token def { TokenInfo TokenDef _ _ } typedef { TokenInfo TokenTypedef _ _ } const { TokenInfo TokenConst _ _ } ':' { TokenInfo TokenOfType _ _ } of { TokenInfo TokenOf _ _ } ':=' { TokenInfo TokenAssign _ _ } return { TokenInfo TokenReturn _ _ } '[' { TokenInfo TokenOSB _ _ } ']' { TokenInfo TokenCSB _ _ } '{' { TokenInfo TokenOCB _ _ } '}' { TokenInfo TokenCCB _ _ } ';' { TokenInfo TokenSemiColon _ _ } ',' { TokenInfo TokenComma _ _ } '..' { TokenInfo TokenDoublePeriod _ _ } '.' { TokenInfo TokenPeriod _ _ } true { TokenInfo TokenTrue _ _ } false { TokenInfo TokenFalse _ _ } intLit { TokenInfo (TokenIntValue _) _ _ } bitsLit { TokenInfo (TokenUnsignedBitsValue _) _ _ } signbitsLit { TokenInfo (TokenSignBitsValue _) _ _ } str { TokenInfo (TokenStr _) _ _ } type_alias { TokenInfo (TokenTypeAlias _) _ _ } void { TokenInfo TokenVoid _ _ } unsigned { TokenInfo TokenUnsigned _ _ } signed { TokenInfo TokenSigned _ _ } register { TokenInfo TokenRegister _ _ } int { TokenInfo TokenInt _ _ } bits { TokenInfo TokenBits _ _ } bool { TokenInfo TokenBool _ _ } vector { TokenInfo TokenVector _ _ } matrix { TokenInfo TokenMatrix _ _ } mod { TokenInfo TokenMod _ _ } struct { TokenInfo TokenStruct _ _ } if { TokenInfo TokenIf _ _ } else { TokenInfo TokenElse _ _ } while { TokenInfo TokenWhile _ _ } seq { TokenInfo TokenSeq _ _ } to { TokenInfo TokenTo _ _ } by { TokenInfo TokenBy _ _ } '==' { TokenInfo TokenEq _ _ } '&&' { TokenInfo TokenAnd _ _ } '||' { TokenInfo TokenOr _ _ } '>=' { TokenInfo TokenGET _ _ } '<=' { TokenInfo TokenLET _ _ } '>' { TokenInfo TokenGT _ _ } '<' { TokenInfo TokenLT _ _ } '!' { TokenInfo TokenNot _ _ } '!=' { TokenInfo TokenNotEqual _ _ } '^^' { TokenInfo TokenXor _ _ } '+' { TokenInfo TokenPlus _ _ } '-' { TokenInfo TokenMinus _ _ } '*' { TokenInfo TokenTimes _ _ } '/' { TokenInfo TokenDiv _ _ } '**' { TokenInfo TokenPower _ _ } '%' { TokenInfo TokenRemainder _ _ } '~' { TokenInfo TokenBitNot _ _ } '&' { TokenInfo TokenBitAnd _ _ } '|' { TokenInfo TokenBitOr _ _ } '^' { TokenInfo TokenBitXor _ _ } '<<' { TokenInfo TokenShiftUp _ _ } '>>' { TokenInfo TokenShiftDown _ _ } '<|' { TokenInfo TokenRotUp _ _ } '|>' { TokenInfo TokenRotDown _ _ } '@' { TokenInfo TokenConcat _ _ } '(' { TokenInfo TokenOB _ _ } ')' { TokenInfo TokenCB _ _ } -- precedences from the CAO Language manual %right ':=' %left ',' %left '..' %left '||' %left '^^' %left '&&' %left '|' %left '^' %left '&' %left '==' '!=' %left '<' '<=' '>' '>=' %left '>>' '<<' '|>' '<|' %left '+' '-' %left '*' '/' '%' '@' %left '**' %right CAST %right '!' '~' UNARY_MINUS %left '.' '[' ']' %% Prog :: { Prog Name } Prog : Definitions { Prog $1 Nothing } -- Definitions ----------------------------------------------------------------- Definitions :: { [LDef Name] } Definitions : Definition { [$1] } | Definition Definitions { $1:$2 } Definition :: { LDef Name } Definition : VarDecl ';' { fmap VarDef $1 } | ConstDecl ';' { fmap ConstDef $1 } | TypeDef { fmap TyDef $1 } | Func { fmap FunDef $1 } VarDecl :: { Located (VarDecl Name) } VarDecl : def str ':' TypeDecl { la $1 (VarD (str2Name $2) (unLoc $4) Nothing) } | def Strings2 ':' TypeDecl { la $1 (MultiD (map str2Name $2) (unLoc $4)) } | def str ':' TypeDecl ':=' Expression { la $1 (VarD (str2Name $2) (unLoc $4) (Just (nullTyp $6))) } | def str ':' TypeDecl ':=' '{' Expressions '}' { la $1 (ContD (str2Name $2) (unLoc $4) (map nullTyp $7)) } ConstDecl :: { Located (ConstDecl Name) } ConstDecl : def const str ':' TypeDecl { la $1 (ConstD (str2Name $3) (unLoc $5) None) } | def const str ':' TypeDecl ':=' Expression { la $1 (ConstD (str2Name $3) (unLoc $5) (ConstInit $7)) } | def const str ':' TypeDecl '{' Expression '}' { la $1 (ConstD (str2Name $3) (unLoc $5) (ConstCond $7)) } | def const Strings2 ':' TypeDecl { la $1 (MultiConstD (map str2Name $3) (unLoc $5) Nothing) } | def const Strings2 ':' TypeDecl '{' Expression '}' { la $1 (MultiConstD (map str2Name $3) (unLoc $5) (Just $7) ) } Strings2 :: { [TokenInfo] } Strings2 : str ',' str { [$1, $3] } | str ',' Strings2 { $1:$3 } -- TypeDecls ------------------------------------------------------------------- TypeDecls :: { [LTyDecl Name] } TypeDecls : TypeDecl { [$1] } | TypeDecls ',' TypeDecl { $1 ++ [$3] } TypeDecl :: { LTyDecl Name } TypeDecl : int { la $1 IntD } | register int { la $1 RIntD } | bool { la $1 BoolD } | unsigned bits '[' Expression ']' { la $1 (BitsD U $4) } | signed bits '[' Expression ']' { la $1 (BitsD S $4) } | mod '[' Expression ']' { la $1 (ModD (ModNum $3)) } | mod '[' TypeDecl '<' str '>' '/' Polynomial ']' { la $1 (ModD (ModPol (unLoc $3) (unLoc $ str2PolInd $5) (unLoc $8))) } | vector '[' Expression ']' of TypeDecl { la $1 (VectorD $3 (unLoc $6)) } | matrix '[' Expression ',' Expression ']' of TypeDecl { la $1 (MatrixD $3 $5 (unLoc $8)) } | type_alias { la $1 (TySynD (str2Tv $1)) } -- Polynomials ----------------------------------------------------------------- Polynomial :: { Located (Pol Name) } Polynomial : Monomial {% checkPol (getLoc $1) (mon (unLoc $1)) } | '-' Monomial {% checkPol (tLoc $1) (mon (neg (unLoc $2))) } | Polynomial '+' Monomial {% checkPol (getLoc $1) ((unLoc $3) .+. (unLoc $1)) } | Polynomial '-' Monomial {% checkPol (getLoc $1) ((neg (unLoc $3)) .+. (unLoc $1)) } Monomial :: { Located (Mon Name) } Monomial : str { la $1 (intC 1 .*. (unLoc $ str2PolInd $1) .^. 1 )} | str '**' intLit { la $1 (intC 1 .*. (unLoc $ str2PolInd $1) .^. (int_value $ tSymb $3) )} | intLit { la $1 (intC (int_value $ tSymb $1) .*. EZero )} | intLit '*' str { la $1 (intC (int_value $ tSymb $1) .*. (unLoc $ str2PolInd $3) .^. 1 )} | intLit '*' str '**' intLit { la $1 (intC (int_value $ tSymb $1) .*. (unLoc $ str2PolInd $3) .^. (int_value $ tSymb $5) )} | '(' Polynomial ')' { la $1 (polC (unLoc $2) .*. EZero )} | '(' Polynomial ')' '*' str { la $1 (polC (unLoc $2) .*. (unLoc $ str2PolInd $5) .^. 1 )} | '(' Polynomial ')' '*' str '**' intLit { la $1 (polC (unLoc $2) .*. (unLoc $ str2PolInd $5) .^. (int_value $ tSymb $7) )} -- Expressions ----------------------------------------------------------------- Expressions0 :: { [LExpr Name]} Expressions0 : {- empty -} { [] } | Expressions { $1 } Expressions :: { [LExpr Name]} Expressions : Expression { [$1] } | Expression ',' Expressions { ($1:$3) } Expression :: { LExpr Name } Expression : Literal { lg $1 (Lit (unLoc $1)) } | str { la $1 (Var (unLoc $ str2Name $1)) } | str '(' Expressions0 ')' { la $1 (FunCall (str2FName $1) (map nullTyp $3)) } | Expression '.' str { lg $1 (StructProj (nullTyp $1) (unLoc (str2SFldName $3))) } | '-' Expression %prec UNARY_MINUS { la $1 (UnaryOp Sym (nullTyp $2)) } | '!' Expression { la $1 (UnaryOp Not (nullTyp $2)) } | '~' Expression { la $1 (UnaryOp BNot (nullTyp $2)) } | Expression '+' Expression { lg $1 (BinaryOp (ArithOp Plus) (nullTyp $1) (nullTyp $3)) } | Expression '-' Expression { lg $1 (BinaryOp (ArithOp Minus) (nullTyp $1) (nullTyp $3)) } | Expression '*' Expression { lg $1 (BinaryOp (ArithOp Times) (nullTyp $1) (nullTyp $3)) } | Expression '**' Expression { lg $1 (BinaryOp (ArithOp Power) (nullTyp $1) (nullTyp $3)) } | Expression '/' Expression { lg $1 (BinaryOp (ArithOp Div) (nullTyp $1) (nullTyp $3)) } | Expression '%' Expression { lg $1 (BinaryOp (ArithOp ModOp) (nullTyp $1) (nullTyp $3)) } | Expression '==' Expression { lg $1 (BinaryOp (CmpOp Bullet Eq) (nullTyp $1) (nullTyp $3)) } | Expression '!=' Expression { lg $1 (BinaryOp (CmpOp Bullet Neq) (nullTyp $1) (nullTyp $3)) } | Expression '<' Expression { lg $1 (BinaryOp (CmpOp Bullet Lt) (nullTyp $1) (nullTyp $3)) } | Expression '<=' Expression { lg $1 (BinaryOp (CmpOp Bullet Leq) (nullTyp $1) (nullTyp $3)) } | Expression '>' Expression { lg $1 (BinaryOp (CmpOp Bullet Gt) (nullTyp $1) (nullTyp $3)) } | Expression '>=' Expression { lg $1 (BinaryOp (CmpOp Bullet Geq) (nullTyp $1) (nullTyp $3)) } | Expression '||' Expression { lg $1 (BinaryOp (BoolOp Or) (nullTyp $1) (nullTyp $3)) } | Expression '&&' Expression { lg $1 (BinaryOp (BoolOp And) (nullTyp $1) (nullTyp $3)) } | Expression '^^' Expression { lg $1 (BinaryOp (BoolOp Xor) (nullTyp $1) (nullTyp $3)) } | Expression '|' Expression { lg $1 (BinaryOp (BitOp BWOr) (nullTyp $1) (nullTyp $3)) } | Expression '&' Expression { lg $1 (BinaryOp (BitOp BWAnd) (nullTyp $1) (nullTyp $3)) } | Expression '^' Expression { lg $1 (BinaryOp (BitOp BWXor) (nullTyp $1) (nullTyp $3)) } | Expression '<<' Expression { lg $1 (BinaryOp (BitsSROp SUp) (nullTyp $1) (nullTyp $3)) } | Expression '>>' Expression { lg $1 (BinaryOp (BitsSROp SDown) (nullTyp $1) (nullTyp $3)) } | Expression '<|' Expression { lg $1 (BinaryOp (BitsSROp RUp) (nullTyp $1) (nullTyp $3)) } | Expression '|>' Expression { lg $1 (BinaryOp (BitsSROp RDown) (nullTyp $1) (nullTyp $3)) } | Expression '@' Expression { lg $1 (BinaryOp Concat (nullTyp $1) (nullTyp $3)) } | Expression APat { lg $1 (Access (nullTyp $1) $2) } | '(' TypeDecls ')' Expression %prec CAST { la $1 (Cast True $2 (nullTyp $4)) } | '(' Expression ')' { $2 } APat :: { APat Name } APat : '[' Expression ']' { VectP (CElem (nullTyp $2)) } | '[' Expression '..' Expression ']' { VectP (CRange (nullTyp $2) (nullTyp $4)) } | '[' Expression ',' Expression ']' { MatP (CElem (nullTyp $2)) (CElem (nullTyp $4)) } | '[' Expression '..' Expression ',' Expression '..' Expression ']' { MatP (CRange (nullTyp $2) (nullTyp $4)) (CRange (nullTyp $6) (nullTyp $8)) } | '[' Expression ',' Expression '..' Expression ']' { MatP (CElem (nullTyp $2)) (CRange (nullTyp $4) (nullTyp $6)) } | '[' Expression '..' Expression ',' Expression ']' { MatP (CRange (nullTyp $2) (nullTyp $4)) (CElem (nullTyp $6)) } -- Literal --------------------------------------------------------------------- Literal :: { Located (Literal Name) } Literal : intLit { la $1 (ILit (int_value $ tSymb $1)) } | bitsLit { la $1 (BSLit U (bit_value $ tSymb $1)) } | signbitsLit { la $1 (BSLit S (bit_value $ tSymb $1)) } | '[' Polynomial ']' { la $1 (PLit (unLoc $2)) } | true { la $1 (BLit True) } | false { la $1 (BLit False) } -- TypeDef --------------------------------------------------------------------- TypeDef :: { Located (TyDef Name) } TypeDef : typedef str ':=' TypeDecl ';' {% doTypeSynonymDecl (tLoc $1) $2 (unLoc $4) } | typedef str ':=' struct '[' StructFieldDecls ']' ';' {% doStructDecl (tLoc $1) $2 $6 } StructFieldDecls :: { [(Located Name, TyDecl Name)] } StructFieldDecls : StructFieldDecl { [$1] } | StructFieldDecls StructFieldDecl { $1 ++ [$2] } StructFieldDecl :: { (Located Name, TyDecl Name) } StructFieldDecl : def str ':' TypeDecl ';' { (str2SFldName $2, unLoc $4) } -- Function -------------------------------------------------------------------- Func :: { Located (Fun Name) } Func : def str '(' Args0 ')' ':' ReturnTypes '{' Statements '}' { la $1 $ Fun (str2FName $2) $4 $7 $9 } ReturnTypes :: { [TyDecl Name] } ReturnTypes : TypeDecls { map unLoc $1 } | void { [] } Args0 :: { [Arg Name]} Args0 : {- empty -} { [] } | Args { $1 } Args :: { [Arg Name]} Args : Arg { [$1] } | ArgList { $1 } | Args ',' Arg { $1 ++ [$3] } | Args ',' ArgList { $1 ++ $3 } Arg :: { Arg Name } Arg : str ':' TypeDecl { Arg (str2Name $1) (unLoc $3) } | const str ':' TypeDecl { ArgConst (str2Name $2) (unLoc $4) Nothing } | const str ':' TypeDecl '{' Expression '}' { ArgConst (str2Name $2) (unLoc $4) (Just $6) } ArgList :: { [Arg Name] } ArgList : Strings2 ':' TypeDecl { map (flip Arg (unLoc $3) . str2Name) $1 } | const Strings2 ':' TypeDecl { map (\c -> ArgConst (str2Name c) (unLoc $4) Nothing) $2 } -- Statements ------------------------------------------------------------------ Statements :: { [LStmt Name] } Statements : Statement { [$1] } | Statements Statement { $1 ++ [$2] } Statement :: { LStmt Name } Statement : VarDecl ';' { fmap VDecl $1 } | ConstDecl ';' { fmap CDecl $1 } | Assignment ';' { $1 } | FunctionCallStatement ';' { $1 } | return Expressions0 ';' { la $1 (Ret (map nullTyp $2)) } | IfThenElseStatement { $1 } | WhileStatement { $1 } | SeqStatement { $1 } Assignment :: { LStmt Name } Assignment : LValues ':=' Expressions { lg $1 (Assign (unLoc $1) (map nullTyp $3)) } LValues :: { Located [LVal Name] } LValues : LValue { lg $1 [unLoc $1] } | LValues ',' LValue { lg $1 (unLoc $1 ++ [unLoc $3]) } LValue :: { Located (LVal Name) } LValue : str { la $1 (LVVar (str2Name $1)) } | LValue '.' str { lg $1 (LVStruct (unLoc $1) (unLoc $ str2SFldName $3)) } | LValue APat { lg $1 (LVCont Bullet (unLoc $1) $2) } FunctionCallStatement :: { LStmt Name } FunctionCallStatement : str '(' Expressions0 ')' { la $1 (FCallS (unLoc $ str2FName $1) (map nullTyp $3)) } -- Control Statements ---------------------------------------------------------- IfThenElseStatement :: { LStmt Name } IfThenElseStatement : if '(' Expression ')' '{' Statements '}' { la $1 (Ite (nullTyp $3) $6 Nothing) } | if '(' Expression ')' '{' Statements '}' else '{' Statements '}' { la $1 (Ite (nullTyp $3) $6 (Just $10)) } WhileStatement :: { LStmt Name } WhileStatement : while '(' Expression ')' '{' Statements '}' { la $1 (While (nullTyp $3) $6) } SeqStatement :: { LStmt Name } SeqStatement : seq str ':=' Expression to Expression '{' Statements '}' { la $1 (Seq (SeqIter (unLoc $ str2Name $2) $4 $6 Nothing (SimpleRng [])) $8) } | seq str ':=' Expression to Expression by Expression '{' Statements '}' { la $1 (Seq (SeqIter (unLoc $ str2Name $2) $4 $6 (Just $8) (SimpleRng [])) $10) } { data Command a = CmdDefinition (LDef a) | CmdStatement (LStmt a) | CmdExp (LExpr a) -- Parser Functions ------------------------------------------------------------ parseFile :: CaoMonad m => String -> m (Prog Name) parseFile fn = liftIO (readFile fn) >>= parseCao parseCao :: CaoMonad m => String -> m (Prog Name) parseCao str = do file <- getFileName injectResult $ runAlex str (put (aus file) >> parse) where aus fln = alexInitUserState { filename = fln } parseCommand :: CaoMonad m => [Name] -> String -> m (Command Name) parseCommand types str = do file <- getFileName injectResult $ runAlex str (put (aus file) >> parseCmd) where aus fln = alexInitUserState { filename = fln, types = map nameStr types } parseCmd :: Alex (Command Name) parseCmd = do { inp <- alexGetInput; ust <- get; fnm <- gets filename; liftM CmdDefinition parseDef `catchError` \ md -> do alexSetInput inp put ust liftM CmdStatement parseStmt `catchError` \ ms -> do alexSetInput inp put ust liftM CmdExp parseExpr `catchError` \ me -> throwError $ mkCaoError defSrcLoc fnm (MultipleErr [md, ms, me]::ErrorCode String) } lexer :: (TokenInfo -> Alex a) -> Alex a lexer cont = alexMonadScan >>= cont parseError :: TokenInfo -> Alex a parseError info = do flushLexer f <- gets filename let e = case tSymb info of TokenError -> LexicalException (tText info) TokenEOF -> EOFException _ -> ParsingException (tText info) throwError $ mkCaoError (tLoc info) f $ (ParserException e::ErrorCode Name) {- let throw :: ParserException -> Alex a --let throw :: ErrorCode String -> Alex a throw = throwError . mkCaoError (tLoc info) f . ParserException case tSymb info of TokenError -> throw $ LexicalException (tText info) TokenEOF -> throw EOFException _ -> throw $ ParsingException (tText info) -} -- Auxiliary Functions --------------------------------------------------------- -- All vars are marked as local vars. Before returning this must be fixed {-# INLINE str2Name #-} str2Name :: TokenInfo -> Located Name str2Name = str2X mkVarName {-# INLINE str2FName #-} str2FName :: TokenInfo -> Located Name str2FName = str2X mkFunName {-# INLINE str2SFldName #-} str2SFldName :: TokenInfo -> Located Name str2SFldName = str2X mkStructFldName {-# INLINE str2PolInd #-} str2PolInd :: TokenInfo -> Located Name str2PolInd = str2X mkPolIndName {-# INLINE str2Tv #-} str2Tv :: TokenInfo -> Located Name str2Tv = str2X mkTvName str2X :: (String -> Name) -> TokenInfo -> Located Name str2X f t = L (tLoc t) (f $ str_value $ tSymb t) doTypeSynonymDecl :: SrcLoc -> TokenInfo -> TyDecl Name -> Alex (Located (TyDef Name)) doTypeSynonymDecl loc syn t = do checkDeclType loc (str_value $ tSymb syn) return (L loc (TySynDef (str2Tv syn) t)) doStructDecl :: SrcLoc -> TokenInfo -> [(Located Name, TyDecl Name)] -> Alex (Located (TyDef Name)) doStructDecl loc sid flds = do checkDeclType loc (str_value $ tSymb sid) return (L loc (StructDecl (str2Tv sid) flds)) checkDeclType :: SrcLoc -> String -> Alex () checkDeclType loc nm = do aus <- get let fnm = filename aus tys = types aus when (nm `elem` tys) $ throwError (mkCaoError loc fnm (DeclException $ MultipleDeclException nm)) put aus { types = nm:tys } checkPol :: SrcLoc -> Pol Name -> Alex (Located (Pol Name)) checkPol s p = if isValid (monomials p) then return (L s p) else do fnm <- gets filename throwError $ mkCaoError s fnm (PolynomialErr p) {-# INLINE nullTyp #-} nullTyp :: LExpr id -> TLExpr id nullTyp = annL Bullet {-# INLINE la #-} la :: TokenInfo -> a -> Located a la l = L (tLoc l) {-# INLINE lg #-} lg :: Located b -> a -> Located a lg l = L (getLoc l) }