{- cabal install happy alex -} { {-# LANGUAGE FlexibleContexts,FlexibleInstances,MultiParamTypeClasses,FunctionalDependencies #-} module Parser where import Lexer hiding (main) import Spec import Spec0 import Convert0 import Control.Monad.State } %monad { CM } %name fregelparser %partial exprparser expr %tokentype { Lexeme } %error { parseError } %token -- special patterns (must be defined first) T_INT { (L _ L_CONSTRUCTOR "Int") } T_BOOL { (L _ L_CONSTRUCTOR "Bool") } T_DOUBLE { (L _ L_CONSTRUCTOR "Double") } T_STRING { (L _ L_CONSTRUCTOR "String") } T_PAIR { (L _ L_CONSTRUCTOR "Pair") } A_SUM { (L _ L_IDENT "sum") } A_PROD { (L _ L_IDENT "prod") } A_MINIMUM { (L _ L_IDENT "minimum") } A_MAXIMUM { (L _ L_IDENT "maximum") } A_OR { (L _ L_IDENT "or") } A_AND { (L _ L_IDENT "and") } A_RANDOM { (L _ L_IDENT "random") } G_FREGEL { (L _ L_IDENT "fregel") } G_GMAP { (L _ L_IDENT "gmap") } G_GZIP { (L _ L_IDENT "gzip") } G_GITER { (L _ L_IDENT "giter") } TC_FIX { (L _ L_CONSTRUCTOR "Fix") } TC_ITER { (L _ L_CONSTRUCTOR "Iter") } TC_UNTIL { (L _ L_CONSTRUCTOR "Until") } TC_WHILE { (L _ L_CONSTRUCTOR "While") } -- normal tokens DATA { (L _ L_DATA _) } DERIVING { (L _ L_DERIVING _) } LET { (L _ L_LET _) } IN { (L _ L_IN _) } IF { (L _ L_IF _) } THEN { (L _ L_THEN _) } ELSE { (L _ L_ELSE _) } CURR { (L _ L_CURR _) } PREV { (L _ L_PREV _) } VAL { (L _ L_VAL _) } IS { (L _ L_IS _) } RS { (L _ L_RS _) } GOF { (L _ L_GOF _) } EQUAL { (L _ L_EQUAL _) } DBLCOLON { (L _ L_DBLCOLON _) } COMMA { (L _ L_COMMA _) } SEMICOLON { (L _ L_SEMICOLON _) } BACKSLASH { (L _ L_BACKSLASH _) } RARROW { (L _ L_RARROW _) } LARROW { (L _ L_LARROW _) } DOTHAT { (L _ L_DOTHAT _) } PIPE { (L _ L_PIPE _) } DBLAND { (L _ L_DBLAND _) } DBLOR { (L _ L_DBLOR _) } EQ { (L _ L_EQ _) } NE { (L _ L_NE _) } LT { (L _ L_LT _) } LE { (L _ L_LE _) } GT { (L _ L_GT _) } GE { (L _ L_GE _) } PLUS { (L _ L_PLUS _) } MINUS { (L _ L_MINUS _) } AST { (L _ L_AST _) } SLASH { (L _ L_SLASH _) } BACKQUOTE { (L _ L_BACKQUOTE _) } LPAREN { (L _ L_LPAREN _) } RPAREN { (L _ L_RPAREN _) } LBRACE { (L _ L_LBRACE _) } RBRACE { (L _ L_RBRACE _) } LBRACKET { (L _ L_LBRACKET _) } RBRACKET { (L _ L_RBRACKET _) } BOOL { (L _ L_BOOL _) } INT { (L _ L_INT _) } FLOAT { (L _ L_FLOAT _) } STRING { (L _ L_STRING _) } IDENT { (L _ L_IDENT _) } CONSTRUCTOR { (L _ L_CONSTRUCTOR _) } %% -- the start ; left-recursion is more efficient for Happy (but the list needs to be reversed) programSpecs :: { [DProgramSpec0 Pos] } programSpecs : programSpec { [$1] } | programSpecs SEMICOLON programSpec { $3:$1 } programSpec :: { DProgramSpec0 Pos } programSpec : recordSpecs smplDef { DProgramSpec0 (reverse $1) $2 (if $1 == [] then getData $2 else getData (head $1)) } recordSpecs :: { [ DRecordSpec Pos] } recordSpecs : {- empty -} { [ ] } | recordSpecs recordSpec { $2:$1 } recordSpec :: { DRecordSpec Pos } recordSpec : DATA constructor EQUAL constructor LBRACE fieldSpecs RBRACE opt_deriving { DRecordSpec $4 (reverse $6) (getData $1) } fieldSpecs :: { [(DField Pos, DType Pos)] } fieldSpecs : fieldSpec { [$1] } | fieldSpecs COMMA fieldSpec { $3:$1 } fieldSpec :: { (DField Pos, DType Pos) } fieldSpec : field DBLCOLON type { ($1, $3) } field :: { DField Pos } field : IDENT { DField (strToken $1) (getData $1)} type :: { DType Pos } type : T_INT { DTInt (getData $1) } | T_BOOL { DTBool (getData $1) } | T_STRING { DTString (getData $1) } | T_DOUBLE { DTDouble (getData $1) } | T_PAIR type type { DTRecord (DConstructor "Pair" (getData $1)) [$2, $3] (getData $1) } | LPAREN type RPAREN { $2 } | LPAREN types RPAREN { DTTuple (reverse $2) (getData $1) } | CONSTRUCTOR { DTRecord (DConstructor (strToken $1) (getData $1)) [] (getData $1) } types :: { [DType Pos] } types : type COMMA type { [$3, $1]} | types COMMA type { $3:$1 } -- ignored opt_deriving :: { () } opt_deriving : {- empty -} { () } | DERIVING CONSTRUCTOR { () } | DERIVING LPAREN deriving_constructors RPAREN { () } deriving_constructors :: { () } deriving_constructors : CONSTRUCTOR { () } | deriving_constructors COMMA CONSTRUCTOR { () } exprWithSmplDefs :: { ([DSmplDef0 Pos], DExpr0 Pos) } exprWithSmplDefs : LET smplDefs IN expr { (reverse $2, $4) } | expr { ([], $1) } smplDefs :: { [DSmplDef0 Pos] } smplDefs : smplDef { [$1] } | smplDefs SEMICOLON smplDef { $3:$1 } smplDef :: { DSmplDef0 Pos } smplDef : defFun { $1 } | defVar { $1 } | defTuple { $1 } | defVertComp { $1 } defFun :: { DSmplDef0 Pos } defFun : var vars EQUAL exprWithSmplDefs { DDefFun0 $1 (reverse $2) (fst $4) (snd $4) (getData $1) } defVar :: { DSmplDef0 Pos } defVar : var EQUAL exprWithSmplDefs { DDefVar0 $1 (fst $3) (snd $3) (getData $1) } defTuple :: { DSmplDef0 Pos } defTuple : LPAREN csVars RPAREN EQUAL exprWithSmplDefs { DDefTuple0 (reverse $2) (fst $5) (snd $5) (getData $1) } defVertComp :: { DSmplDef0 Pos } defVertComp : var var PREV CURR EQUAL exprWithSmplDefs {% mustbe $2 "v" >> return (DDefVertComp0 $1 (fst $6) (snd $6) (getData $1)) } var :: { DVar Pos } var : IDENT { DVar (strToken $1) (getData $1)} vars :: { [DVar Pos] } vars : var { [$1] } | vars var { $2:$1 } csVars :: { [DVar Pos] } csVars : var COMMA var { [$3,$1] } | csVars COMMA var { $3:$1 } -- expression hierarchy expr :: { DExpr0 Pos } expr : expr8 { $1 } expr8 :: { DExpr0 Pos } expr8 : IF expr7 THEN expr7 ELSE expr7 { DIf0 $2 $4 $6 (getData $1) } | expr7 { $1 } expr7 :: { DExpr0 Pos } expr7 : expr7 DBLOR expr6 { DFunAp0 (DBinOp "||" (getData $2)) [$1,$3] (getData $1) } | expr6 { $1 } expr6 :: { DExpr0 Pos } expr6 : expr6 DBLAND expr5 { DFunAp0 (DBinOp "&&" (getData $2)) [$1,$3] (getData $1) } | expr5 { $1 } expr5 :: { DExpr0 Pos } expr5 : expr4 LE expr4 { DFunAp0 (DBinOp "<=" (getData $2)) [$1,$3] (getData $1) } | expr4 GE expr4 { DFunAp0 (DBinOp ">=" (getData $2)) [$1,$3] (getData $1) } | expr4 EQ expr4 { DFunAp0 (DBinOp "==" (getData $2)) [$1,$3] (getData $1) } | expr4 NE expr4 { DFunAp0 (DBinOp "!=" (getData $2)) [$1,$3] (getData $1) } | expr4 GT expr4 { DFunAp0 (DBinOp ">" (getData $2)) [$1,$3] (getData $1) } | expr4 LT expr4 { DFunAp0 (DBinOp "<" (getData $2)) [$1,$3] (getData $1) } | expr4 { $1 } expr4 :: { DExpr0 Pos } expr4 : expr3 BACKQUOTE var BACKQUOTE expr3 { DFunAp0 (v2f $3) [$1,$5] (getData $1) } | expr4 op4 expr3 { DFunAp0 (DBinOp $2 (getData $1)) [$1, $3] (getData $1) } | expr3 { $1 } op4 :: { String } op4 : PLUS { "+" } | MINUS { "-" } expr3 :: { DExpr0 Pos } expr3 : expr3 op3 expr2 { DFunAp0 (DBinOp $2 (getData $1)) [$1, $3] (getData $1) } | MINUS expr2 { if isConstNum $2 then negConst $2 else DFunAp0 (DFun "neg" (getData $1)) [$2] (getData $1) } | expr2 { $1 } op3 :: { String } op3 : AST { "*" } | SLASH { "/" } expr2 :: { DExpr0 Pos } expr2 : tableExpr DOTHAT dhsFields { DFieldAcc0 $1 (reverse $3) (getData $1) } | tableExpr { DFieldAcc0 $1 [] (getData $1) } | var DOTHAT dhsFields {% mustbe $1 "e" >> return (DFieldAccE0 (DEdge (getData $1)) (reverse $3) (getData $1)) } | expr1 { $1 } dhsFields :: { [DField Pos] } dhsFields : field { [$1] } | dhsFields DOTHAT field { $3:$1 } tableExpr :: { DTableExpr Pos } tableExpr : CURR var { DCurr $2 (getData $1) } | PREV var { DPrev $2 (getData $1) } | VAL var { DVal $2 (getData $1) } expr1 :: { DExpr0 Pos } expr1 : var expr0s { DFunAp0 (v2f $1) (reverse $2) (getData $1) } | constructor expr0s { DConsAp0 $1 (reverse $2) (getData $1) } | agg LBRACKET expr PIPE gen csExprs RBRACKET { DAggr0 $1 $3 $5 (reverse $6) (getData $1) } | expr0 { $1 } | G_FREGEL var var termination var { DPregel0 $2 $3 $4 $5 (getData $1) } | G_GMAP var var { DGMap0 $2 $3 (getData $1) } | G_GZIP var var { DGZip0 $2 $3 (getData $1) } | G_GITER var var termination var { DGIter0 $2 $3 $4 $5 (getData $1) } termination :: { DTermination0 Pos } termination : TC_FIX { DTermF0 (getData $1) } | LPAREN TC_ITER expr RPAREN { DTermI0 $3 (getData $1) } | LPAREN TC_UNTIL predExpr RPAREN { DTermU0 $3 (getData $1) } | LPAREN TC_WHILE predExpr RPAREN { DTermU0 (DFunAp0 (DFun "not" (getData $1)) [$3] (getData $1)) (getData $1) } | LPAREN termination RPAREN { $2 } predExpr :: { DExpr0 Pos } predExpr : LPAREN BACKSLASH var RARROW expr RPAREN {% mustbe $3 "g" >> return $5 } expr0 :: { DExpr0 Pos } expr0 : constVal { DCExp0 $1 (getData $1) } | var { DVExp0 $1 (getData $1) } | LPAREN expr RPAREN { $2 } | LPAREN expr COMMA expr csExprs RPAREN { DTuple0 ($2:$4:reverse $5) (getData $1) } expr0s :: { [DExpr0 Pos] } expr0s : expr0 { [$1] } | expr0s expr0 { $2:$1 } csExprs :: { [DExpr0 Pos] } csExprs : {- empty -} { [] } | csExprs COMMA expr { $3:$1 } gen :: { DGen Pos } gen : LPAREN var COMMA var RPAREN LARROW IS var {% mustbe $2 "e" >> mustbe $4 "u" >> mustbe $8 "v" >> return (DGenI (getData $1)) } | LPAREN var COMMA var RPAREN LARROW RS var {% mustbe $2 "e" >> mustbe $4 "u" >> mustbe $8 "v" >> return (DGenO (getData $1)) } | var LARROW GOF var {% mustbe $1 "u" >> mustbe $4 "v" >> return (DGenG (getData $1)) } | var LARROW var {% mustbe $1 "u" >> mustbe $3 "v" >> return (DGenG (getData $1)) } agg :: { DAgg0 Pos } agg : A_MINIMUM { DAggMin0 (getData $1) } | A_MAXIMUM { DAggMax0 (getData $1) } | A_SUM { DAggSum0 (getData $1) } | A_PROD { DAggProd0 (getData $1) } | A_AND { DAggAnd0 (getData $1) } | A_OR { DAggOr0 (getData $1) } | A_RANDOM expr { DAggChoice0 $2 (getData $1) } constructor :: { DConstructor Pos } constructor : CONSTRUCTOR { DConstructor (strToken $1) (getData $1) } constVal :: { DConst Pos } constVal : INT { DCInt (readToken $1) (getData $1) } | FLOAT { DCDouble (readTokenF $1) (getData $1) } | BOOL { DCBool (readToken $1) (getData $1) } | STRING { DCString (strToken $1) (getData $1) } { -- misc functions -- extracs the string from a token strToken :: Lexeme -> String strToken (L _ _ s) = s -- reads data from a token readToken :: Read a => Lexeme -> a readToken = read . strToken -- reads data from a token readTokenF :: Lexeme -> Double readTokenF = read . correct . strToken where correct str = -- workaround for "0." let r = reverse str in reverse $ (if head r == '.' then ('0':) else id) r instance DAdditionalData Lexeme Pos where getData (L p t s) = p setData p (L _ t s) = L p t s -- a token must be a specific name mustbe :: DVar Pos -> String -> CM () mustbe (DVar name p) str = if name == str then return () else error $ "Parse error: variable name must be " ++ str ++ " at " ++ showPosn p parseError :: [Lexeme] -> a parseError ((L p t s):ts) = error $ "Parse error: " ++ s ++ " at " ++ showPosn p -- to make negative integral/float literals isConstNum :: DExpr0 Pos -> Bool isConstNum (DCExp0 (DCInt _ _) _) = True isConstNum (DCExp0 (DCDouble _ _) _) = True isConstNum _ = False negConst :: DExpr0 Pos -> DExpr0 Pos negConst (DCExp0 (DCInt i a) b) = (DCExp0 (DCInt (-i) a) b) negConst (DCExp0 (DCDouble d a) b) = (DCExp0 (DCDouble (-d) a) b) -- currently, these are meaningless data Params = Params deriving (Eq, Show) initParams = Params data CompilerState = CompilerState type CM a = State CompilerState a runCM m ps = runState m (initState ps) initState ps = CompilerState main = do str <- getContents let ast0 = parseString0 str ast = map convert0 ast0 print ast0 print ast {- for test use main = do getContents >>= mapM_ (\file -> check file >>= putStrLn . show) . lines -- checking function: parse = parse . prettyPrint . parse ? check file = do ast <- parseFile file print ast let pp = foldr (\x y -> x ++ "\n" ++ y) "" (map ppAST0 ast) putStrLn pp let ast2 = parseString0 pp print ast2 return (map (mapData (\_ -> "")) ast == map (mapData (\_ -> "")) ast2) --ignore line/pos -} --- misc functions to be used by other modules parseStringExpr0 str = let ts = snd (right (scanner str)) ast = fst $ runCM (exprparser ts) initParams in ast parseString0 :: String -> [DProgramSpec0 Pos] parseString0 str = let ts = snd (right (scanner str)) ast = fst $ runCM (fregelparser ts) initParams in ast parseString :: String -> [DProgramSpec String] parseString str = map (mapData (\_ -> "")) $ map convert0 $ parseString0 str parseFile file = do str <- readFile file return $ parseString str processFile file = do ast <- parseFile file print ast parseFile' file = do ast <- parseFile file return ast parseFile'' file = do asts <- parseFile file return (propRecs asts) where propRecs xs = let rss = concatMap (\(DProgramSpec rs _ _) -> rs) xs in map (\(DProgramSpec _ x a) -> DProgramSpec rss x a) xs parseString' ss = parseString ss }