module BrownPLT.JavaScript.Parser
(parseScript
, parseExpression
, parseString
, parseScriptFromString
, emptyParsedJavaScript
, ParsedStatement
, ParsedExpression
, parseJavaScriptFromFile
, parseSimpleExpr'
, parseBlockStmt
, parseStatement
, StatementParser
, ExpressionParser
, parseAssignExpr
) where
import BrownPLT.JavaScript.Lexer hiding (identifier)
import qualified BrownPLT.JavaScript.Lexer as Lexer
import BrownPLT.JavaScript.Syntax
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Expr
import Control.Monad(liftM,liftM2)
import Control.Monad.Trans (MonadIO,liftIO)
import Numeric(readDec,readOct,readHex)
import Data.Char(chr)
import Data.Char
type ParsedStatement = Statement SourcePos
type ParsedExpression = Expression SourcePos
type StatementParser state = CharParser state ParsedStatement
type ExpressionParser state = CharParser state ParsedExpression
identifier =
liftM2 Id getPosition Lexer.identifier
parseIfStmt:: StatementParser st
parseIfStmt = do
pos <- getPosition
reserved "if"
test <- parseParenExpr <?> "parenthesized test-expression in if statement"
consequent <- parseStatement <?> "true-branch of if statement"
optional semi
((do reserved "else"
alternate <- parseStatement
return (IfStmt pos test consequent alternate))
<|> return (IfSingleStmt pos test consequent))
parseSwitchStmt:: StatementParser st
parseSwitchStmt =
let parseDefault = do
pos <- getPosition
reserved "default"
colon
statements <- many parseStatement
return (CaseDefault pos statements)
parseCase = do
pos <- getPosition
reserved "case"
condition <- parseListExpr
colon
actions <- many parseStatement
return (CaseClause pos condition actions)
in do pos <- getPosition
reserved "switch"
test <- parseParenExpr
clauses <- braces $ many $ parseDefault <|> parseCase
return (SwitchStmt pos test clauses)
parseWhileStmt:: StatementParser st
parseWhileStmt = do
pos <- getPosition
reserved "while"
test <- parseParenExpr <?> "parenthesized test-expression in while loop"
body <- parseStatement
return (WhileStmt pos test body)
parseDoWhileStmt:: StatementParser st
parseDoWhileStmt = do
pos <- getPosition
reserved "do"
body <- parseBlockStmt
reserved "while" <?> "while at the end of a do block"
test <- parseParenExpr <?> "parenthesized test-expression in do loop"
optional semi
return (DoWhileStmt pos body test)
parseContinueStmt:: StatementParser st
parseContinueStmt = do
pos <- getPosition
reserved "continue"
pos' <- getPosition
id <- (if (sourceLine pos == sourceLine pos')
then (liftM Just identifier) <|> (return Nothing)
else return Nothing)
return (ContinueStmt pos id)
parseBreakStmt:: StatementParser st
parseBreakStmt = do
pos <- getPosition
reserved "break"
pos' <- getPosition
id <- (if (sourceLine pos == sourceLine pos')
then (liftM Just identifier) <|> (return Nothing)
else return Nothing)
optional semi
return (BreakStmt pos id)
parseBlockStmt:: StatementParser st
parseBlockStmt = do
pos <- getPosition
statements <- braces (many parseStatement)
return (BlockStmt pos statements)
parseEmptyStmt:: StatementParser st
parseEmptyStmt = do
pos <- getPosition
semi
return (EmptyStmt pos)
parseLabelledStmt:: StatementParser st
parseLabelledStmt = do
pos <- getPosition
label <- try (do label <- identifier
colon
return label)
statement <- parseStatement
return (LabelledStmt pos label statement)
parseExpressionStmt:: StatementParser st
parseExpressionStmt = do
pos <- getPosition
expr <- parseListExpr
optional semi
return (ExprStmt pos expr)
parseForInStmt:: StatementParser st
parseForInStmt =
let parseInit = (reserved "var" >> liftM ForInVar identifier)
<|> (liftM ForInNoVar identifier)
in do pos <- getPosition
(init,expr) <- try (do reserved "for"
parens (do init <- parseInit
reserved "in"
expr <- parseExpression
return (init,expr)))
body <- parseStatement
return (ForInStmt pos init expr body)
parseForStmt:: StatementParser st
parseForStmt =
let parseInit =
(reserved "var" >> liftM VarInit (parseVarDecl `sepBy` comma)) <|>
(liftM ExprInit parseListExpr) <|>
(return NoInit)
in do pos <- getPosition
reserved "for"
reservedOp "("
init <- parseInit
semi
test <- (liftM Just parseExpression) <|> (return Nothing)
semi
iter <- (liftM Just parseListExpr) <|> (return Nothing)
reservedOp ")" <?> "closing paren"
stmt <- parseStatement
return (ForStmt pos init test iter stmt)
parseTryStmt:: StatementParser st
parseTryStmt =
let parseCatchClause = do
pos <- getPosition
reserved "catch"
id <- parens identifier
stmt <- parseStatement
return (CatchClause pos id stmt)
in do reserved "try"
pos <- getPosition
guarded <- parseStatement
catches <- many parseCatchClause
finally <- (reserved "finally" >> liftM Just parseStatement)
<|> (return Nothing)
return (TryStmt pos guarded catches finally)
parseThrowStmt:: StatementParser st
parseThrowStmt = do
pos <- getPosition
reserved "throw"
expr <- parseExpression
optional semi
return (ThrowStmt pos expr)
parseReturnStmt:: StatementParser st
parseReturnStmt = do
pos <- getPosition
reserved "return"
expr <- (liftM Just parseListExpr) <|> (return Nothing)
optional semi
return (ReturnStmt pos expr)
parseWithStmt:: StatementParser st
parseWithStmt = do
pos <- getPosition
reserved "with"
context <- parseParenExpr
stmt <- parseStatement
return (WithStmt pos context stmt)
parseVarDecl = do
pos <- getPosition
id <- identifier
init <- (reservedOp "=" >> liftM Just parseExpression) <|> (return Nothing)
return (VarDecl pos id init)
parseVarDeclStmt:: StatementParser st
parseVarDeclStmt = do
pos <- getPosition
reserved "var"
decls <- parseVarDecl `sepBy` comma
optional semi
return (VarDeclStmt pos decls)
parseFunctionStmt:: StatementParser st
parseFunctionStmt = do
pos <- getPosition
name <- try (reserved "function" >> identifier)
args <- parens (identifier `sepBy` comma)
body <- parseBlockStmt <?> "function body in { ... }"
return (FunctionStmt pos name args body)
parseStatement:: StatementParser st
parseStatement = parseIfStmt <|> parseSwitchStmt <|> parseWhileStmt
<|> parseDoWhileStmt <|> parseContinueStmt <|> parseBreakStmt
<|> parseBlockStmt <|> parseEmptyStmt <|> parseForInStmt <|> parseForStmt
<|> parseTryStmt <|> parseThrowStmt <|> parseReturnStmt <|> parseWithStmt
<|> parseVarDeclStmt <|> parseFunctionStmt
<|> parseLabelledStmt <|> parseExpressionStmt <?> "statement"
--}}}
parseThisRef:: ExpressionParser st
parseThisRef = do
pos <- getPosition
reserved "this"
return (ThisRef pos)
parseNullLit:: ExpressionParser st
parseNullLit = do
pos <- getPosition
reserved "null"
return (NullLit pos)
parseBoolLit:: ExpressionParser st
parseBoolLit = do
pos <- getPosition
let parseTrueLit = reserved "true" >> return (BoolLit pos True)
parseFalseLit = reserved "false" >> return (BoolLit pos False)
parseTrueLit <|> parseFalseLit
parseVarRef:: ExpressionParser st
parseVarRef = liftM2 VarRef getPosition identifier
parseArrayLit:: ExpressionParser st
parseArrayLit = liftM2 ArrayLit getPosition (squares (parseExpression `sepBy` comma))
parseFuncExpr = do
pos <- getPosition
reserved "function"
args <- parens (identifier `sepBy` comma)
body <- parseBlockStmt
return $ FuncExpr pos args body
escapeChars =
[('\'','\''),('\"','\"'),('\\','\\'),('b','\b'),('f','\f'),('n','\n'),
('r','\r'),('t','\t'),('v','\v'),('/','/'),(' ',' ')]
allEscapes:: String
allEscapes = map fst escapeChars
parseEscapeChar = do
c <- oneOf allEscapes
let (Just c') = lookup c escapeChars
return c'
parseAsciiHexChar = do
char 'x'
d1 <- hexDigit
d2 <- hexDigit
return ((chr.fst.head.readHex) (d1:d2:""))
parseUnicodeHexChar = do
char 'u'
liftM (chr.fst.head.readHex)
(sequence [hexDigit,hexDigit,hexDigit,hexDigit])
isWhitespace ch = ch `elem` " \t"
parseStringLit' endWith =
(char endWith >> return "") <|>
(do try (string "\\'")
cs <- parseStringLit' endWith
return $ "'" ++ cs) <|>
(do char '\\'
c <- parseEscapeChar <|> parseAsciiHexChar <|> parseUnicodeHexChar <|>
char '\r' <|> char '\n'
cs <- parseStringLit' endWith
if c == '\r' || c == '\n'
then return (c:(dropWhile isWhitespace cs))
else return (c:cs)) <|>
(liftM2 (:) anyChar (parseStringLit' endWith))
parseStringLit:: ExpressionParser st
parseStringLit = do
pos <- getPosition
str <- lexeme $ (char '\'' >>= parseStringLit') <|> (char '\"' >>= parseStringLit')
return $ StringLit pos str
--}}}
parseRegexpLit:: ExpressionParser st
parseRegexpLit = do
let parseFlags = do
flags <- many (oneOf "mgi")
return $ \f -> f ('g' `elem` flags) ('i' `elem` flags)
let parseEscape = char '\\' >> anyChar
let parseChar = noneOf "/"
let parseRe = (char '/' >> return "") <|>
(do char '\\'
ch <- anyChar
rest <- parseRe
return ('\\':ch:rest)) <|>
(liftM2 (:) anyChar parseRe)
pos <- getPosition
char '/'
pat <- parseRe
flags <- parseFlags
spaces
return $ flags (RegexpLit pos pat)
parseObjectLit:: ExpressionParser st
parseObjectLit =
let parseProp = do
name <- (liftM (uncurry PropString)
(liftM (\(StringLit p s) -> (p,s)) parseStringLit))
<|> (liftM2 PropId getPosition identifier)
<|> (liftM2 PropNum getPosition decimal)
colon
val <- parseAssignExpr
return (name,val)
in do pos <- getPosition
props <- braces (parseProp `sepEndBy` comma) <?> "object literal"
return $ ObjectLit pos props
hexLit = do
try (string "0x")
digits <- many1 (oneOf "0123456789abcdefABCDEF")
[(hex,"")] <- return $ Numeric.readHex digits
return (True, hex)
mkDecimal:: Double -> Double -> Int -> Double
mkDecimal w f e =
if (f >= 1.0)
then mkDecimal w (f / 10.0) e
else (w + f) * (10.0 ^^ e)
exponentPart = do
oneOf "eE"
(char '+' >> decimal) <|> (char '-' >> negate `fmap` decimal) <|> decimal
jparser p = p >>= (return . Just)
decLit =
(do whole <- decimal
mfrac <- option Nothing (jparser (char '.' >> decimal))
mexp <- option Nothing (jparser exponentPart)
if (mfrac == Nothing && mexp == Nothing)
then return (True, fromIntegral whole)
else return (False, mkDecimal (fromIntegral whole)
(fromIntegral (maybe 0 id mfrac))
(fromIntegral (maybe 0 id mexp)))) <|>
(do frac <- char '.' >> decimal
exp <- option 0 exponentPart
return (False, mkDecimal 0.0 (fromIntegral frac) (fromIntegral exp)))
parseNumLit:: ExpressionParser st
parseNumLit = do
pos <- getPosition
(isint, num) <- lexeme $ hexLit <|> decLit
notFollowedBy identifierStart <?> "whitespace"
if isint
then return $ IntLit pos (round num)
else return $ NumLit pos num
withPos cstr p = do { pos <- getPosition; e <- p; return $ cstr pos e }
dotRef e = (reservedOp "." >> withPos cstr identifier) <?> "property.ref"
where cstr pos key = DotRef pos e key
funcApp e = (parens $ withPos cstr (parseExpression `sepBy` comma)) <?> "(function application)"
where cstr pos args = CallExpr pos e args
bracketRef e = (brackets $ withPos cstr parseExpression) <?> "[property-ref]"
where cstr pos key = BracketRef pos e key
parseParenExpr:: ExpressionParser st
parseParenExpr = withPos ParenExpr (parens parseListExpr)
parseExprForNew = parseThisRef <|> parseNullLit <|> parseBoolLit <|> parseStringLit
<|> parseArrayLit <|> parseParenExpr <|> parseNewExpr <|> parseNumLit
<|> parseRegexpLit <|> parseObjectLit <|> parseVarRef
parseSimpleExpr' = parseThisRef <|> parseNullLit <|> parseBoolLit
<|> parseStringLit <|> parseArrayLit <|> parseParenExpr
<|> parseFuncExpr <|> parseNumLit <|> parseRegexpLit <|> parseObjectLit
<|> parseVarRef
parseNewExpr =
(do pos <- getPosition
reserved "new"
constructor <- parseSimpleExprForNew Nothing
arguments <- (try (parens (parseExpression `sepBy` comma))) <|> (return [])
return (NewExpr pos constructor arguments)) <|>
parseSimpleExpr'
parseSimpleExpr (Just e) = (do
e' <- dotRef e <|> funcApp e <|> bracketRef e
parseSimpleExpr $ Just e') <|> (return e)
parseSimpleExpr Nothing = do
e <- parseNewExpr <?> "expression (3)"
parseSimpleExpr (Just e)
parseSimpleExprForNew (Just e) = (do
e' <- dotRef e <|> bracketRef e
parseSimpleExprForNew $ Just e') <|> (return e)
parseSimpleExprForNew Nothing = do
e <- parseNewExpr <?> "expression (3)"
parseSimpleExprForNew (Just e)
--}}}
makeInfixExpr str constr = Infix parser AssocLeft where
parser:: CharParser st (Expression SourcePos -> Expression SourcePos -> Expression SourcePos)
parser = do
pos <- getPosition
reservedOp str
return (InfixExpr pos constr)
makePrefixExpr str constr = Prefix parser where
parser = do
pos <- getPosition
(reservedOp str <|> reserved str)
return (PrefixExpr pos constr)
mkPrefix operator constr = Prefix $ do
pos <- getPosition
operator
return (\operand -> PrefixExpr pos constr operand)
makePostfixExpr str constr = Postfix parser where
parser = do
pos <- getPosition
(reservedOp str <|> reserved str)
return (PostfixExpr pos constr)
prefixIncDecExpr = do
pos <- getPosition
op <- optionMaybe $ (reservedOp "++" >> return PrefixInc) <|>
(reservedOp "--" >> return PrefixDec)
case op of
Nothing -> parseSimpleExpr Nothing
Just op -> do
innerExpr <- parseSimpleExpr Nothing
return (PrefixExpr pos op innerExpr)
parsePrefixedExpr = do
pos <- getPosition
op <- optionMaybe $ (reservedOp "!" >> return PrefixLNot) <|>
(reservedOp "~" >> return PrefixBNot) <|>
(try (lexeme $ char '-' >> notFollowedBy (char '-')) >>
return PrefixMinus) <|>
(try (lexeme $ char '+' >> notFollowedBy (char '+')) >>
return PrefixPlus) <|>
(reserved "typeof" >> return PrefixTypeof) <|>
(reserved "void" >> return PrefixVoid) <|>
(reserved "delete" >> return PrefixDelete)
case op of
Nothing -> prefixIncDecExpr
Just op -> do
innerExpr <- parsePrefixedExpr
return (PrefixExpr pos op innerExpr)
exprTable:: [[Operator Char st ParsedExpression]]
exprTable =
[
[makePrefixExpr "++" PrefixInc,
makePostfixExpr "++" PostfixInc],
[makePrefixExpr "--" PrefixDec,
makePostfixExpr "--" PostfixDec],
[makeInfixExpr "*" OpMul, makeInfixExpr "/" OpDiv, makeInfixExpr "%" OpMod],
[makeInfixExpr "+" OpAdd, makeInfixExpr "-" OpSub],
[makeInfixExpr "<<" OpLShift, makeInfixExpr ">>" OpSpRShift,
makeInfixExpr ">>>" OpZfRShift],
[makeInfixExpr "<" OpLT, makeInfixExpr "<=" OpLEq, makeInfixExpr ">" OpGT,
makeInfixExpr ">=" OpGEq,
makeInfixExpr "instanceof" OpInstanceof, makeInfixExpr "in" OpIn],
[makeInfixExpr "&" OpBAnd],
[makeInfixExpr "^" OpBXor],
[makeInfixExpr "|" OpBOr],
[makeInfixExpr "&&" OpLAnd],
[makeInfixExpr "||" OpLOr],
[makeInfixExpr "==" OpEq, makeInfixExpr "!=" OpNEq,
makeInfixExpr "===" OpStrictEq, makeInfixExpr "!==" OpStrictNEq]
]
parseExpression' =
buildExpressionParser exprTable parsePrefixedExpr <?> "simple expression"
parseTernaryExpr':: CharParser st (ParsedExpression,ParsedExpression)
parseTernaryExpr' = do
reservedOp "?"
l <- parseAssignExpr
colon
r <- parseAssignExpr
return $(l,r)
parseTernaryExpr:: ExpressionParser st
parseTernaryExpr = do
e <- parseExpression'
e' <- optionMaybe parseTernaryExpr'
case e' of
Nothing -> return e
Just (l,r) -> do p <- getPosition
return $ CondExpr p e l r
--}}}
makeAssignExpr str constr = Infix parser AssocRight where
parser:: CharParser st (ParsedExpression -> ParsedExpression -> ParsedExpression)
parser = do
pos <- getPosition
reservedOp str
return (AssignExpr pos constr)
assignTable:: [[Operator Char st ParsedExpression]]
assignTable = [
[makeAssignExpr "=" OpAssign, makeAssignExpr "+=" OpAssignAdd,
makeAssignExpr "-=" OpAssignSub, makeAssignExpr "*=" OpAssignMul,
makeAssignExpr "/=" OpAssignDiv, makeAssignExpr "%=" OpAssignMod,
makeAssignExpr "<<=" OpAssignLShift, makeAssignExpr ">>=" OpAssignSpRShift,
makeAssignExpr ">>>=" OpAssignZfRShift, makeAssignExpr "&=" OpAssignBAnd,
makeAssignExpr "^=" OpAssignBXor, makeAssignExpr "|=" OpAssignBOr
]]
parseAssignExpr:: ExpressionParser st
parseAssignExpr = buildExpressionParser assignTable parseTernaryExpr
parseExpression:: ExpressionParser st
parseExpression = parseAssignExpr
parseListExpr =
liftM2 ListExpr getPosition (parseAssignExpr `sepBy1` comma)
--}}}
parseScript:: CharParser state (JavaScript SourcePos)
parseScript = do
whiteSpace
liftM2 Script getPosition (parseStatement `sepBy` whiteSpace)
parseJavaScriptFromFile :: MonadIO m => String -> m [Statement SourcePos]
parseJavaScriptFromFile filename = do
chars <- liftIO $ readFile filename
case parse parseScript filename chars of
Left err -> fail (show err)
Right (Script _ stmts) -> return stmts
parseScriptFromString :: String -> String
-> Either ParseError (JavaScript SourcePos)
parseScriptFromString src script = parse parseScript src script
emptyParsedJavaScript =
Script (error "Parser.emptyParsedJavaScript--no annotation") []
parseString :: String -> [Statement SourcePos]
parseString str = case parse parseScript "" str of
Left err -> error (show err)
Right (Script _ stmts) -> stmts