-- Parser utility functions for Hugs Core. module Yhc.Core.FrontEnd.Hugs.ParseUtil where import Yhc.Core.Extra import Data.Char import Data.List import Data.Maybe import Text.ParserCombinators.Parsec import Text.ParserCombinators.Parsec.Token import Text.ParserCombinators.Parsec.Language (haskellDef) -- Tokenizer for Hugs Core, based on Haskell language definitions. tkp = makeTokenParser haskellDef { identStart = letter <|> oneOf "_'" ,identLetter = alphaNum <|> oneOf "_'" ,reservedNames = ["module", "data", "primitive", "case", "in", "of", "let", "let_", "_fatbar", "[]", "()", "::"] } -- State of the Hugs core parser data PState = PState { modName :: String -- current Core module name ,funName :: String -- current function name ,fbStack :: [Int] -- stack of FATBARs: number identifies -- a currently introduced FATBAR binding: -- the number will be appended to "v_fail_" -- If the stack contains only a zero, default -- binding is in effect, that is a code to throw -- a pattern match exception. -- to get a variable bound to the current FATBAR ,counter :: Int -- unique numbers producer ,autoFuncs :: [CoreFunc] -- any functions not included in the source Hugs Core -- and generated during the conversion. } -- Qualified identifier: data structure to distinguish between -- a function and a data constructor. data QUALID = QCTOR String | QFUNC String deriving (Show, Eq) -- Two-letter internal variable name generator. nameGen = [a : b : [] | a <- ['a' .. 'z'], b <- ['0' .. '9']] -- Parser. Reads in Hugs Core syntax, returns Yhc Core structure. -- ident = id [. id...] ident = (identifier tkp) `sepBy1` (dot tkp) >>= return . concat . intersperse "." -- Special names for data constructors: [], (), #n for tuples of n. -- Tuples will be recoded in Yhc way (,...,) nil = try $ squares tkp (return "[]") unit = try $ parens tkp (return "()") tuple = try $ do char '#' n <- natural tkp return $ "(" ++ replicate (fromIntegral n - 1) ',' ++ ")" -- Parser for a qualified (dot-separated) name, distinguishes -- between tuples (which are not qualified) functions and data constructors. -- Tuples will be artificially qualified with Prelude. pQUALID :: GenParser Char a QUALID pQUALID = (tuple >>= return . QCTOR . ("Hugs.Prelude;" ++)) <|> (try $ do let anyid = nil <|> infctor <|> unit <|> tuple <|> identifier tkp <|> operator tkp fstupr [] = False fstupr (h : t) = isUpper h funcid [] = False funcid (h : t) = isLower h || h `elem` "'_" infctid [] = False infctid (':' : _) = True infctid _ = False isoper s = either (const False) (const True) (parse (operator tkp) "" s) parts <- anyid `sepBy` dot tkp let strap = reverse parts x [] = (QCTOR, 0) x [_] = (QCTOR, 0) x (c : "Make" : md) | all fstupr (c : md) = (QCTOR, 2) x (f : md) | all fstupr md && funcid f = (QFUNC, 1) x (c : md) | all fstupr md && c `elem` ["[]", "()"] = (QCTOR, 1) x (c : md) | all fstupr md && infctid c = (QCTOR, 1) x (o : md) | all fstupr md && isoper o = (QFUNC, 1) x ps | all fstupr ps = (QCTOR, 1) | otherwise = (QCTOR, 0) case x strap of (_, 0) -> pzero (c, n) -> do let (m, f) = splitAt n strap (return . c . concat) $ reverse (intersperse "." m ++ [";"] ++ intersperse "." f)) -- Infix constructor: colon followed by an operator infctor = try $ do c <- char ':' o <- (operator tkp) <|> (reserved tkp "=" >> return "=") <|> (whiteSpace tkp >> return "") return (c : o) -- module = "module" id ";" [datas] [funcs] pModule :: GenParser Char PState Core pModule = do reserved tkp "module" mid <- ident semi tkp updateState (\st -> st {modName = mid}) datas <- many pData prims <- many pPrim funcs <- many pFunc cmts <- whiteSpace tkp autof <- getState >>= return . autoFuncs return $ Core { coreName = mid, coreImports = [], coreDatas = datas, coreFuncs = prims ++ funcs ++ autof} -- primitive = "primitive" QUALID args ";" pPrim = do reserved tkp "primitive" pid <- pQUALID pargs <- many ident semi tkp case pid of QCTOR _ -> pzero QFUNC pf -> return $ CorePrim { coreFuncName = pf, corePrimArity = length pargs, corePrimExternal = dropModule pf, corePrimConv = "hugsprim", corePrimImport = True, corePrimTypes = []} -- data = "data" id "=" [ctors] ";" pData = do reserved tkp "data" did <- (nil <|> unit <|> ident <|> tuple) symbol tkp "=" ctors <- pCtor `sepBy` (symbol tkp "|") semi tkp mid <- getState >>= return . modName return $ CoreData { coreDataName = mid ++ ";" ++ did, coreDataTypes = [], coreDataCtors = ctors} -- ctor = id ["*"] -- Constructor parser. pCtor = do cid <- (nil <|> infctor <|> unit <|> ident <|> tuple) stars <- many (symbol tkp "*") let cargs = take (length stars) nameGen cflds = zip cargs (repeat Nothing) mid <- getState >>= return . modName let qcid = mid ++ ";" ++ cid return $ CoreCtor { coreCtorName = qcid, coreCtorFields = cflds} -- function = id [id] "=" expr pFunc = do fname <- identifier tkp <|> parens tkp (operator tkp) fargs <- many ident symbol tkp "=" mid <- getState >>= return . modName let xfname = mid ++ ";" ++ fname updateState (\st -> st {funName = xfname}) fexpr <- pExpr semi tkp return $ CoreFunc { coreFuncName = xfname, coreFuncArgs = fargs, coreFuncBody = fexpr} -- Typed constants. They have format (value :: TYPE) where TYPE may be -- Int | Integer | Float | Double pTypConst = try $ parens tkp (tyFloat <|> tyInt) >>= return tyInt = try $ do n <- integer tkp reserved tkp "::" t <- many1 letter case t of "Int" -> return $ CoreLit (CoreInt $ fromIntegral n) "Integer" -> return $ CoreLit (CoreInteger n) _ -> fail "tyInt" tyFloat = try $ do n <- float tkp reserved tkp "::" t <- many1 letter case t of "Float" -> return $ CoreLit (CoreFloat $ realToFrac n) "Double" -> return $ CoreLit (CoreDouble n) _ -> fail "tyFloat" -- General expression parsers. pString = try $ do s <- stringLiteral tkp return $ CoreLit (CoreStr s) pChar = try $ do c <- charLiteral tkp return $ CoreLit (CoreChr c) pInt = try $ do n <- integer tkp return $ CoreLit (CoreInteger n) pFloat = try $ do f <- float tkp return $ CoreLit (CoreDouble f) pVar = do i <- identifier tkp return $ CoreVar i pQVar = try $ do q <- pQUALID let cf = case q of QCTOR c -> CoreCon c QFUNC f -> CoreFun f return cf pParExpr = try $ parens tkp pExpr >>= return pLetExpr = try $ do reserved tkp "let" bnds <- many1 pBind reserved tkp "in" e <- pExpr return $ coreLet bnds e pBind = try $ do vn <- ident symbol tkp "=" be <- pExpr semi tkp return (vn, be) -- FATBAR means that some expression will be evaluated as a default of a CASE. pFatBar = try $ do reserved tkp "let_" reserved tkp "_fatbar" symbol tkp "=" fbexpr <- pExpr semi tkp reserved tkp "in" doCase (Just fbexpr) -- This means that CASE reuses the currently bound FATBAR variable. -- If no FATBAR has been specified, this is a pattern match failure. pCase = doCase Nothing -- Common part for all CASE constructions. If new FATBAR was introduced. -- it creates a new binding for a "v_fail_N" variable. Otherwise -- it just generates a CASE construction. If a stack is empty (only -- 0 on top), failure expression will be bound to default failure -- handler (which throws a pattern match exception). doCase mbfb = try $ do reserved tkp "case" scrut <- pExpr reserved tkp "of" doCase' mbfb scrut -- A new FATBAR binding was introduced: push it on the stack and -- bind default pattern to it. doCase' (Just fb) scrut = do cnt <- getCount updateState (\st -> st {fbStack = cnt : fbStack st}) let failvar = "v_fail_" ++ show cnt cc <- doCase' Nothing scrut updateState (\st -> st {fbStack = tail (fbStack st)}) return $ coreLet [(failvar, fb)] cc -- No FATBAR was introduced. Use whatever is on the stack. If 0 -- is on the stack, bind default pattern to the default failure -- handler, otherwise to the failure variable whose name is -- determined by the stack contents. doCase' Nothing scrut = do fbnum <- getState >>= return . head . fbStack fun <- getState >>= return . funName let failvar = "v_fail_" ++ show fbnum cpats <- braces tkp ((pConPat <|> pCharPat <|> pDefPat failvar) `sepBy1` semi tkp) let dflet = if fbnum == 0 then coreLet [(failvar, defFailExpr fun)] else id cc = CoreCase scrut cpats return $ dflet cc -- Character literal pattern. pCharPat = try $ do (CoreLit c) <- pChar symbol tkp "->" patexp <- pExpr return (PatLit {patLit = c}, patexp) -- Default pattern is given a variable bound to a current FATBAR. If -- no actual expression is specified to handle the default case, -- FATBAR will be invoked. pDefPat fv = try $ do symbol tkp "_" symbol tkp "->" defexp <- (try (symbol tkp "_fatbar" >> return Nothing)) <|> (pExpr >>= return . Just) let patexp = fromMaybe (CoreVar fv) defexp return (PatDefault, patexp) -- Conctructor pattern takes care that the name given corresponds to -- a constructor. pConPat = try $ do q <- pQUALID cargs <- many ident symbol tkp "->" patexp <- pExpr case q of QCTOR c -> return (PatCon {patCon = c, patVars = cargs}, patexp) _ -> pzero -- A simple expression: one that does not include function application, -- or a parenthesized expression (i. e. with one term at tolevel) pSmpExpr = pString <|> pChar <|> pTypConst <|> pFloat <|> pInt <|> pQVar <|> pVar <|> pLetExpr <|> pCase <|> pFatBar <|> pParExpr -- Toplevel expression that may be an application pExpr :: GenParser Char PState CoreExpr pExpr = try $ do (f:es) <- many1 pSmpExpr return $ coreApp f es -- Default failure expression -- Prelude.throw (Hugs.Prelude.PatternMatchFail fun) defFailExpr fun = let failmsg = "pattern match failure in function " ++ fun primthrow = CoreFun "Hugs.Prelude;throw" exctor = CoreCon "Hugs.Prelude;PatternMatchFail" litstr = CoreLit (CoreStr failmsg) in coreApp primthrow [coreApp exctor [litstr]] -- Calling this function returns the current unique value counter -- and increments the counter. getCount = do cnt <- getState >>= return . counter updateState (\st -> st {counter = cnt + 1}) return $ cnt + 1