{-# LANGUAGE BangPatterns, GADTs #-}

module MkGraph
  ( CmmAGraph, CmmAGraphScoped, CgStmt(..)
  , (<*>), catAGraphs
  , mkLabel, mkMiddle, mkLast, outOfLine
  , lgraphOfAGraph, labelAGraph

  , stackStubExpr
  , mkNop, mkAssign, mkStore
  , mkUnsafeCall, mkFinalCall, mkCallReturnsTo
  , mkJumpReturnsTo
  , mkJump, mkJumpExtra
  , mkRawJump
  , mkCbranch, mkSwitch
  , mkReturn, mkComment, mkCallEntry, mkBranch
  , mkUnwind
  , copyInOflow, copyOutOflow
  , noExtraStack
  , toCall, Transfer(..)
  )
where

import GhcPrelude hiding ( (<*>) ) -- avoid importing (<*>)

import BlockId
import Cmm
import CmmCallConv
import CmmSwitch (SwitchTargets)

import Hoopl.Block
import Hoopl.Graph
import Hoopl.Label
import DynFlags
import FastString
import ForeignCall
import OrdList
import SMRep (ByteOff)
import UniqSupply
import Util
import Panic


-----------------------------------------------------------------------------
-- Building Graphs

-- | CmmAGraph is a chunk of code consisting of:
--
--   * ordinary statements (assignments, stores etc.)
--   * jumps
--   * labels
--   * out-of-line labelled blocks
--
-- The semantics is that control falls through labels and out-of-line
-- blocks.  Everything after a jump up to the next label is by
-- definition unreachable code, and will be discarded.
--
-- Two CmmAGraphs can be stuck together with <*>, with the meaning that
-- control flows from the first to the second.
--
-- A 'CmmAGraph' can be turned into a 'CmmGraph' (closed at both ends)
-- by providing a label for the entry point and a tick scope; see
-- 'labelAGraph'.
type CmmAGraph = OrdList CgStmt

-- | Unlabeled graph with tick scope
type CmmAGraphScoped = (CmmAGraph, CmmTickScope)

data CgStmt
  = CgLabel BlockId CmmTickScope
  | CgStmt  (CmmNode O O)
  | CgLast  (CmmNode O C)
  | CgFork  BlockId CmmAGraph CmmTickScope

flattenCmmAGraph :: BlockId -> CmmAGraphScoped -> CmmGraph
flattenCmmAGraph id (stmts_t, tscope) =
    CmmGraph { g_entry = id,
               g_graph = GMany NothingO body NothingO }
  where
  body = foldr addBlock emptyBody $ flatten id stmts_t tscope []

  --
  -- flatten: given an entry label and a CmmAGraph, make a list of blocks.
  --
  -- NB. avoid the quadratic-append trap by passing in the tail of the
  -- list.  This is important for Very Long Functions (e.g. in T783).
  --
  flatten :: Label -> CmmAGraph -> CmmTickScope -> [Block CmmNode C C]
          -> [Block CmmNode C C]
  flatten id g tscope blocks
      = flatten1 (fromOL g) block' blocks
      where !block' = blockJoinHead (CmmEntry id tscope) emptyBlock

  --
  -- flatten0: we are outside a block at this point: any code before
  -- the first label is unreachable, so just drop it.
  --
  flatten0 :: [CgStmt] -> [Block CmmNode C C] -> [Block CmmNode C C]
  flatten0 [] blocks = blocks

  flatten0 (CgLabel id tscope : stmts) blocks
    = flatten1 stmts block blocks
    where !block = blockJoinHead (CmmEntry id tscope) emptyBlock

  flatten0 (CgFork fork_id stmts_t tscope : rest) blocks
    = flatten fork_id stmts_t tscope $ flatten0 rest blocks

  flatten0 (CgLast _ : stmts) blocks = flatten0 stmts blocks
  flatten0 (CgStmt _ : stmts) blocks = flatten0 stmts blocks
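  --
  -- A worked example of the flattening semantics (illustrative only;
  -- @l@, @s1@, @s2@, @dead@, @ret@ and @tscp@ are hypothetical names).
  -- Flattening
  --
  -- > mkMiddle s1 <*> mkBranch l <*> mkMiddle dead
  -- >   <*> mkLabel l tscp <*> mkMiddle s2 <*> mkLast ret
  --
  -- produces two blocks: the entry block, holding s1 and ending in the
  -- branch, and a block labelled l, holding s2 and ending in ret.  The
  -- statement @dead@ sits between a jump and the next label, so
  -- flatten0 drops it as unreachable.
  --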
  --
  -- flatten1: we have a partial block; collect statements until the
  -- next last node to make a block, then call flatten0 to get the rest
  -- of the blocks.
  --
  flatten1 :: [CgStmt] -> Block CmmNode C O
           -> [Block CmmNode C C] -> [Block CmmNode C C]

  -- The current block falls through to the end of a function or fork:
  -- this code should not be reachable, but it may be referenced by
  -- other code that is not reachable.  We'll remove it later with
  -- dead-code analysis, but for now we have to keep the graph
  -- well-formed, so we terminate the block with a branch to the
  -- beginning of the current block.
  flatten1 [] block blocks
    = blockJoinTail block (CmmBranch (entryLabel block)) : blocks

  flatten1 (CgLast stmt : stmts) block blocks
    = block' : flatten0 stmts blocks
    where !block' = blockJoinTail block stmt

  flatten1 (CgStmt stmt : stmts) block blocks
    = flatten1 stmts block' blocks
    where !block' = blockSnoc block stmt

  flatten1 (CgFork fork_id stmts_t tscope : rest) block blocks
    = flatten fork_id stmts_t tscope $ flatten1 rest block blocks

  -- a label here means that we should start a new block, and the
  -- current block should fall through to the new block.
  flatten1 (CgLabel id tscp : stmts) block blocks
    = blockJoinTail block (CmmBranch id) :
      flatten1 stmts (blockJoinHead (CmmEntry id tscp) emptyBlock) blocks


---------- AGraph manipulation

(<*>)          :: CmmAGraph -> CmmAGraph -> CmmAGraph
(<*>)           = appOL

catAGraphs     :: [CmmAGraph] -> CmmAGraph
catAGraphs      = concatOL

-- | creates a sequence "goto id; id:" as an AGraph
mkLabel        :: BlockId -> CmmTickScope -> CmmAGraph
mkLabel bid scp = unitOL (CgLabel bid scp)

-- | creates an open AGraph from a given node
mkMiddle        :: CmmNode O O -> CmmAGraph
mkMiddle middle = unitOL (CgStmt middle)

-- | creates a closed AGraph from a given node
mkLast         :: CmmNode O C -> CmmAGraph
mkLast last     = unitOL (CgLast last)

-- | A labelled code block; should end in a last node
outOfLine      :: BlockId -> CmmAGraphScoped -> CmmAGraph
outOfLine l (c,s) = unitOL (CgFork l c s)

-- | allocate a fresh label for the entry point
lgraphOfAGraph :: CmmAGraphScoped -> UniqSM CmmGraph
lgraphOfAGraph g = do
  u <- getUniqueM
  return (labelAGraph (mkBlockId u) g)

-- | use the given BlockId as the label of the entry point
labelAGraph    :: BlockId -> CmmAGraphScoped -> CmmGraph
labelAGraph lbl ag = flattenCmmAGraph lbl ag
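-- A minimal sketch of how the combinators above fit together
-- (illustrative only; @entry@, @k@, @reg@, @e@ and @tscp@ are assumed
-- to be in scope with the obvious types):
--
-- > g :: CmmGraph
-- > g = labelAGraph entry
-- >       (mkAssign (CmmLocal reg) e <*> mkBranch k, tscp)
--
-- Control flows from the assignment to the branch, and @entry@ becomes
-- the label of the entry block when the AGraph is flattened.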
---------- No-ops
mkNop        :: CmmAGraph
mkNop         = nilOL

mkComment    :: FastString -> CmmAGraph
-- SDM: generating all those comments takes time, this saved about 4% for me
mkComment fs
  | debugIsOn = mkMiddle $ CmmComment fs
  | otherwise = nilOL

---------- Assignment and store
mkAssign     :: CmmReg -> CmmExpr -> CmmAGraph
mkAssign l (CmmReg r) | l == r = mkNop
mkAssign l r = mkMiddle $ CmmAssign l r

mkStore      :: CmmExpr -> CmmExpr -> CmmAGraph
mkStore  l r = mkMiddle $ CmmStore l r

---------- Control transfer
mkJump          :: DynFlags -> Convention -> CmmExpr
                -> [CmmExpr]
                -> UpdFrameOffset
                -> CmmAGraph
mkJump dflags conv e actuals updfr_off =
  lastWithArgs dflags Jump Old conv actuals updfr_off $
    toCall e Nothing updfr_off 0

-- | A jump where the caller says what the live GlobalRegs are.  Used
-- for low-level hand-written Cmm.
mkRawJump       :: DynFlags -> CmmExpr -> UpdFrameOffset -> [GlobalReg]
                -> CmmAGraph
mkRawJump dflags e updfr_off vols =
  lastWithArgs dflags Jump Old NativeNodeCall [] updfr_off $
    \arg_space _ -> toCall e Nothing updfr_off 0 arg_space vols

mkJumpExtra     :: DynFlags -> Convention -> CmmExpr -> [CmmExpr]
                -> UpdFrameOffset -> [CmmExpr]
                -> CmmAGraph
mkJumpExtra dflags conv e actuals updfr_off extra_stack =
  lastWithArgsAndExtraStack dflags Jump Old conv actuals updfr_off extra_stack $
    toCall e Nothing updfr_off 0

mkCbranch       :: CmmExpr -> BlockId -> BlockId -> Maybe Bool -> CmmAGraph
mkCbranch pred ifso ifnot likely =
  mkLast (CmmCondBranch pred ifso ifnot likely)

mkSwitch        :: CmmExpr -> SwitchTargets -> CmmAGraph
mkSwitch e tbl   = mkLast $ CmmSwitch e tbl

mkReturn        :: DynFlags -> CmmExpr -> [CmmExpr] -> UpdFrameOffset
                -> CmmAGraph
mkReturn dflags e actuals updfr_off =
  lastWithArgs dflags Ret Old NativeReturn actuals updfr_off $
    toCall e Nothing updfr_off 0

mkBranch        :: BlockId -> CmmAGraph
mkBranch bid     = mkLast (CmmBranch bid)

mkFinalCall     :: DynFlags -> CmmExpr -> CCallConv -> [CmmExpr]
                -> UpdFrameOffset
                -> CmmAGraph
mkFinalCall dflags f _ actuals updfr_off =
  lastWithArgs dflags Call Old NativeDirectCall actuals updfr_off $
    toCall f Nothing updfr_off 0

mkCallReturnsTo :: DynFlags -> CmmExpr -> Convention -> [CmmExpr]
                -> BlockId
                -> ByteOff
                -> UpdFrameOffset
                -> [CmmExpr]
                -> CmmAGraph
mkCallReturnsTo dflags f callConv actuals ret_lbl ret_off updfr_off extra_stack =
  lastWithArgsAndExtraStack dflags Call (Young ret_lbl) callConv actuals
     updfr_off extra_stack $
       toCall f (Just ret_lbl) updfr_off ret_off

-- Like mkCallReturnsTo, but does not push the return address (it is
-- assumed to be already on the stack).
mkJumpReturnsTo :: DynFlags -> CmmExpr -> Convention -> [CmmExpr]
                -> BlockId
                -> ByteOff
                -> UpdFrameOffset
                -> CmmAGraph
mkJumpReturnsTo dflags f callConv actuals ret_lbl ret_off updfr_off =
  lastWithArgs dflags JumpRet (Young ret_lbl) callConv actuals updfr_off $
    toCall f (Just ret_lbl) updfr_off ret_off

mkUnsafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> CmmAGraph
mkUnsafeCall t fs as = mkMiddle $ CmmUnsafeForeignCall t fs as

-- | Construct a 'CmmUnwind' node for the given register and unwinding
-- expression.
mkUnwind :: GlobalReg -> CmmExpr -> CmmAGraph
mkUnwind r e = mkMiddle $ CmmUnwind [(r, Just e)]
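-- A common pattern combining 'mkCbranch' and 'outOfLine' (a sketch;
-- @cond@, @slow_id@, @cont_id@, @slow_code@ and @tscp@ are assumed to
-- be in scope):
--
-- > mkCbranch cond slow_id cont_id Nothing
-- >   <*> outOfLine slow_id (slow_code, tscp)
-- >   <*> mkLabel cont_id tscp
--
-- The out-of-line block does not interrupt fall-through: in this
-- sketch only the conditional branch targets @slow_id@, while the
-- main path continues at @cont_id@.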
--------------------------------------------------------------------------
-- Why are we inserting extra blocks that simply branch to the successors?
-- Because in addition to the branch instruction, @mkBranch@ will insert
-- a necessary adjustment to the stack pointer.

-- For debugging purposes, we can stub out dead stack slots:
stackStubExpr :: Width -> CmmExpr
stackStubExpr w = CmmLit (CmmInt 0 w)

-- When we copy in parameters, we usually want to put overflow
-- parameters on the stack, but sometimes we want to pass the
-- variables in their spill slots.  Therefore, for copying arguments
-- and results, we provide different functions to pass the arguments
-- in an overflow area and to pass them in spill slots.
copyInOflow  :: DynFlags -> Convention -> Area
             -> [CmmFormal]
             -> [CmmFormal]
             -> (Int, [GlobalReg], CmmAGraph)
copyInOflow dflags conv area formals extra_stk
  = (offset, gregs, catAGraphs $ map mkMiddle nodes)
  where (offset, gregs, nodes) = copyIn dflags conv area formals extra_stk

-- Return the number of bytes used for copying arguments, as well as the
-- instructions to copy the arguments.
copyIn :: DynFlags -> Convention -> Area
       -> [CmmFormal]
       -> [CmmFormal]
       -> (ByteOff, [GlobalReg], [CmmNode O O])
copyIn dflags conv area formals extra_stk
  = (stk_size, [r | (_, RegisterParam r) <- args], map ci (stk_args ++ args))
  where
    -- See Note [Width of parameters]
    ci (reg, RegisterParam r@(VanillaReg {})) =
        let local  = CmmLocal reg
            global = CmmReg (CmmGlobal r)
            width  = cmmRegWidth dflags local
            expr
              | width == wordWidth dflags = global
              | width <  wordWidth dflags =
                  CmmMachOp (MO_XX_Conv (wordWidth dflags) width) [global]
              | otherwise = panic "Parameter width greater than word width"
        in CmmAssign local expr

    -- Non VanillaRegs
    ci (reg, RegisterParam r) =
        CmmAssign (CmmLocal reg) (CmmReg (CmmGlobal r))

    ci (reg, StackParam off)
      | isBitsType $ localRegType reg
      , typeWidth (localRegType reg) < wordWidth dflags =
        let stack_slot = CmmLoad (CmmStackSlot area off) (cmmBits $ wordWidth dflags)
            local      = CmmLocal reg
            width      = cmmRegWidth dflags local
            expr       = CmmMachOp (MO_XX_Conv (wordWidth dflags) width) [stack_slot]
        in CmmAssign local expr
      | otherwise =
        CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot area off) ty)
      where ty = localRegType reg

    init_offset = widthInBytes (wordWidth dflags) -- infotable

    (stk_off, stk_args) = assignStack dflags init_offset localRegType extra_stk

    (stk_size, args) = assignArgumentsPos dflags stk_off conv
                                          localRegType formals

-- Factoring out the common parts of the copyout functions yielded something
-- more complicated:

data Transfer = Call | JumpRet | Jump | Ret deriving Eq
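-- How 'Transfer' steers the return-address store in 'copyOutOflow'
-- below (illustrative): with @area = Young k@, a 'Call' prepends
--
-- > (CmmLit (CmmBlock k), StackParam init_offset)
--
-- so the return address is written to the young area, whereas
-- 'JumpRet' only reserves the slot (the return address is already on
-- the stack), and 'Jump'/'Ret' reserve nothing.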
copyOutOflow :: DynFlags -> Convention -> Transfer -> Area -> [CmmExpr]
             -> UpdFrameOffset
             -> [CmmExpr] -- extra stack args
             -> (Int, [GlobalReg], CmmAGraph)

-- Generate code to move the actual parameters into the locations
-- required by the calling convention.  This includes a store for the
-- return address.
--
-- The argument layout function ignores the pointer to the info table,
-- so we slot that in here.  When copying-out to a young area, we set
-- the info table for return and adjust the offsets of the other
-- parameters.  If this is a call instruction, we adjust the offsets
-- of the other parameters.
copyOutOflow dflags conv transfer area actuals updfr_off extra_stack_stuff
  = (stk_size, regs, graph)
  where
    (regs, graph) = foldr co ([], mkNop) (setRA ++ args ++ stack_params)

    -- See Note [Width of parameters]
    co (v, RegisterParam r@(VanillaReg {})) (rs, ms) =
        let width = cmmExprWidth dflags v
            value
              | width == wordWidth dflags = v
              | width <  wordWidth dflags =
                  CmmMachOp (MO_XX_Conv width (wordWidth dflags)) [v]
              | otherwise = panic "Parameter width greater than word width"
        in (r:rs, mkAssign (CmmGlobal r) value <*> ms)

    -- Non VanillaRegs
    co (v, RegisterParam r) (rs, ms) =
        (r:rs, mkAssign (CmmGlobal r) v <*> ms)

    -- See Note [Width of parameters]
    co (v, StackParam off) (rs, ms) =
        (rs, mkStore (CmmStackSlot area off) (value v) <*> ms)

    width v = cmmExprWidth dflags v
    value v
      | isBitsType $ cmmExprType dflags v
      , width v < wordWidth dflags
      = CmmMachOp (MO_XX_Conv (width v) (wordWidth dflags)) [v]
      | otherwise = v

    (setRA, init_offset) =
      case area of
        Young id ->  -- Generate a store instruction for
                     -- the return address if making a call
          case transfer of
            Call ->
              ([(CmmLit (CmmBlock id), StackParam init_offset)],
               widthInBytes (wordWidth dflags))
            JumpRet ->
              ([], widthInBytes (wordWidth dflags))
            _other ->
              ([], 0)
        Old -> ([], updfr_off)

    (extra_stack_off, stack_params) =
       assignStack dflags init_offset (cmmExprType dflags) extra_stack_stuff

    args :: [(CmmExpr, ParamLocation)]   -- The argument and where to put it
    (stk_size, args) = assignArgumentsPos dflags extra_stack_off conv
                                          (cmmExprType dflags) actuals


-- Note [Width of parameters]
--
-- Consider passing a small (< word width) primitive like Int8# to a function.
-- It's actually non-trivial to do this without extending/narrowing:
-- * Global registers are considered to have native word width (i.e., 64 bits
--   on x86-64), so CmmLint would complain if we assigned an 8-bit parameter
--   to a global register.
-- * The same problem exists in LLVM IR.
-- * Lowering gets harder since on x86-32 not every register exposes its lower
--   8 bits (e.g., for %eax we can use %al, but there isn't a corresponding
--   8-bit register for %edi).  So we would either need to extend/narrow
--   anyway, or complicate the calling convention.
-- * Passing a small integer in a stack slot, which has native word width,
--   requires extending to word width when writing to the stack and narrowing
--   when reading off the stack (see #16258).
-- So instead, we always extend every parameter smaller than native word width
-- in copyOutOflow and then truncate it back to the expected width in copyIn.
-- Note that we do this in Cmm using MO_XX_Conv, which requires no zero- or
-- sign-extension; it's up to the backend to handle this in the most efficient
-- way (e.g., a simple register move or a smaller-size store).
-- This convention (of ignoring the upper bits) is different from some C ABIs,
-- e.g. all PowerPC ELF ABIs, which require sign- or zero-extending parameters.
--
-- There was some discussion about this in this PR:
-- https://github.com/ghc-proposals/ghc-proposals/pull/74
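-- A concrete sketch of the convention in Note [Width of parameters]
-- (hypothetical 8-bit argument @arg8@ passed in a vanilla register R2
-- on a 64-bit target):
--
-- >   caller, copyOutOflow:   R2   := MO_XX_Conv W8  W64 (arg8)
-- >   callee, copyIn:         arg8 := MO_XX_Conv W64 W8  (R2)
--
-- Neither conversion constrains the upper 56 bits, which is what lets
-- a backend lower both sides to plain register moves.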
mkCallEntry :: DynFlags -> Convention -> [CmmFormal] -> [CmmFormal]
            -> (Int, [GlobalReg], CmmAGraph)
mkCallEntry dflags conv formals extra_stk
  = copyInOflow dflags conv Old formals extra_stk

lastWithArgs :: DynFlags -> Transfer -> Area -> Convention -> [CmmExpr]
             -> UpdFrameOffset
             -> (ByteOff -> [GlobalReg] -> CmmAGraph)
             -> CmmAGraph
lastWithArgs dflags transfer area conv actuals updfr_off last =
  lastWithArgsAndExtraStack dflags transfer area conv actuals
                            updfr_off noExtraStack last

lastWithArgsAndExtraStack :: DynFlags
             -> Transfer -> Area -> Convention -> [CmmExpr]
             -> UpdFrameOffset -> [CmmExpr]
             -> (ByteOff -> [GlobalReg] -> CmmAGraph)
             -> CmmAGraph
lastWithArgsAndExtraStack dflags transfer area conv actuals updfr_off
                          extra_stack last =
  copies <*> last outArgs regs
 where
  (outArgs, regs, copies) = copyOutOflow dflags conv transfer area actuals
                               updfr_off extra_stack

noExtraStack :: [CmmExpr]
noExtraStack = []

toCall :: CmmExpr -> Maybe BlockId -> UpdFrameOffset -> ByteOff
       -> ByteOff -> [GlobalReg]
       -> CmmAGraph
toCall e cont updfr_off res_space arg_space regs =
  mkLast $ CmmCall e cont regs arg_space res_space updfr_off
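-- Putting the pieces together (an illustrative expansion, not extra
-- API): a transfer built with 'lastWithArgs' ultimately takes the form
--
-- > copies <*> toCall f cont updfr_off res_space outArgs regs
--
-- where @copies@ is the argument-shuffling code from 'copyOutOflow',
-- @outArgs@ is the number of bytes of outgoing stack arguments, and
-- @regs@ are the registers carrying arguments, which the 'CmmCall'
-- node records.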