Safe Haskell | None |
---|---|
Language | Haskell2010 |
Synopsis
- data KernelConstants = KernelConstants {
- kernelGlobalThreadId :: TExp Int32
- kernelLocalThreadId :: TExp Int32
- kernelGroupId :: TExp Int32
- kernelGlobalThreadIdVar :: VName
- kernelLocalThreadIdVar :: VName
- kernelGroupIdVar :: VName
- kernelNumGroups :: TExp Int64
- kernelGroupSize :: TExp Int64
- kernelNumThreads :: TExp Int32
- kernelWaveSize :: TExp Int32
- kernelThreadActive :: TExp Bool
- kernelLocalIdMap :: Map [SubExp] [TExp Int32]
- keyWithEntryPoint :: Maybe Name -> Name -> Name
- type CallKernelGen = ImpM KernelsMem HostEnv HostOp
- type InKernelGen = ImpM KernelsMem KernelEnv KernelOp
- data HostEnv = HostEnv {}
- data Target
- data KernelEnv = KernelEnv {}
- computeThreadChunkSize :: SplitOrdering -> TExp Int64 -> Count Elements (TExp Int64) -> Count Elements (TExp Int64) -> TV Int64 -> ImpM lore r op ()
- groupReduce :: TExp Int32 -> Lambda KernelsMem -> [VName] -> InKernelGen ()
- groupScan :: Maybe (TExp Int32 -> TExp Int32 -> TExp Bool) -> TExp Int64 -> TExp Int64 -> Lambda KernelsMem -> [VName] -> InKernelGen ()
- isActive :: [(VName, SubExp)] -> TExp Bool
- sKernelThread :: String -> Count NumGroups (TExp Int64) -> Count GroupSize (TExp Int64) -> VName -> InKernelGen () -> CallKernelGen ()
- sKernelGroup :: String -> Count NumGroups (TExp Int64) -> Count GroupSize (TExp Int64) -> VName -> InKernelGen () -> CallKernelGen ()
- sReplicate :: VName -> SubExp -> CallKernelGen ()
- sIota :: VName -> TExp Int64 -> Exp -> Exp -> IntType -> CallKernelGen ()
- sCopy :: CopyCompiler KernelsMem HostEnv HostOp
- compileThreadResult :: SegSpace -> PatElem KernelsMem -> KernelResult -> InKernelGen ()
- compileGroupResult :: SegSpace -> PatElem KernelsMem -> KernelResult -> InKernelGen ()
- virtualiseGroups :: SegVirt -> TExp Int32 -> (TExp Int32 -> InKernelGen ()) -> InKernelGen ()
- groupLoop :: TExp Int64 -> (TExp Int64 -> InKernelGen ()) -> InKernelGen ()
- kernelLoop :: IntExp t => TExp t -> TExp t -> TExp t -> (TExp t -> InKernelGen ()) -> InKernelGen ()
- groupCoverSpace :: [TExp Int64] -> ([TExp Int64] -> InKernelGen ()) -> InKernelGen ()
- precomputeSegOpIDs :: Stms KernelsMem -> InKernelGen a -> InKernelGen a
- atomicUpdateLocking :: AtomicBinOp -> Lambda KernelsMem -> AtomicUpdate KernelsMem KernelEnv
- type AtomicBinOp = BinOp -> Maybe (VName -> VName -> Count Elements (TExp Int64) -> Exp -> AtomicOp)
- data Locking = Locking {
- lockingArray :: VName
- lockingIsUnlocked :: TExp Int32
- lockingToLock :: TExp Int32
- lockingToUnlock :: TExp Int32
- lockingMapping :: [TExp Int64] -> [TExp Int64]
- data AtomicUpdate lore r
- = AtomicPrim (DoAtomicUpdate lore r)
- | AtomicCAS (DoAtomicUpdate lore r)
- | AtomicLocking (Locking -> DoAtomicUpdate lore r)
- type DoAtomicUpdate lore r = Space -> [VName] -> [TExp Int64] -> ImpM lore r KernelOp ()
Documentation
data KernelConstants Source #
KernelConstants | |
|
type CallKernelGen = ImpM KernelsMem HostEnv HostOp Source #
type InKernelGen = ImpM KernelsMem KernelEnv KernelOp Source #
Which target are we ultimately generating code for? While most of the kernels code is the same, there are some cases where we generate special code based on the ultimate low-level API we are targeting.
computeThreadChunkSize :: SplitOrdering -> TExp Int64 -> Count Elements (TExp Int64) -> Count Elements (TExp Int64) -> TV Int64 -> ImpM lore r op () Source #
groupReduce :: TExp Int32 -> Lambda KernelsMem -> [VName] -> InKernelGen () Source #
groupScan :: Maybe (TExp Int32 -> TExp Int32 -> TExp Bool) -> TExp Int64 -> TExp Int64 -> Lambda KernelsMem -> [VName] -> InKernelGen () Source #
sKernelThread :: String -> Count NumGroups (TExp Int64) -> Count GroupSize (TExp Int64) -> VName -> InKernelGen () -> CallKernelGen () Source #
sKernelGroup :: String -> Count NumGroups (TExp Int64) -> Count GroupSize (TExp Int64) -> VName -> InKernelGen () -> CallKernelGen () Source #
sReplicate :: VName -> SubExp -> CallKernelGen () Source #
Perform a Replicate with a kernel.
sIota :: VName -> TExp Int64 -> Exp -> Exp -> IntType -> CallKernelGen () Source #
Perform an Iota with a kernel.
compileThreadResult :: SegSpace -> PatElem KernelsMem -> KernelResult -> InKernelGen () Source #
compileGroupResult :: SegSpace -> PatElem KernelsMem -> KernelResult -> InKernelGen () Source #
virtualiseGroups :: SegVirt -> TExp Int32 -> (TExp Int32 -> InKernelGen ()) -> InKernelGen () Source #
For many kernels, we may not have enough physical groups to cover the logical iteration space. Some groups thus have to perform double duty; we put an outer loop to accomplish this. The advantage over just launching a bazillion threads is that the cost of memory expansion should be proportional to the number of *physical* threads (hardware parallelism), not the amount of application parallelism.
groupLoop :: TExp Int64 -> (TExp Int64 -> InKernelGen ()) -> InKernelGen () Source #
Assign iterations of a for-loop to threads in the workgroup. The
passed-in function is invoked with the (symbolic) iteration. For
multidimensional loops, use groupCoverSpace.
kernelLoop :: IntExp t => TExp t -> TExp t -> TExp t -> (TExp t -> InKernelGen ()) -> InKernelGen () Source #
Assign iterations of a for-loop to all threads in the kernel.
The passed-in function is invoked with the (symbolic) iteration.
threadOperations will be in effect in the body. For
multidimensional loops, use groupCoverSpace.
groupCoverSpace :: [TExp Int64] -> ([TExp Int64] -> InKernelGen ()) -> InKernelGen () Source #
Iterate collectively through a multidimensional space, such that all threads in the group participate. The passed-in function is invoked with a (symbolic) point in the index space.
precomputeSegOpIDs :: Stms KernelsMem -> InKernelGen a -> InKernelGen a Source #
atomicUpdateLocking :: AtomicBinOp -> Lambda KernelsMem -> AtomicUpdate KernelsMem KernelEnv Source #
Do an atomic update corresponding to a binary operator lambda.
type AtomicBinOp = BinOp -> Maybe (VName -> VName -> Count Elements (TExp Int64) -> Exp -> AtomicOp) Source #
Locking strategy used for an atomic update.
Locking | |
|
data AtomicUpdate lore r Source #
The mechanism that will be used for performing the atomic update. Approximates how efficient it will be. Ordered from most to least efficient.
AtomicPrim (DoAtomicUpdate lore r) | Supported directly by primitive. |
AtomicCAS (DoAtomicUpdate lore r) | Can be done by efficient swaps. |
AtomicLocking (Locking -> DoAtomicUpdate lore r) | Requires explicit locking. |