{-# LANGUAGE QuasiQuotes #-}
module Futhark.CodeGen.Backends.COpenCL
( compileProg,
GC.CParts (..),
GC.asLibrary,
GC.asExecutable,
GC.asServer,
)
where
import Control.Monad hiding (mapM)
import Data.Text qualified as T
import Futhark.CodeGen.Backends.COpenCL.Boilerplate
import Futhark.CodeGen.Backends.GenericC qualified as GC
import Futhark.CodeGen.Backends.GenericC.Options
import Futhark.CodeGen.Backends.SimpleRep (primStorageType, toStorage)
import Futhark.CodeGen.ImpCode.OpenCL
import Futhark.CodeGen.ImpGen.OpenCL qualified as ImpGen
import Futhark.IR.GPUMem hiding
( CmpSizeLe,
GetSize,
GetSizeMax,
)
import Futhark.MonadFreshNames
import Language.C.Quote.OpenCL qualified as C
import Language.C.Syntax qualified as C
import NeatInterpolation (untrimming)
compileProg :: MonadFreshNames m => T.Text -> Prog GPUMem -> m (ImpGen.Warnings, GC.CParts)
compileProg :: forall (m :: * -> *).
MonadFreshNames m =>
Text -> Prog GPUMem -> m (Warnings, CParts)
compileProg Text
version Prog GPUMem
prog = do
( Warnings
ws,
Program
Text
opencl_code
Text
opencl_prelude
Map Name KernelSafety
kernels
[PrimType]
types
ParamMap
params
[FailureMsg]
failures
Definitions OpenCL
prog'
) <-
Prog GPUMem -> m (Warnings, Program)
forall (m :: * -> *).
MonadFreshNames m =>
Prog GPUMem -> m (Warnings, Program)
ImpGen.compileProg Prog GPUMem
prog
let cost_centres :: [Name]
cost_centres =
[ Name
copyDevToDev,
Name
copyDevToHost,
Name
copyHostToDev,
Name
copyScalarToDev,
Name
copyScalarFromDev
]
(Warnings
ws,)
(CParts -> (Warnings, CParts)) -> m CParts -> m (Warnings, CParts)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text
-> Text
-> ParamMap
-> Operations OpenCL ()
-> CompilerM OpenCL () ()
-> Text
-> (Space, [Space])
-> [Option]
-> Definitions OpenCL
-> m CParts
forall (m :: * -> *) op.
MonadFreshNames m =>
Text
-> Text
-> ParamMap
-> Operations op ()
-> CompilerM op () ()
-> Text
-> (Space, [Space])
-> [Option]
-> Definitions op
-> m CParts
GC.compileProg
Text
"opencl"
Text
version
ParamMap
params
Operations OpenCL ()
operations
( Text
-> Text
-> [Name]
-> Map Name KernelSafety
-> [PrimType]
-> [FailureMsg]
-> CompilerM OpenCL () ()
generateBoilerplate
Text
opencl_code
Text
opencl_prelude
[Name]
cost_centres
Map Name KernelSafety
kernels
[PrimType]
types
[FailureMsg]
failures
)
Text
include_opencl_h
(String -> Space
Space String
"device", [String -> Space
Space String
"device", Space
DefaultSpace])
[Option]
cliOptions
Definitions OpenCL
prog'
where
operations :: GC.Operations OpenCL ()
operations :: Operations OpenCL ()
operations =
Operations OpenCL ()
forall op s. Operations op s
GC.defaultOperations
{ opsCompiler :: OpCompiler OpenCL ()
GC.opsCompiler = OpCompiler OpenCL ()
callKernel,
opsWriteScalar :: WriteScalar OpenCL ()
GC.opsWriteScalar = WriteScalar OpenCL ()
writeOpenCLScalar,
opsReadScalar :: ReadScalar OpenCL ()
GC.opsReadScalar = ReadScalar OpenCL ()
readOpenCLScalar,
opsAllocate :: Allocate OpenCL ()
GC.opsAllocate = Allocate OpenCL ()
allocateOpenCLBuffer,
opsDeallocate :: Allocate OpenCL ()
GC.opsDeallocate = Allocate OpenCL ()
deallocateOpenCLBuffer,
opsCopy :: Copy OpenCL ()
GC.opsCopy = Copy OpenCL ()
copyOpenCLMemory,
opsMemoryType :: MemoryType OpenCL ()
GC.opsMemoryType = MemoryType OpenCL ()
openclMemoryType,
opsFatMemory :: Bool
GC.opsFatMemory = Bool
True
}
include_opencl_h :: Text
include_opencl_h =
[untrimming|
#define CL_TARGET_OPENCL_VERSION 120
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#ifdef __APPLE__
#define CL_SILENCE_DEPRECATION
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
|]
cliOptions :: [Option]
cliOptions :: [Option]
cliOptions =
[Option]
commonOptions
[Option] -> [Option] -> [Option]
forall a. [a] -> [a] -> [a]
++ [ Option
{ optionLongName :: String
optionLongName = String
"platform",
optionShortName :: Maybe Char
optionShortName = Char -> Maybe Char
forall a. a -> Maybe a
Just Char
'p',
optionArgument :: OptionArgument
optionArgument = String -> OptionArgument
RequiredArgument String
"NAME",
optionDescription :: String
optionDescription = String
"Use the first OpenCL platform whose name contains the given string.",
optionAction :: Stm
optionAction = [C.cstm|futhark_context_config_set_platform(cfg, optarg);|]
},
Option
{ optionLongName :: String
optionLongName = String
"dump-opencl",
optionShortName :: Maybe Char
optionShortName = Maybe Char
forall a. Maybe a
Nothing,
optionArgument :: OptionArgument
optionArgument = String -> OptionArgument
RequiredArgument String
"FILE",
optionDescription :: String
optionDescription = String
"Dump the embedded OpenCL program to the indicated file.",
optionAction :: Stm
optionAction =
[C.cstm|{futhark_context_config_dump_program_to(cfg, optarg);
entry_point = NULL;}|]
},
Option
{ optionLongName :: String
optionLongName = String
"load-opencl",
optionShortName :: Maybe Char
optionShortName = Maybe Char
forall a. Maybe a
Nothing,
optionArgument :: OptionArgument
optionArgument = String -> OptionArgument
RequiredArgument String
"FILE",
optionDescription :: String
optionDescription = String
"Instead of using the embedded OpenCL program, load it from the indicated file.",
optionAction :: Stm
optionAction = [C.cstm|futhark_context_config_load_program_from(cfg, optarg);|]
},
Option
{ optionLongName :: String
optionLongName = String
"dump-opencl-binary",
optionShortName :: Maybe Char
optionShortName = Maybe Char
forall a. Maybe a
Nothing,
optionArgument :: OptionArgument
optionArgument = String -> OptionArgument
RequiredArgument String
"FILE",
optionDescription :: String
optionDescription = String
"Dump the compiled version of the embedded OpenCL program to the indicated file.",
optionAction :: Stm
optionAction =
[C.cstm|{futhark_context_config_dump_binary_to(cfg, optarg);
entry_point = NULL;}|]
},
Option
{ optionLongName :: String
optionLongName = String
"load-opencl-binary",
optionShortName :: Maybe Char
optionShortName = Maybe Char
forall a. Maybe a
Nothing,
optionArgument :: OptionArgument
optionArgument = String -> OptionArgument
RequiredArgument String
"FILE",
optionDescription :: String
optionDescription = String
"Load an OpenCL binary from the indicated file.",
optionAction :: Stm
optionAction = [C.cstm|futhark_context_config_load_binary_from(cfg, optarg);|]
},
Option
{ optionLongName :: String
optionLongName = String
"build-option",
optionShortName :: Maybe Char
optionShortName = Maybe Char
forall a. Maybe a
Nothing,
optionArgument :: OptionArgument
optionArgument = String -> OptionArgument
RequiredArgument String
"OPT",
optionDescription :: String
optionDescription = String
"Add an additional build option to the string passed to clBuildProgram().",
optionAction :: Stm
optionAction = [C.cstm|futhark_context_config_add_build_option(cfg, optarg);|]
},
Option
{ optionLongName :: String
optionLongName = String
"profile",
optionShortName :: Maybe Char
optionShortName = Char -> Maybe Char
forall a. a -> Maybe a
Just Char
'P',
optionArgument :: OptionArgument
optionArgument = OptionArgument
NoArgument,
optionDescription :: String
optionDescription = String
"Gather profiling data while executing and print out a summary at the end.",
optionAction :: Stm
optionAction = [C.cstm|futhark_context_config_set_profiling(cfg, 1);|]
},
Option
{ optionLongName :: String
optionLongName = String
"list-devices",
optionShortName :: Maybe Char
optionShortName = Maybe Char
forall a. Maybe a
Nothing,
optionArgument :: OptionArgument
optionArgument = OptionArgument
NoArgument,
optionDescription :: String
optionDescription = String
"List all OpenCL devices and platforms available on the system.",
optionAction :: Stm
optionAction =
[C.cstm|{futhark_context_config_list_devices(cfg);
entry_point = NULL;}|]
}
]
writeOpenCLScalar :: GC.WriteScalar OpenCL ()
writeOpenCLScalar :: WriteScalar OpenCL ()
writeOpenCLScalar Exp
mem Exp
i Type
t String
"device" Volatility
_ Exp
val = do
VName
val' <- String -> CompilerM OpenCL () VName
forall (m :: * -> *). MonadFreshNames m => String -> m VName
newVName String
"write_tmp"
let (BlockItem
decl, Exp
blocking) =
case Exp
val of
C.Const {} -> ([C.citem|static $ty:t $id:val' = $exp:val;|], [C.cexp|CL_FALSE|])
Exp
_ -> ([C.citem|$ty:t $id:val' = $exp:val;|], [C.cexp|CL_TRUE|])
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm
[C.cstm|{$item:decl
OPENCL_SUCCEED_OR_RETURN(
clEnqueueWriteBuffer(ctx->queue, $exp:mem, $exp:blocking,
$exp:i * sizeof($ty:t), sizeof($ty:t),
&$id:val',
0, NULL, $exp:(profilingEvent copyScalarToDev)));
}|]
writeOpenCLScalar Exp
_ Exp
_ Type
_ String
space Volatility
_ Exp
_ =
String -> CompilerM OpenCL () ()
forall a. HasCallStack => String -> a
error (String -> CompilerM OpenCL () ())
-> String -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ String
"Cannot write to '" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
space String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"' memory space."
readOpenCLScalar :: GC.ReadScalar OpenCL ()
readOpenCLScalar :: ReadScalar OpenCL ()
readOpenCLScalar Exp
mem Exp
i Type
t String
"device" Volatility
_ = do
VName
val <- String -> CompilerM OpenCL () VName
forall (m :: * -> *). MonadFreshNames m => String -> m VName
newVName String
"read_res"
InitGroup -> CompilerM OpenCL () ()
forall op s. InitGroup -> CompilerM op s ()
GC.decl [C.cdecl|$ty:t $id:val;|]
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm
[C.cstm|OPENCL_SUCCEED_OR_RETURN(
clEnqueueReadBuffer(ctx->queue, $exp:mem,
ctx->failure_is_an_option ? CL_FALSE : CL_TRUE,
$exp:i * sizeof($ty:t), sizeof($ty:t),
&$id:val,
0, NULL, $exp:(profilingEvent copyScalarFromDev)));
|]
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm
[C.cstm|if (ctx->failure_is_an_option && futhark_context_sync(ctx) != 0)
{ return 1; }|]
Exp -> CompilerM OpenCL () Exp
forall a. a -> CompilerM OpenCL () a
forall (f :: * -> *) a. Applicative f => a -> f a
pure [C.cexp|$id:val|]
readOpenCLScalar Exp
_ Exp
_ Type
_ String
space Volatility
_ =
String -> CompilerM OpenCL () Exp
forall a. HasCallStack => String -> a
error (String -> CompilerM OpenCL () Exp)
-> String -> CompilerM OpenCL () Exp
forall a b. (a -> b) -> a -> b
$ String
"Cannot read from '" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
space String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"' memory space."
allocateOpenCLBuffer :: GC.Allocate OpenCL ()
allocateOpenCLBuffer :: Allocate OpenCL ()
allocateOpenCLBuffer Exp
mem Exp
size Exp
tag String
"device" =
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm
[C.cstm|ctx->error =
OPENCL_SUCCEED_NONFATAL(opencl_alloc(ctx, ctx->log,
(size_t)$exp:size, $exp:tag,
&$exp:mem, (size_t*)&$exp:size));|]
allocateOpenCLBuffer Exp
_ Exp
_ Exp
_ String
space =
String -> CompilerM OpenCL () ()
forall a. HasCallStack => String -> a
error (String -> CompilerM OpenCL () ())
-> String -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ String
"Cannot allocate in '" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
space String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"' memory space."
deallocateOpenCLBuffer :: GC.Deallocate OpenCL ()
deallocateOpenCLBuffer :: Allocate OpenCL ()
deallocateOpenCLBuffer Exp
mem Exp
size Exp
tag String
"device" =
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|OPENCL_SUCCEED_OR_RETURN(opencl_free(ctx, $exp:mem, $exp:size, $exp:tag));|]
deallocateOpenCLBuffer Exp
_ Exp
_ Exp
_ String
space =
String -> CompilerM OpenCL () ()
forall a. HasCallStack => String -> a
error (String -> CompilerM OpenCL () ())
-> String -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ String
"Cannot deallocate in '" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
space String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"' space"
syncArg :: GC.CopyBarrier -> C.Exp
syncArg :: CopyBarrier -> Exp
syncArg CopyBarrier
GC.CopyBarrier = [C.cexp|CL_TRUE|]
syncArg CopyBarrier
GC.CopyNoBarrier = [C.cexp|CL_FALSE|]
copyOpenCLMemory :: GC.Copy OpenCL ()
copyOpenCLMemory :: Copy OpenCL ()
copyOpenCLMemory CopyBarrier
b Exp
destmem Exp
destidx Space
DefaultSpace Exp
srcmem Exp
srcidx (Space String
"device") Exp
nbytes =
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm
[C.cstm|
if ($exp:nbytes > 0) {
typename cl_bool sync_call = $exp:(syncArg b);
OPENCL_SUCCEED_OR_RETURN(
clEnqueueReadBuffer(ctx->queue, $exp:srcmem,
ctx->failure_is_an_option ? CL_FALSE : sync_call,
(size_t)$exp:srcidx, (size_t)$exp:nbytes,
$exp:destmem + $exp:destidx,
0, NULL, $exp:(profilingEvent copyHostToDev)));
if (sync_call &&
ctx->failure_is_an_option &&
futhark_context_sync(ctx) != 0) { return 1; }
}
|]
copyOpenCLMemory CopyBarrier
b Exp
destmem Exp
destidx (Space String
"device") Exp
srcmem Exp
srcidx Space
DefaultSpace Exp
nbytes =
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm
[C.cstm|
if ($exp:nbytes > 0) {
OPENCL_SUCCEED_OR_RETURN(
clEnqueueWriteBuffer(ctx->queue, $exp:destmem, $exp:(syncArg b),
(size_t)$exp:destidx, (size_t)$exp:nbytes,
$exp:srcmem + $exp:srcidx,
0, NULL, $exp:(profilingEvent copyDevToHost)));
}
|]
copyOpenCLMemory CopyBarrier
_ Exp
destmem Exp
destidx (Space String
"device") Exp
srcmem Exp
srcidx (Space String
"device") Exp
nbytes =
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm
[C.cstm|{
if ($exp:nbytes > 0) {
OPENCL_SUCCEED_OR_RETURN(
clEnqueueCopyBuffer(ctx->queue,
$exp:srcmem, $exp:destmem,
(size_t)$exp:srcidx, (size_t)$exp:destidx,
(size_t)$exp:nbytes,
0, NULL, $exp:(profilingEvent copyDevToDev)));
if (ctx->debugging) {
OPENCL_SUCCEED_FATAL(clFinish(ctx->queue));
}
}
}|]
copyOpenCLMemory CopyBarrier
_ Exp
destmem Exp
destidx Space
DefaultSpace Exp
srcmem Exp
srcidx Space
DefaultSpace Exp
nbytes =
Exp -> Exp -> Exp -> Exp -> Exp -> CompilerM OpenCL () ()
forall op s. Exp -> Exp -> Exp -> Exp -> Exp -> CompilerM op s ()
GC.copyMemoryDefaultSpace Exp
destmem Exp
destidx Exp
srcmem Exp
srcidx Exp
nbytes
copyOpenCLMemory CopyBarrier
_ Exp
_ Exp
_ Space
destspace Exp
_ Exp
_ Space
srcspace Exp
_ =
String -> CompilerM OpenCL () ()
forall a. HasCallStack => String -> a
error (String -> CompilerM OpenCL () ())
-> String -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ String
"Cannot copy to " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Space -> String
forall a. Show a => a -> String
show Space
destspace String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" from " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Space -> String
forall a. Show a => a -> String
show Space
srcspace
openclMemoryType :: GC.MemoryType OpenCL ()
openclMemoryType :: MemoryType OpenCL ()
openclMemoryType String
"device" = Type -> CompilerM OpenCL () Type
forall a. a -> CompilerM OpenCL () a
forall (f :: * -> *) a. Applicative f => a -> f a
pure [C.cty|typename cl_mem|]
openclMemoryType String
space =
MemoryType OpenCL ()
forall a. HasCallStack => String -> a
error MemoryType OpenCL () -> MemoryType OpenCL ()
forall a b. (a -> b) -> a -> b
$ String
"OpenCL backend does not support '" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
space String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"' memory space."
kernelConstToExp :: KernelConst -> C.Exp
kernelConstToExp :: KernelConst -> Exp
kernelConstToExp (SizeConst Name
key) =
[C.cexp|*ctx->tuning_params.$id:key|]
kernelConstToExp (SizeMaxConst SizeClass
size_class) =
[C.cexp|ctx->$id:field|]
where
field :: String
field = String
"max_" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> SizeClass -> String
forall a. Pretty a => a -> String
prettyString SizeClass
size_class
compileGroupDim :: GroupDim -> GC.CompilerM op s C.Exp
compileGroupDim :: forall op s. GroupDim -> CompilerM op s Exp
compileGroupDim (Left Exp
e) = Exp -> CompilerM op s Exp
forall op s. Exp -> CompilerM op s Exp
GC.compileExp Exp
e
compileGroupDim (Right KernelConst
kc) = Exp -> CompilerM op s Exp
forall a. a -> CompilerM op s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Exp -> CompilerM op s Exp) -> Exp -> CompilerM op s Exp
forall a b. (a -> b) -> a -> b
$ KernelConst -> Exp
kernelConstToExp KernelConst
kc
callKernel :: GC.OpCompiler OpenCL ()
callKernel :: OpCompiler OpenCL ()
callKernel (GetSize VName
v Name
key) = do
let e :: Exp
e = KernelConst -> Exp
kernelConstToExp (KernelConst -> Exp) -> KernelConst -> Exp
forall a b. (a -> b) -> a -> b
$ Name -> KernelConst
SizeConst Name
key
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:v = $exp:e;|]
callKernel (CmpSizeLe VName
v Name
key Exp
x) = do
let e :: Exp
e = KernelConst -> Exp
kernelConstToExp (KernelConst -> Exp) -> KernelConst -> Exp
forall a b. (a -> b) -> a -> b
$ Name -> KernelConst
SizeConst Name
key
Exp
x' <- Exp -> CompilerM OpenCL () Exp
forall op s. Exp -> CompilerM op s Exp
GC.compileExp Exp
x
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:v = $exp:e <= $exp:x';|]
VName -> Name -> Exp -> CompilerM OpenCL () ()
forall op. VName -> Name -> Exp -> CompilerM op () ()
sizeLoggingCode VName
v Name
key Exp
x'
callKernel (GetSizeMax VName
v SizeClass
size_class) = do
let e :: Exp
e = KernelConst -> Exp
kernelConstToExp (KernelConst -> Exp) -> KernelConst -> Exp
forall a b. (a -> b) -> a -> b
$ SizeClass -> KernelConst
SizeMaxConst SizeClass
size_class
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:v = $exp:e;|]
callKernel (LaunchKernel KernelSafety
safety Name
name [KernelArg]
args [Exp]
num_workgroups [GroupDim]
workgroup_size) = do
Bool -> CompilerM OpenCL () () -> CompilerM OpenCL () ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (KernelSafety
safety KernelSafety -> KernelSafety -> Bool
forall a. Eq a => a -> a -> Bool
== KernelSafety
SafetyFull) (CompilerM OpenCL () () -> CompilerM OpenCL () ())
-> CompilerM OpenCL () () -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm
[C.cstm|
OPENCL_SUCCEED_OR_RETURN(clSetKernelArg(ctx->program->$id:name, 1,
sizeof(ctx->failure_is_an_option),
&ctx->failure_is_an_option));
|]
([Param]
arg_params, [(Exp, Exp)]
arg_set, [Exp]
call_args) <-
[(Param, (Exp, Exp), Exp)] -> ([Param], [(Exp, Exp)], [Exp])
forall a b c. [(a, b, c)] -> ([a], [b], [c])
unzip3 ([(Param, (Exp, Exp), Exp)] -> ([Param], [(Exp, Exp)], [Exp]))
-> CompilerM OpenCL () [(Param, (Exp, Exp), Exp)]
-> CompilerM OpenCL () ([Param], [(Exp, Exp)], [Exp])
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Int -> KernelArg -> CompilerM OpenCL () (Param, (Exp, Exp), Exp))
-> [Int]
-> [KernelArg]
-> CompilerM OpenCL () [(Param, (Exp, Exp), Exp)]
forall (m :: * -> *) a b c.
Applicative m =>
(a -> b -> m c) -> [a] -> [b] -> m [c]
zipWithM Int -> KernelArg -> CompilerM OpenCL () (Param, (Exp, Exp), Exp)
forall {p} {op} {s}.
Show p =>
p -> KernelArg -> CompilerM op s (Param, (Exp, Exp), Exp)
onArg [(Int
0 :: Int) ..] [KernelArg]
args
[Exp]
num_workgroups' <- (Exp -> CompilerM OpenCL () Exp)
-> [Exp] -> CompilerM OpenCL () [Exp]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM Exp -> CompilerM OpenCL () Exp
forall op s. Exp -> CompilerM op s Exp
GC.compileExp [Exp]
num_workgroups
[Exp]
workgroup_size' <- (GroupDim -> CompilerM OpenCL () Exp)
-> [GroupDim] -> CompilerM OpenCL () [Exp]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM GroupDim -> CompilerM OpenCL () Exp
forall op s. GroupDim -> CompilerM op s Exp
compileGroupDim [GroupDim]
workgroup_size
Exp
local_bytes <- (Exp -> KernelArg -> CompilerM OpenCL () Exp)
-> Exp -> [KernelArg] -> CompilerM OpenCL () Exp
forall (t :: * -> *) (m :: * -> *) b a.
(Foldable t, Monad m) =>
(b -> a -> m b) -> b -> t a -> m b
foldM Exp -> KernelArg -> CompilerM OpenCL () Exp
forall {op} {s}. Exp -> KernelArg -> CompilerM op s Exp
localBytes [C.cexp|0|] [KernelArg]
args
Name
kernel_fname <- Name
-> KernelSafety
-> [Param]
-> [(Exp, Exp)]
-> CompilerM OpenCL () Name
forall op s.
Name
-> KernelSafety -> [Param] -> [(Exp, Exp)] -> CompilerM op s Name
genKernelFunction Name
name KernelSafety
safety [Param]
arg_params [(Exp, Exp)]
arg_set
let Exp
grid_x : Exp
grid_y : Exp
grid_z : [Exp]
_ = [Exp]
num_workgroups' [Exp] -> [Exp] -> [Exp]
forall a. [a] -> [a] -> [a]
++ Exp -> [Exp]
forall a. a -> [a]
repeat [C.cexp|1|]
Exp
group_x : Exp
group_y : Exp
group_z : [Exp]
_ = [Exp]
workgroup_size' [Exp] -> [Exp] -> [Exp]
forall a. [a] -> [a] -> [a]
++ Exp -> [Exp]
forall a. a -> [a]
repeat [C.cexp|1|]
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm
[C.cstm|{
err = $id:kernel_fname(ctx,
$exp:grid_x,$exp:grid_y,$exp:grid_z,
$exp:group_x, $exp:group_y, $exp:group_z,
$exp:local_bytes,
$args:call_args);
if (err != FUTHARK_SUCCESS) { goto cleanup; }
}|]
Bool -> CompilerM OpenCL () () -> CompilerM OpenCL () ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (KernelSafety
safety KernelSafety -> KernelSafety -> Bool
forall a. Ord a => a -> a -> Bool
>= KernelSafety
SafetyFull) (CompilerM OpenCL () () -> CompilerM OpenCL () ())
-> CompilerM OpenCL () () -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|ctx->failure_is_an_option = 1;|]
where
localBytes :: Exp -> KernelArg -> CompilerM op s Exp
localBytes Exp
cur (SharedMemoryKArg Count Bytes Exp
num_bytes) = do
Exp
num_bytes' <- Exp -> CompilerM op s Exp
forall op s. Exp -> CompilerM op s Exp
GC.compileExp (Exp -> CompilerM op s Exp) -> Exp -> CompilerM op s Exp
forall a b. (a -> b) -> a -> b
$ Count Bytes Exp -> Exp
forall {k} (u :: k) e. Count u e -> e
unCount Count Bytes Exp
num_bytes
Exp -> CompilerM op s Exp
forall a. a -> CompilerM op s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure [C.cexp|$exp:cur + $exp:num_bytes'|]
localBytes Exp
cur KernelArg
_ = Exp -> CompilerM op s Exp
forall a. a -> CompilerM op s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Exp
cur
onArg :: p -> KernelArg -> CompilerM op s (Param, (Exp, Exp), Exp)
onArg p
i (ValueKArg Exp
e PrimType
t) = do
let arg :: String
arg = String
"arg" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> p -> String
forall a. Show a => a -> String
show p
i
Exp
e' <- Exp -> CompilerM op s Exp
forall op s. Exp -> CompilerM op s Exp
GC.compileExp Exp
e
(Param, (Exp, Exp), Exp) -> CompilerM op s (Param, (Exp, Exp), Exp)
forall a. a -> CompilerM op s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure
( [C.cparam|$ty:(primStorageType t) $id:arg|],
([C.cexp|sizeof($id:arg)|], [C.cexp|&$id:arg|]),
PrimType -> Exp -> Exp
toStorage PrimType
t Exp
e'
)
onArg p
i (MemKArg VName
v) = do
let arg :: String
arg = String
"arg" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> p -> String
forall a. Show a => a -> String
show p
i
Exp
v' <- VName -> CompilerM op s Exp
forall op s. VName -> CompilerM op s Exp
GC.rawMem VName
v
(Param, (Exp, Exp), Exp) -> CompilerM op s (Param, (Exp, Exp), Exp)
forall a. a -> CompilerM op s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure
( [C.cparam|typename cl_mem $id:arg|],
([C.cexp|sizeof($id:arg)|], [C.cexp|&$id:arg|]),
Exp
v'
)
onArg p
i (SharedMemoryKArg (Count Exp
c)) = do
let arg :: String
arg = String
"arg" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> p -> String
forall a. Show a => a -> String
show p
i
Exp
num_bytes <- Exp -> CompilerM op s Exp
forall op s. Exp -> CompilerM op s Exp
GC.compileExp Exp
c
(Param, (Exp, Exp), Exp) -> CompilerM op s (Param, (Exp, Exp), Exp)
forall a. a -> CompilerM op s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure
( [C.cparam|unsigned int $id:arg|],
([C.cexp|$id:arg|], [C.cexp|NULL|]),
Exp
num_bytes
)
genKernelFunction ::
KernelName ->
KernelSafety ->
[C.Param] ->
[(C.Exp, C.Exp)] ->
GC.CompilerM op s Name
genKernelFunction :: forall op s.
Name
-> KernelSafety -> [Param] -> [(Exp, Exp)] -> CompilerM op s Name
genKernelFunction Name
kernel_name KernelSafety
safety [Param]
arg_params [(Exp, Exp)]
arg_set = do
let kernel_fname :: Name
kernel_fname = Name
"gpu_kernel_" Name -> Name -> Name
forall a. Semigroup a => a -> a -> a
<> Name
kernel_name
Definition -> CompilerM op s ()
forall op s. Definition -> CompilerM op s ()
GC.libDecl
[C.cedecl|static int $id:kernel_fname(
struct futhark_context* ctx,
unsigned int grid_x, unsigned int grid_y, unsigned int grid_z,
unsigned int block_x, unsigned int block_y, unsigned int block_z,
unsigned int local_bytes, $params:arg_params) {
(void)local_bytes;
if (grid_x * grid_y * grid_z * block_x * block_y * block_z != 0) {
const size_t global_work_size[3] = {grid_x*block_x, grid_y*block_y, grid_z*block_z};
const size_t local_work_size[3] = {block_x, block_y, block_z};
typename int64_t time_start = 0, time_end = 0;
$stms:set_args
if (ctx->debugging) {
fprintf(ctx->log, "Launching %s with grid size [%d, %d, %d] and group size [%d, %d, %d]; local memory: %d bytes.\n",
$string:(prettyString kernel_name),
grid_x, grid_y, grid_z,
block_x, block_y, block_z,
local_bytes);
time_start = get_wall_time();
}
typename cl_event *pevent = $exp:(profilingEvent kernel_name);
OPENCL_SUCCEED_OR_RETURN(
clEnqueueNDRangeKernel(ctx->queue, ctx->program->$id:kernel_name, 3, NULL,
global_work_size, local_work_size,
0, NULL, pevent));
if (ctx->debugging) {
OPENCL_SUCCEED_FATAL(clFinish(ctx->queue));
time_end = get_wall_time();
long int time_diff = time_end - time_start;
fprintf(ctx->log, "kernel %s runtime: %ldus\n",
$string:(prettyString kernel_name), time_diff);
}
}
return FUTHARK_SUCCESS;
}|]
Name -> CompilerM op s Name
forall a. a -> CompilerM op s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Name
kernel_fname
where
set_args :: [Stm]
set_args = (Int -> (Exp, Exp) -> Stm) -> [Int] -> [(Exp, Exp)] -> [Stm]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith Int -> (Exp, Exp) -> Stm
forall {a} {a} {a}.
(Show a, Integral a, ToExp a, ToExp a) =>
a -> (a, a) -> Stm
setKernelArg [KernelSafety -> Int
numFailureParams KernelSafety
safety ..] [(Exp, Exp)]
arg_set
setKernelArg :: a -> (a, a) -> Stm
setKernelArg a
i (a
size, a
e) =
[C.cstm|OPENCL_SUCCEED_OR_RETURN(clSetKernelArg(ctx->program->$id:kernel_name, $int:i, $exp:size, $exp:e));|]