module Futhark.CodeGen.OpenCL.Heuristics
       ( SizeHeuristic (..)
       , DeviceType (..)
       , WhichSize (..)
       , HeuristicValue (..)
       , sizeHeuristicsTable
       )
       where

-- Some OpenCL platforms have a SIMD/warp/wavefront-based execution
-- model that execute groups of threads in lockstep, permitting us to
-- perform cross-thread synchronisation within each such group without
-- the use of barriers.  Unfortunately, there seems to be no reliable
-- way to query these sizes at runtime.  Instead, we use this table to
-- figure out which size we should use for a specific platform and
-- device.  If nothing matches here, the wave size should be set to
-- one.
--
-- We also use this to select reasonable default group sizes and group
-- counts.

-- | The type of OpenCL device that this heuristic applies to.
data DeviceType = DeviceCPU | DeviceGPU

-- | The value supplies by a heuristic can be a constant, or inferred
-- from some device information.
data HeuristicValue = HeuristicConst Int
                    | HeuristicDeviceInfo String

-- | A size that can be assigned a default.
data WhichSize = LockstepWidth | NumGroups | GroupSize | TileSize | Threshold

-- | A heuristic for setting the default value for something.
data SizeHeuristic =
    SizeHeuristic { SizeHeuristic -> String
platformName :: String
                  , SizeHeuristic -> DeviceType
deviceType :: DeviceType
                  , SizeHeuristic -> WhichSize
heuristicSize :: WhichSize
                  , SizeHeuristic -> HeuristicValue
heuristicValue :: HeuristicValue
                  }

-- | All of our heuristics.
sizeHeuristicsTable :: [SizeHeuristic]
sizeHeuristicsTable :: [SizeHeuristic]
sizeHeuristicsTable =
  [ String
-> DeviceType -> WhichSize -> HeuristicValue -> SizeHeuristic
SizeHeuristic String
"NVIDIA CUDA" DeviceType
DeviceGPU WhichSize
LockstepWidth (HeuristicValue -> SizeHeuristic)
-> HeuristicValue -> SizeHeuristic
forall a b. (a -> b) -> a -> b
$ Int -> HeuristicValue
HeuristicConst Int
32
  , String
-> DeviceType -> WhichSize -> HeuristicValue -> SizeHeuristic
SizeHeuristic String
"AMD Accelerated Parallel Processing" DeviceType
DeviceGPU WhichSize
LockstepWidth (HeuristicValue -> SizeHeuristic)
-> HeuristicValue -> SizeHeuristic
forall a b. (a -> b) -> a -> b
$ Int -> HeuristicValue
HeuristicConst Int
32
  , String
-> DeviceType -> WhichSize -> HeuristicValue -> SizeHeuristic
SizeHeuristic String
"" DeviceType
DeviceGPU WhichSize
LockstepWidth (HeuristicValue -> SizeHeuristic)
-> HeuristicValue -> SizeHeuristic
forall a b. (a -> b) -> a -> b
$ Int -> HeuristicValue
HeuristicConst Int
1
  , String
-> DeviceType -> WhichSize -> HeuristicValue -> SizeHeuristic
SizeHeuristic String
"" DeviceType
DeviceGPU WhichSize
NumGroups (HeuristicValue -> SizeHeuristic)
-> HeuristicValue -> SizeHeuristic
forall a b. (a -> b) -> a -> b
$ Int -> HeuristicValue
HeuristicConst Int
256
  , String
-> DeviceType -> WhichSize -> HeuristicValue -> SizeHeuristic
SizeHeuristic String
"" DeviceType
DeviceGPU WhichSize
GroupSize (HeuristicValue -> SizeHeuristic)
-> HeuristicValue -> SizeHeuristic
forall a b. (a -> b) -> a -> b
$ Int -> HeuristicValue
HeuristicConst Int
256
  , String
-> DeviceType -> WhichSize -> HeuristicValue -> SizeHeuristic
SizeHeuristic String
"" DeviceType
DeviceGPU WhichSize
TileSize (HeuristicValue -> SizeHeuristic)
-> HeuristicValue -> SizeHeuristic
forall a b. (a -> b) -> a -> b
$ Int -> HeuristicValue
HeuristicConst Int
32
  , String
-> DeviceType -> WhichSize -> HeuristicValue -> SizeHeuristic
SizeHeuristic String
"" DeviceType
DeviceGPU WhichSize
Threshold (HeuristicValue -> SizeHeuristic)
-> HeuristicValue -> SizeHeuristic
forall a b. (a -> b) -> a -> b
$ Int -> HeuristicValue
HeuristicConst (Int -> HeuristicValue) -> Int -> HeuristicValue
forall a b. (a -> b) -> a -> b
$ Int
32Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
1024

  , String
-> DeviceType -> WhichSize -> HeuristicValue -> SizeHeuristic
SizeHeuristic String
"" DeviceType
DeviceCPU WhichSize
LockstepWidth (HeuristicValue -> SizeHeuristic)
-> HeuristicValue -> SizeHeuristic
forall a b. (a -> b) -> a -> b
$ Int -> HeuristicValue
HeuristicConst Int
1
  , String
-> DeviceType -> WhichSize -> HeuristicValue -> SizeHeuristic
SizeHeuristic String
"" DeviceType
DeviceCPU WhichSize
NumGroups (HeuristicValue -> SizeHeuristic)
-> HeuristicValue -> SizeHeuristic
forall a b. (a -> b) -> a -> b
$ String -> HeuristicValue
HeuristicDeviceInfo String
"MAX_COMPUTE_UNITS"
  , String
-> DeviceType -> WhichSize -> HeuristicValue -> SizeHeuristic
SizeHeuristic String
"" DeviceType
DeviceCPU WhichSize
GroupSize (HeuristicValue -> SizeHeuristic)
-> HeuristicValue -> SizeHeuristic
forall a b. (a -> b) -> a -> b
$ Int -> HeuristicValue
HeuristicConst Int
32
  , String
-> DeviceType -> WhichSize -> HeuristicValue -> SizeHeuristic
SizeHeuristic String
"" DeviceType
DeviceCPU WhichSize
TileSize (HeuristicValue -> SizeHeuristic)
-> HeuristicValue -> SizeHeuristic
forall a b. (a -> b) -> a -> b
$ Int -> HeuristicValue
HeuristicConst Int
4
  , String
-> DeviceType -> WhichSize -> HeuristicValue -> SizeHeuristic
SizeHeuristic String
"" DeviceType
DeviceCPU WhichSize
Threshold (HeuristicValue -> SizeHeuristic)
-> HeuristicValue -> SizeHeuristic
forall a b. (a -> b) -> a -> b
$ String -> HeuristicValue
HeuristicDeviceInfo String
"MAX_COMPUTE_UNITS"
  ]