{-# LANGUAGE TypeFamilies #-}

-- | Code generation for ImpCode with multicore operations.
module Futhark.CodeGen.ImpGen.Multicore
  ( Futhark.CodeGen.ImpGen.Multicore.compileProg,
    Warnings,
  )
where

import Control.Monad
import Data.Map qualified as M
import Futhark.CodeGen.ImpCode.Multicore qualified as Imp
import Futhark.CodeGen.ImpGen
import Futhark.CodeGen.ImpGen.Multicore.Base
import Futhark.CodeGen.ImpGen.Multicore.SegHist
import Futhark.CodeGen.ImpGen.Multicore.SegMap
import Futhark.CodeGen.ImpGen.Multicore.SegRed
import Futhark.CodeGen.ImpGen.Multicore.SegScan
import Futhark.IR.MCMem
import Futhark.MonadFreshNames
import Futhark.Util.IntegralExp (rem)
import Prelude hiding (quot, rem)

opCompiler :: OpCompiler MCMem HostEnv Imp.Multicore
opCompiler :: OpCompiler MCMem HostEnv Multicore
opCompiler Pat (LetDec MCMem)
dest (Alloc SubExp
e Space
space) = forall rep (inner :: * -> *) r op.
Mem rep inner =>
Pat (LetDec rep) -> SubExp -> Space -> ImpM rep r op ()
compileAlloc Pat (LetDec MCMem)
dest SubExp
e Space
space
opCompiler Pat (LetDec MCMem)
dest (Inner MCOp NoOp MCMem
op) = Pat LParamMem -> MCOp NoOp MCMem -> ImpM MCMem HostEnv Multicore ()
compileMCOp Pat (LetDec MCMem)
dest MCOp NoOp MCMem
op

parallelCopy :: CopyCompiler MCMem HostEnv Imp.Multicore
parallelCopy :: CopyCompiler MCMem HostEnv Multicore
parallelCopy PrimType
pt MemLoc
destloc MemLoc
srcloc = do
  Code Multicore
seq_code <- forall rep r op. ImpM rep r op () -> ImpM rep r op (Code op)
collect forall a b. (a -> b) -> a -> b
$ forall rep r op a.
Operations rep r op -> ImpM rep r op a -> ImpM rep r op a
localOps Operations MCMem HostEnv Multicore
inThreadOps forall a b. (a -> b) -> a -> b
$ do
    Code Multicore
body <- ImpM MCMem HostEnv Multicore (Code Multicore)
genCopy
    [Param]
free_params <- forall a. FreeIn a => a -> MulticoreGen [Param]
freeParams Code Multicore
body
    forall op rep r. Code op -> ImpM rep r op ()
emit forall a b. (a -> b) -> a -> b
$ forall a. a -> Code a
Imp.Op forall a b. (a -> b) -> a -> b
$ [Char] -> Code Multicore -> [Param] -> Multicore
Imp.ParLoop [Char]
"copy" Code Multicore
body [Param]
free_params
  [Param]
free_params <- forall a. FreeIn a => a -> MulticoreGen [Param]
freeParams Code Multicore
seq_code
  [Char]
s <- forall a. Pretty a => a -> [Char]
prettyString forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *). MonadFreshNames m => [Char] -> m VName
newVName [Char]
"copy"
  TExp Int64
iterations <- forall {k} (t :: k) rep r op.
[Char] -> TExp t -> ImpM rep r op (TExp t)
dPrimVE [Char]
"iterations" forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
product forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map SubExp -> TExp Int64
pe64 [SubExp]
srcshape
  let scheduling :: SchedulerInfo
scheduling = Exp -> Scheduling -> SchedulerInfo
Imp.SchedulerInfo (forall {k} (t :: k) v. TPrimExp t v -> PrimExp v
untyped TExp Int64
iterations) Scheduling
Imp.Static
  forall op rep r. Code op -> ImpM rep r op ()
emit forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. a -> Code a
Imp.Op forall a b. (a -> b) -> a -> b
$
    [Char]
-> [Param]
-> ParallelTask
-> Maybe ParallelTask
-> [Param]
-> SchedulerInfo
-> Multicore
Imp.SegOp [Char]
s [Param]
free_params (Code Multicore -> ParallelTask
Imp.ParallelTask Code Multicore
seq_code) forall a. Maybe a
Nothing [] SchedulerInfo
scheduling
  where
    MemLoc VName
destmem [SubExp]
_ IxFun (TExp Int64)
_ = MemLoc
destloc
    MemLoc VName
srcmem [SubExp]
srcshape IxFun (TExp Int64)
_ = MemLoc
srcloc
    genCopy :: ImpM MCMem HostEnv Multicore (Code Multicore)
genCopy = forall rep r op. ImpM rep r op () -> ImpM rep r op (Code op)
collect forall b c a. (b -> c) -> (a -> b) -> a -> c
. ImpM MCMem HostEnv Multicore () -> ImpM MCMem HostEnv Multicore ()
inISPC forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char]
-> ChunkLoopVectorization
-> (TExp Int64 -> ImpM MCMem HostEnv Multicore ())
-> ImpM MCMem HostEnv Multicore ()
generateChunkLoop [Char]
"copy" ChunkLoopVectorization
Vectorized forall a b. (a -> b) -> a -> b
$ \TExp Int64
i -> do
      [TExp Int64]
is <- forall rep r op.
[Char] -> [TExp Int64] -> TExp Int64 -> ImpM rep r op [TExp Int64]
dIndexSpace' [Char]
"i" (forall a b. (a -> b) -> [a] -> [b]
map SubExp -> TExp Int64
pe64 [SubExp]
srcshape) TExp Int64
i
      (VName
_, Space
destspace, Count Elements (TExp Int64)
destidx) <- forall rep r op.
MemLoc
-> [TExp Int64]
-> ImpM rep r op (VName, Space, Count Elements (TExp Int64))
fullyIndexArray' MemLoc
destloc [TExp Int64]
is
      (VName
_, Space
srcspace, Count Elements (TExp Int64)
srcidx) <- forall rep r op.
MemLoc
-> [TExp Int64]
-> ImpM rep r op (VName, Space, Count Elements (TExp Int64))
fullyIndexArray' MemLoc
srcloc [TExp Int64]
is
      VName
tmp <- forall {k} (t :: k). TV t -> VName
tvVar forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall {k} rep r op (t :: k).
[Char] -> PrimType -> ImpM rep r op (TV t)
dPrim [Char]
"tmp" PrimType
pt
      forall op rep r. Code op -> ImpM rep r op ()
emit forall a b. (a -> b) -> a -> b
$ forall a.
VName
-> VName
-> Count Elements (TExp Int64)
-> PrimType
-> Space
-> Volatility
-> Code a
Imp.Read VName
tmp VName
srcmem Count Elements (TExp Int64)
srcidx PrimType
pt Space
srcspace Volatility
Imp.Nonvolatile
      forall op rep r. Code op -> ImpM rep r op ()
emit forall a b. (a -> b) -> a -> b
$ forall a.
VName
-> Count Elements (TExp Int64)
-> PrimType
-> Space
-> Volatility
-> Exp
-> Code a
Imp.Write VName
destmem Count Elements (TExp Int64)
destidx PrimType
pt Space
destspace Volatility
Imp.Nonvolatile forall a b. (a -> b) -> a -> b
$ VName -> PrimType -> Exp
Imp.var VName
tmp PrimType
pt

topLevelOps, inThreadOps :: Operations MCMem HostEnv Imp.Multicore
inThreadOps :: Operations MCMem HostEnv Multicore
inThreadOps =
  (forall rep (inner :: * -> *) op r.
(Mem rep inner, FreeIn op) =>
OpCompiler rep r op -> Operations rep r op
defaultOperations OpCompiler MCMem HostEnv Multicore
opCompiler)
    { opsExpCompiler :: ExpCompiler MCMem HostEnv Multicore
opsExpCompiler = ExpCompiler MCMem HostEnv Multicore
compileMCExp
    }
topLevelOps :: Operations MCMem HostEnv Multicore
topLevelOps =
  (forall rep (inner :: * -> *) op r.
(Mem rep inner, FreeIn op) =>
OpCompiler rep r op -> Operations rep r op
defaultOperations OpCompiler MCMem HostEnv Multicore
opCompiler)
    { opsExpCompiler :: ExpCompiler MCMem HostEnv Multicore
opsExpCompiler = ExpCompiler MCMem HostEnv Multicore
compileMCExp,
      opsCopyCompiler :: CopyCompiler MCMem HostEnv Multicore
opsCopyCompiler = CopyCompiler MCMem HostEnv Multicore
parallelCopy
    }

updateAcc :: VName -> [SubExp] -> [SubExp] -> MulticoreGen ()
updateAcc :: VName -> [SubExp] -> [SubExp] -> ImpM MCMem HostEnv Multicore ()
updateAcc VName
acc [SubExp]
is [SubExp]
vs = forall rep r op. Text -> ImpM rep r op () -> ImpM rep r op ()
sComment Text
"UpdateAcc" forall a b. (a -> b) -> a -> b
$ do
  -- See the ImpGen implementation of UpdateAcc for general notes.
  let is' :: [TExp Int64]
is' = forall a b. (a -> b) -> [a] -> [b]
map SubExp -> TExp Int64
pe64 [SubExp]
is
  (VName
c, Space
_space, [VName]
arrs, [TExp Int64]
dims, Maybe (Lambda MCMem)
op) <- forall rep r op.
VName
-> [TExp Int64]
-> ImpM
     rep r op (VName, Space, [VName], [TExp Int64], Maybe (Lambda rep))
lookupAcc VName
acc [TExp Int64]
is'
  forall rep r op. TExp Bool -> ImpM rep r op () -> ImpM rep r op ()
sWhen (Slice (TExp Int64) -> [TExp Int64] -> TExp Bool
inBounds (forall d. [DimIndex d] -> Slice d
Slice (forall a b. (a -> b) -> [a] -> [b]
map forall d. d -> DimIndex d
DimFix [TExp Int64]
is')) [TExp Int64]
dims) forall a b. (a -> b) -> a -> b
$
    case Maybe (Lambda MCMem)
op of
      Maybe (Lambda MCMem)
Nothing ->
        forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ (forall a b. [a] -> [b] -> [(a, b)]
zip [VName]
arrs [SubExp]
vs) forall a b. (a -> b) -> a -> b
$ \(VName
arr, SubExp
v) -> forall rep r op.
VName -> [TExp Int64] -> SubExp -> [TExp Int64] -> ImpM rep r op ()
copyDWIMFix VName
arr [TExp Int64]
is' SubExp
v []
      Just Lambda MCMem
lam -> do
        forall rep (inner :: * -> *) r op.
Mem rep inner =>
[LParam rep] -> ImpM rep r op ()
dLParams forall a b. (a -> b) -> a -> b
$ forall rep. Lambda rep -> [LParam rep]
lambdaParams Lambda MCMem
lam
        let ([VName]
_x_params, [VName]
y_params) =
              forall a. Int -> [a] -> ([a], [a])
splitAt (forall (t :: * -> *) a. Foldable t => t a -> Int
length [SubExp]
vs) forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map forall dec. Param dec -> VName
paramName forall a b. (a -> b) -> a -> b
$ forall rep. Lambda rep -> [LParam rep]
lambdaParams Lambda MCMem
lam
        forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ (forall a b. [a] -> [b] -> [(a, b)]
zip [VName]
y_params [SubExp]
vs) forall a b. (a -> b) -> a -> b
$ \(VName
yp, SubExp
v) -> forall rep r op.
VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> ImpM rep r op ()
copyDWIM VName
yp [] SubExp
v []
        AtomicBinOp
atomics <- HostEnv -> AtomicBinOp
hostAtomics forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall rep r op. ImpM rep r op r
askEnv
        case AtomicBinOp -> Lambda MCMem -> AtomicUpdate MCMem ()
atomicUpdateLocking AtomicBinOp
atomics Lambda MCMem
lam of
          AtomicPrim DoAtomicUpdate MCMem ()
f -> DoAtomicUpdate MCMem ()
f [VName]
arrs [TExp Int64]
is'
          AtomicCAS DoAtomicUpdate MCMem ()
f -> DoAtomicUpdate MCMem ()
f [VName]
arrs [TExp Int64]
is'
          AtomicLocking Locking -> DoAtomicUpdate MCMem ()
f -> do
            Maybe Locks
c_locks <- forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup VName
c forall b c a. (b -> c) -> (a -> b) -> a -> c
. HostEnv -> Map VName Locks
hostLocks forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall rep r op. ImpM rep r op r
askEnv
            case Maybe Locks
c_locks of
              Just (Locks VName
locks Int
num_locks) -> do
                let locking :: Locking
locking =
                      VName
-> TExp Int32
-> TExp Int32
-> TExp Int32
-> ([TExp Int64] -> [TExp Int64])
-> Locking
Locking VName
locks TExp Int32
0 TExp Int32
1 TExp Int32
0 forall a b. (a -> b) -> a -> b
$
                        forall (f :: * -> *) a. Applicative f => a -> f a
pure forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall e. IntegralExp e => e -> e -> e
`rem` forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
num_locks) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall num. IntegralExp num => [num] -> [num] -> num
flattenIndex [TExp Int64]
dims
                Locking -> DoAtomicUpdate MCMem ()
f Locking
locking [VName]
arrs [TExp Int64]
is'
              Maybe Locks
Nothing ->
                forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$ [Char]
"Missing locks for " forall a. [a] -> [a] -> [a]
++ forall a. Pretty a => a -> [Char]
prettyString VName
acc

withAcc ::
  Pat LetDecMem ->
  [(Shape, [VName], Maybe (Lambda MCMem, [SubExp]))] ->
  Lambda MCMem ->
  MulticoreGen ()
withAcc :: Pat LParamMem
-> [(Shape, [VName], Maybe (Lambda MCMem, [SubExp]))]
-> Lambda MCMem
-> ImpM MCMem HostEnv Multicore ()
withAcc Pat LParamMem
pat [(Shape, [VName], Maybe (Lambda MCMem, [SubExp]))]
inputs Lambda MCMem
lam = do
  AtomicBinOp
atomics <- HostEnv -> AtomicBinOp
hostAtomics forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall rep r op. ImpM rep r op r
askEnv
  AtomicBinOp
-> [(VName, (Shape, [VName], Maybe (Lambda MCMem, [SubExp])))]
-> ImpM MCMem HostEnv Multicore ()
locksForInputs AtomicBinOp
atomics forall a b. (a -> b) -> a -> b
$ forall a b. [a] -> [b] -> [(a, b)]
zip [VName]
accs [(Shape, [VName], Maybe (Lambda MCMem, [SubExp]))]
inputs
  where
    accs :: [VName]
accs = forall a b. (a -> b) -> [a] -> [b]
map forall dec. Param dec -> VName
paramName forall a b. (a -> b) -> a -> b
$ forall rep. Lambda rep -> [LParam rep]
lambdaParams Lambda MCMem
lam
    locksForInputs :: AtomicBinOp
-> [(VName, (Shape, [VName], Maybe (Lambda MCMem, [SubExp])))]
-> ImpM MCMem HostEnv Multicore ()
locksForInputs AtomicBinOp
_ [] =
      forall rep (inner :: * -> *) r op.
Mem rep inner =>
Pat (LetDec rep) -> Exp rep -> ImpM rep r op ()
defCompileExp Pat LParamMem
pat forall a b. (a -> b) -> a -> b
$ forall rep. [WithAccInput rep] -> Lambda rep -> Exp rep
WithAcc [(Shape, [VName], Maybe (Lambda MCMem, [SubExp]))]
inputs Lambda MCMem
lam
    locksForInputs AtomicBinOp
atomics ((VName
c, (Shape
_, [VName]
_, Maybe (Lambda MCMem, [SubExp])
op)) : [(VName, (Shape, [VName], Maybe (Lambda MCMem, [SubExp])))]
inputs')
      | Just (Lambda MCMem
op_lam, [SubExp]
_) <- Maybe (Lambda MCMem, [SubExp])
op,
        AtomicLocking Locking -> DoAtomicUpdate MCMem ()
_ <- AtomicBinOp -> Lambda MCMem -> AtomicUpdate MCMem ()
atomicUpdateLocking AtomicBinOp
atomics Lambda MCMem
op_lam = do
          let num_locks :: Int
num_locks = Int
100151
          VName
locks_arr <-
            forall rep r op.
[Char] -> PrimType -> ArrayContents -> ImpM rep r op VName
sStaticArray [Char]
"withacc_locks" PrimType
int32 forall a b. (a -> b) -> a -> b
$ Int -> ArrayContents
Imp.ArrayZeros Int
num_locks
          let locks :: Locks
locks = VName -> Int -> Locks
Locks VName
locks_arr Int
num_locks
              extend :: HostEnv -> HostEnv
extend HostEnv
env = HostEnv
env {hostLocks :: Map VName Locks
hostLocks = forall k a. Ord k => k -> a -> Map k a -> Map k a
M.insert VName
c Locks
locks forall a b. (a -> b) -> a -> b
$ HostEnv -> Map VName Locks
hostLocks HostEnv
env}
          forall r rep op a. (r -> r) -> ImpM rep r op a -> ImpM rep r op a
localEnv HostEnv -> HostEnv
extend forall a b. (a -> b) -> a -> b
$ AtomicBinOp
-> [(VName, (Shape, [VName], Maybe (Lambda MCMem, [SubExp])))]
-> ImpM MCMem HostEnv Multicore ()
locksForInputs AtomicBinOp
atomics [(VName, (Shape, [VName], Maybe (Lambda MCMem, [SubExp])))]
inputs'
      | Bool
otherwise =
          AtomicBinOp
-> [(VName, (Shape, [VName], Maybe (Lambda MCMem, [SubExp])))]
-> ImpM MCMem HostEnv Multicore ()
locksForInputs AtomicBinOp
atomics [(VName, (Shape, [VName], Maybe (Lambda MCMem, [SubExp])))]
inputs'

compileMCExp :: ExpCompiler MCMem HostEnv Imp.Multicore
compileMCExp :: ExpCompiler MCMem HostEnv Multicore
compileMCExp Pat (LetDec MCMem)
_ (BasicOp (UpdateAcc VName
acc [SubExp]
is [SubExp]
vs)) =
  VName -> [SubExp] -> [SubExp] -> ImpM MCMem HostEnv Multicore ()
updateAcc VName
acc [SubExp]
is [SubExp]
vs
compileMCExp Pat (LetDec MCMem)
pat (WithAcc [(Shape, [VName], Maybe (Lambda MCMem, [SubExp]))]
inputs Lambda MCMem
lam) =
  Pat LParamMem
-> [(Shape, [VName], Maybe (Lambda MCMem, [SubExp]))]
-> Lambda MCMem
-> ImpM MCMem HostEnv Multicore ()
withAcc Pat (LetDec MCMem)
pat [(Shape, [VName], Maybe (Lambda MCMem, [SubExp]))]
inputs Lambda MCMem
lam
compileMCExp Pat (LetDec MCMem)
dest Exp MCMem
e =
  forall rep (inner :: * -> *) r op.
Mem rep inner =>
Pat (LetDec rep) -> Exp rep -> ImpM rep r op ()
defCompileExp Pat (LetDec MCMem)
dest Exp MCMem
e

compileMCOp ::
  Pat LetDecMem ->
  MCOp NoOp MCMem ->
  ImpM MCMem HostEnv Imp.Multicore ()
compileMCOp :: Pat LParamMem -> MCOp NoOp MCMem -> ImpM MCMem HostEnv Multicore ()
compileMCOp Pat LParamMem
_ (OtherOp NoOp MCMem
NoOp) = forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
compileMCOp Pat LParamMem
pat (ParOp Maybe (SegOp () MCMem)
par_op SegOp () MCMem
op) = do
  let space :: SegSpace
space = SegOp () MCMem -> SegSpace
getSpace SegOp () MCMem
op
  forall {k} (t :: k) rep r op. VName -> TExp t -> ImpM rep r op ()
dPrimV_ (SegSpace -> VName
segFlat SegSpace
space) (TExp Int64
0 :: Imp.TExp Int64)
  TExp Int64
iterations <- SegOp () MCMem
-> SegSpace -> ImpM MCMem HostEnv Multicore (TExp Int64)
getIterationDomain SegOp () MCMem
op SegSpace
space
  Code Multicore
seq_code <- forall rep r op. ImpM rep r op () -> ImpM rep r op (Code op)
collect forall a b. (a -> b) -> a -> b
$ forall rep r op a.
Operations rep r op -> ImpM rep r op a -> ImpM rep r op a
localOps Operations MCMem HostEnv Multicore
inThreadOps forall a b. (a -> b) -> a -> b
$ do
    TV Int32
nsubtasks <- forall {k} rep r op (t :: k).
[Char] -> PrimType -> ImpM rep r op (TV t)
dPrim [Char]
"nsubtasks" PrimType
int32
    forall op rep r. op -> ImpM rep r op ()
sOp forall a b. (a -> b) -> a -> b
$ VName -> Multicore
Imp.GetNumTasks forall a b. (a -> b) -> a -> b
$ forall {k} (t :: k). TV t -> VName
tvVar TV Int32
nsubtasks
    forall op rep r. Code op -> ImpM rep r op ()
emit forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Pat LParamMem
-> SegOp () MCMem
-> TV Int32
-> ImpM MCMem HostEnv Multicore (Code Multicore)
compileSegOp Pat LParamMem
pat SegOp () MCMem
op TV Int32
nsubtasks
  [Param]
retvals <- Pat LParamMem -> SegOp () MCMem -> MulticoreGen [Param]
getReturnParams Pat LParamMem
pat SegOp () MCMem
op

  let scheduling_info :: Scheduling -> SchedulerInfo
scheduling_info = Exp -> Scheduling -> SchedulerInfo
Imp.SchedulerInfo (forall {k} (t :: k) v. TPrimExp t v -> PrimExp v
untyped TExp Int64
iterations)

  Maybe ParallelTask
par_task <- case Maybe (SegOp () MCMem)
par_op of
    Just SegOp () MCMem
nested_op -> do
      let space' :: SegSpace
space' = SegOp () MCMem -> SegSpace
getSpace SegOp () MCMem
nested_op
      forall {k} (t :: k) rep r op. VName -> TExp t -> ImpM rep r op ()
dPrimV_ (SegSpace -> VName
segFlat SegSpace
space') (TExp Int64
0 :: Imp.TExp Int64)
      Code Multicore
par_code <- forall rep r op. ImpM rep r op () -> ImpM rep r op (Code op)
collect forall a b. (a -> b) -> a -> b
$ do
        TV Int32
nsubtasks <- forall {k} rep r op (t :: k).
[Char] -> PrimType -> ImpM rep r op (TV t)
dPrim [Char]
"nsubtasks" PrimType
int32
        forall op rep r. op -> ImpM rep r op ()
sOp forall a b. (a -> b) -> a -> b
$ VName -> Multicore
Imp.GetNumTasks forall a b. (a -> b) -> a -> b
$ forall {k} (t :: k). TV t -> VName
tvVar TV Int32
nsubtasks
        forall op rep r. Code op -> ImpM rep r op ()
emit forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Pat LParamMem
-> SegOp () MCMem
-> TV Int32
-> ImpM MCMem HostEnv Multicore (Code Multicore)
compileSegOp Pat LParamMem
pat SegOp () MCMem
nested_op TV Int32
nsubtasks
      forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ Code Multicore -> ParallelTask
Imp.ParallelTask Code Multicore
par_code
    Maybe (SegOp () MCMem)
Nothing -> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Maybe a
Nothing

  [Char]
s <- SegOp () MCMem -> ImpM MCMem HostEnv Multicore [Char]
segOpString SegOp () MCMem
op
  let seq_task :: ParallelTask
seq_task = Code Multicore -> ParallelTask
Imp.ParallelTask Code Multicore
seq_code
  [Param]
free_params <- forall a. (a -> Bool) -> [a] -> [a]
filter (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [Param]
retvals) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall a. FreeIn a => a -> MulticoreGen [Param]
freeParams (Maybe ParallelTask
par_task, ParallelTask
seq_task)
  forall op rep r. Code op -> ImpM rep r op ()
emit forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. a -> Code a
Imp.Op forall a b. (a -> b) -> a -> b
$
    [Char]
-> [Param]
-> ParallelTask
-> Maybe ParallelTask
-> [Param]
-> SchedulerInfo
-> Multicore
Imp.SegOp [Char]
s [Param]
free_params ParallelTask
seq_task Maybe ParallelTask
par_task [Param]
retvals forall a b. (a -> b) -> a -> b
$
      Scheduling -> SchedulerInfo
scheduling_info (forall rep. SegOp () rep -> Code Multicore -> Scheduling
decideScheduling' SegOp () MCMem
op Code Multicore
seq_code)

compileSegOp ::
  Pat LetDecMem ->
  SegOp () MCMem ->
  TV Int32 ->
  ImpM MCMem HostEnv Imp.Multicore Imp.MCCode
compileSegOp :: Pat LParamMem
-> SegOp () MCMem
-> TV Int32
-> ImpM MCMem HostEnv Multicore (Code Multicore)
compileSegOp Pat LParamMem
pat (SegHist ()
_ SegSpace
space [HistOp MCMem]
histops [Type]
_ KernelBody MCMem
kbody) TV Int32
ntasks =
  Pat LParamMem
-> SegSpace
-> [HistOp MCMem]
-> KernelBody MCMem
-> TV Int32
-> ImpM MCMem HostEnv Multicore (Code Multicore)
compileSegHist Pat LParamMem
pat SegSpace
space [HistOp MCMem]
histops KernelBody MCMem
kbody TV Int32
ntasks
compileSegOp Pat LParamMem
pat (SegScan ()
_ SegSpace
space [SegBinOp MCMem]
scans [Type]
_ KernelBody MCMem
kbody) TV Int32
ntasks =
  Pat LParamMem
-> SegSpace
-> [SegBinOp MCMem]
-> KernelBody MCMem
-> TV Int32
-> ImpM MCMem HostEnv Multicore (Code Multicore)
compileSegScan Pat LParamMem
pat SegSpace
space [SegBinOp MCMem]
scans KernelBody MCMem
kbody TV Int32
ntasks
compileSegOp Pat LParamMem
pat (SegRed ()
_ SegSpace
space [SegBinOp MCMem]
reds [Type]
_ KernelBody MCMem
kbody) TV Int32
ntasks =
  Pat LParamMem
-> SegSpace
-> [SegBinOp MCMem]
-> KernelBody MCMem
-> TV Int32
-> ImpM MCMem HostEnv Multicore (Code Multicore)
compileSegRed Pat LParamMem
pat SegSpace
space [SegBinOp MCMem]
reds KernelBody MCMem
kbody TV Int32
ntasks
compileSegOp Pat LParamMem
pat (SegMap ()
_ SegSpace
space [Type]
_ KernelBody MCMem
kbody) TV Int32
_ =
  Pat LParamMem
-> SegSpace
-> KernelBody MCMem
-> ImpM MCMem HostEnv Multicore (Code Multicore)
compileSegMap Pat LParamMem
pat SegSpace
space KernelBody MCMem
kbody

-- GCC supported primitve atomic Operations
-- TODO: Add support for 1, 2, and 16 bytes too
gccAtomics :: AtomicBinOp
gccAtomics :: AtomicBinOp
gccAtomics = forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup [(BinOp,
  VName -> VName -> Count Elements (TExp Int32) -> Exp -> AtomicOp)]
cpu
  where
    cpu :: [(BinOp,
  VName -> VName -> Count Elements (TExp Int32) -> Exp -> AtomicOp)]
cpu =
      [ (IntType -> Overflow -> BinOp
Add IntType
Int32 Overflow
OverflowUndef, IntType
-> VName -> VName -> Count Elements (TExp Int32) -> Exp -> AtomicOp
Imp.AtomicAdd IntType
Int32),
        (IntType -> Overflow -> BinOp
Sub IntType
Int32 Overflow
OverflowUndef, IntType
-> VName -> VName -> Count Elements (TExp Int32) -> Exp -> AtomicOp
Imp.AtomicSub IntType
Int32),
        (IntType -> BinOp
And IntType
Int32, IntType
-> VName -> VName -> Count Elements (TExp Int32) -> Exp -> AtomicOp
Imp.AtomicAnd IntType
Int32),
        (IntType -> BinOp
Xor IntType
Int32, IntType
-> VName -> VName -> Count Elements (TExp Int32) -> Exp -> AtomicOp
Imp.AtomicXor IntType
Int32),
        (IntType -> BinOp
Or IntType
Int32, IntType
-> VName -> VName -> Count Elements (TExp Int32) -> Exp -> AtomicOp
Imp.AtomicOr IntType
Int32),
        (IntType -> Overflow -> BinOp
Add IntType
Int64 Overflow
OverflowUndef, IntType
-> VName -> VName -> Count Elements (TExp Int32) -> Exp -> AtomicOp
Imp.AtomicAdd IntType
Int64),
        (IntType -> Overflow -> BinOp
Sub IntType
Int64 Overflow
OverflowUndef, IntType
-> VName -> VName -> Count Elements (TExp Int32) -> Exp -> AtomicOp
Imp.AtomicSub IntType
Int64),
        (IntType -> BinOp
And IntType
Int64, IntType
-> VName -> VName -> Count Elements (TExp Int32) -> Exp -> AtomicOp
Imp.AtomicAnd IntType
Int64),
        (IntType -> BinOp
Xor IntType
Int64, IntType
-> VName -> VName -> Count Elements (TExp Int32) -> Exp -> AtomicOp
Imp.AtomicXor IntType
Int64),
        (IntType -> BinOp
Or IntType
Int64, IntType
-> VName -> VName -> Count Elements (TExp Int32) -> Exp -> AtomicOp
Imp.AtomicOr IntType
Int64)
      ]

-- | Compile the program.
compileProg ::
  MonadFreshNames m =>
  Prog MCMem ->
  m (Warnings, Imp.Definitions Imp.Multicore)
compileProg :: forall (m :: * -> *).
MonadFreshNames m =>
Prog MCMem -> m (Warnings, Definitions Multicore)
compileProg =
  forall rep (inner :: * -> *) op (m :: * -> *) r.
(Mem rep inner, FreeIn op, MonadFreshNames m) =>
r
-> Operations rep r op
-> Space
-> Prog rep
-> m (Warnings, Definitions op)
Futhark.CodeGen.ImpGen.compileProg
    (AtomicBinOp -> Map VName Locks -> HostEnv
HostEnv AtomicBinOp
gccAtomics forall a. Monoid a => a
mempty)
    Operations MCMem HostEnv Multicore
topLevelOps
    Space
Imp.DefaultSpace