{-# LANGUAGE CPP                 #-}
{-# LANGUAGE GADTs               #-}
{-# LANGUAGE OverloadedStrings   #-}
{-# LANGUAGE RecordWildCards     #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications    #-}
-- |
-- Module      : Data.Array.Accelerate.LLVM.Native.CodeGen.Permute
-- Copyright   : [2016..2020] The Accelerate Team
-- License     : BSD3
--
-- Maintainer  : Trevor L. McDonell <trevor.mcdonell@gmail.com>
-- Stability   : experimental
-- Portability : non-portable (GHC extensions)
--

module Data.Array.Accelerate.LLVM.Native.CodeGen.Permute
  where

import Data.Array.Accelerate.AST                                    ( PrimMaybe )
import Data.Array.Accelerate.Error
import Data.Array.Accelerate.Representation.Array
import Data.Array.Accelerate.Representation.Shape
import Data.Array.Accelerate.Representation.Type

import Data.Array.Accelerate.LLVM.CodeGen.Arithmetic                as A
import Data.Array.Accelerate.LLVM.CodeGen.Array
import Data.Array.Accelerate.LLVM.CodeGen.Base
import Data.Array.Accelerate.LLVM.CodeGen.Constant
import Data.Array.Accelerate.LLVM.CodeGen.Environment
import Data.Array.Accelerate.LLVM.CodeGen.Exp
import Data.Array.Accelerate.LLVM.CodeGen.IR
import Data.Array.Accelerate.LLVM.CodeGen.Monad
import Data.Array.Accelerate.LLVM.CodeGen.Permute
import Data.Array.Accelerate.LLVM.CodeGen.Ptr
import Data.Array.Accelerate.LLVM.CodeGen.Sugar
import Data.Array.Accelerate.LLVM.Compile.Cache

import Data.Array.Accelerate.LLVM.Native.Target                     ( Native )
import Data.Array.Accelerate.LLVM.Native.CodeGen.Base
import Data.Array.Accelerate.LLVM.Native.CodeGen.Loop

import LLVM.AST.Type.AddrSpace
import LLVM.AST.Type.Instruction
import LLVM.AST.Type.Instruction.Atomic
import LLVM.AST.Type.Instruction.RMW                                as RMW
import LLVM.AST.Type.Instruction.Volatile
import LLVM.AST.Type.Representation

import Control.Applicative
import Control.Monad                                                ( void )
import Prelude


-- Forward permutation specified by an indexing mapping. The resulting array is
-- initialised with the given defaults, and any further values that are permuted
-- into the result array are added to the current value using the combination
-- function.
--
-- The combination function must be /associative/ and /commutative/. Elements
-- that are mapped to the magic index 'ignore' are dropped.
--
mkPermute
    :: HasCallStack
    => UID
    -> Gamma               aenv
    -> ArrayR (Array sh e)
    -> ShapeR sh'
    -> IRPermuteFun Native aenv (e -> e -> e)
    -> IRFun1       Native aenv (sh -> PrimMaybe sh')
    -> MIRDelayed   Native aenv (Array sh e)
    -> CodeGen      Native      (IROpenAcc Native aenv (Array sh' e))
mkPermute :: UID
-> Gamma aenv
-> ArrayR (Array sh e)
-> ShapeR sh'
-> IRPermuteFun Native aenv (e -> e -> e)
-> IRFun1 Native aenv (sh -> PrimMaybe sh')
-> MIRDelayed Native aenv (Array sh e)
-> CodeGen Native (IROpenAcc Native aenv (Array sh' e))
mkPermute UID
uid Gamma aenv
aenv ArrayR (Array sh e)
repr ShapeR sh'
shr IRPermuteFun Native aenv (e -> e -> e)
combine IRFun1 Native aenv (sh -> PrimMaybe sh')
project MIRDelayed Native aenv (Array sh e)
arr =
  IROpenAcc Native aenv (Array sh' e)
-> IROpenAcc Native aenv (Array sh' e)
-> IROpenAcc Native aenv (Array sh' e)
forall aenv a.
IROpenAcc Native aenv a
-> IROpenAcc Native aenv a -> IROpenAcc Native aenv a
(+++) (IROpenAcc Native aenv (Array sh' e)
 -> IROpenAcc Native aenv (Array sh' e)
 -> IROpenAcc Native aenv (Array sh' e))
-> CodeGen Native (IROpenAcc Native aenv (Array sh' e))
-> CodeGen
     Native
     (IROpenAcc Native aenv (Array sh' e)
      -> IROpenAcc Native aenv (Array sh' e))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> UID
-> Gamma aenv
-> ArrayR (Array sh e)
-> ShapeR sh'
-> IRPermuteFun Native aenv (e -> e -> e)
-> IRFun1 Native aenv (sh -> PrimMaybe sh')
-> MIRDelayed Native aenv (Array sh e)
-> CodeGen Native (IROpenAcc Native aenv (Array sh' e))
forall aenv sh e sh'.
UID
-> Gamma aenv
-> ArrayR (Array sh e)
-> ShapeR sh'
-> IRPermuteFun Native aenv (e -> e -> e)
-> IRFun1 Native aenv (sh -> PrimMaybe sh')
-> MIRDelayed Native aenv (Array sh e)
-> CodeGen Native (IROpenAcc Native aenv (Array sh' e))
mkPermuteS UID
uid Gamma aenv
aenv ArrayR (Array sh e)
repr ShapeR sh'
shr IRPermuteFun Native aenv (e -> e -> e)
combine IRFun1 Native aenv (sh -> PrimMaybe sh')
project MIRDelayed Native aenv (Array sh e)
arr
        CodeGen
  Native
  (IROpenAcc Native aenv (Array sh' e)
   -> IROpenAcc Native aenv (Array sh' e))
-> CodeGen Native (IROpenAcc Native aenv (Array sh' e))
-> CodeGen Native (IROpenAcc Native aenv (Array sh' e))
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> UID
-> Gamma aenv
-> ArrayR (Array sh e)
-> ShapeR sh'
-> IRPermuteFun Native aenv (e -> e -> e)
-> IRFun1 Native aenv (sh -> PrimMaybe sh')
-> MIRDelayed Native aenv (Array sh e)
-> CodeGen Native (IROpenAcc Native aenv (Array sh' e))
forall aenv sh e sh'.
HasCallStack =>
UID
-> Gamma aenv
-> ArrayR (Array sh e)
-> ShapeR sh'
-> IRPermuteFun Native aenv (e -> e -> e)
-> IRFun1 Native aenv (sh -> PrimMaybe sh')
-> MIRDelayed Native aenv (Array sh e)
-> CodeGen Native (IROpenAcc Native aenv (Array sh' e))
mkPermuteP UID
uid Gamma aenv
aenv ArrayR (Array sh e)
repr ShapeR sh'
shr IRPermuteFun Native aenv (e -> e -> e)
combine IRFun1 Native aenv (sh -> PrimMaybe sh')
project MIRDelayed Native aenv (Array sh e)
arr


-- Forward permutation which does not require locking the output array. This
-- could be because we are executing sequentially with a single thread, or
-- because the default values are unused (e.g. for a filter).
--
-- We could also use this method if we can prove that the mapping function is
-- injective (distinct elements in the domain map to distinct elements in the
-- co-domain).
--
mkPermuteS
    :: UID
    -> Gamma               aenv
    -> ArrayR (Array sh e)
    -> ShapeR sh'
    -> IRPermuteFun Native aenv (e -> e -> e)
    -> IRFun1       Native aenv (sh -> PrimMaybe sh')
    -> MIRDelayed   Native aenv (Array sh e)
    -> CodeGen      Native      (IROpenAcc Native aenv (Array sh' e))
mkPermuteS :: UID
-> Gamma aenv
-> ArrayR (Array sh e)
-> ShapeR sh'
-> IRPermuteFun Native aenv (e -> e -> e)
-> IRFun1 Native aenv (sh -> PrimMaybe sh')
-> MIRDelayed Native aenv (Array sh e)
-> CodeGen Native (IROpenAcc Native aenv (Array sh' e))
mkPermuteS UID
uid Gamma aenv
aenv ArrayR (Array sh e)
repr ShapeR sh'
shr IRPermuteFun{Maybe (RMWOperation, IRFun1 Native aenv (e -> e))
IRFun2 Native aenv (e -> e -> e)
combine :: forall arch aenv e.
IRPermuteFun arch aenv (e -> e -> e)
-> IRFun2 arch aenv (e -> e -> e)
atomicRMW :: forall arch aenv e.
IRPermuteFun arch aenv (e -> e -> e)
-> Maybe (RMWOperation, IRFun1 arch aenv (e -> e))
atomicRMW :: Maybe (RMWOperation, IRFun1 Native aenv (e -> e))
combine :: IRFun2 Native aenv (e -> e -> e)
..} IRFun1 Native aenv (sh -> PrimMaybe sh')
project MIRDelayed Native aenv (Array sh e)
marr =
  let
      (Operands sh
start, Operands sh
end, [Parameter]
paramGang) = ShapeR sh -> (Operands sh, Operands sh, [Parameter])
forall sh. ShapeR sh -> (Operands sh, Operands sh, [Parameter])
gangParam    (ArrayR (Array sh e) -> ShapeR sh
forall sh e. ArrayR (Array sh e) -> ShapeR sh
arrayRshape ArrayR (Array sh e)
repr)
      (IRArray (Array sh' e)
arrOut, [Parameter]
paramOut)      = ArrayR (Array sh' e)
-> Name (Array sh' e) -> (IRArray (Array sh' e), [Parameter])
forall sh e.
ArrayR (Array sh e)
-> Name (Array sh e) -> (IRArray (Array sh e), [Parameter])
mutableArray (ArrayR (Array sh e) -> ShapeR sh' -> ArrayR (Array sh' e)
forall sh e sh'.
ArrayR (Array sh e) -> ShapeR sh' -> ArrayR (Array sh' e)
reprOut ArrayR (Array sh e)
repr ShapeR sh'
shr) Name (Array sh' e)
"out"
      (IRDelayed Native aenv (Array sh e)
arrIn,  [Parameter]
paramIn)       = Name (Array sh e)
-> MIRDelayed Native aenv (Array sh e)
-> (IRDelayed Native aenv (Array sh e), [Parameter])
forall sh e arch aenv.
Name (Array sh e)
-> MIRDelayed arch aenv (Array sh e)
-> (IRDelayed arch aenv (Array sh e), [Parameter])
delayedArray Name (Array sh e)
"in" MIRDelayed Native aenv (Array sh e)
marr
      paramEnv :: [Parameter]
paramEnv                = Gamma aenv -> [Parameter]
forall aenv. Gamma aenv -> [Parameter]
envParam Gamma aenv
aenv
  in
  UID
-> Label
-> [Parameter]
-> CodeGen Native ()
-> CodeGen Native (IROpenAcc Native aenv (Array sh' e))
forall aenv a.
UID
-> Label
-> [Parameter]
-> CodeGen Native ()
-> CodeGen Native (IROpenAcc Native aenv a)
makeOpenAcc UID
uid Label
"permuteS" ([Parameter]
paramGang [Parameter] -> [Parameter] -> [Parameter]
forall a. [a] -> [a] -> [a]
++ [Parameter]
paramOut [Parameter] -> [Parameter] -> [Parameter]
forall a. [a] -> [a] -> [a]
++ [Parameter]
paramIn [Parameter] -> [Parameter] -> [Parameter]
forall a. [a] -> [a] -> [a]
++ [Parameter]
paramEnv) (CodeGen Native ()
 -> CodeGen Native (IROpenAcc Native aenv (Array sh' e)))
-> CodeGen Native ()
-> CodeGen Native (IROpenAcc Native aenv (Array sh' e))
forall a b. (a -> b) -> a -> b
$ do

    Operands sh
sh <- IRDelayed Native aenv (Array sh e) -> IRExp Native aenv sh
forall arch aenv sh e.
IRDelayed arch aenv (Array sh e) -> IRExp arch aenv sh
delayedExtent IRDelayed Native aenv (Array sh e)
arrIn

    ShapeR sh
-> Operands sh
-> Operands sh
-> Operands sh
-> (Operands sh -> Operands Int -> CodeGen Native ())
-> CodeGen Native ()
forall sh.
ShapeR sh
-> Operands sh
-> Operands sh
-> Operands sh
-> (Operands sh -> Operands Int -> CodeGen Native ())
-> CodeGen Native ()
imapNestFromTo (ArrayR (Array sh e) -> ShapeR sh
forall sh e. ArrayR (Array sh e) -> ShapeR sh
arrayRshape ArrayR (Array sh e)
repr) Operands sh
start Operands sh
end Operands sh
sh ((Operands sh -> Operands Int -> CodeGen Native ())
 -> CodeGen Native ())
-> (Operands sh -> Operands Int -> CodeGen Native ())
-> CodeGen Native ()
forall a b. (a -> b) -> a -> b
$ \Operands sh
ix Operands Int
_ -> do

      Operands (PrimMaybe sh')
ix' <- IRFun1 Native aenv (sh -> PrimMaybe sh')
-> Operands sh -> IROpenExp Native ((), sh) aenv (PrimMaybe sh')
forall arch env aenv a b.
IROpenFun1 arch env aenv (a -> b)
-> Operands a -> IROpenExp arch (env, a) aenv b
app1 IRFun1 Native aenv (sh -> PrimMaybe sh')
project Operands sh
ix

      CodeGen Native (Operands Bool)
-> CodeGen Native () -> CodeGen Native ()
forall arch.
CodeGen arch (Operands Bool) -> CodeGen arch () -> CodeGen arch ()
when (Operands (PrimMaybe sh') -> CodeGen Native (Operands Bool)
forall a arch.
Operands (PrimMaybe a) -> CodeGen arch (Operands Bool)
isJust Operands (PrimMaybe sh')
ix') (CodeGen Native () -> CodeGen Native ())
-> CodeGen Native () -> CodeGen Native ()
forall a b. (a -> b) -> a -> b
$ do
        Operands sh'
i <- Operands (PrimMaybe sh') -> CodeGen Native (Operands sh')
forall a arch. Operands (PrimMaybe a) -> CodeGen arch (Operands a)
fromJust Operands (PrimMaybe sh')
ix'
        Operands Int
j <- ShapeR sh'
-> Operands sh' -> Operands sh' -> CodeGen Native (Operands Int)
forall sh arch.
ShapeR sh
-> Operands sh -> Operands sh -> CodeGen arch (Operands Int)
intOfIndex ShapeR sh'
shr (IRArray (Array sh' e) -> Operands sh'
forall sh e. IRArray (Array sh e) -> Operands sh
irArrayShape IRArray (Array sh' e)
arrOut) Operands sh'
i

        -- project element onto the destination array and update
        Operands e
x <- IROpenFun1 Native () aenv (sh -> e)
-> Operands sh -> IROpenExp Native ((), sh) aenv e
forall arch env aenv a b.
IROpenFun1 arch env aenv (a -> b)
-> Operands a -> IROpenExp arch (env, a) aenv b
app1 (IRDelayed Native aenv (Array sh e)
-> IROpenFun1 Native () aenv (sh -> e)
forall arch aenv sh e.
IRDelayed arch aenv (Array sh e) -> IRFun1 arch aenv (sh -> e)
delayedIndex IRDelayed Native aenv (Array sh e)
arrIn) Operands sh
ix
        Operands e
y <- IntegralType Int
-> IRArray (Array sh' e)
-> Operands Int
-> IROpenExp Native ((), sh) aenv e
forall int sh e arch.
IntegralType int
-> IRArray (Array sh e)
-> Operands int
-> CodeGen arch (Operands e)
readArray IntegralType Int
TypeInt IRArray (Array sh' e)
arrOut Operands Int
j
        Operands e
r <- IRFun2 Native aenv (e -> e -> e)
-> Operands e -> Operands e -> IROpenExp Native (((), e), e) aenv e
forall arch env aenv a b c.
IROpenFun2 arch env aenv (a -> b -> c)
-> Operands a -> Operands b -> IROpenExp arch ((env, a), b) aenv c
app2 IRFun2 Native aenv (e -> e -> e)
combine Operands e
Operands e
x Operands e
Operands e
y

        IntegralType Int
-> IRArray (Array sh' e)
-> Operands Int
-> Operands e
-> CodeGen Native ()
forall int sh e arch.
IntegralType int
-> IRArray (Array sh e)
-> Operands int
-> Operands e
-> CodeGen arch ()
writeArray IntegralType Int
TypeInt IRArray (Array sh' e)
arrOut Operands Int
j Operands e
Operands e
r

    CodeGen Native ()
forall arch. HasCallStack => CodeGen arch ()
return_


-- Parallel forward permutation has to take special care because different
-- threads could concurrently try to update the same memory location. Where
-- available we make use of special atomic instructions and other optimisations,
-- but in the general case each element of the output array has a lock which
-- must be obtained by the thread before it can update that memory location.
--
-- TODO: After too many failures to acquire the lock on an element, the thread
-- should back off and try a different element, adding this failed element to
-- a queue or some such.
--
mkPermuteP
    :: HasCallStack
    => UID
    -> Gamma               aenv
    -> ArrayR (Array sh e)
    -> ShapeR sh'
    -> IRPermuteFun Native aenv (e -> e -> e)
    -> IRFun1       Native aenv (sh -> PrimMaybe sh')
    -> MIRDelayed   Native aenv (Array sh e)
    -> CodeGen      Native      (IROpenAcc Native aenv (Array sh' e))
mkPermuteP :: UID
-> Gamma aenv
-> ArrayR (Array sh e)
-> ShapeR sh'
-> IRPermuteFun Native aenv (e -> e -> e)
-> IRFun1 Native aenv (sh -> PrimMaybe sh')
-> MIRDelayed Native aenv (Array sh e)
-> CodeGen Native (IROpenAcc Native aenv (Array sh' e))
mkPermuteP UID
uid Gamma aenv
aenv ArrayR (Array sh e)
repr ShapeR sh'
shr IRPermuteFun{Maybe (RMWOperation, IRFun1 Native aenv (e -> e))
IRFun2 Native aenv (e -> e -> e)
atomicRMW :: Maybe (RMWOperation, IRFun1 Native aenv (e -> e))
combine :: IRFun2 Native aenv (e -> e -> e)
combine :: forall arch aenv e.
IRPermuteFun arch aenv (e -> e -> e)
-> IRFun2 arch aenv (e -> e -> e)
atomicRMW :: forall arch aenv e.
IRPermuteFun arch aenv (e -> e -> e)
-> Maybe (RMWOperation, IRFun1 arch aenv (e -> e))
..} IRFun1 Native aenv (sh -> PrimMaybe sh')
project MIRDelayed Native aenv (Array sh e)
arr =
  case Maybe (RMWOperation, IRFun1 Native aenv (e -> e))
atomicRMW of
    Maybe (RMWOperation, IRFun1 Native aenv (e -> e))
Nothing       -> UID
-> Gamma aenv
-> ArrayR (Array sh e)
-> ShapeR sh'
-> IRFun2 Native aenv (e -> e -> e)
-> IRFun1 Native aenv (sh -> PrimMaybe sh')
-> MIRDelayed Native aenv (Array sh e)
-> CodeGen Native (IROpenAcc Native aenv (Array sh' e))
forall aenv sh e sh'.
UID
-> Gamma aenv
-> ArrayR (Array sh e)
-> ShapeR sh'
-> IRFun2 Native aenv (e -> e -> e)
-> IRFun1 Native aenv (sh -> PrimMaybe sh')
-> MIRDelayed Native aenv (Array sh e)
-> CodeGen Native (IROpenAcc Native aenv (Array sh' e))
mkPermuteP_mutex UID
uid Gamma aenv
aenv ArrayR (Array sh e)
repr ShapeR sh'
shr IRFun2 Native aenv (e -> e -> e)
IRFun2 Native aenv (e -> e -> e)
combine IRFun1 Native aenv (sh -> PrimMaybe sh')
project MIRDelayed Native aenv (Array sh e)
arr
    Just (RMWOperation
rmw, IRFun1 Native aenv (e -> e)
f) -> UID
-> Gamma aenv
-> ArrayR (Array sh e)
-> ShapeR sh'
-> RMWOperation
-> IRFun1 Native aenv (e -> e)
-> IRFun1 Native aenv (sh -> PrimMaybe sh')
-> MIRDelayed Native aenv (Array sh e)
-> CodeGen Native (IROpenAcc Native aenv (Array sh' e))
forall aenv sh e sh'.
HasCallStack =>
UID
-> Gamma aenv
-> ArrayR (Array sh e)
-> ShapeR sh'
-> RMWOperation
-> IRFun1 Native aenv (e -> e)
-> IRFun1 Native aenv (sh -> PrimMaybe sh')
-> MIRDelayed Native aenv (Array sh e)
-> CodeGen Native (IROpenAcc Native aenv (Array sh' e))
mkPermuteP_rmw   UID
uid Gamma aenv
aenv ArrayR (Array sh e)
repr ShapeR sh'
shr RMWOperation
rmw IRFun1 Native aenv (e -> e)
IRFun1 Native aenv (e -> e)
f   IRFun1 Native aenv (sh -> PrimMaybe sh')
project MIRDelayed Native aenv (Array sh e)
arr


-- Parallel forward permutation function which uses atomic instructions to
-- implement lock-free array updates.
--
mkPermuteP_rmw
    :: HasCallStack
    => UID
    -> Gamma aenv
    -> ArrayR (Array sh e)
    -> ShapeR sh'
    -> RMWOperation
    -> IRFun1     Native aenv (e -> e)
    -> IRFun1     Native aenv (sh -> PrimMaybe sh')
    -> MIRDelayed Native aenv (Array sh e)
    -> CodeGen    Native      (IROpenAcc Native aenv (Array sh' e))
mkPermuteP_rmw :: UID
-> Gamma aenv
-> ArrayR (Array sh e)
-> ShapeR sh'
-> RMWOperation
-> IRFun1 Native aenv (e -> e)
-> IRFun1 Native aenv (sh -> PrimMaybe sh')
-> MIRDelayed Native aenv (Array sh e)
-> CodeGen Native (IROpenAcc Native aenv (Array sh' e))
mkPermuteP_rmw UID
uid Gamma aenv
aenv ArrayR (Array sh e)
repr ShapeR sh'
shr RMWOperation
rmw IRFun1 Native aenv (e -> e)
update IRFun1 Native aenv (sh -> PrimMaybe sh')
project MIRDelayed Native aenv (Array sh e)
marr =
  let
      (Operands sh
start, Operands sh
end, [Parameter]
paramGang) = ShapeR sh -> (Operands sh, Operands sh, [Parameter])
forall sh. ShapeR sh -> (Operands sh, Operands sh, [Parameter])
gangParam    (ArrayR (Array sh e) -> ShapeR sh
forall sh e. ArrayR (Array sh e) -> ShapeR sh
arrayRshape ArrayR (Array sh e)
repr)
      (IRArray (Array sh' e)
arrOut, [Parameter]
paramOut)      = ArrayR (Array sh' e)
-> Name (Array sh' e) -> (IRArray (Array sh' e), [Parameter])
forall sh e.
ArrayR (Array sh e)
-> Name (Array sh e) -> (IRArray (Array sh e), [Parameter])
mutableArray (ArrayR (Array sh e) -> ShapeR sh' -> ArrayR (Array sh' e)
forall sh e sh'.
ArrayR (Array sh e) -> ShapeR sh' -> ArrayR (Array sh' e)
reprOut ArrayR (Array sh e)
repr ShapeR sh'
shr) Name (Array sh' e)
"out"
      (IRDelayed Native aenv (Array sh e)
arrIn,  [Parameter]
paramIn)       = Name (Array sh e)
-> MIRDelayed Native aenv (Array sh e)
-> (IRDelayed Native aenv (Array sh e), [Parameter])
forall sh e arch aenv.
Name (Array sh e)
-> MIRDelayed arch aenv (Array sh e)
-> (IRDelayed arch aenv (Array sh e), [Parameter])
delayedArray Name (Array sh e)
"in" MIRDelayed Native aenv (Array sh e)
marr
      paramEnv :: [Parameter]
paramEnv                = Gamma aenv -> [Parameter]
forall aenv. Gamma aenv -> [Parameter]
envParam Gamma aenv
aenv
  in
  UID
-> Label
-> [Parameter]
-> CodeGen Native ()
-> CodeGen Native (IROpenAcc Native aenv (Array sh' e))
forall aenv a.
UID
-> Label
-> [Parameter]
-> CodeGen Native ()
-> CodeGen Native (IROpenAcc Native aenv a)
makeOpenAcc UID
uid Label
"permuteP_rmw" ([Parameter]
paramGang [Parameter] -> [Parameter] -> [Parameter]
forall a. [a] -> [a] -> [a]
++ [Parameter]
paramOut [Parameter] -> [Parameter] -> [Parameter]
forall a. [a] -> [a] -> [a]
++ [Parameter]
paramIn [Parameter] -> [Parameter] -> [Parameter]
forall a. [a] -> [a] -> [a]
++ [Parameter]
paramEnv) (CodeGen Native ()
 -> CodeGen Native (IROpenAcc Native aenv (Array sh' e)))
-> CodeGen Native ()
-> CodeGen Native (IROpenAcc Native aenv (Array sh' e))
forall a b. (a -> b) -> a -> b
$ do

    Operands sh
sh <- IRDelayed Native aenv (Array sh e) -> IRExp Native aenv sh
forall arch aenv sh e.
IRDelayed arch aenv (Array sh e) -> IRExp arch aenv sh
delayedExtent IRDelayed Native aenv (Array sh e)
arrIn

    ShapeR sh
-> Operands sh
-> Operands sh
-> Operands sh
-> (Operands sh -> Operands Int -> CodeGen Native ())
-> CodeGen Native ()
forall sh.
ShapeR sh
-> Operands sh
-> Operands sh
-> Operands sh
-> (Operands sh -> Operands Int -> CodeGen Native ())
-> CodeGen Native ()
imapNestFromTo (ArrayR (Array sh e) -> ShapeR sh
forall sh e. ArrayR (Array sh e) -> ShapeR sh
arrayRshape ArrayR (Array sh e)
repr) Operands sh
start Operands sh
end Operands sh
sh ((Operands sh -> Operands Int -> CodeGen Native ())
 -> CodeGen Native ())
-> (Operands sh -> Operands Int -> CodeGen Native ())
-> CodeGen Native ()
forall a b. (a -> b) -> a -> b
$ \Operands sh
ix Operands Int
_ -> do

      Operands (PrimMaybe sh')
ix' <- IRFun1 Native aenv (sh -> PrimMaybe sh')
-> Operands sh -> IROpenExp Native ((), sh) aenv (PrimMaybe sh')
forall arch env aenv a b.
IROpenFun1 arch env aenv (a -> b)
-> Operands a -> IROpenExp arch (env, a) aenv b
app1 IRFun1 Native aenv (sh -> PrimMaybe sh')
project Operands sh
ix

      CodeGen Native (Operands Bool)
-> CodeGen Native () -> CodeGen Native ()
forall arch.
CodeGen arch (Operands Bool) -> CodeGen arch () -> CodeGen arch ()
when (Operands (PrimMaybe sh') -> CodeGen Native (Operands Bool)
forall a arch.
Operands (PrimMaybe a) -> CodeGen arch (Operands Bool)
isJust Operands (PrimMaybe sh')
ix') (CodeGen Native () -> CodeGen Native ())
-> CodeGen Native () -> CodeGen Native ()
forall a b. (a -> b) -> a -> b
$ do
        Operands sh'
i <- Operands (PrimMaybe sh') -> CodeGen Native (Operands sh')
forall a arch. Operands (PrimMaybe a) -> CodeGen arch (Operands a)
fromJust Operands (PrimMaybe sh')
ix'
        Operands Int
j <- ShapeR sh'
-> Operands sh' -> Operands sh' -> CodeGen Native (Operands Int)
forall sh arch.
ShapeR sh
-> Operands sh -> Operands sh -> CodeGen arch (Operands Int)
intOfIndex ShapeR sh'
shr (IRArray (Array sh' e) -> Operands sh'
forall sh e. IRArray (Array sh e) -> Operands sh
irArrayShape IRArray (Array sh' e)
arrOut) Operands sh'
i
        Operands e
x <- IROpenFun1 Native () aenv (sh -> e)
-> Operands sh -> IROpenExp Native ((), sh) aenv e
forall arch env aenv a b.
IROpenFun1 arch env aenv (a -> b)
-> Operands a -> IROpenExp arch (env, a) aenv b
app1 (IRDelayed Native aenv (Array sh e)
-> IROpenFun1 Native () aenv (sh -> e)
forall arch aenv sh e.
IRDelayed arch aenv (Array sh e) -> IRFun1 arch aenv (sh -> e)
delayedIndex IRDelayed Native aenv (Array sh e)
arrIn) Operands sh
ix
        Operands e
r <- IRFun1 Native aenv (e -> e)
-> Operands e -> IROpenExp Native ((), sh) aenv e
forall arch env aenv a b.
IROpenFun1 arch env aenv (a -> b)
-> Operands a -> IROpenExp arch (env, a) aenv b
app1 IRFun1 Native aenv (e -> e)
update Operands e
x

        case RMWOperation
rmw of
          RMWOperation
Exchange
            -> IntegralType Int
-> IRArray (Array sh' e)
-> Operands Int
-> Operands e
-> CodeGen Native ()
forall int sh e arch.
IntegralType int
-> IRArray (Array sh e)
-> Operands int
-> Operands e
-> CodeGen arch ()
writeArray IntegralType Int
TypeInt IRArray (Array sh' e)
arrOut Operands Int
j Operands e
r
          --
          RMWOperation
_ | TupRsingle (SingleScalarType SingleType e
s)   <- ArrayR (Array sh e) -> TupR ScalarType e
forall sh e. ArrayR (Array sh e) -> TypeR e
arrayRtype ArrayR (Array sh e)
repr
            , Operands e
adata                             <- IRArray (Array sh' e) -> Operands e
forall sh e. IRArray (Array sh e) -> Operands e
irArrayData IRArray (Array sh' e)
arrOut
            -> do
                  Operand (Ptr e)
addr <- Instruction (Ptr e) -> CodeGen Native (Operand (Ptr e))
forall a arch.
HasCallStack =>
Instruction a -> CodeGen arch (Operand a)
instr' (Instruction (Ptr e) -> CodeGen Native (Operand (Ptr e)))
-> Instruction (Ptr e) -> CodeGen Native (Operand (Ptr e))
forall a b. (a -> b) -> a -> b
$ Operand (Ptr e) -> [Operand Int] -> Instruction (Ptr e)
forall a1 i.
Operand (Ptr a1) -> [Operand i] -> Instruction (Ptr a1)
GetElementPtr (AddrSpace -> Operand e -> Operand (Ptr e)
forall t. HasCallStack => AddrSpace -> Operand t -> Operand (Ptr t)
asPtr AddrSpace
defaultAddrSpace (SingleType e -> Operands e -> Operand e
forall (dict :: * -> *) a.
(IROP dict, HasCallStack) =>
dict a -> Operands a -> Operand a
op SingleType e
s Operands e
adata)) [IntegralType Int -> Operands Int -> Operand Int
forall (dict :: * -> *) a.
(IROP dict, HasCallStack) =>
dict a -> Operands a -> Operand a
op IntegralType Int
forall a. IsIntegral a => IntegralType a
integralType Operands Int
j]
                  --
                  case SingleType e
s of
#if MIN_VERSION_llvm_hs(10,0,0)
                    NumSingleType t             -> void . instr' $ AtomicRMW t NonVolatile rmw addr (op t r) (CrossThread, AcquireRelease)
#else
                    NumSingleType NumType e
t
                      | IntegralNumType{} <- NumType e
t  -> CodeGen Native (Operand e) -> CodeGen Native ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (CodeGen Native (Operand e) -> CodeGen Native ())
-> (Instruction e -> CodeGen Native (Operand e))
-> Instruction e
-> CodeGen Native ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Instruction e -> CodeGen Native (Operand e)
forall a arch.
HasCallStack =>
Instruction a -> CodeGen arch (Operand a)
instr' (Instruction e -> CodeGen Native ())
-> Instruction e -> CodeGen Native ()
forall a b. (a -> b) -> a -> b
$ NumType e
-> Volatility
-> RMWOperation
-> Operand (Ptr e)
-> Operand e
-> Atomicity
-> Instruction e
forall a.
NumType a
-> Volatility
-> RMWOperation
-> Operand (Ptr a)
-> Operand a
-> Atomicity
-> Instruction a
AtomicRMW NumType e
t Volatility
NonVolatile RMWOperation
rmw Operand (Ptr e)
addr (NumType e -> Operands e -> Operand e
forall (dict :: * -> *) a.
(IROP dict, HasCallStack) =>
dict a -> Operands a -> Operand a
op NumType e
t Operands e
r) (Synchronisation
CrossThread, MemoryOrdering
AcquireRelease)
                      | RMWOperation
RMW.Add <- RMWOperation
rmw          -> SingleType e
-> (Operands e -> IROpenExp Native ((), sh) aenv e)
-> Operand (Ptr e)
-> CodeGen Native ()
forall arch e.
HasCallStack =>
SingleType e
-> (Operands e -> CodeGen arch (Operands e))
-> Operand (Ptr e)
-> CodeGen arch ()
atomicCAS_rmw SingleType e
s (NumType e
-> Operands e -> Operands e -> IROpenExp Native ((), sh) aenv e
forall a arch.
NumType a -> Operands a -> Operands a -> CodeGen arch (Operands a)
A.add NumType e
t Operands e
r) Operand (Ptr e)
addr
                      | RMWOperation
RMW.Sub <- RMWOperation
rmw          -> SingleType e
-> (Operands e -> IROpenExp Native ((), sh) aenv e)
-> Operand (Ptr e)
-> CodeGen Native ()
forall arch e.
HasCallStack =>
SingleType e
-> (Operands e -> CodeGen arch (Operands e))
-> Operand (Ptr e)
-> CodeGen arch ()
atomicCAS_rmw SingleType e
s (NumType e
-> Operands e -> Operands e -> IROpenExp Native ((), sh) aenv e
forall a arch.
NumType a -> Operands a -> Operands a -> CodeGen arch (Operands a)
A.sub NumType e
t Operands e
r) Operand (Ptr e)
addr
#endif
                    SingleType e
_ | RMWOperation
RMW.Min <- RMWOperation
rmw          -> SingleType e
-> (SingleType e
    -> Operands e -> Operands e -> CodeGen Native (Operands Bool))
-> Operand (Ptr e)
-> Operand e
-> CodeGen Native ()
forall arch e.
HasCallStack =>
SingleType e
-> (SingleType e
    -> Operands e -> Operands e -> CodeGen arch (Operands Bool))
-> Operand (Ptr e)
-> Operand e
-> CodeGen arch ()
atomicCAS_cmp SingleType e
s SingleType e
-> Operands e -> Operands e -> CodeGen Native (Operands Bool)
forall a arch.
SingleType a
-> Operands a -> Operands a -> CodeGen arch (Operands Bool)
A.lt Operand (Ptr e)
addr (SingleType e -> Operands e -> Operand e
forall (dict :: * -> *) a.
(IROP dict, HasCallStack) =>
dict a -> Operands a -> Operand a
op SingleType e
s Operands e
r)
                      | RMWOperation
RMW.Max <- RMWOperation
rmw          -> SingleType e
-> (SingleType e
    -> Operands e -> Operands e -> CodeGen Native (Operands Bool))
-> Operand (Ptr e)
-> Operand e
-> CodeGen Native ()
forall arch e.
HasCallStack =>
SingleType e
-> (SingleType e
    -> Operands e -> Operands e -> CodeGen arch (Operands Bool))
-> Operand (Ptr e)
-> Operand e
-> CodeGen arch ()
atomicCAS_cmp SingleType e
s SingleType e
-> Operands e -> Operands e -> CodeGen Native (Operands Bool)
forall a arch.
SingleType a
-> Operands a -> Operands a -> CodeGen arch (Operands Bool)
A.gt Operand (Ptr e)
addr (SingleType e -> Operands e -> Operand e
forall (dict :: * -> *) a.
(IROP dict, HasCallStack) =>
dict a -> Operands a -> Operand a
op SingleType e
s Operands e
r)
                    SingleType e
_                           -> String -> CodeGen Native ()
forall a. HasCallStack => String -> a
internalError String
"unexpected transition"
          --
          RMWOperation
_ -> String -> CodeGen Native ()
forall a. HasCallStack => String -> a
internalError String
"unexpected transition"

    CodeGen Native ()
forall arch. HasCallStack => CodeGen arch ()
return_


-- Parallel forward permutation function which uses a spinlock to acquire
-- a mutex before updating the value at that location.
--
mkPermuteP_mutex
    :: UID
    -> Gamma             aenv
    -> ArrayR (Array sh e)
    -> ShapeR sh'
    -> IRFun2     Native aenv (e -> e -> e)
    -> IRFun1     Native aenv (sh -> PrimMaybe sh')
    -> MIRDelayed Native aenv (Array sh e)
    -> CodeGen    Native      (IROpenAcc Native aenv (Array sh' e))
mkPermuteP_mutex :: UID
-> Gamma aenv
-> ArrayR (Array sh e)
-> ShapeR sh'
-> IRFun2 Native aenv (e -> e -> e)
-> IRFun1 Native aenv (sh -> PrimMaybe sh')
-> MIRDelayed Native aenv (Array sh e)
-> CodeGen Native (IROpenAcc Native aenv (Array sh' e))
mkPermuteP_mutex UID
uid Gamma aenv
aenv ArrayR (Array sh e)
repr ShapeR sh'
shr IRFun2 Native aenv (e -> e -> e)
combine IRFun1 Native aenv (sh -> PrimMaybe sh')
project MIRDelayed Native aenv (Array sh e)
marr =
  let
      (Operands sh
start, Operands sh
end, [Parameter]
paramGang) = ShapeR sh -> (Operands sh, Operands sh, [Parameter])
forall sh. ShapeR sh -> (Operands sh, Operands sh, [Parameter])
gangParam    (ArrayR (Array sh e) -> ShapeR sh
forall sh e. ArrayR (Array sh e) -> ShapeR sh
arrayRshape ArrayR (Array sh e)
repr)
      (IRArray (Array sh' e)
arrOut,  [Parameter]
paramOut)     = ArrayR (Array sh' e)
-> Name (Array sh' e) -> (IRArray (Array sh' e), [Parameter])
forall sh e.
ArrayR (Array sh e)
-> Name (Array sh e) -> (IRArray (Array sh e), [Parameter])
mutableArray (ArrayR (Array sh e) -> ShapeR sh' -> ArrayR (Array sh' e)
forall sh e sh'.
ArrayR (Array sh e) -> ShapeR sh' -> ArrayR (Array sh' e)
reprOut ArrayR (Array sh e)
repr ShapeR sh'
shr)  Name (Array sh' e)
"out"
      (IRArray (Array ((), Int) Word8)
arrLock, [Parameter]
paramLock)    = ArrayR (Array ((), Int) Word8)
-> Name (Array ((), Int) Word8)
-> (IRArray (Array ((), Int) Word8), [Parameter])
forall sh e.
ArrayR (Array sh e)
-> Name (Array sh e) -> (IRArray (Array sh e), [Parameter])
mutableArray ArrayR (Array ((), Int) Word8)
reprLock Name (Array ((), Int) Word8)
"lock"
      (IRDelayed Native aenv (Array sh e)
arrIn,   [Parameter]
paramIn)      = Name (Array sh e)
-> MIRDelayed Native aenv (Array sh e)
-> (IRDelayed Native aenv (Array sh e), [Parameter])
forall sh e arch aenv.
Name (Array sh e)
-> MIRDelayed arch aenv (Array sh e)
-> (IRDelayed arch aenv (Array sh e), [Parameter])
delayedArray Name (Array sh e)
"in" MIRDelayed Native aenv (Array sh e)
marr
      paramEnv :: [Parameter]
paramEnv                = Gamma aenv -> [Parameter]
forall aenv. Gamma aenv -> [Parameter]
envParam Gamma aenv
aenv
  in
  UID
-> Label
-> [Parameter]
-> CodeGen Native ()
-> CodeGen Native (IROpenAcc Native aenv (Array sh' e))
forall aenv a.
UID
-> Label
-> [Parameter]
-> CodeGen Native ()
-> CodeGen Native (IROpenAcc Native aenv a)
makeOpenAcc UID
uid Label
"permuteP_mutex" ([Parameter]
paramGang [Parameter] -> [Parameter] -> [Parameter]
forall a. [a] -> [a] -> [a]
++ [Parameter]
paramOut [Parameter] -> [Parameter] -> [Parameter]
forall a. [a] -> [a] -> [a]
++ [Parameter]
paramLock [Parameter] -> [Parameter] -> [Parameter]
forall a. [a] -> [a] -> [a]
++ [Parameter]
paramIn [Parameter] -> [Parameter] -> [Parameter]
forall a. [a] -> [a] -> [a]
++ [Parameter]
paramEnv) (CodeGen Native ()
 -> CodeGen Native (IROpenAcc Native aenv (Array sh' e)))
-> CodeGen Native ()
-> CodeGen Native (IROpenAcc Native aenv (Array sh' e))
forall a b. (a -> b) -> a -> b
$ do

    Operands sh
sh <- IRDelayed Native aenv (Array sh e) -> IRExp Native aenv sh
forall arch aenv sh e.
IRDelayed arch aenv (Array sh e) -> IRExp arch aenv sh
delayedExtent IRDelayed Native aenv (Array sh e)
arrIn

    ShapeR sh
-> Operands sh
-> Operands sh
-> Operands sh
-> (Operands sh -> Operands Int -> CodeGen Native ())
-> CodeGen Native ()
forall sh.
ShapeR sh
-> Operands sh
-> Operands sh
-> Operands sh
-> (Operands sh -> Operands Int -> CodeGen Native ())
-> CodeGen Native ()
imapNestFromTo (ArrayR (Array sh e) -> ShapeR sh
forall sh e. ArrayR (Array sh e) -> ShapeR sh
arrayRshape ArrayR (Array sh e)
repr) Operands sh
start Operands sh
end Operands sh
sh ((Operands sh -> Operands Int -> CodeGen Native ())
 -> CodeGen Native ())
-> (Operands sh -> Operands Int -> CodeGen Native ())
-> CodeGen Native ()
forall a b. (a -> b) -> a -> b
$ \Operands sh
ix Operands Int
_ -> do

      Operands (PrimMaybe sh')
ix' <- IRFun1 Native aenv (sh -> PrimMaybe sh')
-> Operands sh -> IROpenExp Native ((), sh) aenv (PrimMaybe sh')
forall arch env aenv a b.
IROpenFun1 arch env aenv (a -> b)
-> Operands a -> IROpenExp arch (env, a) aenv b
app1 IRFun1 Native aenv (sh -> PrimMaybe sh')
project Operands sh
ix

      -- project element onto the destination array and (atomically) update
      CodeGen Native (Operands Bool)
-> CodeGen Native () -> CodeGen Native ()
forall arch.
CodeGen arch (Operands Bool) -> CodeGen arch () -> CodeGen arch ()
when (Operands (PrimMaybe sh') -> CodeGen Native (Operands Bool)
forall a arch.
Operands (PrimMaybe a) -> CodeGen arch (Operands Bool)
isJust Operands (PrimMaybe sh')
ix') (CodeGen Native () -> CodeGen Native ())
-> CodeGen Native () -> CodeGen Native ()
forall a b. (a -> b) -> a -> b
$ do
        Operands sh'
i <- Operands (PrimMaybe sh') -> CodeGen Native (Operands sh')
forall a arch. Operands (PrimMaybe a) -> CodeGen arch (Operands a)
fromJust Operands (PrimMaybe sh')
ix'
        Operands Int
j <- ShapeR sh'
-> Operands sh' -> Operands sh' -> CodeGen Native (Operands Int)
forall sh arch.
ShapeR sh
-> Operands sh -> Operands sh -> CodeGen arch (Operands Int)
intOfIndex ShapeR sh'
shr (IRArray (Array sh' e) -> Operands sh'
forall sh e. IRArray (Array sh e) -> Operands sh
irArrayShape IRArray (Array sh' e)
arrOut) Operands sh'
i
        Operands e
x <- IROpenFun1 Native () aenv (sh -> e)
-> Operands sh -> IROpenExp Native ((), sh) aenv e
forall arch env aenv a b.
IROpenFun1 arch env aenv (a -> b)
-> Operands a -> IROpenExp arch (env, a) aenv b
app1 (IRDelayed Native aenv (Array sh e)
-> IROpenFun1 Native () aenv (sh -> e)
forall arch aenv sh e.
IRDelayed arch aenv (Array sh e) -> IRFun1 arch aenv (sh -> e)
delayedIndex IRDelayed Native aenv (Array sh e)
arrIn) Operands sh
ix

        IRArray (Array ((), Int) Word8)
-> Operands Int -> CodeGen Native () -> CodeGen Native ()
forall a.
IRArray (Array ((), Int) Word8)
-> Operands Int -> CodeGen Native a -> CodeGen Native a
atomically IRArray (Array ((), Int) Word8)
arrLock Operands Int
j (CodeGen Native () -> CodeGen Native ())
-> CodeGen Native () -> CodeGen Native ()
forall a b. (a -> b) -> a -> b
$ do
          Operands e
y <- IntegralType Int
-> IRArray (Array sh' e)
-> Operands Int
-> IROpenExp Native ((), sh) aenv e
forall int sh e arch.
IntegralType int
-> IRArray (Array sh e)
-> Operands int
-> CodeGen arch (Operands e)
readArray IntegralType Int
TypeInt IRArray (Array sh' e)
arrOut Operands Int
j
          Operands e
r <- IRFun2 Native aenv (e -> e -> e)
-> Operands e -> Operands e -> IROpenExp Native ((), sh) aenv e
forall arch env aenv a b c.
IROpenFun2 arch env aenv (a -> b -> c)
-> Operands a -> Operands b -> IROpenExp arch ((env, a), b) aenv c
app2 IRFun2 Native aenv (e -> e -> e)
combine Operands e
x Operands e
y
          IntegralType Int
-> IRArray (Array sh' e)
-> Operands Int
-> Operands e
-> CodeGen Native ()
forall int sh e arch.
IntegralType int
-> IRArray (Array sh e)
-> Operands int
-> Operands e
-> CodeGen arch ()
writeArray IntegralType Int
TypeInt IRArray (Array sh' e)
arrOut Operands Int
j Operands e
r

    CodeGen Native ()
forall arch. HasCallStack => CodeGen arch ()
return_


-- Atomically execute the critical section only when the lock at the given array
-- index is obtained. The thread spins waiting for the lock to be released and
-- there is no backoff strategy in case the lock is contended.
--
-- It is important that the thread loops trying to acquire the lock without
-- writing data anything until the lock value changes. Then, because of MESI
-- caching protocols there will be no bus traffic while the CPU waits for the
-- value to change.
--
-- <https://en.wikipedia.org/wiki/Spinlock#Significant_optimizations>
--
atomically
    :: IRArray (Vector Word8)
    -> Operands Int
    -> CodeGen Native a
    -> CodeGen Native a
atomically :: IRArray (Array ((), Int) Word8)
-> Operands Int -> CodeGen Native a -> CodeGen Native a
atomically IRArray (Array ((), Int) Word8)
barriers Operands Int
i CodeGen Native a
action = do
  let
      lock :: Operand Word8
lock      = IntegralType Word8 -> Word8 -> Operand Word8
forall a. IntegralType a -> a -> Operand a
integral IntegralType Word8
forall a. IsIntegral a => IntegralType a
integralType Word8
1
      unlock :: Operand Word8
unlock    = IntegralType Word8 -> Word8 -> Operand Word8
forall a. IntegralType a -> a -> Operand a
integral IntegralType Word8
forall a. IsIntegral a => IntegralType a
integralType Word8
0
      unlocked :: Operands Word8
unlocked  = IntegralType Word8 -> Operand Word8 -> Operands Word8
forall (dict :: * -> *) a.
(IROP dict, HasCallStack) =>
dict a -> Operand a -> Operands a
ir IntegralType Word8
TypeWord8 Operand Word8
unlock
  --
  Block
spin <- String -> CodeGen Native Block
forall arch. HasCallStack => String -> CodeGen arch Block
newBlock String
"spinlock.entry"
  Block
crit <- String -> CodeGen Native Block
forall arch. HasCallStack => String -> CodeGen arch Block
newBlock String
"spinlock.critical-section"
  Block
exit <- String -> CodeGen Native Block
forall arch. HasCallStack => String -> CodeGen arch Block
newBlock String
"spinlock.exit"

  Operand (Ptr Word8)
addr <- Instruction (Ptr Word8) -> CodeGen Native (Operand (Ptr Word8))
forall a arch.
HasCallStack =>
Instruction a -> CodeGen arch (Operand a)
instr' (Instruction (Ptr Word8) -> CodeGen Native (Operand (Ptr Word8)))
-> Instruction (Ptr Word8) -> CodeGen Native (Operand (Ptr Word8))
forall a b. (a -> b) -> a -> b
$ Operand (Ptr Word8) -> [Operand Int] -> Instruction (Ptr Word8)
forall a1 i.
Operand (Ptr a1) -> [Operand i] -> Instruction (Ptr a1)
GetElementPtr (AddrSpace -> Operand Word8 -> Operand (Ptr Word8)
forall t. HasCallStack => AddrSpace -> Operand t -> Operand (Ptr t)
asPtr AddrSpace
defaultAddrSpace (IntegralType Word8 -> Operands Word8 -> Operand Word8
forall (dict :: * -> *) a.
(IROP dict, HasCallStack) =>
dict a -> Operands a -> Operand a
op IntegralType Word8
forall a. IsIntegral a => IntegralType a
integralType (IRArray (Array ((), Int) Word8) -> Operands Word8
forall sh e. IRArray (Array sh e) -> Operands e
irArrayData IRArray (Array ((), Int) Word8)
barriers))) [IntegralType Int -> Operands Int -> Operand Int
forall (dict :: * -> *) a.
(IROP dict, HasCallStack) =>
dict a -> Operands a -> Operand a
op IntegralType Int
forall a. IsIntegral a => IntegralType a
integralType Operands Int
i]
  Block
_    <- Block -> CodeGen Native Block
forall arch. HasCallStack => Block -> CodeGen arch Block
br Block
spin

  -- Atomically (attempt to) set the lock slot to the locked state. If the slot
  -- was unlocked we just acquired it, otherwise the state remains unchanged and
  -- we spin until it becomes available.
  Block -> CodeGen Native ()
forall arch. Block -> CodeGen arch ()
setBlock Block
spin
  Operands Word8
old  <- Instruction Word8 -> CodeGen Native (Operands Word8)
forall a arch.
HasCallStack =>
Instruction a -> CodeGen arch (Operands a)
instr (Instruction Word8 -> CodeGen Native (Operands Word8))
-> Instruction Word8 -> CodeGen Native (Operands Word8)
forall a b. (a -> b) -> a -> b
$ NumType Word8
-> Volatility
-> RMWOperation
-> Operand (Ptr Word8)
-> Operand Word8
-> Atomicity
-> Instruction Word8
forall a.
NumType a
-> Volatility
-> RMWOperation
-> Operand (Ptr a)
-> Operand a
-> Atomicity
-> Instruction a
AtomicRMW NumType Word8
forall a. IsNum a => NumType a
numType Volatility
NonVolatile RMWOperation
Exchange Operand (Ptr Word8)
addr Operand Word8
lock   (Synchronisation
CrossThread, MemoryOrdering
Acquire)
  Operands Bool
ok   <- SingleType Word8
-> Operands Word8
-> Operands Word8
-> CodeGen Native (Operands Bool)
forall a arch.
SingleType a
-> Operands a -> Operands a -> CodeGen arch (Operands Bool)
A.eq SingleType Word8
forall a. IsSingle a => SingleType a
singleType Operands Word8
old Operands Word8
unlocked
  Block
_    <- Operands Bool -> Block -> Block -> CodeGen Native Block
forall arch.
HasCallStack =>
Operands Bool -> Block -> Block -> CodeGen arch Block
cbr Operands Bool
ok Block
crit Block
spin

  -- We just acquired the lock; perform the critical section then release the
  -- lock and exit. For ("some") x86 processors, an unlocked MOV instruction
  -- could be used rather than the slower XCHG, due to subtle memory ordering
  -- rules.
  Block -> CodeGen Native ()
forall arch. Block -> CodeGen arch ()
setBlock Block
crit
  a
r    <- CodeGen Native a
action
  Operands Word8
_    <- Instruction Word8 -> CodeGen Native (Operands Word8)
forall a arch.
HasCallStack =>
Instruction a -> CodeGen arch (Operands a)
instr (Instruction Word8 -> CodeGen Native (Operands Word8))
-> Instruction Word8 -> CodeGen Native (Operands Word8)
forall a b. (a -> b) -> a -> b
$ NumType Word8
-> Volatility
-> RMWOperation
-> Operand (Ptr Word8)
-> Operand Word8
-> Atomicity
-> Instruction Word8
forall a.
NumType a
-> Volatility
-> RMWOperation
-> Operand (Ptr a)
-> Operand a
-> Atomicity
-> Instruction a
AtomicRMW NumType Word8
forall a. IsNum a => NumType a
numType Volatility
NonVolatile RMWOperation
Exchange Operand (Ptr Word8)
addr Operand Word8
unlock (Synchronisation
CrossThread, MemoryOrdering
Release)
  Block
_    <- Block -> CodeGen Native Block
forall arch. HasCallStack => Block -> CodeGen arch Block
br Block
exit

  Block -> CodeGen Native ()
forall arch. Block -> CodeGen arch ()
setBlock Block
exit
  a -> CodeGen Native a
forall (m :: * -> *) a. Monad m => a -> m a
return a
r


-- Helper functions
-- ----------------

reprOut :: ArrayR (Array sh e) -> ShapeR sh' -> ArrayR (Array sh' e)
reprOut :: ArrayR (Array sh e) -> ShapeR sh' -> ArrayR (Array sh' e)
reprOut (ArrayR ShapeR sh
_ TypeR e
tp) ShapeR sh'
shr = ShapeR sh' -> TypeR e -> ArrayR (Array sh' e)
forall sh e. ShapeR sh -> TypeR e -> ArrayR (Array sh e)
ArrayR ShapeR sh'
shr TypeR e
tp

reprLock :: ArrayR (Array ((), Int) Word8)
reprLock :: ArrayR (Array ((), Int) Word8)
reprLock = ShapeR ((), Int) -> TypeR Word8 -> ArrayR (Array ((), Int) Word8)
forall sh e. ShapeR sh -> TypeR e -> ArrayR (Array sh e)
ArrayR (ShapeR () -> ShapeR ((), Int)
forall sh1. ShapeR sh1 -> ShapeR (sh1, Int)
ShapeRsnoc ShapeR ()
ShapeRz) (TypeR Word8 -> ArrayR (Array ((), Int) Word8))
-> TypeR Word8 -> ArrayR (Array ((), Int) Word8)
forall a b. (a -> b) -> a -> b
$ ScalarType Word8 -> TypeR Word8
forall (s :: * -> *) a. s a -> TupR s a
TupRsingle ScalarType Word8
scalarTypeWord8