{-# LANGUAGE CPP #-}
{-# LANGUAGE TemplateHaskell #-}
module Data.Array.Accelerate.LLVM.PTX.State (
evalPTX,
createTargetForDevice, createTargetFromContext, defaultTarget
) where
import Data.Array.Accelerate.Error
import Data.Array.Accelerate.LLVM.PTX.Analysis.Device
import Data.Array.Accelerate.LLVM.PTX.Target
import Data.Array.Accelerate.LLVM.State
import qualified Data.Array.Accelerate.LLVM.PTX.Array.Table as MT
import qualified Data.Array.Accelerate.LLVM.PTX.Context as CT
import qualified Data.Array.Accelerate.LLVM.PTX.Debug as Debug
import qualified Data.Array.Accelerate.LLVM.PTX.Execute.Stream as ST
import qualified Data.Array.Accelerate.LLVM.PTX.Link.Cache as LC
import Data.Range.Range ( Range(..) )
import Control.Parallel.Meta ( Executable(..) )
import Control.Concurrent ( runInBoundThread )
import Control.Exception ( catch )
import System.IO.Unsafe ( unsafePerformIO )
import Foreign.CUDA.Driver.Error
import qualified Foreign.CUDA.Driver as CUDA
import qualified Foreign.CUDA.Driver.Context as Context
evalPTX :: PTX -> LLVM PTX a -> IO a
evalPTX ptx acc =
runInBoundThread (CT.withContext (ptxContext ptx) (evalLLVM ptx acc))
`catch`
\e -> $internalError "unhandled" (show (e :: CUDAException))
createTargetForDevice
:: CUDA.Device
-> CUDA.DeviceProperties
-> [CUDA.ContextFlag]
-> IO PTX
createTargetForDevice dev prp flags = do
ctx <- CT.new dev prp flags
mt <- MT.new ctx
lc <- LC.new
st <- ST.new ctx
return $! PTX ctx mt lc st simpleIO
createTargetFromContext
:: CUDA.Context
-> IO PTX
createTargetFromContext ctx' = do
dev <- Context.device
prp <- CUDA.props dev
ctx <- CT.raw dev prp ctx'
mt <- MT.new ctx
lc <- LC.new
st <- ST.new ctx
return $! PTX ctx mt lc st simpleIO
{-# INLINE simpleIO #-}
simpleIO :: Executable
simpleIO = Executable $ \_name _ppt range action ->
case range of
Empty -> return ()
IE u v -> action u v 0
{-# NOINLINE defaultTarget #-}
defaultTarget :: PTX
defaultTarget = unsafePerformIO $ do
Debug.traceIO Debug.dump_gc "gc: initialise default PTX target"
CUDA.initialise []
(dev,prp) <- selectBestDevice
createTargetForDevice dev prp [CUDA.SchedAuto]