{-# LINE 1 "src/Foreign/CUDA/Driver/Module/Base.chs" #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE EmptyDataDecls #-}
{-# LANGUAGE ForeignFunctionInterface #-}
{-# LANGUAGE TemplateHaskell #-}
{-# OPTIONS_HADDOCK prune #-}
module Foreign.CUDA.Driver.Module.Base (
Module(..),
JITOption(..), JITTarget(..), JITResult(..), JITFallback(..), JITInputType(..),
JITOptionInternal(..),
loadFile,
loadData, loadDataFromPtr,
loadDataEx, loadDataFromPtrEx,
unload,
jitOptionUnpack, jitTargetOfCompute,
) where
import qualified Foreign.C.Types as C2HSImp
import qualified Foreign.Ptr as C2HSImp
{-# LINE 36 "src/Foreign/CUDA/Driver/Module/Base.chs" #-}
import Foreign.CUDA.Analysis.Device
import Foreign.CUDA.Driver.Error
import Foreign.CUDA.Internal.C2HS
import Foreign
import Foreign.C
import Unsafe.Coerce
import Control.Monad ( liftM )
import Data.ByteString.Char8 ( ByteString )
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Internal as B
-- | Handle to a loaded module: a thin wrapper around the untyped pointer that
-- the foreign calls in this file read back ('peekMod') and pass on
-- ('cuModuleUnload').
newtype Module = Module
  { useModule :: C2HSImp.Ptr ()  -- ^ the raw pointer carried by the handle
  }
  deriving (Eq, Show)
-- | User-facing options for the online (JIT) compiler. Each constructor is
-- translated to a 'JITOptionInternal' key and an 'Int' payload by
-- 'jitOptionUnpack'.
data JITOption
  = MaxRegisters       !Int          -- ^ sent as 'JIT_MAX_REGISTERS'
  | ThreadsPerBlock    !Int          -- ^ sent as 'JIT_THREADS_PER_BLOCK'
  | OptimisationLevel  !Int          -- ^ sent as 'JIT_OPTIMIZATION_LEVEL'
  | Target             !Compute      -- ^ sent as 'JIT_TARGET', via 'jitTargetOfCompute'
  | FallbackStrategy   !JITFallback  -- ^ sent as 'JIT_FALLBACK_STRATEGY'
  | GenerateDebugInfo                -- ^ sent as 'JIT_GENERATE_DEBUG_INFO' with payload @fromEnum True@
  | GenerateLineInfo                 -- ^ sent as 'JIT_GENERATE_LINE_INFO' with payload @fromEnum True@
  | Verbose                          -- ^ sent as 'JIT_LOG_VERBOSE' with payload @fromEnum True@
  deriving (Show)
-- | Everything 'loadDataFromPtrEx' hands back after a successful load.
data JITResult = JITResult
  { jitTime    :: !Float       -- ^ value read back from the 'JIT_WALL_TIME' option slot
                               --   (presumably milliseconds; confirm against the CUDA docs)
  , jitInfoLog :: !ByteString  -- ^ contents of the informational log buffer, empty if nothing was written
  , jitModule  :: !Module      -- ^ the module that was loaded
  }
  deriving (Show)
-- | Compilation targets understood by the JIT compiler. The 'Enum' instance
-- uses the two-digit number in each constructor name as its numeric value, so
-- the encoding is sparse (10, 11, 12, 13, 20, ...).
data JITTarget
  = Compute10
  | Compute11
  | Compute12
  | Compute13
  | Compute20
  | Compute21
  | Compute30
  | Compute32
  | Compute35
  | Compute37
  | Compute50
  | Compute52
  | Compute53
  | Compute60
  | Compute61
  | Compute62
  deriving (Eq, Show)

instance Enum JITTarget where
  -- Next declared target; there is nothing after 'Compute62'.
  succ target = case target of
    Compute10 -> Compute11
    Compute11 -> Compute12
    Compute12 -> Compute13
    Compute13 -> Compute20
    Compute20 -> Compute21
    Compute21 -> Compute30
    Compute30 -> Compute32
    Compute32 -> Compute35
    Compute35 -> Compute37
    Compute37 -> Compute50
    Compute50 -> Compute52
    Compute52 -> Compute53
    Compute53 -> Compute60
    Compute60 -> Compute61
    Compute61 -> Compute62
    Compute62 -> error "JITTarget.succ: Compute62 has no successor"

  -- Previous declared target; there is nothing before 'Compute10'.
  pred target = case target of
    Compute11 -> Compute10
    Compute12 -> Compute11
    Compute13 -> Compute12
    Compute20 -> Compute13
    Compute21 -> Compute20
    Compute30 -> Compute21
    Compute32 -> Compute30
    Compute35 -> Compute32
    Compute37 -> Compute35
    Compute50 -> Compute37
    Compute52 -> Compute50
    Compute53 -> Compute52
    Compute60 -> Compute53
    Compute61 -> Compute60
    Compute62 -> Compute61
    Compute10 -> error "JITTarget.pred: Compute10 has no predecessor"

  -- Step through constructors with 'succ' (not by adding one to the numeric
  -- value) because the numeric encoding has gaps.
  enumFromTo lo hi = walk lo
    where
      limit = fromEnum hi
      walk x
        | here <  limit = x : walk (succ x)
        | here == limit = [x]
        | otherwise     = []
        where
          here = fromEnum x

  enumFrom lo = enumFromTo lo Compute62

  fromEnum target = case target of
    Compute10 -> 10
    Compute11 -> 11
    Compute12 -> 12
    Compute13 -> 13
    Compute20 -> 20
    Compute21 -> 21
    Compute30 -> 30
    Compute32 -> 32
    Compute35 -> 35
    Compute37 -> 37
    Compute50 -> 50
    Compute52 -> 52
    Compute53 -> 53
    Compute60 -> 60
    Compute61 -> 61
    Compute62 -> 62

  toEnum code = case code of
    10 -> Compute10
    11 -> Compute11
    12 -> Compute12
    13 -> Compute13
    20 -> Compute20
    21 -> Compute21
    30 -> Compute30
    32 -> Compute32
    35 -> Compute35
    37 -> Compute37
    50 -> Compute50
    52 -> Compute52
    53 -> Compute53
    60 -> Compute60
    61 -> Compute61
    62 -> Compute62
    _  -> error ("JITTarget.toEnum: Cannot match " ++ show code)
{-# LINE 95 "src/Foreign/CUDA/Driver/Module/Base.chs" #-}
-- | Payload type for the 'FallbackStrategy' option. Numeric values are 0 for
-- 'PreferPTX' and 1 for 'PreferBinary'.
data JITFallback
  = PreferPTX
  | PreferBinary
  deriving (Eq, Show)

instance Enum JITFallback where
  succ strategy = case strategy of
    PreferPTX    -> PreferBinary
    PreferBinary -> error "JITFallback.succ: PreferBinary has no successor"

  pred strategy = case strategy of
    PreferBinary -> PreferPTX
    PreferPTX    -> error "JITFallback.pred: PreferPTX has no predecessor"

  -- Walk constructor by constructor until the upper bound's numeric value is
  -- reached; a start beyond the bound gives the empty list.
  enumFromTo lo hi = walk lo
    where
      limit = fromEnum hi
      walk x
        | here <  limit = x : walk (succ x)
        | here == limit = [x]
        | otherwise     = []
        where
          here = fromEnum x

  enumFrom lo = enumFromTo lo PreferBinary

  fromEnum strategy = case strategy of
    PreferPTX    -> 0
    PreferBinary -> 1

  toEnum code = case code of
    0 -> PreferPTX
    1 -> PreferBinary
    _ -> error ("JITFallback.toEnum: Cannot match " ++ show code)
{-# LINE 103 "src/Foreign/CUDA/Driver/Module/Base.chs" #-}
-- | Kinds of input image, numbered consecutively from 0 ('Cubin') to 5
-- ('CuJitNumInputTypes').
data JITInputType
  = Cubin
  | PTX
  | Fatbinary
  | Object
  | Library
  | CuJitNumInputTypes
  deriving (Eq, Show)

instance Enum JITInputType where
  succ input = case input of
    Cubin              -> PTX
    PTX                -> Fatbinary
    Fatbinary          -> Object
    Object             -> Library
    Library            -> CuJitNumInputTypes
    CuJitNumInputTypes -> error "JITInputType.succ: CuJitNumInputTypes has no successor"

  pred input = case input of
    PTX                -> Cubin
    Fatbinary          -> PTX
    Object             -> Fatbinary
    Library            -> Object
    CuJitNumInputTypes -> Library
    Cubin              -> error "JITInputType.pred: Cubin has no predecessor"

  -- Walk constructor by constructor until the upper bound's numeric value is
  -- reached; a start beyond the bound gives the empty list.
  enumFromTo lo hi = walk lo
    where
      limit = fromEnum hi
      walk x
        | here <  limit = x : walk (succ x)
        | here == limit = [x]
        | otherwise     = []
        where
          here = fromEnum x

  enumFrom lo = enumFromTo lo CuJitNumInputTypes

  fromEnum input = case input of
    Cubin              -> 0
    PTX                -> 1
    Fatbinary          -> 2
    Object             -> 3
    Library            -> 4
    CuJitNumInputTypes -> 5

  toEnum code = case code of
    0 -> Cubin
    1 -> PTX
    2 -> Fatbinary
    3 -> Object
    4 -> Library
    5 -> CuJitNumInputTypes
    _ -> error ("JITInputType.toEnum: Cannot match " ++ show code)
{-# LINE 115 "src/Foreign/CUDA/Driver/Module/Base.chs" #-}
-- | Option keys in the form handed to the foreign loader. They are numbered
-- consecutively from 0 ('JIT_MAX_REGISTERS') to 17 ('JIT_NUM_OPTIONS').
data JITOptionInternal
  = JIT_MAX_REGISTERS
  | JIT_THREADS_PER_BLOCK
  | JIT_WALL_TIME
  | JIT_INFO_LOG_BUFFER
  | JIT_INFO_LOG_BUFFER_SIZE_BYTES
  | JIT_ERROR_LOG_BUFFER
  | JIT_ERROR_LOG_BUFFER_SIZE_BYTES
  | JIT_OPTIMIZATION_LEVEL
  | JIT_TARGET_FROM_CUCONTEXT
  | JIT_TARGET
  | JIT_FALLBACK_STRATEGY
  | JIT_GENERATE_DEBUG_INFO
  | JIT_LOG_VERBOSE
  | JIT_GENERATE_LINE_INFO
  | JIT_CACHE_MODE
  | JIT_NEW_SM3X_OPT
  | JIT_FAST_COMPILE
  | JIT_NUM_OPTIONS
  deriving (Eq, Show)

instance Enum JITOptionInternal where
  succ key = case key of
    JIT_MAX_REGISTERS               -> JIT_THREADS_PER_BLOCK
    JIT_THREADS_PER_BLOCK           -> JIT_WALL_TIME
    JIT_WALL_TIME                   -> JIT_INFO_LOG_BUFFER
    JIT_INFO_LOG_BUFFER             -> JIT_INFO_LOG_BUFFER_SIZE_BYTES
    JIT_INFO_LOG_BUFFER_SIZE_BYTES  -> JIT_ERROR_LOG_BUFFER
    JIT_ERROR_LOG_BUFFER            -> JIT_ERROR_LOG_BUFFER_SIZE_BYTES
    JIT_ERROR_LOG_BUFFER_SIZE_BYTES -> JIT_OPTIMIZATION_LEVEL
    JIT_OPTIMIZATION_LEVEL          -> JIT_TARGET_FROM_CUCONTEXT
    JIT_TARGET_FROM_CUCONTEXT       -> JIT_TARGET
    JIT_TARGET                      -> JIT_FALLBACK_STRATEGY
    JIT_FALLBACK_STRATEGY           -> JIT_GENERATE_DEBUG_INFO
    JIT_GENERATE_DEBUG_INFO         -> JIT_LOG_VERBOSE
    JIT_LOG_VERBOSE                 -> JIT_GENERATE_LINE_INFO
    JIT_GENERATE_LINE_INFO          -> JIT_CACHE_MODE
    JIT_CACHE_MODE                  -> JIT_NEW_SM3X_OPT
    JIT_NEW_SM3X_OPT                -> JIT_FAST_COMPILE
    JIT_FAST_COMPILE                -> JIT_NUM_OPTIONS
    JIT_NUM_OPTIONS                 -> error "JITOptionInternal.succ: JIT_NUM_OPTIONS has no successor"

  pred key = case key of
    JIT_THREADS_PER_BLOCK           -> JIT_MAX_REGISTERS
    JIT_WALL_TIME                   -> JIT_THREADS_PER_BLOCK
    JIT_INFO_LOG_BUFFER             -> JIT_WALL_TIME
    JIT_INFO_LOG_BUFFER_SIZE_BYTES  -> JIT_INFO_LOG_BUFFER
    JIT_ERROR_LOG_BUFFER            -> JIT_INFO_LOG_BUFFER_SIZE_BYTES
    JIT_ERROR_LOG_BUFFER_SIZE_BYTES -> JIT_ERROR_LOG_BUFFER
    JIT_OPTIMIZATION_LEVEL          -> JIT_ERROR_LOG_BUFFER_SIZE_BYTES
    JIT_TARGET_FROM_CUCONTEXT       -> JIT_OPTIMIZATION_LEVEL
    JIT_TARGET                      -> JIT_TARGET_FROM_CUCONTEXT
    JIT_FALLBACK_STRATEGY           -> JIT_TARGET
    JIT_GENERATE_DEBUG_INFO         -> JIT_FALLBACK_STRATEGY
    JIT_LOG_VERBOSE                 -> JIT_GENERATE_DEBUG_INFO
    JIT_GENERATE_LINE_INFO          -> JIT_LOG_VERBOSE
    JIT_CACHE_MODE                  -> JIT_GENERATE_LINE_INFO
    JIT_NEW_SM3X_OPT                -> JIT_CACHE_MODE
    JIT_FAST_COMPILE                -> JIT_NEW_SM3X_OPT
    JIT_NUM_OPTIONS                 -> JIT_FAST_COMPILE
    JIT_MAX_REGISTERS               -> error "JITOptionInternal.pred: JIT_MAX_REGISTERS has no predecessor"

  -- Walk constructor by constructor until the upper bound's numeric value is
  -- reached; a start beyond the bound gives the empty list.
  enumFromTo lo hi = walk lo
    where
      limit = fromEnum hi
      walk x
        | here <  limit = x : walk (succ x)
        | here == limit = [x]
        | otherwise     = []
        where
          here = fromEnum x

  enumFrom lo = enumFromTo lo JIT_NUM_OPTIONS

  fromEnum key = case key of
    JIT_MAX_REGISTERS               -> 0
    JIT_THREADS_PER_BLOCK           -> 1
    JIT_WALL_TIME                   -> 2
    JIT_INFO_LOG_BUFFER             -> 3
    JIT_INFO_LOG_BUFFER_SIZE_BYTES  -> 4
    JIT_ERROR_LOG_BUFFER            -> 5
    JIT_ERROR_LOG_BUFFER_SIZE_BYTES -> 6
    JIT_OPTIMIZATION_LEVEL          -> 7
    JIT_TARGET_FROM_CUCONTEXT       -> 8
    JIT_TARGET                      -> 9
    JIT_FALLBACK_STRATEGY           -> 10
    JIT_GENERATE_DEBUG_INFO         -> 11
    JIT_LOG_VERBOSE                 -> 12
    JIT_GENERATE_LINE_INFO          -> 13
    JIT_CACHE_MODE                  -> 14
    JIT_NEW_SM3X_OPT                -> 15
    JIT_FAST_COMPILE                -> 16
    JIT_NUM_OPTIONS                 -> 17

  toEnum code = case code of
    0  -> JIT_MAX_REGISTERS
    1  -> JIT_THREADS_PER_BLOCK
    2  -> JIT_WALL_TIME
    3  -> JIT_INFO_LOG_BUFFER
    4  -> JIT_INFO_LOG_BUFFER_SIZE_BYTES
    5  -> JIT_ERROR_LOG_BUFFER
    6  -> JIT_ERROR_LOG_BUFFER_SIZE_BYTES
    7  -> JIT_OPTIMIZATION_LEVEL
    8  -> JIT_TARGET_FROM_CUCONTEXT
    9  -> JIT_TARGET
    10 -> JIT_FALLBACK_STRATEGY
    11 -> JIT_GENERATE_DEBUG_INFO
    12 -> JIT_LOG_VERBOSE
    13 -> JIT_GENERATE_LINE_INFO
    14 -> JIT_CACHE_MODE
    15 -> JIT_NEW_SM3X_OPT
    16 -> JIT_FAST_COMPILE
    17 -> JIT_NUM_OPTIONS
    _  -> error ("JITOptionInternal.toEnum: Cannot match " ++ show code)
{-# LINE 119 "src/Foreign/CUDA/Driver/Module/Base.chs" #-}
-- | Load a module from the file at the given path. The status returned by
-- the foreign call is checked with 'resultIfOk'.
{-# INLINEABLE loadFile #-}
loadFile :: FilePath -> IO Module
loadFile !ptx = cuModuleLoad ptx >>= resultIfOk
-- Marshalling wrapper for the raw @cuModuleLoad@ import: allocates a slot for
-- the resulting handle, passes the path as a C string, and returns the decoded
-- status together with whatever handle value the slot holds afterwards.
{-# INLINE cuModuleLoad #-}
cuModuleLoad :: FilePath -> IO (Status, Module)
cuModuleLoad path =
  alloca $ \modSlot ->
    withCString path $ \cPath -> do
      rc  <- cuModuleLoad'_ modSlot cPath
      mdl <- peekMod modSlot
      return (cToEnum rc, mdl)
{-# LINE 139 "src/Foreign/CUDA/Driver/Module/Base.chs" #-}
-- | Load a module from an in-memory image. The bytes are exposed as a C
-- string for the duration of the call and forwarded to 'loadDataFromPtr'.
{-# INLINEABLE loadData #-}
loadData :: ByteString -> IO Module
loadData !img = B.useAsCString img (loadDataFromPtr . castPtr)
-- | Load a module from an image already available behind a raw pointer. The
-- status returned by the foreign call is checked with 'resultIfOk'.
{-# INLINEABLE loadDataFromPtr #-}
loadDataFromPtr :: Ptr Word8 -> IO Module
loadDataFromPtr !img = cuModuleLoadData img >>= resultIfOk
-- Marshalling wrapper for the raw @cuModuleLoadData@ import: allocates a slot
-- for the resulting handle, passes the image as an untyped pointer, and
-- returns the decoded status together with the slot's contents.
{-# INLINE cuModuleLoadData #-}
cuModuleLoadData :: Ptr Word8 -> IO (Status, Module)
cuModuleLoadData image =
  alloca $ \modSlot -> do
    rc  <- cuModuleLoadData'_ modSlot (castPtr image)
    mdl <- peekMod modSlot
    return (cToEnum rc, mdl)
{-# LINE 168 "src/Foreign/CUDA/Driver/Module/Base.chs" #-}
-- | Load a module from an in-memory image with the given JIT options. The
-- bytes are exposed as a C string for the duration of the call and forwarded
-- to 'loadDataFromPtrEx'.
{-# INLINEABLE loadDataEx #-}
loadDataEx :: ByteString -> [JITOption] -> IO JITResult
loadDataEx !img !options = B.useAsCString img withImage
  where
    withImage cstr = loadDataFromPtrEx (castPtr cstr) options
-- | Load a module from an image behind a raw pointer, passing the given JIT
-- options to the loader.
--
-- Two 2048-byte log buffers (informational and error) are always supplied.
-- On success the result carries the value read back from the 'JIT_WALL_TIME'
-- slot, the informational log, and the module. On any other status the
-- status description and the error log are joined with 'unlines' and raised
-- through 'cudaError'.
{-# INLINEABLE loadDataFromPtrEx #-}
loadDataFromPtrEx :: Ptr Word8 -> [JITOption] -> IO JITResult
loadDataFromPtrEx !img !options = do
  let logSize = 2048

  fp_ilog <- B.mallocByteString logSize

  allocaArray logSize $ \p_elog ->
    withForeignPtr fp_ilog $ \p_ilog -> do
      -- Neither allocator initialises its memory. Seed both logs with a
      -- leading NUL so that a log the callee never writes to reads back as
      -- the empty string instead of whatever bytes happened to be there.
      poke p_ilog (0 :: CChar)
      poke p_elog (0 :: CChar)

      let (opt, val) = unzip $
            [ (JIT_WALL_TIME, 0)  -- must stay first: its value slot is read back below
            , (JIT_INFO_LOG_BUFFER_SIZE_BYTES, logSize)
            , (JIT_ERROR_LOG_BUFFER_SIZE_BYTES, logSize)
              -- The value list is typed as Int, so buffer addresses are
              -- smuggled through it with unsafeCoerce and turned back into
              -- pointer-sized slots when the value array is written. This
              -- relies on Int and Ptr sharing a representation.
            , (JIT_INFO_LOG_BUFFER, unsafeCoerce (p_ilog :: CString))
            , (JIT_ERROR_LOG_BUFFER, unsafeCoerce (p_elog :: CString))
            ]
            ++ map jitOptionUnpack options

      withArrayLen (map cFromEnum opt) $ \i p_opts ->
        withArray (map unsafeCoerce val) $ \p_vals -> do
          (s, mdl) <- cuModuleLoadDataEx img i p_opts p_vals
          case s of
            Success -> do
              -- First value slot belongs to JIT_WALL_TIME; reinterpret it as
              -- the Float stored in 'jitTime'.
              time  <- peek (castPtr p_vals)
              bytes <- c_strnlen p_ilog logSize
              let infoLog | bytes == 0 = B.empty
                          | otherwise  = B.fromForeignPtr (castForeignPtr fp_ilog) 0 bytes
              return $! JITResult time infoLog mdl

            _ -> do
              -- Bound the read by the buffer size: the log is not guaranteed
              -- to be NUL-terminated within the 2048 bytes we own.
              errLen <- c_strnlen p_elog logSize
              errLog <- peekCStringLen (p_elog, errLen)
              cudaError (unlines [describe s, errLog])
-- Marshalling wrapper for the raw @cuModuleLoadDataEx@ import. Takes the
-- image pointer, the number of options, the array of option keys and the
-- array of option values; returns the decoded status and the contents of the
-- handle slot.
{-# INLINE cuModuleLoadDataEx #-}
cuModuleLoadDataEx :: Ptr Word8 -> Int -> Ptr CInt -> Ptr (Ptr ()) -> IO (Status, Module)
cuModuleLoadDataEx image numOptions optionKeys optionValues =
  alloca $ \modSlot -> do
    rc  <- cuModuleLoadDataEx'_ modSlot (castPtr image) (fromIntegral numOptions) optionKeys optionValues
    mdl <- peekMod modSlot
    return (cToEnum rc, mdl)
{-# LINE 235 "src/Foreign/CUDA/Driver/Module/Base.chs" #-}
-- | Unload a module. The status returned by the foreign call is checked with
-- 'nothingIfOk'.
{-# INLINEABLE unload #-}
unload :: Module -> IO ()
unload !m = cuModuleUnload m >>= nothingIfOk
-- Marshalling wrapper for the raw @cuModuleUnload@ import: unwraps the handle
-- and decodes the returned status code.
{-# INLINE cuModuleUnload #-}
cuModuleUnload :: Module -> IO Status
cuModuleUnload mdl = cToEnum `fmap` cuModuleUnload'_ (useModule mdl)
{-# LINE 249 "src/Foreign/CUDA/Driver/Module/Base.chs" #-}
-- Read the raw pointer stored in the given slot and wrap it as a 'Module'.
{-# INLINE peekMod #-}
peekMod :: Ptr (C2HSImp.Ptr ()) -> IO Module
peekMod slot = Module `fmap` peek slot
-- | Split a user-facing 'JITOption' into the internal option key and its
-- 'Int' payload. Flag-style options carry @fromEnum True@ as their payload.
{-# INLINE jitOptionUnpack #-}
jitOptionUnpack :: JITOption -> (JITOptionInternal, Int)
jitOptionUnpack option = case option of
  MaxRegisters n         -> (JIT_MAX_REGISTERS, n)
  ThreadsPerBlock n      -> (JIT_THREADS_PER_BLOCK, n)
  OptimisationLevel n    -> (JIT_OPTIMIZATION_LEVEL, n)
  Target cc              -> (JIT_TARGET, fromEnum (jitTargetOfCompute cc))
  FallbackStrategy strat -> (JIT_FALLBACK_STRATEGY, fromEnum strat)
  GenerateDebugInfo      -> (JIT_GENERATE_DEBUG_INFO, enabled)
  GenerateLineInfo       -> (JIT_GENERATE_LINE_INFO, enabled)
  Verbose                -> (JIT_LOG_VERBOSE, enabled)
  where
    enabled = fromEnum True
-- | Translate a device compute capability (major, minor) into the matching
-- 'JITTarget'. Every constructor of 'JITTarget' has an equation here; any
-- other capability raises an 'error' naming the offending value.
{-# INLINE jitTargetOfCompute #-}
jitTargetOfCompute :: Compute -> JITTarget
jitTargetOfCompute (Compute 1 0) = Compute10
jitTargetOfCompute (Compute 1 1) = Compute11
jitTargetOfCompute (Compute 1 2) = Compute12
jitTargetOfCompute (Compute 1 3) = Compute13
jitTargetOfCompute (Compute 2 0) = Compute20
jitTargetOfCompute (Compute 2 1) = Compute21
jitTargetOfCompute (Compute 3 0) = Compute30
jitTargetOfCompute (Compute 3 2) = Compute32
jitTargetOfCompute (Compute 3 5) = Compute35
jitTargetOfCompute (Compute 3 7) = Compute37
jitTargetOfCompute (Compute 5 0) = Compute50
jitTargetOfCompute (Compute 5 2) = Compute52
-- The four targets below are declared in 'JITTarget' but previously had no
-- equation, so requesting them fell through to the error case.
jitTargetOfCompute (Compute 5 3) = Compute53
jitTargetOfCompute (Compute 6 0) = Compute60
jitTargetOfCompute (Compute 6 1) = Compute61
jitTargetOfCompute (Compute 6 2) = Compute62
jitTargetOfCompute compute       = error ("Unknown JIT Target for Compute " ++ show compute)
-- Raw binding to C @strnlen@.
foreign import ccall unsafe "string.h strnlen"
  c_strnlen' :: CString -> CSize -> IO CSize

-- 'Int'-typed convenience wrapper around the raw @strnlen@ binding: converts
-- the limit to 'CSize' on the way in and the result back to 'Int' on the way
-- out.
{-# INLINE c_strnlen #-}
c_strnlen :: CString -> Int -> IO Int
c_strnlen str maxlen = fmap cIntConv (c_strnlen' str (cIntConv maxlen))
-- Raw foreign imports used by the marshalling wrappers in this file. In the
-- three loaders the first argument is the slot that the wrappers read the
-- module handle back from; every call returns a status code as a C int.

foreign import ccall unsafe "Foreign/CUDA/Driver/Module/Base.chs.h cuModuleLoad"
  cuModuleLoad'_
    :: C2HSImp.Ptr (C2HSImp.Ptr ())
    -> C2HSImp.Ptr C2HSImp.CChar
    -> IO C2HSImp.CInt

foreign import ccall unsafe "Foreign/CUDA/Driver/Module/Base.chs.h cuModuleLoadData"
  cuModuleLoadData'_
    :: C2HSImp.Ptr (C2HSImp.Ptr ())
    -> C2HSImp.Ptr ()
    -> IO C2HSImp.CInt

foreign import ccall unsafe "Foreign/CUDA/Driver/Module/Base.chs.h cuModuleLoadDataEx"
  cuModuleLoadDataEx'_
    :: C2HSImp.Ptr (C2HSImp.Ptr ())
    -> C2HSImp.Ptr ()
    -> C2HSImp.CUInt
    -> C2HSImp.Ptr C2HSImp.CInt
    -> C2HSImp.Ptr (C2HSImp.Ptr ())
    -> IO C2HSImp.CInt

foreign import ccall unsafe "Foreign/CUDA/Driver/Module/Base.chs.h cuModuleUnload"
  cuModuleUnload'_
    :: C2HSImp.Ptr ()
    -> IO C2HSImp.CInt