{-# LINE 1 "src/Foreign/CUDA/Runtime/Marshal.chs" #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE EmptyDataDecls #-}
{-# LANGUAGE ForeignFunctionInterface #-}
{-# LANGUAGE TemplateHaskell #-}
module Foreign.CUDA.Runtime.Marshal (
AllocFlag(..),
mallocHostArray, freeHost,
mallocArray, allocaArray, free,
AttachFlag(..),
mallocManagedArray,
peekArray, peekArrayAsync, peekArray2D, peekArray2DAsync, peekListArray,
pokeArray, pokeArrayAsync, pokeArray2D, pokeArray2DAsync, pokeListArray,
copyArray, copyArrayAsync, copyArray2D, copyArray2DAsync,
newListArray, newListArrayLen,
withListArray, withListArrayLen,
memset
) where
import qualified Foreign.C.Types as C2HSImp
import qualified Foreign.Ptr as C2HSImp
{-# LINE 43 "src/Foreign/CUDA/Runtime/Marshal.chs" #-}
import Foreign.CUDA.Ptr
import Foreign.CUDA.Runtime.Error
import Foreign.CUDA.Runtime.Stream
import Foreign.CUDA.Internal.C2HS
import Data.Int
import Data.Maybe
import Control.Exception
import Foreign.C
import Foreign.Ptr
import Foreign.Storable
import qualified Foreign.Marshal as F
data AllocFlag = Portable
| DeviceMapped
| WriteCombined
deriving (Eq,Show,Bounded)
instance Enum AllocFlag where
succ Portable = DeviceMapped
succ DeviceMapped = WriteCombined
succ WriteCombined = error "AllocFlag.succ: WriteCombined has no successor"
pred DeviceMapped = Portable
pred WriteCombined = DeviceMapped
pred Portable = error "AllocFlag.pred: Portable has no predecessor"
enumFromTo from to = go from
where
end = fromEnum to
go v = case compare (fromEnum v) end of
LT -> v : go (succ v)
EQ -> [v]
GT -> []
enumFrom from = enumFromTo from WriteCombined
fromEnum Portable = 1
fromEnum DeviceMapped = 2
fromEnum WriteCombined = 4
toEnum 1 = Portable
toEnum 2 = DeviceMapped
toEnum 4 = WriteCombined
toEnum unmatched = error ("AllocFlag.toEnum: Cannot match " ++ show unmatched)
{-# LINE 88 "src/Foreign/CUDA/Runtime/Marshal.chs" #-}
{-# INLINEABLE mallocHostArray #-}
mallocHostArray :: Storable a => [AllocFlag] -> Int -> IO (HostPtr a)
mallocHostArray !flags = doMalloc undefined
where
doMalloc :: Storable a' => a' -> Int -> IO (HostPtr a')
doMalloc x !n = resultIfOk =<< cudaHostAlloc (fromIntegral n * fromIntegral (sizeOf x)) flags
{-# INLINE cudaHostAlloc #-}
cudaHostAlloc :: (Int64) -> ([AllocFlag]) -> IO ((Status), (HostPtr a))
cudaHostAlloc a2 a3 =
alloca' $ \a1' ->
let {a2' = cIntConv a2} in
let {a3' = combineBitMasks a3} in
cudaHostAlloc'_ a1' a2' a3' >>= \res ->
let {res' = cToEnum res} in
hptr a1'>>= \a1'' ->
return (res', a1'')
{-# LINE 113 "src/Foreign/CUDA/Runtime/Marshal.chs" #-}
where
alloca' !f = F.alloca $ \ !p -> poke p nullPtr >> f (castPtr p)
hptr !p = (HostPtr . castPtr) `fmap` peek p
{-# INLINEABLE freeHost #-}
freeHost :: HostPtr a -> IO ()
freeHost !p = nothingIfOk =<< cudaFreeHost p
{-# INLINE cudaFreeHost #-}
cudaFreeHost :: (HostPtr a) -> IO ((Status))
cudaFreeHost a1 =
let {a1' = hptr a1} in
cudaFreeHost'_ a1' >>= \res ->
let {res' = cToEnum res} in
return (res')
{-# LINE 128 "src/Foreign/CUDA/Runtime/Marshal.chs" #-}
where hptr = castPtr . useHostPtr
{-# INLINEABLE mallocArray #-}
mallocArray :: Storable a => Int -> IO (DevicePtr a)
mallocArray = doMalloc undefined
where
doMalloc :: Storable a' => a' -> Int -> IO (DevicePtr a')
doMalloc x !n = resultIfOk =<< cudaMalloc (fromIntegral n * fromIntegral (sizeOf x))
{-# INLINE cudaMalloc #-}
cudaMalloc :: (Int64) -> IO ((Status), (DevicePtr a))
cudaMalloc a2 =
alloca' $ \a1' ->
let {a2' = cIntConv a2} in
cudaMalloc'_ a1' a2' >>= \res ->
let {res' = cToEnum res} in
dptr a1'>>= \a1'' ->
return (res', a1'')
{-# LINE 151 "src/Foreign/CUDA/Runtime/Marshal.chs" #-}
where
alloca' !f = F.alloca $ \ !p -> poke p nullPtr >> f (castPtr p)
dptr !p = (castDevPtr . DevicePtr) `fmap` peek p
{-# INLINEABLE allocaArray #-}
allocaArray :: Storable a => Int -> (DevicePtr a -> IO b) -> IO b
allocaArray n = bracket (mallocArray n) free
{-# INLINEABLE free #-}
free :: DevicePtr a -> IO ()
free !p = nothingIfOk =<< cudaFree p
{-# INLINE cudaFree #-}
cudaFree :: (DevicePtr a) -> IO ((Status))
cudaFree a1 =
let {a1' = dptr a1} in
cudaFree'_ a1' >>= \res ->
let {res' = cToEnum res} in
return (res')
{-# LINE 181 "src/Foreign/CUDA/Runtime/Marshal.chs" #-}
where
dptr = useDevicePtr . castDevPtr
data AttachFlag = Global
| Host
| Single
deriving (Eq,Show,Bounded)
instance Enum AttachFlag where
succ Global = Host
succ Host = Single
succ Single = error "AttachFlag.succ: Single has no successor"
pred Host = Global
pred Single = Host
pred Global = error "AttachFlag.pred: Global has no predecessor"
enumFromTo from to = go from
where
end = fromEnum to
go v = case compare (fromEnum v) end of
LT -> v : go (succ v)
EQ -> [v]
GT -> []
enumFrom from = enumFromTo from Single
fromEnum Global = 1
fromEnum Host = 2
fromEnum Single = 4
toEnum 1 = Global
toEnum 2 = Host
toEnum 4 = Single
toEnum unmatched = error ("AttachFlag.toEnum: Cannot match " ++ show unmatched)
{-# LINE 199 "src/Foreign/CUDA/Runtime/Marshal.chs" #-}
{-# INLINEABLE mallocManagedArray #-}
mallocManagedArray :: Storable a => [AttachFlag] -> Int -> IO (DevicePtr a)
mallocManagedArray !flags = doMalloc undefined
where
doMalloc :: Storable a' => a' -> Int -> IO (DevicePtr a')
doMalloc x !n = resultIfOk =<< cudaMallocManaged (fromIntegral n * fromIntegral (sizeOf x)) flags
{-# INLINE cudaMallocManaged #-}
cudaMallocManaged :: (Int64) -> ([AttachFlag]) -> IO ((Status), (DevicePtr a))
cudaMallocManaged a2 a3 =
alloca' $ \a1' ->
let {a2' = cIntConv a2} in
let {a3' = combineBitMasks a3} in
cudaMallocManaged'_ a1' a2' a3' >>= \res ->
let {res' = cToEnum res} in
dptr a1'>>= \a1'' ->
return (res', a1'')
{-# LINE 219 "src/Foreign/CUDA/Runtime/Marshal.chs" #-}
where
alloca' !f = F.alloca $ \ !p -> poke p nullPtr >> f (castPtr p)
dptr !p = (castDevPtr . DevicePtr) `fmap` peek p
{-# INLINEABLE peekArray #-}
peekArray :: Storable a => Int -> DevicePtr a -> Ptr a -> IO ()
peekArray !n !dptr !hptr = memcpy hptr (useDevicePtr dptr) n DeviceToHost
{-# INLINEABLE peekArrayAsync #-}
peekArrayAsync :: Storable a => Int -> DevicePtr a -> HostPtr a -> Maybe Stream -> IO ()
peekArrayAsync !n !dptr !hptr !mst =
memcpyAsync (useHostPtr hptr) (useDevicePtr dptr) n DeviceToHost mst
{-# INLINEABLE peekArray2D #-}
peekArray2D
:: Storable a
=> Int
-> Int
-> DevicePtr a
-> Int
-> Ptr a
-> Int
-> IO ()
peekArray2D !w !h !dptr !dw !hptr !hw =
memcpy2D hptr hw (useDevicePtr dptr) dw w h DeviceToHost
{-# INLINEABLE peekArray2DAsync #-}
peekArray2DAsync
:: Storable a
=> Int
-> Int
-> DevicePtr a
-> Int
-> HostPtr a
-> Int
-> Maybe Stream
-> IO ()
peekArray2DAsync !w !h !dptr !dw !hptr !hw !mst =
memcpy2DAsync (useHostPtr hptr) hw (useDevicePtr dptr) dw w h DeviceToHost mst
{-# INLINEABLE peekListArray #-}
peekListArray :: Storable a => Int -> DevicePtr a -> IO [a]
peekListArray !n !dptr =
F.allocaArray n $ \p -> do
peekArray n dptr p
F.peekArray n p
{-# INLINEABLE pokeArray #-}
pokeArray :: Storable a => Int -> Ptr a -> DevicePtr a -> IO ()
pokeArray !n !hptr !dptr = memcpy (useDevicePtr dptr) hptr n HostToDevice
{-# INLINEABLE pokeArrayAsync #-}
pokeArrayAsync :: Storable a => Int -> HostPtr a -> DevicePtr a -> Maybe Stream -> IO ()
pokeArrayAsync !n !hptr !dptr !mst =
memcpyAsync (useDevicePtr dptr) (useHostPtr hptr) n HostToDevice mst
{-# INLINEABLE pokeArray2D #-}
pokeArray2D
:: Storable a
=> Int
-> Int
-> Ptr a
-> Int
-> DevicePtr a
-> Int
-> IO ()
pokeArray2D !w !h !hptr !dw !dptr !hw =
memcpy2D (useDevicePtr dptr) dw hptr hw w h HostToDevice
{-# INLINEABLE pokeArray2DAsync #-}
pokeArray2DAsync
:: Storable a
=> Int
-> Int
-> HostPtr a
-> Int
-> DevicePtr a
-> Int
-> Maybe Stream
-> IO ()
pokeArray2DAsync !w !h !hptr !hw !dptr !dw !mst =
memcpy2DAsync (useDevicePtr dptr) dw (useHostPtr hptr) hw w h HostToDevice mst
{-# INLINEABLE pokeListArray #-}
pokeListArray :: Storable a => [a] -> DevicePtr a -> IO ()
pokeListArray !xs !dptr = F.withArrayLen xs $ \len p -> pokeArray len p dptr
{-# INLINEABLE copyArray #-}
copyArray :: Storable a => Int -> DevicePtr a -> DevicePtr a -> IO ()
copyArray !n !src !dst = memcpy (useDevicePtr dst) (useDevicePtr src) n DeviceToDevice
{-# INLINEABLE copyArrayAsync #-}
copyArrayAsync :: Storable a => Int -> DevicePtr a -> DevicePtr a -> Maybe Stream -> IO ()
copyArrayAsync !n !src !dst !mst =
memcpyAsync (useDevicePtr dst) (useDevicePtr src) n DeviceToDevice mst
{-# INLINEABLE copyArray2D #-}
copyArray2D
:: Storable a
=> Int
-> Int
-> DevicePtr a
-> Int
-> DevicePtr a
-> Int
-> IO ()
copyArray2D !w !h !src !sw !dst !dw =
memcpy2D (useDevicePtr dst) dw (useDevicePtr src) sw w h DeviceToDevice
{-# INLINEABLE copyArray2DAsync #-}
copyArray2DAsync
:: Storable a
=> Int
-> Int
-> DevicePtr a
-> Int
-> DevicePtr a
-> Int
-> Maybe Stream
-> IO ()
copyArray2DAsync !w !h !src !sw !dst !dw !mst =
memcpy2DAsync (useDevicePtr dst) dw (useDevicePtr src) sw w h DeviceToDevice mst
data CopyDirection = HostToHost
| HostToDevice
| DeviceToHost
| DeviceToDevice
| Default
deriving (Eq,Show)
instance Enum CopyDirection where
succ HostToHost = HostToDevice
succ HostToDevice = DeviceToHost
succ DeviceToHost = DeviceToDevice
succ DeviceToDevice = Default
succ Default = error "CopyDirection.succ: Default has no successor"
pred HostToDevice = HostToHost
pred DeviceToHost = HostToDevice
pred DeviceToDevice = DeviceToHost
pred Default = DeviceToDevice
pred HostToHost = error "CopyDirection.pred: HostToHost has no predecessor"
enumFromTo from to = go from
where
end = fromEnum to
go v = case compare (fromEnum v) end of
LT -> v : go (succ v)
EQ -> [v]
GT -> []
enumFrom from = enumFromTo from Default
fromEnum HostToHost = 0
fromEnum HostToDevice = 1
fromEnum DeviceToHost = 2
fromEnum DeviceToDevice = 3
fromEnum Default = 4
toEnum 0 = HostToHost
toEnum 1 = HostToDevice
toEnum 2 = DeviceToHost
toEnum 3 = DeviceToDevice
toEnum 4 = Default
toEnum unmatched = error ("CopyDirection.toEnum: Cannot match " ++ show unmatched)
{-# LINE 431 "src/Foreign/CUDA/Runtime/Marshal.chs" #-}
{-# INLINEABLE memcpy #-}
memcpy :: Storable a
=> Ptr a
-> Ptr a
-> Int
-> CopyDirection
-> IO ()
memcpy !dst !src !n !dir = doMemcpy undefined dst
where
doMemcpy :: Storable a' => a' -> Ptr a' -> IO ()
doMemcpy x _ =
nothingIfOk =<< cudaMemcpy dst src (fromIntegral n * fromIntegral (sizeOf x)) dir
{-# INLINE cudaMemcpy #-}
cudaMemcpy :: (Ptr a) -> (Ptr a) -> (Int64) -> (CopyDirection) -> IO ((Status))
cudaMemcpy a1 a2 a3 a4 =
let {a1' = castPtr a1} in
let {a2' = castPtr a2} in
let {a3' = cIntConv a3} in
let {a4' = cFromEnum a4} in
cudaMemcpy'_ a1' a2' a3' a4' >>= \res ->
let {res' = cToEnum res} in
return (res')
{-# LINE 454 "src/Foreign/CUDA/Runtime/Marshal.chs" #-}
{-# INLINEABLE memcpyAsync #-}
memcpyAsync :: Storable a
=> Ptr a
-> Ptr a
-> Int
-> CopyDirection
-> Maybe Stream
-> IO ()
memcpyAsync !dst !src !n !kind !mst = doMemcpy undefined dst
where
doMemcpy :: Storable a' => a' -> Ptr a' -> IO ()
doMemcpy x _ =
let bytes = fromIntegral n * fromIntegral (sizeOf x) in
nothingIfOk =<< cudaMemcpyAsync dst src bytes kind (fromMaybe defaultStream mst)
{-# INLINE cudaMemcpyAsync #-}
cudaMemcpyAsync :: (Ptr a) -> (Ptr a) -> (Int64) -> (CopyDirection) -> (Stream) -> IO ((Status))
cudaMemcpyAsync a1 a2 a3 a4 a5 =
let {a1' = castPtr a1} in
let {a2' = castPtr a2} in
let {a3' = cIntConv a3} in
let {a4' = cFromEnum a4} in
let {a5' = useStream a5} in
cudaMemcpyAsync'_ a1' a2' a3' a4' a5' >>= \res ->
let {res' = cToEnum res} in
return (res')
{-# LINE 483 "src/Foreign/CUDA/Runtime/Marshal.chs" #-}
{-# INLINEABLE memcpy2D #-}
memcpy2D :: Storable a
=> Ptr a
-> Int
-> Ptr a
-> Int
-> Int
-> Int
-> CopyDirection
-> IO ()
memcpy2D !dst !dw !src !sw !w !h !kind = doCopy undefined dst
where
doCopy :: Storable a' => a' -> Ptr a' -> IO ()
doCopy x _ =
let bytes = fromIntegral (sizeOf x)
dw' = fromIntegral dw * bytes
sw' = fromIntegral sw * bytes
w' = fromIntegral w * bytes
h' = fromIntegral h
in
nothingIfOk =<< cudaMemcpy2D dst dw' src sw' w' h' kind
{-# INLINE cudaMemcpy2D #-}
cudaMemcpy2D :: (Ptr a) -> (Int64) -> (Ptr a) -> (Int64) -> (Int64) -> (Int64) -> (CopyDirection) -> IO ((Status))
cudaMemcpy2D a1 a2 a3 a4 a5 a6 a7 =
let {a1' = castPtr a1} in
let {a2' = fromIntegral a2} in
let {a3' = castPtr a3} in
let {a4' = fromIntegral a4} in
let {a5' = fromIntegral a5} in
let {a6' = fromIntegral a6} in
let {a7' = cFromEnum a7} in
cudaMemcpy2D'_ a1' a2' a3' a4' a5' a6' a7' >>= \res ->
let {res' = cToEnum res} in
return (res')
{-# LINE 522 "src/Foreign/CUDA/Runtime/Marshal.chs" #-}
{-# INLINEABLE memcpy2DAsync #-}
memcpy2DAsync :: Storable a
=> Ptr a
-> Int
-> Ptr a
-> Int
-> Int
-> Int
-> CopyDirection
-> Maybe Stream
-> IO ()
memcpy2DAsync !dst !dw !src !sw !w !h !kind !mst = doCopy undefined dst
where
doCopy :: Storable a' => a' -> Ptr a' -> IO ()
doCopy x _ =
let bytes = fromIntegral (sizeOf x)
dw' = fromIntegral dw * bytes
sw' = fromIntegral sw * bytes
w' = fromIntegral w * bytes
h' = fromIntegral h
st = fromMaybe defaultStream mst
in
nothingIfOk =<< cudaMemcpy2DAsync dst dw' src sw' w' h' kind st
{-# INLINE cudaMemcpy2DAsync #-}
cudaMemcpy2DAsync :: (Ptr a) -> (Int64) -> (Ptr a) -> (Int64) -> (Int64) -> (Int64) -> (CopyDirection) -> (Stream) -> IO ((Status))
cudaMemcpy2DAsync a1 a2 a3 a4 a5 a6 a7 a8 =
let {a1' = castPtr a1} in
let {a2' = fromIntegral a2} in
let {a3' = castPtr a3} in
let {a4' = fromIntegral a4} in
let {a5' = fromIntegral a5} in
let {a6' = fromIntegral a6} in
let {a7' = cFromEnum a7} in
let {a8' = useStream a8} in
cudaMemcpy2DAsync'_ a1' a2' a3' a4' a5' a6' a7' a8' >>= \res ->
let {res' = cToEnum res} in
return (res')
{-# LINE 565 "src/Foreign/CUDA/Runtime/Marshal.chs" #-}
{-# INLINEABLE newListArrayLen #-}
newListArrayLen :: Storable a => [a] -> IO (DevicePtr a, Int)
newListArrayLen !xs =
F.withArrayLen xs $ \len p ->
bracketOnError (mallocArray len) free $ \d_xs -> do
pokeArray len p d_xs
return (d_xs, len)
{-# INLINEABLE newListArray #-}
newListArray :: Storable a => [a] -> IO (DevicePtr a)
newListArray !xs = fst `fmap` newListArrayLen xs
{-# INLINEABLE withListArray #-}
withListArray :: Storable a => [a] -> (DevicePtr a -> IO b) -> IO b
withListArray !xs = withListArrayLen xs . const
{-# INLINEABLE withListArrayLen #-}
withListArrayLen :: Storable a => [a] -> (Int -> DevicePtr a -> IO b) -> IO b
withListArrayLen !xs !f =
bracket (newListArrayLen xs) (free . fst) (uncurry . flip $ f)
{-# INLINEABLE memset #-}
memset :: DevicePtr a
-> Int64
-> Int8
-> IO ()
memset !dptr !bytes !symbol = nothingIfOk =<< cudaMemset dptr symbol bytes
{-# INLINE cudaMemset #-}
cudaMemset :: (DevicePtr a) -> (Int8) -> (Int64) -> IO ((Status))
cudaMemset a1 a2 a3 =
let {a1' = dptr a1} in
let {a2' = cIntConv a2} in
let {a3' = cIntConv a3} in
cudaMemset'_ a1' a2' a3' >>= \res ->
let {res' = cToEnum res} in
return (res')
{-# LINE 643 "src/Foreign/CUDA/Runtime/Marshal.chs" #-}
where
dptr = useDevicePtr . castDevPtr
foreign import ccall unsafe "Foreign/CUDA/Runtime/Marshal.chs.h cudaHostAlloc"
cudaHostAlloc'_ :: ((C2HSImp.Ptr (C2HSImp.Ptr ())) -> (C2HSImp.CULong -> (C2HSImp.CUInt -> (IO C2HSImp.CInt))))
foreign import ccall unsafe "Foreign/CUDA/Runtime/Marshal.chs.h cudaFreeHost"
cudaFreeHost'_ :: ((C2HSImp.Ptr ()) -> (IO C2HSImp.CInt))
foreign import ccall unsafe "Foreign/CUDA/Runtime/Marshal.chs.h cudaMalloc"
cudaMalloc'_ :: ((C2HSImp.Ptr (C2HSImp.Ptr ())) -> (C2HSImp.CULong -> (IO C2HSImp.CInt)))
foreign import ccall unsafe "Foreign/CUDA/Runtime/Marshal.chs.h cudaFree"
cudaFree'_ :: ((C2HSImp.Ptr ()) -> (IO C2HSImp.CInt))
foreign import ccall unsafe "Foreign/CUDA/Runtime/Marshal.chs.h cudaMallocManaged"
cudaMallocManaged'_ :: ((C2HSImp.Ptr (C2HSImp.Ptr ())) -> (C2HSImp.CULong -> (C2HSImp.CUInt -> (IO C2HSImp.CInt))))
foreign import ccall safe "Foreign/CUDA/Runtime/Marshal.chs.h cudaMemcpy"
cudaMemcpy'_ :: ((C2HSImp.Ptr ()) -> ((C2HSImp.Ptr ()) -> (C2HSImp.CULong -> (C2HSImp.CInt -> (IO C2HSImp.CInt)))))
foreign import ccall safe "Foreign/CUDA/Runtime/Marshal.chs.h cudaMemcpyAsync"
cudaMemcpyAsync'_ :: ((C2HSImp.Ptr ()) -> ((C2HSImp.Ptr ()) -> (C2HSImp.CULong -> (C2HSImp.CInt -> ((C2HSImp.Ptr ()) -> (IO C2HSImp.CInt))))))
foreign import ccall safe "Foreign/CUDA/Runtime/Marshal.chs.h cudaMemcpy2D"
cudaMemcpy2D'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CULong -> ((C2HSImp.Ptr ()) -> (C2HSImp.CULong -> (C2HSImp.CULong -> (C2HSImp.CULong -> (C2HSImp.CInt -> (IO C2HSImp.CInt))))))))
foreign import ccall safe "Foreign/CUDA/Runtime/Marshal.chs.h cudaMemcpy2DAsync"
cudaMemcpy2DAsync'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CULong -> ((C2HSImp.Ptr ()) -> (C2HSImp.CULong -> (C2HSImp.CULong -> (C2HSImp.CULong -> (C2HSImp.CInt -> ((C2HSImp.Ptr ()) -> (IO C2HSImp.CInt)))))))))
foreign import ccall unsafe "Foreign/CUDA/Runtime/Marshal.chs.h cudaMemset"
cudaMemset'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CInt -> (C2HSImp.CULong -> (IO C2HSImp.CInt))))