-- GENERATED by C->Haskell Compiler, version 0.28.2 Switcheroo, 1 April 2016 (Haskell)
-- Edit the ORIGNAL .chs file instead!


{-# LINE 1 "src/Foreign/CUDA/Driver/Unified.chs" #-}
{-# LANGUAGE BangPatterns             #-}
{-# LANGUAGE CPP                      #-}
{-# LANGUAGE EmptyDataDecls           #-}
{-# LANGUAGE ForeignFunctionInterface #-}
{-# LANGUAGE ScopedTypeVariables      #-}
{-# LANGUAGE TemplateHaskell          #-}
{-# OPTIONS_HADDOCK prune #-}
--------------------------------------------------------------------------------
-- |
-- Module    : Foreign.CUDA.Driver.Unified
-- Copyright : [2017] Trevor L. McDonell
-- License   : BSD
--
-- Unified addressing functions for the low-level driver interface
--
-- [/Overview/]
--
-- CUDA devices can share a unified address space with the host. For these
-- devices, there is no distinction between a device pointer and a host
-- pointer---the same pointer value may be used to access memory from the host
-- program and from a kernel running on the device (with exceptions enumerated
-- below).
--
-- [/Support/]
--
-- Whether or not a device supports unified addressing may be queried by calling
-- 'Foreign.CUDA.Driver.Device.attribute' with the
-- 'Foreign.CUDA.Driver.Device.UnifiedAddressing' attribute.
--
-- Unified addressing is automatically enabled in 64-bit processes on devices
-- with compute capability at leas 2.0.
--
-- [/Looking up information about pointers/]
--
-- It is possible to look up information about the memory which backs a pointer;
-- that is, whether the memory resides on the host or the device (and in
-- particular, which device).
--
-- [/Automatic mapping of host memory/]
--
-- All host memory allocated in all contexts using
-- 'Foreign.CUDA.Driver.Marshal.mallocHostArray' or
-- 'Foreign.CUDA.Driver.Marshal.mallocHostForeignPtr' is always directly
-- accessible from all contexts on all devices which support unified addressing.
-- This is the case whether or not the flags
-- 'Foreign.CUDA.Driver.Marshal.Portable' or
-- 'Foreign.CUDA.Driver.Marshal.DeviceMapped' are specified.
--
-- The pointer value through which allocated host memory may be accessed in
-- kernels on all devices which support unified addressing is the same as the
-- pointer value as on the host; that is, it is not necessary to call
-- 'Foreign.CUDA.Driver.Marshal.getDevicePtr' for these allocations.
--
-- Note that this is not the case for memory allocated using the
-- 'Foreign.CUDA.Driver.Marshal.WriteCombined' option; see below.
--
-- [/Automatic registration of peer memory/]
--
-- Upon enabling direct access from a context which supports unified addressing
-- to another peer context which supports unified addressing using
-- 'Foreign.CUDA.Driver.Context.Peer.add', all memory allocated in the peer
-- context will immediately be accessible by the current context. The device
-- pointer values are the same on both contexts.
--
-- [/Exceptions (disjoint addressing/]
--
-- Not all memory may be accessed on devices through the same pointer value
-- as they are accessed with on the host. These exceptions are host arrays
-- registered with 'Foreign.CUDA.Driver.Marshal.registerArray', and those
-- allocated with the flag 'Foreign.CUDA.Driver.Marshal.WriteCombined'. In these
-- cases, the host and device arrays have distinct addresses (pointer values).
-- However, the device address is guaranteed to not overlap with any valid host
-- pointer range and is guaranteed to have the same value across all contexts
-- which support unified addressing.
--
-- The value of the device pointer may be queried with
-- 'Foreign.CUDA.Driver.Marshal.getDevicePtr' from any context supporting
-- unified addressing.
--
--------------------------------------------------------------------------------

module Foreign.CUDA.Driver.Unified (

  -- ** Querying pointer attributes
  PointerAttributes(..), MemoryType(..),
  getAttributes,

  -- ** Setting pointer attributes
  Advice(..),
  setSyncMemops,
  advise,

) where
import qualified Foreign.C.Types as C2HSImp
import qualified Foreign.Ptr as C2HSImp





{-# LINE 96 "src/Foreign/CUDA/Driver/Unified.chs" #-}


-- Friends
import Foreign.CUDA.Driver.Context
import Foreign.CUDA.Driver.Device
import Foreign.CUDA.Driver.Error
import Foreign.CUDA.Driver.Marshal
import Foreign.CUDA.Internal.C2HS
import Foreign.CUDA.Types

-- System
import Control.Applicative
import Control.Monad
import Data.Maybe
import Foreign
import Foreign.C
import Foreign.Storable
import Prelude


-- | Information about a pointer
--
data PointerAttributes a = PointerAttributes
  { ptrContext    :: {-# UNPACK #-} !Context
  , ptrDevice     :: {-# UNPACK #-} !(DevicePtr a)
  , ptrHost       :: {-# UNPACK #-} !(HostPtr a)
  , ptrBufferID   :: {-# UNPACK #-} !CULLong
  , ptrMemoryType :: !MemoryType
  , ptrSyncMemops :: !Bool
  , ptrIsManaged  :: !Bool
  }
  deriving Show

data MemoryType = HostMemory
                | DeviceMemory
                | ArrayMemory
                | UnifiedMemory
  deriving (Eq,Show,Bounded)
instance Enum MemoryType where
  succ HostMemory = DeviceMemory
  succ DeviceMemory = ArrayMemory
  succ ArrayMemory = UnifiedMemory
  succ UnifiedMemory = error "MemoryType.succ: UnifiedMemory has no successor"

  pred DeviceMemory = HostMemory
  pred ArrayMemory = DeviceMemory
  pred UnifiedMemory = ArrayMemory
  pred HostMemory = error "MemoryType.pred: HostMemory has no predecessor"

  enumFromTo from to = go from
    where
      end = fromEnum to
      go v = case compare (fromEnum v) end of
                 LT -> v : go (succ v)
                 EQ -> [v]
                 GT -> []

  enumFrom from = enumFromTo from UnifiedMemory

  fromEnum HostMemory = 1
  fromEnum DeviceMemory = 2
  fromEnum ArrayMemory = 3
  fromEnum UnifiedMemory = 4

  toEnum 1 = HostMemory
  toEnum 2 = DeviceMemory
  toEnum 3 = ArrayMemory
  toEnum 4 = UnifiedMemory
  toEnum unmatched = error ("MemoryType.toEnum: Cannot match " ++ show unmatched)

{-# LINE 139 "src/Foreign/CUDA/Driver/Unified.chs" #-}


data PointerAttribute = AttributeContext
                      | AttributeMemoryType
                      | AttributeDevicePointer
                      | AttributeHostPointer
                      | AttributeP2pTokens
                      | AttributeSyncMemops
                      | AttributeBufferId
                      | AttributeIsManaged
  deriving (Eq,Show,Bounded)
instance Enum PointerAttribute where
  succ AttributeContext = AttributeMemoryType
  succ AttributeMemoryType = AttributeDevicePointer
  succ AttributeDevicePointer = AttributeHostPointer
  succ AttributeHostPointer = AttributeP2pTokens
  succ AttributeP2pTokens = AttributeSyncMemops
  succ AttributeSyncMemops = AttributeBufferId
  succ AttributeBufferId = AttributeIsManaged
  succ AttributeIsManaged = error "PointerAttribute.succ: AttributeIsManaged has no successor"

  pred AttributeMemoryType = AttributeContext
  pred AttributeDevicePointer = AttributeMemoryType
  pred AttributeHostPointer = AttributeDevicePointer
  pred AttributeP2pTokens = AttributeHostPointer
  pred AttributeSyncMemops = AttributeP2pTokens
  pred AttributeBufferId = AttributeSyncMemops
  pred AttributeIsManaged = AttributeBufferId
  pred AttributeContext = error "PointerAttribute.pred: AttributeContext has no predecessor"

  enumFromTo from to = go from
    where
      end = fromEnum to
      go v = case compare (fromEnum v) end of
                 LT -> v : go (succ v)
                 EQ -> [v]
                 GT -> []

  enumFrom from = enumFromTo from AttributeIsManaged

  fromEnum AttributeContext = 1
  fromEnum AttributeMemoryType = 2
  fromEnum AttributeDevicePointer = 3
  fromEnum AttributeHostPointer = 4
  fromEnum AttributeP2pTokens = 5
  fromEnum AttributeSyncMemops = 6
  fromEnum AttributeBufferId = 7
  fromEnum AttributeIsManaged = 8

  toEnum 1 = AttributeContext
  toEnum 2 = AttributeMemoryType
  toEnum 3 = AttributeDevicePointer
  toEnum 4 = AttributeHostPointer
  toEnum 5 = AttributeP2pTokens
  toEnum 6 = AttributeSyncMemops
  toEnum 7 = AttributeBufferId
  toEnum 8 = AttributeIsManaged
  toEnum unmatched = error ("PointerAttribute.toEnum: Cannot match " ++ show unmatched)

{-# LINE 144 "src/Foreign/CUDA/Driver/Unified.chs" #-}


data Advice = SetReadMostly
            | UnsetReadMostly
            | SetPreferredLocation
            | UnsetPreferredLocation
            | SetAccessedBy
            | UnsetAccessedBy
  deriving (Eq,Show,Bounded)
instance Enum Advice where
  succ SetReadMostly = UnsetReadMostly
  succ UnsetReadMostly = SetPreferredLocation
  succ SetPreferredLocation = UnsetPreferredLocation
  succ UnsetPreferredLocation = SetAccessedBy
  succ SetAccessedBy = UnsetAccessedBy
  succ UnsetAccessedBy = error "Advice.succ: UnsetAccessedBy has no successor"

  pred UnsetReadMostly = SetReadMostly
  pred SetPreferredLocation = UnsetReadMostly
  pred UnsetPreferredLocation = SetPreferredLocation
  pred SetAccessedBy = UnsetPreferredLocation
  pred UnsetAccessedBy = SetAccessedBy
  pred SetReadMostly = error "Advice.pred: SetReadMostly has no predecessor"

  enumFromTo from to = go from
    where
      end = fromEnum to
      go v = case compare (fromEnum v) end of
                 LT -> v : go (succ v)
                 EQ -> [v]
                 GT -> []

  enumFrom from = enumFromTo from UnsetAccessedBy

  fromEnum SetReadMostly = 1
  fromEnum UnsetReadMostly = 2
  fromEnum SetPreferredLocation = 3
  fromEnum UnsetPreferredLocation = 4
  fromEnum SetAccessedBy = 5
  fromEnum UnsetAccessedBy = 6

  toEnum 1 = SetReadMostly
  toEnum 2 = UnsetReadMostly
  toEnum 3 = SetPreferredLocation
  toEnum 4 = UnsetPreferredLocation
  toEnum 5 = SetAccessedBy
  toEnum 6 = UnsetAccessedBy
  toEnum unmatched = error ("Advice.toEnum: Cannot match " ++ show unmatched)

{-# LINE 152 "src/Foreign/CUDA/Driver/Unified.chs" #-}



-- Return information about a pointer.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__UNIFIED.html#group__CUDA__UNIFIED_1g0c28ed0aff848042bc0533110e45820c>
--
-- Requires CUDA-7.0.
--
{-# INLINEABLE getAttributes #-}
getAttributes :: Ptr a -> IO (PointerAttributes a)
getAttributes ptr =
  alloca $ \p_ctx  ->
  alloca $ \p_dptr ->
  alloca $ \p_hptr ->
  alloca $ \(p_bid :: Ptr CULLong) ->
  alloca $ \(p_mt  :: Ptr CUInt)   ->
  alloca $ \(p_sm  :: Ptr CInt)    ->
  alloca $ \(p_im  :: Ptr CInt)    -> do
    let n       = length as
        (as,ps) = unzip [ (AttributeContext,       castPtr p_ctx)
                        , (AttributeDevicePointer, castPtr p_dptr)
                        , (AttributeHostPointer,   castPtr p_hptr)
                        , (AttributeBufferId,      castPtr p_bid)
                        , (AttributeMemoryType,    castPtr p_mt)
                        , (AttributeSyncMemops,    castPtr p_sm)
                        , (AttributeIsManaged,     castPtr p_im)
                        ]
    --
    nothingIfOk =<< cuPointerGetAttributes n as ps ptr
    PointerAttributes
      <$> liftM Context (peek p_ctx)
      <*> liftM DevicePtr (peek p_dptr)
      <*> liftM HostPtr   (peek p_hptr)
      <*> peek p_bid
      <*> liftM cToEnum (peek p_mt)
      <*> liftM cToBool (peek p_sm)
      <*> liftM cToBool (peek p_im)

{-# INLINE cuPointerGetAttributes #-}
cuPointerGetAttributes :: (Int) -> ([PointerAttribute]) -> ([Ptr ()]) -> (Ptr a) -> IO ((Status))
cuPointerGetAttributes a1 a2 a3 a4 =
  let {a1' = fromIntegral a1} in 
  withAttrs a2 $ \a2' -> 
  withArray a3 $ \a3' -> 
  let {a4' = useHandle a4} in 
  cuPointerGetAttributes'_ a1' a2' a3' a4' >>= \res ->
  let {res' = cToEnum res} in
  return (res')

{-# LINE 201 "src/Foreign/CUDA/Driver/Unified.chs" #-}

  where
    withAttrs as = withArray (map cFromEnum as)
    useHandle    = fromIntegral . ptrToIntPtr


-- Set whether or not the given memory region is guaranteed to always
-- synchronise memory operations that are synchronous. If there are some
-- previously initiated synchronous memory operations that are pending when this
-- attribute is set, the function does not return until those memory operations
-- are complete. See
-- <http://docs.nvidia.com/cuda/cuda-driver-api/api-sync-behavior.html API
-- synchronisation behaviour> for more information on cases where synchronous
-- memory operations can exhibit asynchronous behaviour.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__UNIFIED.html#group__CUDA__UNIFIED_1g89f7ad29a657e574fdea2624b74d138e>
--
-- Requires CUDA-7.0.
--
{-# INLINE setSyncMemops #-}
setSyncMemops :: Ptr a -> Bool -> IO ()
setSyncMemops ptr val = nothingIfOk =<< cuPointerSetAttribute val AttributeSyncMemops ptr

{-# INLINE cuPointerSetAttribute #-}
cuPointerSetAttribute :: (Bool) -> (PointerAttribute) -> (Ptr a) -> IO ((Status))
cuPointerSetAttribute a1 a2 a3 =
  withBool' a1 $ \a1' -> 
  let {a2' = cFromEnum a2} in 
  let {a3' = useHandle a3} in 
  cuPointerSetAttribute'_ a1' a2' a3' >>= \res ->
  let {res' = cToEnum res} in
  return (res')

{-# LINE 234 "src/Foreign/CUDA/Driver/Unified.chs" #-}

  where
    withBool' :: Bool -> (Ptr () -> IO b) -> IO b
    withBool' v k = with (fromBool v :: CUInt) (k . castPtr)

    useHandle = fromIntegral . ptrToIntPtr


-- | Advise about the usage of a given range of memory. If the supplied device
-- is Nothing, then the preferred location is taken to mean the CPU.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__UNIFIED.html#group__CUDA__UNIFIED_1g27608c857a9254789c13f3e3b72029e2>
--
-- Requires CUDA-8.0.
--
{-# INLINEABLE advise #-}
advise :: Storable a => Ptr a -> Int -> Advice -> Maybe Device -> IO ()
advise ptr n a mdev = go undefined ptr
  where
    go :: Storable a' => a' -> Ptr a' -> IO ()
    go x _ = nothingIfOk =<< cuMemAdvise ptr (n * sizeOf x) a (maybe (-1) useDevice mdev)

{-# INLINE cuMemAdvise #-}
cuMemAdvise :: (Ptr a) -> (Int) -> (Advice) -> (CInt) -> IO ((Status))
cuMemAdvise a1 a2 a3 a4 =
  let {a1' = useHandle a1} in 
  let {a2' = fromIntegral a2} in 
  let {a3' = cFromEnum a3} in 
  let {a4' = fromIntegral a4} in 
  cuMemAdvise'_ a1' a2' a3' a4' >>= \res ->
  let {res' = cToEnum res} in
  return (res')

{-# LINE 267 "src/Foreign/CUDA/Driver/Unified.chs" #-}

  where
    useHandle = fromIntegral . ptrToIntPtr


foreign import ccall unsafe "Foreign/CUDA/Driver/Unified.chs.h cuPointerGetAttributes"
  cuPointerGetAttributes'_ :: (C2HSImp.CUInt -> ((C2HSImp.Ptr C2HSImp.CInt) -> ((C2HSImp.Ptr (C2HSImp.Ptr ())) -> (C2HSImp.CULLong -> (IO C2HSImp.CInt)))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Unified.chs.h cuPointerSetAttribute"
  cuPointerSetAttribute'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CInt -> (C2HSImp.CULLong -> (IO C2HSImp.CInt))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Unified.chs.h cuMemAdvise"
  cuMemAdvise'_ :: (C2HSImp.CULLong -> (C2HSImp.CULong -> (C2HSImp.CInt -> (C2HSImp.CInt -> (IO C2HSImp.CInt)))))