{-# LANGUAGE CPP #-}
{-# LANGUAGE TypeOperators #-}
module Data.Array.Accelerate.LLVM.PTX.Debug (
module Data.Array.Accelerate.Debug,
module Data.Array.Accelerate.LLVM.PTX.Debug,
) where
import Data.Array.Accelerate.Debug hiding ( timed, elapsed )
import Foreign.CUDA.Driver.Stream ( Stream )
import qualified Foreign.CUDA.Driver.Event as Event
import Control.Concurrent
import Control.Exception ( finally, onException )
import Data.Label
import Data.Time.Clock
import System.CPUTime
import Text.Printf
import GHC.Float
-- | Run an action under 'monitorProcTime', using the given debug flag both to
-- decide whether timing is collected (via 'queryFlag') and as the flag passed
-- to 'traceIO' when the report is emitted.
--
-- The formatting function receives the three measurements in the order
-- 'monitorProcTime' hands them to its display callback: wall-clock time, CPU
-- time, then GPU time. Its result is the message given to 'traceIO'.
--
-- The remaining two arguments (the optional stream and the action itself) are
-- passed straight through to 'monitorProcTime'.
timed
    :: (Flags :-> Bool)                         -- ^ debug flag controlling the report
    -> (Double -> Double -> Double -> String)   -- ^ formats wall, CPU and GPU time
    -> Maybe Stream                             -- ^ stream the GPU events are recorded in
    -> IO ()                                    -- ^ action to run
    -> IO ()
{-# INLINE timed #-}
timed flag render = monitorProcTime (queryFlag flag) report
  where
    -- Format the three timings and emit them under the same flag.
    report wall cpu gpu = traceIO flag (render wall cpu gpu)
-- | Run an action and, if the supplied check returns 'True', measure how long
-- it took and pass the measurements to the display callback.
--
-- When built with @ACCELERATE_DEBUG@ and the check succeeds:
--
--   * Two CUDA events are created and recorded in the given stream, one just
--     before and one just after the action.
--
--   * Wall-clock time ('getCurrentTime') and CPU time ('getCPUTime') are
--     sampled around the action in the calling thread.
--
--   * A separate thread is forked which blocks on the second event, computes
--     the elapsed GPU time, destroys both events, and finally invokes the
--     callback as @display wallTime cpuTime gpuTime@. The caller therefore
--     returns without waiting for the GPU work to complete.
--
-- Wall and CPU time are converted to seconds ('getCPUTime' counts
-- picoseconds). The GPU time is the event difference scaled by @1E-3@;
-- NOTE(review): this assumes 'Event.elapsedTime' reports milliseconds, as the
-- CUDA driver API documents -- confirm against the bindings in use.
--
-- The events are released on every path: if the action (or recording an
-- event) throws, both events are destroyed before the exception propagates,
-- and the forked thread destroys them even if waiting on or querying the
-- events fails.
--
-- When the check returns 'False', or when built without @ACCELERATE_DEBUG@,
-- the action is simply run.
monitorProcTime
    :: IO Bool                                  -- ^ should timing be collected?
    -> (Double -> Double -> Double -> IO ())    -- ^ receives wall, CPU and GPU time
    -> Maybe Stream                             -- ^ stream to record the events in
    -> IO ()                                    -- ^ action to run
    -> IO ()
{-# INLINE monitorProcTime #-}
#if ACCELERATE_DEBUG
monitorProcTime enabled display stream action = do
  yes <- enabled
  if yes
    then do
      gpuBegin <- Event.create []
      -- Do not leak the first event if creating the second one fails.
      gpuEnd   <- Event.create [] `onException` Event.destroy gpuBegin
      let destroyEvents = Event.destroy gpuBegin >> Event.destroy gpuEnd

      -- Everything up to the fork runs in the calling thread. Should any step
      -- fail, nobody else will ever see the events, so release them here and
      -- let the exception continue to the caller.
      (wallTime, cpuTime) <- flip onException destroyEvents $ do
        wallBegin <- getCurrentTime
        cpuBegin  <- getCPUTime
        Event.record gpuBegin stream
        action
        Event.record gpuEnd stream
        cpuEnd    <- getCPUTime
        wallEnd   <- getCurrentTime
        return ( realToFrac (diffUTCTime wallEnd wallBegin)
               , fromIntegral (cpuEnd - cpuBegin) * 1E-12 )   -- picoseconds

      -- Wait for the GPU to reach the end event in a separate thread, so that
      -- the caller is not blocked. From here on the forked thread owns the
      -- events and is responsible for destroying them, even if waiting on or
      -- querying them fails.
      _ <- forkIO $ do
        gpuTime <- ( do Event.block gpuEnd
                        diff <- Event.elapsedTime gpuBegin gpuEnd
                        return (float2Double (diff * 1E-3)) )
                   `finally` destroyEvents
        display wallTime cpuTime gpuTime
      return ()
    else
      action
#else
monitorProcTime _ _ _ action = action
#endif
-- | Render wall-clock, CPU and GPU timings as a single line of the form
-- @"\<wall\> (wall), \<cpu\> (cpu), \<gpu\> (gpu)"@.
--
-- Each value is formatted with 'showFFloatSIBase', passing @Just 3@, a base
-- of @1000@ and the unit suffix @"s"@. The argument order matches the
-- @Double -> Double -> Double -> String@ formatter that 'timed' expects.
elapsed :: Double -> Double -> Double -> String
{-# INLINE elapsed #-}
elapsed wallTime cpuTime gpuTime =
  printf "%s (wall), %s (cpu), %s (gpu)"
    (seconds wallTime)
    (seconds cpuTime)
    (seconds gpuTime)
  where
    -- Shared formatting applied to each of the three measurements.
    seconds :: Double -> String
    seconds t = showFFloatSIBase (Just 3) 1000 t "s"