{-# LANGUAGE CPP #-} -- | -- Module : Data.Array.Accelerate.LLVM.Native.State -- Copyright : [2014..2020] The Accelerate Team -- License : BSD3 -- -- Maintainer : Trevor L. McDonell <trevor.mcdonell@gmail.com> -- Stability : experimental -- Portability : non-portable (GHC extensions) -- module Data.Array.Accelerate.LLVM.Native.State ( evalNative, createTarget, defaultTarget, ) where -- accelerate import Data.Array.Accelerate.LLVM.State import Data.Array.Accelerate.LLVM.Native.Target import Data.Array.Accelerate.LLVM.Native.Execute.Scheduler import qualified Data.Array.Accelerate.LLVM.Native.Link.Cache as LC import qualified Data.Array.Accelerate.LLVM.Native.Debug as Debug -- library import Data.Maybe import System.Environment import System.IO.Unsafe import Text.Printf import Text.Read import Prelude as P import GHC.Conc -- | Execute a computation in the Native backend -- evalNative :: Native -> LLVM Native a -> IO a evalNative :: Native -> LLVM Native a -> IO a evalNative = Native -> LLVM Native a -> IO a forall t a. t -> LLVM t a -> IO a evalLLVM -- | Create a Native execution target by spawning a worker thread on each of the -- given capabilities. -- createTarget :: [Int] -- ^ CPUs to launch worker threads on -> IO Native createTarget :: [Int] -> IO Native createTarget [Int] cpus = do Workers gang <- [Int] -> IO Workers hireWorkersOn [Int] cpus LinkCache FunctionTable ObjectCode linker <- IO (LinkCache FunctionTable ObjectCode) forall f o. IO (LinkCache f o) LC.new Native -> IO Native forall (m :: * -> *) a. Monad m => a -> m a return (Native -> IO Native) -> Native -> IO Native forall a b. (a -> b) -> a -> b $! LinkCache FunctionTable ObjectCode -> Workers -> Native Native LinkCache FunctionTable ObjectCode linker Workers gang {-- -- | The strategy for balancing work amongst the available worker threads. -- type Strategy = Gang -> Executable -- | Execute an operation sequentially on a single thread -- sequentialIO :: Strategy sequentialIO gang = Executable $ \name _ppt range fill -> timed name $ runSeqIO gang range fill -- | Execute a computation without load balancing. Each thread computes an -- equally sized chunk of the input. No work stealing occurs. -- unbalancedParIO :: Strategy unbalancedParIO gang = Executable $ \name _ppt range fill -> timed name $ runParIO Single.mkResource gang range fill -- | Execute a computation where threads use work stealing (based on lazy -- splitting of work stealing queues and exponential backoff) in order to -- automatically balance the workload amongst themselves. -- balancedParIO :: Int -- ^ number of steal attempts before backing off -> Strategy balancedParIO retries gang = Executable $ \name ppt range fill -> -- TLM: A suitable PPT should be chosen when invoking the continuation in -- order to balance scheduler overhead with fine-grained function calls -- let resource = LBS.mkResource ppt (SMP.mkResource retries <> Backoff.mkResource) in timed name $ runParIO resource gang range fill --} -- Top-level mutable state -- ----------------------- -- -- It is important to keep some information alive for the entire run of the -- program, not just a single execution. These tokens use 'unsafePerformIO' to -- ensure they are executed only once, and reused for subsequent invocations. -- -- | Initialise the gang of threads that will be used to execute computations. -- This spawns one worker for each available processor, or as specified by the -- value of the environment variable @ACCELERATE_LLVM_NATIVE_THREADS@. -- -- This globally shared thread gang is auto-initialised on startup and shared by -- all computations (unless the user chooses to 'run' with a different gang). -- -- It does not help to have multiple gangs running at the same time, as then the -- system as a whole may run slower as the threads contend for cache. The -- scheduler is able to execute operations from multiple sources concurrently, -- so multiple gangs should not be necessary. -- {-# NOINLINE defaultTarget #-} defaultTarget :: Native defaultTarget :: Native defaultTarget = IO Native -> Native forall a. IO a -> a unsafePerformIO (IO Native -> Native) -> IO Native -> Native forall a b. (a -> b) -> a -> b $ do Int nproc <- IO Int getNumProcessors Int ncaps <- IO Int getNumCapabilities Maybe Int menv <- (String -> Maybe Int forall a. Read a => String -> Maybe a readMaybe (String -> Maybe Int) -> Maybe String -> Maybe Int forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b =<<) (Maybe String -> Maybe Int) -> IO (Maybe String) -> IO (Maybe Int) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b <$> String -> IO (Maybe String) lookupEnv String "ACCELERATE_LLVM_NATIVE_THREADS" let nthreads :: Int nthreads = Int -> Maybe Int -> Int forall a. a -> Maybe a -> a fromMaybe Int nproc Maybe Int menv -- Update the number of capabilities, but never set it lower than it already -- is. This target will spawn a worker on each processor (as returned by -- 'getNumProcessors', which includes SMT (hyperthreading) cores), but the -- user may have requested more capabilities than this to handle, for example, -- concurrent output. -- Int -> IO () setNumCapabilities (Int -> Int -> Int forall a. Ord a => a -> a -> a max Int ncaps Int nthreads) Flag -> String -> IO () Debug.traceIO Flag Debug.dump_gc (String -> Int -> String forall r. PrintfType r => String -> r printf String "gc: initialise native target with %d worker threads" Int nthreads) [Int] -> IO Native createTarget [Int 0 .. Int nthreadsInt -> Int -> Int forall a. Num a => a -> a -> a -Int 1] {-- -- Debugging -- --------- {-# INLINE timed #-} timed :: ShortByteString -> IO a -> IO a timed name f = Debug.timed Debug.dump_exec (elapsed name) f {-# INLINE elapsed #-} elapsed :: ShortByteString -> Double -> Double -> String elapsed name x y = printf "exec: %s %s" (unpack name) (Debug.elapsedP x y) --}