{-# LANGUAGE CPP #-} -- | -- Module : Data.Array.Accelerate.LLVM.Native.State -- Copyright : [2014..2020] The Accelerate Team -- License : BSD3 -- -- Maintainer : Trevor L. McDonell -- Stability : experimental -- Portability : non-portable (GHC extensions) -- module Data.Array.Accelerate.LLVM.Native.State ( evalNative, createTarget, defaultTarget, ) where -- accelerate import Data.Array.Accelerate.LLVM.State import Data.Array.Accelerate.LLVM.Native.Target import Data.Array.Accelerate.LLVM.Native.Execute.Scheduler import qualified Data.Array.Accelerate.LLVM.Native.Link.Cache as LC import qualified Data.Array.Accelerate.LLVM.Native.Debug as Debug -- library import Data.Maybe import System.Environment import System.IO.Unsafe import Text.Printf import Text.Read import Prelude as P import GHC.Conc -- | Execute a computation in the Native backend -- evalNative :: Native -> LLVM Native a -> IO a evalNative = evalLLVM -- | Create a Native execution target by spawning a worker thread on each of the -- given capabilities. -- createTarget :: [Int] -- ^ CPUs to launch worker threads on -> IO Native createTarget cpus = do gang <- hireWorkersOn cpus linker <- LC.new return $! Native linker gang {-- -- | The strategy for balancing work amongst the available worker threads. -- type Strategy = Gang -> Executable -- | Execute an operation sequentially on a single thread -- sequentialIO :: Strategy sequentialIO gang = Executable $ \name _ppt range fill -> timed name $ runSeqIO gang range fill -- | Execute a computation without load balancing. Each thread computes an -- equally sized chunk of the input. No work stealing occurs. -- unbalancedParIO :: Strategy unbalancedParIO gang = Executable $ \name _ppt range fill -> timed name $ runParIO Single.mkResource gang range fill -- | Execute a computation where threads use work stealing (based on lazy -- splitting of work stealing queues and exponential backoff) in order to -- automatically balance the workload amongst themselves. -- balancedParIO :: Int -- ^ number of steal attempts before backing off -> Strategy balancedParIO retries gang = Executable $ \name ppt range fill -> -- TLM: A suitable PPT should be chosen when invoking the continuation in -- order to balance scheduler overhead with fine-grained function calls -- let resource = LBS.mkResource ppt (SMP.mkResource retries <> Backoff.mkResource) in timed name $ runParIO resource gang range fill --} -- Top-level mutable state -- ----------------------- -- -- It is important to keep some information alive for the entire run of the -- program, not just a single execution. These tokens use 'unsafePerformIO' to -- ensure they are executed only once, and reused for subsequent invocations. -- -- | Initialise the gang of threads that will be used to execute computations. -- This spawns one worker for each available processor, or as specified by the -- value of the environment variable @ACCELERATE_LLVM_NATIVE_THREADS@. -- -- This globally shared thread gang is auto-initialised on startup and shared by -- all computations (unless the user chooses to 'run' with a different gang). -- -- It does not help to have multiple gangs running at the same time, as then the -- system as a whole may run slower as the threads contend for cache. The -- scheduler is able to execute operations from multiple sources concurrently, -- so multiple gangs should not be necessary. -- {-# NOINLINE defaultTarget #-} defaultTarget :: Native defaultTarget = unsafePerformIO $ do nproc <- getNumProcessors ncaps <- getNumCapabilities menv <- (readMaybe =<<) <$> lookupEnv "ACCELERATE_LLVM_NATIVE_THREADS" let nthreads = fromMaybe nproc menv -- Update the number of capabilities, but never set it lower than it already -- is. This target will spawn a worker on each processor (as returned by -- 'getNumProcessors', which includes SMT (hyperthreading) cores), but the -- user may have requested more capabilities than this to handle, for example, -- concurrent output. -- setNumCapabilities (max ncaps nthreads) Debug.traceIO Debug.dump_gc (printf "gc: initialise native target with %d worker threads" nthreads) createTarget [0 .. nthreads-1] {-- -- Debugging -- --------- {-# INLINE timed #-} timed :: ShortByteString -> IO a -> IO a timed name f = Debug.timed Debug.dump_exec (elapsed name) f {-# INLINE elapsed #-} elapsed :: ShortByteString -> Double -> Double -> String elapsed name x y = printf "exec: %s %s" (unpack name) (Debug.elapsedP x y) --}