{-# LANGUAGE CPP #-}
{-# LANGUAGE EmptyDataDecls #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
module Data.Array.Accelerate.LLVM.PTX.Target (
module Data.Array.Accelerate.LLVM.Target,
module Data.Array.Accelerate.LLVM.PTX.Target,
) where
import LLVM.AST.AddrSpace
import LLVM.AST.DataLayout
import LLVM.Target hiding ( Target )
import qualified LLVM.Target as LLVM
import qualified LLVM.Relocation as R
import qualified LLVM.CodeModel as CM
import qualified LLVM.CodeGenOpt as CGO
import Data.Array.Accelerate.Error
import Data.Array.Accelerate.LLVM.Target
import Data.Array.Accelerate.LLVM.Util
import Control.Parallel.Meta ( Executable )
import Data.Array.Accelerate.LLVM.PTX.Array.Table ( MemoryTable )
import Data.Array.Accelerate.LLVM.PTX.Context ( Context, deviceProperties )
import Data.Array.Accelerate.LLVM.PTX.Execute.Stream.Reservoir ( Reservoir )
import Data.Array.Accelerate.LLVM.PTX.Link.Cache ( KernelTable )
import qualified Foreign.CUDA.Driver as CUDA
import Data.ByteString ( ByteString )
import Data.ByteString.Short ( ShortByteString )
import Data.String
import System.IO.Unsafe
import Text.Printf
import qualified Data.Map as Map
import qualified Data.Set as Set
data PTX = PTX {
ptxContext :: {-# UNPACK #-} !Context
, ptxMemoryTable :: {-# UNPACK #-} !MemoryTable
, ptxKernelTable :: {-# UNPACK #-} !KernelTable
, ptxStreamReservoir :: {-# UNPACK #-} !Reservoir
, fillP :: {-# UNPACK #-} !Executable
}
instance Target PTX where
targetTriple _ = Just ptxTargetTriple
#if ACCELERATE_USE_NVVM
targetDataLayout _ = Nothing
#else
targetDataLayout _ = Just ptxDataLayout
#endif
ptxDeviceProperties :: PTX -> CUDA.DeviceProperties
ptxDeviceProperties = deviceProperties . ptxContext
ptxDataLayout :: DataLayout
ptxDataLayout = DataLayout
{ endianness = LittleEndian
, mangling = Nothing
, aggregateLayout = AlignmentInfo 0 64
, stackAlignment = Nothing
, pointerLayouts = Map.fromList
[ (AddrSpace 0, (wordSize, AlignmentInfo wordSize wordSize)) ]
, typeLayouts = Map.fromList $
[ ((IntegerAlign, 1), AlignmentInfo 8 8) ] ++
[ ((IntegerAlign, i), AlignmentInfo i i) | i <- [8,16,32,64]] ++
[ ((VectorAlign, v), AlignmentInfo v v) | v <- [16,32,64,128]] ++
[ ((FloatAlign, f), AlignmentInfo f f) | f <- [32,64] ]
, nativeSizes = Just $ Set.fromList [ 16,32,64 ]
}
where
wordSize = bitSize (undefined :: Int)
ptxTargetTriple :: ShortByteString
ptxTargetTriple =
case bitSize (undefined::Int) of
32 -> "nvptx-nvidia-cuda"
64 -> "nvptx64-nvidia-cuda"
_ -> $internalError "ptxTargetTriple" "I don't know what architecture I am"
withPTXTargetMachine
:: CUDA.DeviceProperties
-> (TargetMachine -> IO a)
-> IO a
withPTXTargetMachine dev go =
let CUDA.Compute m n = CUDA.computeCapability dev
isa = CPUFeature (ptxISAVersion m n)
sm = fromString (printf "sm_%d%d" m n)
in
withTargetOptions $ \options -> do
withTargetMachine
ptxTarget
ptxTargetTriple
sm
(Map.singleton isa True)
options
R.Default
CM.Default
CGO.Default
go
ptxISAVersion :: Int -> Int -> ByteString
ptxISAVersion 2 _ = "ptx40"
ptxISAVersion 3 7 = "ptx41"
ptxISAVersion 3 _ = "ptx40"
ptxISAVersion 5 0 = "ptx40"
ptxISAVersion 5 2 = "ptx41"
ptxISAVersion 5 3 = "ptx42"
ptxISAVersion 6 _ = "ptx50"
ptxISAVersion _ _ = "ptx40"
{-# NOINLINE ptxTarget #-}
ptxTarget :: LLVM.Target
ptxTarget = unsafePerformIO $ do
initializeAllTargets
fst `fmap` lookupTarget Nothing ptxTargetTriple