-- |
-- Module      : Data.Array.Accelerate.LLVM.Native.Compile.Optimise
-- Copyright   : [2014..2020] The Accelerate Team
-- License     : BSD3
--
-- Maintainer  : Trevor L. McDonell <trevor.mcdonell@gmail.com>
-- Stability   : experimental
-- Portability : non-portable (GHC extensions)
--

module Data.Array.Accelerate.LLVM.Native.Compile.Optimise (

  optimiseModule

) where

-- llvm-hs
import LLVM.AST.DataLayout
import LLVM.Module
import LLVM.PassManager
import LLVM.Target

-- accelerate
import qualified Data.Array.Accelerate.LLVM.Native.Debug        as Debug

-- standard library
import Text.Printf


-- | Run the standard optimisations on the given module when targeting a
-- specific machine and data layout. Specifically, this will run the
-- optimisation passes such that LLVM has the necessary information to
-- automatically vectorise loops (whenever it deems beneficial to do so).
--
optimiseModule
    :: Maybe DataLayout
    -> Maybe TargetMachine
    -> Maybe TargetLibraryInfo
    -> Module
    -> IO ()
optimiseModule :: Maybe DataLayout
-> Maybe TargetMachine
-> Maybe TargetLibraryInfo
-> Module
-> IO ()
optimiseModule Maybe DataLayout
datalayout Maybe TargetMachine
machine Maybe TargetLibraryInfo
libinfo Module
mdl = do

  let p1 :: PassSetSpec
p1 = PassSetSpec
defaultCuratedPassSetSpec
            { optLevel :: Maybe Word
optLevel                           = Word -> Maybe Word
forall a. a -> Maybe a
Just Word
3
            , dataLayout :: Maybe DataLayout
dataLayout                         = Maybe DataLayout
datalayout
            , targetMachine :: Maybe TargetMachine
targetMachine                      = Maybe TargetMachine
machine
            , targetLibraryInfo :: Maybe TargetLibraryInfo
targetLibraryInfo                  = Maybe TargetLibraryInfo
libinfo
            , loopVectorize :: Maybe Bool
loopVectorize                      = Bool -> Maybe Bool
forall a. a -> Maybe a
Just Bool
True
            , superwordLevelParallelismVectorize :: Maybe Bool
superwordLevelParallelismVectorize = Bool -> Maybe Bool
forall a. a -> Maybe a
Just Bool
True
            }
  Bool
b1 <- PassSetSpec -> (PassManager -> IO Bool) -> IO Bool
forall a. PassSetSpec -> (PassManager -> IO a) -> IO a
withPassManager PassSetSpec
p1 ((PassManager -> IO Bool) -> IO Bool)
-> (PassManager -> IO Bool) -> IO Bool
forall a b. (a -> b) -> a -> b
$ \PassManager
pm -> PassManager -> Module -> IO Bool
runPassManager PassManager
pm Module
mdl

  Flag -> String -> IO ()
Debug.traceIO Flag
Debug.dump_cc (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$
    String -> String -> String
forall r. PrintfType r => String -> r
printf String
"llvm: optimisation did work? %s" (Bool -> String
forall a. Show a => a -> String
show Bool
b1)

{--
-- The first gentle optimisation pass. I think this is usually done when loading
-- the module?
--
-- This is the first section of output running 'opt -O3 -debug-pass=Arguments'
--
-- Pass Arguments:
--  -datalayout -notti -basictti -x86tti -no-aa -tbaa -targetlibinfo -basicaa
--  -preverify -domtree -verify -simplifycfg -domtree -sroa -early-cse
--  -lower-expect
--
prepass :: [Pass]
prepass =
  [ SimplifyControlFlowGraph
  , ScalarReplacementOfAggregates { requiresDominatorTree = True }
  , EarlyCommonSubexpressionElimination
  , LowerExpectIntrinsic
  ]

-- The main optimisation pipeline. This mostly matches the process of running
-- 'opt -O3 -debug-pass=Arguments'. We are missing dead argument elimination and
-- in particular, slp-vectorizer (super-word level parallelism).
--
-- Pass Arguments:
--   -targetlibinfo -datalayout -notti -basictti -x86tti -no-aa -tbaa -basicaa
--   -globalopt -ipsccp -deadargelim -instcombine -simplifycfg -basiccg -prune-eh
--   -inline-cost -inline -functionattrs -argpromotion -sroa -domtree -early-cse
--   -lazy-value-info -jump-threading -correlated-propagation -simplifycfg
--   -instcombine -tailcallelim -simplifycfg -reassociate -domtree -loops
--   -loop-simplify -lcssa -loop-rotate -licm -lcssa -loop-unswitch -instcombine
--   -scalar-evolution -loop-simplify -lcssa -indvars -loop-idiom -loop-deletion
--   -loop-unroll -memdep -gvn -memdep -memcpyopt -sccp -instcombine
--   -lazy-value-info -jump-threading -correlated-propagation -domtree -memdep -dse
--   -loops -scalar-evolution -slp-vectorizer -adce -simplifycfg -instcombine
--   -barrier -domtree -loops -loop-simplify -lcssa -scalar-evolution
--   -loop-simplify -lcssa -loop-vectorize -instcombine -simplifycfg
--   -strip-dead-prototypes -globaldce -constmerge -preverify -domtree -verify
--
optpass :: [Pass]
optpass =
  [
    InterproceduralSparseConditionalConstantPropagation                 -- ipsccp
  , InstructionCombining
  , SimplifyControlFlowGraph
  , PruneExceptionHandling
  , FunctionInlining { functionInliningThreshold = 275 }                -- -O2 => 275
  , FunctionAttributes
  , ArgumentPromotion                                                   -- not needed?
  , ScalarReplacementOfAggregates { requiresDominatorTree = True }      -- false?
  , EarlyCommonSubexpressionElimination
  , JumpThreading
  , CorrelatedValuePropagation
  , SimplifyControlFlowGraph
  , InstructionCombining
  , TailCallElimination
  , SimplifyControlFlowGraph
  , Reassociate
  , LoopRotate
  , LoopInvariantCodeMotion
  , LoopClosedSingleStaticAssignment
  , LoopUnswitch { optimizeForSize = False }
  , LoopInstructionSimplify
  , InstructionCombining
  , InductionVariableSimplify
  , LoopIdiom
  , LoopDeletion
  , LoopUnroll { loopUnrollThreshold = Nothing
               , count               = Nothing
               , allowPartial        = Nothing }
  , GlobalValueNumbering { noLoads = False }    -- True to add memory dependency analysis
  , SparseConditionalConstantPropagation
  , InstructionCombining
  , JumpThreading
  , CorrelatedValuePropagation
  , DeadStoreElimination
  , defaultVectorizeBasicBlocks                 -- instead of slp-vectorizer?
  , AggressiveDeadCodeElimination
  , SimplifyControlFlowGraph
  , InstructionCombining
  , LoopVectorize
  , InstructionCombining
  , SimplifyControlFlowGraph
  , GlobalDeadCodeElimination
  , ConstantMerge
  ]
--}