{-# LANGUAGE CPP #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE RecordWildCards #-}

#if __GLASGOW_HASKELL__ <904
#define OPAQUE NOINLINE
#endif

-- | This module is adapted fork of @Test.Tasty.Bench@ from @tasty-bench@
-- package. Check its documentation.
--
-- This module has (almost) the same API but uses @libperf@ to measure instruction counts.
-- They are more stable performance metric, which is useful while still
-- working on the problem.
--
-- The module is named the same so you can easily switch from using
-- tasty-bench to tasty-perfbench by only switching a @build-depends@
-- in your @.cabal@ file.
--
-- The 'Benchmarkable' is run /only once/.
-- There will be about 4000-5000 additional instructions counted in.
--
-- This module isn't great for "nanobenchmarks".
-- The reason to measure only once is to have proper values for branch misspredictions and cache misses. Their counts aren't linear in iterations.
-- In other words: measurements are made cold. If you need hot measurements, make the 'Benchmarkable' do more work.
--
-- You may include
--
-- @
-- , 'bench' "id ()"     $ 'whnf' id ()
-- , 'bench' "Just True" $ 'nf' Just True
-- @
--
-- in your benchmark suite to measure the "does (almost) nothing" operation(s) on your system to get an idea of what is the base level.
-- (On my machine both use 5.6K instructions).
--
-- Using performance monitoring functionality requires additional privileges.
-- Probably easiest way is to:
--
-- @
-- sudo sh -c 'echo 1 | tee \/proc\/sys\/kernel\/perf_event_paranoid'
-- @
--
-- which allows per-process performance monitoring only and excludes system wide performance monitoring.
-- That is enough for @tasty-perfbench@ to do its job.
-- (@libperf@ also counts events happening in kernel space).
-- See https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html
-- for more information.
--
module Test.Tasty.Bench
  (
  -- * Running 'Benchmark'
    defaultMain
  , Benchmark
  , bench
  , bgroup
  , bcompare
  , bcompareWithin
  , env
  , envWithCleanup
  -- * Creating 'Benchmarkable'
  , Benchmarkable(..)
  , nf
  , whnf
  , nfIO
  , whnfIO
  , nfAppIO
  , whnfAppIO
  -- * Ingredients
  , benchIngredients
  , consoleBenchReporter
  , csvReporter
  , FailIfSlower(..)
  , FailIfFaster(..)
  , BenchMode(..)
  -- * Utils
  , locateBenchmark
  , mapLeafBenchmarks
  ) where

import Prelude hiding (Int, Integer)
import qualified Prelude
import Control.Applicative
import Control.Arrow (first, second)
import Control.DeepSeq (NFData, force)
import Control.Exception (bracket, evaluate)
import Control.Monad (void, unless, guard, (>=>), when)
import Data.Data (Typeable)
import Data.Foldable (foldMap, traverse_)
import Data.Int (Int64)
import Data.List (intercalate, stripPrefix, isPrefixOf, genericLength, genericDrop, foldl1')
import Data.Maybe (fromMaybe)
import Data.Monoid (All(..), Any(..))
import Data.Proxy ( Proxy(..) )
import Data.Traversable (forM)
import Data.Word (Word64)
import GHC.Conc
import GHC.Stats (RTSStats, gcs, major_gcs, getRTSStats, getRTSStatsEnabled)
import GHC.IO.Encoding ( utf8, setLocaleEncoding )
import System.Exit ( exitFailure )
import System.IO ( Handle, utf8, hClose, hSetBuffering, hPutStrLn, openFile, stderr, BufferMode(LineBuffering), IOMode(WriteMode) )
import System.IO.Unsafe ( unsafePerformIO )
import System.Mem ( performGC )
import Text.Printf ( printf )
import qualified Data.IntMap.Strict as IM
import Data.IntMap.Strict (IntMap)
import Data.Sequence (Seq, (<|))
import qualified Data.Sequence as Seq
import qualified Data.Set as S
import qualified Data.Map as M

import Test.Tasty hiding (defaultMain)
import qualified Test.Tasty
import Test.Tasty.Ingredients
import Test.Tasty.Ingredients.ConsoleReporter
import Test.Tasty.Options
import Test.Tasty.Patterns.Eval (eval, asB, withFields)
import Test.Tasty.Patterns.Types (Expr (And, Field, IntLit, NF, StringLit, Sub))
import qualified Test.Tasty.Patterns.Types as Patterns
import Test.Tasty.Providers
    ( IsTest(..), Result, singleTest, testFailed, testPassed )
import Test.Tasty.Runners

import System.IO.CodePage (withCP65001)

#if !MIN_VERSION_base(4,11,0)
import Data.Semigroup (Semigroup(..))
#endif

#ifdef MIN_VERSION_libperf
import qualified LibPerf
#endif

data R a = R
    { rInstructions       :: a
    , rBranchInstructions :: a
    , rBranchMisses       :: a
    , rCacheReferences    :: a
    , rCacheMisses        :: a
    , rPageFaultsMaj      :: a
    , rPageFaultsMin      :: a
    }
  deriving (Functor, Foldable, Traversable)

instance Applicative R where
    pure x = R x x x x x x x

    R f1 f2 f3 f4 f5 f6 f7 <*> R x1 x2 x3 x4 x5 x6 x7 =
        R (f1 x1) (f2 x2) (f3 x3) (f4 x4) (f5 x5) (f6 x6) (f7 x7)

#ifdef MIN_VERSION_libperf
perfGroupHandle :: LibPerf.PerfGroupHandle R
perfGroupHandle = unsafePerformIO $
    LibPerf.perfGroupOpen $ R
        LibPerf.HwInstructions
        LibPerf.HwBranchInstructions
        LibPerf.HwBranchMisses
        LibPerf.HwCacheReferences
        LibPerf.HwCacheMisses
        LibPerf.SwPageFaultsMajor
        LibPerf.SwPageFaultsMinor
{-# OPAQUE perfGroupHandle #-}
#endif

data BenchMode = Instructions -- ^ Measure CPU time.

  deriving (Typeable)

-- | In addition to @--fail-if-slower@ command-line option,
-- one can adjust an upper bound of acceptable slow down
-- in comparison to baseline for
-- individual benchmarks and groups of benchmarks
-- using 'adjustOption' and 'localOption'.
--
-- E. g., set upper bound of acceptable slow down to 10% as follows:
--
-- > import Test.Tasty (localOption)
-- > localOption (FailIfSlower 0.10) (bgroup [...])
newtype FailIfSlower = FailIfSlower Double
  deriving (Show, Read, Typeable)

instance IsOption FailIfSlower where
  defaultValue = FailIfSlower (1.0 / 0.0)
  parseValue = fmap FailIfSlower . parsePositivePercents
  optionName = pure "fail-if-slower"
  optionHelp = pure "Upper bound of acceptable slow down in percents. If a benchmark is unacceptably slower than baseline (see --baseline), it will be reported as failed."

-- | In addition to @--fail-if-faster@ command-line option,
-- one can adjust an upper bound of acceptable speed up
-- in comparison to baseline for
-- individual benchmarks and groups of benchmarks
-- using 'adjustOption' and 'localOption'.
--
-- E. g., set upper bound of acceptable speed up to 10% as follows:
--
-- > import Test.Tasty (localOption)
-- > localOption (FailIfFaster 0.10) (bgroup [...])
--
newtype FailIfFaster = FailIfFaster Double
  deriving (Show, Read, Typeable)

instance IsOption FailIfFaster where
  defaultValue = FailIfFaster (1.0 / 0.0)
  parseValue = fmap FailIfFaster . parsePositivePercents
  optionName = pure "fail-if-faster"
  optionHelp = pure "Upper bound of acceptable speed up in percents. If a benchmark is unacceptably faster than baseline (see --baseline), it will be reported as failed."

parsePositivePercents :: String -> Maybe Double
parsePositivePercents xs = do
  x <- safeRead xs
  guard (x > 0)
  pure (x / 100)

instance IsOption BenchMode where
  defaultValue = Instructions
  parseValue v = case v of
    "instructions" -> Just Instructions
    _ -> Nothing
  optionName = pure "perfbench-mode"
  optionHelp = pure "What to perfbench"
  showDefaultValue m = Just $ case m of
    Instructions -> "instructions"

-- | Something that can be benchmarked, produced by 'nf', 'whnf', 'nfIO', 'whnfIO',
-- 'nfAppIO', 'whnfAppIO' below.
--
-- Drop-in replacement for @Criterion.@'Criterion.Benchmarkable' and
-- @Gauge.@'Gauge.Benchmarkable'.
--
newtype Benchmarkable =
    Benchmarkable
  { unBenchmarkable :: IO () -- ^ Run benchmark given number of times.
  } deriving (Typeable)


showCount :: Word64 -> String
showCount i
  | t < 995   = printf "%3.0f   "  t
  | t < 995e1 = printf "%4.2f K"  (t / 1e3)
  | t < 995e2 = printf "%4.1f K"  (t / 1e3)
  | t < 995e3 = printf "%3.0f  K" (t / 1e3)
  | t < 995e4 = printf "%4.2f M"  (t / 1e6)
  | t < 995e5 = printf "%4.1f M"  (t / 1e6)
  | t < 995e6 = printf "%3.0f  M" (t / 1e6)
  | t < 995e7 = printf "%4.2f G"  (t / 1e9)
  | t < 995e8 = printf "%4.1f G"  (t / 1e9)
  | t < 995e9 = printf "%3.0f  G" (t / 1e9)
  | otherwise = printf "%4.3f T"  (t / 1e12)
  where
    t = word64ToDouble i

data Measurement = Measurement
  { measInstructions  :: !Word64 -- ^ instructions count
  , measBranchTotal   :: !Word64
  , measBranchMisses  :: !Word64
  , measCacheTotal    :: !Word64
  , measCacheMisses   :: !Word64
  , measPageFaultsMaj :: !Word64
  , measPageFaultsMin :: !Word64
  , measMajorGC       :: !Word64
  , measMinorGC       :: !Word64
  } deriving (Show, Read)

type Estimate = Measurement

prettyEstimate :: Estimate -> Maybe Estimate -> String
prettyEstimate m ms = intercalate "\n"
    [ line  measInstructions "instructions"
    , line  measBranchTotal "branch instructions"
    , line2 measBranchTotal measBranchMisses "branch misspredictions"
    , line  measCacheTotal "cache references"
    , line2 measCacheTotal measCacheMisses "cache misses"
    , line  measPageFaultsMaj "major page faults"
    , line  measPageFaultsMin "monor page faults"
    , line  measMajorGC "major gcs"
    , line  measMinorGC "minor gcs"
    ]
  where
    line f msg =
      let pfx = showCount (f m) ++ " " ++ msg
      in case ms of
        Nothing -> pfx
        Just n -> pfx ++ replicate (40 - length pfx) ' ' ++ printf "  base: %s, change: %+0.1f%%" (showCount y) (percentDiff y x)
          where
            x = f m
            y = f n

    line2 f g msg =
      let pfx = showCount (g m) ++ " " ++ msg ++ printf " (%.01f%%)" (ratio g f)
      in case ms of
        Nothing -> pfx
        Just n -> pfx ++ replicate (40 - length pfx) ' ' ++ printf "  base: %s" (showCount y)
          where
            y = g n

    ratio a b = percent (a m) (b m)
    percent a b = 100 * word64ToDouble a / word64ToDouble (max 1 b)

    percentDiff _ 0 = 0
    percentDiff a b = 100 * (b' - a') / b'
      where
        a' = word64ToDouble a
        b' = word64ToDouble b

data WithLoHi a = WithLoHi
  !a      -- payload
  !Double -- lower bound (e. g., 0.9 for -10% speedup)
  !Double -- upper bound (e. g., 1.2 for +20% slowdown)
  deriving (Show, Read)

measure :: BenchMode -> Benchmarkable -> IO Measurement
#ifdef MIN_VERSION_libperf
measure _benchMode (Benchmarkable act) = do
  -- run once
  -- and to a regression
  -- to cut off as much of framework effect as possible.
  LibPerf.perfGroupReset perfGroupHandle
  performGC
  (majGC, minGC) <- rtsStats
  LibPerf.perfGroupEnable perfGroupHandle
  act
  LibPerf.perfGroupDisable perfGroupHandle
  (majGC', minGC') <- rtsStats
  result <- LibPerf.perfGroupRead perfGroupHandle

  let meas = Measurement
        { measInstructions  = rInstructions result
        , measBranchTotal   = rBranchInstructions result
        , measBranchMisses  = rBranchMisses result
        , measCacheTotal    = rCacheReferences result
        , measCacheMisses   = rCacheMisses result
        , measPageFaultsMaj = rPageFaultsMaj result
        , measPageFaultsMin = rPageFaultsMin result
        , measMinorGC       = minGC' - minGC
        , measMajorGC       = majGC' - majGC
        }
  pure $! meas
#else
measure _ _ = pure Measurement
        { measInstructions = 0
        , measBranchTotal  = 0
        , measBranchMisses = 0
        , measCacheTotal   = 0
        , measCacheMisses  = 0
        , measPageFaults   = 0
        , measMinorGC      = 0
        , measMajorGC      = 0
        }
#endif

rtsStats :: IO (Word64, Word64)
rtsStats = do
    enabled <- getRTSStatsEnabled
    if enabled
    then do
        stats <- getRTSStats
        let majGC = major_gcs stats
        let allGC = gcs stats
        return (fromIntegral majGC, fromIntegral (allGC - majGC))
    else return (0, 0)

instance IsTest Benchmarkable where
  testOptions = pure
    -- FailIfSlower and FailIfFaster must be options of a test provider rather
    -- than options of an ingredient to allow setting them on per-test level.
    [ Option (Proxy :: Proxy FailIfSlower)
    , Option (Proxy :: Proxy FailIfFaster)
    , Option (Proxy :: Proxy BenchMode)
    ]
  run opts b = const $ case getNumThreads (lookupOption opts) of
    1 -> do
      let benchMode = lookupOption opts
      est <- measure benchMode b
      let FailIfSlower ifSlower = lookupOption opts
          FailIfFaster ifFaster = lookupOption opts
      pure $ testPassed $ show (WithLoHi est (1 - ifFaster) (1 + ifSlower))
    _ -> pure $ testFailed "Benchmarks must not be run concurrently. Please pass -j1 and/or avoid +RTS -N."

-- | Attach a name to 'Benchmarkable'.
--
-- This is actually a synonym of 'Test.Tasty.Providers.singleTest' to
-- provide an interface compatible with @Criterion.@'Criterion.bench'
-- and @Gauge.@'Gauge.bench'.
bench :: String -> Benchmarkable -> Benchmark
bench = singleTest

-- | Attach a name to a group of 'Benchmark'.
--
-- This is actually a synonym of 'Test.Tasty.testGroup' to provide an
-- interface compatible with @Criterion.@'Criterion.bgroup' and
-- @Gauge@.'Gauge.bgroup'.
bgroup :: String -> [Benchmark] -> Benchmark
bgroup = testGroup

-- | Compare benchmarks, reporting relative speed up or slow down.
--
-- This function is a vague reminiscence of @bcompare@, which existed in pre-1.0
-- versions of @criterion@, but their types are incompatible. Under the hood
-- 'bcompare' is a thin wrapper over 'after' and requires @tasty-1.2@.
-- If you use 'bcompare', it is prudent to add @tasty >= 1.2@ to @build-depends@
-- section of your cabal file.
--
-- Here is a basic example:
--
-- > import Test.Tasty.Bench
-- >
-- > fibo :: Int -> Integer
-- > fibo n = if n < 2 then toInteger n else fibo (n - 1) + fibo (n - 2)
-- >
-- > main :: IO ()
-- > main = defaultMain
-- >   [ bgroup "fibonacci numbers"
-- >     [ bcompare "tenth"  $ bench "fifth"     $ nf fibo  5
-- >     ,                     bench "tenth"     $ nf fibo 10
-- >     , bcompare "tenth"  $ bench "twentieth" $ nf fibo 20
-- >     ]
-- >   ]
--
-- More complex examples:
--
-- * https://hackage.haskell.org/package/chimera-0.3.2.0/src/bench/Bench.hs
-- * https://hackage.haskell.org/package/fast-digits-0.3.1.0/src/bench/Bench.hs
-- * https://hackage.haskell.org/package/unicode-data-0.3.0/src/bench/Main.hs
bcompare
  :: String
  -- ^ @tasty@ pattern, which must unambiguously
  -- match a unique baseline benchmark. Consider using 'locateBenchmark' to construct it.
  -> Benchmark
  -- ^ Benchmark (or a group of benchmarks)
  -- to be compared against the baseline benchmark by dividing measured mean times.
  -- The result is reported by 'consoleBenchReporter', e. g., 0.50x or 1.25x.
  -> Benchmark
bcompare = bcompareWithin (-1/0) (1/0)

-- | Same as 'bcompare', but takes expected lower and upper bounds of
-- comparison. If the result is not within provided bounds, benchmark fails.
-- This allows to create portable performance tests: instead of comparing
-- to an absolute timeout or to previous runs, you can state that one implementation
-- of an algorithm must be faster than another.
--
-- E. g., 'bcompareWithin' 2.0 3.0 passes only if a benchmark is at least 2x
-- and at most 3x slower than a baseline.
bcompareWithin
  :: Double    -- ^ Lower bound of relative speed up.
  -> Double    -- ^ Upper bound of relative spped up.
  -> String    -- ^ @tasty@ pattern to locate a baseline benchmark.
  -> Benchmark -- ^ Benchmark to compare against baseline.
  -> Benchmark
bcompareWithin lo hi s = case parseExpr s of
  Nothing -> error $ "Could not parse bcompare pattern " ++ s
  Just e  -> after_ AllSucceed (And (StringLit (bcomparePrefix ++ show (lo, hi))) e)

bcomparePrefix :: String
bcomparePrefix = "tasty-perfbench"

-- | Benchmarks are actually just a regular 'Test.Tasty.TestTree' in disguise.
--
-- This is a drop-in replacement for @Criterion.@'Criterion.Benchmark'
-- and @Gauge.@'Gauge.Benchmark'.
type Benchmark = TestTree

-- | Run benchmarks and report results, providing an interface
-- compatible with @Criterion.@'Criterion.defaultMain' and
-- @Gauge.@'Gauge.defaultMain'.
defaultMain :: [Benchmark] -> IO ()
defaultMain bs = do
  let act = Test.Tasty.defaultMainWithIngredients benchIngredients $ testGroup "All" bs
  setLocaleEncoding utf8
  withCP65001 act

-- | List of default benchmark ingredients. This is what 'defaultMain' runs.
benchIngredients :: [Ingredient]
benchIngredients = [listingTests, composeReporters consoleBenchReporter csvReporter]

funcToBench :: (b -> c) -> (a -> b) -> a -> Benchmarkable
funcToBench frc f x = Benchmarkable $ do
  _ <- evaluate (frc (f x))
  pure  ()
{-# OPAQUE funcToBench #-}

-- | 'nf' @f@ @x@ measures time to compute
-- a normal form (by means of 'force') of an application of @f@ to @x@.
-- This does not include time to evaluate @f@ or @x@ themselves.
-- Ideally @x@ should be a primitive data type like 'Data.Int.Int'.
--
-- The same thunk of @x@ is shared by multiple calls of @f@. We cannot evaluate
-- @x@ beforehand: there is no 'NFData' @a@ constraint, and potentially @x@ may
-- be an infinite structure. Thus @x@ will be evaluated in course of the first
-- application of @f@. This noisy measurement is to be discarded soon,
-- but if @x@ is not a primitive data type, consider forcing its evaluation
-- separately, e. g., via 'env' or 'withResource'.
--
-- Here is a textbook anti-pattern: 'nf' 'sum' @[1..1000000]@.
-- Since an input list is shared by multiple invocations of 'sum',
-- it will be allocated in memory in full, putting immense pressure
-- on garbage collector. Also no list fusion will happen.
-- A better approach is 'nf' (@\\n@ @->@ 'sum' @[1..n]@) @1000000@.
--
-- If you are measuring an inlinable function,
-- it is prudent to ensure that its invocation is fully saturated,
-- otherwise inlining will not happen. That's why one can often
-- see 'nf' (@\\n@ @->@ @f@ @n@) @x@ instead of 'nf' @f@ @x@.
-- Same applies to rewrite rules.
--
-- While @tasty-bench@ is capable to perform micro- and even nanobenchmarks,
-- such measurements are noisy and involve an overhead. Results are more reliable
-- when @f@ @x@ takes at least several milliseconds.
--
-- Note that forcing a normal form requires an additional
-- traverse of the structure. In certain scenarios (imagine benchmarking 'tail'),
-- especially when 'NFData' instance is badly written,
-- this traversal may take non-negligible time and affect results.
--
-- Drop-in replacement for @Criterion.@'Criterion.nf' and
-- @Gauge.@'Gauge.nf'.
nf :: NFData b => (a -> b) -> a -> Benchmarkable
nf = funcToBench force
{-# INLINE nf #-}

-- | 'whnf' @f@ @x@ measures time to compute
-- a weak head normal form of an application of @f@ to @x@.
-- This does not include time to evaluate @f@ or @x@ themselves.
-- Ideally @x@ should be a primitive data type like 'Data.Int.Int'.
--
-- The same thunk of @x@ is shared by multiple calls of @f@. We cannot evaluate
-- @x@ beforehand: there is no 'NFData' @a@ constraint, and potentially @x@ may
-- be an infinite structure. Thus @x@ will be evaluated in course of the first
-- application of @f@. This noisy measurement is to be discarded soon,
-- but if @x@ is not a primitive data type, consider forcing its evaluation
-- separately, e. g., via 'env' or 'withResource'.
--
-- Computing only a weak head normal form is
-- rarely what intuitively is meant by "evaluation".
-- Beware that many educational materials contain examples with 'whnf':
-- this is a wrong default.
-- Unless you understand precisely, what is measured,
-- it is recommended to use 'nf' instead.
--
-- Here is a textbook anti-pattern: 'whnf' ('Data.List.replicate' @1000000@) @1@.
-- This will succeed in a matter of nanoseconds, because weak head
-- normal form forces only the first element of the list.
--
-- Drop-in replacement for @Criterion.@'Criterion.whnf' and @Gauge.@'Gauge.whnf'.
whnf :: (a -> b) -> a -> Benchmarkable
whnf = funcToBench id
{-# INLINE whnf #-}

ioToBench :: (b -> c) -> IO b -> Benchmarkable
ioToBench frc act = Benchmarkable $ do
    val <- act
    _ <- evaluate (frc val)
    return ()
{-# OPAQUE ioToBench #-}

-- | 'nfIO' @x@ measures time to evaluate side-effects of @x@
-- and compute its normal form (by means of 'force').
--
-- Pure subexpression of an effectful computation @x@
-- may be evaluated only once and get cached.
-- To avoid surprising results it is usually preferable
-- to use 'nfAppIO' instead.
--
-- Note that forcing a normal form requires an additional
-- traverse of the structure. In certain scenarios,
-- especially when 'NFData' instance is badly written,
-- this traversal may take non-negligible time and affect results.
--
-- A typical use case is 'nfIO' ('readFile' @"foo.txt"@).
-- However, if your goal is not to benchmark I\/O per se,
-- but just read input data from a file, it is cleaner to
-- use 'env' or 'withResource'.
--
-- Drop-in replacement for @Criterion.@'Criterion.nfIO' and @Gauge.@'Gauge.nfIO'.
nfIO :: NFData a => IO a -> Benchmarkable
nfIO = ioToBench force
{-# INLINE nfIO #-}

-- | 'whnfIO' @x@ measures time to evaluate side-effects of @x@
-- and compute its weak head normal form.
--
-- Pure subexpression of an effectful computation @x@
-- may be evaluated only once and get cached.
-- To avoid surprising results it is usually preferable
-- to use 'whnfAppIO' instead.
--
-- Computing only a weak head normal form is
-- rarely what intuitively is meant by "evaluation".
-- Unless you understand precisely, what is measured,
-- it is recommended to use 'nfIO' instead.
--
-- Lazy I\/O is treacherous.
-- If your goal is not to benchmark I\/O per se,
-- but just read input data from a file, it is cleaner to
-- use 'env' or 'withResource'.
--
-- Drop-in replacement for @Criterion.@'Criterion.whnfIO' and @Gauge.@'Gauge.whnfIO'.
whnfIO :: IO a -> Benchmarkable
whnfIO = ioToBench id
{-# INLINE whnfIO #-}

ioFuncToBench :: (b -> c) -> (a -> IO b) -> a -> Benchmarkable
ioFuncToBench frc f x = Benchmarkable $ do
    val <- f x
    _ <- evaluate (frc val)
    return ()
{-# OPAQUE ioFuncToBench #-}

-- | 'nfAppIO' @f@ @x@ measures time to evaluate side-effects of
-- an application of @f@ to @x@.
-- and compute its normal form (by means of 'force').
-- This does not include time to evaluate @f@ or @x@ themselves.
-- Ideally @x@ should be a primitive data type like 'Data.Int.Int'.
--
-- The same thunk of @x@ is shared by multiple calls of @f@. We cannot evaluate
-- @x@ beforehand: there is no 'NFData' @a@ constraint, and potentially @x@ may
-- be an infinite structure. Thus @x@ will be evaluated in course of the first
-- application of @f@. This noisy measurement is to be discarded soon,
-- but if @x@ is not a primitive data type, consider forcing its evaluation
-- separately, e. g., via 'env' or 'withResource'.
--
-- Note that forcing a normal form requires an additional
-- traverse of the structure. In certain scenarios,
-- especially when 'NFData' instance is badly written,
-- this traversal may take non-negligible time and affect results.
--
-- A typical use case is 'nfAppIO' 'readFile' @"foo.txt"@.
-- However, if your goal is not to benchmark I\/O per se,
-- but just read input data from a file, it is cleaner to
-- use 'env' or 'withResource'.
--
-- Drop-in replacement for @Criterion.@'Criterion.nfAppIO' and @Gauge.@'Gauge.nfAppIO'.
nfAppIO :: NFData b => (a -> IO b) -> a -> Benchmarkable
nfAppIO = ioFuncToBench force
{-# INLINE nfAppIO #-}

-- | 'whnfAppIO' @f@ @x@ measures time to evaluate side-effects of
-- an application of @f@ to @x@.
-- and compute its weak head normal form.
-- This does not include time to evaluate @f@ or @x@ themselves.
-- Ideally @x@ should be a primitive data type like 'Data.Int.Int'.
--
-- The same thunk of @x@ is shared by multiple calls of @f@. We cannot evaluate
-- @x@ beforehand: there is no 'NFData' @a@ constraint, and potentially @x@ may
-- be an infinite structure. Thus @x@ will be evaluated in course of the first
-- application of @f@. This noisy measurement is to be discarded soon,
-- but if @x@ is not a primitive data type, consider forcing its evaluation
-- separately, e. g., via 'env' or 'withResource'.
--
-- Computing only a weak head normal form is
-- rarely what intuitively is meant by "evaluation".
-- Unless you understand precisely, what is measured,
-- it is recommended to use 'nfAppIO' instead.
--
-- Lazy I\/O is treacherous.
-- If your goal is not to benchmark I\/O per se,
-- but just read input data from a file, it is cleaner to
-- use 'env' or 'withResource'.
--
-- Drop-in replacement for @Criterion.@'Criterion.whnfAppIO' and @Gauge.@'Gauge.whnfAppIO'.
whnfAppIO :: (a -> IO b) -> a -> Benchmarkable
whnfAppIO = ioFuncToBench id
{-# INLINE whnfAppIO #-}

-- | Run benchmarks in the given environment, usually reading large input data from file.
--
-- One might wonder why 'env' is needed,
-- when we can simply read all input data
-- before calling 'defaultMain'. The reason is that large data
-- dangling in the heap causes longer garbage collection
-- and slows down all benchmarks, even those which do not use it at all.
--
-- It is instrumental not only for proper 'IO' actions,
-- but also for a large statically-known data as well. Instead of a top-level
-- definition, which once evaluated will slow down garbage collection
-- during all subsequent benchmarks,
--
-- > largeData :: String
-- > largeData = replicate 1000000 'a'
-- >
-- > main :: IO ()
-- > main = defaultMain
-- >   [ bench "large" $ nf length largeData, ... ]
--
-- use
--
-- > import Control.DeepSeq (force)
-- > import Control.Exception (evaluate)
-- >
-- > main :: IO ()
-- > main = defaultMain
-- >   [ env (evaluate (force (replicate 1000000 'a'))) $ \largeData ->
-- >     bench "large" $ nf length largeData, ... ]
--
-- @Test.Tasty.Bench.@'env' is provided only for the sake of
-- compatibility with @Criterion.@'Criterion.env' and
-- @Gauge.@'Gauge.env', and involves 'unsafePerformIO'. Consider using
-- 'withResource' instead.
--
-- 'defaultMain' requires that the hierarchy of benchmarks and their names is
-- independent of underlying 'IO' actions. While executing 'IO' inside 'bench'
-- via 'nfIO' is fine, and reading test data from files via 'env' is also fine,
-- using 'env' to choose benchmarks or their names depending on 'IO' side effects
-- will throw a rather cryptic error message:
--
-- > Unhandled resource. Probably a bug in the runner you're using.
env :: NFData env => IO env -> (env -> Benchmark) -> Benchmark
env res = envWithCleanup res (const $ pure ())

-- | Similar to 'env', but includes an additional argument
-- to clean up created environment.
--
-- Provided only for the sake of compatibility with
-- @Criterion.@'Criterion.envWithCleanup' and
-- @Gauge.@'Gauge.envWithCleanup', and involves
-- 'unsafePerformIO'. Consider using 'withResource' instead.
envWithCleanup :: NFData env => IO env -> (env -> IO a) -> (env -> Benchmark) -> Benchmark
envWithCleanup res fin f = withResource
  (res >>= evaluate . force)
  (void . fin)
  (f . unsafePerformIO)

-- | A path to write results in CSV format, populated by @--csv@.
--
-- This is an option of 'csvReporter' and can be set only globally.
-- Modifying it via 'adjustOption' or 'localOption' does not have any effect.
-- One can however pass it to 'tryIngredients' 'benchIngredients'. For example,
-- here is how to set a default CSV location:
--
-- @
-- import Data.Maybe
-- import System.Exit
-- import Test.Tasty.Bench
-- import Test.Tasty.Ingredients
-- import Test.Tasty.Options
-- import Test.Tasty.Runners
--
-- main :: IO ()
-- main = do
--   let benchmarks = bgroup \"All\" ...
--   opts <- parseOptions benchIngredients benchmarks
--   let opts' = changeOption (Just . fromMaybe (CsvPath "foo.csv")) opts
--   case tryIngredients benchIngredients opts' benchmarks of
--     Nothing -> exitFailure
--     Just mb -> mb >>= \\b -> if b then exitSuccess else exitFailure
-- @
newtype CsvPath = CsvPath FilePath
  deriving (Typeable)

instance IsOption (Maybe CsvPath) where
  defaultValue = Nothing
  parseValue = Just . Just . CsvPath
  optionName = pure "csv"
  optionHelp = pure "File to write results in CSV format"

-- | Run benchmarks and save results in CSV format.
-- It activates when @--csv@ @FILE@ command line option is specified.
csvReporter :: Ingredient
csvReporter = TestReporter [Option (Proxy :: Proxy (Maybe CsvPath))] $
  \opts tree -> do
    CsvPath path <- lookupOption opts
    let names = testsNames opts tree
        namesMap = IM.fromDistinctAscList $ zip [0..] names
    pure $ \smap -> do
      case findNonUniqueElement names of
        Nothing -> pure ()
        Just name -> do -- 'die' is not available before base-4.8
          hPutStrLn stderr $ "CSV report cannot proceed, because name '" ++ name ++ "' corresponds to two or more benchmarks. Please disambiguate them."
          exitFailure
      let augmented = IM.intersectionWith (,) namesMap smap
      bracket
        (do
          h <- openFile path WriteMode
          hSetBuffering h LineBuffering
          hPutStrLn h $ "Name,Instructions"
          pure h
        )
        hClose
        $ \h -> csvOutput h augmented
      pure $ const $ isSuccessful smap

isSuccessful :: StatusMap -> IO Bool
isSuccessful = go . IM.elems
  where
    go [] = pure True
    go (tv : tvs) = do
      b <- atomically $ readTVar tv >>= \s -> case s of Done r -> pure (resultSuccessful r); _ -> retry
      if b then go tvs else pure False

findNonUniqueElement :: Ord a => [a] -> Maybe a
findNonUniqueElement = go S.empty
  where
    go _ [] = Nothing
    go acc (x : xs)
      | x `S.member` acc = Just x
      | otherwise = go (S.insert x acc) xs

csvOutput :: Handle -> IntMap (TestName, TVar Status) -> IO ()
csvOutput h = traverse_ $ \(name, tv) -> do
  r <- atomically $ readTVar tv >>= \s -> case s of Done r -> pure r; _ -> retry
  case safeRead (resultDescription r) of
    Nothing -> pure ()
    Just (WithLoHi est _ _) -> do
      msg <- formatMessage $ csvEncodeRow
          [ name
          , show (measInstructions est)
          , show (measBranchTotal est)
          , show (measBranchMisses est)
          , show (measCacheTotal est)
          , show (measCacheMisses est)
          , show (measPageFaultsMaj est)
          , show (measPageFaultsMin est)
          , show (measMajorGC est)
          , show (measMinorGC est)
          ]
      hPutStrLn h msg

-- | A path to read baseline results in CSV format, populated by @--baseline@.
--
-- This is an option of 'csvReporter' and can be set only globally.
-- Modifying it via 'adjustOption' or 'localOption' does not have any effect.
-- One can however pass it to 'tryIngredients' 'benchIngredients'.
newtype BaselinePath = BaselinePath FilePath
  deriving (Typeable)

instance IsOption (Maybe BaselinePath) where
  defaultValue = Nothing
  parseValue = Just . Just . BaselinePath
  optionName = pure "baseline"
  optionHelp = pure "File with baseline results in CSV format to compare against"

-- | Run benchmarks and report results
-- in a manner similar to 'consoleTestReporter'.
--
-- If @--baseline@ @FILE@ command line option is specified,
-- compare results against an earlier run and mark
-- too slow / too fast benchmarks as failed in accordance to
-- bounds specified by @--fail-if-slower@ @PERCENT@ and @--fail-if-faster@ @PERCENT@.
consoleBenchReporter :: Ingredient
consoleBenchReporter = modifyConsoleReporter [Option (Proxy :: Proxy (Maybe BaselinePath))] $ \opts -> do
  baseline <- case lookupOption opts of
    Nothing -> pure M.empty
    Just (BaselinePath path) -> mkBaseline . csvDecodeTable <$> (readFile path >>= evaluate . force)

  pure $ \name mDepR r -> case safeRead (resultDescription r) of
    Nothing  -> r
    Just (WithLoHi est lowerBound upperBound) ->
      (if isAcceptable then id else forceFail)
      r { resultDescription = prettyEstimate est baseline' }
      where
        baseline' :: Maybe Estimate
        baseline' = M.lookup name baseline >>= \cells -> case cells of
          x1:x2:x3:x4:x5:x6:x7:x8:x9:_ -> pure Measurement
            <*> safeRead x1
            <*> safeRead x2
            <*> safeRead x3
            <*> safeRead x4
            <*> safeRead x5
            <*> safeRead x6
            <*> safeRead x7
            <*> safeRead x8
            <*> safeRead x9
          _ -> Nothing

        isAcceptable = isAcceptableVsBaseline && isAcceptableVsBcompare
        mSlowDown = compareVsBaseline baseline name est
        slowDown = fromMaybe 1 mSlowDown
        isAcceptableVsBaseline = slowDown >= lowerBound && slowDown <= upperBound
        (isAcceptableVsBcompare, _bcompareMsg) = case mDepR of
          Nothing -> (True, "")
          Just (WithLoHi depR depLowerBound depUpperBound) -> case safeRead (resultDescription depR) of
            Nothing -> (True, "")
            Just (WithLoHi depEst _ _) -> let ratio = estValue est / estValue depEst in
              ( ratio >= depLowerBound && ratio <= depUpperBound
              , printf ", %.2fx" ratio
              )
  where
    mkBaseline :: [[String]] -> M.Map String [String]
    mkBaseline table = M.fromList [ (name, row) | name:row <- table ]

estValue :: Estimate -> Double
estValue = word64ToDouble . measInstructions

compareVsBaseline :: M.Map String [String] -> TestName -> Estimate -> Maybe Double
compareVsBaseline baseline name m = case mOld of
  Nothing -> Nothing
  Just oldTime -> Just $ int64ToDouble time / int64ToDouble oldTime
  where
    time = word64ToInt64 $ measInstructions m

    mOld :: Maybe Int64
    mOld = do
      case M.lookup name baseline of
        Nothing       -> Nothing
        Just []       -> Nothing
        Just (cell:_) -> safeRead cell

{-
formatSlowDown :: Maybe Double -> String
formatSlowDown Nothing = ""
formatSlowDown (Just ratio) = case percents `compare` 0 of
  LT -> printf ", %2i%% less than baseline" (-percents)
  EQ -> ",       same as baseline"
  GT -> printf ", %2i%% more than baseline" percents
  where
    percents :: Int64
    percents = truncate ((ratio - 1) * 100)
-}

forceFail :: Result -> Result
forceFail r = r { resultOutcome = Failure TestFailed, resultShortDescription = "FAIL" }

data Unique a = None | Unique !a | NotUnique
  deriving (Functor)

appendUnique :: Unique a -> Unique a -> Unique a
appendUnique None a = a
appendUnique a None = a
appendUnique _ _ = NotUnique

instance Semigroup (Unique a) where
  (<>) = appendUnique

instance Monoid (Unique a) where
  mempty = None
  mappend = (<>)

modifyConsoleReporter
    :: [OptionDescription]
    -> (OptionSet -> IO (TestName -> Maybe (WithLoHi Result) -> Result -> Result))
    -> Ingredient
modifyConsoleReporter desc' iof = TestReporter (desc ++ desc') $ \opts tree ->
  let nameSeqs     = IM.fromDistinctAscList $ zip [0..] $ testNameSeqs opts tree
      namesAndDeps = IM.fromDistinctAscList $ zip [0..] $ map (second isSingle)
                   $ testNamesAndDeps nameSeqs opts tree
      modifySMap   = (iof opts >>=) . flip postprocessResult
                   . IM.intersectionWith (\(a, b) c -> (a, b, c)) namesAndDeps
  in (modifySMap >=>) <$> cb opts tree
  where
    (desc, cb) = case consoleTestReporter of
      TestReporter d c -> (d, c)
      _ -> error "modifyConsoleReporter: consoleTestReporter must be TestReporter"

    isSingle (Unique a) = Just a
    isSingle _ = Nothing

testNameSeqs :: OptionSet -> TestTree -> [Seq TestName]
testNameSeqs = foldTestTree trivialFold
  { foldSingle = const $ const . (:[]) . Seq.singleton
  , foldGroup  = const $ map . (<|)
  }

testNamesAndDeps :: IntMap (Seq TestName) -> OptionSet -> TestTree -> [(TestName, Unique (WithLoHi IM.Key))]
testNamesAndDeps im = foldTestTree trivialFold
  { foldSingle = const $ const . (: []) . (, mempty)
  , foldGroup  = const $ map . first . (++) . (++ ".")
  , foldAfter  = const foldDeps
  }
  where
    foldDeps :: DependencyType -> Expr -> [(a, Unique (WithLoHi IM.Key))] -> [(a, Unique (WithLoHi IM.Key))]
    foldDeps AllSucceed (And (StringLit xs) p)
      | bcomparePrefix `isPrefixOf` xs
      , Just (lo :: Double, hi :: Double) <- safeRead $ drop (length bcomparePrefix) xs
      = map $ second $ mappend $ (\x -> WithLoHi x lo hi) <$> findMatchingKeys im p
    foldDeps _ _ = id

findMatchingKeys :: IntMap (Seq TestName) -> Expr -> Unique IM.Key
findMatchingKeys im pattern =
  foldMap (\(k, v) -> if withFields v pat == Right True then Unique k else mempty) $ IM.assocs im
  where
    pat = eval pattern >>= asB


postprocessResult
    :: (TestName -> Maybe (WithLoHi Result) -> Result -> Result)
    -> IntMap (TestName, Maybe (WithLoHi IM.Key), TVar Status)
    -> IO StatusMap
postprocessResult f src = do
  paired <- forM src $ \(name, mDepId, tv) -> (name, mDepId, tv,) <$> newTVarIO NotStarted
  let doUpdate = atomically $ do
        (Any anyUpdated, All allDone) <-
          getApp $ flip foldMap paired $ \(name, mDepId, newTV, oldTV) -> Ap $ do
            old <- readTVar oldTV
            case old of
              Done{} -> pure (Any False, All True)
              _ -> do
                new <- readTVar newTV
                case new of
                  Done res -> do

                    depRes <- case mDepId of
                      Nothing -> pure Nothing
                      Just (WithLoHi depId lo hi) -> case IM.lookup depId src of
                        Nothing -> pure Nothing
                        Just (_, _, depTV) -> do
                          depStatus <- readTVar depTV
                          case depStatus of
                            Done dep -> pure $ Just (WithLoHi dep lo hi)
                            _ -> pure Nothing

                    writeTVar oldTV (Done (f name depRes res))
                    pure (Any True, All True)
                  -- ignoring Progress nodes, we do not report any
                  -- it would be helpful to have instance Eq Progress
                  _ -> pure (Any False, All False)
        if anyUpdated || allDone then pure allDone else retry
      adNauseam = doUpdate >>= (`unless` adNauseam)
  _ <- forkIO adNauseam
  pure $ fmap (\(_, _, _, a) -> a) paired

int64ToDouble :: Int64 -> Double
int64ToDouble = fromIntegral

word64ToInt64 :: Word64 -> Int64
word64ToInt64 = fromIntegral

word64ToDouble :: Word64 -> Double
word64ToDouble = fromIntegral

#if !MIN_VERSION_base(4,10,0)
int64ToWord64 :: Int64 -> Word64
int64ToWord64 = fromIntegral
#endif

-- | Map leaf benchmarks ('bench', not 'bgroup') with a provided function,
-- which has an access to leaf's reversed path.
--
-- This helper is useful for bulk application of 'bcompare'.
-- See also 'locateBenchmark'.
--
-- Real world example: https://hackage.haskell.org/package/text-builder-linear-0.1/src/bench/Main.hs
mapLeafBenchmarks :: ([String] -> Benchmark -> Benchmark) -> Benchmark -> Benchmark
mapLeafBenchmarks processLeaf = go mempty
  where
    go :: [String] -> Benchmark -> Benchmark
    go path x = case x of
      SingleTest name t    -> processLeaf (name : path) (SingleTest name t)
      TestGroup name tts   -> TestGroup name (map (go (name : path))  tts)
      PlusTestOptions g tt -> PlusTestOptions g (go path tt)
      WithResource res f   -> WithResource res (go path . f)
      AskOptions f         -> AskOptions (go path . f)
      After dep expr tt    -> After dep expr (go path tt)

-- | Construct an AWK expression to locate an individual element or elements in 'Benchmark'
-- by the suffix of the path. Names are listed in reverse order:
-- from 'bench'\'s own name to a name of the outermost 'bgroup'.
--
-- This function is meant to be used in conjunction with 'bcompare', e. g.,
-- 'bcompare' ('Test.Tasty.Patterns.Printer.printAwkExpr' ('locateBenchmark' @path@)).
-- See also 'mapLeafBenchmarks'.
--
-- This function requires @tasty-1.0@, so if you use 'locateBenchmark'
-- it is prudent to add @tasty >= 1.0@ to @build-depends@
-- section of your cabal file.
--
-- Real world example: https://hackage.haskell.org/package/text-builder-linear-0.1/src/bench/Main.hs
locateBenchmark :: [String] -> Expr
locateBenchmark [] = IntLit 1
locateBenchmark path
  = foldl1' And
  $ zipWith (\i name -> Patterns.EQ (Field (Sub NF (IntLit i))) (StringLit name)) [0..] path

-------------------------------------------------------------------------------
-- Csv: Encoding
-------------------------------------------------------------------------------

csvEncodeRow :: [String] -> String
csvEncodeRow []     = ""
csvEncodeRow [x]    = csvEncodeField x
csvEncodeRow (x:xs) = csvEncodeField x ++ "," ++ csvEncodeRow xs

csvEncodeField :: String -> String
csvEncodeField xs
  | any (`elem` xs) ",\"\n\r"
  = '"' : go xs -- opening quote

  | otherwise = xs
  where
    go []         = '"' : []
    go ('"' : ys) = '"' : '"' : go ys
    go (y : ys)   = y : go ys

-------------------------------------------------------------------------------
-- Csv: Decoding
-------------------------------------------------------------------------------

-- | Decode CSV trying to recover as much as possible.
csvDecodeTable :: String -> [[String]]
csvDecodeTable []                 = []
csvDecodeTable ('\r' : '\n' : cs) = csvDecodeTable cs
csvDecodeTable ('\r'        : cs) = csvDecodeTable cs
csvDecodeTable ('\n'        : cs) = csvDecodeTable cs
csvDecodeTable (','         : cs) = csvDecodeField ("" :) cs
csvDecodeTable ('"'         : cs) = csvDecodeEscapedField id id cs
csvDecodeTable (c           : cs) = csvDecodeUnescapedField (c :) id cs

csvDecodeField :: ([String] -> [String]) -> String -> [[String]]
csvDecodeField accR ('\r' : '\n' : cs) = accR [""] : csvDecodeTable cs
csvDecodeField accR ('\r'        : cs) = accR [""] : csvDecodeTable cs
csvDecodeField accR ('\n'        : cs) = accR [""] : csvDecodeTable cs
csvDecodeField accR ('"'         : cs) = csvDecodeEscapedField id accR cs
csvDecodeField accR (','         : cs) = csvDecodeField (accR . ("" :)) cs
csvDecodeField accR (c           : cs) = csvDecodeUnescapedField (c :) accR cs
csvDecodeField accR []                 = [accR []]

csvDecodeEscapedField :: (String -> String) -> ([String] -> [String]) -> String -> [[String]]
csvDecodeEscapedField accF accR ('"' : '"' : cs) = csvDecodeEscapedField (accF . ('"' :)) accR cs
csvDecodeEscapedField accF accR ('"' : cs)       = csvDecodeAfterEscapedField (accR . (accF "" :)) cs
csvDecodeEscapedField accF accR (c   : cs)       = csvDecodeEscapedField (accF . (c :))   accR cs
csvDecodeEscapedField accF accR []               = [accR [accF ""]]

-- expected: EOF, EOL or ,
csvDecodeAfterEscapedField :: ([String] -> [String]) -> String -> [[String]]
csvDecodeAfterEscapedField accR [] = [accR []]
csvDecodeAfterEscapedField accR ('\r' : '\n' : cs) = accR [] : csvDecodeTable cs
csvDecodeAfterEscapedField accR ('\r'        : cs) = accR [] : csvDecodeTable cs
csvDecodeAfterEscapedField accR ('\n'        : cs) = accR [] : csvDecodeTable cs
csvDecodeAfterEscapedField accR (','         : cs) = csvDecodeField accR cs
csvDecodeAfterEscapedField accR (_           : cs) = csvDecodeAfterEscapedField accR cs

csvDecodeUnescapedField :: (String -> String) -> ([String] -> [String]) -> String -> [[String]]
csvDecodeUnescapedField accF accR (','         : cs) = csvDecodeField (accR . (accF "" :)) cs
csvDecodeUnescapedField accF accR ('\r' : '\n' : cs) = accR [accF ""] : csvDecodeTable cs
csvDecodeUnescapedField accF accR ('\r'        : cs) = accR [accF ""] : csvDecodeTable cs
csvDecodeUnescapedField accF accR ('\n'        : cs) = accR [accF ""] : csvDecodeTable cs
csvDecodeUnescapedField accF accR (c           : cs) = csvDecodeUnescapedField (accF . (c :)) accR cs
csvDecodeUnescapedField accF accR []                 = [accR [accF ""]]