import Criterion.Main
import Criterion.Types
import System.Random.MWC (createSystemRandom, GenIO)
import Control.Monad (replicateM)

import Debug.Trace

import GHC.IO.Handle
import System.Exit
import System.Process hiding (env)

import Control.Monad.Bayes.Class
import Control.Monad.Bayes.Sampler
import Control.Monad.Bayes.Weighted
import Control.Monad.Bayes.Inference.SMC
import Control.Monad.Bayes.Inference.RMSMC
import Control.Monad.Bayes.Population
import Control.Monad.Bayes.Traced

-- import NonlinearSSM
import qualified HMM
import qualified LogReg
import qualified LDA

-- | Path to the Anglican project with benchmarks.
anglicanPath :: String
anglicanPath = "/scratch/ams240/repos/anglican-white-paper/experiments"

-- | Running Leiningen repl process.
-- Leiningen takes a lot of time to start so we keep a repl
-- running to speed up benchmarking.
-- Contains in order input handle, output handle, and process handle.
data LeinProc = LeinProc Handle Handle ProcessHandle

anglicanModelName :: Model -> String
anglicanModelName (LR _) = "logisticRegression"
anglicanModelName (HMM _) = "hmm"
anglicanModelName (LDA _) = "lda"

clojureBool :: Bool -> String
clojureBool False = "false"
clojureBool True = "true"

-- | Format Haskell [Bool] as a Clojure Boolean vector.
clojureBoolVector :: [Bool] -> String
clojureBoolVector = clojureVector . map clojureBool

-- | Format a Haskell list as a Clojure vector.
clojureVector :: [String] -> String
clojureVector xs = "[" ++ unwords xs ++ "]"

-- | Format a Haskell list as a Clojure vector.
clojureShowVector :: Show a => [a] -> String
clojureShowVector xs = clojureVector $ map show xs

-- | Insert data into an Anglican model.
anglicanData :: LeinProc -> Model -> IO ()
anglicanData lein model = do
  anglican lein ["(use 'nstools.ns)\n"]
  anglican lein ["(ns+ " ++ anglicanModelName model ++ ")\n"]
  case model of
    LR dataset -> do
      let (xs, labels) = unzip dataset
      anglican lein ["(ns-unmap *ns* 'xs)\n"]
      anglican lein ["(def xs " ++ clojureShowVector xs ++ ")\n", "nil\n"]
      anglican lein ["(ns-unmap *ns* 'labels)\n"]
      anglican lein ["(def labels " ++ clojureBoolVector labels ++ ")\n", "nil\n"]
    HMM observations -> do
      anglican lein ["(ns-unmap *ns* 'observations)\n"]
      anglican lein ["(def observations " ++ clojureShowVector observations ++ ")\n", "nil\n"]
    LDA docs -> do
      anglican lein ["(ns-unmap *ns* 'docs)\n"]
      anglican lein ["(def docs " ++ clojureVector (map clojureShowVector docs) ++ ")\n", "nil\n"]
  anglican lein ["(use 'nstools.ns)\n"]
  anglican lein ["(ns+ anglican.core)\n"]

anglican :: LeinProc -> [String] -> IO ()
anglican (LeinProc input output _) cmds = do
  -- execute command
  mapM (hPutStr input) cmds
  hFlush input
  -- wait until all samples are produced
  waitForAnglican output

-- | Wait until an Anglican program finishes
waitForAnglican :: Handle -> IO ()
waitForAnglican handle = run where
  run = do
    l <- hGetLine handle
    -- traceIO l
    -- We recognize that Anglican is finished when the repl emits a line "nil"
    if l == "nil" then
      return ()
    else
      run

-- | Start a Leiningen process in a directory that contains benchmarks.
startLein :: IO LeinProc
startLein = do
  let setup = (shell "lein repl"){cwd = Just anglicanPath, std_in = CreatePipe, std_out = CreatePipe}
  (Just input, Just output, _, process) <- createProcess setup
  -- wait until Leiningen starts producing output to make sure it's ready
  _ <- hWaitForInput output (-1) -- wait for output indefinitely
  let lein = LeinProc input output process
  anglican lein ["(use 'nstools.ns)\n"]
  return lein


-- | Path to the WebPPL project with benchmarks.
webpplPath :: String
webpplPath = "/scratch/ams240/repos/anglican-white-paper/experiments/WebPPL"

-- | Format Haskell list as a Javascript list.
javascriptList :: Show a => [a] -> String
javascriptList xs = unwords (map show xs)

webpplModelName :: Model -> String
webpplModelName (LR _) = "logisticRegression"
webpplModelName (HMM _) = "hmm"
webpplModelName (LDA _) = "lda"

-- | Environment to execute benchmarks in.
data Env = Env {rng :: GenIO, lein :: LeinProc}

data ProbProgSys = MonadBayes | Anglican | WebPPL
  deriving(Show)

data Model = LR [(Double,Bool)] | HMM [Double] | LDA [[String]]
instance Show Model where
  show (LR xs) = "LR" ++ show (length xs)
  show (HMM xs) = "HMM" ++ show (length xs)
  show (LDA xs) = "LDA" ++ show (length $ head xs)
buildModel :: MonadInfer m => Model -> m String
buildModel (LR dataset) = show <$> LogReg.logisticRegression dataset
buildModel (HMM dataset) = show <$> HMM.hmm dataset
buildModel (LDA dataset) = show <$> LDA.lda dataset
modelLength :: Model -> Int
modelLength (LR xs) = length xs
modelLength (HMM xs) = length xs
modelLength (LDA xs) = sum (map length xs)

data Alg = MH Int | SMC Int | RMSMC Int Int
instance Show Alg where
  show (MH n) = "MH" ++ show n
  show (SMC n) = "SMC" ++ show n
  show (RMSMC n t) = "RMSMC" ++ show n ++ "-" ++ show t
runAlg :: Model -> Alg -> SamplerIO String
runAlg model (MH n) = show <$> prior (mh n (buildModel model))
runAlg model (SMC n) = show <$> runPopulation (smcSystematic (modelLength model) n (buildModel model))
runAlg model (RMSMC n t) = show <$> runPopulation (rmsmcLocal (modelLength model) n t (buildModel model))

prepareBenchmarkable :: GenIO -> ProbProgSys -> Model -> Alg -> Benchmarkable
prepareBenchmarkable g MonadBayes model alg = nfIO $ sampleIOwith (runAlg model alg) g
prepareBenchmarkable _ Anglican _ _  = error "Anglican benchmarks not available"
prepareBenchmarkable _ WebPPL _ _ = error "WebPPL benchmarks not available"

prepareBenchmark :: Env -> ProbProgSys -> Model -> Alg -> Benchmark
prepareBenchmark e MonadBayes model alg =
  bench (show MonadBayes ++ sep ++ show model ++ sep ++ show alg) $
  prepareBenchmarkable (rng e) MonadBayes model alg where
    sep = "_"
prepareBenchmark e Anglican model alg = env prepareData (const $ bench name $ whnfIO collect) where
  name = show Anglican ++ sep ++ show model ++ sep ++ show alg
  sep = "_"
  algString (MH n) = "-a lmh -n " ++ show n
  algString (SMC n) = "-a smc -n " ++ show n
  algString (RMSMC _ _) = error "Anglican does not support resample-move SMC"
  prepareData = anglicanData (lein e) model
  collect = do
    anglican (lein e) $ ["(time (m! " ++ anglicanModelName model ++ " " ++ algString alg ++ "))\n"]
prepareBenchmark _ WebPPL model alg = bench name $ whnfIO run where
  name = show WebPPL ++ sep ++ show model ++ sep ++ show alg
  sep = "_"
  algString (MH n) = "--alg MCMC --samples " ++ show n ++ " --rejuv 0 "
  algString (SMC n) = "--alg SMC --samples " ++ show n ++ " --rejuv 0 "
  algString (RMSMC n t) = "--alg SMC --samples " ++ show n ++ " --rejuv " ++ show t ++ " "
  dataString (LR dataset) = let (xs, labels) = unzip dataset in "--xs='" ++ javascriptList xs ++ "' --labels='" ++ javascriptList (map (\b -> if b then 1 else 0) labels) ++ "'"
  dataString (HMM obs) = "--obs='" ++ javascriptList obs ++ "'"
  dataString (LDA docs) = unwords $ map (\(i,doc) -> "--doc" ++ show i ++ "='" ++ unwords doc ++ "'") (zip [1..5] docs)
  run = do
    let command = "node " ++ webpplModelName model ++ ".js " ++ algString alg ++ dataString model
    (_, _, _, process) <- createProcess $ (shell command){cwd = Just webpplPath, std_out = NoStream, std_err = NoStream}
    exitCode <- waitForProcess process
    case exitCode of
      ExitSuccess -> return ()
      ExitFailure i -> error $ "WebPPL terminated with exit code " ++ show i


-- | Checks if the requested benchmark is implemented.
supported :: (ProbProgSys, Model, Alg) -> Bool
supported (Anglican, _, RMSMC _ _) = False
supported _ = True

systems = [
            MonadBayes
            -- Anglican,
            -- WebPPL
          ]

lengthBenchmarks e lrData hmmData ldaData = benchmarks where
  lrLengths = [10] ++ map (*100) [1..10]
  hmmLengths = [10] ++ map (*100) [1..10]
  ldaLengths = [5] ++ map (*50) [1..10]
  models =
    map (LR . (`take` lrData)) lrLengths ++
    map (HMM . (`take` hmmData)) hmmLengths ++
    map (\n -> LDA $ map (take n) ldaData) ldaLengths
  algs = [
    MH 100,
    SMC 100,
    RMSMC 10 1
    ]
  benchmarks = map (uncurry3 (prepareBenchmark e)) $ filter supported xs where
        uncurry3 f (x,y,z) = f x y z
        xs = do
          m <- models
          s <- systems
          a <- algs
          return (s,m,a)

samplesBenchmarks e lrData hmmData ldaData = benchmarks where
  lrLengths = [50]
  hmmLengths = [20]
  ldaLengths = [10]
  models = map (LR . (`take` lrData)) lrLengths ++
           map (HMM . (`take` hmmData)) hmmLengths ++
           map (\n -> LDA $ map (take n) ldaData) ldaLengths
  algs =  map (\x -> MH (100*x)) [1..10] ++ map (\x -> SMC (100*x)) [1..10]
          ++ map (\x -> RMSMC 10 (10*x)) [1..10]
  benchmarks = map (uncurry3 (prepareBenchmark e)) $ filter supported xs where
        uncurry3 f (x,y,z) = f x y z
        xs = do
          a <- algs
          s <- systems
          m <- models
          return (s,m,a)

main :: IO ()
main = do

  g <- createSystemRandom
  l <- startLein
  let e = Env g l

  lrData <- sampleIOwith (LogReg.syntheticData 1000) g
  hmmData <- sampleIOwith (HMM.syntheticData 1000) g
  ldaData <- sampleIOwith (LDA.syntheticData 5 1000) g

  let configLength = defaultConfig{csvFile = Just "speed-length.csv", rawDataFile = Just "raw.dat"}
  defaultMainWith configLength (lengthBenchmarks e lrData hmmData ldaData)

  let configSamples = defaultConfig{csvFile = Just "speed-samples.csv", rawDataFile = Just "raw.dat"}
  defaultMainWith configSamples (samplesBenchmarks e lrData hmmData ldaData)