-- | Run all steps of the GeneCluEDO algorithms in order.
--
-- This will produce the following:
-- 
-- 1. run the minimal distance algorithm, give the minimal distance score
-- and return all co-optimal paths
--
-- 2. run the end-probability algorithm and return the probability that
-- each node is the begin/end of a chain
--
-- 3. run the edge probability algorithm and give the probability for each
-- @from :-> to@ edge
--
-- 4. with the edge probabilities, run the maximal probability path
-- algorithm, return that probability and all co-optimal paths
--
-- TODO -Pretty should yield a structure to be given to the eps or svg
-- generator. This allows more flexibility. Does diagrams offer
-- serialization?
--
-- TODO All this should be wrapped and available as a function, not just
-- provided as output files.

module BioInf.GeneCluEDO
  ( runGeneCluEDO
  , FillWeight (..)
  , FillStyle (..)
  ) where

import           Control.Monad (forM_)
import           Data.Function (on)
import           Data.List (groupBy)
import           Numeric.Log
import qualified Data.Text as T
import qualified Data.Text.IO as T
import           System.FilePath (addExtension)
import           System.IO (withFile,IOMode(WriteMode))
import           Text.Printf

import           ADP.Fusion.Term.Edge.Type (From(..),To(..))
import           Data.PrimitiveArray (fromEdgeBoundaryFst,(:.)(..))
import           Data.PrimitiveArray.ScoreMatrix
import           Diagrams.TwoD.ProbabilityGrid
import           ShortestPath.SHP.Edge.MinDist (runMaxEdgeProbLast, runCoOptDist, boundaryPartFun,PathBT(..))

import           BioInf.GeneCluEDO.EdgeProb (edgeProbScoreMatrix, edgeProbPartFun)



-- | Run the complete GeneCluEDO pipeline on one score matrix and write the
-- results to disk.
--
-- Three files are created, each named by applying 'addExtension' to the
-- given prefix: a textual report (extension @.run@) and two SVG grids
-- (extensions @boundary.svg@ and @edge.svg@). The report contains, in
-- order: the input file name and temperature, the minimal distance with
-- its co-optimal paths, the chain begin/end probabilities, the edge
-- probability table, and the maximal log-probability path score with its
-- co-optimal paths.
runGeneCluEDO
  :: FillWeight
  -> FillStyle
  -> Double
  -- ^ "Temperature" for probability-related parts of the algorithms.
  -- Lower temperatures favor a single path.
  -> FilePath
  -- ^ The input score matrix
  -> String
  -- ^ In the current directory, create output files with this name prefix
  -> IO ()
runGeneCluEDO fw fs temperature inFile filePrefix = do
  scoreMat <- fromFile inFile
  let lon = listOfRowNames scoreMat
  let n = length lon
  let lns = map T.unpack lon
  -- Widest row name, but never narrower than 4. Seeding the list with 4
  -- keeps this total when there are no row names at all.
  let bcols = maximum (4 : map T.length lon)
  -- Every table cell is padded to the same width; build the two format
  -- strings once instead of at every call site.
  let cellWidth = show (bcols + 4)
  let nameFmt = "%" ++ cellWidth ++ "s"
  let probFmt = "%" ++ cellWidth ++ ".4f"
  withFile (filePrefix `addExtension` ".run") WriteMode $ \hrun -> do
    hPrintf hrun ("Input File: %s\n") inFile
    hPrintf hrun ("Temperature: %f\n") temperature
    hPrintf hrun ("\n")
    let (minD, minDcoopts) = runCoOptDist scoreMat
    --
    -- Print the minimal distance and the co-optimal paths
    --
    hPrintf hrun "Minimal Distance: %6.3f\n" minD
    hPrintf hrun "Optimal Paths:\n"
    forM_ minDcoopts (T.hPutStrLn hrun)
    hPrintf hrun "\n"
    --
    -- end probabilities, both to the output file and create pretty file
    --
    hPrintf hrun "Chain Begin/End Probabilities:\n"
    let bps = boundaryPartFun temperature scoreMat
    forM_ lon $ hPrintf hrun nameFmt
    hPrintf hrun "\n"
    -- values are held in log space ('Exp' wraps the logarithm), so
    -- exponentiate to print plain probabilities
    forM_ bps $ \(_, Exp p) -> hPrintf hrun probFmt (exp p)
    hPrintf hrun "\n"
    hPrintf hrun "\n"
    svgGridFile (filePrefix `addExtension` "boundary.svg") fw fs 1 n [] lns (map snd bps)
    --
    -- edge probabilities, output file and pretty file
    --
    hPrintf hrun "Edge Probabilities:\n"
    let eps = edgeProbPartFun temperature scoreMat
    -- empty top-left corner cell above the column of row labels
    hPrintf hrun nameFmt ("" :: String)
    forM_ lon $ hPrintf hrun nameFmt
    hPrintf hrun "\n"
    -- One output row per run of consecutive entries that share the same
    -- first edge-boundary index; that index also selects the row label.
    forM_ (groupBy ((==) `on` (fromEdgeBoundaryFst . fst)) eps) $ \case
      -- 'groupBy' does not produce empty groups; this alternative only
      -- keeps the match total.
      [] -> return ()
      row@((rowEdge, _) : _) -> do
        hPrintf hrun nameFmt (lon !! fromEdgeBoundaryFst rowEdge)
        forM_ row $ \(_, Exp p) -> hPrintf hrun probFmt (exp p)
        hPrintf hrun "\n"
    svgGridFile (filePrefix `addExtension` "edge.svg") fw fs n n lns lns (map snd eps)
    --
    -- maximum probability path
    --
    hPrintf hrun "\n"
    let probMat = edgeProbScoreMatrix scoreMat eps
    -- 'Exp' is matched directly, so the printed score is the logarithm,
    -- as the label says
    let (Exp maxP, _, maxPcoopts) = runMaxEdgeProbLast probMat
    hPrintf hrun "Maximal Log-Probability Path Score: %6.3f\n" maxP
    -- each backtracked path is reversed before printing
    forM_ (map reverse maxPcoopts) $ \path -> do
      forM_ path $ \case
        BTnode (_ :. To node)     -> hPrintf hrun "%s" (lns !! node)
        BTedge (From _ :. To tt)  -> hPrintf hrun " -> %s" (lns !! tt)
      hPrintf hrun "\n"
    hPrintf hrun "\n"