-- |
-- Module      :  ELynx.Import.MarkovProcess.SiteprofilesPhylobayes
-- Description :  Import site profiles in Phylobayes format
-- Copyright   :  (c) Dominik Schrempf 2021
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Tue Jan 29 12:12:55 2019.
--
-- For now I just try to go with a huge empirical distribution mixture model. Let's
-- see if performance is good enough.
--
-- There are subtle differences between
-- `ELynx.Import.MarkovProcess.EDMModelPhylobayes` and this module, which collects
-- one stationary distribution for each site.
module ELynx.Import.MarkovProcess.SiteprofilesPhylobayes
  ( siteprofiles,
  )
where

import Control.Applicative
import Control.Monad
import qualified Data.Attoparsec.ByteString as AS
import qualified Data.Attoparsec.ByteString.Char8 as AC
import Data.Containers.ListUtils (nubInt)
import qualified Data.Vector.Storable as V
import ELynx.Import.MarkovProcess.EDMModelPhylobayes

-- | Parse stationary distributions from Phylobayes format.
siteprofiles :: AS.Parser [EDMComponent]
siteprofiles :: Parser [EDMComponent]
siteprofiles = (Parser [EDMComponent] -> String -> Parser [EDMComponent]
forall i a. Parser i a -> String -> Parser i a
AS.<?> String
"phylobayes siteprofiles") (Parser [EDMComponent] -> Parser [EDMComponent])
-> Parser [EDMComponent] -> Parser [EDMComponent]
forall a b. (a -> b) -> a -> b
$ do
  ()
_ <- Parser ()
headerLines
  [EDMComponent]
cs <- Parser ByteString EDMComponent -> Parser [EDMComponent]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser ByteString EDMComponent
dataLine
  ()
_ <- (Char -> Bool) -> Parser ()
AC.skipWhile Char -> Bool
AC.isSpace
  ()
_ <- Parser ()
forall t. Chunk t => Parser t ()
AS.endOfInput
  let ls :: [Int]
ls = (EDMComponent -> Int) -> [EDMComponent] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map EDMComponent -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [EDMComponent]
cs
      nLs :: Int
nLs = [Int] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ([Int] -> Int) -> [Int] -> Int
forall a b. (a -> b) -> a -> b
$ [Int] -> [Int]
nubInt [Int]
ls
  Bool -> Parser () -> Parser ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when
    (Int
nLs Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
1)
    (String -> Parser ()
forall a. HasCallStack => String -> a
error String
"The site profiles have a different number of entries.")
  [EDMComponent] -> Parser [EDMComponent]
forall (m :: * -> *) a. Monad m => a -> m a
return [EDMComponent]
cs

line :: AS.Parser ()
line :: Parser ()
line = (Word8 -> Bool) -> Parser ()
AS.skipWhile (Bool -> Bool
not (Bool -> Bool) -> (Word8 -> Bool) -> Word8 -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Bool
AC.isEndOfLine)

-- For now, just ignore the header.
headerLines :: AS.Parser ()
headerLines :: Parser ()
headerLines = Parser ()
line Parser () -> Parser () -> Parser ()
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Char -> Bool) -> Parser ()
AC.skipWhile Char -> Bool
AC.isSpace Parser () -> String -> Parser ()
forall i a. Parser i a -> String -> Parser i a
AS.<?> String
"headerLine"

dataLine :: AS.Parser EDMComponent
dataLine :: Parser ByteString EDMComponent
dataLine = (Parser ByteString EDMComponent
-> String -> Parser ByteString EDMComponent
forall i a. Parser i a -> String -> Parser i a
AS.<?> String
"dataLine") (Parser ByteString EDMComponent -> Parser ByteString EDMComponent)
-> Parser ByteString EDMComponent -> Parser ByteString EDMComponent
forall a b. (a -> b) -> a -> b
$ do
  -- Ignore site number.
  Int
_ <- Parser Int
forall a. Integral a => Parser a
AC.decimal :: AS.Parser Int
  ()
_ <- (Word8 -> Bool) -> Parser ()
AS.skipWhile Word8 -> Bool
AC.isHorizontalSpace
  -- Also ignore additional white space on line.
  [Double]
vals <- Parser Double
AC.double Parser Double -> Parser () -> Parser ByteString [Double]
forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`AS.sepBy1` (Word8 -> Bool) -> Parser ()
AS.skipWhile Word8 -> Bool
AC.isHorizontalSpace
  ()
_ <- (Char -> Bool) -> Parser ()
AC.skipWhile Char -> Bool
AC.isSpace
  -- Set the weight to 1.0 for all sites.
  EDMComponent -> Parser ByteString EDMComponent
forall (m :: * -> *) a. Monad m => a -> m a
return (Double
1.0, [Double] -> Vector Double
forall a. Storable a => [a] -> Vector a
V.fromList [Double]
vals)