-- | Megaparsec-based import of PhyloBayes site profiles.
--
-- The entry point is 'siteprofiles', which yields one 'EDMComponent' per
-- data line of the input.
module ELynx.Import.MarkovProcess.SiteprofilesPhylobayes
( Parser
, EDMComponent
, siteprofiles
) where
import Control.Monad
import qualified Data.ByteString.Lazy.Char8 as L
import Data.List (nub)
import qualified Data.Vector.Storable as V
import Data.Void
import Text.Megaparsec
import Text.Megaparsec.Byte
import Text.Megaparsec.Byte.Lexer
import ELynx.Data.MarkovProcess.EDMModel
import ELynx.Tools.ByteString (c2w)
-- | Parser type used in this module: a Megaparsec parser over lazy
-- 'L.ByteString' input with 'Void' as the custom error component.
type Parser = Parsec Void L.ByteString
-- | Parse a complete site profiles file.
--
-- The header is skipped with 'headerLines', then every following line is
-- parsed with 'dataLine' into one 'EDMComponent'. Any number of trailing
-- newlines is accepted, after which the input has to end.
--
-- Calls 'error' when the parsed components do not all carry vectors of the
-- same length. Note that this also triggers when no data line was parsed at
-- all, because then there is no common length either.
siteprofiles :: Parser [EDMComponent]
siteprofiles = do
  _ <- headerLines
  cs <- many dataLine
  _ <- many newline *> eof
    <?> "phylobayes siteprofiles"
  -- Measure the vector stored in the second slot of each component. Plain
  -- 'length' on the pair itself goes through the Foldable instance for
  -- tuples and is always 1, which would make this consistency check a no-op.
  let ls = map (V.length . snd) cs
      nLs = length $ nub ls
  when (nLs /= 1) (error "The site profiles have a different number of entries.")
  return cs
-- | Skip zero or more space or tab bytes. Succeeds without consuming input
-- when none are present.
horizontalSpace :: Parser ()
horizontalSpace = skipMany blank
  where
    -- A single horizontal whitespace byte: either a space or a tab.
    blank = char (c2w ' ') <|> tab
-- | Consume every byte up to, but not including, the next newline byte.
-- The consumed bytes are discarded.
line :: Parser ()
line = void . many $ noneOf [c2w '\n']
-- | Skip the header: the remainder of the current line followed by any
-- number of newline bytes.
headerLines :: Parser ()
headerLines = line *> void (many newline <?> "headerLine")
-- | Parse one data line into an 'EDMComponent'.
--
-- A line starts with a decimal integer, which is parsed and dropped, followed
-- by one or more floating point numbers separated (and optionally
-- terminated) by horizontal whitespace. Any newlines after the numbers are
-- consumed as well. The first slot of the result is always @1.0@; the parsed
-- numbers are stored as a vector in the second slot.
dataLine :: Parser EDMComponent
dataLine = do
  vals <- leadingInteger *> horizontalSpace *> (float `sepEndBy1` horizontalSpace)
  void (many newline <?> "dataLine")
  pure (1.0, V.fromList vals)
  where
    -- The integer at the start of the line; its value is not used.
    leadingInteger = decimal :: Parser Integer