{-# LANGUAGE DeriveGeneric #-}
module ELynx.Import.Tree.Newick
( NewickFormat (..),
description,
newick,
oneNewick,
someNewick,
)
where
import Control.Applicative
import Data.Aeson (FromJSON, ToJSON)
import Data.Attoparsec.ByteString.Char8
import qualified Data.ByteString.Char8 as BS
import ELynx.Data.Tree.Measurable
import ELynx.Data.Tree.Phylogeny
import ELynx.Data.Tree.Rooted hiding (forest, label)
import ELynx.Data.Tree.Supported
import GHC.Generics
import Prelude hiding (takeWhile)
data NewickFormat = Standard | IqTree | RevBayes
deriving (Eq, Show, Read, Bounded, Enum, Generic)
instance FromJSON NewickFormat
instance ToJSON NewickFormat
description :: NewickFormat -> String
description Standard =
"Standard: Branch support values are stored in square brackets after branch lengths."
description IqTree =
"IqTree: Branch support values are stored as node names after the closing bracket of forests."
description RevBayes =
"RevBayes: Key-value pairs is provided in square brackets after node names as well as branch lengths. XXX: Key value pairs are ignored at the moment."
newick :: NewickFormat -> Parser (Tree Phylo BS.ByteString)
newick Standard = newickStandard
newick IqTree = newickIqTree
newick RevBayes = newickRevBayes
oneNewick :: NewickFormat -> Parser (Tree Phylo BS.ByteString)
oneNewick Standard = oneNewickStandard
oneNewick IqTree = oneNewickIqTree
oneNewick RevBayes = oneNewickRevBayes
someNewick :: NewickFormat -> Parser (Forest Phylo BS.ByteString)
someNewick Standard = someNewickStandard
someNewick IqTree = someNewickIqTree
someNewick RevBayes = someNewickRevBayes
newickStandard :: Parser (Tree Phylo BS.ByteString)
newickStandard = skipWhile isSpace *> tree <* char ';' <* skipWhile isSpace <?> "newickStandard"
oneNewickStandard :: Parser (Tree Phylo BS.ByteString)
oneNewickStandard = newickStandard <* endOfInput <?> "oneNewickStandard"
someNewickStandard :: Parser (Forest Phylo BS.ByteString)
someNewickStandard = some newickStandard <* endOfInput <?> "someNewickStandard"
tree :: Parser (Tree Phylo BS.ByteString)
tree = branched <|> leaf <?> "tree"
branched :: Parser (Tree Phylo BS.ByteString)
branched = (<?> "branched") $ do
f <- forest
n <- name
p <- phylo
return $ Node p n f
forest :: Parser (Forest Phylo BS.ByteString)
forest = char '(' *> (tree `sepBy1` char ',') <* char ')' <?> "forest"
leaf :: Parser (Tree Phylo BS.ByteString)
leaf = (<?> "leaf") $ do
n <- name
p <- phylo
return $ Node p n []
nameChar :: Char -> Bool
nameChar c = c `notElem` " :;()[],"
name :: Parser BS.ByteString
name = takeWhile nameChar <?> "name"
phylo :: Parser Phylo
phylo = Phylo <$> optional branchLength <*> optional branchSupport <?> "phylo"
branchLength :: Parser BranchLength
branchLength = char ':' *> double <?> "branchLength"
branchSupport :: Parser BranchSupport
branchSupport = (<?> "branchSupport") $
do
_ <- char '['
s <- double
_ <- char ']'
return s
newickIqTree :: Parser (Tree Phylo BS.ByteString)
newickIqTree = skipWhile isSpace *> treeIqTree <* char ';' <* skipWhile isSpace <?> "newickIqTree"
oneNewickIqTree :: Parser (Tree Phylo BS.ByteString)
oneNewickIqTree = newickIqTree <* endOfInput <?> "oneNewickIqTree"
someNewickIqTree :: Parser (Forest Phylo BS.ByteString)
someNewickIqTree = some newickIqTree <* endOfInput <?> "someNewickIqTree"
treeIqTree :: Parser (Tree Phylo BS.ByteString)
treeIqTree = branchedIqTree <|> leaf <?> "treeIqTree"
branchedIqTree :: Parser (Tree Phylo BS.ByteString)
branchedIqTree = (<?> "branchedIqTree") $ do
f <- forestIqTree
s <- optional double
n <- name
b <- optional branchLength
return $ Node (Phylo b s) n f
forestIqTree :: Parser (Forest Phylo BS.ByteString)
forestIqTree = (<?> "forestIqTree") $ do
_ <- char '('
f <- treeIqTree `sepBy1` char ','
_ <- char ')'
return f
newickRevBayes :: Parser (Tree Phylo BS.ByteString)
newickRevBayes =
skipWhile isSpace *> optional brackets *> treeRevBayes <* char ';' <* skipWhile isSpace <?> "newickRevBayes"
oneNewickRevBayes :: Parser (Tree Phylo BS.ByteString)
oneNewickRevBayes = newickRevBayes <* endOfInput <?> "oneNewickRevBayes"
someNewickRevBayes :: Parser (Forest Phylo BS.ByteString)
someNewickRevBayes = some newickRevBayes <* endOfInput <?> "someNewickRevBayes"
treeRevBayes :: Parser (Tree Phylo BS.ByteString)
treeRevBayes = branchedRevBayes <|> leafRevBayes <?> "treeRevBayes"
branchedRevBayes :: Parser (Tree Phylo BS.ByteString)
branchedRevBayes = (<?> "branchedRevgBayes") $ do
f <- forestRevBayes
n <- nameRevBayes
b <- optional branchLengthRevBayes
return $ Node (Phylo b Nothing) n f
forestRevBayes :: Parser (Forest Phylo BS.ByteString)
forestRevBayes = (<?> "forestRevBayes") $ do
_ <- char '('
f <- treeRevBayes `sepBy1` char ','
_ <- char ')'
return f
nameRevBayes :: Parser BS.ByteString
nameRevBayes = name <* optional brackets <?> "nameRevBayes"
branchLengthRevBayes :: Parser BranchLength
branchLengthRevBayes = branchLength <* optional brackets <?> "branchLengthRevBayes"
leafRevBayes :: Parser (Tree Phylo BS.ByteString)
leafRevBayes = (<?> "leafRevBayes") $ do
n <- nameRevBayes
b <- optional branchLengthRevBayes
return $ Node (Phylo b Nothing) n []
brackets :: Parser ()
brackets = (<?> "brackets") $ do
_ <- char '['
_ <- takeWhile (/= ']')
_ <- char ']'
return ()