module ELynx.Import.Tree.Newick
( Parser
, newick
, oneNewick
, manyNewick
, forest
, leaf
, node
, name
, branchLength
, newickIqTree
, oneNewickIqTree
, manyNewickIqTree
) where
import qualified Data.ByteString.Lazy as L
import Data.Tree
import Data.Void
import Data.Word
import Text.Megaparsec
import Text.Megaparsec.Byte
import Text.Megaparsec.Byte.Lexer (decimal, float)
import ELynx.Data.Tree.PhyloTree
import ELynx.Tools.ByteString (c2w)
type Parser = Parsec Void L.ByteString
newick :: Parser (Tree (PhyloLabel L.ByteString))
newick = tree <* char (c2w ';') <?> "newick"
oneNewick :: Parser (Tree (PhyloLabel L.ByteString))
oneNewick = newick <* space <* eof <?> "oneNewick"
manyNewick :: Parser [Tree (PhyloLabel L.ByteString)]
manyNewick = some (newick <* space) <* eof <?> "manyNewick"
tree :: Parser (Tree (PhyloLabel L.ByteString))
tree = space *> (branched <|> leaf) <?> "tree"
branched :: Parser (Tree (PhyloLabel L.ByteString))
branched = do
f <- forest
n <- node
<?> "branched"
return $ Node n f
forest :: Parser [Tree (PhyloLabel L.ByteString)]
forest = do
_ <- char (c2w '(')
f <- tree `sepBy1` char (c2w ',')
_ <- char (c2w ')')
<?> "forest"
return f
branchSupport :: Parser (Maybe Double)
branchSupport = optional $ do
_ <- try $ char (c2w '[')
s <- try float <|> try decimalAsDouble
_ <- try $ char (c2w ']')
return s
leaf :: Parser (Tree (PhyloLabel L.ByteString))
leaf = do
n <- node
<?> "leaf"
return $ Node n []
node :: Parser (PhyloLabel L.ByteString)
node = do
n <- name
b <- branchLength
s <- branchSupport
<?> "node"
return $ PhyloLabel n s b
checkNameCharacter :: Word8 -> Bool
checkNameCharacter c = c `notElem` map c2w " :;()[],"
name :: Parser L.ByteString
name = L.pack <$> many (satisfy checkNameCharacter) <?> "name"
branchLength :: Parser (Maybe Double)
branchLength = (optional $ char (c2w ':') *> branchLengthGiven) <?> "branchLength"
branchLengthGiven :: Parser Double
branchLengthGiven = try float <|> decimalAsDouble
decimalAsDouble :: Parser Double
decimalAsDouble = fromIntegral <$> (decimal :: Parser Int)
newickIqTree :: Parser (Tree (PhyloLabel L.ByteString))
newickIqTree = treeIqTree <* char (c2w ';') <?> "newickIqTree"
oneNewickIqTree :: Parser (Tree (PhyloLabel L.ByteString))
oneNewickIqTree = newickIqTree <* space <* eof <?> "oneNewickIqTree"
manyNewickIqTree :: Parser [Tree (PhyloLabel L.ByteString)]
manyNewickIqTree = some (newickIqTree <* space) <* eof <?> "manyNewickIqTree"
treeIqTree :: Parser (Tree (PhyloLabel L.ByteString))
treeIqTree = space *> (branchedIqTree <|> leaf) <?> "treeIqTree"
forestIqTree :: Parser [Tree (PhyloLabel L.ByteString)]
forestIqTree = do
_ <- char (c2w '(')
f <- treeIqTree `sepBy1` char (c2w ',')
_ <- char (c2w ')')
<?> "forestIqTree"
return f
branchedIqTree :: Parser (Tree (PhyloLabel L.ByteString))
branchedIqTree = do
f <- forestIqTree
s <- branchSupportIqTree
n <- nodeIqTree
<?> "branchedIqTree"
let n' = n {brSup = s}
return $ Node n' f
branchSupportIqTree :: Parser (Maybe Double)
branchSupportIqTree = optional $ try float <|> try decimalAsDouble
nodeIqTree :: Parser (PhyloLabel L.ByteString)
nodeIqTree = do
n <- name
b <- branchLength
<?> "nodeIqTree"
return $ PhyloLabel n Nothing b