module ELynx.Import.Tree.Newick
( Parser
, newick
, manyNewick
, forest
, leaf
, node
, name
, branchLength
) where
import qualified Data.ByteString.Lazy as L
import Data.Tree
import Data.Void
import Data.Word
import Text.Megaparsec
import Text.Megaparsec.Byte
import Text.Megaparsec.Byte.Lexer (decimal, float)
import ELynx.Data.Tree.PhyloTree
import ELynx.Tools.ByteString (c2w)
type Parser = Parsec Void L.ByteString
manyNewick :: Parser [Tree PhyloByteStringLabel]
manyNewick = some (newick <* space) <* eof <?> "manyNewick"
newick :: Parser (Tree PhyloByteStringLabel)
newick = tree <* char (c2w ';') <?> "newick"
tree :: Parser (Tree PhyloByteStringLabel)
tree = space *> (branched <|> leaf) <?> "tree"
branched :: Parser (Tree PhyloByteStringLabel)
branched = do
f <- forest
s <- branchSupport
n <- node
<?> "branched"
let n' = n {pBrSup = s}
return $ Node n' f
forest :: Parser [Tree PhyloByteStringLabel]
forest = do
_ <- char (c2w '(')
f <- tree `sepBy1` char (c2w ',')
_ <- char (c2w ')')
<?> "forest"
return f
branchSupport :: Parser (Maybe Double)
branchSupport = optional $ try float <|> try decimalAsDouble
leaf :: Parser (Tree PhyloByteStringLabel)
leaf = do
n <- node
<?> "leaf"
return $ Node n []
node :: Parser PhyloByteStringLabel
node = do
n <- name
b <- branchLength
<?> "node"
return $ PhyloLabel n Nothing b
checkNameCharacter :: Word8 -> Bool
checkNameCharacter c = c `notElem` map c2w " :;()[],"
name :: Parser L.ByteString
name = L.pack <$> many (satisfy checkNameCharacter) <?> "name"
branchLength :: Parser Double
branchLength = char (c2w ':') *> branchLengthGiven <|> pure 0 <?> "branchLength"
branchLengthGiven :: Parser Double
branchLengthGiven = try float <|> decimalAsDouble
decimalAsDouble :: Parser Double
decimalAsDouble = fromIntegral <$> (decimal :: Parser Int)