{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module Compare.Compare
( compareCmd
) where
import Control.Monad
import Control.Monad.IO.Class
import Control.Monad.Logger
import Control.Monad.Trans.Class
import Control.Monad.Trans.Reader
import qualified Data.ByteString.Lazy.Char8 as L
import Data.List
import qualified Data.Map as M
import Data.Monoid
import qualified Data.Set as S
import qualified Data.Text as T
import qualified Data.Text.Encoding as E
import qualified Data.Text.IO as T
import Data.Tree
import System.IO
import Text.Printf
import Compare.Options
import ELynx.Data.Tree.Bipartition
import ELynx.Data.Tree.BranchSupportTree as BS
import ELynx.Data.Tree.Distance
import ELynx.Data.Tree.MeasurableTree
import ELynx.Data.Tree.NamedTree
import ELynx.Data.Tree.PhyloTree
import ELynx.Data.Tree.Tree
import ELynx.Export.Tree.Newick (toNewick)
import ELynx.Import.Tree.Newick
import ELynx.Tools.InputOutput
treesOneFile :: FilePath
-> Compare (Tree (PhyloLabel L.ByteString), Tree (PhyloLabel L.ByteString))
treesOneFile tf = do
a <- lift ask
let nw = if argsNewickIqTree a then manyNewickIqTree else manyNewick
$(logInfo) $ T.pack $ "Parse file '" ++ tf ++ "'."
ts <- liftIO $ parseFileWith nw tf
let n = length ts
case compare n 2 of
LT -> error "Not enough trees in file."
GT -> error "Too many trees in file."
EQ -> return (head ts, head . tail $ ts)
treesTwoFiles :: FilePath -> FilePath
-> Compare (Tree (PhyloLabel L.ByteString), Tree (PhyloLabel L.ByteString))
treesTwoFiles tf1 tf2 = do
a <- lift ask
let nw = if argsNewickIqTree a then oneNewickIqTree else oneNewick
$(logInfo) $ T.pack $ "Parse first tree file '" ++ tf1 ++ "'."
t1 <- liftIO $ parseFileWith nw tf1
$(logInfo) $ T.pack $ "Parse second tree file '" ++ tf2 ++ "'."
t2 <- liftIO $ parseFileWith nw tf2
return (t1, t2)
compareCmd :: Maybe FilePath -> Compare ()
compareCmd outFile = do
a <- lift ask
let outFn = (++ ".out") <$> outFile
outH <- outHandle "results" outFn
let inFiles = argsInFiles a
nFiles = length inFiles
(t1, t2) <- case nFiles of
1 -> treesOneFile (head inFiles)
2 -> treesTwoFiles (head inFiles) (head . tail $ inFiles)
_ -> error "Need two input files with one tree each or one input file with two trees."
liftIO $ hPutStrLn outH "Tree 1:"
liftIO $ L.hPutStrLn outH $ toNewick t1
liftIO $ hPutStrLn outH "Tree 2:"
liftIO $ L.hPutStrLn outH $ toNewick t2
liftIO $ hPutStrLn outH ""
let lvs1 = leaves t1
lvs2 = leaves t2
lfns1 = map getName lvs1
lfns2 = map getName lvs2
s1 = S.fromList lfns1
s2 = S.fromList lfns2
if s1 == s2
then liftIO $ hPutStrLn outH "Trees have the same set of leaf names."
else liftIO $ hPutStrLn outH "Trees do not have the same set of leaf names."
liftIO $ hPutStrLn outH ""
let formatD str val = T.justifyLeft 25 ' ' str <> " " <> val
liftIO $ hPutStrLn outH "Distances."
liftIO $ T.hPutStrLn outH $ formatD "Symmetric"
(T.pack $ show $ symmetric t1 t2)
liftIO $ T.hPutStrLn outH $ formatD "Branch score"
(T.pack $ show $ branchScoreWith getName getLen t1 t2)
let t1' = BS.normalize t1
t2' = BS.normalize t2
$(logDebug) "Trees with normalized branch support values:"
$(logDebug) $ E.decodeUtf8 $ L.toStrict $ toNewick t1'
$(logDebug) $ E.decodeUtf8 $ L.toStrict $ toNewick t2'
liftIO $ T.hPutStrLn outH $ formatD "Incompatible split (0.10)"
(T.pack $ show $ incompatibleSplits (collapse 0.1 t1') (collapse 0.1 t2'))
liftIO $ T.hPutStrLn outH $ formatD "Incompatible split (0.50)"
(T.pack $ show $ incompatibleSplits (collapse 0.5 t1') (collapse 0.5 t2'))
liftIO $ T.hPutStrLn outH $ formatD "Incompatible split (0.80)"
(T.pack $ show $ incompatibleSplits (collapse 0.8 t1') (collapse 0.8 t2'))
liftIO $ T.hPutStrLn outH $ formatD "Incompatible split (0.90)"
(T.pack $ show $ incompatibleSplits (collapse 0.9 t1') (collapse 0.9 t2'))
when (argsBipartitions a) (do
let bp1 = bipartitions (fmap getName t1)
bp2 = bipartitions (fmap getName t2)
bp1Only = bp1 S.\\ bp2
bp2Only = bp2 S.\\ bp1
unless (S.null bp1Only)
(do
liftIO $ hPutStrLn outH ""
liftIO $ hPutStrLn outH "Bipartitions in Tree 1 that are not in Tree 2."
forM_ bp1Only (liftIO . hPutStrLn outH . bphuman L.unpack))
unless (S.null bp2Only)
(do
liftIO $ hPutStrLn outH ""
liftIO $ hPutStrLn outH "Bipartitions in Tree 2 that are not in Tree 1."
forM_ bp2Only (liftIO . hPutStrLn outH . bphuman L.unpack))
liftIO $ hPutStrLn outH ""
let bpCommon = bp1 `S.intersection` bp2
if S.null bpCommon
then liftIO $ hPutStrLn outH "There are no common bipartitions."
else do
let bpToBrLen1 = M.map getSum $ bipartitionToBranchLength getName (Sum . getLen) t1
bpToBrLen2 = M.map getSum $ bipartitionToBranchLength getName (Sum . getLen) t2
liftIO $ hPutStrLn outH "Common bipartitions and their respective differences in branch lengths."
liftIO $ hPutStrLn outH header
forM_ bpCommon (liftIO . hPutStrLn outH . getCommonBpStr L.unpack bpToBrLen1 bpToBrLen2))
liftIO $ hClose outH
header :: String
header = intercalate " " $ cols ++ ["Bipartition"]
where cols = map (T.unpack . T.justifyRight 12 ' ')
[ "Length 1", "Length 2", "Delta", "Relative [%]"]
getCommonBpStr :: (Ord a, Fractional b, PrintfArg b)
=> (a -> String)
-> M.Map (Bipartition a) b -> M.Map (Bipartition a) b
-> Bipartition a -> String
getCommonBpStr f m1 m2 p = intercalate " "
[ printf "% 12.7f" l1
, printf "% 12.7f" l2
, printf "% 12.7f" d
, printf "% 12.7f" rd
, s ]
where l1 = m1 M.! p
l2 = m2 M.! p
d = l1 - l2
rd = 2 * d / (l1 + l2)
s = bphuman f p