-- |
-- Module      :  ELynx.Topology.Phylogeny
-- Description :  Phylogenetic topologies
-- Copyright   :  (c) Dominik Schrempf, 2021
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Sat Jul 18 13:15:49 2020.
--
-- A topology, as it is used in phylogenetics is a 'Topology' with unique leaf
-- labels, and the order of the topologies in the sub-forest is considered to be
-- meaningless.
--
-- NOTE: The functions in this module are defined using the functions in
-- "ELynx.Tree.Phylogeny". This induces a runtime overhead, but greatly reduces
-- the probability of additional bugs.
module ELynx.Topology.Phylogeny
  ( equal,
    equal',
    intersect,
    bifurcating,
    outgroup,
    midpoint,
    roots,
  )
where

import Data.Default.Class
import Data.List hiding (intersect)
import Data.Maybe
import qualified Data.Set as S
import ELynx.Topology.Rooted
import ELynx.Tree.Length
import qualified ELynx.Tree.Phylogeny as T
import qualified ELynx.Tree.Rooted as T

-- | The equality check is slow because the order of children is considered to
-- be arbitrary.
--
-- Return 'Left' if a topology does not have unique leaves.
equal :: (Eq a, Ord a) => Topology a -> Topology a -> Either String Bool
equal :: Topology a -> Topology a -> Either String Bool
equal Topology a
tL Topology a
tR
  | Topology a -> Bool
forall a. Ord a => Topology a -> Bool
duplicateLeaves Topology a
tL = String -> Either String Bool
forall a b. a -> Either a b
Left String
"equal: Left topology has duplicate leaves."
  | Topology a -> Bool
forall a. Ord a => Topology a -> Bool
duplicateLeaves Topology a
tR = String -> Either String Bool
forall a b. a -> Either a b
Left String
"equal: Right topology has duplicate leaves."
  | Bool
otherwise = Bool -> Either String Bool
forall a b. b -> Either a b
Right (Bool -> Either String Bool) -> Bool -> Either String Bool
forall a b. (a -> b) -> a -> b
$ Topology a -> Topology a -> Bool
forall a. Eq a => Topology a -> Topology a -> Bool
equal' Topology a
tL Topology a
tR

-- | Same as 'equal', but assume that leaves are unique.
equal' :: Eq a => Topology a -> Topology a -> Bool
equal' :: Topology a -> Topology a -> Bool
equal' (Leaf a
lbL) (Leaf a
lbR) =
  a
lbL a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
lbR
equal' (Node Forest a
tsL) (Node Forest a
tsR) =
  (Forest a -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length Forest a
tsL Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Forest a -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length Forest a
tsR)
    Bool -> Bool -> Bool
&& (Topology a -> Bool) -> Forest a -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (Topology a -> Forest a -> Bool
forall (t :: * -> *) a.
(Foldable t, Eq a) =>
Topology a -> t (Topology a) -> Bool
`elem'` Forest a
tsR) Forest a
tsL
  where
    elem' :: Topology a -> t (Topology a) -> Bool
elem' Topology a
t t (Topology a)
ts = Maybe (Topology a) -> Bool
forall a. Maybe a -> Bool
isJust (Maybe (Topology a) -> Bool) -> Maybe (Topology a) -> Bool
forall a b. (a -> b) -> a -> b
$ (Topology a -> Bool) -> t (Topology a) -> Maybe (Topology a)
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (Topology a -> Topology a -> Bool
forall a. Eq a => Topology a -> Topology a -> Bool
equal' Topology a
t) t (Topology a)
ts
equal' Topology a
_ Topology a
_ = Bool
False

-- | Intersection of topologies.
--
-- | See 'ELynx.Tree.Phylogeny.intersect'.
intersect ::
  (Ord a) => [Topology a] -> Either String [Topology a]
intersect :: [Topology a] -> Either String [Topology a]
intersect [Topology a]
ts
  | Set a -> Bool
forall a. Set a -> Bool
S.null Set a
lvsCommon = String -> Either String [Topology a]
forall a b. a -> Either a b
Left String
"intersect: Intersection of leaves is empty."
  | Bool
otherwise = case [Maybe (Topology a)] -> Maybe [Topology a]
forall (t :: * -> *) (m :: * -> *) a.
(Traversable t, Monad m) =>
t (m a) -> m (t a)
sequence [(a -> Bool) -> Topology a -> Maybe (Topology a)
forall a. (a -> Bool) -> Topology a -> Maybe (Topology a)
dropLeavesWith (Set a -> a -> Bool
forall a. Ord a => Set a -> a -> Bool
predicate Set a
ls) Topology a
t | (Set a
ls, Topology a
t) <- [Set a] -> [Topology a] -> [(Set a, Topology a)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Set a]
leavesToDrop [Topology a]
ts] of
    Maybe [Topology a]
Nothing -> String -> Either String [Topology a]
forall a b. a -> Either a b
Left String
"intersect: A topology is empty."
    Just [Topology a]
ts' -> [Topology a] -> Either String [Topology a]
forall a b. b -> Either a b
Right [Topology a]
ts'
  where
    -- Leaf sets.
    lvss :: [Set a]
lvss = (Topology a -> Set a) -> [Topology a] -> [Set a]
forall a b. (a -> b) -> [a] -> [b]
map ([a] -> Set a
forall a. Ord a => [a] -> Set a
S.fromList ([a] -> Set a) -> (Topology a -> [a]) -> Topology a -> Set a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Topology a -> [a]
forall a. Topology a -> [a]
leaves) [Topology a]
ts
    -- Common leaf set.
    lvsCommon :: Set a
lvsCommon = (Set a -> Set a -> Set a) -> [Set a] -> Set a
forall a. (a -> a -> a) -> [a] -> a
foldl1' Set a -> Set a -> Set a
forall a. Ord a => Set a -> Set a -> Set a
S.intersection [Set a]
lvss
    -- Leaves to drop for each topology in the forest.
    leavesToDrop :: [Set a]
leavesToDrop = (Set a -> Set a) -> [Set a] -> [Set a]
forall a b. (a -> b) -> [a] -> [b]
map (Set a -> Set a -> Set a
forall a. Ord a => Set a -> Set a -> Set a
S.\\ Set a
lvsCommon) [Set a]
lvss
    -- Predicate.
    predicate :: Set a -> a -> Bool
predicate Set a
lvsToDr a
l = a
l a -> Set a -> Bool
forall a. Ord a => a -> Set a -> Bool
`S.member` Set a
lvsToDr

-- | Check if topology is bifurcating.
--
-- | See 'ELynx.Tree.Phylogeny.intersect'.
bifurcating :: Topology a -> Bool
bifurcating :: Topology a -> Bool
bifurcating (Leaf a
_) = Bool
True
bifurcating (Node Forest a
ts) = (Forest a -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length Forest a
ts Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
2) Bool -> Bool -> Bool
&& (Topology a -> Bool) -> Forest a -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Topology a -> Bool
forall a. Topology a -> Bool
bifurcating Forest a
ts

-- Perform a computation over the 'Tree' data type.
overTree ::
  (Default a, Functor f) =>
  (T.Tree Length a -> f (T.Tree Length a)) ->
  Topology a ->
  f (Topology a)
overTree :: (Tree Length a -> f (Tree Length a))
-> Topology a -> f (Topology a)
overTree Tree Length a -> f (Tree Length a)
f = f (Tree Length a) -> f (Topology a)
forall e a. f (Tree e a) -> f (Topology a)
goBack (f (Tree Length a) -> f (Topology a))
-> (Topology a -> f (Tree Length a))
-> Topology a
-> f (Topology a)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Tree Length a -> f (Tree Length a)
f (Tree Length a -> f (Tree Length a))
-> (Topology a -> Tree Length a) -> Topology a -> f (Tree Length a)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Topology a -> Tree Length a
goThere
  where
    goThere :: Topology a -> Tree Length a
goThere = Length -> a -> Topology a -> Tree Length a
forall e a. e -> a -> Topology a -> Tree e a
toBranchLabelTreeWith (Double -> Length
toLengthUnsafe Double
1.0) a
forall a. Default a => a
def
    goBack :: f (Tree e a) -> f (Topology a)
goBack = (Tree e a -> Topology a) -> f (Tree e a) -> f (Topology a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Tree e a -> Topology a
forall e a. Tree e a -> Topology a
fromBranchLabelTree

-- | Root topology using an outgroup.
--
--   See 'ELynx.Tree.Phylogeny.outgroup'.
outgroup :: (Default a, Ord a) => S.Set a -> Topology a -> Either String (Topology a)
outgroup :: Set a -> Topology a -> Either String (Topology a)
outgroup Set a
xs = (Tree Length a -> Either String (Tree Length a))
-> Topology a -> Either String (Topology a)
forall a (f :: * -> *).
(Default a, Functor f) =>
(Tree Length a -> f (Tree Length a))
-> Topology a -> f (Topology a)
overTree (Set a -> Tree Length a -> Either String (Tree Length a)
forall e a.
(Semigroup e, Splittable e, Default a, Ord a) =>
Set a -> Tree e a -> Either String (Tree e a)
T.outgroup Set a
xs)

-- | Root topology at the midpoint.
--
-- See 'ELynx.Tree.Phylogeny.midpoint'.
--
-- Use 'depth' to measure topology height.
--
-- If the midpoint is ambiguous because the sum of the left and right depths is
-- odd, the depth of the left sub-topology will be set to be one node greater
-- than the one of the right sub-topology.
midpoint :: Default a => Topology a -> Either String (Topology a)
midpoint :: Topology a -> Either String (Topology a)
midpoint = (Tree Length a -> Either String (Tree Length a))
-> Topology a -> Either String (Topology a)
forall a (f :: * -> *).
(Default a, Functor f) =>
(Tree Length a -> f (Tree Length a))
-> Topology a -> f (Topology a)
overTree Tree Length a -> Either String (Tree Length a)
forall e a.
(Semigroup e, Splittable e, HasLength e, Default a) =>
Tree e a -> Either String (Tree e a)
T.midpoint

-- | For a rooted tree with a bifurcating root node, get all possible rooted
-- trees.
--
-- See 'ELynx.Tree.Phylogeny.roots'.
roots :: Default a => Topology a -> Either String [Topology a]
roots :: Topology a -> Either String [Topology a]
roots = Either String [Tree Length a] -> Either String [Topology a]
forall e a. Either String [Tree e a] -> Either String [Topology a]
goBack (Either String [Tree Length a] -> Either String [Topology a])
-> (Topology a -> Either String [Tree Length a])
-> Topology a
-> Either String [Topology a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Tree Length a -> Either String [Tree Length a]
forall e a.
(Semigroup e, Splittable e, Default a) =>
Tree e a -> Either String (Forest e a)
T.roots (Tree Length a -> Either String [Tree Length a])
-> (Topology a -> Tree Length a)
-> Topology a
-> Either String [Tree Length a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Topology a -> Tree Length a
goThere
  where
    -- We have to use a special 'overTree' function here, since a list of
    -- topologies is returned.
    goThere :: Topology a -> Tree Length a
goThere = Length -> a -> Topology a -> Tree Length a
forall e a. e -> a -> Topology a -> Tree e a
toBranchLabelTreeWith (Double -> Length
toLengthUnsafe Double
1.0) a
forall a. Default a => a
def
    goBack :: Either String [Tree e a] -> Either String [Topology a]
goBack = (([Tree e a] -> [Topology a])
-> Either String [Tree e a] -> Either String [Topology a]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (([Tree e a] -> [Topology a])
 -> Either String [Tree e a] -> Either String [Topology a])
-> ((Tree e a -> Topology a) -> [Tree e a] -> [Topology a])
-> (Tree e a -> Topology a)
-> Either String [Tree e a]
-> Either String [Topology a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Tree e a -> Topology a) -> [Tree e a] -> [Topology a]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap) Tree e a -> Topology a
forall e a. Tree e a -> Topology a
fromBranchLabelTree