-- |
-- Module      :  ELynx.Tree.Mrca
-- Description :  Most recent common ancestors
-- Copyright   :  2022 Dominik Schrempf
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  experimental
-- Portability :  portable
--
-- Creation date: Wed Jun 29 15:57:09 2022.
--
-- Specify nodes using most recent common ancestors (MRCA).
module ELynx.Tree.Mrca
  ( isAncestor,
    isMrca,
    getPathToMrca,
    getTreeAtMrca,
    findNode,
  )
where

import Data.Either
import Data.List
import Data.Maybe
import qualified Data.Set as S
import ELynx.Tree.Rooted
import ELynx.Tree.Zipper

-- | Test if the root node of the given tree is an ancestor of the given leaves.
isAncestor :: Ord a => S.Set a -> Tree e a -> Bool
--                      True if an x of xs is not in the collection of leaves.
--                False      if an x of xs is not in the collection of leaves. -> OK.
isAncestor :: forall a e. Ord a => Set a -> Tree e a -> Bool
isAncestor Set a
xs Tree e a
t = Bool -> Bool
not forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (forall a. Ord a => a -> Set a -> Bool
`S.notMember` Set a
lvs) Set a
xs
  where
    lvs :: Set a
lvs = forall a. Ord a => [a] -> Set a
S.fromList forall a b. (a -> b) -> a -> b
$ forall e a. Tree e a -> [a]
leaves Tree e a
t

-- | Test if the root node of the given tree is the MRCA of the given leaves.
isMrca :: Ord a => S.Set a -> Tree e a -> Bool
--                                    True if any daughter forest is an ancestor.
--                               False     if any daughter forest is an ancestor. -> OK.
isMrca :: forall a e. Ord a => Set a -> Tree e a -> Bool
isMrca Set a
xs Tree e a
t = forall a e. Ord a => Set a -> Tree e a -> Bool
isAncestor Set a
xs Tree e a
t Bool -> Bool -> Bool
&& Bool -> Bool
not (forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (forall a e. Ord a => Set a -> Tree e a -> Bool
isAncestor Set a
xs) (forall e a. Tree e a -> Forest e a
forest Tree e a
t))

-- | Get the path to the MRCA of the given leaves on the given tree.
--
-- Return 'Left' if:
--
-- - The tree has duplicate leaves.
--
-- - The MRCA cannot be found.
getPathToMrca :: (Ord a, Show a) => S.Set a -> Tree e a -> Either String Path
getPathToMrca :: forall a e.
(Ord a, Show a) =>
Set a -> Tree e a -> Either String Path
getPathToMrca Set a
ss Tree e a
tr
  | forall a e. Ord a => Tree e a -> Bool
duplicateLeaves Tree e a
tr = forall a b. a -> Either a b
Left String
"getPathToMrca: Tree contains duplicate leaves."
  | Bool
otherwise = forall a. [a] -> [a]
tail forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall {t} {e}.
(Num t, Enum t) =>
t -> Tree e a -> Either String [t]
go Int
0 Tree e a
tr
  where
    go :: t -> Tree e a -> Either String [t]
go t
i Tree e a
t
      | forall a e. Ord a => Set a -> Tree e a -> Bool
isMrca Set a
ss Tree e a
t = forall a b. b -> Either a b
Right [t
i]
      --                                    One path will be (Right p).
      | forall a e. Ord a => Set a -> Tree e a -> Bool
isAncestor Set a
ss Tree e a
t =
          case forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find forall a b. Either a b -> Bool
isRight [t -> Tree e a -> Either String [t]
go t
j Tree e a
t' | (t
j, Tree e a
t') <- forall a b. [a] -> [b] -> [(a, b)]
zip [t
0 ..] (forall e a. Tree e a -> Forest e a
forest Tree e a
t)] of
            -- Use 'error' because this should never happen since one subtree has
            -- to contain the MRCA.
            Maybe (Either String [t])
Nothing -> forall a. HasCallStack => String -> a
error forall a b. (a -> b) -> a -> b
$ String
"getPathToMrca: BUG; I am ancestor but no MRCA found for: " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show Set a
ss
            Just Either String [t]
xs -> forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (t
i forall a. a -> [a] -> [a]
:) Either String [t]
xs
      | Bool
otherwise = forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ String
"getPathToMrca: Could not get MRCA for: " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show Set a
ss forall a. Semigroup a => a -> a -> a
<> String
"."

getTreeAtMrca :: (Ord a, Show a) => S.Set a -> Tree e a -> Either String (Tree e a)
getTreeAtMrca :: forall a e.
(Ord a, Show a) =>
Set a -> Tree e a -> Either String (Tree e a)
getTreeAtMrca Set a
ss Tree e a
tr
  | forall a e. Ord a => Tree e a -> Bool
duplicateLeaves Tree e a
tr = forall a b. a -> Either a b
Left String
"getTreeAtMrca: Tree contains duplicate leaves."
  | Bool
otherwise = forall {e}. Tree e a -> Either String (Tree e a)
go Tree e a
tr
  where
    go :: Tree e a -> Either String (Tree e a)
go Tree e a
t
      | forall a e. Ord a => Set a -> Tree e a -> Bool
isMrca Set a
ss Tree e a
t = forall a b. b -> Either a b
Right Tree e a
t
      --                                One tree will be (Right t).
      | forall a e. Ord a => Set a -> Tree e a -> Bool
isAncestor Set a
ss Tree e a
t = case forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find forall a b. Either a b -> Bool
isRight forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map Tree e a -> Either String (Tree e a)
go (forall e a. Tree e a -> Forest e a
forest Tree e a
t) of
          -- Use 'error' because this should never happen since one subtree has
          -- to contain the MRCA.
          Maybe (Either String (Tree e a))
Nothing -> forall a. HasCallStack => String -> a
error forall a b. (a -> b) -> a -> b
$ String
"getTreeAtMrca: BUG; I am ancestor but no MRCA found for: " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show Set a
ss
          Just Either String (Tree e a)
x -> Either String (Tree e a)
x
      | Bool
otherwise = forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ String
"getTreeAtMrca: Could not get MRCA for: " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show Set a
ss forall a. Semigroup a => a -> a -> a
<> String
"."

-- | Find a node on a tree.
--
-- If the node is found, the node is specified by the MRCA of the returned set
-- of leaves. The set will contain one element if the node is a leaf, or two
-- elements, if the node is internal.
--
-- Return 'Left' if:
--
-- - The tree has duplicate labels.
--
-- - The node cannot be found.
--
-- - The node is a degree two node.
findNode :: (Ord a, Show a) => a -> Tree e a -> Either String (S.Set a)
findNode :: forall a e.
(Ord a, Show a) =>
a -> Tree e a -> Either String (Set a)
findNode a
n Tree e a
t
  | forall a e. Ord a => Tree e a -> Bool
duplicateLabels Tree e a
t = forall a b. a -> Either a b
Left String
"findNode: tree contains duplicate labels"
  | Bool
otherwise = case forall {t} {e}.
Ord t =>
t -> Tree e t -> Maybe (Either String (Set t))
go a
n Tree e a
t of
      Maybe (Either String (Set a))
Nothing -> forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ String
"findNode: node not found: " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show a
n
      Just Either String (Set a)
x -> Either String (Set a)
x
  where
    go :: t -> Tree e t -> Maybe (Either String (Set t))
go t
x (Node e
_ t
y Forest e t
ts)
      | t
x forall a. Eq a => a -> a -> Bool
== t
y = case Forest e t
ts of
          [] -> forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a. a -> Set a
S.singleton t
x
          (Tree e t
l : Tree e t
r : Forest e t
_) -> forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a. Ord a => [a] -> Set a
S.fromList [forall a. [a] -> a
head forall a b. (a -> b) -> a -> b
$ forall e a. Tree e a -> [a]
leaves Tree e t
l, forall a. [a] -> a
head forall a b. (a -> b) -> a -> b
$ forall e a. Tree e a -> [a]
leaves Tree e t
r]
          Forest e t
_ -> forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ String
"findNode: degree two node found" forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show a
n
      | Bool
otherwise = case forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe (t -> Tree e t -> Maybe (Either String (Set t))
go t
x) Forest e t
ts of
          [] -> forall a. Maybe a
Nothing
          [Either String (Set t)
z] -> forall a. a -> Maybe a
Just Either String (Set t)
z
          -- Use 'error' because this should never happen since we check for
          -- duplicate labels.
          [Either String (Set t)]
_ -> forall a. HasCallStack => String -> a
error forall a b. (a -> b) -> a -> b
$ String
"findNode: BUG; more internal nodes found" forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show a
n