-- |
-- Module      :  ELynx.Tree.Mrca
-- Description :  Most recent common ancestors
-- Copyright   :  2022 Dominik Schrempf
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  experimental
-- Portability :  portable
--
-- Creation date: Wed Jun 29 15:57:09 2022.
--
-- Specify nodes using most recent common ancestors (MRCA).
module ELynx.Tree.Mrca
  ( isAncestor,
    isMrca,
    getPathToMrca,
    getTreeAtMrca,
    findNode,
  )
where

import Data.Either
import Data.List
import Data.Maybe
import qualified Data.Set as S
import ELynx.Tree.Rooted
import ELynx.Tree.Zipper

-- | Test if the root node of the given tree is an ancestor of the given leaves.
isAncestor :: Ord a => S.Set a -> Tree e a -> Bool
--                      True if an x of xs is not in the collection of leaves.
--                False      if an x of xs is not in the collection of leaves. -> OK.
isAncestor :: Set a -> Tree e a -> Bool
isAncestor Set a
xs Tree e a
t = Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ (a -> Bool) -> Set a -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (a -> Set a -> Bool
forall a. Ord a => a -> Set a -> Bool
`S.notMember` Set a
lvs) Set a
xs
  where
    lvs :: Set a
lvs = [a] -> Set a
forall a. Ord a => [a] -> Set a
S.fromList ([a] -> Set a) -> [a] -> Set a
forall a b. (a -> b) -> a -> b
$ Tree e a -> [a]
forall e a. Tree e a -> [a]
leaves Tree e a
t

-- | Test if the root node of the given tree is the MRCA of the given leaves.
isMrca :: Ord a => S.Set a -> Tree e a -> Bool
--                                    True if any daughter forest is an ancestor.
--                               False     if any daughter forest is an ancestor. -> OK.
isMrca :: Set a -> Tree e a -> Bool
isMrca Set a
xs Tree e a
t = Set a -> Tree e a -> Bool
forall a e. Ord a => Set a -> Tree e a -> Bool
isAncestor Set a
xs Tree e a
t Bool -> Bool -> Bool
&& Bool -> Bool
not ((Tree e a -> Bool) -> [Tree e a] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (Set a -> Tree e a -> Bool
forall a e. Ord a => Set a -> Tree e a -> Bool
isAncestor Set a
xs) (Tree e a -> [Tree e a]
forall e a. Tree e a -> Forest e a
forest Tree e a
t))

-- | Get the path to the MRCA of the given leaves on the given tree.
--
-- Return 'Left' if:
--
-- - The tree has duplicate leaves.
--
-- - The MRCA cannot be found.
getPathToMrca :: (Ord a, Show a) => S.Set a -> Tree e a -> Either String Path
getPathToMrca :: Set a -> Tree e a -> Either String Path
getPathToMrca Set a
ss Tree e a
tr
  | Tree e a -> Bool
forall a e. Ord a => Tree e a -> Bool
duplicateLeaves Tree e a
tr = String -> Either String Path
forall a b. a -> Either a b
Left String
"getPathToMrca: Tree contains duplicate leaves."
  | Bool
otherwise = Path -> Path
forall a. [a] -> [a]
tail (Path -> Path) -> Either String Path -> Either String Path
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> Tree e a -> Either String Path
forall t e. (Num t, Enum t) => t -> Tree e a -> Either String [t]
go Int
0 Tree e a
tr
  where
    go :: t -> Tree e a -> Either String [t]
go t
i Tree e a
t
      | Set a -> Tree e a -> Bool
forall a e. Ord a => Set a -> Tree e a -> Bool
isMrca Set a
ss Tree e a
t = [t] -> Either String [t]
forall a b. b -> Either a b
Right [t
i]
      --                                    One path will be (Right p).
      | Set a -> Tree e a -> Bool
forall a e. Ord a => Set a -> Tree e a -> Bool
isAncestor Set a
ss Tree e a
t =
          case (Either String [t] -> Bool)
-> [Either String [t]] -> Maybe (Either String [t])
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find Either String [t] -> Bool
forall a b. Either a b -> Bool
isRight [t -> Tree e a -> Either String [t]
go t
j Tree e a
t' | (t
j, Tree e a
t') <- [t] -> [Tree e a] -> [(t, Tree e a)]
forall a b. [a] -> [b] -> [(a, b)]
zip [t
0 ..] (Tree e a -> [Tree e a]
forall e a. Tree e a -> Forest e a
forest Tree e a
t)] of
            -- Use 'error' because this should never happen since one subtree has
            -- to contain the MRCA.
            Maybe (Either String [t])
Nothing -> String -> Either String [t]
forall a. HasCallStack => String -> a
error (String -> Either String [t]) -> String -> Either String [t]
forall a b. (a -> b) -> a -> b
$ String
"getPathToMrca: BUG; I am ancestor but no MRCA found for: " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Set a -> String
forall a. Show a => a -> String
show Set a
ss
            Just Either String [t]
xs -> ([t] -> [t]) -> Either String [t] -> Either String [t]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (t
i t -> [t] -> [t]
forall a. a -> [a] -> [a]
:) Either String [t]
xs
      | Bool
otherwise = String -> Either String [t]
forall a b. a -> Either a b
Left (String -> Either String [t]) -> String -> Either String [t]
forall a b. (a -> b) -> a -> b
$ String
"getPathToMrca: Could not get MRCA for: " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Set a -> String
forall a. Show a => a -> String
show Set a
ss String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"."

getTreeAtMrca :: (Ord a, Show a) => S.Set a -> Tree e a -> Either String (Tree e a)
getTreeAtMrca :: Set a -> Tree e a -> Either String (Tree e a)
getTreeAtMrca Set a
ss Tree e a
tr
  | Tree e a -> Bool
forall a e. Ord a => Tree e a -> Bool
duplicateLeaves Tree e a
tr = String -> Either String (Tree e a)
forall a b. a -> Either a b
Left String
"getTreeAtMrca: Tree contains duplicate leaves."
  | Bool
otherwise = Tree e a -> Either String (Tree e a)
forall e. Tree e a -> Either String (Tree e a)
go Tree e a
tr
  where
    go :: Tree e a -> Either String (Tree e a)
go Tree e a
t
      | Set a -> Tree e a -> Bool
forall a e. Ord a => Set a -> Tree e a -> Bool
isMrca Set a
ss Tree e a
t = Tree e a -> Either String (Tree e a)
forall a b. b -> Either a b
Right Tree e a
t
      --                                One tree will be (Right t).
      | Set a -> Tree e a -> Bool
forall a e. Ord a => Set a -> Tree e a -> Bool
isAncestor Set a
ss Tree e a
t = case (Either String (Tree e a) -> Bool)
-> [Either String (Tree e a)] -> Maybe (Either String (Tree e a))
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find Either String (Tree e a) -> Bool
forall a b. Either a b -> Bool
isRight ([Either String (Tree e a)] -> Maybe (Either String (Tree e a)))
-> [Either String (Tree e a)] -> Maybe (Either String (Tree e a))
forall a b. (a -> b) -> a -> b
$ (Tree e a -> Either String (Tree e a))
-> [Tree e a] -> [Either String (Tree e a)]
forall a b. (a -> b) -> [a] -> [b]
map Tree e a -> Either String (Tree e a)
go (Tree e a -> [Tree e a]
forall e a. Tree e a -> Forest e a
forest Tree e a
t) of
          -- Use 'error' because this should never happen since one subtree has
          -- to contain the MRCA.
          Maybe (Either String (Tree e a))
Nothing -> String -> Either String (Tree e a)
forall a. HasCallStack => String -> a
error (String -> Either String (Tree e a))
-> String -> Either String (Tree e a)
forall a b. (a -> b) -> a -> b
$ String
"getTreeAtMrca: BUG; I am ancestor but no MRCA found for: " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Set a -> String
forall a. Show a => a -> String
show Set a
ss
          Just Either String (Tree e a)
x -> Either String (Tree e a)
x
      | Bool
otherwise = String -> Either String (Tree e a)
forall a b. a -> Either a b
Left (String -> Either String (Tree e a))
-> String -> Either String (Tree e a)
forall a b. (a -> b) -> a -> b
$ String
"getTreeAtMrca: Could not get MRCA for: " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Set a -> String
forall a. Show a => a -> String
show Set a
ss String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"."

-- | Find a node on a tree.
--
-- If the node is found, the node is specified by the MRCA of the returned set
-- of leaves. The set will contain one element if the node is a leaf, or two
-- elements, if the node is internal.
--
-- Return 'Left' if:
--
-- - The tree has duplicate labels.
--
-- - The node cannot be found.
--
-- - The node is a degree two node.
findNode :: (Ord a, Show a) => a -> Tree e a -> Either String (S.Set a)
findNode :: a -> Tree e a -> Either String (Set a)
findNode a
n Tree e a
t
  | Tree e a -> Bool
forall a e. Ord a => Tree e a -> Bool
duplicateLabels Tree e a
t = String -> Either String (Set a)
forall a b. a -> Either a b
Left String
"findNode: tree contains duplicate labels"
  | Bool
otherwise = case a -> Tree e a -> Maybe (Either String (Set a))
forall t e. Ord t => t -> Tree e t -> Maybe (Either String (Set t))
go a
n Tree e a
t of
      Maybe (Either String (Set a))
Nothing -> String -> Either String (Set a)
forall a b. a -> Either a b
Left (String -> Either String (Set a))
-> String -> Either String (Set a)
forall a b. (a -> b) -> a -> b
$ String
"findNode: node not found: " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> a -> String
forall a. Show a => a -> String
show a
n
      Just Either String (Set a)
x -> Either String (Set a)
x
  where
    go :: t -> Tree e t -> Maybe (Either String (Set t))
go t
x (Node e
_ t
y Forest e t
ts)
      | t
x t -> t -> Bool
forall a. Eq a => a -> a -> Bool
== t
y = case Forest e t
ts of
          [] -> Either String (Set t) -> Maybe (Either String (Set t))
forall a. a -> Maybe a
Just (Either String (Set t) -> Maybe (Either String (Set t)))
-> Either String (Set t) -> Maybe (Either String (Set t))
forall a b. (a -> b) -> a -> b
$ Set t -> Either String (Set t)
forall a b. b -> Either a b
Right (Set t -> Either String (Set t)) -> Set t -> Either String (Set t)
forall a b. (a -> b) -> a -> b
$ t -> Set t
forall a. a -> Set a
S.singleton t
x
          (Tree e t
l : Tree e t
r : Forest e t
_) -> Either String (Set t) -> Maybe (Either String (Set t))
forall a. a -> Maybe a
Just (Either String (Set t) -> Maybe (Either String (Set t)))
-> Either String (Set t) -> Maybe (Either String (Set t))
forall a b. (a -> b) -> a -> b
$ Set t -> Either String (Set t)
forall a b. b -> Either a b
Right (Set t -> Either String (Set t)) -> Set t -> Either String (Set t)
forall a b. (a -> b) -> a -> b
$ [t] -> Set t
forall a. Ord a => [a] -> Set a
S.fromList [[t] -> t
forall a. [a] -> a
head ([t] -> t) -> [t] -> t
forall a b. (a -> b) -> a -> b
$ Tree e t -> [t]
forall e a. Tree e a -> [a]
leaves Tree e t
l, [t] -> t
forall a. [a] -> a
head ([t] -> t) -> [t] -> t
forall a b. (a -> b) -> a -> b
$ Tree e t -> [t]
forall e a. Tree e a -> [a]
leaves Tree e t
r]
          Forest e t
_ -> Either String (Set t) -> Maybe (Either String (Set t))
forall a. a -> Maybe a
Just (Either String (Set t) -> Maybe (Either String (Set t)))
-> Either String (Set t) -> Maybe (Either String (Set t))
forall a b. (a -> b) -> a -> b
$ String -> Either String (Set t)
forall a b. a -> Either a b
Left (String -> Either String (Set t))
-> String -> Either String (Set t)
forall a b. (a -> b) -> a -> b
$ String
"findNode: degree two node found" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> a -> String
forall a. Show a => a -> String
show a
n
      | Bool
otherwise = case [Maybe (Either String (Set t))] -> [Either String (Set t)]
forall a. [Maybe a] -> [a]
catMaybes ([Maybe (Either String (Set t))] -> [Either String (Set t)])
-> [Maybe (Either String (Set t))] -> [Either String (Set t)]
forall a b. (a -> b) -> a -> b
$ (Tree e t -> Maybe (Either String (Set t)))
-> Forest e t -> [Maybe (Either String (Set t))]
forall a b. (a -> b) -> [a] -> [b]
map (t -> Tree e t -> Maybe (Either String (Set t))
go t
x) Forest e t
ts of
          [] -> Maybe (Either String (Set t))
forall a. Maybe a
Nothing
          [Either String (Set t)
z] -> Either String (Set t) -> Maybe (Either String (Set t))
forall a. a -> Maybe a
Just Either String (Set t)
z
          -- Use 'error' because this should never happen since we check for
          -- duplicate labels.
          [Either String (Set t)]
_ -> String -> Maybe (Either String (Set t))
forall a. HasCallStack => String -> a
error (String -> Maybe (Either String (Set t)))
-> String -> Maybe (Either String (Set t))
forall a b. (a -> b) -> a -> b
$ String
"findNode: BUG; more internal nodes found" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> a -> String
forall a. Show a => a -> String
show a
n