{-# LANGUAGE GeneralizedNewtypeDeriving #-}

-- |
-- Module      :  ELynx.Tree.Bipartition
-- Description :  Bipartitions on trees
-- Copyright   :  (c) Dominik Schrempf 2021
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Fri Aug 30 15:28:17 2019.
module ELynx.Tree.Bipartition
  ( groups,

    -- * Data type
    Bipartition (fromBipartition),
    bp,
    bpUnsafe,
    toSet,
    bpHuman,

    -- * Work with 'Bipartition's
    bipartition,
    bipartitions,
    getComplementaryLeaves,
    bipartitionToBranch,
  )
where

import Control.Comonad
import Control.DeepSeq
import Data.List hiding (partition)
import Data.Map (Map)
import qualified Data.Map as M
import Data.Set (Set)
import qualified Data.Set as S
import ELynx.Tree.Rooted

-- | Each node of a tree is root of an induced subtree. Set the node labels to
-- the leaves of the induced subtrees.
groups :: Tree e a -> Tree e [a]
-- I am proud of this awesome 'Comonad' usage here :).
groups :: Tree e a -> Tree e [a]
groups = (Tree e a -> [a]) -> Tree e a -> Tree e [a]
forall (w :: * -> *) a b. Comonad w => (w a -> b) -> w a -> w b
extend Tree e a -> [a]
forall e a. Tree e a -> [a]
leaves

-- | A bipartition of a tree is a grouping of the leaves of the tree into two
-- non-overlapping, non-empty sub sets.
--
-- For unrooted trees:
--
-- - Each branch partitions the leaves of the tree into two subsets, or a
--   bipartition.
--
-- For rooted trees:
--
-- - A bifurcating root node induces a bipartition; see 'bipartition'.
--
-- - Each inner node induces a bipartition by taking the leaves of the sub tree
--   and the complement leaf set of the full tree.
--
-- The order of the two subsets of a 'Bipartition' is meaningless. That is,
-- 'Bipartition's are weird in that
--
-- > Bipartition x y == Bipartition y x
--
-- is 'True'. Also,
--
-- > Bipartition x y > Bipartition y x
--
-- is False, even when @x > y@. That's why we have to make sure that for
--
-- > Bipartition x y
--
-- we always have @x >= y@. We ensure by construction that the larger subset
-- comes first, and so that equality checks are meaningful; see 'bp' and
-- 'bpUnsafe'.
newtype Bipartition a = Bipartition
  { Bipartition a -> (Set a, Set a)
fromBipartition :: (Set a, Set a)
  }
  deriving (Bipartition a -> Bipartition a -> Bool
(Bipartition a -> Bipartition a -> Bool)
-> (Bipartition a -> Bipartition a -> Bool) -> Eq (Bipartition a)
forall a. Eq a => Bipartition a -> Bipartition a -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Bipartition a -> Bipartition a -> Bool
$c/= :: forall a. Eq a => Bipartition a -> Bipartition a -> Bool
== :: Bipartition a -> Bipartition a -> Bool
$c== :: forall a. Eq a => Bipartition a -> Bipartition a -> Bool
Eq, Eq (Bipartition a)
Eq (Bipartition a)
-> (Bipartition a -> Bipartition a -> Ordering)
-> (Bipartition a -> Bipartition a -> Bool)
-> (Bipartition a -> Bipartition a -> Bool)
-> (Bipartition a -> Bipartition a -> Bool)
-> (Bipartition a -> Bipartition a -> Bool)
-> (Bipartition a -> Bipartition a -> Bipartition a)
-> (Bipartition a -> Bipartition a -> Bipartition a)
-> Ord (Bipartition a)
Bipartition a -> Bipartition a -> Bool
Bipartition a -> Bipartition a -> Ordering
Bipartition a -> Bipartition a -> Bipartition a
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
forall a. Ord a => Eq (Bipartition a)
forall a. Ord a => Bipartition a -> Bipartition a -> Bool
forall a. Ord a => Bipartition a -> Bipartition a -> Ordering
forall a. Ord a => Bipartition a -> Bipartition a -> Bipartition a
min :: Bipartition a -> Bipartition a -> Bipartition a
$cmin :: forall a. Ord a => Bipartition a -> Bipartition a -> Bipartition a
max :: Bipartition a -> Bipartition a -> Bipartition a
$cmax :: forall a. Ord a => Bipartition a -> Bipartition a -> Bipartition a
>= :: Bipartition a -> Bipartition a -> Bool
$c>= :: forall a. Ord a => Bipartition a -> Bipartition a -> Bool
> :: Bipartition a -> Bipartition a -> Bool
$c> :: forall a. Ord a => Bipartition a -> Bipartition a -> Bool
<= :: Bipartition a -> Bipartition a -> Bool
$c<= :: forall a. Ord a => Bipartition a -> Bipartition a -> Bool
< :: Bipartition a -> Bipartition a -> Bool
$c< :: forall a. Ord a => Bipartition a -> Bipartition a -> Bool
compare :: Bipartition a -> Bipartition a -> Ordering
$ccompare :: forall a. Ord a => Bipartition a -> Bipartition a -> Ordering
$cp1Ord :: forall a. Ord a => Eq (Bipartition a)
Ord, Int -> Bipartition a -> ShowS
[Bipartition a] -> ShowS
Bipartition a -> String
(Int -> Bipartition a -> ShowS)
-> (Bipartition a -> String)
-> ([Bipartition a] -> ShowS)
-> Show (Bipartition a)
forall a. Show a => Int -> Bipartition a -> ShowS
forall a. Show a => [Bipartition a] -> ShowS
forall a. Show a => Bipartition a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Bipartition a] -> ShowS
$cshowList :: forall a. Show a => [Bipartition a] -> ShowS
show :: Bipartition a -> String
$cshow :: forall a. Show a => Bipartition a -> String
showsPrec :: Int -> Bipartition a -> ShowS
$cshowsPrec :: forall a. Show a => Int -> Bipartition a -> ShowS
Show, ReadPrec [Bipartition a]
ReadPrec (Bipartition a)
Int -> ReadS (Bipartition a)
ReadS [Bipartition a]
(Int -> ReadS (Bipartition a))
-> ReadS [Bipartition a]
-> ReadPrec (Bipartition a)
-> ReadPrec [Bipartition a]
-> Read (Bipartition a)
forall a. (Read a, Ord a) => ReadPrec [Bipartition a]
forall a. (Read a, Ord a) => ReadPrec (Bipartition a)
forall a. (Read a, Ord a) => Int -> ReadS (Bipartition a)
forall a. (Read a, Ord a) => ReadS [Bipartition a]
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [Bipartition a]
$creadListPrec :: forall a. (Read a, Ord a) => ReadPrec [Bipartition a]
readPrec :: ReadPrec (Bipartition a)
$creadPrec :: forall a. (Read a, Ord a) => ReadPrec (Bipartition a)
readList :: ReadS [Bipartition a]
$creadList :: forall a. (Read a, Ord a) => ReadS [Bipartition a]
readsPrec :: Int -> ReadS (Bipartition a)
$creadsPrec :: forall a. (Read a, Ord a) => Int -> ReadS (Bipartition a)
Read, Bipartition a -> ()
(Bipartition a -> ()) -> NFData (Bipartition a)
forall a. NFData a => Bipartition a -> ()
forall a. (a -> ()) -> NFData a
rnf :: Bipartition a -> ()
$crnf :: forall a. NFData a => Bipartition a -> ()
NFData)

-- | Create a bipartition from two sets.
--
-- Ensure that the larger set comes first.
--
-- Return 'Left' if one set is empty.
bp :: Ord a => Set a -> Set a -> Either String (Bipartition a)
bp :: Set a -> Set a -> Either String (Bipartition a)
bp Set a
xs Set a
ys
  | Set a -> Bool
forall a. Set a -> Bool
S.null Set a
xs = String -> Either String (Bipartition a)
forall a b. a -> Either a b
Left String
"bp: Left set empty."
  | Set a -> Bool
forall a. Set a -> Bool
S.null Set a
ys = String -> Either String (Bipartition a)
forall a b. a -> Either a b
Left String
"bp: Right set empty."
  | Bool
otherwise = Bipartition a -> Either String (Bipartition a)
forall a b. b -> Either a b
Right (Bipartition a -> Either String (Bipartition a))
-> Bipartition a -> Either String (Bipartition a)
forall a b. (a -> b) -> a -> b
$ Set a -> Set a -> Bipartition a
forall a. Ord a => Set a -> Set a -> Bipartition a
bpUnsafe Set a
xs Set a
ys

-- | Create a bipartition from two sets.
--
-- Ensure that the larger set comes first.
bpUnsafe :: Ord a => Set a -> Set a -> Bipartition a
bpUnsafe :: Set a -> Set a -> Bipartition a
bpUnsafe Set a
xs Set a
ys = if Set a
xs Set a -> Set a -> Bool
forall a. Ord a => a -> a -> Bool
>= Set a
ys then (Set a, Set a) -> Bipartition a
forall a. (Set a, Set a) -> Bipartition a
Bipartition (Set a
xs, Set a
ys) else (Set a, Set a) -> Bipartition a
forall a. (Set a, Set a) -> Bipartition a
Bipartition (Set a
ys, Set a
xs)

-- | Conversion to a set containing both partitions.
toSet :: Ord a => Bipartition a -> Set a
toSet :: Bipartition a -> Set a
toSet (Bipartition (Set a
x, Set a
y)) = Set a -> Set a -> Set a
forall a. Ord a => Set a -> Set a -> Set a
S.union Set a
x Set a
y

-- I decided not to provide a human readable show instance because I need the
-- following identity to hold:
--
-- > read . show = id
--
-- This identity is met by the derived instance anyways. A more human readable
-- instance would most likely violate the identity. However, I provide separate
-- functions to convert bipartitions into human readable strings.

-- | Show a bipartition in a human readable format. Use a provided function to
-- extract information of interest.
bpHuman :: Show a => Bipartition a -> String
bpHuman :: Bipartition a -> String
bpHuman (Bipartition (Set a
x, Set a
y)) = String
"(" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Set a -> String
forall a. Show a => Set a -> String
setShow Set a
x String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"|" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Set a -> String
forall a. Show a => Set a -> String
setShow Set a
y String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
")"

-- Show the elements of a set in a human readable format.
setShow :: Show a => Set a -> String
setShow :: Set a -> String
setShow = String -> [String] -> String
forall a. [a] -> [[a]] -> [a]
intercalate String
"," ([String] -> String) -> (Set a -> [String]) -> Set a -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> String) -> [a] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map a -> String
forall a. Show a => a -> String
show ([a] -> [String]) -> (Set a -> [a]) -> Set a -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Set a -> [a]
forall a. Set a -> [a]
S.toList

-- | For a bifurcating root, get the bipartition induced by the root node.
--
-- Return 'Left' if
-- - the root node is not bifurcating;
-- - a leave set is empty.
bipartition :: Ord a => Tree e a -> Either String (Bipartition a)
bipartition :: Tree e a -> Either String (Bipartition a)
bipartition (Node e
_ a
_ [Tree e a
x, Tree e a
y]) = Set a -> Set a -> Either String (Bipartition a)
forall a. Ord a => Set a -> Set a -> Either String (Bipartition a)
bp ([a] -> Set a
forall a. Ord a => [a] -> Set a
S.fromList ([a] -> Set a) -> [a] -> Set a
forall a b. (a -> b) -> a -> b
$ Tree e a -> [a]
forall e a. Tree e a -> [a]
leaves Tree e a
x) ([a] -> Set a
forall a. Ord a => [a] -> Set a
S.fromList ([a] -> Set a) -> [a] -> Set a
forall a b. (a -> b) -> a -> b
$ Tree e a -> [a]
forall e a. Tree e a -> [a]
leaves Tree e a
y)
bipartition Tree e a
_ = String -> Either String (Bipartition a)
forall a b. a -> Either a b
Left String
"bipartition: Root node is not bifurcating."

-- | Get all bipartitions of the tree.
--
-- Return 'Left' if the tree contains duplicate leaves.
bipartitions :: Ord a => Tree e a -> Either String (Set (Bipartition a))
bipartitions :: Tree e a -> Either String (Set (Bipartition a))
bipartitions Tree e a
t
  | Tree e a -> Bool
forall a e. Ord a => Tree e a -> Bool
duplicateLeaves Tree e a
t = String -> Either String (Set (Bipartition a))
forall a b. a -> Either a b
Left String
"bipartitions: Tree contains duplicate leaves."
  | Bool
otherwise = Set (Bipartition a) -> Either String (Set (Bipartition a))
forall a b. b -> Either a b
Right (Set (Bipartition a) -> Either String (Set (Bipartition a)))
-> Set (Bipartition a) -> Either String (Set (Bipartition a))
forall a b. (a -> b) -> a -> b
$ Set a -> Tree e (Set a) -> Set (Bipartition a)
forall a e. Ord a => Set a -> Tree e (Set a) -> Set (Bipartition a)
bipartitions' Set a
forall a. Set a
S.empty (Tree e (Set a) -> Set (Bipartition a))
-> Tree e (Set a) -> Set (Bipartition a)
forall a b. (a -> b) -> a -> b
$ [a] -> Set a
forall a. Ord a => [a] -> Set a
S.fromList ([a] -> Set a) -> Tree e [a] -> Tree e (Set a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Tree e a -> Tree e [a]
forall e a. Tree e a -> Tree e [a]
groups Tree e a
t

-- | Report the complementary leaves for each child.
getComplementaryLeaves ::
  (Ord a) =>
  -- Complementary leaves.
  Set a ->
  -- Tree with node labels storing leaves.
  Tree e (Set a) ->
  [Set a]
getComplementaryLeaves :: Set a -> Tree e (Set a) -> [Set a]
getComplementaryLeaves Set a
p (Node e
_ Set a
_ Forest e (Set a)
ts) =
  [ [Set a] -> Set a
forall (f :: * -> *) a. (Foldable f, Ord a) => f (Set a) -> Set a
S.unions ([Set a] -> Set a) -> [Set a] -> Set a
forall a b. (a -> b) -> a -> b
$ Set a
p Set a -> [Set a] -> [Set a]
forall a. a -> [a] -> [a]
: Int -> [Set a] -> [Set a]
forall a. Int -> [a] -> [a]
take Int
i [Set a]
lvsChildren [Set a] -> [Set a] -> [Set a]
forall a. [a] -> [a] -> [a]
++ Int -> [Set a] -> [Set a]
forall a. Int -> [a] -> [a]
drop (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) [Set a]
lvsChildren
    | Int
i <- [Int
0 .. (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)]
  ]
  where
    n :: Int
n = Forest e (Set a) -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length Forest e (Set a)
ts
    lvsChildren :: [Set a]
lvsChildren = (Tree e (Set a) -> Set a) -> Forest e (Set a) -> [Set a]
forall a b. (a -> b) -> [a] -> [b]
map Tree e (Set a) -> Set a
forall e a. Tree e a -> a
label Forest e (Set a)
ts

-- See 'bipartitions', but do not check if leaves are unique, nor if
-- bipartitions are valid.
bipartitions' :: Ord a => Set a -> Tree e (Set a) -> Set (Bipartition a)
bipartitions' :: Set a -> Tree e (Set a) -> Set (Bipartition a)
bipartitions' Set a
p (Node e
_ Set a
p' []) = (String -> Set (Bipartition a))
-> (Bipartition a -> Set (Bipartition a))
-> Either String (Bipartition a)
-> Set (Bipartition a)
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (Set (Bipartition a) -> String -> Set (Bipartition a)
forall a b. a -> b -> a
const Set (Bipartition a)
forall a. Set a
S.empty) Bipartition a -> Set (Bipartition a)
forall a. a -> Set a
S.singleton (Either String (Bipartition a) -> Set (Bipartition a))
-> Either String (Bipartition a) -> Set (Bipartition a)
forall a b. (a -> b) -> a -> b
$ Set a -> Set a -> Either String (Bipartition a)
forall a. Ord a => Set a -> Set a -> Either String (Bipartition a)
bp Set a
p Set a
p'
bipartitions' Set a
p t :: Tree e (Set a)
t@(Node e
_ Set a
p' [Tree e (Set a)]
ts) =
  [Set (Bipartition a)] -> Set (Bipartition a)
forall (f :: * -> *) a. (Foldable f, Ord a) => f (Set a) -> Set a
S.unions ([Set (Bipartition a)] -> Set (Bipartition a))
-> [Set (Bipartition a)] -> Set (Bipartition a)
forall a b. (a -> b) -> a -> b
$
    (String -> Set (Bipartition a))
-> (Bipartition a -> Set (Bipartition a))
-> Either String (Bipartition a)
-> Set (Bipartition a)
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (Set (Bipartition a) -> String -> Set (Bipartition a)
forall a b. a -> b -> a
const Set (Bipartition a)
forall a. Set a
S.empty) Bipartition a -> Set (Bipartition a)
forall a. a -> Set a
S.singleton (Set a -> Set a -> Either String (Bipartition a)
forall a. Ord a => Set a -> Set a -> Either String (Bipartition a)
bp Set a
p Set a
p') Set (Bipartition a)
-> [Set (Bipartition a)] -> [Set (Bipartition a)]
forall a. a -> [a] -> [a]
:
      [Set a -> Tree e (Set a) -> Set (Bipartition a)
forall a e. Ord a => Set a -> Tree e (Set a) -> Set (Bipartition a)
bipartitions' Set a
c Tree e (Set a)
s | (Set a
c, Tree e (Set a)
s) <- [Set a] -> [Tree e (Set a)] -> [(Set a, Tree e (Set a))]
forall a b. [a] -> [b] -> [(a, b)]
zip [Set a]
cs [Tree e (Set a)]
ts]
  where
    cs :: [Set a]
cs = Set a -> Tree e (Set a) -> [Set a]
forall a e. Ord a => Set a -> Tree e (Set a) -> [Set a]
getComplementaryLeaves Set a
p Tree e (Set a)
t

-- | Convert a tree into a 'Map' from each 'Bipartition' to the branch inducing
-- the respective 'Bipartition'.
--
-- Since the induced bipartitions of the daughter branches of a bifurcating root
-- node are equal, the branches leading to the root have to be combined in this
-- case. See http://evolution.genetics.washington.edu/phylip/doc/treedist.html
-- and how unrooted trees are handled.
--
-- Further, branches connected to degree two nodes also induce the same
-- bipartitions and have to be combined.
--
-- For combining branches, a binary function is required. This requirement is
-- encoded in the 'Semigroup' type class constraint (see 'prune').
--
-- Return 'Left' if the tree contains duplicate leaves.
bipartitionToBranch ::
  (Semigroup e, Ord a) =>
  Tree e a ->
  Either String (Map (Bipartition a) e)
bipartitionToBranch :: Tree e a -> Either String (Map (Bipartition a) e)
bipartitionToBranch Tree e a
t
  | Tree e a -> Bool
forall a e. Ord a => Tree e a -> Bool
duplicateLeaves Tree e a
t = String -> Either String (Map (Bipartition a) e)
forall a b. a -> Either a b
Left String
"bipartitionToBranch: Tree contains duplicate leaves."
  | Bool
otherwise = Map (Bipartition a) e -> Either String (Map (Bipartition a) e)
forall a b. b -> Either a b
Right (Map (Bipartition a) e -> Either String (Map (Bipartition a) e))
-> Map (Bipartition a) e -> Either String (Map (Bipartition a) e)
forall a b. (a -> b) -> a -> b
$ Set a -> Tree e (Set a) -> Map (Bipartition a) e
forall e a.
(Semigroup e, Ord a) =>
Set a -> Tree e (Set a) -> Map (Bipartition a) e
bipartitionToBranch' Set a
forall a. Set a
S.empty Tree e (Set a)
pTree
  where
    pTree :: Tree e (Set a)
pTree = [a] -> Set a
forall a. Ord a => [a] -> Set a
S.fromList ([a] -> Set a) -> Tree e [a] -> Tree e (Set a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Tree e a -> Tree e [a]
forall e a. Tree e a -> Tree e [a]
groups Tree e a
t

-- When calculating the map, branches separated by various degree two nodes have
-- to be combined. Hence, not only the complementary leaves, but also the branch
-- label itself have to be passed along.
bipartitionToBranch' ::
  (Semigroup e, Ord a) =>
  -- Complementary leaves.
  Set a ->
  -- Partition tree.
  Tree e (Set a) ->
  Map (Bipartition a) e
bipartitionToBranch' :: Set a -> Tree e (Set a) -> Map (Bipartition a) e
bipartitionToBranch' Set a
p t :: Tree e (Set a)
t@(Node e
b Set a
p' Forest e (Set a)
ts) =
  (e -> e -> e) -> [Map (Bipartition a) e] -> Map (Bipartition a) e
forall (f :: * -> *) k a.
(Foldable f, Ord k) =>
(a -> a -> a) -> f (Map k a) -> Map k a
M.unionsWith e -> e -> e
forall a. Semigroup a => a -> a -> a
(<>) ([Map (Bipartition a) e] -> Map (Bipartition a) e)
-> [Map (Bipartition a) e] -> Map (Bipartition a) e
forall a b. (a -> b) -> a -> b
$
    (String -> Map (Bipartition a) e)
-> (Bipartition a -> Map (Bipartition a) e)
-> Either String (Bipartition a)
-> Map (Bipartition a) e
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (Map (Bipartition a) e -> String -> Map (Bipartition a) e
forall a b. a -> b -> a
const Map (Bipartition a) e
forall k a. Map k a
M.empty) (Bipartition a -> e -> Map (Bipartition a) e
forall k a. k -> a -> Map k a
`M.singleton` e
b) (Set a -> Set a -> Either String (Bipartition a)
forall a. Ord a => Set a -> Set a -> Either String (Bipartition a)
bp Set a
p Set a
p') Map (Bipartition a) e
-> [Map (Bipartition a) e] -> [Map (Bipartition a) e]
forall a. a -> [a] -> [a]
:
      [Set a -> Tree e (Set a) -> Map (Bipartition a) e
forall e a.
(Semigroup e, Ord a) =>
Set a -> Tree e (Set a) -> Map (Bipartition a) e
bipartitionToBranch' Set a
c Tree e (Set a)
s | (Set a
c, Tree e (Set a)
s) <- [Set a] -> Forest e (Set a) -> [(Set a, Tree e (Set a))]
forall a b. [a] -> [b] -> [(a, b)]
zip [Set a]
cs Forest e (Set a)
ts]
  where
    cs :: [Set a]
cs = Set a -> Tree e (Set a) -> [Set a]
forall a e. Ord a => Set a -> Tree e (Set a) -> [Set a]
getComplementaryLeaves Set a
p Tree e (Set a)
t