{-# LANGUAGE ViewPatterns #-}

module Bio.Sequence.Functions.Marking
  ( getMarking
  , unsafeGetMarking
  , toMarked
  , unsafeToMarked
  , addMarkings
  , unsafeAddMarkings
  , listMarkings
  ) where

import           Control.Lens
import           Control.Monad.Except (MonadError, throwError)
import           Data.List            (nub)
import           Data.List.NonEmpty   (NonEmpty (..))
import           Data.Text            (Text)
import qualified Data.Vector          as V (toList)
import           Prelude              hiding (drop, head, length, null, reverse, tail, take, (!!))

import Bio.NucleicAcid.Nucleotide      (Complementary (..))
import Bio.Sequence.Class              (ContainsMarking, IsBareSequence, IsMarkedSequence,
                                        IsSequence (..), _sequenceInner, markedSequence, markings,
                                        sequ, unsafeMarkedSequence, weights)
import Bio.Sequence.Functions.Sequence (length, unsafeGetRange)
import Bio.Sequence.Range              (Range, checkRange)
import Bio.Sequence.Utilities          (unsafeEither)

-- | Function that retrieves all elements in 'IsSequence' @s@ that are covered by given 'Marking'' @s@.
-- Returns 'NonEmpty' list, because if 'Marking' is present in @s@, then list of
-- all 'Marking's for @s@ can't be empty. If given 'Marking is not found in @s@, an
-- error will be thrown.
--
-- > sequ = Sequence ['a', 'a', 'b', 'a'] [("Letter A", (0, 2)), ("Letter A", (3, 4)), ("Letter B", (2, 3))] mempty
-- > getMarking sequ "Letter A" == ['a', 'a'] :| [['a']]
--
getMarking :: (ContainsMarking s, MonadError Text m, Complementary (Element s)) => s -> Marking s -> m (NonEmpty [Element s])
getMarking :: forall s (m :: * -> *).
(ContainsMarking s, MonadError Text m,
 Complementary (Element s)) =>
s -> Marking s -> m (NonEmpty [Element s])
getMarking (forall s.
IsSequence s =>
s -> Sequence (Marking s) (Weight s) (Element s)
toSequence -> Sequence (Marking s) (Weight s) (Element s)
s) Marking s
mk | Bool -> Bool
not forall a b. (a -> b) -> a -> b
$ Marking s
mk forall a b. Eq a => a -> [(a, b)] -> Bool
`member` (Sequence (Marking s) (Weight s) (Element s)
s forall s a. s -> Getting a s a -> a
^. forall mk w a. Getter (Sequence mk w a) [(mk, Range)]
markings) = forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError Text
markingNotFoundError
                                | Bool
otherwise                         = forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ NonEmpty [Element (Sequence (Marking s) (Weight s) (Element s))]
res
  where
    res :: NonEmpty [Element (Sequence (Marking s) (Weight s) (Element s))]
res = forall (t :: * -> *) a. Foldable t => (a -> a -> a) -> t a -> a
foldl1 forall a. Semigroup a => a -> a -> a
(<>) forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((forall a. a -> [a] -> NonEmpty a
:| []) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall s.
(IsSequence s, Complementary (Element s)) =>
s -> Range -> [Element s]
unsafeGetRange Sequence (Marking s) (Weight s) (Element s)
s) forall a b. (a -> b) -> a -> b
$  Marking s
mk forall a b. Eq a => a -> [(a, b)] -> [b]
`lookupAll` (Sequence (Marking s) (Weight s) (Element s)
s forall s a. s -> Getting a s a -> a
^. forall mk w a. Getter (Sequence mk w a) [(mk, Range)]
markings)

    markingNotFoundError :: Text
    markingNotFoundError :: Text
markingNotFoundError = Text
"Bio.Sequence.Functions.Marking: given marking not found in Sequence."

unsafeGetMarking :: (ContainsMarking s, Complementary (Element s)) => s -> Marking s -> NonEmpty [Element s]
unsafeGetMarking :: forall s.
(ContainsMarking s, Complementary (Element s)) =>
s -> Marking s -> NonEmpty [Element s]
unsafeGetMarking s
mk = forall a. Either Text a -> a
unsafeEither forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall s (m :: * -> *).
(ContainsMarking s, MonadError Text m,
 Complementary (Element s)) =>
s -> Marking s -> m (NonEmpty [Element s])
getMarking s
mk

-- | Converts 'IsBareSequence' @s@ to 'IsMarkedSequence' @s'@ that is marked using provided list
-- of 'Marking's. If at least one of ranges in given list of 'Marking's is out of
-- bounds, an error will be thrown.
--
-- > sequBare = Sequence ['a', 'a', 'b', 'a'] mempty mempty :: BareSequence Char
-- > toMarked sequ [("Letter A", (0, 2)), ("Letter A", (3, 4))] :: MarkedSequence String Char
--
toMarked :: (IsBareSequence s, IsMarkedSequence s', Marking s' ~ mk, Element s ~ Element s', MonadError Text m) => s -> [(mk, Range)] -> m s'
toMarked :: forall s s' mk (m :: * -> *).
(IsBareSequence s, IsMarkedSequence s', Marking s' ~ mk,
 Element s ~ Element s', MonadError Text m) =>
s -> [(mk, Range)] -> m s'
toMarked (forall s.
IsSequence s =>
s -> Sequence (Marking s) (Weight s) (Element s)
toSequence -> Sequence (Marking s) (Weight s) (Element s)
s) = forall s (m :: * -> *).
(IsMarkedSequence s, MonadError Text m) =>
[Element s] -> [(Marking s, Range)] -> m s
markedSequence (forall a. Vector a -> [a]
V.toList forall a b. (a -> b) -> a -> b
$ Sequence (Marking s) (Weight s) (Element s)
s forall s a. s -> Getting a s a -> a
^. forall mk w a. Getter (Sequence mk w a) (Vector a)
sequ)

unsafeToMarked :: (IsBareSequence s, IsMarkedSequence s', Marking s' ~ mk, Element s ~ Element s') => s -> [(mk, Range)] -> s'
unsafeToMarked :: forall s s' mk.
(IsBareSequence s, IsMarkedSequence s', Marking s' ~ mk,
 Element s ~ Element s') =>
s -> [(mk, Range)] -> s'
unsafeToMarked (forall s.
IsSequence s =>
s -> Sequence (Marking s) (Weight s) (Element s)
toSequence -> Sequence (Marking s) (Weight s) (Element s)
s) = forall s.
IsMarkedSequence s =>
[Element s] -> [(Marking s, Range)] -> s
unsafeMarkedSequence (forall a. Vector a -> [a]
V.toList forall a b. (a -> b) -> a -> b
$ Sequence (Marking s) (Weight s) (Element s)
s forall s a. s -> Getting a s a -> a
^. forall mk w a. Getter (Sequence mk w a) (Vector a)
sequ)

-- | Adds new 'Marking's to given 'IsSequence' @s@. Type of new 'Marking's must
-- match type of 'Marking's that @s@ is already marked with. If at least one of ranges
-- in given list of 'Marking's is out of bounds, an error will be thrown.
--
-- > sequ = Sequence ['a', 'a', 'b', 'a'] [("Letter A", (0, 2)), ("Letter A", (3, 4)), ("Letter B", (2, 3))] mempty
-- > sequ' = Sequence ['a', 'a', 'b', 'a'] [("Letter A", (0, 2)), ("Letter A", (3, 4))] mempty
-- > addMarkings sequ' [("Letter B", (2, 3))] == sequ
--
addMarkings :: (ContainsMarking s, Marking s ~ mk, MonadError Text m) => s -> [(mk, Range)] -> m s
addMarkings :: forall s mk (m :: * -> *).
(ContainsMarking s, Marking s ~ mk, MonadError Text m) =>
s -> [(mk, Range)] -> m s
addMarkings (forall s.
IsSequence s =>
s -> Sequence (Marking s) (Weight s) (Element s)
toSequence -> Sequence (Marking s) (Weight s) (Element s)
s) [(mk, Range)]
markings' | forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (Int -> Range -> Bool
checkRange (forall s. IsSequence s => s -> Int
length Sequence (Marking s) (Weight s) (Element s)
s) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> b
snd) [(mk, Range)]
markings' = forall (f :: * -> *) a. Applicative f => a -> f a
pure s
res
                                        | Bool
otherwise                                   = forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError Text
rangesError
  where
    res :: s
res = forall s.
IsSequence s =>
Sequence (Marking s) (Weight s) (Element s) -> s
fromSequence forall a b. (a -> b) -> a -> b
$ forall a mk w.
Vector a -> [(mk, Range)] -> Vector w -> Sequence mk w a
_sequenceInner (Sequence (Marking s) (Weight s) (Element s)
s forall s a. s -> Getting a s a -> a
^. forall mk w a. Getter (Sequence mk w a) (Vector a)
sequ) (Sequence (Marking s) (Weight s) (Element s)
s forall s a. s -> Getting a s a -> a
^. forall mk w a. Getter (Sequence mk w a) [(mk, Range)]
markings forall a. Semigroup a => a -> a -> a
<> [(mk, Range)]
markings') (Sequence (Marking s) (Weight s) (Element s)
s forall s a. s -> Getting a s a -> a
^. forall mk w a. Getter (Sequence mk w a) (Vector w)
weights)

    rangesError :: Text
    rangesError :: Text
rangesError = Text
"Bio.Sequence.Functions.Marking: can't add markings to Sequence, because some of them are out of range."

unsafeAddMarkings :: (ContainsMarking s, Marking s ~ mk) => s -> [(mk, Range)] -> s
unsafeAddMarkings :: forall s mk.
(ContainsMarking s, Marking s ~ mk) =>
s -> [(mk, Range)] -> s
unsafeAddMarkings s
s = forall a. Either Text a -> a
unsafeEither forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall s mk (m :: * -> *).
(ContainsMarking s, Marking s ~ mk, MonadError Text m) =>
s -> [(mk, Range)] -> m s
addMarkings s
s

-- | Retrieves all 'Marking's from given sequence that 'ContainsMarking'.
--   Result is list of 'Marking's without dublicates.
--
-- > sequ = Sequence ['a', 'a', 'b', 'a'] [("Letter A", (0, 2)), ("Letter A", (3, 4)), ("Letter B", (2, 3))] mempty
-- > listMarkings sequ == ["Letter A", "Letter B"]
--
listMarkings :: ContainsMarking s => s -> [Marking s]
listMarkings :: forall s. ContainsMarking s => s -> [Marking s]
listMarkings (forall s.
IsSequence s =>
s -> Sequence (Marking s) (Weight s) (Element s)
toSequence -> Sequence (Marking s) (Weight s) (Element s)
s) = forall a. Eq a => [a] -> [a]
nub forall a b. (a -> b) -> a -> b
$ forall a b. (a, b) -> a
fst forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Sequence (Marking s) (Weight s) (Element s)
s forall s a. s -> Getting a s a -> a
^. forall mk w a. Getter (Sequence mk w a) [(mk, Range)]
markings

--------------------------------------------------------------------------------
-- Inner functions.
--------------------------------------------------------------------------------

member :: Eq a => a -> [(a, b)] -> Bool
member :: forall a b. Eq a => a -> [(a, b)] -> Bool
member a
a = (a
a forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem`) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a b. (a, b) -> a
fst

lookupAll :: Eq a => a -> [(a, b)] -> [b]
lookupAll :: forall a b. Eq a => a -> [(a, b)] -> [b]
lookupAll a
a = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a b. (a, b) -> b
snd forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
filter ((forall a. Eq a => a -> a -> Bool
== a
a) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst)