module SequenceFormats.Genomic where

import SequenceFormats.Bed (BedEntry(..))
import SequenceFormats.Eigenstrat (EigenstratSnpEntry(..))
import SequenceFormats.FreqSum (FreqSumEntry(..))
import SequenceFormats.Pileup (PileupRow(..))
import SequenceFormats.Utils (Chrom)
import SequenceFormats.VCF (VCFentry(..))

import Control.Monad.Trans.Class (lift)
import Pipes (Producer, next, yield)

class Genomic a where
    genomicPosition :: a -> (Chrom, Int)

    genomicChrom :: a -> Chrom
    genomicChrom = (Chrom, Int) -> Chrom
forall a b. (a, b) -> a
fst ((Chrom, Int) -> Chrom) -> (a -> (Chrom, Int)) -> a -> Chrom
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> (Chrom, Int)
forall a. Genomic a => a -> (Chrom, Int)
genomicPosition

    genomicBase :: a -> Int 
    genomicBase = (Chrom, Int) -> Int
forall a b. (a, b) -> b
snd ((Chrom, Int) -> Int) -> (a -> (Chrom, Int)) -> a -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> (Chrom, Int)
forall a. Genomic a => a -> (Chrom, Int)
genomicPosition

instance Genomic EigenstratSnpEntry where
    genomicPosition :: EigenstratSnpEntry -> (Chrom, Int)
genomicPosition (EigenstratSnpEntry Chrom
c Int
p Double
_ ByteString
_ Char
_ Char
_) = (Chrom
c, Int
p)

instance Genomic FreqSumEntry where
    genomicPosition :: FreqSumEntry -> (Chrom, Int)
genomicPosition (FreqSumEntry Chrom
c Int
p Maybe ByteString
_ Maybe Double
_ Char
_ Char
_ [Maybe Int]
_) = (Chrom
c, Int
p)

instance Genomic PileupRow where
    genomicPosition :: PileupRow -> (Chrom, Int)
genomicPosition (PileupRow Chrom
c Int
p Char
_ [String]
_ [[Strand]]
_) = (Chrom
c, Int
p)

instance Genomic VCFentry where
    genomicPosition :: VCFentry -> (Chrom, Int)
genomicPosition (VCFentry Chrom
c Int
p Maybe ByteString
_ ByteString
_ [ByteString]
_ Double
_ Maybe ByteString
_ [ByteString]
_ [ByteString]
_ [[ByteString]]
_) = (Chrom
c, Int
p)

data IntervalStatus = BedBehind | BedOn | BedAhead

filterThroughBed :: (Monad m, Genomic e) => Producer BedEntry m () -> Producer e m () -> Producer e m ()
filterThroughBed :: Producer BedEntry m () -> Producer e m () -> Producer e m ()
filterThroughBed Producer BedEntry m ()
bedProd Producer e m ()
gProd = do
    Either () (BedEntry, Producer BedEntry m ())
b <- m (Either () (BedEntry, Producer BedEntry m ()))
-> Proxy X () () e m (Either () (BedEntry, Producer BedEntry m ()))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (Either () (BedEntry, Producer BedEntry m ()))
 -> Proxy
      X () () e m (Either () (BedEntry, Producer BedEntry m ())))
-> m (Either () (BedEntry, Producer BedEntry m ()))
-> Proxy X () () e m (Either () (BedEntry, Producer BedEntry m ()))
forall a b. (a -> b) -> a -> b
$ Producer BedEntry m ()
-> m (Either () (BedEntry, Producer BedEntry m ()))
forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Producer BedEntry m ()
bedProd
    let (BedEntry
bedCurrent, Producer BedEntry m ()
bedRest) = case Either () (BedEntry, Producer BedEntry m ())
b of
            Left ()
_ -> String -> (BedEntry, Producer BedEntry m ())
forall a. HasCallStack => String -> a
error String
"Bed file empty or not readable"
            Right (BedEntry, Producer BedEntry m ())
r -> (BedEntry, Producer BedEntry m ())
r
    Either () (e, Producer e m ())
f' <- m (Either () (e, Producer e m ()))
-> Proxy X () () e m (Either () (e, Producer e m ()))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (Either () (e, Producer e m ()))
 -> Proxy X () () e m (Either () (e, Producer e m ())))
-> m (Either () (e, Producer e m ()))
-> Proxy X () () e m (Either () (e, Producer e m ()))
forall a b. (a -> b) -> a -> b
$ Producer e m () -> m (Either () (e, Producer e m ()))
forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Producer e m ()
gProd
    let (e
gCurrent, Producer e m ()
gRest) = case Either () (e, Producer e m ())
f' of
            Left ()
_ -> String -> (e, Producer e m ())
forall a. HasCallStack => String -> a
error String
"Genomic stream empty or not readable"
            Right (e, Producer e m ())
r -> (e, Producer e m ())
r
    BedEntry
-> e
-> Producer BedEntry m ()
-> Producer e m ()
-> Producer e m ()
forall t (m :: * -> *) x' x.
(Genomic t, Monad m) =>
BedEntry
-> t
-> Proxy X () () BedEntry m ()
-> Proxy X () () t m ()
-> Proxy x' x () t m ()
go BedEntry
bedCurrent e
gCurrent Producer BedEntry m ()
bedRest Producer e m ()
gRest
  where
    go :: BedEntry
-> t
-> Proxy X () () BedEntry m ()
-> Proxy X () () t m ()
-> Proxy x' x () t m ()
go BedEntry
bedCurrent t
gCurrent Proxy X () () BedEntry m ()
bedRest Proxy X () () t m ()
gRest = do
        let recurseNextBed :: Proxy x' x () t m ()
recurseNextBed = do
                Either () (BedEntry, Proxy X () () BedEntry m ())
b <- m (Either () (BedEntry, Proxy X () () BedEntry m ()))
-> Proxy
     x' x () t m (Either () (BedEntry, Proxy X () () BedEntry m ()))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (Either () (BedEntry, Proxy X () () BedEntry m ()))
 -> Proxy
      x' x () t m (Either () (BedEntry, Proxy X () () BedEntry m ())))
-> m (Either () (BedEntry, Proxy X () () BedEntry m ()))
-> Proxy
     x' x () t m (Either () (BedEntry, Proxy X () () BedEntry m ()))
forall a b. (a -> b) -> a -> b
$ Proxy X () () BedEntry m ()
-> m (Either () (BedEntry, Proxy X () () BedEntry m ()))
forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Proxy X () () BedEntry m ()
bedRest
                case Either () (BedEntry, Proxy X () () BedEntry m ())
b of
                    Left () -> () -> Proxy x' x () t m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
                    Right (BedEntry
nextBed, Proxy X () () BedEntry m ()
bedRest') -> BedEntry
-> t
-> Proxy X () () BedEntry m ()
-> Proxy X () () t m ()
-> Proxy x' x () t m ()
go BedEntry
nextBed t
gCurrent Proxy X () () BedEntry m ()
bedRest' Proxy X () () t m ()
gRest
            recurseNextG :: Proxy x' x () t m ()
recurseNextG = do
                Either () (t, Proxy X () () t m ())
f' <- m (Either () (t, Proxy X () () t m ()))
-> Proxy x' x () t m (Either () (t, Proxy X () () t m ()))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (Either () (t, Proxy X () () t m ()))
 -> Proxy x' x () t m (Either () (t, Proxy X () () t m ())))
-> m (Either () (t, Proxy X () () t m ()))
-> Proxy x' x () t m (Either () (t, Proxy X () () t m ()))
forall a b. (a -> b) -> a -> b
$ Proxy X () () t m () -> m (Either () (t, Proxy X () () t m ()))
forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Proxy X () () t m ()
gRest
                case Either () (t, Proxy X () () t m ())
f' of
                    Left () -> () -> Proxy x' x () t m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
                    Right (t
nextG, Proxy X () () t m ()
gRest') -> BedEntry
-> t
-> Proxy X () () BedEntry m ()
-> Proxy X () () t m ()
-> Proxy x' x () t m ()
go BedEntry
bedCurrent t
nextG Proxy X () () BedEntry m ()
bedRest Proxy X () () t m ()
gRest'
        case BedEntry
bedCurrent BedEntry -> t -> IntervalStatus
forall e. Genomic e => BedEntry -> e -> IntervalStatus
`checkIntervalStatus` t
gCurrent of
            IntervalStatus
BedBehind -> Proxy x' x () t m ()
recurseNextBed
            IntervalStatus
BedAhead -> Proxy x' x () t m ()
recurseNextG
            IntervalStatus
BedOn -> do
                t -> Proxy x' x () t m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield t
gCurrent
                Proxy x' x () t m ()
recurseNextG
    checkIntervalStatus :: (Genomic e) => BedEntry -> e -> IntervalStatus
    checkIntervalStatus :: BedEntry -> e -> IntervalStatus
checkIntervalStatus (BedEntry Chrom
bedChrom Int
bedStart Int
bedEnd) e
g =
        case Chrom
bedChrom Chrom -> Chrom -> Ordering
forall a. Ord a => a -> a -> Ordering
`compare` e -> Chrom
forall a. Genomic a => a -> Chrom
genomicChrom e
g of
            Ordering
LT -> IntervalStatus
BedBehind
            Ordering
GT -> IntervalStatus
BedAhead
            Ordering
EQ -> if Int
bedStart Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> e -> Int
forall a. Genomic a => a -> Int
genomicBase e
g then
                      IntervalStatus
BedAhead
                  else
                      if Int
bedEnd Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< e -> Int
forall a. Genomic a => a -> Int
genomicBase e
g then IntervalStatus
BedBehind else IntervalStatus
BedOn


chromFilter :: (Genomic e) => [Chrom] -> e -> Bool
chromFilter :: [Chrom] -> e -> Bool
chromFilter [Chrom]
exclusionList = (Chrom -> [Chrom] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [Chrom]
exclusionList) (Chrom -> Bool) -> (e -> Chrom) -> e -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. e -> Chrom
forall a. Genomic a => a -> Chrom
genomicChrom