module SequenceFormats.Genomic where import SequenceFormats.Bed (BedEntry(..)) import SequenceFormats.Eigenstrat (EigenstratSnpEntry(..)) import SequenceFormats.FreqSum (FreqSumEntry(..)) import SequenceFormats.Pileup (PileupRow(..)) import SequenceFormats.Utils (Chrom) import SequenceFormats.VCF (VCFentry(..)) import Control.Monad.Trans.Class (lift) import Pipes (Producer, next, yield) class Genomic a where genomicPosition :: a -> (Chrom, Int) genomicChrom :: a -> Chrom genomicChrom = (Chrom, Int) -> Chrom forall a b. (a, b) -> a fst ((Chrom, Int) -> Chrom) -> (a -> (Chrom, Int)) -> a -> Chrom forall b c a. (b -> c) -> (a -> b) -> a -> c . a -> (Chrom, Int) forall a. Genomic a => a -> (Chrom, Int) genomicPosition genomicBase :: a -> Int genomicBase = (Chrom, Int) -> Int forall a b. (a, b) -> b snd ((Chrom, Int) -> Int) -> (a -> (Chrom, Int)) -> a -> Int forall b c a. (b -> c) -> (a -> b) -> a -> c . a -> (Chrom, Int) forall a. Genomic a => a -> (Chrom, Int) genomicPosition instance Genomic EigenstratSnpEntry where genomicPosition :: EigenstratSnpEntry -> (Chrom, Int) genomicPosition (EigenstratSnpEntry Chrom c Int p Double _ ByteString _ Char _ Char _) = (Chrom c, Int p) instance Genomic FreqSumEntry where genomicPosition :: FreqSumEntry -> (Chrom, Int) genomicPosition (FreqSumEntry Chrom c Int p Maybe ByteString _ Maybe Double _ Char _ Char _ [Maybe Int] _) = (Chrom c, Int p) instance Genomic PileupRow where genomicPosition :: PileupRow -> (Chrom, Int) genomicPosition (PileupRow Chrom c Int p Char _ [String] _ [[Strand]] _) = (Chrom c, Int p) instance Genomic VCFentry where genomicPosition :: VCFentry -> (Chrom, Int) genomicPosition (VCFentry Chrom c Int p Maybe ByteString _ ByteString _ [ByteString] _ Double _ Maybe ByteString _ [ByteString] _ [ByteString] _ [[ByteString]] _) = (Chrom c, Int p) data IntervalStatus = BedBehind | BedOn | BedAhead filterThroughBed :: (Monad m, Genomic e) => Producer BedEntry m () -> Producer e m () -> Producer e m () filterThroughBed :: Producer BedEntry m () -> Producer e m () -> Producer e m () filterThroughBed Producer BedEntry m () bedProd Producer e m () gProd = do Either () (BedEntry, Producer BedEntry m ()) b <- m (Either () (BedEntry, Producer BedEntry m ())) -> Proxy X () () e m (Either () (BedEntry, Producer BedEntry m ())) forall (t :: (* -> *) -> * -> *) (m :: * -> *) a. (MonadTrans t, Monad m) => m a -> t m a lift (m (Either () (BedEntry, Producer BedEntry m ())) -> Proxy X () () e m (Either () (BedEntry, Producer BedEntry m ()))) -> m (Either () (BedEntry, Producer BedEntry m ())) -> Proxy X () () e m (Either () (BedEntry, Producer BedEntry m ())) forall a b. (a -> b) -> a -> b $ Producer BedEntry m () -> m (Either () (BedEntry, Producer BedEntry m ())) forall (m :: * -> *) a r. Monad m => Producer a m r -> m (Either r (a, Producer a m r)) next Producer BedEntry m () bedProd let (BedEntry bedCurrent, Producer BedEntry m () bedRest) = case Either () (BedEntry, Producer BedEntry m ()) b of Left () _ -> String -> (BedEntry, Producer BedEntry m ()) forall a. HasCallStack => String -> a error String "Bed file empty or not readable" Right (BedEntry, Producer BedEntry m ()) r -> (BedEntry, Producer BedEntry m ()) r Either () (e, Producer e m ()) f' <- m (Either () (e, Producer e m ())) -> Proxy X () () e m (Either () (e, Producer e m ())) forall (t :: (* -> *) -> * -> *) (m :: * -> *) a. (MonadTrans t, Monad m) => m a -> t m a lift (m (Either () (e, Producer e m ())) -> Proxy X () () e m (Either () (e, Producer e m ()))) -> m (Either () (e, Producer e m ())) -> Proxy X () () e m (Either () (e, Producer e m ())) forall a b. (a -> b) -> a -> b $ Producer e m () -> m (Either () (e, Producer e m ())) forall (m :: * -> *) a r. Monad m => Producer a m r -> m (Either r (a, Producer a m r)) next Producer e m () gProd let (e gCurrent, Producer e m () gRest) = case Either () (e, Producer e m ()) f' of Left () _ -> String -> (e, Producer e m ()) forall a. HasCallStack => String -> a error String "Genomic stream empty or not readable" Right (e, Producer e m ()) r -> (e, Producer e m ()) r BedEntry -> e -> Producer BedEntry m () -> Producer e m () -> Producer e m () forall t (m :: * -> *) x' x. (Genomic t, Monad m) => BedEntry -> t -> Proxy X () () BedEntry m () -> Proxy X () () t m () -> Proxy x' x () t m () go BedEntry bedCurrent e gCurrent Producer BedEntry m () bedRest Producer e m () gRest where go :: BedEntry -> t -> Proxy X () () BedEntry m () -> Proxy X () () t m () -> Proxy x' x () t m () go BedEntry bedCurrent t gCurrent Proxy X () () BedEntry m () bedRest Proxy X () () t m () gRest = do let recurseNextBed :: Proxy x' x () t m () recurseNextBed = do Either () (BedEntry, Proxy X () () BedEntry m ()) b <- m (Either () (BedEntry, Proxy X () () BedEntry m ())) -> Proxy x' x () t m (Either () (BedEntry, Proxy X () () BedEntry m ())) forall (t :: (* -> *) -> * -> *) (m :: * -> *) a. (MonadTrans t, Monad m) => m a -> t m a lift (m (Either () (BedEntry, Proxy X () () BedEntry m ())) -> Proxy x' x () t m (Either () (BedEntry, Proxy X () () BedEntry m ()))) -> m (Either () (BedEntry, Proxy X () () BedEntry m ())) -> Proxy x' x () t m (Either () (BedEntry, Proxy X () () BedEntry m ())) forall a b. (a -> b) -> a -> b $ Proxy X () () BedEntry m () -> m (Either () (BedEntry, Proxy X () () BedEntry m ())) forall (m :: * -> *) a r. Monad m => Producer a m r -> m (Either r (a, Producer a m r)) next Proxy X () () BedEntry m () bedRest case Either () (BedEntry, Proxy X () () BedEntry m ()) b of Left () -> () -> Proxy x' x () t m () forall (m :: * -> *) a. Monad m => a -> m a return () Right (BedEntry nextBed, Proxy X () () BedEntry m () bedRest') -> BedEntry -> t -> Proxy X () () BedEntry m () -> Proxy X () () t m () -> Proxy x' x () t m () go BedEntry nextBed t gCurrent Proxy X () () BedEntry m () bedRest' Proxy X () () t m () gRest recurseNextG :: Proxy x' x () t m () recurseNextG = do Either () (t, Proxy X () () t m ()) f' <- m (Either () (t, Proxy X () () t m ())) -> Proxy x' x () t m (Either () (t, Proxy X () () t m ())) forall (t :: (* -> *) -> * -> *) (m :: * -> *) a. (MonadTrans t, Monad m) => m a -> t m a lift (m (Either () (t, Proxy X () () t m ())) -> Proxy x' x () t m (Either () (t, Proxy X () () t m ()))) -> m (Either () (t, Proxy X () () t m ())) -> Proxy x' x () t m (Either () (t, Proxy X () () t m ())) forall a b. (a -> b) -> a -> b $ Proxy X () () t m () -> m (Either () (t, Proxy X () () t m ())) forall (m :: * -> *) a r. Monad m => Producer a m r -> m (Either r (a, Producer a m r)) next Proxy X () () t m () gRest case Either () (t, Proxy X () () t m ()) f' of Left () -> () -> Proxy x' x () t m () forall (m :: * -> *) a. Monad m => a -> m a return () Right (t nextG, Proxy X () () t m () gRest') -> BedEntry -> t -> Proxy X () () BedEntry m () -> Proxy X () () t m () -> Proxy x' x () t m () go BedEntry bedCurrent t nextG Proxy X () () BedEntry m () bedRest Proxy X () () t m () gRest' case BedEntry bedCurrent BedEntry -> t -> IntervalStatus forall e. Genomic e => BedEntry -> e -> IntervalStatus `checkIntervalStatus` t gCurrent of IntervalStatus BedBehind -> Proxy x' x () t m () recurseNextBed IntervalStatus BedAhead -> Proxy x' x () t m () recurseNextG IntervalStatus BedOn -> do t -> Proxy x' x () t m () forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m () yield t gCurrent Proxy x' x () t m () recurseNextG checkIntervalStatus :: (Genomic e) => BedEntry -> e -> IntervalStatus checkIntervalStatus :: BedEntry -> e -> IntervalStatus checkIntervalStatus (BedEntry Chrom bedChrom Int bedStart Int bedEnd) e g = case Chrom bedChrom Chrom -> Chrom -> Ordering forall a. Ord a => a -> a -> Ordering `compare` e -> Chrom forall a. Genomic a => a -> Chrom genomicChrom e g of Ordering LT -> IntervalStatus BedBehind Ordering GT -> IntervalStatus BedAhead Ordering EQ -> if Int bedStart Int -> Int -> Int forall a. Num a => a -> a -> a + Int 1 Int -> Int -> Bool forall a. Ord a => a -> a -> Bool > e -> Int forall a. Genomic a => a -> Int genomicBase e g then IntervalStatus BedAhead else if Int bedEnd Int -> Int -> Bool forall a. Ord a => a -> a -> Bool < e -> Int forall a. Genomic a => a -> Int genomicBase e g then IntervalStatus BedBehind else IntervalStatus BedOn chromFilter :: (Genomic e) => [Chrom] -> e -> Bool chromFilter :: [Chrom] -> e -> Bool chromFilter [Chrom] exclusionList = (Chrom -> [Chrom] -> Bool forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool `notElem` [Chrom] exclusionList) (Chrom -> Bool) -> (e -> Chrom) -> e -> Bool forall b c a. (b -> c) -> (a -> b) -> a -> c . e -> Chrom forall a. Genomic a => a -> Chrom genomicChrom