module Bio.Bam.Index (
    BamIndex(..),
    withIndexedBam,
    readBamIndex,
    readBaiIndex,
    readTabix,
    IndexFormatError(..),

    Region(..),
    Subsequence(..),
    streamBamRefseq,
    streamBamRegions,
    streamBamSubseq,
    streamBamUnaligned
) where

import Bio.Bam.Header
import Bio.Bam.Reader
import Bio.Bam.Rec
import Bio.Bam.Regions              ( Region(..), Subsequence(..) )
import Bio.Prelude
import Bio.Streaming
import Bio.Streaming.Bgzf           ( bgunzip )
import System.Directory             ( doesFileExist )

import qualified Bio.Bam.Regions                as R
import qualified Bio.Streaming.Bytes            as S
import qualified Bio.Streaming.Parse            as P
import qualified Data.IntMap.Strict             as M
import qualified Data.ByteString                as B
import qualified Data.Vector                    as V
import qualified Data.Vector.Mutable            as W
import qualified Data.Vector.Unboxed            as U
import qualified Data.Vector.Unboxed.Mutable    as N
import qualified Data.Vector.Algorithms.Intro   as N
import qualified Streaming.Prelude              as Q

-- | Full index, unifying BAI and CSI style.  In both cases, we have the
-- binning scheme, parameters are fixed in BAI, but variable in CSI.
-- Checkpoints are created from the linear index in BAI or from the
-- `loffset' field in CSI.

data BamIndex a = BamIndex {
    -- | Minshift parameter from CSI
    BamIndex a -> Int
minshift :: {-# UNPACK #-} !Int,

    -- | Depth parameter from CSI
    BamIndex a -> Int
depth :: {-# UNPACK #-} !Int,

    -- | Best guess at where the unaligned records start
    BamIndex a -> Int64
unaln_off :: {-# UNPACK #-} !Int64,

    -- | Room for stuff (needed for tabix)
    BamIndex a -> a
extensions :: a,

    -- | Records for the binning index, where each bin has a list of
    -- segments belonging to it.
    BamIndex a -> Vector Bins
refseq_bins :: {-# UNPACK #-} !(V.Vector Bins),

    -- | Known checkpoints of the form (pos,off) where off is the
    -- virtual offset of the first record crossing pos.
    BamIndex a -> Vector Ckpoints
refseq_ckpoints :: {-# UNPACK #-} !(V.Vector Ckpoints) }

  deriving Int -> BamIndex a -> ShowS
[BamIndex a] -> ShowS
BamIndex a -> String
(Int -> BamIndex a -> ShowS)
-> (BamIndex a -> String)
-> ([BamIndex a] -> ShowS)
-> Show (BamIndex a)
forall a. Show a => Int -> BamIndex a -> ShowS
forall a. Show a => [BamIndex a] -> ShowS
forall a. Show a => BamIndex a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [BamIndex a] -> ShowS
$cshowList :: forall a. Show a => [BamIndex a] -> ShowS
show :: BamIndex a -> String
$cshow :: forall a. Show a => BamIndex a -> String
showsPrec :: Int -> BamIndex a -> ShowS
$cshowsPrec :: forall a. Show a => Int -> BamIndex a -> ShowS
Show

-- | Mapping from bin number to vector of clusters.
type Bins = IntMap Segments
type Segments = U.Vector (Int64,Int64)


-- | Checkpoints.  Each checkpoint is a position with the virtual offset
-- where the first alignment crossing the position is found.  In BAI, we
-- get this from the 'ioffset' vector, in CSI we get it from the
-- 'loffset' field:  "Given a region [beg,end), we only need to visit
-- chunks whose end file offset is larger than 'ioffset' of the 16kB
-- window containing 'beg'."  (Sounds like a marginal gain, though.)

type Ckpoints = IntMap Int64


-- | Decode only those reads that fall into one of several regions.
-- Strategy:  We will scan the file mostly linearly, but only those
-- regions that are actually needed.  We filter the decoded stuff so
-- that it actually overlaps our regions.
--
-- From the binning index, we get a list of segments per requested
-- region.  Using the checkpoints, we prune them:  if we have a
-- checkpoint to the left of the beginning of the interesting region, we
-- can move the start of each segment forward to the checkpoint.  If
-- that makes the segment empty, it can be droppped.
--
-- The resulting segment lists are merged, then traversed.  We seek to
-- the beginning of the earliest segment and start decoding.  Once the
-- virtual file position leaves the segment or the alignment position
-- moves past the end of the requested region, we move to the next.
-- Moving is a seek if it spans a sufficiently large gap or points
-- backwards, else we just keep going.

-- | A 'Segment' has a start and an end offset, and an "end coordinate"
-- from the originating region.
data Segment = Segment {-# UNPACK #-} !Int64 {-# UNPACK #-} !Int64 {-# UNPACK #-} !Int deriving Int -> Segment -> ShowS
[Segment] -> ShowS
Segment -> String
(Int -> Segment -> ShowS)
-> (Segment -> String) -> ([Segment] -> ShowS) -> Show Segment
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Segment] -> ShowS
$cshowList :: [Segment] -> ShowS
show :: Segment -> String
$cshow :: Segment -> String
showsPrec :: Int -> Segment -> ShowS
$cshowsPrec :: Int -> Segment -> ShowS
Show

segmentLists :: BamIndex a -> Refseq -> R.Subsequence -> [[Segment]]
segmentLists :: BamIndex a -> Refseq -> Subsequence -> [[Segment]]
segmentLists bi :: BamIndex a
bi@BamIndex{..} (Refseq ref :: Word32
ref) (R.Subsequence imap :: IntMap Int
imap)
        | Just bins :: Bins
bins <- Vector Bins
refseq_bins Vector Bins -> Int -> Maybe Bins
forall a. Vector a -> Int -> Maybe a
V.!? Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
ref,
          Just cpts :: Ckpoints
cpts <- Vector Ckpoints
refseq_ckpoints Vector Ckpoints -> Int -> Maybe Ckpoints
forall a. Vector a -> Int -> Maybe a
V.!? Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
ref
        = [ BamIndex a -> Int -> Int -> Bins -> Ckpoints -> [Segment]
forall a. BamIndex a -> Int -> Int -> Bins -> Ckpoints -> [Segment]
rgnToSegments BamIndex a
bi Int
beg Int
end Bins
bins Ckpoints
cpts | (beg :: Int
beg,end :: Int
end) <- IntMap Int -> [(Int, Int)]
forall a. IntMap a -> [(Int, a)]
M.toList IntMap Int
imap ]
segmentLists _ _ _ = []

-- from region to list of bins, then to list of segments
rgnToSegments :: BamIndex a -> Int -> Int -> Bins -> Ckpoints -> [Segment]
rgnToSegments :: BamIndex a -> Int -> Int -> Bins -> Ckpoints -> [Segment]
rgnToSegments bi :: BamIndex a
bi@BamIndex{..} beg :: Int
beg end :: Int
end bins :: Bins
bins cpts :: Ckpoints
cpts =
    [ Int64 -> Int64 -> Int -> Segment
Segment Int64
boff' Int64
eoff Int
end
    | Int
bin <- BamIndex a -> Int -> Int -> [Int]
forall a. BamIndex a -> Int -> Int -> [Int]
binList BamIndex a
bi Int
beg Int
end
    , (boff :: Int64
boff,eoff :: Int64
eoff) <- [(Int64, Int64)]
-> (Vector (Int64, Int64) -> [(Int64, Int64)])
-> Maybe (Vector (Int64, Int64))
-> [(Int64, Int64)]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [] Vector (Int64, Int64) -> [(Int64, Int64)]
forall a. Unbox a => Vector a -> [a]
U.toList (Maybe (Vector (Int64, Int64)) -> [(Int64, Int64)])
-> Maybe (Vector (Int64, Int64)) -> [(Int64, Int64)]
forall a b. (a -> b) -> a -> b
$ Int -> Bins -> Maybe (Vector (Int64, Int64))
forall a. Int -> IntMap a -> Maybe a
M.lookup Int
bin Bins
bins
    , let boff' :: Int64
boff' = Int64 -> Int64 -> Int64
forall a. Ord a => a -> a -> a
max Int64
boff Int64
cpt
    , Int64
boff' Int64 -> Int64 -> Bool
forall a. Ord a => a -> a -> Bool
< Int64
eoff ]
  where
    !cpt :: Int64
cpt = Int64 -> ((Int, Int64) -> Int64) -> Maybe (Int, Int64) -> Int64
forall b a. b -> (a -> b) -> Maybe a -> b
maybe 0 (Int, Int64) -> Int64
forall a b. (a, b) -> b
snd (Maybe (Int, Int64) -> Int64) -> Maybe (Int, Int64) -> Int64
forall a b. (a -> b) -> a -> b
$ Int -> Ckpoints -> Maybe (Int, Int64)
forall a. Int -> IntMap a -> Maybe (Int, a)
M.lookupLE Int
beg Ckpoints
cpts

-- list of bins for given range of coordinates, from Heng's horrible code
binList :: BamIndex a -> Int -> Int -> [Int]
binList :: BamIndex a -> Int -> Int -> [Int]
binList BamIndex{..} beg :: Int
beg end :: Int
end = Int -> Int -> Int -> [Int]
binlist' 0 (Int
minshift Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 3Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
depth) 0
  where
    binlist' :: Int -> Int -> Int -> [Int]
binlist' l :: Int
l s :: Int
s t :: Int
t = if Int
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
depth then [] else [Int
b..Int
e] [Int] -> [Int] -> [Int]
forall a. [a] -> [a] -> [a]
++ [Int]
go
      where
        b :: Int
b = Int
t Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
beg Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
s
        e :: Int
e = Int
t Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
endInt -> Int -> Int
forall a. Num a => a -> a -> a
-1) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
s
        go :: [Int]
go = Int -> Int -> Int -> [Int]
binlist' (Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
+1) (Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
-3) (Int
t Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1 Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` (3Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
l))


-- | Merges two lists of segments.  Lists must be sorted, the merge sort
-- merges overlapping segments into one.
infix 4 ~~
(~~) :: [Segment] -> [Segment] -> [Segment]
Segment a :: Int64
a b :: Int64
b e :: Int
e : xs :: [Segment]
xs ~~ :: [Segment] -> [Segment] -> [Segment]
~~ Segment u :: Int64
u v :: Int64
v f :: Int
f : ys :: [Segment]
ys
    |          Int64
b Int64 -> Int64 -> Bool
forall a. Ord a => a -> a -> Bool
< Int64
u = Int64 -> Int64 -> Int -> Segment
Segment Int64
a Int64
b Int
e Segment -> [Segment] -> [Segment]
forall a. a -> [a] -> [a]
: ([Segment]
xs [Segment] -> [Segment] -> [Segment]
~~ Int64 -> Int64 -> Int -> Segment
Segment Int64
u Int64
v Int
f Segment -> [Segment] -> [Segment]
forall a. a -> [a] -> [a]
: [Segment]
ys)     -- no overlap
    | Int64
a Int64 -> Int64 -> Bool
forall a. Ord a => a -> a -> Bool
< Int64
u Bool -> Bool -> Bool
&& Int64
b Int64 -> Int64 -> Bool
forall a. Ord a => a -> a -> Bool
< Int64
v = Int64 -> Int64 -> Int -> Segment
Segment Int64
a Int64
v (Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
e Int
f) Segment -> [Segment] -> [Segment]
forall a. a -> [a] -> [a]
: ([Segment]
xs [Segment] -> [Segment] -> [Segment]
~~ [Segment]
ys)             -- some overlap
    |          Int64
b Int64 -> Int64 -> Bool
forall a. Ord a => a -> a -> Bool
< Int64
v = Int64 -> Int64 -> Int -> Segment
Segment Int64
u Int64
v (Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
e Int
f) Segment -> [Segment] -> [Segment]
forall a. a -> [a] -> [a]
: ([Segment]
xs [Segment] -> [Segment] -> [Segment]
~~ [Segment]
ys)             -- contained
    | Int64
v Int64 -> Int64 -> Bool
forall a. Ord a => a -> a -> Bool
< Int64
a          = Int64 -> Int64 -> Int -> Segment
Segment Int64
u Int64
v Int
f Segment -> [Segment] -> [Segment]
forall a. a -> [a] -> [a]
: ([Segment]
xs [Segment] -> [Segment] -> [Segment]
~~ Int64 -> Int64 -> Int -> Segment
Segment Int64
a Int64
b Int
e Segment -> [Segment] -> [Segment]
forall a. a -> [a] -> [a]
: [Segment]
ys)     -- no overlap
    | Int64
u Int64 -> Int64 -> Bool
forall a. Ord a => a -> a -> Bool
< Int64
a          = Int64 -> Int64 -> Int -> Segment
Segment Int64
u Int64
b (Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
e Int
f) Segment -> [Segment] -> [Segment]
forall a. a -> [a] -> [a]
: ([Segment]
xs [Segment] -> [Segment] -> [Segment]
~~ [Segment]
ys)             -- some overlap
    | Bool
otherwise      = Int64 -> Int64 -> Int -> Segment
Segment Int64
a Int64
b (Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
e Int
f) Segment -> [Segment] -> [Segment]
forall a. a -> [a] -> [a]
: ([Segment]
xs [Segment] -> [Segment] -> [Segment]
~~ [Segment]
ys)             -- contained
[] ~~ ys :: [Segment]
ys = [Segment]
ys
xs :: [Segment]
xs ~~ [] = [Segment]
xs


data IndexFormatError = IndexFormatError Bytes deriving (Typeable, Int -> IndexFormatError -> ShowS
[IndexFormatError] -> ShowS
IndexFormatError -> String
(Int -> IndexFormatError -> ShowS)
-> (IndexFormatError -> String)
-> ([IndexFormatError] -> ShowS)
-> Show IndexFormatError
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [IndexFormatError] -> ShowS
$cshowList :: [IndexFormatError] -> ShowS
show :: IndexFormatError -> String
$cshow :: IndexFormatError -> String
showsPrec :: Int -> IndexFormatError -> ShowS
$cshowsPrec :: Int -> IndexFormatError -> ShowS
Show)

instance Exception IndexFormatError where
    displayException :: IndexFormatError -> String
displayException (IndexFormatError m :: Bytes
m) = "index signature " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Bytes -> String
forall a. Show a => a -> String
show Bytes
m String -> ShowS
forall a. [a] -> [a] -> [a]
++ " not recognized"

{- | Reads any index we can find for a file.

If the file name has a .bai or .csi extension, optionally followed by
.gz, we read it.  Else we look for the index by adding such an extension
and by replacing the extension with these two, and finally try the file
itself.  The first file that exists is used.
-}
readBamIndex :: FilePath -> IO (BamIndex ())
readBamIndex :: String -> IO (BamIndex ())
readBamIndex fp1 :: String
fp1 | (String -> Bool) -> [String] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isSuffixOf` String
fp1) [String]
exts = String
-> (ByteStream IO () -> IO (BamIndex ())) -> IO (BamIndex ())
forall (m :: * -> *) r.
(MonadIO m, MonadMask m) =>
String -> (ByteStream m () -> m r) -> m r
streamFile String
fp1 ByteStream IO () -> IO (BamIndex ())
forall (m :: * -> *) r.
MonadIO m =>
ByteStream m r -> m (BamIndex ())
readBaiIndex
                 | Bool
otherwise                   = [String] -> IO (BamIndex ())
forall (m :: * -> *).
(MonadIO m, MonadMask m) =>
[String] -> m (BamIndex ())
tryAll [String]
exts
  where
    exts :: [String]
exts = String -> [String]
words ".bai .bai.gz .csi .csi.gz"

    fp2 :: String
fp2 = ShowS
forall a. [a] -> [a]
reverse ShowS -> ShowS
forall a b. (a -> b) -> a -> b
$ case (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/='.') String
f of [] -> String
fString -> ShowS
forall a. [a] -> [a] -> [a]
++String
d ; _:b :: String
b -> String
bString -> ShowS
forall a. [a] -> [a] -> [a]
++String
d
    (f :: String
f,d :: String
d) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
=='/') (String -> (String, String)) -> String -> (String, String)
forall a b. (a -> b) -> a -> b
$ ShowS
forall a. [a] -> [a]
reverse String
fp1

    tryAll :: [String] -> m (BamIndex ())
tryAll [    ] = String -> (ByteStream m () -> m (BamIndex ())) -> m (BamIndex ())
forall (m :: * -> *) r.
(MonadIO m, MonadMask m) =>
String -> (ByteStream m () -> m r) -> m r
streamFile String
fp1 ByteStream m () -> m (BamIndex ())
forall (m :: * -> *) r.
MonadIO m =>
ByteStream m r -> m (BamIndex ())
readBaiIndex
    tryAll (e :: String
e:es :: [String]
es) = do Bool
x1 <- IO Bool -> m Bool
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Bool -> m Bool) -> IO Bool -> m Bool
forall a b. (a -> b) -> a -> b
$ String -> IO Bool
doesFileExist (String
fp1 String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
e)
                       Bool
x2 <- IO Bool -> m Bool
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Bool -> m Bool) -> IO Bool -> m Bool
forall a b. (a -> b) -> a -> b
$ String -> IO Bool
doesFileExist (String
fp2 String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
e)
                       case () of
                            _ | Bool
x1 -> String -> (ByteStream m () -> m (BamIndex ())) -> m (BamIndex ())
forall (m :: * -> *) r.
(MonadIO m, MonadMask m) =>
String -> (ByteStream m () -> m r) -> m r
streamFile (String
fp1 String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
e) ByteStream m () -> m (BamIndex ())
forall (m :: * -> *) r.
MonadIO m =>
ByteStream m r -> m (BamIndex ())
readBaiIndex
                              | Bool
x2 -> String -> (ByteStream m () -> m (BamIndex ())) -> m (BamIndex ())
forall (m :: * -> *) r.
(MonadIO m, MonadMask m) =>
String -> (ByteStream m () -> m r) -> m r
streamFile (String
fp2 String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
e) ByteStream m () -> m (BamIndex ())
forall (m :: * -> *) r.
MonadIO m =>
ByteStream m r -> m (BamIndex ())
readBaiIndex
                            _      -> [String] -> m (BamIndex ())
tryAll [String]
es

-- | Reads an index in BAI or CSI format, recognized automatically.  The
-- index can be compressed, even though this isn't standard.
readBaiIndex :: MonadIO m => ByteStream m r -> m (BamIndex ())
readBaiIndex :: ByteStream m r -> m (BamIndex ())
readBaiIndex = (r -> m (BamIndex ()))
-> ((BamIndex (), ByteStream m r) -> m (BamIndex ()))
-> Either r (BamIndex (), ByteStream m r)
-> m (BamIndex ())
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (m (BamIndex ()) -> r -> m (BamIndex ())
forall a b. a -> b -> a
const (m (BamIndex ()) -> r -> m (BamIndex ()))
-> (IO (BamIndex ()) -> m (BamIndex ()))
-> IO (BamIndex ())
-> r
-> m (BamIndex ())
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. IO (BamIndex ()) -> m (BamIndex ())
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (BamIndex ()) -> r -> m (BamIndex ()))
-> IO (BamIndex ()) -> r -> m (BamIndex ())
forall a b. (a -> b) -> a -> b
$ EofException -> IO (BamIndex ())
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM EofException
P.EofException) (BamIndex () -> m (BamIndex ())
forall (m :: * -> *) a. Monad m => a -> m a
return (BamIndex () -> m (BamIndex ()))
-> ((BamIndex (), ByteStream m r) -> BamIndex ())
-> (BamIndex (), ByteStream m r)
-> m (BamIndex ())
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (BamIndex (), ByteStream m r) -> BamIndex ()
forall a b. (a, b) -> a
fst) (Either r (BamIndex (), ByteStream m r) -> m (BamIndex ()))
-> (ByteStream m r -> m (Either r (BamIndex (), ByteStream m r)))
-> ByteStream m r
-> m (BamIndex ())
forall (m :: * -> *) b c a.
Monad m =>
(b -> m c) -> (a -> m b) -> a -> m c
<=<
               (Int64 -> Parser r m (BamIndex ()))
-> ByteStream m r -> m (Either r (BamIndex (), ByteStream m r))
forall (m :: * -> *) r a.
MonadIO m =>
(Int64 -> Parser r m a)
-> ByteStream m r -> m (Either r (a, ByteStream m r))
P.parseIO (Parser r m (BamIndex ()) -> Int64 -> Parser r m (BamIndex ())
forall a b. a -> b -> a
const (Parser r m (BamIndex ()) -> Int64 -> Parser r m (BamIndex ()))
-> Parser r m (BamIndex ()) -> Int64 -> Parser r m (BamIndex ())
forall a b. (a -> b) -> a -> b
$ Int -> Parser r m Bytes
forall (m :: * -> *) r. Monad m => Int -> Parser r m Bytes
P.getString 4 Parser r m Bytes
-> (Bytes -> Parser r m (BamIndex ())) -> Parser r m (BamIndex ())
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Bytes -> Parser r m (BamIndex ())
forall (m :: * -> *) r.
MonadIO m =>
Bytes -> Parser r m (BamIndex ())
switch) (ByteStream m r -> m (Either r (BamIndex (), ByteStream m r)))
-> (ByteStream m r -> ByteStream m r)
-> ByteStream m r
-> m (Either r (BamIndex (), ByteStream m r))
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ByteStream m r -> ByteStream m r
forall (m :: * -> *) r.
MonadIO m =>
ByteStream m r -> ByteStream m r
S.gunzip
  where
    switch :: Bytes -> Parser r m (BamIndex ())
switch "BAI\1" = do Int
nref <- Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32 -> Int) -> Parser r m Word32 -> Parser r m Int
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM` Parser r m Word32
forall (m :: * -> *) r. Monad m => Parser r m Word32
P.getWord32
                        Int
-> Int
-> Int
-> (Word32 -> Ckpoints -> Parser r m Ckpoints)
-> ((Ckpoints, Int64) -> Parser r m (Ckpoints, Int64))
-> Parser r m (BamIndex ())
forall (m :: * -> *) r.
MonadIO m =>
Int
-> Int
-> Int
-> (Word32 -> Ckpoints -> Parser r m Ckpoints)
-> ((Ckpoints, Int64) -> Parser r m (Ckpoints, Int64))
-> Parser r m (BamIndex ())
getIndexArrays Int
nref 14 5 ((Ckpoints -> Parser r m Ckpoints)
-> Word32 -> Ckpoints -> Parser r m Ckpoints
forall a b. a -> b -> a
const Ckpoints -> Parser r m Ckpoints
forall (m :: * -> *) a. Monad m => a -> m a
return) (Ckpoints, Int64) -> Parser r m (Ckpoints, Int64)
forall (m :: * -> *) r.
Monad m =>
(Ckpoints, Int64) -> Parser r m (Ckpoints, Int64)
getIntervals

    switch "CSI\1" = do Int
minshift <- Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32 -> Int) -> Parser r m Word32 -> Parser r m Int
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM` Parser r m Word32
forall (m :: * -> *) r. Monad m => Parser r m Word32
P.getWord32
                        Int
depth <- Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32 -> Int) -> Parser r m Word32 -> Parser r m Int
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM` Parser r m Word32
forall (m :: * -> *) r. Monad m => Parser r m Word32
P.getWord32
                        Parser r m Word32
forall (m :: * -> *) r. Monad m => Parser r m Word32
P.getWord32 Parser r m Word32 -> (Word32 -> Parser r m ()) -> Parser r m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Int -> Parser r m ()
forall (m :: * -> *) r. Monad m => Int -> Parser r m ()
P.drop (Int -> Parser r m ())
-> (Word32 -> Int) -> Word32 -> Parser r m ()
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral -- aux data
                        Int
nref <- Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32 -> Int) -> Parser r m Word32 -> Parser r m Int
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM` Parser r m Word32
forall (m :: * -> *) r. Monad m => Parser r m Word32
P.getWord32
                        Int
-> Int
-> Int
-> (Word32 -> Ckpoints -> Parser r m Ckpoints)
-> ((Ckpoints, Int64) -> Parser r m (Ckpoints, Int64))
-> Parser r m (BamIndex ())
forall (m :: * -> *) r.
MonadIO m =>
Int
-> Int
-> Int
-> (Word32 -> Ckpoints -> Parser r m Ckpoints)
-> ((Ckpoints, Int64) -> Parser r m (Ckpoints, Int64))
-> Parser r m (BamIndex ())
getIndexArrays Int
nref Int
minshift Int
depth (Int -> Int -> Word32 -> Ckpoints -> Parser r m Ckpoints
forall a (m :: * -> *) a r.
(Num a, Monad m, Ord a, Integral a) =>
Int -> Int -> a -> IntMap a -> Parser r m (IntMap a)
addOneCheckpoint Int
minshift Int
depth) (Ckpoints, Int64) -> Parser r m (Ckpoints, Int64)
forall (m :: * -> *) a. Monad m => a -> m a
return

    switch magic :: Bytes
magic   = IndexFormatError -> Parser r m (BamIndex ())
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM (IndexFormatError -> Parser r m (BamIndex ()))
-> IndexFormatError -> Parser r m (BamIndex ())
forall a b. (a -> b) -> a -> b
$ Bytes -> IndexFormatError
IndexFormatError Bytes
magic


    -- Insert one checkpoint.  If we already have an entry (can happen
    -- if it comes from a different bin), we conservatively take the min
    addOneCheckpoint :: Int -> Int -> a -> IntMap a -> Parser r m (IntMap a)
addOneCheckpoint minshift :: Int
minshift depth :: Int
depth bin :: a
bin cp :: IntMap a
cp = do
            a
loffset <- Word64 -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word64 -> a) -> Parser r m Word64 -> Parser r m a
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM` Parser r m Word64
forall (m :: * -> *) r. Monad m => Parser r m Word64
P.getWord64
            let key :: Int
key = Int -> Int -> Int -> Int
forall t. (Integral t, Bits t) => t -> Int -> Int -> t
llim (a -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
bin) (3Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
depth) Int
minshift
            IntMap a -> Parser r m (IntMap a)
forall (m :: * -> *) a. Monad m => a -> m a
return (IntMap a -> Parser r m (IntMap a))
-> IntMap a -> Parser r m (IntMap a)
forall a b. (a -> b) -> a -> b
$! (a -> a -> a) -> Int -> a -> IntMap a -> IntMap a
forall a. (a -> a -> a) -> Int -> a -> IntMap a -> IntMap a
M.insertWith a -> a -> a
forall a. Ord a => a -> a -> a
min Int
key a
loffset IntMap a
cp

    -- compute left limit of bin
    llim :: t -> Int -> Int -> t
llim bin :: t
bin dp :: Int
dp sf :: Int
sf | Int
dp  Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
==  0 = 0
                   | t
bin t -> t -> Bool
forall a. Ord a => a -> a -> Bool
>= t
ix = (t
bin t -> t -> t
forall a. Num a => a -> a -> a
- t
ix) t -> Int -> t
forall a. Bits a => a -> Int -> a
`shiftL` Int
sf
                   | Bool
otherwise = t -> Int -> Int -> t
llim t
bin (Int
dpInt -> Int -> Int
forall a. Num a => a -> a -> a
-3) (Int
sfInt -> Int -> Int
forall a. Num a => a -> a -> a
+3)
            where ix :: t
ix = (1 t -> Int -> t
forall a. Bits a => a -> Int -> a
`shiftL` Int
dp t -> t -> t
forall a. Num a => a -> a -> a
- 1) t -> t -> t
forall a. Integral a => a -> a -> a
`div` 7

withIndexedBam :: (MonadIO m, MonadLog m, MonadMask m) => FilePath -> (BamMeta -> BamIndex () -> Handle -> m r) -> m r
withIndexedBam :: String -> (BamMeta -> BamIndex () -> Handle -> m r) -> m r
withIndexedBam f :: String
f k :: BamMeta -> BamIndex () -> Handle -> m r
k = do
    BamIndex ()
idx <- IO (BamIndex ()) -> m (BamIndex ())
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (BamIndex ()) -> m (BamIndex ()))
-> IO (BamIndex ()) -> m (BamIndex ())
forall a b. (a -> b) -> a -> b
$ String -> IO (BamIndex ())
readBamIndex String
f
    m Handle -> (Handle -> m ()) -> (Handle -> m r) -> m r
forall (m :: * -> *) a c b.
MonadMask m =>
m a -> (a -> m c) -> (a -> m b) -> m b
bracket (IO Handle -> m Handle
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Handle -> m Handle) -> IO Handle -> m Handle
forall a b. (a -> b) -> a -> b
$ String -> IOMode -> IO Handle
openBinaryFile String
f IOMode
ReadMode) (IO () -> m ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> m ()) -> (Handle -> IO ()) -> Handle -> m ()
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Handle -> IO ()
hClose) ((Handle -> m r) -> m r) -> (Handle -> m r) -> m r
forall a b. (a -> b) -> a -> b
$ \hdl :: Handle
hdl -> do
        (hdr :: BamMeta
hdr,_) <- ByteStream m () -> m (BamMeta, Stream (Of BamRaw) m ())
forall (m :: * -> *) r.
(MonadIO m, MonadLog m) =>
ByteStream m r -> m (BamMeta, Stream (Of BamRaw) m r)
decodeBam (ByteStream m () -> m (BamMeta, Stream (Of BamRaw) m ()))
-> ByteStream m () -> m (BamMeta, Stream (Of BamRaw) m ())
forall a b. (a -> b) -> a -> b
$ Handle -> ByteStream m ()
forall (m :: * -> *). MonadIO m => Handle -> ByteStream m ()
streamHandle Handle
hdl
        BamMeta -> BamIndex () -> Handle -> m r
k BamMeta
hdr BamIndex ()
idx Handle
hdl


type TabIndex = BamIndex TabMeta

data TabMeta = TabMeta { TabMeta -> TabFormat
format :: TabFormat
                       , TabMeta -> Int
col_seq :: Int                           -- Column for the sequence name
                       , TabMeta -> Int
col_beg :: Int                           -- Column for the start of a region
                       , TabMeta -> Int
col_end :: Int                           -- Column for the end of a region
                       , TabMeta -> Char
comment_char :: Char
                       , TabMeta -> Int
skip_lines :: Int
                       , TabMeta -> Vector Bytes
names :: V.Vector Bytes }
  deriving Int -> TabMeta -> ShowS
[TabMeta] -> ShowS
TabMeta -> String
(Int -> TabMeta -> ShowS)
-> (TabMeta -> String) -> ([TabMeta] -> ShowS) -> Show TabMeta
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TabMeta] -> ShowS
$cshowList :: [TabMeta] -> ShowS
show :: TabMeta -> String
$cshow :: TabMeta -> String
showsPrec :: Int -> TabMeta -> ShowS
$cshowsPrec :: Int -> TabMeta -> ShowS
Show

data TabFormat = Generic | SamFormat | VcfFormat | ZeroBased   deriving Int -> TabFormat -> ShowS
[TabFormat] -> ShowS
TabFormat -> String
(Int -> TabFormat -> ShowS)
-> (TabFormat -> String)
-> ([TabFormat] -> ShowS)
-> Show TabFormat
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TabFormat] -> ShowS
$cshowList :: [TabFormat] -> ShowS
show :: TabFormat -> String
$cshow :: TabFormat -> String
showsPrec :: Int -> TabFormat -> ShowS
$cshowsPrec :: Int -> TabFormat -> ShowS
Show

-- | Reads a Tabix index.  Note that tabix indices are compressed, this
-- is taken care of automatically.
readTabix :: MonadIO m => ByteStream m r -> m TabIndex
readTabix :: ByteStream m r -> m TabIndex
readTabix = (r -> m TabIndex)
-> ((TabIndex, ByteStream m r) -> m TabIndex)
-> Either r (TabIndex, ByteStream m r)
-> m TabIndex
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (m TabIndex -> r -> m TabIndex
forall a b. a -> b -> a
const (m TabIndex -> r -> m TabIndex)
-> (IO TabIndex -> m TabIndex) -> IO TabIndex -> r -> m TabIndex
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. IO TabIndex -> m TabIndex
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO TabIndex -> r -> m TabIndex) -> IO TabIndex -> r -> m TabIndex
forall a b. (a -> b) -> a -> b
$ EofException -> IO TabIndex
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM EofException
P.EofException) (TabIndex -> m TabIndex
forall (m :: * -> *) a. Monad m => a -> m a
return (TabIndex -> m TabIndex)
-> ((TabIndex, ByteStream m r) -> TabIndex)
-> (TabIndex, ByteStream m r)
-> m TabIndex
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (TabIndex, ByteStream m r) -> TabIndex
forall a b. (a, b) -> a
fst) (Either r (TabIndex, ByteStream m r) -> m TabIndex)
-> (ByteStream m r -> m (Either r (TabIndex, ByteStream m r)))
-> ByteStream m r
-> m TabIndex
forall (m :: * -> *) b c a.
Monad m =>
(b -> m c) -> (a -> m b) -> a -> m c
<=<
            (Int64 -> Parser r m TabIndex)
-> ByteStream m r -> m (Either r (TabIndex, ByteStream m r))
forall (m :: * -> *) r a.
MonadIO m =>
(Int64 -> Parser r m a)
-> ByteStream m r -> m (Either r (a, ByteStream m r))
P.parseIO (Parser r m TabIndex -> Int64 -> Parser r m TabIndex
forall a b. a -> b -> a
const (Parser r m TabIndex -> Int64 -> Parser r m TabIndex)
-> Parser r m TabIndex -> Int64 -> Parser r m TabIndex
forall a b. (a -> b) -> a -> b
$ Int -> Parser r m Bytes
forall (m :: * -> *) r. Monad m => Int -> Parser r m Bytes
P.getString 4 Parser r m Bytes
-> (Bytes -> Parser r m TabIndex) -> Parser r m TabIndex
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Bytes -> Parser r m TabIndex
forall (m :: * -> *) r. MonadIO m => Bytes -> Parser r m TabIndex
switch) (ByteStream m r -> m (Either r (TabIndex, ByteStream m r)))
-> (ByteStream m r -> ByteStream m r)
-> ByteStream m r
-> m (Either r (TabIndex, ByteStream m r))
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ByteStream m r -> ByteStream m r
forall (m :: * -> *) r.
MonadIO m =>
ByteStream m r -> ByteStream m r
S.gunzip
  where
    switch :: Bytes -> Parser r m TabIndex
switch "TBI\1" = do Int
nref <- Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32 -> Int) -> Parser r m Word32 -> Parser r m Int
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM` Parser r m Word32
forall (m :: * -> *) r. Monad m => Parser r m Word32
P.getWord32
                        TabFormat
format       <- (Word32 -> TabFormat) -> Parser r m Word32 -> Parser r m TabFormat
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM Word32 -> TabFormat
forall a. (Num a, Bits a) => a -> TabFormat
toFormat     Parser r m Word32
forall (m :: * -> *) r. Monad m => Parser r m Word32
P.getWord32
                        Int
col_seq      <- (Word32 -> Int) -> Parser r m Word32 -> Parser r m Int
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Parser r m Word32
forall (m :: * -> *) r. Monad m => Parser r m Word32
P.getWord32
                        Int
col_beg      <- (Word32 -> Int) -> Parser r m Word32 -> Parser r m Int
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Parser r m Word32
forall (m :: * -> *) r. Monad m => Parser r m Word32
P.getWord32
                        Int
col_end      <- (Word32 -> Int) -> Parser r m Word32 -> Parser r m Int
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Parser r m Word32
forall (m :: * -> *) r. Monad m => Parser r m Word32
P.getWord32
                        Char
comment_char <- (Word32 -> Char) -> Parser r m Word32 -> Parser r m Char
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM (Int -> Char
chr (Int -> Char) -> (Word32 -> Int) -> Word32 -> Char
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral) Parser r m Word32
forall (m :: * -> *) r. Monad m => Parser r m Word32
P.getWord32
                        Int
skip_lines   <- (Word32 -> Int) -> Parser r m Word32 -> Parser r m Int
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Parser r m Word32
forall (m :: * -> *) r. Monad m => Parser r m Word32
P.getWord32
                        Vector Bytes
names        <- (Bytes -> Vector Bytes)
-> Parser r m Bytes -> Parser r m (Vector Bytes)
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM ([Bytes] -> Vector Bytes
forall a. [a] -> Vector a
V.fromList ([Bytes] -> Vector Bytes)
-> (Bytes -> [Bytes]) -> Bytes -> Vector Bytes
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word8 -> Bytes -> [Bytes]
B.split 0) (Parser r m Bytes -> Parser r m (Vector Bytes))
-> (Word32 -> Parser r m Bytes)
-> Word32
-> Parser r m (Vector Bytes)
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> Parser r m Bytes
forall (m :: * -> *) r. Monad m => Int -> Parser r m Bytes
P.getString (Int -> Parser r m Bytes)
-> (Word32 -> Int) -> Word32 -> Parser r m Bytes
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32 -> Parser r m (Vector Bytes))
-> Parser r m Word32 -> Parser r m (Vector Bytes)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Parser r m Word32
forall (m :: * -> *) r. Monad m => Parser r m Word32
P.getWord32

                        BamIndex ()
ix <- Int
-> Int
-> Int
-> (Word32 -> Ckpoints -> Parser r m Ckpoints)
-> ((Ckpoints, Int64) -> Parser r m (Ckpoints, Int64))
-> Parser r m (BamIndex ())
forall (m :: * -> *) r.
MonadIO m =>
Int
-> Int
-> Int
-> (Word32 -> Ckpoints -> Parser r m Ckpoints)
-> ((Ckpoints, Int64) -> Parser r m (Ckpoints, Int64))
-> Parser r m (BamIndex ())
getIndexArrays Int
nref 14 5 ((Ckpoints -> Parser r m Ckpoints)
-> Word32 -> Ckpoints -> Parser r m Ckpoints
forall a b. a -> b -> a
const Ckpoints -> Parser r m Ckpoints
forall (m :: * -> *) a. Monad m => a -> m a
return) (Ckpoints, Int64) -> Parser r m (Ckpoints, Int64)
forall (m :: * -> *) r.
Monad m =>
(Ckpoints, Int64) -> Parser r m (Ckpoints, Int64)
getIntervals
                        Bool
fin <- Parser r m Bool
forall (m :: * -> *) r. Monad m => Parser r m Bool
P.isFinished
                        if Bool
fin then TabIndex -> Parser r m TabIndex
forall (m :: * -> *) a. Monad m => a -> m a
return (TabIndex -> Parser r m TabIndex)
-> TabIndex -> Parser r m TabIndex
forall a b. (a -> b) -> a -> b
$! BamIndex ()
ix { extensions :: TabMeta
extensions = TabMeta :: TabFormat
-> Int -> Int -> Int -> Char -> Int -> Vector Bytes -> TabMeta
TabMeta{..} }
                               else do Int64
unaln <- Word64 -> Int64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word64 -> Int64) -> Parser r m Word64 -> Parser r m Int64
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM` Parser r m Word64
forall (m :: * -> *) r. Monad m => Parser r m Word64
P.getWord64
                                       TabIndex -> Parser r m TabIndex
forall (m :: * -> *) a. Monad m => a -> m a
return (TabIndex -> Parser r m TabIndex)
-> TabIndex -> Parser r m TabIndex
forall a b. (a -> b) -> a -> b
$! BamIndex ()
ix { unaln_off :: Int64
unaln_off = Int64
unaln, extensions :: TabMeta
extensions = TabMeta :: TabFormat
-> Int -> Int -> Int -> Char -> Int -> Vector Bytes -> TabMeta
TabMeta{..} }

    switch magic :: Bytes
magic   = IndexFormatError -> Parser r m TabIndex
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM (IndexFormatError -> Parser r m TabIndex)
-> IndexFormatError -> Parser r m TabIndex
forall a b. (a -> b) -> a -> b
$ Bytes -> IndexFormatError
IndexFormatError Bytes
magic

    toFormat :: a -> TabFormat
toFormat 1 = TabFormat
SamFormat
    toFormat 2 = TabFormat
VcfFormat
    toFormat x :: a
x = if a -> Int -> Bool
forall a. Bits a => a -> Int -> Bool
testBit a
x 16 then TabFormat
ZeroBased else TabFormat
Generic

-- Read the intervals.  Each one becomes a checkpoint.
getIntervals :: Monad m => (IntMap Int64, Int64) -> P.Parser r m (IntMap Int64, Int64)
getIntervals :: (Ckpoints, Int64) -> Parser r m (Ckpoints, Int64)
getIntervals (cp :: Ckpoints
cp,mx0 :: Int64
mx0) = do
    Int
nintv <- Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32 -> Int) -> Parser r m Word32 -> Parser r m Int
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM` Parser r m Word32
forall (m :: * -> *) r. Monad m => Parser r m Word32
P.getWord32
    Int
-> Int
-> (Ckpoints, Int64)
-> ((Ckpoints, Int64) -> Int -> Parser r m (Ckpoints, Int64))
-> Parser r m (Ckpoints, Int64)
forall (m :: * -> *) ix a.
(Monad m, Enum ix, Eq ix) =>
ix -> ix -> a -> (a -> ix -> m a) -> m a
reduceM 0 Int
nintv (Ckpoints
cp,Int64
mx0) (((Ckpoints, Int64) -> Int -> Parser r m (Ckpoints, Int64))
 -> Parser r m (Ckpoints, Int64))
-> ((Ckpoints, Int64) -> Int -> Parser r m (Ckpoints, Int64))
-> Parser r m (Ckpoints, Int64)
forall a b. (a -> b) -> a -> b
$ \(!Ckpoints
im,!Int64
mx) int :: Int
int -> do
        Int64
oo <- Word64 -> Int64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word64 -> Int64) -> Parser r m Word64 -> Parser r m Int64
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM` Parser r m Word64
forall (m :: * -> *) r. Monad m => Parser r m Word64
P.getWord64
        (Ckpoints, Int64) -> Parser r m (Ckpoints, Int64)
forall (m :: * -> *) a. Monad m => a -> m a
return (if Int64
oo Int64 -> Int64 -> Bool
forall a. Eq a => a -> a -> Bool
== 0 then Ckpoints
im else Int -> Int64 -> Ckpoints -> Ckpoints
forall a. Int -> a -> IntMap a -> IntMap a
M.insert (Int
int Int -> Int -> Int
forall a. Num a => a -> a -> a
* 0x4000) Int64
oo Ckpoints
im, Int64 -> Int64 -> Int64
forall a. Ord a => a -> a -> a
max Int64
mx Int64
oo)


getIndexArrays :: MonadIO m => Int -> Int -> Int
               -> (Word32 -> Ckpoints -> P.Parser r m Ckpoints)
               -> ((Ckpoints, Int64) -> P.Parser r m (Ckpoints, Int64))
               -> P.Parser r m (BamIndex ())
getIndexArrays :: Int
-> Int
-> Int
-> (Word32 -> Ckpoints -> Parser r m Ckpoints)
-> ((Ckpoints, Int64) -> Parser r m (Ckpoints, Int64))
-> Parser r m (BamIndex ())
getIndexArrays nref :: Int
nref minshift :: Int
minshift depth :: Int
depth addOneCheckpoint :: Word32 -> Ckpoints -> Parser r m Ckpoints
addOneCheckpoint addManyCheckpoints :: (Ckpoints, Int64) -> Parser r m (Ckpoints, Int64)
addManyCheckpoints
    | Int
nref  Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< 1 = BamIndex () -> Parser r m (BamIndex ())
forall (m :: * -> *) a. Monad m => a -> m a
return (BamIndex () -> Parser r m (BamIndex ()))
-> BamIndex () -> Parser r m (BamIndex ())
forall a b. (a -> b) -> a -> b
$ Int
-> Int
-> Int64
-> ()
-> Vector Bins
-> Vector Ckpoints
-> BamIndex ()
forall a.
Int
-> Int
-> Int64
-> a
-> Vector Bins
-> Vector Ckpoints
-> BamIndex a
BamIndex Int
minshift Int
depth 0 () Vector Bins
forall a. Vector a
V.empty Vector Ckpoints
forall a. Vector a
V.empty
    | Bool
otherwise = do
        MVector RealWorld Bins
rbins  <- IO (MVector RealWorld Bins) -> Parser r m (MVector RealWorld Bins)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (MVector RealWorld Bins)
 -> Parser r m (MVector RealWorld Bins))
-> IO (MVector RealWorld Bins)
-> Parser r m (MVector RealWorld Bins)
forall a b. (a -> b) -> a -> b
$ Int -> IO (MVector (PrimState IO) Bins)
forall (m :: * -> *) a.
PrimMonad m =>
Int -> m (MVector (PrimState m) a)
W.new Int
nref
        MVector RealWorld Ckpoints
rckpts <- IO (MVector RealWorld Ckpoints)
-> Parser r m (MVector RealWorld Ckpoints)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (MVector RealWorld Ckpoints)
 -> Parser r m (MVector RealWorld Ckpoints))
-> IO (MVector RealWorld Ckpoints)
-> Parser r m (MVector RealWorld Ckpoints)
forall a b. (a -> b) -> a -> b
$ Int -> IO (MVector (PrimState IO) Ckpoints)
forall (m :: * -> *) a.
PrimMonad m =>
Int -> m (MVector (PrimState m) a)
W.new Int
nref
        Int64
mxR <- Int
-> Int
-> Int64
-> (Int64 -> Int -> Parser r m Int64)
-> Parser r m Int64
forall (m :: * -> *) ix a.
(Monad m, Enum ix, Eq ix) =>
ix -> ix -> a -> (a -> ix -> m a) -> m a
reduceM 0 Int
nref 0 ((Int64 -> Int -> Parser r m Int64) -> Parser r m Int64)
-> (Int64 -> Int -> Parser r m Int64) -> Parser r m Int64
forall a b. (a -> b) -> a -> b
$ \mx0 :: Int64
mx0 r :: Int
r -> do
                Word32
nbins <- Parser r m Word32
forall (m :: * -> *) r. Monad m => Parser r m Word32
P.getWord32
                (!Bins
bins,!Ckpoints
cpts,!Int64
mx1) <- Word32
-> Word32
-> (Bins, Ckpoints, Int64)
-> ((Bins, Ckpoints, Int64)
    -> Word32 -> Parser r m (Bins, Ckpoints, Int64))
-> Parser r m (Bins, Ckpoints, Int64)
forall (m :: * -> *) ix a.
(Monad m, Enum ix, Eq ix) =>
ix -> ix -> a -> (a -> ix -> m a) -> m a
reduceM 0 Word32
nbins (Bins
forall a. IntMap a
M.empty,Ckpoints
forall a. IntMap a
M.empty,Int64
mx0) (((Bins, Ckpoints, Int64)
  -> Word32 -> Parser r m (Bins, Ckpoints, Int64))
 -> Parser r m (Bins, Ckpoints, Int64))
-> ((Bins, Ckpoints, Int64)
    -> Word32 -> Parser r m (Bins, Ckpoints, Int64))
-> Parser r m (Bins, Ckpoints, Int64)
forall a b. (a -> b) -> a -> b
$ \(!Bins
im,!Ckpoints
cp,!Int64
mx) _ -> do
                        Word32
bin <- Parser r m Word32
forall (m :: * -> *) r. Monad m => Parser r m Word32
P.getWord32 -- the "distinct bin"
                        Ckpoints
cp' <- Word32 -> Ckpoints -> Parser r m Ckpoints
addOneCheckpoint Word32
bin Ckpoints
cp
                        Vector (Int64, Int64)
segsarr <- Parser r m (Vector (Int64, Int64))
forall (m :: * -> *) r.
MonadIO m =>
Parser r m (Vector (Int64, Int64))
getSegmentArray
                        let !mx' :: Int64
mx' = if Vector (Int64, Int64) -> Bool
forall a. Unbox a => Vector a -> Bool
U.null Vector (Int64, Int64)
segsarr then Int64
mx else Int64 -> Int64 -> Int64
forall a. Ord a => a -> a -> a
max Int64
mx ((Int64, Int64) -> Int64
forall a b. (a, b) -> b
snd (Vector (Int64, Int64) -> (Int64, Int64)
forall a. Unbox a => Vector a -> a
U.last Vector (Int64, Int64)
segsarr))
                        (Bins, Ckpoints, Int64) -> Parser r m (Bins, Ckpoints, Int64)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> Vector (Int64, Int64) -> Bins -> Bins
forall a. Int -> a -> IntMap a -> IntMap a
M.insert (Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
bin) Vector (Int64, Int64)
segsarr Bins
im, Ckpoints
cp', Int64
mx')
                (!Ckpoints
cpts',!Int64
mx2) <- (Ckpoints, Int64) -> Parser r m (Ckpoints, Int64)
addManyCheckpoints (Ckpoints
cpts,Int64
mx1)
                IO () -> Parser r m ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> Parser r m ()) -> IO () -> Parser r m ()
forall a b. (a -> b) -> a -> b
$ MVector (PrimState IO) Bins -> Int -> Bins -> IO ()
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> a -> m ()
W.write MVector RealWorld Bins
MVector (PrimState IO) Bins
rbins Int
r Bins
bins IO () -> IO () -> IO ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> MVector (PrimState IO) Ckpoints -> Int -> Ckpoints -> IO ()
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> a -> m ()
W.write MVector RealWorld Ckpoints
MVector (PrimState IO) Ckpoints
rckpts Int
r Ckpoints
cpts'
                Int64 -> Parser r m Int64
forall (m :: * -> *) a. Monad m => a -> m a
return Int64
mx2
        (Vector Bins -> Vector Ckpoints -> BamIndex ())
-> Parser r m (Vector Bins)
-> Parser r m (Vector Ckpoints)
-> Parser r m (BamIndex ())
forall (m :: * -> *) a1 a2 r.
Monad m =>
(a1 -> a2 -> r) -> m a1 -> m a2 -> m r
liftM2 (Int
-> Int
-> Int64
-> ()
-> Vector Bins
-> Vector Ckpoints
-> BamIndex ()
forall a.
Int
-> Int
-> Int64
-> a
-> Vector Bins
-> Vector Ckpoints
-> BamIndex a
BamIndex Int
minshift Int
depth Int64
mxR ()) (IO (Vector Bins) -> Parser r m (Vector Bins)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Vector Bins) -> Parser r m (Vector Bins))
-> IO (Vector Bins) -> Parser r m (Vector Bins)
forall a b. (a -> b) -> a -> b
$ MVector (PrimState IO) Bins -> IO (Vector Bins)
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> m (Vector a)
V.unsafeFreeze MVector RealWorld Bins
MVector (PrimState IO) Bins
rbins) (IO (Vector Ckpoints) -> Parser r m (Vector Ckpoints)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Vector Ckpoints) -> Parser r m (Vector Ckpoints))
-> IO (Vector Ckpoints) -> Parser r m (Vector Ckpoints)
forall a b. (a -> b) -> a -> b
$ MVector (PrimState IO) Ckpoints -> IO (Vector Ckpoints)
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> m (Vector a)
V.unsafeFreeze MVector RealWorld Ckpoints
MVector (PrimState IO) Ckpoints
rckpts)

-- | Reads the list of segments from an index file and makes sure
-- it is sorted.
getSegmentArray :: MonadIO m => P.Parser r m Segments
getSegmentArray :: Parser r m (Vector (Int64, Int64))
getSegmentArray = do
    Int
nsegs <- Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32 -> Int) -> Parser r m Word32 -> Parser r m Int
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM` Parser r m Word32
forall (m :: * -> *) r. Monad m => Parser r m Word32
P.getWord32
    MVector RealWorld (Int64, Int64)
segsarr <- IO (MVector RealWorld (Int64, Int64))
-> Parser r m (MVector RealWorld (Int64, Int64))
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (MVector RealWorld (Int64, Int64))
 -> Parser r m (MVector RealWorld (Int64, Int64)))
-> IO (MVector RealWorld (Int64, Int64))
-> Parser r m (MVector RealWorld (Int64, Int64))
forall a b. (a -> b) -> a -> b
$ Int -> IO (MVector (PrimState IO) (Int64, Int64))
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> m (MVector (PrimState m) a)
N.new Int
nsegs
    Int -> Int -> (Int -> Parser r m ()) -> Parser r m ()
forall (m :: * -> *) ix.
(Monad m, Enum ix, Eq ix) =>
ix -> ix -> (ix -> m ()) -> m ()
loopM 0 Int
nsegs ((Int -> Parser r m ()) -> Parser r m ())
-> (Int -> Parser r m ()) -> Parser r m ()
forall a b. (a -> b) -> a -> b
$ \i :: Int
i -> do Int64
beg <- Word64 -> Int64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word64 -> Int64) -> Parser r m Word64 -> Parser r m Int64
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM` Parser r m Word64
forall (m :: * -> *) r. Monad m => Parser r m Word64
P.getWord64
                             Int64
end <- Word64 -> Int64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word64 -> Int64) -> Parser r m Word64 -> Parser r m Int64
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM` Parser r m Word64
forall (m :: * -> *) r. Monad m => Parser r m Word64
P.getWord64
                             IO () -> Parser r m ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> Parser r m ()) -> IO () -> Parser r m ()
forall a b. (a -> b) -> a -> b
$ MVector (PrimState IO) (Int64, Int64)
-> Int -> (Int64, Int64) -> IO ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
N.write MVector RealWorld (Int64, Int64)
MVector (PrimState IO) (Int64, Int64)
segsarr Int
i (Int64
beg,Int64
end)
    IO (Vector (Int64, Int64)) -> Parser r m (Vector (Int64, Int64))
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Vector (Int64, Int64)) -> Parser r m (Vector (Int64, Int64)))
-> IO (Vector (Int64, Int64)) -> Parser r m (Vector (Int64, Int64))
forall a b. (a -> b) -> a -> b
$ MVector (PrimState IO) (Int64, Int64) -> IO ()
forall (m :: * -> *) (v :: * -> * -> *) e.
(PrimMonad m, MVector v e, Ord e) =>
v (PrimState m) e -> m ()
N.sort MVector RealWorld (Int64, Int64)
MVector (PrimState IO) (Int64, Int64)
segsarr IO () -> IO (Vector (Int64, Int64)) -> IO (Vector (Int64, Int64))
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> MVector (PrimState IO) (Int64, Int64) -> IO (Vector (Int64, Int64))
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
U.unsafeFreeze MVector RealWorld (Int64, Int64)
MVector (PrimState IO) (Int64, Int64)
segsarr

{-# INLINE reduceM #-}
reduceM :: (Monad m, Enum ix, Eq ix) => ix -> ix -> a -> (a -> ix -> m a) -> m a
reduceM :: ix -> ix -> a -> (a -> ix -> m a) -> m a
reduceM beg :: ix
beg end :: ix
end acc :: a
acc cons :: a -> ix -> m a
cons = if ix
beg ix -> ix -> Bool
forall a. Eq a => a -> a -> Bool
/= ix
end then a -> ix -> m a
cons a
acc ix
beg m a -> (a -> m a) -> m a
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \n :: a
n -> ix -> ix -> a -> (a -> ix -> m a) -> m a
forall (m :: * -> *) ix a.
(Monad m, Enum ix, Eq ix) =>
ix -> ix -> a -> (a -> ix -> m a) -> m a
reduceM (ix -> ix
forall a. Enum a => a -> a
succ ix
beg) ix
end a
n a -> ix -> m a
cons else a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return a
acc

{-# INLINE loopM #-}
loopM :: (Monad m, Enum ix, Eq ix) => ix -> ix -> (ix -> m ()) -> m ()
loopM :: ix -> ix -> (ix -> m ()) -> m ()
loopM beg :: ix
beg end :: ix
end k :: ix -> m ()
k = if ix
beg ix -> ix -> Bool
forall a. Eq a => a -> a -> Bool
/= ix
end then ix -> m ()
k ix
beg m () -> m () -> m ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ix -> ix -> (ix -> m ()) -> m ()
forall (m :: * -> *) ix.
(Monad m, Enum ix, Eq ix) =>
ix -> ix -> (ix -> m ()) -> m ()
loopM (ix -> ix
forall a. Enum a => a -> a
succ ix
beg) ix
end ix -> m ()
k else () -> m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()

{-| Seeks to a virtual offset in a BGZF file and streams from there.

If the optional end offset is supplied, streaming stops when it is
reached.  Else, streaming goes on to the end of file.
-}
streamBgzf :: MonadIO m => Handle -> Int64 -> Maybe Int64 -> ByteStream m ()
streamBgzf :: Handle -> Int64 -> Maybe Int64 -> ByteStream m ()
streamBgzf hdl :: Handle
hdl off :: Int64
off eoff :: Maybe Int64
eoff =
    Int64 -> ByteStream m () -> ByteStream m ()
forall (m :: * -> *) r.
Monad m =>
Int64 -> ByteStream m r -> ByteStream m r
S.drop (Int64
off Int64 -> Int64 -> Int64
forall a. Bits a => a -> a -> a
.&. 0xffff) (ByteStream m () -> ByteStream m ())
-> (ByteStream m () -> ByteStream m ())
-> ByteStream m ()
-> ByteStream m ()
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ByteStream m () -> ByteStream m ()
forall (m :: * -> *) r.
MonadIO m =>
ByteStream m r -> ByteStream m r
bgunzip (ByteStream m () -> ByteStream m ())
-> ByteStream m () -> ByteStream m ()
forall a b. (a -> b) -> a -> b
$ do
        Bool -> ByteStream m () -> ByteStream m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int64
off Int64 -> Int64 -> Bool
forall a. Eq a => a -> a -> Bool
/= 0) (IO () -> ByteStream m ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ByteStream m ()) -> IO () -> ByteStream m ()
forall a b. (a -> b) -> a -> b
$ Handle -> SeekMode -> Integer -> IO ()
hSeek Handle
hdl SeekMode
AbsoluteSeek (Integer -> IO ()) -> Integer -> IO ()
forall a b. (a -> b) -> a -> b
$ Int64 -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int64 -> Integer) -> Int64 -> Integer
forall a b. (a -> b) -> a -> b
$ Int64 -> Int -> Int64
forall a. Bits a => a -> Int -> a
shiftR Int64
off 16)
        (ByteStream m () -> ByteStream m ())
-> (Int64 -> ByteStream m () -> ByteStream m ())
-> Maybe Int64
-> ByteStream m ()
-> ByteStream m ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe ByteStream m () -> ByteStream m ()
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id Int64 -> ByteStream m () -> ByteStream m ()
forall (m :: * -> *).
Monad m =>
Int64 -> ByteStream m () -> ByteStream m ()
S.trim Maybe Int64
eoff (ByteStream m () -> ByteStream m ())
-> ByteStream m () -> ByteStream m ()
forall a b. (a -> b) -> a -> b
$ Handle -> ByteStream m ()
forall (m :: * -> *). MonadIO m => Handle -> ByteStream m ()
streamHandle Handle
hdl
{-# INLINE streamBgzf #-}

{- | Streams one reference from a bam file.

Seeks to a given sequence in a Bam file and enumerates only those
records aligning to that reference.  We use the first checkpoint
available for the sequence, which an appropriate index.  Streams the
'BamRaw' records of the correct reference sequence only, and produces an
empty stream if the sequence isn't found.
-}
streamBamRefseq :: (MonadIO m, MonadLog m) => BamIndex b -> Handle -> Refseq -> Stream (Of BamRaw) m ()
streamBamRefseq :: BamIndex b -> Handle -> Refseq -> Stream (Of BamRaw) m ()
streamBamRefseq BamIndex{..} hdl :: Handle
hdl (Refseq r :: Word32
r)
    | Just ckpts :: Ckpoints
ckpts <- Vector Ckpoints
refseq_ckpoints Vector Ckpoints -> Int -> Maybe Ckpoints
forall a. Vector a -> Int -> Maybe a
V.!? Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
r
    , Just (voff :: Int64
voff, _) <- Ckpoints -> Maybe (Int64, Ckpoints)
forall a. IntMap a -> Maybe (a, IntMap a)
M.minView Ckpoints
ckpts
    , Int64
voff Int64 -> Int64 -> Bool
forall a. Eq a => a -> a -> Bool
/= 0 = Stream (Of BamRaw) m () -> Stream (Of BamRaw) m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Stream (Of BamRaw) m () -> Stream (Of BamRaw) m ())
-> Stream (Of BamRaw) m () -> Stream (Of BamRaw) m ()
forall a b. (a -> b) -> a -> b
$
                  (BamRaw -> Bool)
-> Stream (Of BamRaw) m () -> Stream (Of BamRaw) m ()
forall (m :: * -> *) a r.
Monad m =>
(a -> Bool) -> Stream (Of a) m r -> Stream (Of a) m ()
Q.takeWhile ((Word32 -> Refseq
Refseq Word32
r Refseq -> Refseq -> Bool
forall a. Eq a => a -> a -> Bool
==) (Refseq -> Bool) -> (BamRaw -> Refseq) -> BamRaw -> Bool
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. BamRec -> Refseq
b_rname (BamRec -> Refseq) -> (BamRaw -> BamRec) -> BamRaw -> Refseq
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. BamRaw -> BamRec
unpackBam) (Stream (Of BamRaw) m () -> Stream (Of BamRaw) m ())
-> Stream (Of BamRaw) m () -> Stream (Of BamRaw) m ()
forall a b. (a -> b) -> a -> b
$
                  (ByteStream m () -> m (Either () (BamRaw, ByteStream m ())))
-> ByteStream m () -> Stream (Of BamRaw) m ()
forall (m :: * -> *) s r a.
Monad m =>
(s -> m (Either r (a, s))) -> s -> Stream (Of a) m r
Q.unfoldr (Level
-> (Int64 -> Parser () m BamRaw)
-> ByteStream m ()
-> m (Either () (BamRaw, ByteStream m ()))
forall (m :: * -> *) r a.
MonadLog m =>
Level
-> (Int64 -> Parser r m a)
-> ByteStream m r
-> m (Either r (a, ByteStream m r))
P.parseLog Level
Error Int64 -> Parser () m BamRaw
forall (m :: * -> *) r. Monad m => Int64 -> Parser r m BamRaw
getBamRaw) (ByteStream m () -> Stream (Of BamRaw) m ())
-> ByteStream m () -> Stream (Of BamRaw) m ()
forall a b. (a -> b) -> a -> b
$
                  Handle -> Int64 -> Maybe Int64 -> ByteStream m ()
forall (m :: * -> *).
MonadIO m =>
Handle -> Int64 -> Maybe Int64 -> ByteStream m ()
streamBgzf Handle
hdl Int64
voff Maybe Int64
forall a. Maybe a
Nothing
    | Bool
otherwise = () -> Stream (Of BamRaw) m ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()


{- | Reads from a Bam file the part with unaligned reads.

Sort of the dual to 'streamBamRefseq'.  Since the index does not
actually point to the unaligned part at the end, we use a best guess at
where the unaligned stuff might start, then skip over any aligned
records.  Our \"fallback guess\" is to decode from the current position;
this only works if something else already consumed the Bam header.
-}
streamBamUnaligned :: MonadIO m => BamIndex b -> Handle -> Stream (Of BamRaw) m ()
streamBamUnaligned :: BamIndex b -> Handle -> Stream (Of BamRaw) m ()
streamBamUnaligned BamIndex{..} hdl :: Handle
hdl =
    (BamRaw -> Bool)
-> Stream (Of BamRaw) m () -> Stream (Of BamRaw) m ()
forall (m :: * -> *) a r.
Monad m =>
(a -> Bool) -> Stream (Of a) m r -> Stream (Of a) m r
Q.filter (Bool -> Bool
not (Bool -> Bool) -> (BamRaw -> Bool) -> BamRaw -> Bool
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Refseq -> Bool
isValidRefseq (Refseq -> Bool) -> (BamRaw -> Refseq) -> BamRaw -> Bool
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. BamRec -> Refseq
b_rname (BamRec -> Refseq) -> (BamRaw -> BamRec) -> BamRaw -> Refseq
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. BamRaw -> BamRec
unpackBam) (Stream (Of BamRaw) m () -> Stream (Of BamRaw) m ())
-> Stream (Of BamRaw) m () -> Stream (Of BamRaw) m ()
forall a b. (a -> b) -> a -> b
$
    (ByteStream m () -> m (Either () (BamRaw, ByteStream m ())))
-> ByteStream m () -> Stream (Of BamRaw) m ()
forall (m :: * -> *) s r a.
Monad m =>
(s -> m (Either r (a, s))) -> s -> Stream (Of a) m r
Q.unfoldr ((Int64 -> Parser () m BamRaw)
-> ByteStream m () -> m (Either () (BamRaw, ByteStream m ()))
forall (m :: * -> *) r a.
MonadIO m =>
(Int64 -> Parser r m a)
-> ByteStream m r -> m (Either r (a, ByteStream m r))
P.parseIO Int64 -> Parser () m BamRaw
forall (m :: * -> *) r. Monad m => Int64 -> Parser r m BamRaw
getBamRaw) (ByteStream m () -> Stream (Of BamRaw) m ())
-> ByteStream m () -> Stream (Of BamRaw) m ()
forall a b. (a -> b) -> a -> b
$
    Handle -> Int64 -> Maybe Int64 -> ByteStream m ()
forall (m :: * -> *).
MonadIO m =>
Handle -> Int64 -> Maybe Int64 -> ByteStream m ()
streamBgzf Handle
hdl Int64
unaln_off Maybe Int64
forall a. Maybe a
Nothing

{- | Streams one 'Segment'.

Takes a 'Handle', a 'Segment' and a 'Stream' coming from that handle.
If skipping ahead in the stream looks cheap enough, that is done.  Else
we seek the handle to the start offset and stream from it.  Either way,
the part of the stream before it crosses either the end offset or the
max position is returned, and the remaining stream after it is returned
in its functorial value so it can be passed to another invocation of
e.g. 'streamBamSegment'.  Note that the stream passed in becomes
unusable.
-}
streamBamSegment :: MonadIO m
                 => Handle -> Segment -> Stream (Of BamRaw) m ()
                 -> Stream (Of BamRaw) m (Stream (Of BamRaw) m ())
streamBamSegment :: Handle
-> Segment
-> Stream (Of BamRaw) m ()
-> Stream (Of BamRaw) m (Stream (Of BamRaw) m ())
streamBamSegment hdl :: Handle
hdl (Segment beg :: Int64
beg end :: Int64
end mpos :: Int
mpos) =
    m (Maybe (BamRaw, Stream (Of BamRaw) m ()))
-> Stream (Of BamRaw) m (Maybe (BamRaw, Stream (Of BamRaw) m ()))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (Maybe (BamRaw, Stream (Of BamRaw) m ()))
 -> Stream (Of BamRaw) m (Maybe (BamRaw, Stream (Of BamRaw) m ())))
-> (Stream (Of BamRaw) m ()
    -> m (Maybe (BamRaw, Stream (Of BamRaw) m ())))
-> Stream (Of BamRaw) m ()
-> Stream (Of BamRaw) m (Maybe (BamRaw, Stream (Of BamRaw) m ()))
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Stream (Of BamRaw) m ()
-> m (Maybe (BamRaw, Stream (Of BamRaw) m ()))
forall (m :: * -> *) a r.
Monad m =>
Stream (Of a) m r -> m (Maybe (a, Stream (Of a) m r))
Q.uncons (Stream (Of BamRaw) m ()
 -> Stream (Of BamRaw) m (Maybe (BamRaw, Stream (Of BamRaw) m ())))
-> (Maybe (BamRaw, Stream (Of BamRaw) m ())
    -> Stream (Of BamRaw) m (Stream (Of BamRaw) m ()))
-> Stream (Of BamRaw) m ()
-> Stream (Of BamRaw) m (Stream (Of BamRaw) m ())
forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=> \case
        -- don't seek if it's a forwards seek of less than 512k
        Just (br :: BamRaw
br,brs :: Stream (Of BamRaw) m ()
brs) | Int64 -> Bool
forall a. Integral a => a -> Bool
near (BamRaw -> Int64
virt_offset BamRaw
br)
            -> (BamRaw -> Bool)
-> Stream (Of BamRaw) m ()
-> Stream (Of BamRaw) m (Stream (Of BamRaw) m ())
forall (m :: * -> *) a r.
Monad m =>
(a -> Bool)
-> Stream (Of a) m r -> Stream (Of a) m (Stream (Of a) m r)
Q.span BamRaw -> Bool
in_seg (Stream (Of BamRaw) m ()
 -> Stream (Of BamRaw) m (Stream (Of BamRaw) m ()))
-> Stream (Of BamRaw) m ()
-> Stream (Of BamRaw) m (Stream (Of BamRaw) m ())
forall a b. (a -> b) -> a -> b
$ BamRaw -> Stream (Of BamRaw) m () -> Stream (Of BamRaw) m ()
forall (m :: * -> *) a r.
Monad m =>
a -> Stream (Of a) m r -> Stream (Of a) m r
Q.cons BamRaw
br Stream (Of BamRaw) m ()
brs
        _   -> (BamRaw -> Bool)
-> Stream (Of BamRaw) m ()
-> Stream (Of BamRaw) m (Stream (Of BamRaw) m ())
forall (m :: * -> *) a r.
Monad m =>
(a -> Bool)
-> Stream (Of a) m r -> Stream (Of a) m (Stream (Of a) m r)
Q.span BamRaw -> Bool
in_seg (Stream (Of BamRaw) m ()
 -> Stream (Of BamRaw) m (Stream (Of BamRaw) m ()))
-> Stream (Of BamRaw) m ()
-> Stream (Of BamRaw) m (Stream (Of BamRaw) m ())
forall a b. (a -> b) -> a -> b
$ (ByteStream m () -> m (Either () (BamRaw, ByteStream m ())))
-> ByteStream m () -> Stream (Of BamRaw) m ()
forall (m :: * -> *) s r a.
Monad m =>
(s -> m (Either r (a, s))) -> s -> Stream (Of a) m r
Q.unfoldr ((Int64 -> Parser () m BamRaw)
-> ByteStream m () -> m (Either () (BamRaw, ByteStream m ()))
forall (m :: * -> *) r a.
MonadIO m =>
(Int64 -> Parser r m a)
-> ByteStream m r -> m (Either r (a, ByteStream m r))
P.parseIO Int64 -> Parser () m BamRaw
forall (m :: * -> *) r. Monad m => Int64 -> Parser r m BamRaw
getBamRaw) (ByteStream m () -> Stream (Of BamRaw) m ())
-> ByteStream m () -> Stream (Of BamRaw) m ()
forall a b. (a -> b) -> a -> b
$ Handle -> Int64 -> Maybe Int64 -> ByteStream m ()
forall (m :: * -> *).
MonadIO m =>
Handle -> Int64 -> Maybe Int64 -> ByteStream m ()
streamBgzf Handle
hdl Int64
beg (Int64 -> Maybe Int64
forall a. a -> Maybe a
Just Int64
end)
  where
    near :: a -> Bool
near    o :: a
o = Int64
beg Int64 -> Int64 -> Bool
forall a. Ord a => a -> a -> Bool
<= a -> Int64
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
o Bool -> Bool -> Bool
&& Int64
beg Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+ 0x800000000 Int64 -> Int64 -> Bool
forall a. Ord a => a -> a -> Bool
> a -> Int64
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
o
    in_seg :: BamRaw -> Bool
in_seg br :: BamRaw
br = BamRaw -> Int64
virt_offset BamRaw
br Int64 -> Int64 -> Bool
forall a. Ord a => a -> a -> Bool
<= Int64
end Bool -> Bool -> Bool
&& BamRec -> Int
b_pos (BamRaw -> BamRec
unpackBam BamRaw
br) Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
mpos

streamBamSubseq :: MonadIO m
                => BamIndex b -> Handle -> Refseq -> R.Subsequence -> Stream (Of BamRaw) m ()
                -> Stream (Of BamRaw) m (Stream (Of BamRaw) m ())
streamBamSubseq :: BamIndex b
-> Handle
-> Refseq
-> Subsequence
-> Stream (Of BamRaw) m ()
-> Stream (Of BamRaw) m (Stream (Of BamRaw) m ())
streamBamSubseq bi :: BamIndex b
bi hdl :: Handle
hdl ref :: Refseq
ref subs :: Subsequence
subs str :: Stream (Of BamRaw) m ()
str = (BamRaw -> Bool)
-> Stream (Of BamRaw) m (Stream (Of BamRaw) m ())
-> Stream (Of BamRaw) m (Stream (Of BamRaw) m ())
forall (m :: * -> *) a r.
Monad m =>
(a -> Bool) -> Stream (Of a) m r -> Stream (Of a) m r
Q.filter BamRaw -> Bool
olap (Stream (Of BamRaw) m (Stream (Of BamRaw) m ())
 -> Stream (Of BamRaw) m (Stream (Of BamRaw) m ()))
-> Stream (Of BamRaw) m (Stream (Of BamRaw) m ())
-> Stream (Of BamRaw) m (Stream (Of BamRaw) m ())
forall a b. (a -> b) -> a -> b
$ (Stream (Of BamRaw) m ()
 -> Segment -> Stream (Of BamRaw) m (Stream (Of BamRaw) m ()))
-> Stream (Of BamRaw) m ()
-> [Segment]
-> Stream (Of BamRaw) m (Stream (Of BamRaw) m ())
forall (t :: * -> *) (m :: * -> *) b a.
(Foldable t, Monad m) =>
(b -> a -> m b) -> b -> t a -> m b
foldM ((Segment
 -> Stream (Of BamRaw) m ()
 -> Stream (Of BamRaw) m (Stream (Of BamRaw) m ()))
-> Stream (Of BamRaw) m ()
-> Segment
-> Stream (Of BamRaw) m (Stream (Of BamRaw) m ())
forall a b c. (a -> b -> c) -> b -> a -> c
flip ((Segment
  -> Stream (Of BamRaw) m ()
  -> Stream (Of BamRaw) m (Stream (Of BamRaw) m ()))
 -> Stream (Of BamRaw) m ()
 -> Segment
 -> Stream (Of BamRaw) m (Stream (Of BamRaw) m ()))
-> (Segment
    -> Stream (Of BamRaw) m ()
    -> Stream (Of BamRaw) m (Stream (Of BamRaw) m ()))
-> Stream (Of BamRaw) m ()
-> Segment
-> Stream (Of BamRaw) m (Stream (Of BamRaw) m ())
forall a b. (a -> b) -> a -> b
$ Handle
-> Segment
-> Stream (Of BamRaw) m ()
-> Stream (Of BamRaw) m (Stream (Of BamRaw) m ())
forall (m :: * -> *).
MonadIO m =>
Handle
-> Segment
-> Stream (Of BamRaw) m ()
-> Stream (Of BamRaw) m (Stream (Of BamRaw) m ())
streamBamSegment Handle
hdl) Stream (Of BamRaw) m ()
str [Segment]
segs
  where
    segs :: [Segment]
segs = ([Segment] -> [Segment] -> [Segment])
-> [Segment] -> [[Segment]] -> [Segment]
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr [Segment] -> [Segment] -> [Segment]
(~~) [] ([[Segment]] -> [Segment]) -> [[Segment]] -> [Segment]
forall a b. (a -> b) -> a -> b
$ BamIndex b -> Refseq -> Subsequence -> [[Segment]]
forall a. BamIndex a -> Refseq -> Subsequence -> [[Segment]]
segmentLists BamIndex b
bi Refseq
ref Subsequence
subs
    olap :: BamRaw -> Bool
olap br :: BamRaw
br = case BamRaw -> BamRec
unpackBam BamRaw
br of
        BamRec{..} -> Refseq
b_rname Refseq -> Refseq -> Bool
forall a. Eq a => a -> a -> Bool
== Refseq
ref Bool -> Bool -> Bool
&& Int -> Int -> Subsequence -> Bool
R.overlaps Int
b_pos (Int
b_pos Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Vector Cigar -> Int
forall (v :: * -> *). Vector v Cigar => v Cigar -> Int
alignedLength Vector Cigar
b_cigar) Subsequence
subs

streamBamRegions :: MonadIO m => BamIndex b -> Handle -> [R.Region] -> Stream (Of BamRaw) m ()
streamBamRegions :: BamIndex b -> Handle -> [Region] -> Stream (Of BamRaw) m ()
streamBamRegions bi :: BamIndex b
bi hdl :: Handle
hdl = Stream (Of BamRaw) m (Stream (Of BamRaw) m ())
-> Stream (Of BamRaw) m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Stream (Of BamRaw) m (Stream (Of BamRaw) m ())
 -> Stream (Of BamRaw) m ())
-> ([Region] -> Stream (Of BamRaw) m (Stream (Of BamRaw) m ()))
-> [Region]
-> Stream (Of BamRaw) m ()
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Stream (Of BamRaw) m ()
 -> (Refseq, Subsequence)
 -> Stream (Of BamRaw) m (Stream (Of BamRaw) m ()))
-> Stream (Of BamRaw) m ()
-> [(Refseq, Subsequence)]
-> Stream (Of BamRaw) m (Stream (Of BamRaw) m ())
forall (t :: * -> *) (m :: * -> *) b a.
(Foldable t, Monad m) =>
(b -> a -> m b) -> b -> t a -> m b
foldM (\s :: Stream (Of BamRaw) m ()
s (r :: Refseq
r,is :: Subsequence
is) -> BamIndex b
-> Handle
-> Refseq
-> Subsequence
-> Stream (Of BamRaw) m ()
-> Stream (Of BamRaw) m (Stream (Of BamRaw) m ())
forall (m :: * -> *) b.
MonadIO m =>
BamIndex b
-> Handle
-> Refseq
-> Subsequence
-> Stream (Of BamRaw) m ()
-> Stream (Of BamRaw) m (Stream (Of BamRaw) m ())
streamBamSubseq BamIndex b
bi Handle
hdl Refseq
r Subsequence
is Stream (Of BamRaw) m ()
s) (() -> Stream (Of BamRaw) m ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()) ([(Refseq, Subsequence)]
 -> Stream (Of BamRaw) m (Stream (Of BamRaw) m ()))
-> ([Region] -> [(Refseq, Subsequence)])
-> [Region]
-> Stream (Of BamRaw) m (Stream (Of BamRaw) m ())
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Regions -> [(Refseq, Subsequence)]
R.toList (Regions -> [(Refseq, Subsequence)])
-> ([Region] -> Regions) -> [Region] -> [(Refseq, Subsequence)]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. [Region] -> Regions
R.fromList