-- | Printers for BAM and SAM.  BAM is properly supported, SAM can be
-- piped to standard output.

module Bio.Bam.Writer (
    IsBamRec(..),
    encodeBamWith,

    packBam,
    writeBamFile,
    writeBamHandle,
    pipeBamOutput,
    pipeSamOutput
                      ) where

import Bio.Bam.Header
import Bio.Bam.Rec
import Bio.Prelude
import Bio.Streaming
import Bio.Streaming.Bgzf

import Data.ByteString.Builder.Prim ( (>*<) )
import Data.ByteString.Internal     ( fromForeignPtr )
import Data.ByteString.Lazy         ( foldrChunks )
import Foreign.Marshal.Alloc        ( alloca )

import qualified Bio.Streaming.Bytes                as S
import qualified Data.ByteString                    as B
import qualified Data.ByteString.Builder            as B
import qualified Data.ByteString.Builder.Extra      as B
import qualified Data.ByteString.Builder.Prim       as E
import qualified Data.Vector.Generic                as V
import qualified Data.Vector.Storable               as W
import qualified Data.Vector.Unboxed                as U
import qualified Streaming.Prelude                  as Q

{- | write in SAM format to stdout

This is useful for piping to other tools (say, AWK scripts) or for
debugging.  No convenience functions to send SAM to a file or to
compress it exist, because these are stupid ideas.
-}
pipeSamOutput :: (IsBamRec a, MonadIO m) => BamMeta -> Stream (Of a) m r -> m r
pipeSamOutput :: BamMeta -> Stream (Of a) m r -> m r
pipeSamOutput meta :: BamMeta
meta s :: Stream (Of a) m r
s = do
    IO () -> m ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> m ()) -> (Builder -> IO ()) -> Builder -> m ()
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Handle -> Builder -> IO ()
B.hPutBuilder Handle
stdout (Builder -> m ()) -> Builder -> m ()
forall a b. (a -> b) -> a -> b
$ BamMeta -> Builder
showBamMeta BamMeta
meta
    (a -> m ()) -> Stream (Of a) m r -> m r
forall (m :: * -> *) a x r.
Monad m =>
(a -> m x) -> Stream (Of a) m r -> m r
Q.mapM_ (IO () -> m ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> m ()) -> (a -> IO ()) -> a -> m ()
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Handle -> Builder -> IO ()
B.hPutBuilder Handle
stdout (Builder -> IO ()) -> (a -> Builder) -> a -> IO ()
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Refs -> BamRec -> Builder
encodeSamEntry (BamMeta -> Refs
meta_refs BamMeta
meta) (BamRec -> Builder) -> (a -> BamRec) -> a -> Builder
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. a -> BamRec
forall a. IsBamRec a => a -> BamRec
unpackBamRec) Stream (Of a) m r
s
{-# INLINE pipeSamOutput #-}

encodeSamEntry :: Refs -> BamRec -> B.Builder
encodeSamEntry :: Refs -> BamRec -> Builder
encodeSamEntry refs :: Refs
refs b :: BamRec
b =
    ByteString -> Builder
B.byteStringCopy (BamRec -> ByteString
b_qname BamRec
b)                         Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
B.char7 '\t' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
    Int -> Builder
B.intDec         (BamRec -> Int
b_flag BamRec
b Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. 0xffff)               Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
B.char7 '\t' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
    ByteString -> Builder
B.byteStringCopy (BamSQ -> ByteString
sq_name (BamSQ -> ByteString) -> BamSQ -> ByteString
forall a b. (a -> b) -> a -> b
$ Refs -> Refseq -> BamSQ
getRef Refs
refs (Refseq -> BamSQ) -> Refseq -> BamSQ
forall a b. (a -> b) -> a -> b
$ BamRec -> Refseq
b_rname BamRec
b) Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
B.char7 '\t' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
    Int -> Builder
B.intDec         (BamRec -> Int
b_pos BamRec
b Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1)                       Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
B.char7 '\t' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
    Word8 -> Builder
B.word8Dec       (Qual -> Word8
unQ (Qual -> Word8) -> Qual -> Word8
forall a b. (a -> b) -> a -> b
$ BamRec -> Qual
b_mapq BamRec
b)                    Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
B.char7 '\t' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
    Vector Cigar -> Builder
buildCigar       (BamRec -> Vector Cigar
b_cigar BamRec
b)                         Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
B.char7 '\t' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
    Refseq -> Refseq -> Builder
buildMrnm        (BamRec -> Refseq
b_mrnm BamRec
b) (BamRec -> Refseq
b_rname BamRec
b)              Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
B.char7 '\t' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
    Int -> Builder
B.intDec         (BamRec -> Int
b_mpos BamRec
b Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1)                      Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
B.char7 '\t' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
    Int -> Builder
B.intDec         (BamRec -> Int
b_isize BamRec
b)                         Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
B.char7 '\t' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
    Vector_Nucs_half Nucleotides -> Builder
buildSeq         (BamRec -> Vector_Nucs_half Nucleotides
b_seq BamRec
b)                           Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
B.char7 '\t' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
    Maybe (Vector Qual) -> Builder
buildQual        (BamRec -> Maybe (Vector Qual)
b_qual BamRec
b)                          Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
    ((BamKey, Ext) -> Builder) -> [(BamKey, Ext)] -> Builder
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap (BamKey, Ext) -> Builder
buildExt (BamRec -> [(BamKey, Ext)]
b_exts BamRec
b)                          Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
B.char7 '\n'
  where
    buildCigar :: Vector Cigar -> Builder
buildCigar = BoundedPrim (Int, Word8)
-> (Vector Cigar -> Maybe ((Int, Word8), Vector Cigar))
-> Vector Cigar
-> Builder
forall b a. BoundedPrim b -> (a -> Maybe (b, a)) -> a -> Builder
E.primUnfoldrBounded
                    (BoundedPrim Int
E.intDec BoundedPrim Int -> BoundedPrim Word8 -> BoundedPrim (Int, Word8)
forall (f :: * -> *) a b. Monoidal f => f a -> f b -> f (a, b)
>*< FixedPrim Word8 -> BoundedPrim Word8
forall a. FixedPrim a -> BoundedPrim a
E.liftFixedToBounded FixedPrim Word8
E.word8)
                    ((Cigar -> (Int, Word8))
-> Vector Cigar -> Maybe ((Int, Word8), Vector Cigar)
forall (v :: * -> *) a a.
Vector v a =>
(a -> a) -> v a -> Maybe (a, v a)
vuncons ((Cigar -> (Int, Word8))
 -> Vector Cigar -> Maybe ((Int, Word8), Vector Cigar))
-> (Cigar -> (Int, Word8))
-> Vector Cigar
-> Maybe ((Int, Word8), Vector Cigar)
forall a b. (a -> b) -> a -> b
$ \(op :: CigOp
op :* num :: Int
num) -> (Int
num, ByteString -> Int -> Word8
B.index "MIDNSHP" (CigOp -> Int
forall a. Enum a => a -> Int
fromEnum CigOp
op)))

    buildMrnm :: Refseq -> Refseq -> Builder
buildMrnm mrnm :: Refseq
mrnm rname :: Refseq
rname
        | Refseq -> Bool
isValidRefseq Refseq
mrnm Bool -> Bool -> Bool
&& Refseq
mrnm Refseq -> Refseq -> Bool
forall a. Eq a => a -> a -> Bool
== Refseq
rname  =  Char -> Builder
B.char7 '='
        | Bool
otherwise                            =  ByteString -> Builder
B.byteString (BamSQ -> ByteString
sq_name (BamSQ -> ByteString) -> BamSQ -> ByteString
forall a b. (a -> b) -> a -> b
$ Refs -> Refseq -> BamSQ
getRef Refs
refs Refseq
mrnm)

    buildSeq :: Vector_Nucs_half Nucleotides -> Builder
buildSeq  = FixedPrim Word8
-> (Vector_Nucs_half Nucleotides
    -> Maybe (Word8, Vector_Nucs_half Nucleotides))
-> Vector_Nucs_half Nucleotides
-> Builder
forall b a. FixedPrim b -> (a -> Maybe (b, a)) -> a -> Builder
E.primUnfoldrFixed FixedPrim Word8
E.word8 ((Nucleotides -> Word8)
-> Vector_Nucs_half Nucleotides
-> Maybe (Word8, Vector_Nucs_half Nucleotides)
forall (v :: * -> *) a a.
Vector v a =>
(a -> a) -> v a -> Maybe (a, v a)
vuncons ((Nucleotides -> Word8)
 -> Vector_Nucs_half Nucleotides
 -> Maybe (Word8, Vector_Nucs_half Nucleotides))
-> (Nucleotides -> Word8)
-> Vector_Nucs_half Nucleotides
-> Maybe (Word8, Vector_Nucs_half Nucleotides)
forall a b. (a -> b) -> a -> b
$ \(Ns x :: Word8
x) -> ByteString -> Int -> Word8
B.index "-ACMGRSVTWYHKDBN" (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8 -> Int) -> Word8 -> Int
forall a b. (a -> b) -> a -> b
$ Word8
x Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. 15)
    buildQual :: Maybe (Vector Qual) -> Builder
buildQual = Builder
-> (Vector Qual -> Builder) -> Maybe (Vector Qual) -> Builder
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (Char -> Builder
B.char7 '*') (FixedPrim Word8
-> (Vector Qual -> Maybe (Word8, Vector Qual))
-> Vector Qual
-> Builder
forall b a. FixedPrim b -> (a -> Maybe (b, a)) -> a -> Builder
E.primUnfoldrFixed FixedPrim Word8
E.word8 ((Qual -> Word8) -> Vector Qual -> Maybe (Word8, Vector Qual)
forall (v :: * -> *) a a.
Vector v a =>
(a -> a) -> v a -> Maybe (a, v a)
vuncons ((Qual -> Word8) -> Vector Qual -> Maybe (Word8, Vector Qual))
-> (Qual -> Word8) -> Vector Qual -> Maybe (Word8, Vector Qual)
forall a b. (a -> b) -> a -> b
$ \(Q q :: Word8
q) -> Word8
q Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
+ 33))

    buildExt :: (BamKey, Ext) -> Builder
buildExt (BamKey k :: Word16
k,v :: Ext
v) = Char -> Builder
B.char7 '\t' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
                            Word8 -> Builder
B.word8 (Word16 -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral         Word16
k   ) Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
                            Word8 -> Builder
B.word8 (Word16 -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word16 -> Int -> Word16
forall a. Bits a => a -> Int -> a
shiftR Word16
k 8)) Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
                            Char -> Builder
B.char7 ':' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
                            Ext -> Builder
buildExtVal Ext
v

    buildExtVal :: Ext -> Builder
buildExtVal (Int      i :: Int
i) = Char -> Builder
B.char7 'i' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
B.char7 ':' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Int -> Builder
B.intDec Int
i
    buildExtVal (Float    f :: Float
f) = Char -> Builder
B.char7 'f' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
B.char7 ':' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Float -> Builder
B.floatDec Float
f
    buildExtVal (Text     t :: ByteString
t) = Char -> Builder
B.char7 'Z' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
B.char7 ':' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> ByteString -> Builder
B.byteStringCopy ByteString
t
    buildExtVal (Bin      x :: ByteString
x) = Char -> Builder
B.char7 'H' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
B.char7 ':' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> ByteString -> Builder
B.byteStringHex ByteString
x
    buildExtVal (Char     c :: Word8
c) = Char -> Builder
B.char7 'A' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
B.char7 ':' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Word8 -> Builder
B.word8 Word8
c
    buildExtVal (IntArr   a :: Vector Int
a) = Char -> Builder
B.char7 'B' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
B.char7 ':' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
B.char7 'i' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> (Int -> Builder) -> Vector Int -> Builder
forall t. Unbox t => (t -> Builder) -> Vector t -> Builder
buildArr   Int -> Builder
B.intDec Vector Int
a
    buildExtVal (FloatArr a :: Vector Float
a) = Char -> Builder
B.char7 'B' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
B.char7 ':' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
B.char7 'f' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> (Float -> Builder) -> Vector Float -> Builder
forall t. Unbox t => (t -> Builder) -> Vector t -> Builder
buildArr Float -> Builder
B.floatDec Vector Float
a

    buildArr :: (t -> Builder) -> Vector t -> Builder
buildArr p :: t -> Builder
p = (t -> Builder -> Builder) -> Builder -> Vector t -> Builder
forall a b. Unbox a => (a -> b -> b) -> b -> Vector a -> b
U.foldr (\x :: t
x k :: Builder
k -> Char -> Builder
B.char7 ',' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> t -> Builder
p t
x Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
k) Builder
forall a. Monoid a => a
mempty

    vuncons :: (a -> a) -> v a -> Maybe (a, v a)
vuncons f :: a -> a
f v :: v a
v | v a -> Bool
forall (v :: * -> *) a. Vector v a => v a -> Bool
V.null  v a
v = Maybe (a, v a)
forall a. Maybe a
Nothing
                | Bool
otherwise = (a, v a) -> Maybe (a, v a)
forall a. a -> Maybe a
Just (a -> a
f (v a -> a
forall (v :: * -> *) a. Vector v a => v a -> a
V.unsafeHead v a
v), v a -> v a
forall (v :: * -> *) a. Vector v a => v a -> v a
V.unsafeTail v a
v)


class IsBamRec a where
    pushBam :: a -> BgzfTokens -> BgzfTokens
    unpackBamRec :: a -> BamRec

instance IsBamRec BamRaw where
    {-# INLINE pushBam #-}
    pushBam :: BamRaw -> BgzfTokens -> BgzfTokens
pushBam = BamRaw -> BgzfTokens -> BgzfTokens
pushBamRaw
    {-# INLINE unpackBamRec #-}
    unpackBamRec :: BamRaw -> BamRec
unpackBamRec = BamRaw -> BamRec
unpackBam

instance IsBamRec BamRec where
    {-# INLINE pushBam #-}
    pushBam :: BamRec -> BgzfTokens -> BgzfTokens
pushBam = BamRec -> BgzfTokens -> BgzfTokens
pushBamRec
    {-# INLINE unpackBamRec #-}
    unpackBamRec :: BamRec -> BamRec
unpackBamRec = BamRec -> BamRec
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id

instance (IsBamRec a, IsBamRec b) => IsBamRec (Either a b) where
    {-# INLINE pushBam #-}
    pushBam :: Either a b -> BgzfTokens -> BgzfTokens
pushBam = (a -> BgzfTokens -> BgzfTokens)
-> (b -> BgzfTokens -> BgzfTokens)
-> Either a b
-> BgzfTokens
-> BgzfTokens
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either a -> BgzfTokens -> BgzfTokens
forall a. IsBamRec a => a -> BgzfTokens -> BgzfTokens
pushBam b -> BgzfTokens -> BgzfTokens
forall a. IsBamRec a => a -> BgzfTokens -> BgzfTokens
pushBam
    {-# INLINE unpackBamRec #-}
    unpackBamRec :: Either a b -> BamRec
unpackBamRec = (a -> BamRec) -> (b -> BamRec) -> Either a b -> BamRec
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either a -> BamRec
forall a. IsBamRec a => a -> BamRec
unpackBamRec b -> BamRec
forall a. IsBamRec a => a -> BamRec
unpackBamRec

-- | Encodes BAM records straight into a dynamic buffer, then BGZF's it.
-- Should be fairly direct and perform well.
encodeBamWith :: (IsBamRec a, MonadIO m) => Int -> BamMeta -> Stream (Of a) m r -> ByteStream m r
encodeBamWith :: Int -> BamMeta -> Stream (Of a) m r -> ByteStream m r
encodeBamWith lv :: Int
lv meta :: BamMeta
meta = Int -> Stream (Of (Endo BgzfTokens)) m r -> ByteStream m r
forall (m :: * -> *) b.
MonadIO m =>
Int -> Stream (Of (Endo BgzfTokens)) m b -> ByteStream m b
encodeBgzf Int
lv (Stream (Of (Endo BgzfTokens)) m r -> ByteStream m r)
-> (Stream (Of a) m r -> Stream (Of (Endo BgzfTokens)) m r)
-> Stream (Of a) m r
-> ByteStream m r
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Stream (Of a) m r -> Stream (Of (Endo BgzfTokens)) m r
forall (m :: * -> *) a r.
(Monad m, IsBamRec a) =>
Stream (Of a) m r -> Stream (Of (Endo BgzfTokens)) m r
enc_bam
  where
    enc_bam :: Stream (Of a) m r -> Stream (Of (Endo BgzfTokens)) m r
enc_bam bs :: Stream (Of a) m r
bs = Endo BgzfTokens
-> Stream (Of (Endo BgzfTokens)) m r
-> Stream (Of (Endo BgzfTokens)) m r
forall (m :: * -> *) a r.
Monad m =>
a -> Stream (Of a) m r -> Stream (Of a) m r
Q.cons Endo BgzfTokens
pushHeader (Stream (Of (Endo BgzfTokens)) m r
 -> Stream (Of (Endo BgzfTokens)) m r)
-> Stream (Of (Endo BgzfTokens)) m r
-> Stream (Of (Endo BgzfTokens)) m r
forall a b. (a -> b) -> a -> b
$ (a -> Endo BgzfTokens)
-> Stream (Of a) m r -> Stream (Of (Endo BgzfTokens)) m r
forall (m :: * -> *) a b r.
Monad m =>
(a -> b) -> Stream (Of a) m r -> Stream (Of b) m r
Q.map ((BgzfTokens -> BgzfTokens) -> Endo BgzfTokens
forall a. (a -> a) -> Endo a
Endo ((BgzfTokens -> BgzfTokens) -> Endo BgzfTokens)
-> (a -> BgzfTokens -> BgzfTokens) -> a -> Endo BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. a -> BgzfTokens -> BgzfTokens
forall a. IsBamRec a => a -> BgzfTokens -> BgzfTokens
pushBam) Stream (Of a) m r
bs

    pushHeader :: Endo BgzfTokens
    pushHeader :: Endo BgzfTokens
pushHeader = (BgzfTokens -> BgzfTokens) -> Endo BgzfTokens
forall a. (a -> a) -> Endo a
Endo ((BgzfTokens -> BgzfTokens) -> Endo BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> Endo BgzfTokens
forall a b. (a -> b) -> a -> b
$ ByteString -> BgzfTokens -> BgzfTokens
TkString "BAM\1"
                      (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. BgzfTokens -> BgzfTokens
TkSetMark                        -- the length byte
                      (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Builder -> BgzfTokens -> BgzfTokens
pushBuilder (BamMeta -> Builder
showBamMeta BamMeta
meta)
                      (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. BgzfTokens -> BgzfTokens
TkEndRecord                      -- fills the length in
                      (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word32 -> BgzfTokens -> BgzfTokens
TkWord32 (Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word32) -> (Refs -> Int) -> Refs -> Word32
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Vector BamSQ -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
V.length (Vector BamSQ -> Int) -> (Refs -> Vector BamSQ) -> Refs -> Int
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Refs -> Vector BamSQ
unRefs (Refs -> Word32) -> Refs -> Word32
forall a b. (a -> b) -> a -> b
$ BamMeta -> Refs
meta_refs BamMeta
meta)
                      (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Endo BgzfTokens -> BgzfTokens -> BgzfTokens
forall a. Endo a -> a -> a
appEndo ((BamSQ -> Endo BgzfTokens) -> Vector BamSQ -> Endo BgzfTokens
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap ((BgzfTokens -> BgzfTokens) -> Endo BgzfTokens
forall a. (a -> a) -> Endo a
Endo ((BgzfTokens -> BgzfTokens) -> Endo BgzfTokens)
-> (BamSQ -> BgzfTokens -> BgzfTokens) -> BamSQ -> Endo BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. BamSQ -> BgzfTokens -> BgzfTokens
pushRef) (Refs -> Vector BamSQ
unRefs (Refs -> Vector BamSQ) -> Refs -> Vector BamSQ
forall a b. (a -> b) -> a -> b
$ BamMeta -> Refs
meta_refs BamMeta
meta))

    pushRef :: BamSQ -> BgzfTokens -> BgzfTokens
    pushRef :: BamSQ -> BgzfTokens -> BgzfTokens
pushRef bs :: BamSQ
bs = Word32 -> BgzfTokens -> BgzfTokens
TkWord32 (Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word32) -> Int -> Word32
forall a b. (a -> b) -> a -> b
$ ByteString -> Int
B.length (BamSQ -> ByteString
sq_name BamSQ
bs) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1)
               (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ByteString -> BgzfTokens -> BgzfTokens
TkString (BamSQ -> ByteString
sq_name BamSQ
bs)
               (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word8 -> BgzfTokens -> BgzfTokens
TkWord8 0
               (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word32 -> BgzfTokens -> BgzfTokens
TkWord32 (Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word32) -> Int -> Word32
forall a b. (a -> b) -> a -> b
$ BamSQ -> Int
sq_length BamSQ
bs)

    pushBuilder :: B.Builder -> BgzfTokens -> BgzfTokens
    pushBuilder :: Builder -> BgzfTokens -> BgzfTokens
pushBuilder b :: Builder
b tk :: BgzfTokens
tk = (ByteString -> BgzfTokens -> BgzfTokens)
-> BgzfTokens -> ByteString -> BgzfTokens
forall a. (ByteString -> a -> a) -> a -> ByteString -> a
foldrChunks ByteString -> BgzfTokens -> BgzfTokens
TkString BgzfTokens
tk (Builder -> ByteString
B.toLazyByteString Builder
b)
{-# INLINE encodeBamWith #-}

pushBamRaw :: BamRaw -> BgzfTokens -> BgzfTokens
pushBamRaw :: BamRaw -> BgzfTokens -> BgzfTokens
pushBamRaw r :: BamRaw
r = Word32 -> BgzfTokens -> BgzfTokens
TkWord32 (Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word32) -> Int -> Word32
forall a b. (a -> b) -> a -> b
$ ByteString -> Int
B.length (ByteString -> Int) -> ByteString -> Int
forall a b. (a -> b) -> a -> b
$ BamRaw -> ByteString
raw_data BamRaw
r) (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
.
               ByteString -> BgzfTokens -> BgzfTokens
TkString (BamRaw -> ByteString
raw_data BamRaw
r)
{-# INLINE pushBamRaw #-}

-- | Writes BAM encoded stuff to a file.
-- In reality, it cleverly writes to a temporary file and renames it
-- when done.
writeBamFile :: (IsBamRec a, MonadIO m, MonadMask m) => FilePath -> BamMeta -> Stream (Of a) m r -> m r
writeBamFile :: FilePath -> BamMeta -> Stream (Of a) m r -> m r
writeBamFile fp :: FilePath
fp meta :: BamMeta
meta = FilePath -> ByteStream m r -> m r
forall (m :: * -> *) r.
(MonadIO m, MonadMask m) =>
FilePath -> ByteStream m r -> m r
S.writeFile FilePath
fp (ByteStream m r -> m r)
-> (Stream (Of a) m r -> ByteStream m r)
-> Stream (Of a) m r
-> m r
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> BamMeta -> Stream (Of a) m r -> ByteStream m r
forall a (m :: * -> *) r.
(IsBamRec a, MonadIO m) =>
Int -> BamMeta -> Stream (Of a) m r -> ByteStream m r
encodeBamWith 6 BamMeta
meta

-- | Write BAM encoded stuff to stdout.
-- This sends uncompressed(!) BAM to stdout.  Useful for piping to other
-- tools.  The output is still wrapped in a BGZF stream, because that's
-- what all tools expect; but the individuals blocks are not compressed.
pipeBamOutput :: (IsBamRec a, MonadIO m) => BamMeta -> Stream (Of a) m r -> m r
pipeBamOutput :: BamMeta -> Stream (Of a) m r -> m r
pipeBamOutput meta :: BamMeta
meta = Handle -> ByteStream m r -> m r
forall (m :: * -> *) r.
MonadIO m =>
Handle -> ByteStream m r -> m r
S.hPut Handle
stdout (ByteStream m r -> m r)
-> (Stream (Of a) m r -> ByteStream m r)
-> Stream (Of a) m r
-> m r
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> BamMeta -> Stream (Of a) m r -> ByteStream m r
forall a (m :: * -> *) r.
(IsBamRec a, MonadIO m) =>
Int -> BamMeta -> Stream (Of a) m r -> ByteStream m r
encodeBamWith 0 BamMeta
meta
{-# INLINE pipeBamOutput #-}

-- | Writes BAM encoded stuff to a 'Handle'.
writeBamHandle :: (IsBamRec a, MonadIO m) => Handle -> BamMeta -> Stream (Of a) m r -> m r
writeBamHandle :: Handle -> BamMeta -> Stream (Of a) m r -> m r
writeBamHandle hdl :: Handle
hdl meta :: BamMeta
meta = Handle -> ByteStream m r -> m r
forall (m :: * -> *) r.
MonadIO m =>
Handle -> ByteStream m r -> m r
S.hPut Handle
hdl (ByteStream m r -> m r)
-> (Stream (Of a) m r -> ByteStream m r)
-> Stream (Of a) m r
-> m r
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> BamMeta -> Stream (Of a) m r -> ByteStream m r
forall a (m :: * -> *) r.
(IsBamRec a, MonadIO m) =>
Int -> BamMeta -> Stream (Of a) m r -> ByteStream m r
encodeBamWith 6 BamMeta
meta

{-# RULES
    "pushBam/unpackBam"     forall b . pushBamRec (unpackBam b) = pushBamRaw b
  #-}

{-# INLINE[1] pushBamRec #-}
pushBamRec :: BamRec -> BgzfTokens -> BgzfTokens
pushBamRec :: BamRec -> BgzfTokens -> BgzfTokens
pushBamRec BamRec{..} =
      BgzfTokens -> BgzfTokens
TkSetMark
    (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word32 -> BgzfTokens -> BgzfTokens
TkWord32 (Refseq -> Word32
unRefseq Refseq
b_rname)
    (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word32 -> BgzfTokens -> BgzfTokens
TkWord32 (Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
b_pos)
    (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word8 -> BgzfTokens -> BgzfTokens
TkWord8  (Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ ByteString -> Int
B.length ByteString
b_qname Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1)
    (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word8 -> BgzfTokens -> BgzfTokens
TkWord8  (Qual -> Word8
unQ Qual
b_mapq)
    (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word16 -> BgzfTokens -> BgzfTokens
TkWord16 (Int -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
bin)
    (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word16 -> BgzfTokens -> BgzfTokens
TkWord16 (Int -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word16) -> Int -> Word16
forall a b. (a -> b) -> a -> b
$ Vector Cigar -> Int
forall a. Storable a => Vector a -> Int
W.length Vector Cigar
b_cigar)
    (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word16 -> BgzfTokens -> BgzfTokens
TkWord16 (Int -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
b_flag)
    (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word32 -> BgzfTokens -> BgzfTokens
TkWord32 (Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word32) -> Int -> Word32
forall a b. (a -> b) -> a -> b
$ Vector_Nucs_half Nucleotides -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
V.length Vector_Nucs_half Nucleotides
b_seq)
    (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word32 -> BgzfTokens -> BgzfTokens
TkWord32 (Refseq -> Word32
unRefseq Refseq
b_mrnm)
    (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word32 -> BgzfTokens -> BgzfTokens
TkWord32 (Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
b_mpos)
    (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word32 -> BgzfTokens -> BgzfTokens
TkWord32 (Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
b_isize)
    (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ByteString -> BgzfTokens -> BgzfTokens
TkString ByteString
b_qname
    (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word8 -> BgzfTokens -> BgzfTokens
TkWord8 0
    (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Vector Word8 -> BgzfTokens -> BgzfTokens
TkMemCopy (Vector Cigar -> Vector Word8
forall a b. (Storable a, Storable b) => Vector a -> Vector b
W.unsafeCast Vector Cigar
b_cigar)
    (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Vector_Nucs_half Nucleotides -> BgzfTokens -> BgzfTokens
forall (vec :: * -> *).
Vector vec Nucleotides =>
vec Nucleotides -> BgzfTokens -> BgzfTokens
pushSeq Vector_Nucs_half Nucleotides
b_seq
    (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (BgzfTokens -> BgzfTokens)
-> (Vector Qual -> BgzfTokens -> BgzfTokens)
-> Maybe (Vector Qual)
-> BgzfTokens
-> BgzfTokens
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (Int -> Word8 -> BgzfTokens -> BgzfTokens
TkMemFill (Vector_Nucs_half Nucleotides -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
V.length Vector_Nucs_half Nucleotides
b_seq) 0xff) (Vector Word8 -> BgzfTokens -> BgzfTokens
TkMemCopy (Vector Word8 -> BgzfTokens -> BgzfTokens)
-> (Vector Qual -> Vector Word8)
-> Vector Qual
-> BgzfTokens
-> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Vector Qual -> Vector Word8
forall a b. (Storable a, Storable b) => Vector a -> Vector b
W.unsafeCast) Maybe (Vector Qual)
b_qual
    (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ((BamKey, Ext)
 -> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens)
-> [(BamKey, Ext)]
-> BgzfTokens
-> BgzfTokens
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ((BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
(.) ((BgzfTokens -> BgzfTokens)
 -> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens)
-> ((BamKey, Ext) -> BgzfTokens -> BgzfTokens)
-> (BamKey, Ext)
-> (BgzfTokens -> BgzfTokens)
-> BgzfTokens
-> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (BamKey, Ext) -> BgzfTokens -> BgzfTokens
pushExt) BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id [(BamKey, Ext)]
b_exts
    (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. BgzfTokens -> BgzfTokens
TkEndRecord
  where
    bin :: Int
bin = Int -> Int -> Int
distinctBin Int
b_pos (Vector Cigar -> Int
forall (v :: * -> *). Vector v Cigar => v Cigar -> Int
alignedLength Vector Cigar
b_cigar)

    pushSeq :: V.Vector vec Nucleotides => vec Nucleotides -> BgzfTokens -> BgzfTokens
    pushSeq :: vec Nucleotides -> BgzfTokens -> BgzfTokens
pushSeq v :: vec Nucleotides
v = case vec Nucleotides
v vec Nucleotides -> Int -> Maybe Nucleotides
forall (v :: * -> *) a. Vector v a => v a -> Int -> Maybe a
V.!? 0 of
                    Nothing -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id
                    Just a :: Nucleotides
a  -> case vec Nucleotides
v vec Nucleotides -> Int -> Maybe Nucleotides
forall (v :: * -> *) a. Vector v a => v a -> Int -> Maybe a
V.!? 1 of
                        Nothing -> Word8 -> BgzfTokens -> BgzfTokens
TkWord8 (Nucleotides -> Word8
unNs Nucleotides
a Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
`shiftL` 4)
                        Just b :: Nucleotides
b  -> Word8 -> BgzfTokens -> BgzfTokens
TkWord8 (Nucleotides -> Word8
unNs Nucleotides
a Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
`shiftL` 4 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.|. Nucleotides -> Word8
unNs Nucleotides
b) (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. vec Nucleotides -> BgzfTokens -> BgzfTokens
forall (vec :: * -> *).
Vector vec Nucleotides =>
vec Nucleotides -> BgzfTokens -> BgzfTokens
pushSeq (Int -> vec Nucleotides -> vec Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.drop 2 vec Nucleotides
v)

    pushExt :: (BamKey, Ext) -> BgzfTokens -> BgzfTokens
    pushExt :: (BamKey, Ext) -> BgzfTokens -> BgzfTokens
pushExt (BamKey k :: Word16
k, e :: Ext
e) = case Ext
e of
        Text  t :: ByteString
t -> Char -> BgzfTokens -> BgzfTokens
common 'Z' (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ByteString -> BgzfTokens -> BgzfTokens
TkString ByteString
t (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word8 -> BgzfTokens -> BgzfTokens
TkWord8 0
        Bin   t :: ByteString
t -> Char -> BgzfTokens -> BgzfTokens
common 'H' (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ByteString -> BgzfTokens -> BgzfTokens
TkString ByteString
t (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word8 -> BgzfTokens -> BgzfTokens
TkWord8 0
        Char  c :: Word8
c -> Char -> BgzfTokens -> BgzfTokens
common 'A' (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word8 -> BgzfTokens -> BgzfTokens
TkWord8 Word8
c
        Float f :: Float
f -> Char -> BgzfTokens -> BgzfTokens
common 'f' (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word32 -> BgzfTokens -> BgzfTokens
TkWord32 (Float -> Word32
fromFloat Float
f)

        Int i :: Int
i   -> case Vector Int -> (Char, Int -> BgzfTokens -> BgzfTokens)
put_some_int (Int -> Vector Int
forall a. Unbox a => a -> Vector a
U.singleton Int
i) of
                        (c :: Char
c,op :: Int -> BgzfTokens -> BgzfTokens
op) -> Char -> BgzfTokens -> BgzfTokens
common Char
c (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> BgzfTokens -> BgzfTokens
op Int
i

        IntArr  ia :: Vector Int
ia -> case Vector Int -> (Char, Int -> BgzfTokens -> BgzfTokens)
put_some_int Vector Int
ia of
                        (c :: Char
c,op :: Int -> BgzfTokens -> BgzfTokens
op) -> Char -> BgzfTokens -> BgzfTokens
common 'B' (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word8 -> BgzfTokens -> BgzfTokens
TkWord8 (Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ Char -> Int
ord Char
c)
                                  (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word32 -> BgzfTokens -> BgzfTokens
TkWord32 (Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word32) -> Int -> Word32
forall a b. (a -> b) -> a -> b
$ Vector Int -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector Int
iaInt -> Int -> Int
forall a. Num a => a -> a -> a
-1)
                                  (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Int -> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens)
-> Vector Int
-> BgzfTokens
-> BgzfTokens
forall a b. Unbox a => (a -> b -> b) -> b -> Vector a -> b
U.foldr ((BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
(.) ((BgzfTokens -> BgzfTokens)
 -> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens)
-> (Int -> BgzfTokens -> BgzfTokens)
-> Int
-> (BgzfTokens -> BgzfTokens)
-> BgzfTokens
-> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> BgzfTokens -> BgzfTokens
op) BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id Vector Int
ia

        FloatArr fa :: Vector Float
fa -> Char -> BgzfTokens -> BgzfTokens
common 'B' (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word8 -> BgzfTokens -> BgzfTokens
TkWord8 (Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ Char -> Int
ord 'f')
                       (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word32 -> BgzfTokens -> BgzfTokens
TkWord32 (Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word32) -> Int -> Word32
forall a b. (a -> b) -> a -> b
$ Vector Float -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector Float
faInt -> Int -> Int
forall a. Num a => a -> a -> a
-1)
                       (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Float -> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens)
-> Vector Float
-> BgzfTokens
-> BgzfTokens
forall a b. Unbox a => (a -> b -> b) -> b -> Vector a -> b
U.foldr ((BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
(.) ((BgzfTokens -> BgzfTokens)
 -> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens)
-> (Float -> BgzfTokens -> BgzfTokens)
-> Float
-> (BgzfTokens -> BgzfTokens)
-> BgzfTokens
-> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word32 -> BgzfTokens -> BgzfTokens
TkWord32 (Word32 -> BgzfTokens -> BgzfTokens)
-> (Float -> Word32) -> Float -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Float -> Word32
fromFloat) BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id Vector Float
fa
      where
        common :: Char -> BgzfTokens -> BgzfTokens
        common :: Char -> BgzfTokens -> BgzfTokens
common z :: Char
z = Word16 -> BgzfTokens -> BgzfTokens
TkWord16 Word16
k (BgzfTokens -> BgzfTokens)
-> (BgzfTokens -> BgzfTokens) -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word8 -> BgzfTokens -> BgzfTokens
TkWord8 (Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ Char -> Int
ord Char
z)

        put_some_int :: U.Vector Int -> (Char, Int -> BgzfTokens -> BgzfTokens)
        put_some_int :: Vector Int -> (Char, Int -> BgzfTokens -> BgzfTokens)
put_some_int is :: Vector Int
is
            | (Int -> Bool) -> Vector Int -> Bool
forall a. Unbox a => (a -> Bool) -> Vector a -> Bool
U.all (Int -> Int -> Int -> Bool
between        0    0xff) Vector Int
is = ('C', Word8 -> BgzfTokens -> BgzfTokens
TkWord8  (Word8 -> BgzfTokens -> BgzfTokens)
-> (Int -> Word8) -> Int -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral)
            | (Int -> Bool) -> Vector Int -> Bool
forall a. Unbox a => (a -> Bool) -> Vector a -> Bool
U.all (Int -> Int -> Int -> Bool
between   (-0x80)   0x7f) Vector Int
is = ('c', Word8 -> BgzfTokens -> BgzfTokens
TkWord8  (Word8 -> BgzfTokens -> BgzfTokens)
-> (Int -> Word8) -> Int -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral)
            | (Int -> Bool) -> Vector Int -> Bool
forall a. Unbox a => (a -> Bool) -> Vector a -> Bool
U.all (Int -> Int -> Int -> Bool
between        0  0xffff) Vector Int
is = ('S', Word16 -> BgzfTokens -> BgzfTokens
TkWord16 (Word16 -> BgzfTokens -> BgzfTokens)
-> (Int -> Word16) -> Int -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral)
            | (Int -> Bool) -> Vector Int -> Bool
forall a. Unbox a => (a -> Bool) -> Vector a -> Bool
U.all (Int -> Int -> Int -> Bool
between (-0x8000) 0x7fff) Vector Int
is = ('s', Word16 -> BgzfTokens -> BgzfTokens
TkWord16 (Word16 -> BgzfTokens -> BgzfTokens)
-> (Int -> Word16) -> Int -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral)
            | (Int -> Bool) -> Vector Int -> Bool
forall a. Unbox a => (a -> Bool) -> Vector a -> Bool
U.all                      (Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 0) Vector Int
is = ('I', Word32 -> BgzfTokens -> BgzfTokens
TkWord32 (Word32 -> BgzfTokens -> BgzfTokens)
-> (Int -> Word32) -> Int -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral)
            | Bool
otherwise                           = ('i', Word32 -> BgzfTokens -> BgzfTokens
TkWord32 (Word32 -> BgzfTokens -> BgzfTokens)
-> (Int -> Word32) -> Int -> BgzfTokens -> BgzfTokens
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral)

        between :: Int -> Int -> Int -> Bool
        between :: Int -> Int -> Int -> Bool
between l :: Int
l r :: Int
r x :: Int
x = Int
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
x Bool -> Bool -> Bool
&& Int
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
r

        fromFloat :: Float -> Word32
        fromFloat :: Float -> Word32
fromFloat float :: Float
float = IO Word32 -> Word32
forall a. IO a -> a
unsafeDupablePerformIO (IO Word32 -> Word32) -> IO Word32 -> Word32
forall a b. (a -> b) -> a -> b
$ (Ptr Word32 -> IO Word32) -> IO Word32
forall a b. Storable a => (Ptr a -> IO b) -> IO b
alloca ((Ptr Word32 -> IO Word32) -> IO Word32)
-> (Ptr Word32 -> IO Word32) -> IO Word32
forall a b. (a -> b) -> a -> b
$ \buf :: Ptr Word32
buf ->
                          Ptr Word32 -> Int -> Float -> IO ()
forall a b. Storable a => Ptr b -> Int -> a -> IO ()
pokeByteOff Ptr Word32
buf 0 Float
float IO () -> IO Word32 -> IO Word32
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Ptr Word32 -> IO Word32
forall a. Storable a => Ptr a -> IO a
peek Ptr Word32
buf

packBam :: BamRec -> IO BamRaw
packBam :: BamRec -> IO BamRaw
packBam br :: BamRec
br = do BB
bb <- Int -> IO BB
newBuffer 1000
                (bb' :: BB
bb', TkEnd) <- BB -> BgzfTokens -> IO (BB, BgzfTokens)
store_loop BB
bb (BamRec -> BgzfTokens -> BgzfTokens
pushBamRec BamRec
br BgzfTokens
TkEnd)
                Int64 -> ByteString -> IO BamRaw
forall (m :: * -> *).
MonadThrow m =>
Int64 -> ByteString -> m BamRaw
bamRaw 0 (ByteString -> IO BamRaw) -> ByteString -> IO BamRaw
forall a b. (a -> b) -> a -> b
$ ForeignPtr Word8 -> Int -> Int -> ByteString
fromForeignPtr (BB -> ForeignPtr Word8
buffer BB
bb') 4 (BB -> Int
used BB
bb' Int -> Int -> Int
forall a. Num a => a -> a -> a
- 4)
  where
    store_loop :: BB -> BgzfTokens -> IO (BB, BgzfTokens)
store_loop bb :: BB
bb tk :: BgzfTokens
tk = do (bb' :: BB
bb',tk' :: BgzfTokens
tk') <- BB -> BgzfTokens -> IO (BB, BgzfTokens)
fillBuffer BB
bb BgzfTokens
tk
                          case BgzfTokens
tk' of TkEnd -> (BB, BgzfTokens) -> IO (BB, BgzfTokens)
forall (m :: * -> *) a. Monad m => a -> m a
return (BB
bb',BgzfTokens
tk')
                                      _     -> do BB
bb'' <- Int -> BB -> IO BB
expandBuffer (128Int -> Int -> Int
forall a. Num a => a -> a -> a
*1024) BB
bb'
                                                  BB -> BgzfTokens -> IO (BB, BgzfTokens)
store_loop BB
bb'' BgzfTokens
tk'