{-# LANGUAGE UndecidableInstances #-}
module Bio.Bam.Header (
        BamMeta(..),
        parseBamMeta,
        showBamMeta,
        addPG,

        BamKey(..),
        BamHeader(..),
        BamSQ(..),
        BamSorting(..),
        BamOtherShit,

        Refseq(..),
        invalidRefseq,
        isValidRefseq,
        invalidPos,
        isValidPos,
        unknownMapq,
        isKnownMapq,

        Refs(..),
        getRef,

        compareNames,

        flagPaired,
        flagProperlyPaired,
        flagUnmapped,
        flagMateUnmapped,
        flagReversed,
        flagMateReversed,
        flagFirstMate,
        flagSecondMate,
        flagAuxillary,
        flagSecondary,
        flagFailsQC,
        flagDuplicate,
        flagSupplementary,
        eflagTrimmed,
        eflagMerged,
        eflagAlternative,
        eflagExactIndex,

        distinctBin,

        MdOp(..),
        readMd,
        showMd
    ) where

import Bio.Prelude           hiding ( uncons )
import Bio.Util.Nub
import Control.Monad.Trans.RWS
import Data.ByteString              ( uncons )
import Data.ByteString.Builder      ( Builder, byteString, char7, intDec, word16LE )

import qualified Data.Attoparsec.ByteString.Char8   as P
import qualified Data.ByteString                    as B
import qualified Data.ByteString.Char8              as S
import qualified Data.HashMap.Strict                as H
import qualified Data.Vector                        as V

data BamMeta = BamMeta {
        BamMeta -> BamHeader
meta_hdr :: !BamHeader,
        BamMeta -> Refs
meta_refs :: !Refs,
        BamMeta -> [Fix BamPG]
meta_pgs :: [Fix BamPG],
        BamMeta -> [(BamKey, BamOtherShit)]
meta_other_shit :: [(BamKey, BamOtherShit)],
        BamMeta -> [Bytes]
meta_comment :: [Bytes]
    } deriving ( Int -> BamMeta -> ShowS
[BamMeta] -> ShowS
BamMeta -> String
(Int -> BamMeta -> ShowS)
-> (BamMeta -> String) -> ([BamMeta] -> ShowS) -> Show BamMeta
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [BamMeta] -> ShowS
$cshowList :: [BamMeta] -> ShowS
show :: BamMeta -> String
$cshow :: BamMeta -> String
showsPrec :: Int -> BamMeta -> ShowS
$cshowsPrec :: Int -> BamMeta -> ShowS
Show, (forall x. BamMeta -> Rep BamMeta x)
-> (forall x. Rep BamMeta x -> BamMeta) -> Generic BamMeta
forall x. Rep BamMeta x -> BamMeta
forall x. BamMeta -> Rep BamMeta x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep BamMeta x -> BamMeta
$cfrom :: forall x. BamMeta -> Rep BamMeta x
Generic )

-- | Exactly two characters, for the \"named\" fields in bam.
newtype BamKey = BamKey Word16
    deriving ( BamKey -> BamKey -> Bool
(BamKey -> BamKey -> Bool)
-> (BamKey -> BamKey -> Bool) -> Eq BamKey
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: BamKey -> BamKey -> Bool
$c/= :: BamKey -> BamKey -> Bool
== :: BamKey -> BamKey -> Bool
$c== :: BamKey -> BamKey -> Bool
Eq, Eq BamKey
Eq BamKey =>
(BamKey -> BamKey -> Ordering)
-> (BamKey -> BamKey -> Bool)
-> (BamKey -> BamKey -> Bool)
-> (BamKey -> BamKey -> Bool)
-> (BamKey -> BamKey -> Bool)
-> (BamKey -> BamKey -> BamKey)
-> (BamKey -> BamKey -> BamKey)
-> Ord BamKey
BamKey -> BamKey -> Bool
BamKey -> BamKey -> Ordering
BamKey -> BamKey -> BamKey
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: BamKey -> BamKey -> BamKey
$cmin :: BamKey -> BamKey -> BamKey
max :: BamKey -> BamKey -> BamKey
$cmax :: BamKey -> BamKey -> BamKey
>= :: BamKey -> BamKey -> Bool
$c>= :: BamKey -> BamKey -> Bool
> :: BamKey -> BamKey -> Bool
$c> :: BamKey -> BamKey -> Bool
<= :: BamKey -> BamKey -> Bool
$c<= :: BamKey -> BamKey -> Bool
< :: BamKey -> BamKey -> Bool
$c< :: BamKey -> BamKey -> Bool
compare :: BamKey -> BamKey -> Ordering
$ccompare :: BamKey -> BamKey -> Ordering
$cp1Ord :: Eq BamKey
Ord, Int -> BamKey -> Int
BamKey -> Int
(Int -> BamKey -> Int) -> (BamKey -> Int) -> Hashable BamKey
forall a. (Int -> a -> Int) -> (a -> Int) -> Hashable a
hash :: BamKey -> Int
$chash :: BamKey -> Int
hashWithSalt :: Int -> BamKey -> Int
$chashWithSalt :: Int -> BamKey -> Int
Hashable, (forall x. BamKey -> Rep BamKey x)
-> (forall x. Rep BamKey x -> BamKey) -> Generic BamKey
forall x. Rep BamKey x -> BamKey
forall x. BamKey -> Rep BamKey x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep BamKey x -> BamKey
$cfrom :: forall x. BamKey -> Rep BamKey x
Generic )

instance IsString BamKey where
    {-# INLINE fromString #-}
    fromString :: String -> BamKey
fromString [a :: Char
a,b :: Char
b]
        | Char -> Int
ord Char
a Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< 256 Bool -> Bool -> Bool
&& Char -> Int
ord Char
b Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< 256
            = Word16 -> BamKey
BamKey (Word16 -> BamKey) -> (Int -> Word16) -> Int -> BamKey
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> BamKey) -> Int -> BamKey
forall a b. (a -> b) -> a -> b
$ Char -> Int
ord Char
a Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int -> Int -> Int
forall a. Bits a => a -> Int -> a
shiftL (Char -> Int
ord Char
b) 8

    fromString s :: String
s
            = String -> BamKey
forall a. HasCallStack => String -> a
error (String -> BamKey) -> String -> BamKey
forall a b. (a -> b) -> a -> b
$ "Not a legal BAM key: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ ShowS
forall a. Show a => a -> String
show String
s

instance Show BamKey where
    show :: BamKey -> String
show (BamKey a :: Word16
a) = [ Int -> Char
chr (Word16 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word16
a Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. 0xff), Int -> Char
chr (Int -> Int -> Int
forall a. Bits a => a -> Int -> a
shiftR (Word16 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word16
a) 8 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. 0xff) ]

-- | Adds a new program line to a header.  The new entry is
-- (arbitrarily) prepended to the first existing chain, or forms a new
-- singleton chain if none exists.

addPG :: MonadIO m => Maybe Version -> m (BamMeta -> BamMeta)
addPG :: Maybe Version -> m (BamMeta -> BamMeta)
addPG vn :: Maybe Version
vn = IO (BamMeta -> BamMeta) -> m (BamMeta -> BamMeta)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (BamMeta -> BamMeta) -> m (BamMeta -> BamMeta))
-> IO (BamMeta -> BamMeta) -> m (BamMeta -> BamMeta)
forall a b. (a -> b) -> a -> b
$ do
    [String]
args <- IO [String]
getArgs
    String
pn   <- IO String
getProgName

    let more :: BamOtherShit
more = ("PN", String -> Bytes
S.pack String
pn) (BamKey, Bytes) -> BamOtherShit -> BamOtherShit
forall a. a -> [a] -> [a]
:
               ("CL", String -> Bytes
S.pack (String -> Bytes) -> String -> Bytes
forall a b. (a -> b) -> a -> b
$ [String] -> String
unwords [String]
args) (BamKey, Bytes) -> BamOtherShit -> BamOtherShit
forall a. a -> [a] -> [a]
:
               BamOtherShit
-> (Version -> BamOtherShit) -> Maybe Version -> BamOtherShit
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [] (\v :: Version
v -> [("VN",String -> Bytes
S.pack (Version -> String
showVersion Version
v))]) Maybe Version
vn

    (BamMeta -> BamMeta) -> IO (BamMeta -> BamMeta)
forall (m :: * -> *) a. Monad m => a -> m a
return ((BamMeta -> BamMeta) -> IO (BamMeta -> BamMeta))
-> (BamMeta -> BamMeta) -> IO (BamMeta -> BamMeta)
forall a b. (a -> b) -> a -> b
$ \bm :: BamMeta
bm -> case BamMeta -> [Fix BamPG]
meta_pgs BamMeta
bm of
        [    ] -> BamMeta
bm { meta_pgs :: [Fix BamPG]
meta_pgs = BamPG (Fix BamPG) -> Fix BamPG
forall (f :: * -> *). f (Fix f) -> Fix f
Fix (Bytes -> Maybe (Fix BamPG) -> BamOtherShit -> BamPG (Fix BamPG)
forall pp. Bytes -> Maybe pp -> BamOtherShit -> BamPG pp
BamPG (String -> Bytes
S.pack String
pn)  Maybe (Fix BamPG)
forall a. Maybe a
Nothing  BamOtherShit
more) Fix BamPG -> [Fix BamPG] -> [Fix BamPG]
forall a. a -> [a] -> [a]
: [ ] }
        pg :: Fix BamPG
pg:pgs :: [Fix BamPG]
pgs -> BamMeta
bm { meta_pgs :: [Fix BamPG]
meta_pgs = BamPG (Fix BamPG) -> Fix BamPG
forall (f :: * -> *). f (Fix f) -> Fix f
Fix (Bytes -> Maybe (Fix BamPG) -> BamOtherShit -> BamPG (Fix BamPG)
forall pp. Bytes -> Maybe pp -> BamOtherShit -> BamPG pp
BamPG (String -> Bytes
S.pack String
pn) (Fix BamPG -> Maybe (Fix BamPG)
forall a. a -> Maybe a
Just Fix BamPG
pg) BamOtherShit
more) Fix BamPG -> [Fix BamPG] -> [Fix BamPG]
forall a. a -> [a] -> [a]
: [Fix BamPG]
pgs }


instance Semigroup BamMeta where <> :: BamMeta -> BamMeta -> BamMeta
(<>)    = BamMeta -> BamMeta -> BamMeta
combineBamMeta
instance Monoid    BamMeta where mempty :: BamMeta
mempty  = BamHeader
-> Refs
-> [Fix BamPG]
-> [(BamKey, BamOtherShit)]
-> [Bytes]
-> BamMeta
BamMeta BamHeader
forall a. Monoid a => a
mempty Refs
forall a. Monoid a => a
mempty [Fix BamPG]
forall a. Monoid a => a
mempty [] []
                                 mappend :: BamMeta -> BamMeta -> BamMeta
mappend = BamMeta -> BamMeta -> BamMeta
forall a. Semigroup a => a -> a -> a
(<>)

{- | Combines two bam headers into one.

The overarching goal is to combine headers in such a way that no
information is lost, but redundant information is removed.  In
particular, we sometimes \"merge\" headers with the same references, at
other times we \"meld\" headers with entirely different references.  In
the former case, we must concatenate the reference lists, in the latter
case we want to keep it as is.

* If both headers have a version number, the result is the smaller of
  the two.

* The resulting sort order is the most specific one compatible with both
  input sort orders.  The stupid 'Unknown' state is compatible with
  everything.

* Reference sequences are appended and run through 'nub'.  The numbering
  of reference may thus change, which has to be dealt with in an
  appropriate way, see 'concatInputs', 'mergeInputsOn', and \"bam-meld\"
  for details.  (It is also possible that different sequences are left
  with the same name.  We cannot solve this right here, and there is no
  reliable way to do it in general.)

* Comments are appended and run through 'nub'.  This should work in
  most case, and if it doesn't, someone needs to \"samtools reheader\"
  the file anyway.

* Program chains are just collected, but when formatting, they are
  (effectively) run through 'nub' and are potentially assigned new
  unique identifiers.
-}
combineBamMeta :: BamMeta -> BamMeta -> BamMeta
combineBamMeta :: BamMeta -> BamMeta -> BamMeta
combineBamMeta a :: BamMeta
a b :: BamMeta
b = $WBamMeta :: BamHeader
-> Refs
-> [Fix BamPG]
-> [(BamKey, BamOtherShit)]
-> [Bytes]
-> BamMeta
BamMeta
    { meta_hdr :: BamHeader
meta_hdr        = BamMeta -> BamHeader
meta_hdr BamMeta
a BamHeader -> BamHeader -> BamHeader
forall a. Semigroup a => a -> a -> a
<> BamMeta -> BamHeader
meta_hdr BamMeta
b
    , meta_refs :: Refs
meta_refs       = BamMeta -> Refs
meta_refs BamMeta
a Refs -> Refs -> Refs
forall a. Monoid a => a -> a -> a
`mappend` BamMeta -> Refs
meta_refs BamMeta
b
    , meta_pgs :: [Fix BamPG]
meta_pgs        = BamMeta -> [Fix BamPG]
meta_pgs BamMeta
a [Fix BamPG] -> [Fix BamPG] -> [Fix BamPG]
forall a. Semigroup a => a -> a -> a
<> BamMeta -> [Fix BamPG]
meta_pgs BamMeta
b
    , meta_other_shit :: [(BamKey, BamOtherShit)]
meta_other_shit = [(BamKey, BamOtherShit)] -> [(BamKey, BamOtherShit)]
forall a. (Hashable a, Eq a) => [a] -> [a]
nubHash ([(BamKey, BamOtherShit)] -> [(BamKey, BamOtherShit)])
-> [(BamKey, BamOtherShit)] -> [(BamKey, BamOtherShit)]
forall a b. (a -> b) -> a -> b
$ BamMeta -> [(BamKey, BamOtherShit)]
meta_other_shit BamMeta
a [(BamKey, BamOtherShit)]
-> [(BamKey, BamOtherShit)] -> [(BamKey, BamOtherShit)]
forall a. [a] -> [a] -> [a]
++ BamMeta -> [(BamKey, BamOtherShit)]
meta_other_shit BamMeta
b
    , meta_comment :: [Bytes]
meta_comment    = [Bytes] -> [Bytes]
forall a. (Hashable a, Eq a) => [a] -> [a]
nubHash ([Bytes] -> [Bytes]) -> [Bytes] -> [Bytes]
forall a b. (a -> b) -> a -> b
$ BamMeta -> [Bytes]
meta_comment BamMeta
a [Bytes] -> [Bytes] -> [Bytes]
forall a. [a] -> [a] -> [a]
++ BamMeta -> [Bytes]
meta_comment BamMeta
b }

data BamHeader = BamHeader {
        BamHeader -> (Int, Int)
hdr_version :: (Int, Int),
        BamHeader -> BamSorting
hdr_sorting :: BamSorting,
        BamHeader -> BamOtherShit
hdr_other_shit :: BamOtherShit
    } deriving (Int -> BamHeader -> ShowS
[BamHeader] -> ShowS
BamHeader -> String
(Int -> BamHeader -> ShowS)
-> (BamHeader -> String)
-> ([BamHeader] -> ShowS)
-> Show BamHeader
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [BamHeader] -> ShowS
$cshowList :: [BamHeader] -> ShowS
show :: BamHeader -> String
$cshow :: BamHeader -> String
showsPrec :: Int -> BamHeader -> ShowS
$cshowsPrec :: Int -> BamHeader -> ShowS
Show, BamHeader -> BamHeader -> Bool
(BamHeader -> BamHeader -> Bool)
-> (BamHeader -> BamHeader -> Bool) -> Eq BamHeader
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: BamHeader -> BamHeader -> Bool
$c/= :: BamHeader -> BamHeader -> Bool
== :: BamHeader -> BamHeader -> Bool
$c== :: BamHeader -> BamHeader -> Bool
Eq)

instance Monoid BamHeader where
    mempty :: BamHeader
mempty = (Int, Int) -> BamSorting -> BamOtherShit -> BamHeader
BamHeader (1,0) BamSorting
Unknown []
    mappend :: BamHeader -> BamHeader -> BamHeader
mappend = BamHeader -> BamHeader -> BamHeader
forall a. Semigroup a => a -> a -> a
(<>)

instance Semigroup BamHeader where
    a :: BamHeader
a <> :: BamHeader -> BamHeader -> BamHeader
<> b :: BamHeader
b = BamHeader :: (Int, Int) -> BamSorting -> BamOtherShit -> BamHeader
BamHeader { hdr_version :: (Int, Int)
hdr_version    = (Int, Int) -> (Int, Int) -> (Int, Int)
forall a. Ord a => a -> a -> a
max (BamHeader -> (Int, Int)
hdr_version BamHeader
a) (BamHeader -> (Int, Int)
hdr_version BamHeader
b)
                       , hdr_sorting :: BamSorting
hdr_sorting    = BamHeader -> BamSorting
hdr_sorting BamHeader
a BamSorting -> BamSorting -> BamSorting
forall a. Semigroup a => a -> a -> a
<> BamHeader -> BamSorting
hdr_sorting BamHeader
b
                       , hdr_other_shit :: BamOtherShit
hdr_other_shit = ((BamKey, Bytes) -> BamKey) -> BamOtherShit -> BamOtherShit
forall b a. (Hashable b, Eq b) => (a -> b) -> [a] -> [a]
nubHashBy (BamKey, Bytes) -> BamKey
forall a b. (a, b) -> a
fst (BamOtherShit -> BamOtherShit) -> BamOtherShit -> BamOtherShit
forall a b. (a -> b) -> a -> b
$ BamHeader -> BamOtherShit
hdr_other_shit BamHeader
a BamOtherShit -> BamOtherShit -> BamOtherShit
forall a. [a] -> [a] -> [a]
++ BamHeader -> BamOtherShit
hdr_other_shit BamHeader
b }

data BamSQ = BamSQ {
        BamSQ -> Bytes
sq_name :: Bytes,
        BamSQ -> Int
sq_length :: Int,
        BamSQ -> BamOtherShit
sq_other_shit :: BamOtherShit
    } deriving (Int -> BamSQ -> ShowS
[BamSQ] -> ShowS
BamSQ -> String
(Int -> BamSQ -> ShowS)
-> (BamSQ -> String) -> ([BamSQ] -> ShowS) -> Show BamSQ
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [BamSQ] -> ShowS
$cshowList :: [BamSQ] -> ShowS
show :: BamSQ -> String
$cshow :: BamSQ -> String
showsPrec :: Int -> BamSQ -> ShowS
$cshowsPrec :: Int -> BamSQ -> ShowS
Show, BamSQ -> BamSQ -> Bool
(BamSQ -> BamSQ -> Bool) -> (BamSQ -> BamSQ -> Bool) -> Eq BamSQ
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: BamSQ -> BamSQ -> Bool
$c/= :: BamSQ -> BamSQ -> Bool
== :: BamSQ -> BamSQ -> Bool
$c== :: BamSQ -> BamSQ -> Bool
Eq, (forall x. BamSQ -> Rep BamSQ x)
-> (forall x. Rep BamSQ x -> BamSQ) -> Generic BamSQ
forall x. Rep BamSQ x -> BamSQ
forall x. BamSQ -> Rep BamSQ x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep BamSQ x -> BamSQ
$cfrom :: forall x. BamSQ -> Rep BamSQ x
Generic)

instance Hashable BamSQ

data BamPG pp = BamPG {
        BamPG pp -> Bytes
pg_pref_name :: Bytes,
        BamPG pp -> Maybe pp
pg_prev_pg :: Maybe pp,
        BamPG pp -> BamOtherShit
pg_other_shit :: BamOtherShit
    } deriving (Int -> BamPG pp -> ShowS
[BamPG pp] -> ShowS
BamPG pp -> String
(Int -> BamPG pp -> ShowS)
-> (BamPG pp -> String) -> ([BamPG pp] -> ShowS) -> Show (BamPG pp)
forall pp. Show pp => Int -> BamPG pp -> ShowS
forall pp. Show pp => [BamPG pp] -> ShowS
forall pp. Show pp => BamPG pp -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [BamPG pp] -> ShowS
$cshowList :: forall pp. Show pp => [BamPG pp] -> ShowS
show :: BamPG pp -> String
$cshow :: forall pp. Show pp => BamPG pp -> String
showsPrec :: Int -> BamPG pp -> ShowS
$cshowsPrec :: forall pp. Show pp => Int -> BamPG pp -> ShowS
Show, BamPG pp -> BamPG pp -> Bool
(BamPG pp -> BamPG pp -> Bool)
-> (BamPG pp -> BamPG pp -> Bool) -> Eq (BamPG pp)
forall pp. Eq pp => BamPG pp -> BamPG pp -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: BamPG pp -> BamPG pp -> Bool
$c/= :: forall pp. Eq pp => BamPG pp -> BamPG pp -> Bool
== :: BamPG pp -> BamPG pp -> Bool
$c== :: forall pp. Eq pp => BamPG pp -> BamPG pp -> Bool
Eq, (forall a. BamPG a -> Rep1 BamPG a)
-> (forall a. Rep1 BamPG a -> BamPG a) -> Generic1 BamPG
forall a. Rep1 BamPG a -> BamPG a
forall a. BamPG a -> Rep1 BamPG a
forall k (f :: k -> *).
(forall (a :: k). f a -> Rep1 f a)
-> (forall (a :: k). Rep1 f a -> f a) -> Generic1 f
$cto1 :: forall a. Rep1 BamPG a -> BamPG a
$cfrom1 :: forall a. BamPG a -> Rep1 BamPG a
Generic1)

newtype Fix f = Fix (f (Fix f))

instance Eq (f (Fix f)) => Eq (Fix f) where
    Fix f :: f (Fix f)
f == :: Fix f -> Fix f -> Bool
== Fix g :: f (Fix f)
g  =  f (Fix f)
f f (Fix f) -> f (Fix f) -> Bool
forall a. Eq a => a -> a -> Bool
== f (Fix f)
g

instance Show (f (Fix f)) => Show (Fix f) where
    showsPrec :: Int -> Fix f -> ShowS
showsPrec p :: Int
p (Fix f :: f (Fix f)
f) = Int -> f (Fix f) -> ShowS
forall a. Show a => Int -> a -> ShowS
showsPrec Int
p f (Fix f)
f

instance Hashable (Fix BamPG) where
    hashWithSalt :: Int -> Fix BamPG -> Int
hashWithSalt s :: Int
s (Fix (BamPG n :: Bytes
n Nothing  o :: BamOtherShit
o)) = Int -> BamOtherShit -> Int
forall a. Hashable a => Int -> a -> Int
hashWithSalt               (Int -> Bytes -> Int
forall a. Hashable a => Int -> a -> Int
hashWithSalt Int
s Bytes
n)    BamOtherShit
o
    hashWithSalt s :: Int
s (Fix (BamPG n :: Bytes
n (Just p :: Fix BamPG
p) o :: BamOtherShit
o)) = Int -> BamOtherShit -> Int
forall a. Hashable a => Int -> a -> Int
hashWithSalt (Int -> Fix BamPG -> Int
forall a. Hashable a => Int -> a -> Int
hashWithSalt (Int -> Bytes -> Int
forall a. Hashable a => Int -> a -> Int
hashWithSalt Int
s Bytes
n) Fix BamPG
p) BamOtherShit
o


-- | Possible sorting orders from bam header.  Thanks to samtools, which
-- doesn't declare sorted files properly, we have to have the stupid
-- 'Unknown' state, too.
data BamSorting = Unknown       -- ^ undeclared sort order
                | Unsorted      -- ^ definitely not sorted
                | Grouped       -- ^ grouped by query name
                | Queryname     -- ^ sorted by query name
                | Coordinate    -- ^ sorted by coordinate
    deriving (Int -> BamSorting -> ShowS
[BamSorting] -> ShowS
BamSorting -> String
(Int -> BamSorting -> ShowS)
-> (BamSorting -> String)
-> ([BamSorting] -> ShowS)
-> Show BamSorting
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [BamSorting] -> ShowS
$cshowList :: [BamSorting] -> ShowS
show :: BamSorting -> String
$cshow :: BamSorting -> String
showsPrec :: Int -> BamSorting -> ShowS
$cshowsPrec :: Int -> BamSorting -> ShowS
Show, BamSorting -> BamSorting -> Bool
(BamSorting -> BamSorting -> Bool)
-> (BamSorting -> BamSorting -> Bool) -> Eq BamSorting
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: BamSorting -> BamSorting -> Bool
$c/= :: BamSorting -> BamSorting -> Bool
== :: BamSorting -> BamSorting -> Bool
$c== :: BamSorting -> BamSorting -> Bool
Eq)

instance Semigroup BamSorting where
    Unknown    <> :: BamSorting -> BamSorting -> BamSorting
<>          b :: BamSorting
b  =  BamSorting
b
    a :: BamSorting
a          <>    Unknown  =  BamSorting
a
    Grouped    <>    Grouped  =  BamSorting
Grouped
    Grouped    <>  Queryname  =  BamSorting
Grouped
    Queryname  <>    Grouped  =  BamSorting
Grouped
    Queryname  <>  Queryname  =  BamSorting
Queryname
    Coordinate <> Coordinate  =  BamSorting
Coordinate
    _          <>          _  =  BamSorting
Unsorted


type BamOtherShit = [(BamKey, Bytes)]

parseBamMeta :: P.Parser BamMeta
parseBamMeta :: Parser BamMeta
parseBamMeta = PreBamMeta -> BamMeta
fixupMeta (PreBamMeta -> BamMeta)
-> ([PreBamMeta -> PreBamMeta] -> PreBamMeta)
-> [PreBamMeta -> PreBamMeta]
-> BamMeta
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (PreBamMeta -> (PreBamMeta -> PreBamMeta) -> PreBamMeta)
-> PreBamMeta -> [PreBamMeta -> PreBamMeta] -> PreBamMeta
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (((PreBamMeta -> PreBamMeta) -> PreBamMeta -> PreBamMeta)
-> PreBamMeta -> (PreBamMeta -> PreBamMeta) -> PreBamMeta
forall a b c. (a -> b -> c) -> b -> a -> c
flip (PreBamMeta -> PreBamMeta) -> PreBamMeta -> PreBamMeta
forall a b. (a -> b) -> a -> b
($)) PreBamMeta
emptyHeader
               ([PreBamMeta -> PreBamMeta] -> BamMeta)
-> Parser Bytes [PreBamMeta -> PreBamMeta] -> Parser BamMeta
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Bytes (PreBamMeta -> PreBamMeta)
-> Parser Bytes [PreBamMeta -> PreBamMeta]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (Parser Bytes (PreBamMeta -> PreBamMeta)
parseBamMetaLine Parser Bytes (PreBamMeta -> PreBamMeta)
-> Parser Bytes () -> Parser Bytes (PreBamMeta -> PreBamMeta)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* (Char -> Bool) -> Parser Bytes ()
P.skipWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
=='\t') Parser Bytes (PreBamMeta -> PreBamMeta)
-> Parser Bytes Char -> Parser Bytes (PreBamMeta -> PreBamMeta)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Bytes Char
P.char '\n') Parser BamMeta -> Parser Bytes () -> Parser BamMeta
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Bytes ()
forall t. Chunk t => Parser t ()
P.endOfInput

-- Bam header in the process of being parsed.  Better suited for
-- collecting lines than 'BamMeta'.
data PreBamMeta = PreBamMeta {
        PreBamMeta -> BamHeader
pmeta_hdr        :: BamHeader,
        PreBamMeta -> [BamSQ]
pmeta_refs       :: [BamSQ],
        PreBamMeta -> HashMap Bytes (BamPG Bytes)
pmeta_pgs        :: HashMap Bytes (BamPG Bytes),
        PreBamMeta -> [(BamKey, BamOtherShit)]
pmeta_other_shit :: [(BamKey, BamOtherShit)],
        PreBamMeta -> [Bytes]
pmeta_comment    :: [Bytes] }

emptyHeader :: PreBamMeta
emptyHeader :: PreBamMeta
emptyHeader = BamHeader
-> [BamSQ]
-> HashMap Bytes (BamPG Bytes)
-> [(BamKey, BamOtherShit)]
-> [Bytes]
-> PreBamMeta
PreBamMeta BamHeader
forall a. Monoid a => a
mempty [] HashMap Bytes (BamPG Bytes)
forall k v. HashMap k v
H.empty [] []


-- | Fixes a bam header after parsing.  It turns accumulated lists into
-- vectors, throws errors for mandatory fields that weren't parsed
-- correctly, and it handles the program (PG) lines.  Program lines come
-- in as an arbitrary graph.  It should be a linear chain, but this
-- isn't guaranteed in practice.  We decompose the graph into chains by
-- tracing from nodes with no predecessor, or from an arbitrary node if
-- all nodes have predecessors.  Tracing stops if it would form a cycle.
fixupMeta :: PreBamMeta -> BamMeta
fixupMeta :: PreBamMeta -> BamMeta
fixupMeta PreBamMeta{..} = $WBamMeta :: BamHeader
-> Refs
-> [Fix BamPG]
-> [(BamKey, BamOtherShit)]
-> [Bytes]
-> BamMeta
BamMeta
    { meta_hdr :: BamHeader
meta_hdr        = BamHeader
pmeta_hdr
    , meta_refs :: Refs
meta_refs       = Vector BamSQ -> Refs
Refs (Vector BamSQ -> Refs)
-> ([BamSQ] -> Vector BamSQ) -> [BamSQ] -> Refs
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. [BamSQ] -> Vector BamSQ
forall a. [a] -> Vector a
V.fromList ([BamSQ] -> Vector BamSQ)
-> ([BamSQ] -> [BamSQ]) -> [BamSQ] -> Vector BamSQ
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. [BamSQ] -> [BamSQ]
forall a. [a] -> [a]
reverse ([BamSQ] -> Refs) -> [BamSQ] -> Refs
forall a b. (a -> b) -> a -> b
$ [BamSQ]
pmeta_refs
    , meta_pgs :: [Fix BamPG]
meta_pgs        = ((), [Fix BamPG]) -> [Fix BamPG]
forall a b. (a, b) -> b
snd (((), [Fix BamPG]) -> [Fix BamPG])
-> ((), [Fix BamPG]) -> [Fix BamPG]
forall a b. (a -> b) -> a -> b
$ RWS () [Fix BamPG] (HashMap Bytes (BamPG Bytes)) ()
-> () -> HashMap Bytes (BamPG Bytes) -> ((), [Fix BamPG])
forall r w s a. RWS r w s a -> r -> s -> (a, w)
evalRWS RWS () [Fix BamPG] (HashMap Bytes (BamPG Bytes)) ()
trace_pgs () HashMap Bytes (BamPG Bytes)
pmeta_pgs
    , meta_other_shit :: [(BamKey, BamOtherShit)]
meta_other_shit = [(BamKey, BamOtherShit)] -> [(BamKey, BamOtherShit)]
forall a. [a] -> [a]
reverse [(BamKey, BamOtherShit)]
pmeta_other_shit
    , meta_comment :: [Bytes]
meta_comment    = [Bytes] -> [Bytes]
forall a. [a] -> [a]
reverse [Bytes]
pmeta_comment  }
  where
    -- keep tracing from roots until no nodes are left
    trace_pgs :: RWS () [Fix BamPG] (HashMap Bytes (BamPG Bytes)) ()
    trace_pgs :: RWS () [Fix BamPG] (HashMap Bytes (BamPG Bytes)) ()
trace_pgs = do
        HashMap Bytes (BamPG Bytes)
gg <- RWST
  ()
  [Fix BamPG]
  (HashMap Bytes (BamPG Bytes))
  Identity
  (HashMap Bytes (BamPG Bytes))
forall w (m :: * -> *) r s. (Monoid w, Monad m) => RWST r w s m s
get
        case (HashMap Bytes (BamPG Bytes)
 -> Bytes -> HashMap Bytes (BamPG Bytes))
-> HashMap Bytes (BamPG Bytes)
-> [Bytes]
-> HashMap Bytes (BamPG Bytes)
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' ((Bytes
 -> HashMap Bytes (BamPG Bytes) -> HashMap Bytes (BamPG Bytes))
-> HashMap Bytes (BamPG Bytes)
-> Bytes
-> HashMap Bytes (BamPG Bytes)
forall a b c. (a -> b -> c) -> b -> a -> c
flip Bytes -> HashMap Bytes (BamPG Bytes) -> HashMap Bytes (BamPG Bytes)
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> HashMap k v
H.delete) HashMap Bytes (BamPG Bytes)
gg
                    [ Bytes
pp | BamPG Bytes
p <- HashMap Bytes (BamPG Bytes) -> [BamPG Bytes]
forall k v. HashMap k v -> [v]
H.elems HashMap Bytes (BamPG Bytes)
gg
                         , Bytes
pp <- [Bytes] -> (Bytes -> [Bytes]) -> Maybe Bytes -> [Bytes]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [] Bytes -> [Bytes]
forall (f :: * -> *) a. Applicative f => a -> f a
pure (BamPG Bytes -> Maybe Bytes
forall pp. BamPG pp -> Maybe pp
pg_prev_pg BamPG Bytes
p) ] of
          orphans :: HashMap Bytes (BamPG Bytes)
orphans
            -- the empty graph has no roots:
            | HashMap Bytes (BamPG Bytes) -> Bool
forall k v. HashMap k v -> Bool
H.null HashMap Bytes (BamPG Bytes)
gg      -> () -> RWS () [Fix BamPG] (HashMap Bytes (BamPG Bytes)) ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
            -- an arbitrary node is picked as root:
            | HashMap Bytes (BamPG Bytes) -> Bool
forall k v. HashMap k v -> Bool
H.null HashMap Bytes (BamPG Bytes)
orphans -> HashMap Bytes ()
-> Bytes
-> RWS
     () [Fix BamPG] (HashMap Bytes (BamPG Bytes)) (Maybe (Fix BamPG))
forall x.
HashMap Bytes ()
-> Bytes
-> RWS () [Fix BamPG] (HashMap Bytes x) (Maybe (Fix BamPG))
trace_pg HashMap Bytes ()
forall k v. HashMap k v
H.empty ([Bytes] -> Bytes
forall a. [a] -> a
head ([Bytes] -> Bytes) -> [Bytes] -> Bytes
forall a b. (a -> b) -> a -> b
$ HashMap Bytes (BamPG Bytes) -> [Bytes]
forall k v. HashMap k v -> [k]
H.keys HashMap Bytes (BamPG Bytes)
gg) RWS
  () [Fix BamPG] (HashMap Bytes (BamPG Bytes)) (Maybe (Fix BamPG))
-> RWS () [Fix BamPG] (HashMap Bytes (BamPG Bytes)) ()
-> RWS () [Fix BamPG] (HashMap Bytes (BamPG Bytes)) ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> RWS () [Fix BamPG] (HashMap Bytes (BamPG Bytes)) ()
trace_pgs
            -- nodes without parents are roots:
            | Bool
otherwise      -> (Bytes
 -> RWS
      () [Fix BamPG] (HashMap Bytes (BamPG Bytes)) (Maybe (Fix BamPG)))
-> [Bytes] -> RWS () [Fix BamPG] (HashMap Bytes (BamPG Bytes)) ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ (HashMap Bytes ()
-> Bytes
-> RWS
     () [Fix BamPG] (HashMap Bytes (BamPG Bytes)) (Maybe (Fix BamPG))
forall x.
HashMap Bytes ()
-> Bytes
-> RWS () [Fix BamPG] (HashMap Bytes x) (Maybe (Fix BamPG))
trace_pg HashMap Bytes ()
forall k v. HashMap k v
H.empty) (HashMap Bytes (BamPG Bytes) -> [Bytes]
forall k v. HashMap k v -> [k]
H.keys HashMap Bytes (BamPG Bytes)
orphans) RWS () [Fix BamPG] (HashMap Bytes (BamPG Bytes)) ()
-> RWS () [Fix BamPG] (HashMap Bytes (BamPG Bytes)) ()
-> RWS () [Fix BamPG] (HashMap Bytes (BamPG Bytes)) ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> RWS () [Fix BamPG] (HashMap Bytes (BamPG Bytes)) ()
trace_pgs

    -- Trace one PG line.  Do not trace into nodes in the 'closed' set,
    -- remove reached nodes from the 'open' set (the state) and add them
    -- to the 'closed' set.
    trace_pg :: HashMap Bytes () -> Bytes -> RWS () [Fix BamPG] (HashMap Bytes x) (Maybe (Fix BamPG))
    trace_pg :: HashMap Bytes ()
-> Bytes
-> RWS () [Fix BamPG] (HashMap Bytes x) (Maybe (Fix BamPG))
trace_pg closed :: HashMap Bytes ()
closed name :: Bytes
name =
        case Bytes -> HashMap Bytes (BamPG Bytes) -> Maybe (BamPG Bytes)
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
H.lookup Bytes
name HashMap Bytes (BamPG Bytes)
pmeta_pgs of
            _ | Bytes -> HashMap Bytes () -> Bool
forall k a. (Eq k, Hashable k) => k -> HashMap k a -> Bool
H.member Bytes
name HashMap Bytes ()
closed -> Maybe (Fix BamPG)
-> RWS () [Fix BamPG] (HashMap Bytes x) (Maybe (Fix BamPG))
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe (Fix BamPG)
forall a. Maybe a
Nothing
            Nothing                  -> Maybe (Fix BamPG)
-> RWS () [Fix BamPG] (HashMap Bytes x) (Maybe (Fix BamPG))
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe (Fix BamPG)
forall a. Maybe a
Nothing
            Just pg :: BamPG Bytes
pg -> do
                (HashMap Bytes x -> HashMap Bytes x)
-> RWST () [Fix BamPG] (HashMap Bytes x) Identity ()
forall w (m :: * -> *) s r.
(Monoid w, Monad m) =>
(s -> s) -> RWST r w s m ()
modify ((HashMap Bytes x -> HashMap Bytes x)
 -> RWST () [Fix BamPG] (HashMap Bytes x) Identity ())
-> (HashMap Bytes x -> HashMap Bytes x)
-> RWST () [Fix BamPG] (HashMap Bytes x) Identity ()
forall a b. (a -> b) -> a -> b
$ Bytes -> HashMap Bytes x -> HashMap Bytes x
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> HashMap k v
H.delete Bytes
name
                Maybe (Maybe (Fix BamPG))
pp <- (Bytes -> RWS () [Fix BamPG] (HashMap Bytes x) (Maybe (Fix BamPG)))
-> Maybe Bytes
-> RWST
     ()
     [Fix BamPG]
     (HashMap Bytes x)
     Identity
     (Maybe (Maybe (Fix BamPG)))
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (HashMap Bytes ()
-> Bytes
-> RWS () [Fix BamPG] (HashMap Bytes x) (Maybe (Fix BamPG))
forall x.
HashMap Bytes ()
-> Bytes
-> RWS () [Fix BamPG] (HashMap Bytes x) (Maybe (Fix BamPG))
trace_pg (Bytes -> () -> HashMap Bytes () -> HashMap Bytes ()
forall k v.
(Eq k, Hashable k) =>
k -> v -> HashMap k v -> HashMap k v
H.insert Bytes
name () HashMap Bytes ()
closed)) (BamPG Bytes -> Maybe Bytes
forall pp. BamPG pp -> Maybe pp
pg_prev_pg BamPG Bytes
pg)
                let self :: Fix BamPG
self = BamPG (Fix BamPG) -> Fix BamPG
forall (f :: * -> *). f (Fix f) -> Fix f
Fix (BamPG (Fix BamPG) -> Fix BamPG) -> BamPG (Fix BamPG) -> Fix BamPG
forall a b. (a -> b) -> a -> b
$ BamPG Bytes
pg { pg_prev_pg :: Maybe (Fix BamPG)
pg_prev_pg = Maybe (Maybe (Fix BamPG)) -> Maybe (Fix BamPG)
forall (m :: * -> *) a. Monad m => m (m a) -> m a
join Maybe (Maybe (Fix BamPG))
pp }
                [Fix BamPG] -> RWST () [Fix BamPG] (HashMap Bytes x) Identity ()
forall (m :: * -> *) w r s. Monad m => w -> RWST r w s m ()
tell [ Fix BamPG
self ]
                Maybe (Fix BamPG)
-> RWS () [Fix BamPG] (HashMap Bytes x) (Maybe (Fix BamPG))
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe (Fix BamPG)
 -> RWS () [Fix BamPG] (HashMap Bytes x) (Maybe (Fix BamPG)))
-> Maybe (Fix BamPG)
-> RWS () [Fix BamPG] (HashMap Bytes x) (Maybe (Fix BamPG))
forall a b. (a -> b) -> a -> b
$ Fix BamPG -> Maybe (Fix BamPG)
forall a. a -> Maybe a
Just Fix BamPG
self


parseBamMetaLine :: P.Parser (PreBamMeta -> PreBamMeta)
parseBamMetaLine :: Parser Bytes (PreBamMeta -> PreBamMeta)
parseBamMetaLine = Char -> Parser Bytes Char
P.char '@' Parser Bytes Char
-> Parser Bytes (PreBamMeta -> PreBamMeta)
-> Parser Bytes (PreBamMeta -> PreBamMeta)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> [Parser Bytes (PreBamMeta -> PreBamMeta)]
-> Parser Bytes (PreBamMeta -> PreBamMeta)
forall (f :: * -> *) a. Alternative f => [f a] -> f a
P.choice [Parser Bytes (PreBamMeta -> PreBamMeta)
hdLine, Parser Bytes (PreBamMeta -> PreBamMeta)
sqLine, Parser Bytes (PreBamMeta -> PreBamMeta)
pgLine, Parser Bytes (PreBamMeta -> PreBamMeta)
coLine, Parser Bytes (PreBamMeta -> PreBamMeta)
otherLine]
  where
    hdLine :: Parser Bytes (PreBamMeta -> PreBamMeta)
hdLine = Bytes -> Parser Bytes
P.string "HD\t" Parser Bytes
-> Parser Bytes (PreBamMeta -> PreBamMeta)
-> Parser Bytes (PreBamMeta -> PreBamMeta)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
             (\fns :: [BamHeader -> BamHeader]
fns meta :: PreBamMeta
meta -> PreBamMeta
meta { pmeta_hdr :: BamHeader
pmeta_hdr = ((BamHeader -> BamHeader) -> BamHeader -> BamHeader)
-> BamHeader -> [BamHeader -> BamHeader] -> BamHeader
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (BamHeader -> BamHeader) -> BamHeader -> BamHeader
forall a b. (a -> b) -> a -> b
($) (PreBamMeta -> BamHeader
pmeta_hdr PreBamMeta
meta) [BamHeader -> BamHeader]
fns })
               ([BamHeader -> BamHeader] -> PreBamMeta -> PreBamMeta)
-> Parser Bytes [BamHeader -> BamHeader]
-> Parser Bytes (PreBamMeta -> PreBamMeta)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Bytes (BamHeader -> BamHeader)
-> Parser Bytes () -> Parser Bytes [BamHeader -> BamHeader]
forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
P.sepBy1 ([Parser Bytes (BamHeader -> BamHeader)]
-> Parser Bytes (BamHeader -> BamHeader)
forall (f :: * -> *) a. Alternative f => [f a] -> f a
P.choice [Parser Bytes (BamHeader -> BamHeader)
hdvn, Parser Bytes (BamHeader -> BamHeader)
hdso, Parser Bytes (BamHeader -> BamHeader)
hdother]) Parser Bytes ()
tabs

    sqLine :: Parser Bytes (PreBamMeta -> PreBamMeta)
sqLine = do Bytes
_ <- Bytes -> Parser Bytes
P.string "SQ\t"
                [BamSQ -> BamSQ]
fns <- Parser Bytes (BamSQ -> BamSQ)
-> Parser Bytes () -> Parser Bytes [BamSQ -> BamSQ]
forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
P.sepBy1 ([Parser Bytes (BamSQ -> BamSQ)] -> Parser Bytes (BamSQ -> BamSQ)
forall (f :: * -> *) a. Alternative f => [f a] -> f a
P.choice [Parser Bytes (BamSQ -> BamSQ)
sqnm, Parser Bytes (BamSQ -> BamSQ)
sqln, Parser Bytes (BamSQ -> BamSQ)
sqother]) Parser Bytes ()
tabs
                let sq :: BamSQ
sq = ((BamSQ -> BamSQ) -> BamSQ -> BamSQ)
-> BamSQ -> [BamSQ -> BamSQ] -> BamSQ
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (BamSQ -> BamSQ) -> BamSQ -> BamSQ
forall a b. (a -> b) -> a -> b
($) (Bytes -> Int -> BamOtherShit -> BamSQ
BamSQ "" (-1) []) [BamSQ -> BamSQ]
fns
                Bool -> Parser Bytes ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> Bool
not (Bool -> Bool) -> (Bytes -> Bool) -> Bytes -> Bool
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Bytes -> Bool
B.null (Bytes -> Bool) -> Bytes -> Bool
forall a b. (a -> b) -> a -> b
$ BamSQ -> Bytes
sq_name BamSQ
sq) Parser Bytes () -> String -> Parser Bytes ()
forall i a. Parser i a -> String -> Parser i a
P.<?> "SQ:NM field"
                Bool -> Parser Bytes ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (BamSQ -> Int
sq_length BamSQ
sq Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= 0) Parser Bytes () -> String -> Parser Bytes ()
forall i a. Parser i a -> String -> Parser i a
P.<?> "SQ:LN field"
                (PreBamMeta -> PreBamMeta)
-> Parser Bytes (PreBamMeta -> PreBamMeta)
forall (f :: * -> *) a. Applicative f => a -> f a
pure ((PreBamMeta -> PreBamMeta)
 -> Parser Bytes (PreBamMeta -> PreBamMeta))
-> (PreBamMeta -> PreBamMeta)
-> Parser Bytes (PreBamMeta -> PreBamMeta)
forall a b. (a -> b) -> a -> b
$ \meta :: PreBamMeta
meta -> PreBamMeta
meta { pmeta_refs :: [BamSQ]
pmeta_refs = BamSQ
sq BamSQ -> [BamSQ] -> [BamSQ]
forall a. a -> [a] -> [a]
: PreBamMeta -> [BamSQ]
pmeta_refs PreBamMeta
meta }

    pgLine :: Parser Bytes (PreBamMeta -> PreBamMeta)
pgLine = do Bytes
_ <- Bytes -> Parser Bytes
P.string "PG\t"
                [BamPG Bytes -> BamPG Bytes]
fns <- Parser Bytes (BamPG Bytes -> BamPG Bytes)
-> Parser Bytes () -> Parser Bytes [BamPG Bytes -> BamPG Bytes]
forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
P.sepBy1 ([Parser Bytes (BamPG Bytes -> BamPG Bytes)]
-> Parser Bytes (BamPG Bytes -> BamPG Bytes)
forall (f :: * -> *) a. Alternative f => [f a] -> f a
P.choice [Parser Bytes (BamPG Bytes -> BamPG Bytes)
forall pp. Parser Bytes (BamPG pp -> BamPG pp)
pgid, Parser Bytes (BamPG Bytes -> BamPG Bytes)
forall pp. Parser Bytes (BamPG pp -> BamPG Bytes)
pgpp, Parser Bytes (BamPG Bytes -> BamPG Bytes)
forall pp. Parser Bytes (BamPG pp -> BamPG pp)
pgother]) Parser Bytes ()
tabs
                let pg :: BamPG Bytes
pg = ((BamPG Bytes -> BamPG Bytes) -> BamPG Bytes -> BamPG Bytes)
-> BamPG Bytes -> [BamPG Bytes -> BamPG Bytes] -> BamPG Bytes
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (BamPG Bytes -> BamPG Bytes) -> BamPG Bytes -> BamPG Bytes
forall a b. (a -> b) -> a -> b
($) (Bytes -> Maybe Bytes -> BamOtherShit -> BamPG Bytes
forall pp. Bytes -> Maybe pp -> BamOtherShit -> BamPG pp
BamPG "" Maybe Bytes
forall a. Maybe a
Nothing []) [BamPG Bytes -> BamPG Bytes]
fns
                Bool -> Parser Bytes ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> Bool
not (Bool -> Bool) -> (Bytes -> Bool) -> Bytes -> Bool
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Bytes -> Bool
B.null (Bytes -> Bool) -> Bytes -> Bool
forall a b. (a -> b) -> a -> b
$ BamPG Bytes -> Bytes
forall pp. BamPG pp -> Bytes
pg_pref_name BamPG Bytes
pg) Parser Bytes () -> String -> Parser Bytes ()
forall i a. Parser i a -> String -> Parser i a
P.<?> "PG:ID field"
                (PreBamMeta -> PreBamMeta)
-> Parser Bytes (PreBamMeta -> PreBamMeta)
forall (f :: * -> *) a. Applicative f => a -> f a
pure ((PreBamMeta -> PreBamMeta)
 -> Parser Bytes (PreBamMeta -> PreBamMeta))
-> (PreBamMeta -> PreBamMeta)
-> Parser Bytes (PreBamMeta -> PreBamMeta)
forall a b. (a -> b) -> a -> b
$ \meta :: PreBamMeta
meta -> PreBamMeta
meta { pmeta_pgs :: HashMap Bytes (BamPG Bytes)
pmeta_pgs = Bytes
-> BamPG Bytes
-> HashMap Bytes (BamPG Bytes)
-> HashMap Bytes (BamPG Bytes)
forall k v.
(Eq k, Hashable k) =>
k -> v -> HashMap k v -> HashMap k v
H.insert (BamPG Bytes -> Bytes
forall pp. BamPG pp -> Bytes
pg_pref_name BamPG Bytes
pg) BamPG Bytes
pg (PreBamMeta -> HashMap Bytes (BamPG Bytes)
pmeta_pgs PreBamMeta
meta) }

    hdvn :: Parser Bytes (BamHeader -> BamHeader)
hdvn = Bytes -> Parser Bytes
P.string "VN:" Parser Bytes
-> Parser Bytes (BamHeader -> BamHeader)
-> Parser Bytes (BamHeader -> BamHeader)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
           (\a :: Int
a b :: Int
b hdr :: BamHeader
hdr -> BamHeader
hdr { hdr_version :: (Int, Int)
hdr_version = (Int
a,Int
b) })
             (Int -> Int -> BamHeader -> BamHeader)
-> Parser Bytes Int -> Parser Bytes (Int -> BamHeader -> BamHeader)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Bytes Int
forall a. Integral a => Parser a
P.decimal Parser Bytes (Int -> BamHeader -> BamHeader)
-> Parser Bytes Int -> Parser Bytes (BamHeader -> BamHeader)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ((Char -> Parser Bytes Char
P.char '.' Parser Bytes Char -> Parser Bytes Char -> Parser Bytes Char
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> Parser Bytes Char
P.char ':') Parser Bytes Char -> Parser Bytes Int -> Parser Bytes Int
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Bytes Int
forall a. Integral a => Parser a
P.decimal)

    hdso :: Parser Bytes (BamHeader -> BamHeader)
hdso = Bytes -> Parser Bytes
P.string "SO:" Parser Bytes
-> Parser Bytes (BamHeader -> BamHeader)
-> Parser Bytes (BamHeader -> BamHeader)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
           (\s :: BamSorting
s hdr :: BamHeader
hdr -> BamHeader
hdr { hdr_sorting :: BamSorting
hdr_sorting = BamSorting
s })
             (BamSorting -> BamHeader -> BamHeader)
-> Parser Bytes BamSorting -> Parser Bytes (BamHeader -> BamHeader)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Parser Bytes BamSorting] -> Parser Bytes BamSorting
forall (f :: * -> *) a. Alternative f => [f a] -> f a
P.choice [ BamSorting
Grouped     BamSorting -> Parser Bytes -> Parser Bytes BamSorting
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Bytes -> Parser Bytes
P.string "grouped"
                          , BamSorting
Queryname   BamSorting -> Parser Bytes -> Parser Bytes BamSorting
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Bytes -> Parser Bytes
P.string "queryname"
                          , BamSorting
Coordinate  BamSorting -> Parser Bytes -> Parser Bytes BamSorting
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Bytes -> Parser Bytes
P.string "coordinate"
                          , BamSorting
Unsorted    BamSorting -> Parser Bytes -> Parser Bytes BamSorting
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Bytes -> Parser Bytes
P.string "unsorted"
                          , BamSorting
Unknown     BamSorting -> Parser Bytes () -> Parser Bytes BamSorting
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (Char -> Bool) -> Parser Bytes ()
P.skipWhile (\c :: Char
c -> Char
cChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/='\t' Bool -> Bool -> Bool
&& Char
cChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/='\n') ]

    sqnm :: Parser Bytes (BamSQ -> BamSQ)
sqnm = Bytes -> Parser Bytes
P.string "SN:" Parser Bytes
-> Parser Bytes (BamSQ -> BamSQ) -> Parser Bytes (BamSQ -> BamSQ)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (\s :: Bytes
s sq :: BamSQ
sq -> BamSQ
sq { sq_name :: Bytes
sq_name = Bytes
s }) (Bytes -> BamSQ -> BamSQ)
-> Parser Bytes -> Parser Bytes (BamSQ -> BamSQ)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Bytes
pall
    sqln :: Parser Bytes (BamSQ -> BamSQ)
sqln = Bytes -> Parser Bytes
P.string "LN:" Parser Bytes
-> Parser Bytes (BamSQ -> BamSQ) -> Parser Bytes (BamSQ -> BamSQ)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (\i :: Int
i sq :: BamSQ
sq -> BamSQ
sq { sq_length :: Int
sq_length = Int
i }) (Int -> BamSQ -> BamSQ)
-> Parser Bytes Int -> Parser Bytes (BamSQ -> BamSQ)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Bytes Int
forall a. Integral a => Parser a
P.decimal

    pgid :: Parser Bytes (BamPG pp -> BamPG pp)
pgid = Bytes -> Parser Bytes
P.string "ID:" Parser Bytes
-> Parser Bytes (BamPG pp -> BamPG pp)
-> Parser Bytes (BamPG pp -> BamPG pp)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (\s :: Bytes
s pg :: BamPG pp
pg -> BamPG pp
pg { pg_pref_name :: Bytes
pg_pref_name =      Bytes
s }) (Bytes -> BamPG pp -> BamPG pp)
-> Parser Bytes -> Parser Bytes (BamPG pp -> BamPG pp)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Bytes
pall
    pgpp :: Parser Bytes (BamPG pp -> BamPG Bytes)
pgpp = Bytes -> Parser Bytes
P.string "PP:" Parser Bytes
-> Parser Bytes (BamPG pp -> BamPG Bytes)
-> Parser Bytes (BamPG pp -> BamPG Bytes)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (\s :: Bytes
s pg :: BamPG pp
pg -> BamPG pp
pg { pg_prev_pg :: Maybe Bytes
pg_prev_pg   = Bytes -> Maybe Bytes
forall a. a -> Maybe a
Just Bytes
s }) (Bytes -> BamPG pp -> BamPG Bytes)
-> Parser Bytes -> Parser Bytes (BamPG pp -> BamPG Bytes)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Bytes
pall

    hdother :: Parser Bytes (BamHeader -> BamHeader)
hdother = (\t :: (BamKey, Bytes)
t hdr :: BamHeader
hdr -> BamHeader
hdr { hdr_other_shit :: BamOtherShit
hdr_other_shit = (BamKey, Bytes)
t (BamKey, Bytes) -> BamOtherShit -> BamOtherShit
forall a. a -> [a] -> [a]
: BamHeader -> BamOtherShit
hdr_other_shit BamHeader
hdr }) ((BamKey, Bytes) -> BamHeader -> BamHeader)
-> Parser Bytes (BamKey, Bytes)
-> Parser Bytes (BamHeader -> BamHeader)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Bytes (BamKey, Bytes)
tagother
    sqother :: Parser Bytes (BamSQ -> BamSQ)
sqother = (\t :: (BamKey, Bytes)
t sq :: BamSQ
sq  -> BamSQ
sq  { sq_other_shit :: BamOtherShit
sq_other_shit  = (BamKey, Bytes)
t (BamKey, Bytes) -> BamOtherShit -> BamOtherShit
forall a. a -> [a] -> [a]
: BamSQ -> BamOtherShit
sq_other_shit  BamSQ
sq  }) ((BamKey, Bytes) -> BamSQ -> BamSQ)
-> Parser Bytes (BamKey, Bytes) -> Parser Bytes (BamSQ -> BamSQ)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Bytes (BamKey, Bytes)
tagother
    pgother :: Parser Bytes (BamPG pp -> BamPG pp)
pgother = (\t :: (BamKey, Bytes)
t p :: BamPG pp
p   -> BamPG pp
p   { pg_other_shit :: BamOtherShit
pg_other_shit  = (BamKey, Bytes)
t (BamKey, Bytes) -> BamOtherShit -> BamOtherShit
forall a. a -> [a] -> [a]
: BamPG pp -> BamOtherShit
forall pp. BamPG pp -> BamOtherShit
pg_other_shit  BamPG pp
p   }) ((BamKey, Bytes) -> BamPG pp -> BamPG pp)
-> Parser Bytes (BamKey, Bytes)
-> Parser Bytes (BamPG pp -> BamPG pp)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Bytes (BamKey, Bytes)
tagother

    coLine :: Parser Bytes (PreBamMeta -> PreBamMeta)
coLine = Bytes -> Parser Bytes
P.string "CO\t" Parser Bytes
-> Parser Bytes (PreBamMeta -> PreBamMeta)
-> Parser Bytes (PreBamMeta -> PreBamMeta)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
             (\s :: Bytes
s meta :: PreBamMeta
meta -> Bytes
s Bytes -> PreBamMeta -> PreBamMeta
forall a b. a -> b -> b
`seq` PreBamMeta
meta { pmeta_comment :: [Bytes]
pmeta_comment = Bytes
s Bytes -> [Bytes] -> [Bytes]
forall a. a -> [a] -> [a]
: PreBamMeta -> [Bytes]
pmeta_comment PreBamMeta
meta })
               (Bytes -> PreBamMeta -> PreBamMeta)
-> Parser Bytes -> Parser Bytes (PreBamMeta -> PreBamMeta)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Bytes
P.takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= 'n')

    otherLine :: Parser Bytes (PreBamMeta -> PreBamMeta)
otherLine = (\k :: BamKey
k ts :: BamOtherShit
ts meta :: PreBamMeta
meta -> PreBamMeta
meta { pmeta_other_shit :: [(BamKey, BamOtherShit)]
pmeta_other_shit = (BamKey
k,BamOtherShit
ts) (BamKey, BamOtherShit)
-> [(BamKey, BamOtherShit)] -> [(BamKey, BamOtherShit)]
forall a. a -> [a] -> [a]
: PreBamMeta -> [(BamKey, BamOtherShit)]
pmeta_other_shit PreBamMeta
meta })
                  (BamKey -> BamOtherShit -> PreBamMeta -> PreBamMeta)
-> Parser Bytes BamKey
-> Parser Bytes (BamOtherShit -> PreBamMeta -> PreBamMeta)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Bytes BamKey
bamkey Parser Bytes (BamOtherShit -> PreBamMeta -> PreBamMeta)
-> Parser Bytes BamOtherShit
-> Parser Bytes (PreBamMeta -> PreBamMeta)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Parser Bytes ()
tabs Parser Bytes ()
-> Parser Bytes BamOtherShit -> Parser Bytes BamOtherShit
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Bytes (BamKey, Bytes)
-> Parser Bytes () -> Parser Bytes BamOtherShit
forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
P.sepBy1 Parser Bytes (BamKey, Bytes)
tagother Parser Bytes ()
tabs)

    tagother :: P.Parser (BamKey,Bytes)
    tagother :: Parser Bytes (BamKey, Bytes)
tagother = (,) (BamKey -> Bytes -> (BamKey, Bytes))
-> Parser Bytes BamKey -> Parser Bytes (Bytes -> (BamKey, Bytes))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Bytes BamKey
bamkey Parser Bytes (Bytes -> (BamKey, Bytes))
-> Parser Bytes -> Parser Bytes (BamKey, Bytes)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Char -> Parser Bytes Char
P.char ':' Parser Bytes Char -> Parser Bytes -> Parser Bytes
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Bytes
pall)

    tabs :: Parser Bytes ()
tabs = Char -> Parser Bytes Char
P.char '\t' Parser Bytes Char -> Parser Bytes () -> Parser Bytes ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Char -> Bool) -> Parser Bytes ()
P.skipWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '\t')

    pall :: P.Parser Bytes
    pall :: Parser Bytes
pall = (Char -> Bool) -> Parser Bytes
P.takeWhile (\c :: Char
c -> Char
cChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/='\t' Bool -> Bool -> Bool
&& Char
cChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/='\n')

    bamkey :: P.Parser BamKey
    bamkey :: Parser Bytes BamKey
bamkey = (\a :: Char
a b :: Char
b -> String -> BamKey
forall a. IsString a => String -> a
fromString [Char
a,Char
b]) (Char -> Char -> BamKey)
-> Parser Bytes Char -> Parser Bytes (Char -> BamKey)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Bytes Char
P.anyChar Parser Bytes (Char -> BamKey)
-> Parser Bytes Char -> Parser Bytes BamKey
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser Bytes Char
P.anyChar


-- | Creates the textual form of Bam meta data.
--
-- Formatting is straight forward, only program lines are a bit
-- involved.  Our multiple chains may lead to common nodes, and we do
-- not want to print multiple identical lines.  At the same time, we may
-- need to print multiple different lines that carry the same id.  The
-- solution is to memoize printed lines, and to reuse their identity if
-- an identical line is needed.  When printing a line, it gets its
-- preferred identifier, but if it's already taken, a new identifier is
-- made up by first removing any trailing number and then by appending
-- numeric suffixes.

showBamMeta :: BamMeta -> Builder
showBamMeta :: BamMeta -> Builder
showBamMeta (BamMeta h :: BamHeader
h (Refs ss :: Vector BamSQ
ss) pgs :: [Fix BamPG]
pgs os :: [(BamKey, BamOtherShit)]
os cs :: [Bytes]
cs) =
    BamHeader -> Builder
show_bam_meta_hdr BamHeader
h Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
    (BamSQ -> Builder) -> Vector BamSQ -> Builder
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap BamSQ -> Builder
show_bam_meta_seq Vector BamSQ
ss Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
    Builder
show_bam_pgs Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
    ((BamKey, BamOtherShit) -> Builder)
-> [(BamKey, BamOtherShit)] -> Builder
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap (BamKey, BamOtherShit) -> Builder
forall (t :: * -> *).
Foldable t =>
(BamKey, t (BamKey, Bytes)) -> Builder
show_bam_meta_other [(BamKey, BamOtherShit)]
os Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
    (Bytes -> Builder) -> [Bytes] -> Builder
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap Bytes -> Builder
show_bam_meta_comment [Bytes]
cs
  where
    show_bam_meta_hdr :: BamHeader -> Builder
show_bam_meta_hdr (BamHeader (major :: Int
major,minor :: Int
minor) so :: BamSorting
so os' :: BamOtherShit
os') =
        "@HD\tVN:" Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
        Int -> Builder
intDec Int
major Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
char7 '.' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Int -> Builder
intDec Int
minor Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
        Bytes -> Builder
byteString (case BamSorting
so of Unsorted    -> "\tSO:unsorted"
                               Grouped     -> "\tSO:grouped"
                               Queryname   -> "\tSO:queryname"
                               Coordinate  -> "\tSO:coordinate"
                               Unknown     -> Bytes
forall a. Monoid a => a
mempty) Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
        BamOtherShit -> Builder
forall (t :: * -> *). Foldable t => t (BamKey, Bytes) -> Builder
show_bam_others BamOtherShit
os'

    show_bam_meta_seq :: BamSQ -> Builder
show_bam_meta_seq (BamSQ nm :: Bytes
nm ln :: Int
ln ts :: BamOtherShit
ts) =
        Bytes -> Builder
byteString "@SQ\tSN:" Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Bytes -> Builder
byteString Bytes
nm Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
        Bytes -> Builder
byteString "\tLN:" Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Int -> Builder
intDec Int
ln Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> BamOtherShit -> Builder
forall (t :: * -> *). Foldable t => t (BamKey, Bytes) -> Builder
show_bam_others BamOtherShit
ts

    show_bam_meta_comment :: Bytes -> Builder
show_bam_meta_comment cm :: Bytes
cm = Bytes -> Builder
byteString "@CO\t" Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Bytes -> Builder
byteString Bytes
cm Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
char7 '\n'

    show_bam_meta_other :: (BamKey, t (BamKey, Bytes)) -> Builder
show_bam_meta_other (BamKey k :: Word16
k,ts :: t (BamKey, Bytes)
ts) =
        Char -> Builder
char7 '@' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Word16 -> Builder
word16LE Word16
k Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> t (BamKey, Bytes) -> Builder
forall (t :: * -> *). Foldable t => t (BamKey, Bytes) -> Builder
show_bam_others t (BamKey, Bytes)
ts

    show_bam_others :: t (BamKey, Bytes) -> Builder
show_bam_others ts :: t (BamKey, Bytes)
ts =
        ((BamKey, Bytes) -> Builder) -> t (BamKey, Bytes) -> Builder
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap (BamKey, Bytes) -> Builder
show_bam_other t (BamKey, Bytes)
ts Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
char7 '\n'

    show_bam_other :: (BamKey, Bytes) -> Builder
show_bam_other (BamKey k :: Word16
k,v :: Bytes
v) =
        Char -> Builder
char7 '\t' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Word16 -> Builder
word16LE Word16
k Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
char7 ':' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Bytes -> Builder
byteString Bytes
v

    show_bam_pgs :: Builder
show_bam_pgs = ((), Builder) -> Builder
forall a b. (a, b) -> b
snd (((), Builder) -> Builder) -> ((), Builder) -> Builder
forall a b. (a -> b) -> a -> b
$ RWS () Builder (HashMap (Fix BamPG) Bytes, HashMap Bytes ()) ()
-> ()
-> (HashMap (Fix BamPG) Bytes, HashMap Bytes ())
-> ((), Builder)
forall r w s a. RWS r w s a -> r -> s -> (a, w)
evalRWS ((Fix BamPG
 -> RWST
      ()
      Builder
      (HashMap (Fix BamPG) Bytes, HashMap Bytes ())
      Identity
      Bytes)
-> [Fix BamPG]
-> RWS () Builder (HashMap (Fix BamPG) Bytes, HashMap Bytes ()) ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ Fix BamPG
-> RWST
     ()
     Builder
     (HashMap (Fix BamPG) Bytes, HashMap Bytes ())
     Identity
     Bytes
forall (m :: * -> *) r.
Monad m =>
Fix BamPG
-> RWST
     r Builder (HashMap (Fix BamPG) Bytes, HashMap Bytes ()) m Bytes
show_bam_pg [Fix BamPG]
pgs) () (HashMap (Fix BamPG) Bytes
forall k v. HashMap k v
H.empty, HashMap Bytes ()
forall k v. HashMap k v
H.empty)

    show_bam_pg :: Fix BamPG
-> RWST
     r Builder (HashMap (Fix BamPG) Bytes, HashMap Bytes ()) m Bytes
show_bam_pg p :: Fix BamPG
p@(Fix (BamPG pn :: Bytes
pn pp :: Maybe (Fix BamPG)
pp po :: BamOtherShit
po)) = do
        Maybe Bytes
ppid <- case Maybe (Fix BamPG)
pp of Nothing -> Maybe Bytes
-> RWST
     r
     Builder
     (HashMap (Fix BamPG) Bytes, HashMap Bytes ())
     m
     (Maybe Bytes)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe Bytes
forall a. Maybe a
Nothing
                           Just p' :: Fix BamPG
p' -> Bytes -> Maybe Bytes
forall a. a -> Maybe a
Just (Bytes -> Maybe Bytes)
-> RWST
     r Builder (HashMap (Fix BamPG) Bytes, HashMap Bytes ()) m Bytes
-> RWST
     r
     Builder
     (HashMap (Fix BamPG) Bytes, HashMap Bytes ())
     m
     (Maybe Bytes)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Fix BamPG
-> RWST
     r Builder (HashMap (Fix BamPG) Bytes, HashMap Bytes ()) m Bytes
show_bam_pg Fix BamPG
p'

        ((HashMap (Fix BamPG) Bytes, HashMap Bytes ()) -> Maybe Bytes)
-> RWST
     r
     Builder
     (HashMap (Fix BamPG) Bytes, HashMap Bytes ())
     m
     (Maybe Bytes)
forall w (m :: * -> *) s a r.
(Monoid w, Monad m) =>
(s -> a) -> RWST r w s m a
gets (Fix BamPG -> HashMap (Fix BamPG) Bytes -> Maybe Bytes
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
H.lookup Fix BamPG
p (HashMap (Fix BamPG) Bytes -> Maybe Bytes)
-> ((HashMap (Fix BamPG) Bytes, HashMap Bytes ())
    -> HashMap (Fix BamPG) Bytes)
-> (HashMap (Fix BamPG) Bytes, HashMap Bytes ())
-> Maybe Bytes
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (HashMap (Fix BamPG) Bytes, HashMap Bytes ())
-> HashMap (Fix BamPG) Bytes
forall a b. (a, b) -> a
fst) RWST
  r
  Builder
  (HashMap (Fix BamPG) Bytes, HashMap Bytes ())
  m
  (Maybe Bytes)
-> (Maybe Bytes
    -> RWST
         r Builder (HashMap (Fix BamPG) Bytes, HashMap Bytes ()) m Bytes)
-> RWST
     r Builder (HashMap (Fix BamPG) Bytes, HashMap Bytes ()) m Bytes
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
            Just pid :: Bytes
pid -> Bytes
-> RWST
     r Builder (HashMap (Fix BamPG) Bytes, HashMap Bytes ()) m Bytes
forall (m :: * -> *) a. Monad m => a -> m a
return Bytes
pid
            Nothing  -> do
                -- preferred name without a trailing dash-and-number
                let pn' :: String
pn' = case (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
dropWhile Char -> Bool
isDigit ShowS -> ShowS -> ShowS
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ShowS
forall a. [a] -> [a]
reverse ShowS -> ShowS
forall a b. (a -> b) -> a -> b
$ Bytes -> String
S.unpack Bytes
pn of
                            '-':xs :: String
xs -> ShowS
forall a. [a] -> [a]
reverse String
xs
                            _      -> Bytes -> String
S.unpack Bytes
pn

                -- find unused preferable PG:ID:  try prefered name,
                -- preferred name without number, preferred name
                -- without number and increasing numbers attached
                Bytes
pid <- ((HashMap (Fix BamPG) Bytes, HashMap Bytes ()) -> Bytes)
-> RWST
     r Builder (HashMap (Fix BamPG) Bytes, HashMap Bytes ()) m Bytes
forall w (m :: * -> *) s a r.
(Monoid w, Monad m) =>
(s -> a) -> RWST r w s m a
gets (((HashMap (Fix BamPG) Bytes, HashMap Bytes ()) -> Bytes)
 -> RWST
      r Builder (HashMap (Fix BamPG) Bytes, HashMap Bytes ()) m Bytes)
-> ((HashMap (Fix BamPG) Bytes, HashMap Bytes ()) -> Bytes)
-> RWST
     r Builder (HashMap (Fix BamPG) Bytes, HashMap Bytes ()) m Bytes
forall a b. (a -> b) -> a -> b
$ \(_,hs :: HashMap Bytes ()
hs) ->
                            [Bytes] -> Bytes
forall a. [a] -> a
head ([Bytes] -> Bytes) -> ([Bytes] -> [Bytes]) -> [Bytes] -> Bytes
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Bytes -> Bool) -> [Bytes] -> [Bytes]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (Bytes -> Bool) -> Bytes -> Bool
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Bytes -> HashMap Bytes () -> Bool)
-> HashMap Bytes () -> Bytes -> Bool
forall a b c. (a -> b -> c) -> b -> a -> c
flip Bytes -> HashMap Bytes () -> Bool
forall k a. (Eq k, Hashable k) => k -> HashMap k a -> Bool
H.member HashMap Bytes ()
hs) ([Bytes] -> Bytes) -> [Bytes] -> Bytes
forall a b. (a -> b) -> a -> b
$
                            Bytes
pn Bytes -> [Bytes] -> [Bytes]
forall a. a -> [a] -> [a]
: String -> Bytes
S.pack String
pn' Bytes -> [Bytes] -> [Bytes]
forall a. a -> [a] -> [a]
: [ String -> Bytes
S.pack (String -> Bytes) -> String -> Bytes
forall a b. (a -> b) -> a -> b
$ String
pn' String -> ShowS
forall a. [a] -> [a] -> [a]
++ '-' Char -> ShowS
forall a. a -> [a] -> [a]
: (Int -> String
forall a. Show a => a -> String
show Int
i) | Int
i <- [2::Int ..] ]

                ((HashMap (Fix BamPG) Bytes, HashMap Bytes ())
 -> (HashMap (Fix BamPG) Bytes, HashMap Bytes ()))
-> RWST
     r Builder (HashMap (Fix BamPG) Bytes, HashMap Bytes ()) m ()
forall w (m :: * -> *) s r.
(Monoid w, Monad m) =>
(s -> s) -> RWST r w s m ()
modify (((HashMap (Fix BamPG) Bytes, HashMap Bytes ())
  -> (HashMap (Fix BamPG) Bytes, HashMap Bytes ()))
 -> RWST
      r Builder (HashMap (Fix BamPG) Bytes, HashMap Bytes ()) m ())
-> ((HashMap (Fix BamPG) Bytes -> HashMap (Fix BamPG) Bytes)
    -> (HashMap (Fix BamPG) Bytes, HashMap Bytes ())
    -> (HashMap (Fix BamPG) Bytes, HashMap Bytes ()))
-> (HashMap (Fix BamPG) Bytes -> HashMap (Fix BamPG) Bytes)
-> RWST
     r Builder (HashMap (Fix BamPG) Bytes, HashMap Bytes ()) m ()
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (HashMap (Fix BamPG) Bytes -> HashMap (Fix BamPG) Bytes)
-> (HashMap (Fix BamPG) Bytes, HashMap Bytes ())
-> (HashMap (Fix BamPG) Bytes, HashMap Bytes ())
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first ((HashMap (Fix BamPG) Bytes -> HashMap (Fix BamPG) Bytes)
 -> RWST
      r Builder (HashMap (Fix BamPG) Bytes, HashMap Bytes ()) m ())
-> (HashMap (Fix BamPG) Bytes -> HashMap (Fix BamPG) Bytes)
-> RWST
     r Builder (HashMap (Fix BamPG) Bytes, HashMap Bytes ()) m ()
forall a b. (a -> b) -> a -> b
$ Fix BamPG
-> Bytes -> HashMap (Fix BamPG) Bytes -> HashMap (Fix BamPG) Bytes
forall k v.
(Eq k, Hashable k) =>
k -> v -> HashMap k v -> HashMap k v
H.insert Fix BamPG
p Bytes
pid
                ((HashMap (Fix BamPG) Bytes, HashMap Bytes ())
 -> (HashMap (Fix BamPG) Bytes, HashMap Bytes ()))
-> RWST
     r Builder (HashMap (Fix BamPG) Bytes, HashMap Bytes ()) m ()
forall w (m :: * -> *) s r.
(Monoid w, Monad m) =>
(s -> s) -> RWST r w s m ()
modify (((HashMap (Fix BamPG) Bytes, HashMap Bytes ())
  -> (HashMap (Fix BamPG) Bytes, HashMap Bytes ()))
 -> RWST
      r Builder (HashMap (Fix BamPG) Bytes, HashMap Bytes ()) m ())
-> ((HashMap Bytes () -> HashMap Bytes ())
    -> (HashMap (Fix BamPG) Bytes, HashMap Bytes ())
    -> (HashMap (Fix BamPG) Bytes, HashMap Bytes ()))
-> (HashMap Bytes () -> HashMap Bytes ())
-> RWST
     r Builder (HashMap (Fix BamPG) Bytes, HashMap Bytes ()) m ()
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (HashMap Bytes () -> HashMap Bytes ())
-> (HashMap (Fix BamPG) Bytes, HashMap Bytes ())
-> (HashMap (Fix BamPG) Bytes, HashMap Bytes ())
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second ((HashMap Bytes () -> HashMap Bytes ())
 -> RWST
      r Builder (HashMap (Fix BamPG) Bytes, HashMap Bytes ()) m ())
-> (HashMap Bytes () -> HashMap Bytes ())
-> RWST
     r Builder (HashMap (Fix BamPG) Bytes, HashMap Bytes ()) m ()
forall a b. (a -> b) -> a -> b
$ Bytes -> () -> HashMap Bytes () -> HashMap Bytes ()
forall k v.
(Eq k, Hashable k) =>
k -> v -> HashMap k v -> HashMap k v
H.insert Bytes
pid ()

                Builder
-> RWST
     r Builder (HashMap (Fix BamPG) Bytes, HashMap Bytes ()) m ()
forall (m :: * -> *) w r s. Monad m => w -> RWST r w s m ()
tell (Builder
 -> RWST
      r Builder (HashMap (Fix BamPG) Bytes, HashMap Bytes ()) m ())
-> Builder
-> RWST
     r Builder (HashMap (Fix BamPG) Bytes, HashMap Bytes ()) m ()
forall a b. (a -> b) -> a -> b
$ Bytes -> Builder
byteString "@PG\tID:" Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Bytes -> Builder
byteString Bytes
pid Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
                       Builder -> (Bytes -> Builder) -> Maybe Bytes -> Builder
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Builder
forall a. Monoid a => a
mempty (\x :: Bytes
x -> Bytes -> Builder
byteString "\tPP:" Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Bytes -> Builder
byteString Bytes
x) Maybe Bytes
ppid Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
                       BamOtherShit -> Builder
forall (t :: * -> *). Foldable t => t (BamKey, Bytes) -> Builder
show_bam_others BamOtherShit
po
                Bytes
-> RWST
     r Builder (HashMap (Fix BamPG) Bytes, HashMap Bytes ()) m Bytes
forall (m :: * -> *) a. Monad m => a -> m a
return Bytes
pid


-- | Reference sequence in Bam
-- Bam enumerates the reference sequences and then sorts by index.  We
-- need to track that index if we want to reproduce the sorting order.
newtype Refseq = Refseq { Refseq -> Word32
unRefseq :: Word32 } deriving (Refseq -> Refseq -> Bool
(Refseq -> Refseq -> Bool)
-> (Refseq -> Refseq -> Bool) -> Eq Refseq
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Refseq -> Refseq -> Bool
$c/= :: Refseq -> Refseq -> Bool
== :: Refseq -> Refseq -> Bool
$c== :: Refseq -> Refseq -> Bool
Eq, Eq Refseq
Eq Refseq =>
(Refseq -> Refseq -> Ordering)
-> (Refseq -> Refseq -> Bool)
-> (Refseq -> Refseq -> Bool)
-> (Refseq -> Refseq -> Bool)
-> (Refseq -> Refseq -> Bool)
-> (Refseq -> Refseq -> Refseq)
-> (Refseq -> Refseq -> Refseq)
-> Ord Refseq
Refseq -> Refseq -> Bool
Refseq -> Refseq -> Ordering
Refseq -> Refseq -> Refseq
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Refseq -> Refseq -> Refseq
$cmin :: Refseq -> Refseq -> Refseq
max :: Refseq -> Refseq -> Refseq
$cmax :: Refseq -> Refseq -> Refseq
>= :: Refseq -> Refseq -> Bool
$c>= :: Refseq -> Refseq -> Bool
> :: Refseq -> Refseq -> Bool
$c> :: Refseq -> Refseq -> Bool
<= :: Refseq -> Refseq -> Bool
$c<= :: Refseq -> Refseq -> Bool
< :: Refseq -> Refseq -> Bool
$c< :: Refseq -> Refseq -> Bool
compare :: Refseq -> Refseq -> Ordering
$ccompare :: Refseq -> Refseq -> Ordering
$cp1Ord :: Eq Refseq
Ord, Ord Refseq
Ord Refseq =>
((Refseq, Refseq) -> [Refseq])
-> ((Refseq, Refseq) -> Refseq -> Int)
-> ((Refseq, Refseq) -> Refseq -> Int)
-> ((Refseq, Refseq) -> Refseq -> Bool)
-> ((Refseq, Refseq) -> Int)
-> ((Refseq, Refseq) -> Int)
-> Ix Refseq
(Refseq, Refseq) -> Int
(Refseq, Refseq) -> [Refseq]
(Refseq, Refseq) -> Refseq -> Bool
(Refseq, Refseq) -> Refseq -> Int
forall a.
Ord a =>
((a, a) -> [a])
-> ((a, a) -> a -> Int)
-> ((a, a) -> a -> Int)
-> ((a, a) -> a -> Bool)
-> ((a, a) -> Int)
-> ((a, a) -> Int)
-> Ix a
unsafeRangeSize :: (Refseq, Refseq) -> Int
$cunsafeRangeSize :: (Refseq, Refseq) -> Int
rangeSize :: (Refseq, Refseq) -> Int
$crangeSize :: (Refseq, Refseq) -> Int
inRange :: (Refseq, Refseq) -> Refseq -> Bool
$cinRange :: (Refseq, Refseq) -> Refseq -> Bool
unsafeIndex :: (Refseq, Refseq) -> Refseq -> Int
$cunsafeIndex :: (Refseq, Refseq) -> Refseq -> Int
index :: (Refseq, Refseq) -> Refseq -> Int
$cindex :: (Refseq, Refseq) -> Refseq -> Int
range :: (Refseq, Refseq) -> [Refseq]
$crange :: (Refseq, Refseq) -> [Refseq]
$cp1Ix :: Ord Refseq
Ix, Refseq
Refseq -> Refseq -> Bounded Refseq
forall a. a -> a -> Bounded a
maxBound :: Refseq
$cmaxBound :: Refseq
minBound :: Refseq
$cminBound :: Refseq
Bounded, Int -> Refseq -> Int
Refseq -> Int
(Int -> Refseq -> Int) -> (Refseq -> Int) -> Hashable Refseq
forall a. (Int -> a -> Int) -> (a -> Int) -> Hashable a
hash :: Refseq -> Int
$chash :: Refseq -> Int
hashWithSalt :: Int -> Refseq -> Int
$chashWithSalt :: Int -> Refseq -> Int
Hashable)

instance Show Refseq where
    showsPrec :: Int -> Refseq -> ShowS
showsPrec p :: Int
p (Refseq r :: Word32
r) = Int -> Word32 -> ShowS
forall a. Show a => Int -> a -> ShowS
showsPrec Int
p Word32
r

instance Enum Refseq where
    succ :: Refseq -> Refseq
succ = Word32 -> Refseq
Refseq (Word32 -> Refseq) -> (Refseq -> Word32) -> Refseq -> Refseq
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word32 -> Word32
forall a. Enum a => a -> a
succ (Word32 -> Word32) -> (Refseq -> Word32) -> Refseq -> Word32
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Refseq -> Word32
unRefseq
    pred :: Refseq -> Refseq
pred = Word32 -> Refseq
Refseq (Word32 -> Refseq) -> (Refseq -> Word32) -> Refseq -> Refseq
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word32 -> Word32
forall a. Enum a => a -> a
pred (Word32 -> Word32) -> (Refseq -> Word32) -> Refseq -> Word32
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Refseq -> Word32
unRefseq
    toEnum :: Int -> Refseq
toEnum = Word32 -> Refseq
Refseq (Word32 -> Refseq) -> (Int -> Word32) -> Int -> Refseq
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral
    fromEnum :: Refseq -> Int
fromEnum = Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32 -> Int) -> (Refseq -> Word32) -> Refseq -> Int
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Refseq -> Word32
unRefseq
    enumFrom :: Refseq -> [Refseq]
enumFrom = (Word32 -> Refseq) -> [Word32] -> [Refseq]
forall a b. (a -> b) -> [a] -> [b]
map Word32 -> Refseq
Refseq ([Word32] -> [Refseq])
-> (Refseq -> [Word32]) -> Refseq -> [Refseq]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word32 -> [Word32]
forall a. Enum a => a -> [a]
enumFrom (Word32 -> [Word32]) -> (Refseq -> Word32) -> Refseq -> [Word32]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Refseq -> Word32
unRefseq
    enumFromThen :: Refseq -> Refseq -> [Refseq]
enumFromThen (Refseq a :: Word32
a) (Refseq b :: Word32
b) = (Word32 -> Refseq) -> [Word32] -> [Refseq]
forall a b. (a -> b) -> [a] -> [b]
map Word32 -> Refseq
Refseq ([Word32] -> [Refseq]) -> [Word32] -> [Refseq]
forall a b. (a -> b) -> a -> b
$ Word32 -> Word32 -> [Word32]
forall a. Enum a => a -> a -> [a]
enumFromThen Word32
a Word32
b
    enumFromTo :: Refseq -> Refseq -> [Refseq]
enumFromTo (Refseq a :: Word32
a) (Refseq b :: Word32
b) = (Word32 -> Refseq) -> [Word32] -> [Refseq]
forall a b. (a -> b) -> [a] -> [b]
map Word32 -> Refseq
Refseq ([Word32] -> [Refseq]) -> [Word32] -> [Refseq]
forall a b. (a -> b) -> a -> b
$ Word32 -> Word32 -> [Word32]
forall a. Enum a => a -> a -> [a]
enumFromTo Word32
a Word32
b
    enumFromThenTo :: Refseq -> Refseq -> Refseq -> [Refseq]
enumFromThenTo (Refseq a :: Word32
a) (Refseq b :: Word32
b) (Refseq c :: Word32
c) = (Word32 -> Refseq) -> [Word32] -> [Refseq]
forall a b. (a -> b) -> [a] -> [b]
map Word32 -> Refseq
Refseq ([Word32] -> [Refseq]) -> [Word32] -> [Refseq]
forall a b. (a -> b) -> a -> b
$ Word32 -> Word32 -> Word32 -> [Word32]
forall a. Enum a => a -> a -> a -> [a]
enumFromThenTo Word32
a Word32
b Word32
c


-- | Tests whether a reference sequence is valid.
-- Returns true unless the the argument equals @invalidRefseq@.
isValidRefseq :: Refseq -> Bool
isValidRefseq :: Refseq -> Bool
isValidRefseq = Refseq -> Refseq -> Bool
forall a. Eq a => a -> a -> Bool
(/=) Refseq
invalidRefseq

-- | The invalid Refseq.
-- Bam uses this value to encode a missing reference sequence.
invalidRefseq :: Refseq
invalidRefseq :: Refseq
invalidRefseq = Word32 -> Refseq
Refseq 0xffffffff

-- | The invalid position.
-- Bam uses this value to encode a missing position.
{-# INLINE invalidPos #-}
invalidPos :: Int
invalidPos :: Int
invalidPos = -1

-- | Tests whether a position is valid.
-- Returns true unless the the argument equals @invalidPos@.
{-# INLINE isValidPos #-}
isValidPos :: Int -> Bool
isValidPos :: Int -> Bool
isValidPos = Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
(/=) Int
invalidPos

{-# INLINE unknownMapq #-}
unknownMapq :: Int
unknownMapq :: Int
unknownMapq = 255

isKnownMapq :: Int -> Bool
isKnownMapq :: Int -> Bool
isKnownMapq = Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
(/=) Int
unknownMapq

-- | A list of reference sequences.
newtype Refs = Refs { Refs -> Vector BamSQ
unRefs :: V.Vector BamSQ } deriving Int -> Refs -> ShowS
[Refs] -> ShowS
Refs -> String
(Int -> Refs -> ShowS)
-> (Refs -> String) -> ([Refs] -> ShowS) -> Show Refs
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Refs] -> ShowS
$cshowList :: [Refs] -> ShowS
show :: Refs -> String
$cshow :: Refs -> String
showsPrec :: Int -> Refs -> ShowS
$cshowsPrec :: Int -> Refs -> ShowS
Show

instance Monoid Refs where
    mempty :: Refs
mempty = Vector BamSQ -> Refs
Refs Vector BamSQ
forall a. Vector a
V.empty
    mappend :: Refs -> Refs -> Refs
mappend = Refs -> Refs -> Refs
forall a. Semigroup a => a -> a -> a
(<>)

instance Semigroup Refs where
    Refs a :: Vector BamSQ
a <> :: Refs -> Refs -> Refs
<> Refs b :: Vector BamSQ
b = Vector BamSQ -> Refs
Refs (Vector BamSQ -> Refs)
-> ([BamSQ] -> Vector BamSQ) -> [BamSQ] -> Refs
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. [BamSQ] -> Vector BamSQ
forall a. [a] -> Vector a
V.fromList ([BamSQ] -> Vector BamSQ)
-> ([BamSQ] -> [BamSQ]) -> [BamSQ] -> Vector BamSQ
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. [BamSQ] -> [BamSQ]
forall a. (Hashable a, Eq a) => [a] -> [a]
nubHash ([BamSQ] -> Refs) -> [BamSQ] -> Refs
forall a b. (a -> b) -> a -> b
$ Vector BamSQ -> [BamSQ]
forall a. Vector a -> [a]
V.toList Vector BamSQ
a [BamSQ] -> [BamSQ] -> [BamSQ]
forall a. [a] -> [a] -> [a]
++ Vector BamSQ -> [BamSQ]
forall a. Vector a -> [a]
V.toList Vector BamSQ
b

getRef :: Refs -> Refseq -> BamSQ
getRef :: Refs -> Refseq -> BamSQ
getRef (Refs refs :: Vector BamSQ
refs) (Refseq i :: Word32
i) = BamSQ -> Maybe BamSQ -> BamSQ
forall a. a -> Maybe a -> a
fromMaybe (Bytes -> Int -> BamOtherShit -> BamSQ
BamSQ "*" 0 []) (Maybe BamSQ -> BamSQ) -> Maybe BamSQ -> BamSQ
forall a b. (a -> b) -> a -> b
$ Vector BamSQ
refs Vector BamSQ -> Int -> Maybe BamSQ
forall a. Vector a -> Int -> Maybe a
V.!? Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
i

flagPaired, flagProperlyPaired, flagUnmapped, flagMateUnmapped,
 flagReversed, flagMateReversed, flagFirstMate, flagSecondMate,
 flagAuxillary, flagSecondary, flagFailsQC, flagDuplicate,
 flagSupplementary :: Int

flagPaired :: Int
flagPaired         =   0x1
flagProperlyPaired :: Int
flagProperlyPaired =   0x2
flagUnmapped :: Int
flagUnmapped       =   0x4
flagMateUnmapped :: Int
flagMateUnmapped   =   0x8
flagReversed :: Int
flagReversed       =  0x10
flagMateReversed :: Int
flagMateReversed   =  0x20
flagFirstMate :: Int
flagFirstMate      =  0x40
flagSecondMate :: Int
flagSecondMate     =  0x80
flagAuxillary :: Int
flagAuxillary      = 0x100
flagSecondary :: Int
flagSecondary      = 0x100
flagFailsQC :: Int
flagFailsQC        = 0x200
flagDuplicate :: Int
flagDuplicate      = 0x400
flagSupplementary :: Int
flagSupplementary  = 0x800

eflagTrimmed, eflagMerged, eflagAlternative, eflagExactIndex :: Int
eflagTrimmed :: Int
eflagTrimmed     = 0x1
eflagMerged :: Int
eflagMerged      = 0x2
eflagAlternative :: Int
eflagAlternative = 0x4
eflagExactIndex :: Int
eflagExactIndex  = 0x8


-- | Compares two sequence names the way samtools does.
-- samtools sorts by \"strnum_cmp\":
--
-- * if both strings start with a digit, parse the initial
--   sequence of digits and compare numerically, if equal,
--   continue behind the numbers
-- * else compare the first characters (possibly NUL), if equal
--   continue behind them
-- * else both strings ended and the shorter one counts as
--   smaller (and that part is stupid)

compareNames :: Bytes -> Bytes -> Ordering
compareNames :: Bytes -> Bytes -> Ordering
compareNames n :: Bytes
n m :: Bytes
m = case (Bytes -> Maybe (Word8, Bytes)
uncons Bytes
n, Bytes -> Maybe (Word8, Bytes)
uncons Bytes
m) of
        ( Nothing, Nothing ) -> Ordering
EQ
        ( Just  _, Nothing ) -> Ordering
GT
        ( Nothing, Just  _ ) -> Ordering
LT
        ( Just (c :: Word8
c,n' :: Bytes
n'), Just (d :: Word8
d,m' :: Bytes
m') )
            | Word8 -> Bool
is_digit Word8
c Bool -> Bool -> Bool
|| Word8 -> Bool
is_digit Word8
d
            , Just (u :: Int
u,n'' :: Bytes
n'') <- Bytes -> Maybe (Int, Bytes)
S.readInt Bytes
n
            , Just (v :: Int
v,m'' :: Bytes
m'') <- Bytes -> Maybe (Int, Bytes)
S.readInt Bytes
m ->
                case Int
u Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
`compare` Int
v of
                    LT -> Ordering
LT
                    GT -> Ordering
GT
                    EQ -> Bytes
n'' Bytes -> Bytes -> Ordering
`compareNames` Bytes
m''
            | Bool
otherwise ->
                case Word8
c Word8 -> Word8 -> Ordering
forall a. Ord a => a -> a -> Ordering
`compare` Word8
d of
                    LT -> Ordering
LT
                    GT -> Ordering
GT
                    EQ -> Bytes
n' Bytes -> Bytes -> Ordering
`compareNames` Bytes
m'
  where
    is_digit :: Word8 -> Bool
is_digit c :: Word8
c = Char -> Word8
c2w '0' Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
c Bool -> Bool -> Bool
&& Word8
c Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Char -> Word8
c2w '9'


data MdOp = MdNum Int | MdRep Nucleotides | MdDel [Nucleotides] deriving Int -> MdOp -> ShowS
[MdOp] -> ShowS
MdOp -> String
(Int -> MdOp -> ShowS)
-> (MdOp -> String) -> ([MdOp] -> ShowS) -> Show MdOp
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [MdOp] -> ShowS
$cshowList :: [MdOp] -> ShowS
show :: MdOp -> String
$cshow :: MdOp -> String
showsPrec :: Int -> MdOp -> ShowS
$cshowsPrec :: Int -> MdOp -> ShowS
Show

readMd :: Bytes -> Maybe [MdOp]
readMd :: Bytes -> Maybe [MdOp]
readMd s :: Bytes
s | Bytes -> Bool
S.null Bytes
s           = [MdOp] -> Maybe [MdOp]
forall (m :: * -> *) a. Monad m => a -> m a
return []
         | Char -> Bool
isDigit (Bytes -> Char
S.head Bytes
s) = do (n :: Int
n,t :: Bytes
t) <- Bytes -> Maybe (Int, Bytes)
S.readInt Bytes
s
                                   (Int -> MdOp
MdNum Int
n MdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
:) ([MdOp] -> [MdOp]) -> Maybe [MdOp] -> Maybe [MdOp]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Bytes -> Maybe [MdOp]
readMd Bytes
t
         | Bytes -> Char
S.head Bytes
s Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '^'    = let (a :: Bytes
a,b :: Bytes
b) = (Char -> Bool) -> Bytes -> (Bytes, Bytes)
S.break Char -> Bool
isDigit (Bytes -> Bytes
S.tail Bytes
s)
                                in ([Nucleotides] -> MdOp
MdDel ((Word8 -> Nucleotides) -> [Word8] -> [Nucleotides]
forall a b. (a -> b) -> [a] -> [b]
map Word8 -> Nucleotides
toNucleotides ([Word8] -> [Nucleotides]) -> [Word8] -> [Nucleotides]
forall a b. (a -> b) -> a -> b
$ Bytes -> [Word8]
B.unpack Bytes
a) MdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
:) ([MdOp] -> [MdOp]) -> Maybe [MdOp] -> Maybe [MdOp]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Bytes -> Maybe [MdOp]
readMd Bytes
b
         | Bool
otherwise          = (Nucleotides -> MdOp
MdRep (Word8 -> Nucleotides
toNucleotides (Word8 -> Nucleotides) -> Word8 -> Nucleotides
forall a b. (a -> b) -> a -> b
$ Bytes -> Word8
B.head Bytes
s) MdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
:) ([MdOp] -> [MdOp]) -> Maybe [MdOp] -> Maybe [MdOp]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Bytes -> Maybe [MdOp]
readMd (Bytes -> Bytes
S.tail Bytes
s)

-- | Normalizes a series of 'MdOp's and encodes them in the way BAM and
-- SAM expect it.
showMd :: [MdOp] -> Bytes
showMd :: [MdOp] -> Bytes
showMd = String -> Bytes
S.pack (String -> Bytes) -> ([MdOp] -> String) -> [MdOp] -> Bytes
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ([MdOp] -> ShowS) -> String -> [MdOp] -> String
forall a b c. (a -> b -> c) -> b -> a -> c
flip [MdOp] -> ShowS
s1 []
  where
    s1 :: [MdOp] -> ShowS
s1 (MdNum  i :: Int
i : MdNum  j :: Int
j : ms :: [MdOp]
ms) = [MdOp] -> ShowS
s1 (Int -> MdOp
MdNum (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
j) MdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
: [MdOp]
ms)
    s1 (MdNum  0            : ms :: [MdOp]
ms) = [MdOp] -> ShowS
s1 [MdOp]
ms
    s1 (MdNum  i :: Int
i            : ms :: [MdOp]
ms) = Int -> ShowS
forall a. Show a => a -> ShowS
shows Int
i ShowS -> ShowS -> ShowS
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. [MdOp] -> ShowS
s1 [MdOp]
ms

    s1 (MdRep  r :: Nucleotides
r            : ms :: [MdOp]
ms) = Nucleotides -> ShowS
forall a. Show a => a -> ShowS
shows Nucleotides
r ShowS -> ShowS -> ShowS
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. [MdOp] -> ShowS
s1 [MdOp]
ms

    s1 (MdDel d1 :: [Nucleotides]
d1 : MdDel d2 :: [Nucleotides]
d2 : ms :: [MdOp]
ms) = [MdOp] -> ShowS
s1 ([Nucleotides] -> MdOp
MdDel ([Nucleotides]
d1[Nucleotides] -> [Nucleotides] -> [Nucleotides]
forall a. [a] -> [a] -> [a]
++[Nucleotides]
d2) MdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
: [MdOp]
ms)
    s1 (MdDel []            : ms :: [MdOp]
ms) = [MdOp] -> ShowS
s1 [MdOp]
ms
    s1 (MdDel ns :: [Nucleotides]
ns : MdRep  r :: Nucleotides
r : ms :: [MdOp]
ms) = (:) '^' ShowS -> ShowS -> ShowS
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. [Nucleotides] -> ShowS
forall a. Show a => a -> ShowS
shows [Nucleotides]
ns ShowS -> ShowS -> ShowS
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (:) '0' ShowS -> ShowS -> ShowS
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Nucleotides -> ShowS
forall a. Show a => a -> ShowS
shows Nucleotides
r ShowS -> ShowS -> ShowS
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. [MdOp] -> ShowS
s1 [MdOp]
ms
    s1 (MdDel ns :: [Nucleotides]
ns            : ms :: [MdOp]
ms) = (:) '^' ShowS -> ShowS -> ShowS
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. [Nucleotides] -> ShowS
forall a. Show a => a -> ShowS
shows [Nucleotides]
ns ShowS -> ShowS -> ShowS
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. [MdOp] -> ShowS
s1 [MdOp]
ms
    s1 [                        ] = ShowS
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id


-- | Computes the "distinct bin" according to the BAM binning scheme.  If
-- an alignment starts at @pos@ and its CIGAR implies a length of @len@
-- on the reference, then it goes into bin @distinctBin pos len@.
distinctBin :: Int -> Int -> Int
distinctBin :: Int -> Int -> Int
distinctBin beg :: Int
beg len :: Int
len = Int -> Int -> Int
mkbin 14 (Int -> Int) -> Int -> Int
forall a b. (a -> b) -> a -> b
$ Int -> Int -> Int
mkbin 17 (Int -> Int) -> Int -> Int
forall a b. (a -> b) -> a -> b
$ Int -> Int -> Int
mkbin 20 (Int -> Int) -> Int -> Int
forall a b. (a -> b) -> a -> b
$ Int -> Int -> Int
mkbin 23 (Int -> Int) -> Int -> Int
forall a b. (a -> b) -> a -> b
$ Int -> Int -> Int
mkbin 26 0
  where end :: Int
end = Int
beg Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1
        mkbin :: Int -> Int -> Int
mkbin n :: Int
n x :: Int
x = if Int
beg Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
n Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
end Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
n then Int
x
                    else ((1 Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` (29Int -> Int -> Int
forall a. Num a => a -> a -> a
-Int
n))Int -> Int -> Int
forall a. Num a => a -> a -> a
-1) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` 7 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
beg Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
n)