Safe Haskell	None
Language	Haskell2010

Bio.Bam.Header

Synopsis

data BamMeta = BamMeta {
- meta_hdr :: !BamHeader
- meta_refs :: !Refs
- meta_pgs :: [Fix BamPG]
- meta_other_shit :: [(BamKey, BamOtherShit)]
- meta_comment :: [Bytes]
}
parseBamMeta :: Parser BamMeta
showBamMeta :: BamMeta -> Builder
addPG :: MonadIO m => Maybe Version -> m (BamMeta -> BamMeta)
newtype BamKey = BamKey Word16
data BamHeader = BamHeader {
- hdr_version :: (Int, Int)
- hdr_sorting :: BamSorting
- hdr_other_shit :: BamOtherShit
}
data BamSQ = BamSQ {
- sq_name :: Bytes
- sq_length :: Int
- sq_other_shit :: BamOtherShit
}
data BamSorting
- = Unknown
- | Unsorted
- | Grouped
- | Queryname
- | Coordinate
type BamOtherShit = [(BamKey, Bytes)]
newtype Refseq = Refseq {
- unRefseq :: Word32
}
invalidRefseq :: Refseq
isValidRefseq :: Refseq -> Bool
invalidPos :: Int
isValidPos :: Int -> Bool
unknownMapq :: Int
isKnownMapq :: Int -> Bool
newtype Refs = Refs {
- unRefs :: Vector BamSQ
}
getRef :: Refs -> Refseq -> BamSQ
compareNames :: Bytes -> Bytes -> Ordering
flagPaired :: Int
flagProperlyPaired :: Int
flagUnmapped :: Int
flagMateUnmapped :: Int
flagReversed :: Int
flagMateReversed :: Int
flagFirstMate :: Int
flagSecondMate :: Int
flagAuxillary :: Int
flagSecondary :: Int
flagFailsQC :: Int
flagDuplicate :: Int
flagSupplementary :: Int
eflagTrimmed :: Int
eflagMerged :: Int
eflagAlternative :: Int
eflagExactIndex :: Int
distinctBin :: Int -> Int -> Int
data MdOp
- = MdNum Int
- | MdRep Nucleotides
- | MdDel [Nucleotides]
readMd :: Bytes -> Maybe [MdOp]
showMd :: [MdOp] -> Bytes

Documentation

data BamMeta Source #

Constructors

BamMeta
Fields meta_hdr :: !BamHeader meta_refs :: !Refs meta_pgs :: [Fix BamPG] meta_other_shit :: [(BamKey, BamOtherShit)] meta_comment :: [Bytes]

Instances

Show BamMeta Source #
Instance details Defined in Bio.Bam.Header Methods showsPrec :: Int -> BamMeta -> ShowS # show :: BamMeta -> String # showList :: [BamMeta] -> ShowS #
Generic BamMeta Source #
Instance details Defined in Bio.Bam.Header Associated Types type Rep BamMeta :: Type -> Type # Methods from :: BamMeta -> Rep BamMeta x # to :: Rep BamMeta x -> BamMeta #
Semigroup BamMeta Source #
Instance details Defined in Bio.Bam.Header Methods (<>) :: BamMeta -> BamMeta -> BamMeta # sconcat :: NonEmpty BamMeta -> BamMeta # stimes :: Integral b => b -> BamMeta -> BamMeta #
Monoid BamMeta Source #
Instance details Defined in Bio.Bam.Header Methods mempty :: BamMeta # mappend :: BamMeta -> BamMeta -> BamMeta # mconcat :: [BamMeta] -> BamMeta #
type Rep BamMeta Source #
Instance details Defined in Bio.Bam.Header type Rep BamMeta

parseBamMeta :: Parser BamMeta Source #

showBamMeta :: BamMeta -> Builder Source #

Creates the textual form of Bam meta data.

Formatting is straight forward, only program lines are a bit involved. Our multiple chains may lead to common nodes, and we do not want to print multiple identical lines. At the same time, we may need to print multiple different lines that carry the same id. The solution is to memoize printed lines, and to reuse their identity if an identical line is needed. When printing a line, it gets its preferred identifier, but if it's already taken, a new identifier is made up by first removing any trailing number and then by appending numeric suffixes.

addPG :: MonadIO m => Maybe Version -> m (BamMeta -> BamMeta) Source #

Adds a new program line to a header. The new entry is (arbitrarily) prepended to the first existing chain, or forms a new singleton chain if none exists.

newtype BamKey Source #

Exactly two characters, for the "named" fields in bam.

Constructors

BamKey Word16

Instances

Eq BamKey Source #
Instance details Defined in Bio.Bam.Header Methods (==) :: BamKey -> BamKey -> Bool # (/=) :: BamKey -> BamKey -> Bool #
Ord BamKey Source #
Instance details Defined in Bio.Bam.Header Methods compare :: BamKey -> BamKey -> Ordering # (<) :: BamKey -> BamKey -> Bool # (<=) :: BamKey -> BamKey -> Bool # (>) :: BamKey -> BamKey -> Bool # (>=) :: BamKey -> BamKey -> Bool # max :: BamKey -> BamKey -> BamKey # min :: BamKey -> BamKey -> BamKey #
Show BamKey Source #
Instance details Defined in Bio.Bam.Header Methods showsPrec :: Int -> BamKey -> ShowS # show :: BamKey -> String # showList :: [BamKey] -> ShowS #
IsString BamKey Source #
Instance details Defined in Bio.Bam.Header Methods fromString :: String -> BamKey #
Generic BamKey Source #
Instance details Defined in Bio.Bam.Header Associated Types type Rep BamKey :: Type -> Type # Methods from :: BamKey -> Rep BamKey x # to :: Rep BamKey x -> BamKey #
Hashable BamKey Source #
Instance details Defined in Bio.Bam.Header Methods hashWithSalt :: Int -> BamKey -> Int # hash :: BamKey -> Int #
type Rep BamKey Source #
Instance details Defined in Bio.Bam.Header type Rep BamKey = D1 (MetaData "BamKey" "Bio.Bam.Header" "biohazard-2.1-LAR9kjxyI4PJ81NLnLu2iq" True) (C1 (MetaCons "BamKey" PrefixI False) (S1 (MetaSel (Nothing :: Maybe Symbol) NoSourceUnpackedness NoSourceStrictness DecidedLazy) (Rec0 Word16)))

data BamHeader Source #

Constructors

BamHeader
Fields hdr_version :: (Int, Int) hdr_sorting :: BamSorting hdr_other_shit :: BamOtherShit

Instances

Eq BamHeader Source #
Instance details Defined in Bio.Bam.Header Methods (==) :: BamHeader -> BamHeader -> Bool # (/=) :: BamHeader -> BamHeader -> Bool #
Show BamHeader Source #
Instance details Defined in Bio.Bam.Header Methods showsPrec :: Int -> BamHeader -> ShowS # show :: BamHeader -> String # showList :: [BamHeader] -> ShowS #
Semigroup BamHeader Source #
Instance details Defined in Bio.Bam.Header Methods (<>) :: BamHeader -> BamHeader -> BamHeader # sconcat :: NonEmpty BamHeader -> BamHeader # stimes :: Integral b => b -> BamHeader -> BamHeader #
Monoid BamHeader Source #
Instance details Defined in Bio.Bam.Header Methods mempty :: BamHeader # mappend :: BamHeader -> BamHeader -> BamHeader # mconcat :: [BamHeader] -> BamHeader #

data BamSQ Source #

Constructors

BamSQ
Fields sq_name :: Bytes sq_length :: Int sq_other_shit :: BamOtherShit

Instances

Eq BamSQ Source #
Instance details Defined in Bio.Bam.Header Methods (==) :: BamSQ -> BamSQ -> Bool # (/=) :: BamSQ -> BamSQ -> Bool #
Show BamSQ Source #
Instance details Defined in Bio.Bam.Header Methods showsPrec :: Int -> BamSQ -> ShowS # show :: BamSQ -> String # showList :: [BamSQ] -> ShowS #
Generic BamSQ Source #
Instance details Defined in Bio.Bam.Header Associated Types type Rep BamSQ :: Type -> Type # Methods from :: BamSQ -> Rep BamSQ x # to :: Rep BamSQ x -> BamSQ #
Hashable BamSQ Source #
Instance details Defined in Bio.Bam.Header Methods hashWithSalt :: Int -> BamSQ -> Int # hash :: BamSQ -> Int #
type Rep BamSQ Source #
Instance details Defined in Bio.Bam.Header type Rep BamSQ = D1 (MetaData "BamSQ" "Bio.Bam.Header" "biohazard-2.1-LAR9kjxyI4PJ81NLnLu2iq" False) (C1 (MetaCons "BamSQ" PrefixI True) (S1 (MetaSel (Just "sq_name") NoSourceUnpackedness NoSourceStrictness DecidedLazy) (Rec0 Bytes) :: (S1 (MetaSel (Just "sq_length") NoSourceUnpackedness NoSourceStrictness DecidedLazy) (Rec0 Int) :: S1 (MetaSel (Just "sq_other_shit") NoSourceUnpackedness NoSourceStrictness DecidedLazy) (Rec0 BamOtherShit))))

data BamSorting Source #

Possible sorting orders from bam header. Thanks to samtools, which doesn't declare sorted files properly, we have to have the stupid Unknown state, too.

Constructors

Unknown	undeclared sort order
Unsorted	definitely not sorted
Grouped	grouped by query name
Queryname	sorted by query name
Coordinate	sorted by coordinate

Instances

Eq BamSorting Source #
Instance details Defined in Bio.Bam.Header Methods (==) :: BamSorting -> BamSorting -> Bool # (/=) :: BamSorting -> BamSorting -> Bool #
Show BamSorting Source #
Instance details Defined in Bio.Bam.Header Methods showsPrec :: Int -> BamSorting -> ShowS # show :: BamSorting -> String # showList :: [BamSorting] -> ShowS #
Semigroup BamSorting Source #
Instance details Defined in Bio.Bam.Header Methods (<>) :: BamSorting -> BamSorting -> BamSorting # sconcat :: NonEmpty BamSorting -> BamSorting # stimes :: Integral b => b -> BamSorting -> BamSorting #

type BamOtherShit = [(BamKey, Bytes)] Source #

newtype Refseq Source #

Reference sequence in Bam Bam enumerates the reference sequences and then sorts by index. We need to track that index if we want to reproduce the sorting order.

Constructors

Refseq
Fields unRefseq :: Word32

Instances

Bounded Refseq Source #
Instance details Defined in Bio.Bam.Header Methods minBound :: Refseq # maxBound :: Refseq #
Enum Refseq Source #
Instance details Defined in Bio.Bam.Header Methods succ :: Refseq -> Refseq # pred :: Refseq -> Refseq # toEnum :: Int -> Refseq # fromEnum :: Refseq -> Int # enumFrom :: Refseq -> [Refseq] # enumFromThen :: Refseq -> Refseq -> [Refseq] # enumFromTo :: Refseq -> Refseq -> [Refseq] # enumFromThenTo :: Refseq -> Refseq -> Refseq -> [Refseq] #
Eq Refseq Source #
Instance details Defined in Bio.Bam.Header Methods (==) :: Refseq -> Refseq -> Bool # (/=) :: Refseq -> Refseq -> Bool #
Ord Refseq Source #
Instance details Defined in Bio.Bam.Header Methods compare :: Refseq -> Refseq -> Ordering # (<) :: Refseq -> Refseq -> Bool # (<=) :: Refseq -> Refseq -> Bool # (>) :: Refseq -> Refseq -> Bool # (>=) :: Refseq -> Refseq -> Bool # max :: Refseq -> Refseq -> Refseq # min :: Refseq -> Refseq -> Refseq #
Show Refseq Source #
Instance details Defined in Bio.Bam.Header Methods showsPrec :: Int -> Refseq -> ShowS # show :: Refseq -> String # showList :: [Refseq] -> ShowS #
Ix Refseq Source #
Instance details Defined in Bio.Bam.Header Methods range :: (Refseq, Refseq) -> [Refseq] # index :: (Refseq, Refseq) -> Refseq -> Int # unsafeIndex :: (Refseq, Refseq) -> Refseq -> Int inRange :: (Refseq, Refseq) -> Refseq -> Bool # rangeSize :: (Refseq, Refseq) -> Int # unsafeRangeSize :: (Refseq, Refseq) -> Int
Hashable Refseq Source #
Instance details Defined in Bio.Bam.Header Methods hashWithSalt :: Int -> Refseq -> Int # hash :: Refseq -> Int #

invalidRefseq :: Refseq Source #

The invalid Refseq. Bam uses this value to encode a missing reference sequence.

isValidRefseq :: Refseq -> Bool Source #

Tests whether a reference sequence is valid. Returns true unless the the argument equals invalidRefseq.

invalidPos :: Int Source #

The invalid position. Bam uses this value to encode a missing position.

isValidPos :: Int -> Bool Source #

Tests whether a position is valid. Returns true unless the the argument equals invalidPos.

unknownMapq :: Int Source #

isKnownMapq :: Int -> Bool Source #

newtype Refs Source #

A list of reference sequences.

Constructors

Refs
Fields unRefs :: Vector BamSQ

Instances

Show Refs Source #
Instance details Defined in Bio.Bam.Header Methods showsPrec :: Int -> Refs -> ShowS # show :: Refs -> String # showList :: [Refs] -> ShowS #
Semigroup Refs Source #
Instance details Defined in Bio.Bam.Header Methods (<>) :: Refs -> Refs -> Refs # sconcat :: NonEmpty Refs -> Refs # stimes :: Integral b => b -> Refs -> Refs #
Monoid Refs Source #
Instance details Defined in Bio.Bam.Header Methods mempty :: Refs # mappend :: Refs -> Refs -> Refs # mconcat :: [Refs] -> Refs #

getRef :: Refs -> Refseq -> BamSQ Source #

compareNames :: Bytes -> Bytes -> Ordering Source #

Compares two sequence names the way samtools does. samtools sorts by "strnum_cmp":

if both strings start with a digit, parse the initial sequence of digits and compare numerically, if equal, continue behind the numbers
else compare the first characters (possibly NUL), if equal continue behind them
else both strings ended and the shorter one counts as smaller (and that part is stupid)

flagPaired :: Int Source #

flagProperlyPaired :: Int Source #

flagUnmapped :: Int Source #

flagMateUnmapped :: Int Source #

flagReversed :: Int Source #

flagMateReversed :: Int Source #

flagFirstMate :: Int Source #

flagSecondMate :: Int Source #

flagAuxillary :: Int Source #

flagSecondary :: Int Source #

flagFailsQC :: Int Source #

flagDuplicate :: Int Source #

flagSupplementary :: Int Source #

eflagTrimmed :: Int Source #

eflagMerged :: Int Source #

eflagAlternative :: Int Source #

eflagExactIndex :: Int Source #

distinctBin :: Int -> Int -> Int Source #

Computes the "distinct bin" according to the BAM binning scheme. If an alignment starts at pos and its CIGAR implies a length of len on the reference, then it goes into bin distinctBin pos len.

data MdOp Source #

Constructors

MdNum Int
MdRep Nucleotides
MdDel [Nucleotides]

Instances

Show MdOp Source #
Instance details Defined in Bio.Bam.Header Methods showsPrec :: Int -> MdOp -> ShowS # show :: MdOp -> String # showList :: [MdOp] -> ShowS #

readMd :: Bytes -> Maybe [MdOp] Source #

showMd :: [MdOp] -> Bytes Source #

Normalizes a series of MdOps and encodes them in the way BAM and SAM expect it.