Safe Haskell | None |
---|---|
Language | Haskell2010 |
Synopsis
- data BamMeta = BamMeta {
- meta_hdr :: !BamHeader
- meta_refs :: !Refs
- meta_pgs :: [Fix BamPG]
- meta_other_shit :: [(BamKey, BamOtherShit)]
- meta_comment :: [Bytes]
- parseBamMeta :: Parser BamMeta
- showBamMeta :: BamMeta -> Builder
- addPG :: MonadIO m => Maybe Version -> m (BamMeta -> BamMeta)
- newtype BamKey = BamKey Word16
- data BamHeader = BamHeader {
- hdr_version :: (Int, Int)
- hdr_sorting :: BamSorting
- hdr_other_shit :: BamOtherShit
- data BamSQ = BamSQ {}
- data BamSorting
- type BamOtherShit = [(BamKey, Bytes)]
- newtype Refseq = Refseq {}
- invalidRefseq :: Refseq
- isValidRefseq :: Refseq -> Bool
- invalidPos :: Int
- isValidPos :: Int -> Bool
- unknownMapq :: Int
- isKnownMapq :: Int -> Bool
- newtype Refs = Refs {}
- getRef :: Refs -> Refseq -> BamSQ
- compareNames :: Bytes -> Bytes -> Ordering
- flagPaired :: Int
- flagProperlyPaired :: Int
- flagUnmapped :: Int
- flagMateUnmapped :: Int
- flagReversed :: Int
- flagMateReversed :: Int
- flagFirstMate :: Int
- flagSecondMate :: Int
- flagAuxillary :: Int
- flagSecondary :: Int
- flagFailsQC :: Int
- flagDuplicate :: Int
- flagSupplementary :: Int
- eflagTrimmed :: Int
- eflagMerged :: Int
- eflagAlternative :: Int
- eflagExactIndex :: Int
- distinctBin :: Int -> Int -> Int
- data MdOp
- = MdNum Int
- | MdRep Nucleotides
- | MdDel [Nucleotides]
- readMd :: Bytes -> Maybe [MdOp]
- showMd :: [MdOp] -> Bytes
Documentation
BamMeta | |
|
showBamMeta :: BamMeta -> Builder Source #
Creates the textual form of Bam meta data.
Formatting is straight forward, only program lines are a bit involved. Our multiple chains may lead to common nodes, and we do not want to print multiple identical lines. At the same time, we may need to print multiple different lines that carry the same id. The solution is to memoize printed lines, and to reuse their identity if an identical line is needed. When printing a line, it gets its preferred identifier, but if it's already taken, a new identifier is made up by first removing any trailing number and then by appending numeric suffixes.
addPG :: MonadIO m => Maybe Version -> m (BamMeta -> BamMeta) Source #
Adds a new program line to a header. The new entry is (arbitrarily) prepended to the first existing chain, or forms a new singleton chain if none exists.
Exactly two characters, for the "named" fields in bam.
BamHeader | |
|
BamSQ | |
|
Instances
Eq BamSQ Source # | |
Show BamSQ Source # | |
Generic BamSQ Source # | |
Hashable BamSQ Source # | |
Defined in Bio.Bam.Header | |
type Rep BamSQ Source # | |
Defined in Bio.Bam.Header type Rep BamSQ = D1 (MetaData "BamSQ" "Bio.Bam.Header" "biohazard-2.1-LAR9kjxyI4PJ81NLnLu2iq" False) (C1 (MetaCons "BamSQ" PrefixI True) (S1 (MetaSel (Just "sq_name") NoSourceUnpackedness NoSourceStrictness DecidedLazy) (Rec0 Bytes) :*: (S1 (MetaSel (Just "sq_length") NoSourceUnpackedness NoSourceStrictness DecidedLazy) (Rec0 Int) :*: S1 (MetaSel (Just "sq_other_shit") NoSourceUnpackedness NoSourceStrictness DecidedLazy) (Rec0 BamOtherShit)))) |
data BamSorting Source #
Possible sorting orders from bam header. Thanks to samtools, which
doesn't declare sorted files properly, we have to have the stupid
Unknown
state, too.
Unknown | undeclared sort order |
Unsorted | definitely not sorted |
Grouped | grouped by query name |
Queryname | sorted by query name |
Coordinate | sorted by coordinate |
Instances
Eq BamSorting Source # | |
Defined in Bio.Bam.Header (==) :: BamSorting -> BamSorting -> Bool # (/=) :: BamSorting -> BamSorting -> Bool # | |
Show BamSorting Source # | |
Defined in Bio.Bam.Header showsPrec :: Int -> BamSorting -> ShowS # show :: BamSorting -> String # showList :: [BamSorting] -> ShowS # | |
Semigroup BamSorting Source # | |
Defined in Bio.Bam.Header (<>) :: BamSorting -> BamSorting -> BamSorting # sconcat :: NonEmpty BamSorting -> BamSorting # stimes :: Integral b => b -> BamSorting -> BamSorting # |
type BamOtherShit = [(BamKey, Bytes)] Source #
Reference sequence in Bam Bam enumerates the reference sequences and then sorts by index. We need to track that index if we want to reproduce the sorting order.
invalidRefseq :: Refseq Source #
The invalid Refseq. Bam uses this value to encode a missing reference sequence.
isValidRefseq :: Refseq -> Bool Source #
Tests whether a reference sequence is valid.
Returns true unless the the argument equals invalidRefseq
.
invalidPos :: Int Source #
The invalid position. Bam uses this value to encode a missing position.
isValidPos :: Int -> Bool Source #
Tests whether a position is valid.
Returns true unless the the argument equals invalidPos
.
unknownMapq :: Int Source #
isKnownMapq :: Int -> Bool Source #
A list of reference sequences.
compareNames :: Bytes -> Bytes -> Ordering Source #
Compares two sequence names the way samtools does. samtools sorts by "strnum_cmp":
- if both strings start with a digit, parse the initial sequence of digits and compare numerically, if equal, continue behind the numbers
- else compare the first characters (possibly NUL), if equal continue behind them
- else both strings ended and the shorter one counts as smaller (and that part is stupid)
flagPaired :: Int Source #
flagUnmapped :: Int Source #
flagReversed :: Int Source #
flagFirstMate :: Int Source #
flagSecondMate :: Int Source #
flagAuxillary :: Int Source #
flagSecondary :: Int Source #
flagFailsQC :: Int Source #
flagDuplicate :: Int Source #
eflagTrimmed :: Int Source #
eflagMerged :: Int Source #
distinctBin :: Int -> Int -> Int Source #
Computes the "distinct bin" according to the BAM binning scheme. If
an alignment starts at pos
and its CIGAR implies a length of len
on the reference, then it goes into bin distinctBin pos len
.