Safe Haskell | None |
---|---|
Language | Haskell2010 |
Parsers and Printers for BAM and SAM. We employ an Iteratee
interface, and we strive to support everything possible in BAM. So
far, the implementation of the nucleotides is somewhat lacking: we
do not have support for ambiguity codes, and the "=" symbol is not
understood.
- data BamRaw
- bamRaw :: FileOffset -> Bytes -> BamRaw
- virt_offset :: BamRaw -> FileOffset
- raw_data :: BamRaw -> Bytes
- data BamRec = BamRec {
- b_qname :: Seqid
- b_flag :: Int
- b_rname :: Refseq
- b_pos :: Int
- b_mapq :: Qual
- b_cigar :: Vector Cigar
- b_mrnm :: Refseq
- b_mpos :: Int
- b_isize :: Int
- b_seq :: Vector_Nucs_half Nucleotides
- b_qual :: Vector Qual
- b_exts :: Extensions
- b_virtual_offset :: FileOffset
- unpackBam :: BamRaw -> BamRec
- nullBamRec :: BamRec
- getMd :: BamRec -> Maybe [MdOp]
- data Cigar = !CigOp :* !Int
- data CigOp
- alignedLength :: Vector v Cigar => v Cigar -> Int
- newtype Nucleotides = Ns {}
- data Vector_Nucs_half a
- type Extensions = [(BamKey, Ext)]
- data Ext
- extAsInt :: Int -> BamKey -> BamRec -> Int
- extAsString :: BamKey -> BamRec -> Bytes
- setQualFlag :: Char -> BamRec -> BamRec
- deleteE :: BamKey -> Extensions -> Extensions
- insertE :: BamKey -> Ext -> Extensions -> Extensions
- updateE :: BamKey -> Ext -> Extensions -> Extensions
- adjustE :: (Ext -> Ext) -> BamKey -> Extensions -> Extensions
- isPaired :: BamRec -> Bool
- isProperlyPaired :: BamRec -> Bool
- isUnmapped :: BamRec -> Bool
- isMateUnmapped :: BamRec -> Bool
- isReversed :: BamRec -> Bool
- isMateReversed :: BamRec -> Bool
- isFirstMate :: BamRec -> Bool
- isSecondMate :: BamRec -> Bool
- isAuxillary :: BamRec -> Bool
- isFailsQC :: BamRec -> Bool
- isDuplicate :: BamRec -> Bool
- isTrimmed :: BamRec -> Bool
- isMerged :: BamRec -> Bool
- isAlternative :: BamRec -> Bool
- isExactIndex :: BamRec -> Bool
- type_mask :: Int
- progressBam :: MonadIO m => String -> Refs -> Int -> (String -> IO ()) -> Enumeratee [BamRaw] [BamRaw] m a
- data Word32 :: *
Documentation
Bam record in its native encoding along with virtual address.
bamRaw :: FileOffset -> Bytes -> BamRaw Source
Smart constructor. Makes sure we got a at least a full record.
virt_offset :: BamRaw -> FileOffset Source
internal representation of a BAM record
BamRec | |
|
Cigar line in BAM coding Bam encodes an operation and a length into a single integer, we keep those integers in an array.
alignedLength :: Vector v Cigar => v Cigar -> Int Source
extracts the aligned length from a cigar line This gives the length of an alignment as measured on the reference, which is different from the length on the query or the length of the alignment.
newtype Nucleotides Source
A nucleotide base in an alignment. Experience says we're dealing with Ns and gaps all the type, so purity be damned, they are included as if they were real bases.
To allow Nucleotides
s to be unpacked and incorporated into
containers, we choose to represent them the same way as the BAM file
format: as a 4 bit wide field. Gaps are encoded as 0 where they
make sense, N is 15. The contained Word8
is guaranteed to be
0..15.
Bounded Nucleotides Source | |
Enum Nucleotides Source | |
Eq Nucleotides Source | |
Ord Nucleotides Source | |
Read Nucleotides Source | |
Show Nucleotides Source | |
Ix Nucleotides Source | |
Storable Nucleotides Source | |
Unbox Nucleotides Source | |
MVector MVector Nucleotides Source | |
Vector Vector Nucleotides Source | |
Vector Vector_Nucs_half Nucleotides | |
Show (Vector_Nucs_half Nucleotides) | |
data Vector Nucleotides = V_Nucleotides (Vector Word8) Source | |
data MVector s Nucleotides = MV_Nucleotides (MVector s Word8) Source |
data Vector_Nucs_half a Source
A vector that packs two Nucleotides
into one byte, just like Bam does.
Vector Vector_Nucs_half Nucleotides Source | |
Show (Vector_Nucs_half Nucleotides) Source | |
type Mutable Vector_Nucs_half Source |
type Extensions = [(BamKey, Ext)] Source
A collection of extension fields. The key is actually only two Char
s, but that proved impractical.
(Hmm... we could introduce a Key type that is a 16 bit int, then give
it an instance IsString
... practical?)
extAsString :: BamKey -> BamRec -> Bytes Source
setQualFlag :: Char -> BamRec -> BamRec Source
deleteE :: BamKey -> Extensions -> Extensions Source
Deletes all occurences of some extension field.
insertE :: BamKey -> Ext -> Extensions -> Extensions Source
Blindly inserts an extension field. This can create duplicates (and there is no telling how other tools react to that).
updateE :: BamKey -> Ext -> Extensions -> Extensions Source
Deletes all occurences of an extension field, then inserts it with
a new value. This is safer than insertE
, but also more expensive.
adjustE :: (Ext -> Ext) -> BamKey -> Extensions -> Extensions Source
Adjusts a named extension by applying a function.
isProperlyPaired :: BamRec -> Bool Source
isUnmapped :: BamRec -> Bool Source
isMateUnmapped :: BamRec -> Bool Source
isReversed :: BamRec -> Bool Source
isMateReversed :: BamRec -> Bool Source
isFirstMate :: BamRec -> Bool Source
isSecondMate :: BamRec -> Bool Source
isAuxillary :: BamRec -> Bool Source
isDuplicate :: BamRec -> Bool Source
isAlternative :: BamRec -> Bool Source
isExactIndex :: BamRec -> Bool Source
progressBam :: MonadIO m => String -> Refs -> Int -> (String -> IO ()) -> Enumeratee [BamRaw] [BamRaw] m a Source
A simple progress indicator that prints sequence id and position.
data Word32 :: *
Bounded Word32 | |
Enum Word32 | |
Eq Word32 | |
Integral Word32 | |
Data Word32 | |
Num Word32 | |
Ord Word32 | |
Read Word32 | |
Real Word32 | |
Show Word32 | |
Ix Word32 | |
Storable Word32 | |
Bits Word32 | |
FiniteBits Word32 | |
Unbox Word32 | |
Prim Word32 | |
Lift Word32 | |
Hashable Word32 | |
Random Word32 | |
MVector MVector Word32 | |
Vector Vector Word32 | |
data Vector Word32 = V_Word32 (Vector Word32) | |
data MVector s Word32 = MV_Word32 (MVector s Word32) |