Copyright | (c) Dominik Schrempf 2018 |
---|---|
License | GPL-3 |
Maintainer | dominik.schrempf@gmail.com |
Stability | unstable |
Portability | portable |
Safe Haskell | None |
Language | Haskell2010 |
Creation date: Thu Oct 4 18:40:18 2018.
Synopsis
- data MultiSequenceAlignment = MultiSequenceAlignment [SequenceName] Alphabet (Matrix Character)
- msaLength :: MultiSequenceAlignment -> Int
- msaNSequences :: MultiSequenceAlignment -> Int
- fromSequenceList :: [Sequence] -> Either String MultiSequenceAlignment
- toSequenceList :: MultiSequenceAlignment -> [Sequence]
- showMSA :: MultiSequenceAlignment -> ByteString
- summarizeMSA :: MultiSequenceAlignment -> ByteString
- msaJoin :: MultiSequenceAlignment -> MultiSequenceAlignment -> MultiSequenceAlignment
- msaConcatenate :: MultiSequenceAlignment -> MultiSequenceAlignment -> MultiSequenceAlignment
- msasConcatenate :: [MultiSequenceAlignment] -> MultiSequenceAlignment
- filterColumnsOnlyStd :: MultiSequenceAlignment -> MultiSequenceAlignment
- filterColumnsStd :: Double -> MultiSequenceAlignment -> MultiSequenceAlignment
- filterColumnsNoGaps :: MultiSequenceAlignment -> MultiSequenceAlignment
- type FrequencyData = Matrix Double
- toFrequencyData :: MultiSequenceAlignment -> FrequencyData
- kEffEntropy :: FrequencyData -> [Double]
- kEffHomoplasy :: FrequencyData -> [Double]
- countIUPACChars :: MultiSequenceAlignment -> Int
- countGaps :: MultiSequenceAlignment -> Int
- countUnknowns :: MultiSequenceAlignment -> Int
- subSample :: [Int] -> MultiSequenceAlignment -> MultiSequenceAlignment
- randomSubSample :: PrimMonad m => Int -> MultiSequenceAlignment -> Gen (PrimState m) -> m MultiSequenceAlignment
Documentation
data MultiSequenceAlignment Source #
A collection of sequences.
Instances
msaLength :: MultiSequenceAlignment -> Int Source #
Number of sites.
msaNSequences :: MultiSequenceAlignment -> Int Source #
Number of sequences.
- Input, output
fromSequenceList :: [Sequence] -> Either String MultiSequenceAlignment Source #
Create a MultiSequenceAlignment from a list of Sequences.
toSequenceList :: MultiSequenceAlignment -> [Sequence] Source #
Conversion to a list of Sequences.
showMSA :: MultiSequenceAlignment -> ByteString Source #
Show a MultiSequenceAlignment in text form.
summarizeMSA :: MultiSequenceAlignment -> ByteString Source #
Similar to summarizeSequenceList, but with a different header.
- Manipulation
msaJoin :: MultiSequenceAlignment -> MultiSequenceAlignment -> MultiSequenceAlignment Source #
Join two MultiSequenceAlignments vertically. That is, add more sequences to an alignment. See also msaConcatenate.
msaConcatenate :: MultiSequenceAlignment -> MultiSequenceAlignment -> MultiSequenceAlignment Source #
Concatenate two MultiSequenceAlignments horizontally. That is, add more sites to an alignment. See also msaJoin.
msasConcatenate :: [MultiSequenceAlignment] -> MultiSequenceAlignment Source #
Concatenate a list of MultiSequenceAlignments horizontally. See msaConcatenate.
filterColumnsOnlyStd :: MultiSequenceAlignment -> MultiSequenceAlignment Source #
Only keep columns with standard characters. Alignment columns with IUPAC characters are removed.
filterColumnsStd :: Double -> MultiSequenceAlignment -> MultiSequenceAlignment Source #
Filter columns with a proportion of standard characters larger than the given number.
filterColumnsNoGaps :: MultiSequenceAlignment -> MultiSequenceAlignment Source #
Only keep columns without gaps or unknown characters.
- Analysis
type FrequencyData = Matrix Double Source #
Frequency data; do not store the actual characters, but only their frequencies.
toFrequencyData :: MultiSequenceAlignment -> FrequencyData Source #
Calculate the frequency of characters in a multi sequence alignment.
kEffEntropy :: FrequencyData -> [Double] Source #
Diversity analysis. See kEffEntropy.
kEffHomoplasy :: FrequencyData -> [Double] Source #
Diversity analysis. See kEffEntropy.
countIUPACChars :: MultiSequenceAlignment -> Int Source #
Count the number of extended IUPAC (i.e., non-standard) characters in the alignment.
countGaps :: MultiSequenceAlignment -> Int Source #
Count the number of gaps in the alignment.
countUnknowns :: MultiSequenceAlignment -> Int Source #
Count the number of unknown characters in the alignment.
- Sub sample
subSample :: [Int] -> MultiSequenceAlignment -> MultiSequenceAlignment Source #
Sample the given sites from a multi sequence alignment.
randomSubSample :: PrimMonad m => Int -> MultiSequenceAlignment -> Gen (PrimState m) -> m MultiSequenceAlignment Source #
Randomly sample a given number of sites of the multi sequence alignment.