Copyright	(c) Dominik Schrempf 2021
License	GPL-3.0-or-later
Maintainer	dominik.schrempf@gmail.com
Stability	unstable
Portability	portable
Safe Haskell	None
Language	Haskell2010

ELynx.Data.Sequence.Alignment

Description

Creation date: Thu Oct 4 18:40:18 2018.

This module is to be imported qualified.

Synopsis

data Alignment = Alignment {
- names :: [Name]
- descriptions :: [Description]
- alphabet :: Alphabet
- matrix :: Matrix Character
}
length :: Alignment -> Int
nSequences :: Alignment -> Int
fromSequences :: [Sequence] -> Either String Alignment
toSequences :: Alignment -> [Sequence]
summarize :: Alignment -> ByteString
join :: Alignment -> Alignment -> Alignment
concat :: Alignment -> Alignment -> Alignment
concatAlignments :: [Alignment] -> Alignment
filterColsConstant :: Alignment -> Alignment
filterColsConstantSoft :: Alignment -> Alignment
filterColsOnlyStd :: Alignment -> Alignment
filterColsStd :: Double -> Alignment -> Alignment
filterColsNoGaps :: Alignment -> Alignment
type FrequencyData = Matrix Double
distribution :: FrequencyData -> [Double]
toFrequencyData :: Alignment -> FrequencyData
kEffEntropy :: FrequencyData -> [Double]
kEffHomoplasy :: FrequencyData -> [Double]
countIUPACChars :: Alignment -> Int
countGaps :: Alignment -> Int
countUnknowns :: Alignment -> Int
subSample :: [Int] -> Alignment -> Alignment
randomSubSample :: PrimMonad m => Int -> Alignment -> Gen (PrimState m) -> m Alignment

Documentation

data Alignment Source #

A collection of sequences.

Constructors

Alignment
Fields names :: [Name] descriptions :: [Description] alphabet :: Alphabet matrix :: Matrix Character

Instances

Instances details

Eq Alignment Source #
Instance details Defined in ELynx.Data.Sequence.Alignment Methods (==) :: Alignment -> Alignment -> Bool # (/=) :: Alignment -> Alignment -> Bool #
Show Alignment Source #
Instance details Defined in ELynx.Data.Sequence.Alignment Methods showsPrec :: Int -> Alignment -> ShowS # show :: Alignment -> String # showList :: [Alignment] -> ShowS #

length :: Alignment -> Int Source #

Number of sites.

nSequences :: Alignment -> Int Source #

Number of sequences.

Input, output

fromSequences :: [Sequence] -> Either String Alignment Source #

Create Alignment from a list of Sequences.

toSequences :: Alignment -> [Sequence] Source #

Conversion to list of Sequences.

summarize :: Alignment -> ByteString Source #

Similar to summarizeSequenceList but with a different header.

Manipulation

join :: Alignment -> Alignment -> Alignment Source #

Join two Alignments vertically. That is, add more sequences to an alignment. See also concat.

concat :: Alignment -> Alignment -> Alignment Source #

Concatenate two Alignments horizontally. That is, add more sites to an alignment. See also join.

concatAlignments :: [Alignment] -> Alignment Source #

Concatenate a list of Alignments horizontally. See concat.

filterColsConstant :: Alignment -> Alignment Source #

Only keep constant columns.

filterColsConstantSoft :: Alignment -> Alignment Source #

Only keep constant columns, and constant columns with at least one standard character as well as any number of gaps or unknowns.

filterColsOnlyStd :: Alignment -> Alignment Source #

Only keep columns with standard characters. Alignment columns with IUPAC characters are removed.

filterColsStd :: Double -> Alignment -> Alignment Source #

Filter columns with a proportion of standard characters larger than the given number.

filterColsNoGaps :: Alignment -> Alignment Source #

Only keep columns without gaps or unknown characters.

Analysis

type FrequencyData = Matrix Double Source #

Frequency data; do not store the actual characters, but their frequencies. The matrix is of size N x K, where N is the number of sites, and K is the number of characters.

distribution :: FrequencyData -> [Double] Source #

Calculate the distribution of characters.

toFrequencyData :: Alignment -> FrequencyData Source #

Calculate frequency of characters at each site of a multi sequence alignment.

kEffEntropy :: FrequencyData -> [Double] Source #

Diversity analysis. See kEffEntropy.

kEffHomoplasy :: FrequencyData -> [Double] Source #

Diversity analysis. See kEffEntropy.

countIUPACChars :: Alignment -> Int Source #

Count the number of extended IUPAC (i.e., not standard) characters in the alignment.

countGaps :: Alignment -> Int Source #

Count the number of gaps in the alignment.

countUnknowns :: Alignment -> Int Source #

Count the number of unknown characters in the alignment.

Sub sample

subSample :: [Int] -> Alignment -> Alignment Source #

Sample the given sites from a multi sequence alignment.

randomSubSample :: PrimMonad m => Int -> Alignment -> Gen (PrimState m) -> m Alignment Source #

Randomly sample a given number of sites of the multi sequence alignment.