module ELynx.Data.Sequence.Alignment
( Alignment (..),
length,
nSequences,
fromSequences,
toSequences,
summarize,
join,
concat,
concatAlignments,
filterColsConstant,
filterColsConstantSoft,
filterColsOnlyStd,
filterColsStd,
filterColsNoGaps,
FrequencyData,
distribution,
toFrequencyData,
kEffEntropy,
kEffHomoplasy,
countIUPACChars,
countGaps,
countUnknowns,
subSample,
randomSubSample,
)
where
import Control.Monad hiding (join)
import Control.Monad.Primitive
import Control.Parallel.Strategies
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.List hiding
( concat,
length,
)
import qualified Data.Matrix.Unboxed as M
import qualified Data.Vector.Unboxed as V
import qualified ELynx.Data.Alphabet.Alphabet as A
import ELynx.Data.Alphabet.Character
import qualified ELynx.Data.Alphabet.DistributionDiversity as D
import ELynx.Data.Sequence.Defaults
import qualified ELynx.Data.Sequence.Sequence as S
import System.Random.MWC
import Prelude hiding
( concat,
length,
)
data Alignment = Alignment
{ Alignment -> [Name]
names :: [S.Name],
Alignment -> [Name]
descriptions :: [S.Description],
Alignment -> Alphabet
alphabet :: A.Alphabet,
Alignment -> Matrix Character
matrix :: M.Matrix Character
}
deriving (Int -> Alignment -> ShowS
[Alignment] -> ShowS
Alignment -> String
(Int -> Alignment -> ShowS)
-> (Alignment -> String)
-> ([Alignment] -> ShowS)
-> Show Alignment
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Alignment] -> ShowS
$cshowList :: [Alignment] -> ShowS
show :: Alignment -> String
$cshow :: Alignment -> String
showsPrec :: Int -> Alignment -> ShowS
$cshowsPrec :: Int -> Alignment -> ShowS
Show, Alignment -> Alignment -> Bool
(Alignment -> Alignment -> Bool)
-> (Alignment -> Alignment -> Bool) -> Eq Alignment
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Alignment -> Alignment -> Bool
$c/= :: Alignment -> Alignment -> Bool
== :: Alignment -> Alignment -> Bool
$c== :: Alignment -> Alignment -> Bool
Eq)
length :: Alignment -> Int
length :: Alignment -> Int
length = Matrix Character -> Int
forall a. Context a => Matrix a -> Int
M.cols (Matrix Character -> Int)
-> (Alignment -> Matrix Character) -> Alignment -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Alignment -> Matrix Character
matrix
nSequences :: Alignment -> Int
nSequences :: Alignment -> Int
nSequences = Matrix Character -> Int
forall a. Context a => Matrix a -> Int
M.rows (Matrix Character -> Int)
-> (Alignment -> Matrix Character) -> Alignment -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Alignment -> Matrix Character
matrix
fromSequences :: [S.Sequence] -> Either String Alignment
fromSequences :: [Sequence] -> Either String Alignment
fromSequences [Sequence]
ss
| [Sequence] -> Bool
S.equalLength [Sequence]
ss Bool -> Bool -> Bool
&& [Alphabet] -> Bool
forall a. Eq a => [a] -> Bool
allEqual ((Sequence -> Alphabet) -> [Sequence] -> [Alphabet]
forall a b. (a -> b) -> [a] -> [b]
map Sequence -> Alphabet
S.alphabet [Sequence]
ss) =
Alignment -> Either String Alignment
forall a b. b -> Either a b
Right (Alignment -> Either String Alignment)
-> Alignment -> Either String Alignment
forall a b. (a -> b) -> a -> b
$
[Name] -> [Name] -> Alphabet -> Matrix Character -> Alignment
Alignment [Name]
ns [Name]
ds Alphabet
a Matrix Character
d
| [Sequence] -> Bool
S.equalLength [Sequence]
ss = String -> Either String Alignment
forall a b. a -> Either a b
Left String
"Sequences do not have equal codes."
| Bool
otherwise = String -> Either String Alignment
forall a b. a -> Either a b
Left String
"Sequences do not have equal lengths."
where
ns :: [Name]
ns = (Sequence -> Name) -> [Sequence] -> [Name]
forall a b. (a -> b) -> [a] -> [b]
map Sequence -> Name
S.name [Sequence]
ss
ds :: [Name]
ds = (Sequence -> Name) -> [Sequence] -> [Name]
forall a b. (a -> b) -> [a] -> [b]
map Sequence -> Name
S.description [Sequence]
ss
a :: Alphabet
a = Sequence -> Alphabet
S.alphabet (Sequence -> Alphabet) -> Sequence -> Alphabet
forall a b. (a -> b) -> a -> b
$ [Sequence] -> Sequence
forall a. [a] -> a
head [Sequence]
ss
bss :: [Characters]
bss = (Sequence -> Characters) -> [Sequence] -> [Characters]
forall a b. (a -> b) -> [a] -> [b]
map Sequence -> Characters
S.characters [Sequence]
ss
d :: Matrix Character
d = [Characters] -> Matrix Character
forall a. Context a => [Vector a] -> Matrix a
M.fromRows [Characters]
bss
allEqual :: [a] -> Bool
allEqual [] = Bool
True
allEqual [a]
xs = (a -> Bool) -> [a] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== [a] -> a
forall a. [a] -> a
head [a]
xs) ([a] -> Bool) -> [a] -> Bool
forall a b. (a -> b) -> a -> b
$ [a] -> [a]
forall a. [a] -> [a]
tail [a]
xs
toSequences :: Alignment -> [S.Sequence]
toSequences :: Alignment -> [Sequence]
toSequences (Alignment [Name]
ns [Name]
ds Alphabet
a Matrix Character
da) =
(Name -> Name -> Characters -> Sequence)
-> [Name] -> [Name] -> [Characters] -> [Sequence]
forall a b c d. (a -> b -> c -> d) -> [a] -> [b] -> [c] -> [d]
zipWith3
(\Name
n Name
d Characters
r -> Name -> Name -> Alphabet -> Characters -> Sequence
S.Sequence Name
n Name
d Alphabet
a Characters
r)
[Name]
ns
[Name]
ds
[Characters]
rows
where
rows :: [Characters]
rows = Matrix Character -> [Characters]
forall a. Context a => Matrix a -> [Vector a]
M.toRows Matrix Character
da
header :: Alignment -> BL.ByteString
Alignment
a =
[Name] -> Name
BL.unlines ([Name] -> Name) -> [Name] -> Name
forall a b. (a -> b) -> a -> b
$
[ String -> Name
BL.pack String
"Multi sequence alignment.",
String -> Name
BL.pack (String -> Name) -> String -> Name
forall a b. (a -> b) -> a -> b
$ String
"Code: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Alphabet -> String
A.alphabetDescription (Alignment -> Alphabet
alphabet Alignment
a) String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
".",
String -> Name
BL.pack (String -> Name) -> String -> Name
forall a b. (a -> b) -> a -> b
$ String
"Length: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show (Alignment -> Int
length Alignment
a) String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"."
]
[Name] -> [Name] -> [Name]
forall a. [a] -> [a] -> [a]
++ [Name]
reportLengthSummary
[Name] -> [Name] -> [Name]
forall a. [a] -> [a] -> [a]
++ [Name]
reportNumberSummary
where
reportLengthSummary :: [Name]
reportLengthSummary =
[ String -> Name
BL.pack (String -> Name) -> String -> Name
forall a b. (a -> b) -> a -> b
$
String
"For each sequence, the "
String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
summaryLength
String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" first bases are shown."
| Alignment -> Int
length Alignment
a Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
summaryLength
]
reportNumberSummary :: [Name]
reportNumberSummary =
[ String -> Name
BL.pack (String -> Name) -> String -> Name
forall a b. (a -> b) -> a -> b
$
Int -> String
forall a. Show a => a -> String
show Int
summaryNSequences
String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" out of "
String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show (Alignment -> Int
nSequences Alignment
a)
String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" sequences are shown."
| Alignment -> Int
nSequences Alignment
a Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
summaryNSequences
]
summarize :: Alignment -> BL.ByteString
summarize :: Alignment -> Name
summarize Alignment
a = Alignment -> Name
header Alignment
a Name -> Name -> Name
forall a. Semigroup a => a -> a -> a
<> [Sequence] -> Name
S.body (Alignment -> [Sequence]
toSequences Alignment
a)
(===) :: V.Unbox a => M.Matrix a -> M.Matrix a -> M.Matrix a
=== :: Matrix a -> Matrix a -> Matrix a
(===) Matrix a
l Matrix a
r = [Vector a] -> Matrix a
forall a. Context a => [Vector a] -> Matrix a
M.fromRows ([Vector a] -> Matrix a) -> [Vector a] -> Matrix a
forall a b. (a -> b) -> a -> b
$ [Vector a]
lRs [Vector a] -> [Vector a] -> [Vector a]
forall a. [a] -> [a] -> [a]
++ [Vector a]
rRs
where
lRs :: [Vector a]
lRs = Matrix a -> [Vector a]
forall a. Context a => Matrix a -> [Vector a]
M.toRows Matrix a
l
rRs :: [Vector a]
rRs = Matrix a -> [Vector a]
forall a. Context a => Matrix a -> [Vector a]
M.toRows Matrix a
r
(|||) :: V.Unbox a => M.Matrix a -> M.Matrix a -> M.Matrix a
||| :: Matrix a -> Matrix a -> Matrix a
(|||) Matrix a
l Matrix a
r = [Vector a] -> Matrix a
forall a. Context a => [Vector a] -> Matrix a
M.fromColumns ([Vector a] -> Matrix a) -> [Vector a] -> Matrix a
forall a b. (a -> b) -> a -> b
$ [Vector a]
lCs [Vector a] -> [Vector a] -> [Vector a]
forall a. [a] -> [a] -> [a]
++ [Vector a]
rCs
where
lCs :: [Vector a]
lCs = Matrix a -> [Vector a]
forall a. Context a => Matrix a -> [Vector a]
M.toColumns Matrix a
l
rCs :: [Vector a]
rCs = Matrix a -> [Vector a]
forall a. Context a => Matrix a -> [Vector a]
M.toColumns Matrix a
r
join :: Alignment -> Alignment -> Alignment
join :: Alignment -> Alignment -> Alignment
join Alignment
t Alignment
b
| Alignment -> Int
length Alignment
t Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Alignment -> Int
length Alignment
b =
String -> Alignment
forall a. HasCallStack => String -> a
error
String
"join: Multi sequence alignments do not have equal lengths."
| Alignment -> Alphabet
alphabet Alignment
t Alphabet -> Alphabet -> Bool
forall a. Eq a => a -> a -> Bool
/= Alignment -> Alphabet
alphabet Alignment
b =
String -> Alignment
forall a. HasCallStack => String -> a
error
String
"join: Multi sequence alignments do not have equal alphabets."
| Bool
otherwise = [Name] -> [Name] -> Alphabet -> Matrix Character -> Alignment
Alignment [Name]
ns [Name]
ds Alphabet
al (Matrix Character
tD Matrix Character -> Matrix Character -> Matrix Character
forall a. Unbox a => Matrix a -> Matrix a -> Matrix a
=== Matrix Character
bD)
where
ns :: [Name]
ns = Alignment -> [Name]
names Alignment
t [Name] -> [Name] -> [Name]
forall a. [a] -> [a] -> [a]
++ Alignment -> [Name]
names Alignment
b
ds :: [Name]
ds = Alignment -> [Name]
descriptions Alignment
t [Name] -> [Name] -> [Name]
forall a. [a] -> [a] -> [a]
++ Alignment -> [Name]
descriptions Alignment
b
tD :: Matrix Character
tD = Alignment -> Matrix Character
matrix Alignment
t
bD :: Matrix Character
bD = Alignment -> Matrix Character
matrix Alignment
b
al :: Alphabet
al = Alignment -> Alphabet
alphabet Alignment
t
concat :: Alignment -> Alignment -> Alignment
concat :: Alignment -> Alignment -> Alignment
concat Alignment
l Alignment
r
| Alignment -> Int
nSequences Alignment
l Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Alignment -> Int
nSequences Alignment
r =
String -> Alignment
forall a. HasCallStack => String -> a
error
String
"concat: Multi sequence alignments do not have an equal number of sequences."
| Alignment -> Alphabet
alphabet Alignment
l Alphabet -> Alphabet -> Bool
forall a. Eq a => a -> a -> Bool
/= Alignment -> Alphabet
alphabet Alignment
r =
String -> Alignment
forall a. HasCallStack => String -> a
error String
"concat: Multi sequence alignments do not have an equal alphabets."
| Alignment -> [Name]
names Alignment
l [Name] -> [Name] -> Bool
forall a. Eq a => a -> a -> Bool
/= Alignment -> [Name]
names Alignment
r =
String -> Alignment
forall a. HasCallStack => String -> a
error String
"concat: Multi sequence alignments do not have an equal names."
| Alignment -> [Name]
descriptions Alignment
l [Name] -> [Name] -> Bool
forall a. Eq a => a -> a -> Bool
/= Alignment -> [Name]
descriptions Alignment
r =
String -> Alignment
forall a. HasCallStack => String -> a
error String
"concat: Multi sequence alignments do not have an equal descriptions."
| Bool
otherwise =
[Name] -> [Name] -> Alphabet -> Matrix Character -> Alignment
Alignment (Alignment -> [Name]
names Alignment
l) (Alignment -> [Name]
descriptions Alignment
l) (Alignment -> Alphabet
alphabet Alignment
l) (Matrix Character
lD Matrix Character -> Matrix Character -> Matrix Character
forall a. Unbox a => Matrix a -> Matrix a -> Matrix a
||| Matrix Character
rD)
where
lD :: Matrix Character
lD = Alignment -> Matrix Character
matrix Alignment
l
rD :: Matrix Character
rD = Alignment -> Matrix Character
matrix Alignment
r
concatAlignments :: [Alignment] -> Alignment
concatAlignments :: [Alignment] -> Alignment
concatAlignments [] = String -> Alignment
forall a. HasCallStack => String -> a
error String
"concatAlignments: Nothing to concatenate."
concatAlignments [Alignment
a] = Alignment
a
concatAlignments [Alignment]
as = (Alignment -> Alignment -> Alignment)
-> Alignment -> [Alignment] -> Alignment
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' Alignment -> Alignment -> Alignment
concat ([Alignment] -> Alignment
forall a. [a] -> a
head [Alignment]
as) ([Alignment] -> [Alignment]
forall a. [a] -> [a]
tail [Alignment]
as)
filterColsWith :: (V.Vector Character -> Bool) -> Alignment -> Alignment
filterColsWith :: (Characters -> Bool) -> Alignment -> Alignment
filterColsWith Characters -> Bool
p Alignment
a = Alignment
a {matrix :: Matrix Character
matrix = Matrix Character
m'}
where
m' :: Matrix Character
m' = [Characters] -> Matrix Character
forall a. Context a => [Vector a] -> Matrix a
M.fromColumns ([Characters] -> Matrix Character)
-> (Matrix Character -> [Characters])
-> Matrix Character
-> Matrix Character
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Characters -> Bool) -> [Characters] -> [Characters]
forall a. (a -> Bool) -> [a] -> [a]
filter Characters -> Bool
p ([Characters] -> [Characters])
-> (Matrix Character -> [Characters])
-> Matrix Character
-> [Characters]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Matrix Character -> [Characters]
forall a. Context a => Matrix a -> [Vector a]
M.toColumns (Matrix Character -> Matrix Character)
-> Matrix Character -> Matrix Character
forall a b. (a -> b) -> a -> b
$ Alignment -> Matrix Character
matrix Alignment
a
filterColsConstant :: Alignment -> Alignment
filterColsConstant :: Alignment -> Alignment
filterColsConstant = (Characters -> Bool) -> Alignment -> Alignment
filterColsWith (\Characters
v -> (Character -> Bool) -> Characters -> Bool
forall a. Unbox a => (a -> Bool) -> Vector a -> Bool
V.all (Character -> Character -> Bool
forall a. Eq a => a -> a -> Bool
== Characters -> Character
forall a. Unbox a => Vector a -> a
V.head Characters
v) Characters
v)
filterColsConstantSoft :: Alignment -> Alignment
filterColsConstantSoft :: Alignment -> Alignment
filterColsConstantSoft Alignment
a = (Characters -> Bool) -> Alignment -> Alignment
filterColsWith Characters -> Bool
f Alignment
a
where
al :: Alphabet
al = Alignment -> Alphabet
alphabet Alignment
a
f :: Characters -> Bool
f Characters
v = case (Character -> Bool) -> Characters -> Maybe Character
forall a. Unbox a => (a -> Bool) -> Vector a -> Maybe a
V.find (Alphabet -> Character -> Bool
A.isStd Alphabet
al) Characters
v of
Maybe Character
Nothing -> Bool
False
Just Character
c -> (Character -> Bool) -> Characters -> Bool
forall a. Unbox a => (a -> Bool) -> Vector a -> Bool
V.all (\Character
x -> Character
x Character -> Character -> Bool
forall a. Eq a => a -> a -> Bool
== Character
c Bool -> Bool -> Bool
|| Alphabet -> Character -> Bool
A.isGap Alphabet
al Character
x Bool -> Bool -> Bool
|| Alphabet -> Character -> Bool
A.isUnknown Alphabet
al Character
x) Characters
v
filterColsOnlyStd :: Alignment -> Alignment
filterColsOnlyStd :: Alignment -> Alignment
filterColsOnlyStd Alignment
a = (Characters -> Bool) -> Alignment -> Alignment
filterColsWith ((Character -> Bool) -> Characters -> Bool
forall a. Unbox a => (a -> Bool) -> Vector a -> Bool
V.all ((Character -> Bool) -> Characters -> Bool)
-> (Character -> Bool) -> Characters -> Bool
forall a b. (a -> b) -> a -> b
$ Alphabet -> Character -> Bool
A.isStd (Alignment -> Alphabet
alphabet Alignment
a)) Alignment
a
filterColsStd :: Double -> Alignment -> Alignment
filterColsStd :: Double -> Alignment -> Alignment
filterColsStd Double
prop Alignment
a =
(Characters -> Bool) -> Alignment -> Alignment
filterColsWith
(\Characters
col -> Double
prop Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
n Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
<= Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Characters -> Int
forall a. Unbox a => Vector a -> Int
V.length ((Character -> Bool) -> Characters -> Characters
forall a. Unbox a => (a -> Bool) -> Vector a -> Vector a
V.filter (Alphabet -> Character -> Bool
A.isStd Alphabet
al) Characters
col)))
Alignment
a
where
al :: Alphabet
al = Alignment -> Alphabet
alphabet Alignment
a
n :: Double
n = Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Double) -> Int -> Double
forall a b. (a -> b) -> a -> b
$ Alignment -> Int
nSequences Alignment
a
filterColsNoGaps :: Alignment -> Alignment
filterColsNoGaps :: Alignment -> Alignment
filterColsNoGaps Alignment
a = (Characters -> Bool) -> Alignment -> Alignment
filterColsWith ((Character -> Bool) -> Characters -> Bool
forall a. Unbox a => (a -> Bool) -> Vector a -> Bool
V.all ((Character -> Bool) -> Characters -> Bool)
-> (Character -> Bool) -> Characters -> Bool
forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not (Bool -> Bool) -> (Character -> Bool) -> Character -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Alphabet -> Character -> Bool
A.isGap (Alignment -> Alphabet
alphabet Alignment
a)) Alignment
a
type FrequencyData = M.Matrix Double
fMapColParChunk ::
(V.Unbox a, V.Unbox b) =>
Int ->
(V.Vector a -> V.Vector b) ->
M.Matrix a ->
M.Matrix b
fMapColParChunk :: Int -> (Vector a -> Vector b) -> Matrix a -> Matrix b
fMapColParChunk Int
n Vector a -> Vector b
f Matrix a
m =
[Vector b] -> Matrix b
forall a. Context a => [Vector a] -> Matrix a
M.fromColumns ((Vector a -> Vector b) -> [Vector a] -> [Vector b]
forall a b. (a -> b) -> [a] -> [b]
map Vector a -> Vector b
f (Matrix a -> [Vector a]
forall a. Context a => Matrix a -> [Vector a]
M.toColumns Matrix a
m) [Vector b] -> Strategy [Vector b] -> [Vector b]
forall a. a -> Strategy a -> a
`using` Int -> Strategy (Vector b) -> Strategy [Vector b]
forall a. Int -> Strategy a -> Strategy [a]
parListChunk Int
n Strategy (Vector b)
forall a. Strategy a
rseq)
toFrequencyData :: Alignment -> FrequencyData
toFrequencyData :: Alignment -> FrequencyData
toFrequencyData Alignment
a = Int
-> (Characters -> Vector Double)
-> Matrix Character
-> FrequencyData
forall a b.
(Unbox a, Unbox b) =>
Int -> (Vector a -> Vector b) -> Matrix a -> Matrix b
fMapColParChunk Int
100 (AlphabetSpec -> Characters -> Vector Double
forall (v :: * -> *).
(Vector v Character, Vector v Int, Vector v Double) =>
AlphabetSpec -> v Character -> v Double
D.frequencyCharacters AlphabetSpec
spec) (Alignment -> Matrix Character
matrix Alignment
a)
where
spec :: AlphabetSpec
spec = Alphabet -> AlphabetSpec
A.alphabetSpec (Alignment -> Alphabet
alphabet Alignment
a)
distribution :: FrequencyData -> [Double]
distribution :: FrequencyData -> [Double]
distribution FrequencyData
fd =
(Double -> Double) -> [Double] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
nSites) ([Double] -> [Double]) -> [Double] -> [Double]
forall a b. (a -> b) -> a -> b
$
Vector Double -> [Double]
forall a. Unbox a => Vector a -> [a]
V.toList (Vector Double -> [Double]) -> Vector Double -> [Double]
forall a b. (a -> b) -> a -> b
$
(Vector Double -> Vector Double -> Vector Double)
-> [Vector Double] -> Vector Double
forall (t :: * -> *) a. Foldable t => (a -> a -> a) -> t a -> a
foldl1
((Double -> Double -> Double)
-> Vector Double -> Vector Double -> Vector Double
forall a b c.
(Unbox a, Unbox b, Unbox c) =>
(a -> b -> c) -> Vector a -> Vector b -> Vector c
V.zipWith Double -> Double -> Double
forall a. Num a => a -> a -> a
(+))
(FrequencyData -> [Vector Double]
forall a. Context a => Matrix a -> [Vector a]
M.toColumns FrequencyData
fd)
where
nSites :: Int
nSites = FrequencyData -> Int
forall a. Context a => Matrix a -> Int
M.cols FrequencyData
fd
parMapChunk :: Int -> (a -> b) -> [a] -> [b]
parMapChunk :: Int -> (a -> b) -> [a] -> [b]
parMapChunk Int
n a -> b
f [a]
as = (a -> b) -> [a] -> [b]
forall a b. (a -> b) -> [a] -> [b]
map a -> b
f [a]
as [b] -> Strategy [b] -> [b]
forall a. a -> Strategy a -> a
`using` Int -> Strategy b -> Strategy [b]
forall a. Int -> Strategy a -> Strategy [a]
parListChunk Int
n Strategy b
forall a. Strategy a
rseq
chunksize :: Int
chunksize :: Int
chunksize = Int
500
kEffEntropy :: FrequencyData -> [Double]
kEffEntropy :: FrequencyData -> [Double]
kEffEntropy FrequencyData
fd = Int -> (Vector Double -> Double) -> [Vector Double] -> [Double]
forall a b. Int -> (a -> b) -> [a] -> [b]
parMapChunk Int
chunksize Vector Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
D.kEffEntropy (FrequencyData -> [Vector Double]
forall a. Context a => Matrix a -> [Vector a]
M.toColumns FrequencyData
fd)
kEffHomoplasy :: FrequencyData -> [Double]
kEffHomoplasy :: FrequencyData -> [Double]
kEffHomoplasy FrequencyData
fd = Int -> (Vector Double -> Double) -> [Vector Double] -> [Double]
forall a b. Int -> (a -> b) -> [a] -> [b]
parMapChunk Int
chunksize Vector Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
D.kEffHomoplasy (FrequencyData -> [Vector Double]
forall a. Context a => Matrix a -> [Vector a]
M.toColumns FrequencyData
fd)
countIUPACChars :: Alignment -> Int
countIUPACChars :: Alignment -> Int
countIUPACChars Alignment
a = Characters -> Int
forall a. Unbox a => Vector a -> Int
V.length (Characters -> Int)
-> (Characters -> Characters) -> Characters -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Character -> Bool) -> Characters -> Characters
forall a. Unbox a => (a -> Bool) -> Vector a -> Vector a
V.filter (Alphabet -> Character -> Bool
A.isIUPAC (Alignment -> Alphabet
alphabet Alignment
a)) (Characters -> Int) -> Characters -> Int
forall a b. (a -> b) -> a -> b
$ Characters
allChars
where
allChars :: Characters
allChars = Matrix Character -> Characters
forall a. Context a => Matrix a -> Vector a
M.flatten (Matrix Character -> Characters) -> Matrix Character -> Characters
forall a b. (a -> b) -> a -> b
$ Alignment -> Matrix Character
matrix Alignment
a
countGaps :: Alignment -> Int
countGaps :: Alignment -> Int
countGaps Alignment
a = Characters -> Int
forall a. Unbox a => Vector a -> Int
V.length (Characters -> Int)
-> (Characters -> Characters) -> Characters -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Character -> Bool) -> Characters -> Characters
forall a. Unbox a => (a -> Bool) -> Vector a -> Vector a
V.filter (Alphabet -> Character -> Bool
A.isGap (Alignment -> Alphabet
alphabet Alignment
a)) (Characters -> Int) -> Characters -> Int
forall a b. (a -> b) -> a -> b
$ Characters
allChars
where
allChars :: Characters
allChars = Matrix Character -> Characters
forall a. Context a => Matrix a -> Vector a
M.flatten (Matrix Character -> Characters) -> Matrix Character -> Characters
forall a b. (a -> b) -> a -> b
$ Alignment -> Matrix Character
matrix Alignment
a
countUnknowns :: Alignment -> Int
countUnknowns :: Alignment -> Int
countUnknowns Alignment
a = Characters -> Int
forall a. Unbox a => Vector a -> Int
V.length (Characters -> Int)
-> (Characters -> Characters) -> Characters -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Character -> Bool) -> Characters -> Characters
forall a. Unbox a => (a -> Bool) -> Vector a -> Vector a
V.filter (Alphabet -> Character -> Bool
A.isUnknown (Alignment -> Alphabet
alphabet Alignment
a)) (Characters -> Int) -> Characters -> Int
forall a b. (a -> b) -> a -> b
$ Characters
allChars
where
allChars :: Characters
allChars = Matrix Character -> Characters
forall a. Context a => Matrix a -> Vector a
M.flatten (Matrix Character -> Characters) -> Matrix Character -> Characters
forall a b. (a -> b) -> a -> b
$ Alignment -> Matrix Character
matrix Alignment
a
subSampleMatrix :: V.Unbox a => [Int] -> M.Matrix a -> M.Matrix a
subSampleMatrix :: [Int] -> Matrix a -> Matrix a
subSampleMatrix [Int]
is Matrix a
m =
[Vector a] -> Matrix a
forall a. Context a => [Vector a] -> Matrix a
M.fromColumns ([Vector a] -> Matrix a) -> [Vector a] -> Matrix a
forall a b. (a -> b) -> a -> b
$ ([Vector a] -> Int -> [Vector a])
-> [Vector a] -> [Int] -> [Vector a]
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\[Vector a]
a Int
i -> Matrix a -> Int -> Vector a
forall a. Context a => Matrix a -> Int -> Vector a
M.takeColumn Matrix a
m Int
i Vector a -> [Vector a] -> [Vector a]
forall a. a -> [a] -> [a]
: [Vector a]
a) [] ([Int] -> [Int]
forall a. [a] -> [a]
reverse [Int]
is)
subSample :: [Int] -> Alignment -> Alignment
subSample :: [Int] -> Alignment -> Alignment
subSample [Int]
is Alignment
a = Alignment
a {matrix :: Matrix Character
matrix = Matrix Character
m'} where m' :: Matrix Character
m' = [Int] -> Matrix Character -> Matrix Character
forall a. Unbox a => [Int] -> Matrix a -> Matrix a
subSampleMatrix [Int]
is (Matrix Character -> Matrix Character)
-> Matrix Character -> Matrix Character
forall a b. (a -> b) -> a -> b
$ Alignment -> Matrix Character
matrix Alignment
a
randomSubSample ::
PrimMonad m => Int -> Alignment -> Gen (PrimState m) -> m Alignment
randomSubSample :: Int -> Alignment -> Gen (PrimState m) -> m Alignment
randomSubSample Int
n Alignment
a Gen (PrimState m)
g = do
let l :: Int
l = Alignment -> Int
length Alignment
a
[Int]
is <- Int -> m Int -> m [Int]
forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a]
replicateM Int
n (m Int -> m [Int]) -> m Int -> m [Int]
forall a b. (a -> b) -> a -> b
$ (Int, Int) -> Gen (PrimState m) -> m Int
forall a (m :: * -> *).
(Variate a, PrimMonad m) =>
(a, a) -> Gen (PrimState m) -> m a
uniformR (Int
0, Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) Gen (PrimState m)
g
Alignment -> m Alignment
forall (m :: * -> *) a. Monad m => a -> m a
return (Alignment -> m Alignment) -> Alignment -> m Alignment
forall a b. (a -> b) -> a -> b
$ [Int] -> Alignment -> Alignment
subSample [Int]
is Alignment
a