module Math.Diversity.GenerateDiversity ( fragmentPos
, generatePositionMap ) where
import qualified Data.Map as M
import Data.List
import Data.Fasta.String
import qualified Data.Sequence as Seq
import qualified Data.List.Split as Split
import Math.Diversity.Types
-- | Extract the sample name from a FASTA header.
--
-- The header is split on @\'|\'@ and the field numbered @x@ (counting from 1)
-- is returned.  Calls 'error' when the header has no such field, naming the
-- requested field and the offending header.
getSample :: Int -> FastaSequence -> Sample
getSample x fs =
    -- Convert the 1-based field number to a 0-based offset before indexing.
    case drop (x - 1) fields of
        (field : _) | x >= 1 -> field
        _ -> error ( "getSample: no field " ++ show x
                  ++ " in header: " ++ fastaHeader fs )
  where
    fields = Split.splitOn "|" (fastaHeader fs)
-- | Group position-tagged fragments, either into one entry or by sliding
-- window.
--
-- * When @whole@ is 'True', @win@ is ignored and all fragments in @xs@ are
--   concatenated into a single entry.  An empty @xs@ is an error.
-- * Otherwise every run of @win@ consecutive entries is concatenated into one
--   entry, advancing one entry at a time.  An input shorter than @win@ yields
--   no entries.  A non-positive @win@ is an error.
--
-- Each combined entry carries the position of the first entry it was built
-- from.
fragmentPos :: Bool -> Int -> [(Position, Fragment)] -> [(Position, Fragment)]
fragmentPos whole win xs
    | whole && null xs = error "Empty line in file!!"
    | whole            = [combine xs]
    | win <= 0         = error "fragmentPos: window size must be positive"
    | otherwise        = slide xs
  where
    slide [] = []
    slide ys@(_ : rest)
        -- Only the window itself is measured, so each step costs O(win)
        -- instead of walking the whole remaining list.
        | length window < win = []
        | otherwise           = combine window : slide rest
      where
        window = take win ys
    -- Keep the first position and append every following fragment to it.
    -- Only ever applied to a non-empty list (guarded above).
    combine = foldl1' (\(!x, !ys) (_, y) -> (x, ys Seq.>< y))
-- | Build the map from position to the sample-tagged fragments found there.
--
-- For every input sequence the characters are numbered from 1, gap
-- characters (@\'-\'@ and @\'.\'@) are dropped after numbering, and the
-- remaining characters are grouped by 'fragmentPos' using @whole@ and @win@.
-- Each resulting fragment is paired with a sample name: the header field
-- chosen by @sampleField@ (via 'getSample') when @sample@ is 'True', or the
-- constant @\"Sample\"@ otherwise.  Fragments sharing a position are collected
-- into one list with @(++)@.
generatePositionMap :: Bool
                    -> Int
                    -> Bool
                    -> Window
                    -> [FastaSequence]
                    -> PositionMap
generatePositionMap sample sampleField whole win fastas =
    M.fromListWith (++) (concatMap entriesFor fastas)
  where
    -- All (position, [tagged fragment]) entries contributed by one sequence.
    entriesFor fs = map (wrap fs) (windowsOf fs)

    -- Number the characters, discard gaps, then window the survivors.
    windowsOf fs = fragmentPos whole win
                 $ map single
                 $ filter (isResidue . snd)
                 $ zip [1..] (fastaSeq fs)

    single (!pos, !c) = (pos, Seq.singleton c)

    -- Tag the fragment and wrap it in a singleton list for 'M.fromListWith'.
    wrap fs (!pos, !frag) = (pos, [tag fs frag])

    tag fs frag
        | sample    = (getSample sampleField fs, frag)
        | otherwise = ("Sample", frag)

    isResidue c = c /= '-' && c /= '.'