{-# LANGUAGE OverloadedStrings #-}
module SequenceFormats.Pileup (readPileupFromStdIn, readPileupFromFile, PileupRow(..), Strand(..)) where
import Control.Monad.Catch (MonadThrow)
import Control.Monad.IO.Class (MonadIO)
import qualified Data.Attoparsec.ByteString.Char8 as A
import qualified Data.ByteString.Char8 as B
import Data.Char (toUpper)
import Pipes (Producer)
import qualified Pipes.ByteString as PB
import Pipes.Safe (MonadSafe)
import SequenceFormats.Utils (Chrom(..), word, readFileProd, consumeProducer)
data Strand = ForwardStrand | ReverseStrand deriving (Strand -> Strand -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Strand -> Strand -> Bool
$c/= :: Strand -> Strand -> Bool
== :: Strand -> Strand -> Bool
$c== :: Strand -> Strand -> Bool
Eq, Int -> Strand -> ShowS
[Strand] -> ShowS
Strand -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Strand] -> ShowS
$cshowList :: [Strand] -> ShowS
show :: Strand -> String
$cshow :: Strand -> String
showsPrec :: Int -> Strand -> ShowS
$cshowsPrec :: Int -> Strand -> ShowS
Show)
data PileupRow = PileupRow {
PileupRow -> Chrom
pileupChrom :: Chrom,
PileupRow -> Int
pileupPos :: Int,
PileupRow -> Char
pileupRef :: Char,
PileupRow -> [String]
pileupBases :: [String],
PileupRow -> [[Strand]]
pileupStrandInfo :: [[Strand]]
} deriving (PileupRow -> PileupRow -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: PileupRow -> PileupRow -> Bool
$c/= :: PileupRow -> PileupRow -> Bool
== :: PileupRow -> PileupRow -> Bool
$c== :: PileupRow -> PileupRow -> Bool
Eq, Int -> PileupRow -> ShowS
[PileupRow] -> ShowS
PileupRow -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [PileupRow] -> ShowS
$cshowList :: [PileupRow] -> ShowS
show :: PileupRow -> String
$cshow :: PileupRow -> String
showsPrec :: Int -> PileupRow -> ShowS
$cshowsPrec :: Int -> PileupRow -> ShowS
Show)
readPileupFromStdIn :: (MonadIO m, MonadThrow m) => Producer PileupRow m ()
readPileupFromStdIn :: forall (m :: * -> *).
(MonadIO m, MonadThrow m) =>
Producer PileupRow m ()
readPileupFromStdIn = forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer Parser PileupRow
pileupParser forall (m :: * -> *). MonadIO m => Producer' ByteString m ()
PB.stdin
readPileupFromFile :: (MonadSafe m) => FilePath -> Producer PileupRow m ()
readPileupFromFile :: forall (m :: * -> *).
MonadSafe m =>
String -> Producer PileupRow m ()
readPileupFromFile = forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer Parser PileupRow
pileupParser forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (m :: * -> *).
MonadSafe m =>
String -> Producer ByteString m ()
readFileProd
pileupParser :: A.Parser PileupRow
pileupParser :: Parser PileupRow
pileupParser = do
ByteString
chrom <- Parser ByteString
word
Char
_ <- Parser Char
A.space
Int
pos <- forall a. Integral a => Parser a
A.decimal
Char
_ <- Parser Char
A.space
Char
refA <- Char -> Char
toUpper forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Char
A.satisfy (String -> Char -> Bool
A.inClass String
"ACTGNactgnM")
Char
_ <- Parser Char
A.space
[(String, [Strand])]
baseAndStrandEntries <- Char -> Parser ByteString (String, [Strand])
parsePileupPerSample Char
refA forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`A.sepBy1`
(Char -> Bool) -> Parser Char
A.satisfy (\Char
c -> Char
c forall a. Eq a => a -> a -> Bool
== Char
' ' Bool -> Bool -> Bool
|| Char
c forall a. Eq a => a -> a -> Bool
== Char
'\t')
Parser ()
A.endOfLine
let baseStrings :: [String]
baseStrings = forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> a
fst [(String, [Strand])]
baseAndStrandEntries
strandInfoStrings :: [[Strand]]
strandInfoStrings = forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> b
snd [(String, [Strand])]
baseAndStrandEntries
let ret :: PileupRow
ret = Chrom -> Int -> Char -> [String] -> [[Strand]] -> PileupRow
PileupRow (ByteString -> Chrom
Chrom ByteString
chrom) Int
pos Char
refA [String]
baseStrings [[Strand]]
strandInfoStrings
forall (m :: * -> *) a. Monad m => a -> m a
return PileupRow
ret
where
parsePileupPerSample :: Char -> Parser ByteString (String, [Strand])
parsePileupPerSample Char
refA =
Char -> Int -> ByteString -> (String, [Strand])
processPileupEntry Char
refA forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall a. Integral a => Parser a
A.decimal forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Char
A.space forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString
word forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Char
A.space forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ByteString
word
processPileupEntry :: Char -> Int -> B.ByteString -> (String, [Strand])
processPileupEntry :: Char -> Int -> ByteString -> (String, [Strand])
processPileupEntry Char
refA Int
cov ByteString
readBaseString =
if Int
cov forall a. Eq a => a -> a -> Bool
== Int
0 then (String
"", []) else
let res :: [(Char, Strand)]
res = String -> [(Char, Strand)]
go (ByteString -> String
B.unpack ByteString
readBaseString)
in (forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> a
fst [(Char, Strand)]
res, forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> b
snd [(Char, Strand)]
res)
where
go :: String -> [(Char, Strand)]
go (Char
x:String
xs)
| Char
x forall a. Eq a => a -> a -> Bool
== Char
'.' = (Char
refA, Strand
ForwardStrand) forall a. a -> [a] -> [a]
: String -> [(Char, Strand)]
go String
xs
| Char
x forall a. Eq a => a -> a -> Bool
== Char
',' = (Char
refA, Strand
ReverseStrand) forall a. a -> [a] -> [a]
: String -> [(Char, Strand)]
go String
xs
| Char
x forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
"ACTGN" :: String) = (Char
x, Strand
ForwardStrand) forall a. a -> [a] -> [a]
: String -> [(Char, Strand)]
go String
xs
| Char
x forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
"actgn" :: String) = (Char -> Char
toUpper Char
x, Strand
ReverseStrand) forall a. a -> [a] -> [a]
: String -> [(Char, Strand)]
go String
xs
| Char
x forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
"$*#<>" :: String) = String -> [(Char, Strand)]
go String
xs
| Char
x forall a. Eq a => a -> a -> Bool
== Char
'^' = String -> [(Char, Strand)]
go (forall a. Int -> [a] -> [a]
drop Int
1 String
xs)
| Char
x forall a. Eq a => a -> a -> Bool
== Char
'+' Bool -> Bool -> Bool
|| Char
x forall a. Eq a => a -> a -> Bool
== Char
'-' =
let [(Int
num, String
rest)] = forall a. Read a => ReadS a
reads String
xs in String -> [(Char, Strand)]
go (forall a. Int -> [a] -> [a]
drop Int
num String
rest)
| Bool
otherwise = forall a. HasCallStack => String -> a
error forall a b. (a -> b) -> a -> b
$ String
"cannot parse read base string: " forall a. [a] -> [a] -> [a]
++ (Char
xforall a. a -> [a] -> [a]
:String
xs)
go [] = []