{-# LANGUAGE OverloadedStrings #-}
module SequenceFormats.Eigenstrat (EigenstratSnpEntry(..), EigenstratIndEntry(..),
readEigenstratInd, GenoEntry(..), GenoLine, Sex(..),
readEigenstratSnpStdIn, readEigenstratSnpFile,
readEigenstrat, writeEigenstrat, writeEigenstratIndFile, writeEigenstratSnp,
writeEigenstratGeno) where
import SequenceFormats.Utils (Chrom (..),
SeqFormatException (..),
consumeProducer,
readFileProd, word)
import Control.Applicative ((<|>))
import Control.Exception (throw)
import Control.Monad (forM_, void)
import Control.Monad.Catch (MonadThrow)
import Control.Monad.IO.Class (MonadIO, liftIO)
import qualified Data.Attoparsec.ByteString.Char8 as A
import qualified Data.ByteString.Char8 as B
import Data.Vector (Vector, fromList, toList)
import Pipes (Consumer, Pipe, Producer,
cat, for, yield, (>->))
import qualified Pipes.ByteString as PB
import qualified Pipes.Prelude as P
import Pipes.Safe (MonadSafe)
import qualified Pipes.Safe.Prelude as PS
import System.IO (Handle, IOMode (..),
hPutStrLn, withFile)
data EigenstratSnpEntry = EigenstratSnpEntry
{ EigenstratSnpEntry -> Chrom
snpChrom :: Chrom
, EigenstratSnpEntry -> Int
snpPos :: Int
, EigenstratSnpEntry -> Double
snpGeneticPos :: Double
, EigenstratSnpEntry -> ByteString
snpId :: B.ByteString
, EigenstratSnpEntry -> Char
snpRef :: Char
, EigenstratSnpEntry -> Char
snpAlt :: Char
}
deriving (EigenstratSnpEntry -> EigenstratSnpEntry -> Bool
(EigenstratSnpEntry -> EigenstratSnpEntry -> Bool)
-> (EigenstratSnpEntry -> EigenstratSnpEntry -> Bool)
-> Eq EigenstratSnpEntry
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: EigenstratSnpEntry -> EigenstratSnpEntry -> Bool
$c/= :: EigenstratSnpEntry -> EigenstratSnpEntry -> Bool
== :: EigenstratSnpEntry -> EigenstratSnpEntry -> Bool
$c== :: EigenstratSnpEntry -> EigenstratSnpEntry -> Bool
Eq, Int -> EigenstratSnpEntry -> ShowS
[EigenstratSnpEntry] -> ShowS
EigenstratSnpEntry -> String
(Int -> EigenstratSnpEntry -> ShowS)
-> (EigenstratSnpEntry -> String)
-> ([EigenstratSnpEntry] -> ShowS)
-> Show EigenstratSnpEntry
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [EigenstratSnpEntry] -> ShowS
$cshowList :: [EigenstratSnpEntry] -> ShowS
show :: EigenstratSnpEntry -> String
$cshow :: EigenstratSnpEntry -> String
showsPrec :: Int -> EigenstratSnpEntry -> ShowS
$cshowsPrec :: Int -> EigenstratSnpEntry -> ShowS
Show)
data EigenstratIndEntry = EigenstratIndEntry String Sex String
deriving (EigenstratIndEntry -> EigenstratIndEntry -> Bool
(EigenstratIndEntry -> EigenstratIndEntry -> Bool)
-> (EigenstratIndEntry -> EigenstratIndEntry -> Bool)
-> Eq EigenstratIndEntry
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: EigenstratIndEntry -> EigenstratIndEntry -> Bool
$c/= :: EigenstratIndEntry -> EigenstratIndEntry -> Bool
== :: EigenstratIndEntry -> EigenstratIndEntry -> Bool
$c== :: EigenstratIndEntry -> EigenstratIndEntry -> Bool
Eq, Int -> EigenstratIndEntry -> ShowS
[EigenstratIndEntry] -> ShowS
EigenstratIndEntry -> String
(Int -> EigenstratIndEntry -> ShowS)
-> (EigenstratIndEntry -> String)
-> ([EigenstratIndEntry] -> ShowS)
-> Show EigenstratIndEntry
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [EigenstratIndEntry] -> ShowS
$cshowList :: [EigenstratIndEntry] -> ShowS
show :: EigenstratIndEntry -> String
$cshow :: EigenstratIndEntry -> String
showsPrec :: Int -> EigenstratIndEntry -> ShowS
$cshowsPrec :: Int -> EigenstratIndEntry -> ShowS
Show)
data Sex = Male
| Female
| Unknown
deriving (Sex -> Sex -> Bool
(Sex -> Sex -> Bool) -> (Sex -> Sex -> Bool) -> Eq Sex
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Sex -> Sex -> Bool
$c/= :: Sex -> Sex -> Bool
== :: Sex -> Sex -> Bool
$c== :: Sex -> Sex -> Bool
Eq, Int -> Sex -> ShowS
[Sex] -> ShowS
Sex -> String
(Int -> Sex -> ShowS)
-> (Sex -> String) -> ([Sex] -> ShowS) -> Show Sex
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Sex] -> ShowS
$cshowList :: [Sex] -> ShowS
show :: Sex -> String
$cshow :: Sex -> String
showsPrec :: Int -> Sex -> ShowS
$cshowsPrec :: Int -> Sex -> ShowS
Show)
data GenoEntry = HomRef
| Het
| HomAlt
| Missing
deriving (GenoEntry -> GenoEntry -> Bool
(GenoEntry -> GenoEntry -> Bool)
-> (GenoEntry -> GenoEntry -> Bool) -> Eq GenoEntry
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: GenoEntry -> GenoEntry -> Bool
$c/= :: GenoEntry -> GenoEntry -> Bool
== :: GenoEntry -> GenoEntry -> Bool
$c== :: GenoEntry -> GenoEntry -> Bool
Eq, Int -> GenoEntry -> ShowS
[GenoEntry] -> ShowS
GenoEntry -> String
(Int -> GenoEntry -> ShowS)
-> (GenoEntry -> String)
-> ([GenoEntry] -> ShowS)
-> Show GenoEntry
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [GenoEntry] -> ShowS
$cshowList :: [GenoEntry] -> ShowS
show :: GenoEntry -> String
$cshow :: GenoEntry -> String
showsPrec :: Int -> GenoEntry -> ShowS
$cshowsPrec :: Int -> GenoEntry -> ShowS
Show)
type GenoLine = Vector GenoEntry
eigenstratSnpParser :: A.Parser EigenstratSnpEntry
eigenstratSnpParser :: Parser EigenstratSnpEntry
eigenstratSnpParser = do
ByteString
snpId_ <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany Parser ByteString Char
A.space Parser ByteString ()
-> Parser ByteString ByteString -> Parser ByteString ByteString
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString ByteString
word
ByteString
chrom <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString ()
-> Parser ByteString ByteString -> Parser ByteString ByteString
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString ByteString
word
Double
geneticPos <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString ()
-> Parser ByteString Double -> Parser ByteString Double
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString Double
A.double
Int
pos <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString ()
-> Parser ByteString Int -> Parser ByteString Int
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString Int
forall a. Integral a => Parser a
A.decimal
Char
ref <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString ()
-> Parser ByteString Char -> Parser ByteString Char
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Char -> Bool) -> Parser ByteString Char
A.satisfy (String -> Char -> Bool
A.inClass String
"ACTGNX")
Char
alt <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString ()
-> Parser ByteString Char -> Parser ByteString Char
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Char -> Bool) -> Parser ByteString Char
A.satisfy (String -> Char -> Bool
A.inClass String
"ACTGNX")
Parser ByteString () -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void Parser ByteString ()
A.endOfLine
EigenstratSnpEntry -> Parser EigenstratSnpEntry
forall (m :: * -> *) a. Monad m => a -> m a
return (EigenstratSnpEntry -> Parser EigenstratSnpEntry)
-> EigenstratSnpEntry -> Parser EigenstratSnpEntry
forall a b. (a -> b) -> a -> b
$ Chrom
-> Int
-> Double
-> ByteString
-> Char
-> Char
-> EigenstratSnpEntry
EigenstratSnpEntry (ByteString -> Chrom
Chrom ByteString
chrom) Int
pos Double
geneticPos ByteString
snpId_ Char
ref Char
alt
eigenstratIndParser :: A.Parser EigenstratIndEntry
eigenstratIndParser :: Parser EigenstratIndEntry
eigenstratIndParser = do
Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany Parser ByteString Char
A.space
ByteString
name <- Parser ByteString ByteString
word
Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space
Sex
sex <- Parser Sex
parseSex
Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space
ByteString
popName <- Parser ByteString ByteString
word
Parser ByteString () -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void Parser ByteString ()
A.endOfLine
EigenstratIndEntry -> Parser EigenstratIndEntry
forall (m :: * -> *) a. Monad m => a -> m a
return (EigenstratIndEntry -> Parser EigenstratIndEntry)
-> EigenstratIndEntry -> Parser EigenstratIndEntry
forall a b. (a -> b) -> a -> b
$ String -> Sex -> String -> EigenstratIndEntry
EigenstratIndEntry (ByteString -> String
B.unpack ByteString
name) Sex
sex (ByteString -> String
B.unpack ByteString
popName)
parseSex :: A.Parser Sex
parseSex :: Parser Sex
parseSex = Parser Sex
parseMale Parser Sex -> Parser Sex -> Parser Sex
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Sex
parseFemale Parser Sex -> Parser Sex -> Parser Sex
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Sex
parseUnknown
where
parseMale :: Parser Sex
parseMale = Char -> Parser ByteString Char
A.char Char
'M' Parser ByteString Char -> Parser Sex -> Parser Sex
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Sex -> Parser Sex
forall (m :: * -> *) a. Monad m => a -> m a
return Sex
Male
parseFemale :: Parser Sex
parseFemale = Char -> Parser ByteString Char
A.char Char
'F' Parser ByteString Char -> Parser Sex -> Parser Sex
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Sex -> Parser Sex
forall (m :: * -> *) a. Monad m => a -> m a
return Sex
Female
parseUnknown :: Parser Sex
parseUnknown = Char -> Parser ByteString Char
A.char Char
'U' Parser ByteString Char -> Parser Sex -> Parser Sex
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Sex -> Parser Sex
forall (m :: * -> *) a. Monad m => a -> m a
return Sex
Unknown
readEigenstratInd :: (MonadIO m) => FilePath -> m [EigenstratIndEntry]
readEigenstratInd :: String -> m [EigenstratIndEntry]
readEigenstratInd String
fn =
IO [EigenstratIndEntry] -> m [EigenstratIndEntry]
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO [EigenstratIndEntry] -> m [EigenstratIndEntry])
-> ((Handle -> IO [EigenstratIndEntry]) -> IO [EigenstratIndEntry])
-> (Handle -> IO [EigenstratIndEntry])
-> m [EigenstratIndEntry]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String
-> IOMode
-> (Handle -> IO [EigenstratIndEntry])
-> IO [EigenstratIndEntry]
forall r. String -> IOMode -> (Handle -> IO r) -> IO r
withFile String
fn IOMode
ReadMode ((Handle -> IO [EigenstratIndEntry]) -> m [EigenstratIndEntry])
-> (Handle -> IO [EigenstratIndEntry]) -> m [EigenstratIndEntry]
forall a b. (a -> b) -> a -> b
$ \Handle
handle ->
Producer EigenstratIndEntry IO () -> IO [EigenstratIndEntry]
forall (m :: * -> *) a. Monad m => Producer a m () -> m [a]
P.toListM (Producer EigenstratIndEntry IO () -> IO [EigenstratIndEntry])
-> Producer EigenstratIndEntry IO () -> IO [EigenstratIndEntry]
forall a b. (a -> b) -> a -> b
$ Parser EigenstratIndEntry
-> Producer ByteString IO () -> Producer EigenstratIndEntry IO ()
forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer Parser EigenstratIndEntry
eigenstratIndParser (Handle -> Producer' ByteString IO ()
forall (m :: * -> *).
MonadIO m =>
Handle -> Producer' ByteString m ()
PB.fromHandle Handle
handle)
eigenstratGenoParser :: A.Parser GenoLine
eigenstratGenoParser :: Parser GenoLine
eigenstratGenoParser = do
ByteString
line <- (Char -> Bool) -> Parser ByteString ByteString
A.takeWhile1 Char -> Bool
isValidNum
Parser ByteString () -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void Parser ByteString ()
A.endOfLine
GenoLine -> Parser GenoLine
forall (m :: * -> *) a. Monad m => a -> m a
return (GenoLine -> Parser GenoLine)
-> ([GenoEntry] -> GenoLine) -> [GenoEntry] -> Parser GenoLine
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [GenoEntry] -> GenoLine
forall a. [a] -> Vector a
fromList ([GenoEntry] -> Parser GenoLine) -> [GenoEntry] -> Parser GenoLine
forall a b. (a -> b) -> a -> b
$ do
Char
l <- ByteString -> String
B.unpack ByteString
line
case Char
l of
Char
'0' -> GenoEntry -> [GenoEntry]
forall (m :: * -> *) a. Monad m => a -> m a
return GenoEntry
HomAlt
Char
'1' -> GenoEntry -> [GenoEntry]
forall (m :: * -> *) a. Monad m => a -> m a
return GenoEntry
Het
Char
'2' -> GenoEntry -> [GenoEntry]
forall (m :: * -> *) a. Monad m => a -> m a
return GenoEntry
HomRef
Char
'9' -> GenoEntry -> [GenoEntry]
forall (m :: * -> *) a. Monad m => a -> m a
return GenoEntry
Missing
Char
_ -> String -> [GenoEntry]
forall a. HasCallStack => String -> a
error String
"this should never happen"
where
isValidNum :: Char -> Bool
isValidNum Char
c = Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'0' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'1' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'2' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'9'
readEigenstratSnpStdIn :: (MonadThrow m, MonadIO m) => Producer EigenstratSnpEntry m ()
readEigenstratSnpStdIn :: Producer EigenstratSnpEntry m ()
readEigenstratSnpStdIn = Parser EigenstratSnpEntry
-> Producer ByteString m () -> Producer EigenstratSnpEntry m ()
forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer Parser EigenstratSnpEntry
eigenstratSnpParser Producer ByteString m ()
forall (m :: * -> *). MonadIO m => Producer' ByteString m ()
PB.stdin
readEigenstratSnpFile :: (MonadSafe m) => FilePath -> Producer EigenstratSnpEntry m ()
readEigenstratSnpFile :: String -> Producer EigenstratSnpEntry m ()
readEigenstratSnpFile = Parser EigenstratSnpEntry
-> Producer ByteString m () -> Producer EigenstratSnpEntry m ()
forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer Parser EigenstratSnpEntry
eigenstratSnpParser (Producer ByteString m () -> Producer EigenstratSnpEntry m ())
-> (String -> Producer ByteString m ())
-> String
-> Producer EigenstratSnpEntry m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Producer ByteString m ()
forall (m :: * -> *).
MonadSafe m =>
String -> Producer ByteString m ()
readFileProd
readEigenstrat :: (MonadSafe m) => FilePath
-> FilePath
-> FilePath
-> m ([EigenstratIndEntry], Producer (EigenstratSnpEntry, GenoLine) m ())
readEigenstrat :: String
-> String
-> String
-> m ([EigenstratIndEntry],
Producer (EigenstratSnpEntry, GenoLine) m ())
readEigenstrat String
genoFile String
snpFile String
indFile = do
[EigenstratIndEntry]
indEntries <- String -> m [EigenstratIndEntry]
forall (m :: * -> *). MonadIO m => String -> m [EigenstratIndEntry]
readEigenstratInd String
indFile
let snpProd :: Producer EigenstratSnpEntry m ()
snpProd = String -> Producer EigenstratSnpEntry m ()
forall (m :: * -> *).
MonadSafe m =>
String -> Producer EigenstratSnpEntry m ()
readEigenstratSnpFile String
snpFile
genoProd :: Proxy X () () GenoLine m ()
genoProd = Parser GenoLine
-> Producer ByteString m () -> Proxy X () () GenoLine m ()
forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer Parser GenoLine
eigenstratGenoParser (String -> Producer ByteString m ()
forall (m :: * -> *).
MonadSafe m =>
String -> Producer ByteString m ()
readFileProd String
genoFile) Proxy X () () GenoLine m ()
-> Proxy () GenoLine () GenoLine m ()
-> Proxy X () () GenoLine m ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>->
Int -> Proxy () GenoLine () GenoLine m ()
forall (m :: * -> *).
MonadThrow m =>
Int -> Pipe GenoLine GenoLine m ()
validateEigenstratEntries ([EigenstratIndEntry] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [EigenstratIndEntry]
indEntries)
([EigenstratIndEntry],
Producer (EigenstratSnpEntry, GenoLine) m ())
-> m ([EigenstratIndEntry],
Producer (EigenstratSnpEntry, GenoLine) m ())
forall (m :: * -> *) a. Monad m => a -> m a
return ([EigenstratIndEntry]
indEntries, Producer EigenstratSnpEntry m ()
-> Proxy X () () GenoLine m ()
-> Producer (EigenstratSnpEntry, GenoLine) m ()
forall (m :: * -> *) a r b x' x.
Monad m =>
Producer a m r -> Producer b m r -> Proxy x' x () (a, b) m r
P.zip Producer EigenstratSnpEntry m ()
snpProd Proxy X () () GenoLine m ()
genoProd)
validateEigenstratEntries :: (MonadThrow m) => Int -> Pipe GenoLine GenoLine m ()
validateEigenstratEntries :: Int -> Pipe GenoLine GenoLine m ()
validateEigenstratEntries Int
nr = Pipe GenoLine GenoLine m ()
-> (GenoLine -> Pipe GenoLine GenoLine m ())
-> Pipe GenoLine GenoLine m ()
forall (m :: * -> *) x' x b' b a' c' c.
Functor m =>
Proxy x' x b' b m a'
-> (b -> Proxy x' x c' c m b') -> Proxy x' x c' c m a'
for Pipe GenoLine GenoLine m ()
forall (m :: * -> *) a r. Functor m => Pipe a a m r
cat ((GenoLine -> Pipe GenoLine GenoLine m ())
-> Pipe GenoLine GenoLine m ())
-> (GenoLine -> Pipe GenoLine GenoLine m ())
-> Pipe GenoLine GenoLine m ()
forall a b. (a -> b) -> a -> b
$ \GenoLine
line ->
if GenoLine -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length GenoLine
line Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
nr
then do
let msg :: String
msg = String
"inconsistent nr of genotypes (" String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show (GenoLine -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length GenoLine
line) String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
", but should be " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show Int
nr String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
") in \
\genotype line " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> GenoLine -> String
forall a. Show a => a -> String
show GenoLine
line
SeqFormatException -> Pipe GenoLine GenoLine m ()
forall a e. Exception e => e -> a
throw (String -> SeqFormatException
SeqFormatException String
msg)
else
GenoLine -> Pipe GenoLine GenoLine m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield GenoLine
line
writeEigenstratIndFile :: (MonadIO m) => FilePath -> [EigenstratIndEntry] -> m ()
writeEigenstratIndFile :: String -> [EigenstratIndEntry] -> m ()
writeEigenstratIndFile String
f [EigenstratIndEntry]
indEntries =
IO () -> m ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> m ())
-> ((Handle -> IO ()) -> IO ()) -> (Handle -> IO ()) -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> IOMode -> (Handle -> IO ()) -> IO ()
forall r. String -> IOMode -> (Handle -> IO r) -> IO r
withFile String
f IOMode
WriteMode ((Handle -> IO ()) -> m ()) -> (Handle -> IO ()) -> m ()
forall a b. (a -> b) -> a -> b
$ \Handle
h ->
[EigenstratIndEntry] -> (EigenstratIndEntry -> IO ()) -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [EigenstratIndEntry]
indEntries ((EigenstratIndEntry -> IO ()) -> IO ())
-> (EigenstratIndEntry -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \(EigenstratIndEntry String
name Sex
sex String
popName) ->
Handle -> String -> IO ()
hPutStrLn Handle
h (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String
name String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
"\t" String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Sex -> String
forall p. IsString p => Sex -> p
sexToStr Sex
sex String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
"\t" String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
popName
where
sexToStr :: Sex -> p
sexToStr Sex
sex = case Sex
sex of
Sex
Male -> p
"M"
Sex
Female -> p
"F"
Sex
Unknown -> p
"U"
writeEigenstratSnp :: (MonadIO m) => Handle
-> Consumer EigenstratSnpEntry m ()
writeEigenstratSnp :: Handle -> Consumer EigenstratSnpEntry m ()
writeEigenstratSnp Handle
snpFileH =
let snpOutTextConsumer :: Proxy () ByteString y' y m r
snpOutTextConsumer = Handle -> Consumer' ByteString m r
forall (m :: * -> *) r.
MonadIO m =>
Handle -> Consumer' ByteString m r
PB.toHandle Handle
snpFileH
toTextPipe :: Pipe EigenstratSnpEntry ByteString m r
toTextPipe = (EigenstratSnpEntry -> ByteString)
-> Pipe EigenstratSnpEntry ByteString m r
forall (m :: * -> *) a b r. Functor m => (a -> b) -> Pipe a b m r
P.map (\(EigenstratSnpEntry Chrom
chrom Int
pos Double
gpos ByteString
gid Char
ref Char
alt) ->
let snpLine :: ByteString
snpLine = ByteString -> [ByteString] -> ByteString
B.intercalate ByteString
"\t" [ByteString
gid, Chrom -> ByteString
unChrom Chrom
chrom, String -> ByteString
B.pack (Double -> String
forall a. Show a => a -> String
show Double
gpos),
String -> ByteString
B.pack (Int -> String
forall a. Show a => a -> String
show Int
pos), Char -> ByteString
B.singleton Char
ref, Char -> ByteString
B.singleton Char
alt]
in ByteString
snpLine ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"\n")
in Pipe EigenstratSnpEntry ByteString m ()
forall r. Pipe EigenstratSnpEntry ByteString m r
toTextPipe Pipe EigenstratSnpEntry ByteString m ()
-> Proxy () ByteString () X m ()
-> Consumer EigenstratSnpEntry m ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> Proxy () ByteString () X m ()
forall y' y r. Proxy () ByteString y' y m r
snpOutTextConsumer
writeEigenstratGeno :: (MonadIO m) => Handle
-> Consumer GenoLine m ()
writeEigenstratGeno :: Handle -> Consumer GenoLine m ()
writeEigenstratGeno Handle
genoFileH =
let genoOutTextConsumer :: Proxy () ByteString y' y m r
genoOutTextConsumer = Handle -> Consumer' ByteString m r
forall (m :: * -> *) r.
MonadIO m =>
Handle -> Consumer' ByteString m r
PB.toHandle Handle
genoFileH
toTextPipe :: Pipe GenoLine ByteString m r
toTextPipe = (GenoLine -> ByteString) -> Pipe GenoLine ByteString m r
forall (m :: * -> *) a b r. Functor m => (a -> b) -> Pipe a b m r
P.map (\GenoLine
genoLine ->
let genoLineStr :: ByteString
genoLineStr = [ByteString] -> ByteString
B.concat ([ByteString] -> ByteString)
-> (GenoLine -> [ByteString]) -> GenoLine -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (GenoEntry -> ByteString) -> [GenoEntry] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map (String -> ByteString
B.pack (String -> ByteString)
-> (GenoEntry -> String) -> GenoEntry -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> String
forall a. Show a => a -> String
show (Int -> String) -> (GenoEntry -> Int) -> GenoEntry -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. GenoEntry -> Int
toEigenStratNum) ([GenoEntry] -> [ByteString])
-> (GenoLine -> [GenoEntry]) -> GenoLine -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. GenoLine -> [GenoEntry]
forall a. Vector a -> [a]
toList (GenoLine -> ByteString) -> GenoLine -> ByteString
forall a b. (a -> b) -> a -> b
$ GenoLine
genoLine
in ByteString
genoLineStr ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"\n")
in Pipe GenoLine ByteString m ()
forall r. Pipe GenoLine ByteString m r
toTextPipe Pipe GenoLine ByteString m ()
-> Proxy () ByteString () X m () -> Consumer GenoLine m ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> Proxy () ByteString () X m ()
forall y' y r. Proxy () ByteString y' y m r
genoOutTextConsumer
where
toEigenStratNum :: GenoEntry -> Int
toEigenStratNum GenoEntry
c = case GenoEntry
c of
GenoEntry
HomRef -> Int
2 :: Int
GenoEntry
Het -> Int
1
GenoEntry
HomAlt -> Int
0
GenoEntry
Missing -> Int
9
writeEigenstrat :: (MonadSafe m) => FilePath
-> FilePath
-> FilePath
-> [EigenstratIndEntry]
-> Consumer (EigenstratSnpEntry, GenoLine) m ()
writeEigenstrat :: String
-> String
-> String
-> [EigenstratIndEntry]
-> Consumer (EigenstratSnpEntry, GenoLine) m ()
writeEigenstrat String
genoFile String
snpFile String
indFile [EigenstratIndEntry]
indEntries = do
IO () -> Consumer (EigenstratSnpEntry, GenoLine) m ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> Consumer (EigenstratSnpEntry, GenoLine) m ())
-> IO () -> Consumer (EigenstratSnpEntry, GenoLine) m ()
forall a b. (a -> b) -> a -> b
$ String -> [EigenstratIndEntry] -> IO ()
forall (m :: * -> *).
MonadIO m =>
String -> [EigenstratIndEntry] -> m ()
writeEigenstratIndFile String
indFile [EigenstratIndEntry]
indEntries
let snpOutConsumer :: Proxy () EigenstratSnpEntry () X m ()
snpOutConsumer = String
-> IOMode
-> (Handle -> Proxy () EigenstratSnpEntry () X m ())
-> Proxy () EigenstratSnpEntry () X m ()
forall (m :: * -> *) r.
MonadSafe m =>
String -> IOMode -> (Handle -> m r) -> m r
PS.withFile String
snpFile IOMode
WriteMode Handle -> Proxy () EigenstratSnpEntry () X m ()
forall (m :: * -> *).
MonadIO m =>
Handle -> Consumer EigenstratSnpEntry m ()
writeEigenstratSnp
genoOutConsumer :: Proxy () GenoLine () X m ()
genoOutConsumer = String
-> IOMode
-> (Handle -> Proxy () GenoLine () X m ())
-> Proxy () GenoLine () X m ()
forall (m :: * -> *) r.
MonadSafe m =>
String -> IOMode -> (Handle -> m r) -> m r
PS.withFile String
genoFile IOMode
WriteMode Handle -> Proxy () GenoLine () X m ()
forall (m :: * -> *). MonadIO m => Handle -> Consumer GenoLine m ()
writeEigenstratGeno
Consumer (EigenstratSnpEntry, GenoLine) m ()
-> Pipe
(EigenstratSnpEntry, GenoLine) (EigenstratSnpEntry, GenoLine) m ()
forall (m :: * -> *) a r. Monad m => Consumer a m r -> Pipe a a m r
P.tee (((EigenstratSnpEntry, GenoLine) -> EigenstratSnpEntry)
-> Pipe (EigenstratSnpEntry, GenoLine) EigenstratSnpEntry m ()
forall (m :: * -> *) a b r. Functor m => (a -> b) -> Pipe a b m r
P.map (EigenstratSnpEntry, GenoLine) -> EigenstratSnpEntry
forall a b. (a, b) -> a
fst Pipe (EigenstratSnpEntry, GenoLine) EigenstratSnpEntry m ()
-> Proxy () EigenstratSnpEntry () X m ()
-> Consumer (EigenstratSnpEntry, GenoLine) m ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> Proxy () EigenstratSnpEntry () X m ()
snpOutConsumer) Pipe
(EigenstratSnpEntry, GenoLine) (EigenstratSnpEntry, GenoLine) m ()
-> Proxy () (EigenstratSnpEntry, GenoLine) () GenoLine m ()
-> Proxy () (EigenstratSnpEntry, GenoLine) () GenoLine m ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> ((EigenstratSnpEntry, GenoLine) -> GenoLine)
-> Proxy () (EigenstratSnpEntry, GenoLine) () GenoLine m ()
forall (m :: * -> *) a b r. Functor m => (a -> b) -> Pipe a b m r
P.map (EigenstratSnpEntry, GenoLine) -> GenoLine
forall a b. (a, b) -> b
snd Proxy () (EigenstratSnpEntry, GenoLine) () GenoLine m ()
-> Proxy () GenoLine () X m ()
-> Consumer (EigenstratSnpEntry, GenoLine) m ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> Proxy () GenoLine () X m ()
genoOutConsumer