{-# LANGUAGE BinaryLiterals #-}
{-# LANGUAGE OverloadedStrings #-}
module SequenceFormats.Plink (readBimStdIn, readBimFile, writeBim, readFamFile, readPlinkBedFile, readPlink, writePlink) where
import SequenceFormats.Eigenstrat (EigenstratIndEntry (..),
EigenstratSnpEntry (..),
GenoEntry (..), GenoLine,
Sex (..))
import SequenceFormats.Utils (Chrom (..), consumeProducer,
readFileProd, word)
import Control.Applicative ((<|>))
import Control.Monad (forM_, void)
import Control.Monad.Catch (MonadThrow, throwM)
import Control.Monad.IO.Class (MonadIO, liftIO)
import Control.Monad.Trans.Class (lift)
import Control.Monad.Trans.State.Strict (runStateT)
import qualified Data.Attoparsec.ByteString as AB
import qualified Data.Attoparsec.ByteString.Char8 as A
import Data.Bits (shiftL, shiftR, (.&.), (.|.))
import qualified Data.ByteString as BB
import qualified Data.ByteString.Char8 as B
import Data.Vector (fromList, toList)
import Data.Word (Word8)
import Pipes (Consumer, Producer, (>->))
import Pipes.Attoparsec (ParsingError (..), parse)
import qualified Pipes.ByteString as PB
import qualified Pipes.Prelude as P
import Pipes.Safe (MonadSafe)
import qualified Pipes.Safe.Prelude as PS
import System.IO (Handle, IOMode (..),
hPutStrLn, withFile)
bimParser :: A.Parser EigenstratSnpEntry
bimParser :: Parser EigenstratSnpEntry
bimParser = do
ByteString
chrom <- Parser ByteString
word
ByteString
snpId_ <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString () -> Parser ByteString -> Parser ByteString
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString
word
Double
geneticPos <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString ()
-> Parser ByteString Double -> Parser ByteString Double
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString Double
A.double
Int
pos <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString ()
-> Parser ByteString Int -> Parser ByteString Int
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString Int
forall a. Integral a => Parser a
A.decimal
Char
ref <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString ()
-> Parser ByteString Char -> Parser ByteString Char
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Char -> Bool) -> Parser ByteString Char
A.satisfy (String -> Char -> Bool
A.inClass String
"ACTGNX01234")
Char
alt <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString ()
-> Parser ByteString Char -> Parser ByteString Char
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Char -> Bool) -> Parser ByteString Char
A.satisfy (String -> Char -> Bool
A.inClass String
"ACTGNX01234")
Parser ByteString () -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void Parser ByteString ()
A.endOfLine
let refConvert :: Char
refConvert = Char -> Char
convertNum Char
ref
altConvert :: Char
altConvert = Char -> Char
convertNum Char
alt
EigenstratSnpEntry -> Parser EigenstratSnpEntry
forall (m :: * -> *) a. Monad m => a -> m a
return (EigenstratSnpEntry -> Parser EigenstratSnpEntry)
-> EigenstratSnpEntry -> Parser EigenstratSnpEntry
forall a b. (a -> b) -> a -> b
$ Chrom
-> Int
-> Double
-> ByteString
-> Char
-> Char
-> EigenstratSnpEntry
EigenstratSnpEntry (ByteString -> Chrom
Chrom ByteString
chrom) Int
pos Double
geneticPos ByteString
snpId_ Char
refConvert Char
altConvert
where
convertNum :: Char -> Char
convertNum Char
'0' = Char
'N'
convertNum Char
'1' = Char
'A'
convertNum Char
'2' = Char
'C'
convertNum Char
'3' = Char
'G'
convertNum Char
'4' = Char
'T'
convertNum Char
x = Char
x
famParser :: A.Parser EigenstratIndEntry
famParser :: Parser EigenstratIndEntry
famParser = do
Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany Parser ByteString Char
A.space
ByteString
pop <- Parser ByteString
word
ByteString
ind <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString () -> Parser ByteString -> Parser ByteString
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString
word
Integer
_ <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString ()
-> Parser ByteString Integer -> Parser ByteString Integer
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString Integer
forall a. Integral a => Parser a
A.decimal
Integer
_ <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString ()
-> Parser ByteString Integer -> Parser ByteString Integer
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString Integer
forall a. Integral a => Parser a
A.decimal
Sex
sex <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString ()
-> Parser ByteString Sex -> Parser ByteString Sex
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString Sex
parseSex
ByteString
_ <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString () -> Parser ByteString -> Parser ByteString
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString
word
Parser ByteString () -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void Parser ByteString ()
A.endOfLine
EigenstratIndEntry -> Parser EigenstratIndEntry
forall (m :: * -> *) a. Monad m => a -> m a
return (EigenstratIndEntry -> Parser EigenstratIndEntry)
-> EigenstratIndEntry -> Parser EigenstratIndEntry
forall a b. (a -> b) -> a -> b
$ String -> Sex -> String -> EigenstratIndEntry
EigenstratIndEntry (ByteString -> String
B.unpack ByteString
ind) Sex
sex (ByteString -> String
B.unpack ByteString
pop)
where
parseSex :: Parser ByteString Sex
parseSex = Parser ByteString Sex
parseMale Parser ByteString Sex
-> Parser ByteString Sex -> Parser ByteString Sex
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ByteString Sex
parseFemale Parser ByteString Sex
-> Parser ByteString Sex -> Parser ByteString Sex
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ByteString Sex
parseUnknown
parseMale :: Parser ByteString Sex
parseMale = Char -> Parser ByteString Char
A.char Char
'1' Parser ByteString Char
-> Parser ByteString Sex -> Parser ByteString Sex
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Sex -> Parser ByteString Sex
forall (m :: * -> *) a. Monad m => a -> m a
return Sex
Male
parseFemale :: Parser ByteString Sex
parseFemale = Char -> Parser ByteString Char
A.char Char
'2' Parser ByteString Char
-> Parser ByteString Sex -> Parser ByteString Sex
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Sex -> Parser ByteString Sex
forall (m :: * -> *) a. Monad m => a -> m a
return Sex
Female
parseUnknown :: Parser ByteString Sex
parseUnknown = Parser ByteString Char
A.anyChar Parser ByteString Char
-> Parser ByteString Sex -> Parser ByteString Sex
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Sex -> Parser ByteString Sex
forall (m :: * -> *) a. Monad m => a -> m a
return Sex
Unknown
bedHeaderParser :: AB.Parser ()
= do
Parser ByteString Word8 -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser ByteString Word8 -> Parser ByteString ())
-> Parser ByteString Word8 -> Parser ByteString ()
forall a b. (a -> b) -> a -> b
$ Word8 -> Parser ByteString Word8
AB.word8 Word8
0b01101100
Parser ByteString Word8 -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser ByteString Word8 -> Parser ByteString ())
-> Parser ByteString Word8 -> Parser ByteString ()
forall a b. (a -> b) -> a -> b
$ Word8 -> Parser ByteString Word8
AB.word8 Word8
0b00011011
Parser ByteString Word8 -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser ByteString Word8 -> Parser ByteString ())
-> Parser ByteString Word8 -> Parser ByteString ()
forall a b. (a -> b) -> a -> b
$ Word8 -> Parser ByteString Word8
AB.word8 Word8
0b00000001
bedGenotypeParser :: Int -> AB.Parser GenoLine
bedGenotypeParser :: Int -> Parser GenoLine
bedGenotypeParser Int
nrInds = do
let nrBytes :: Int
nrBytes = if Int
nrInds Int -> Int -> Int
forall a. Integral a => a -> a -> a
`rem` Int
4 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 then Int
nrInds Int -> Int -> Int
forall a. Integral a => a -> a -> a
`quot` Int
4 else (Int
nrInds Int -> Int -> Int
forall a. Integral a => a -> a -> a
`quot` Int
4) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
[Word8]
bytes <- ByteString -> [Word8]
BB.unpack (ByteString -> [Word8])
-> Parser ByteString -> Parser ByteString [Word8]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> Parser ByteString
AB.take Int
nrBytes
let indBitPairs :: [Word8]
indBitPairs = (Word8 -> [Word8]) -> [Word8] -> [Word8]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Word8 -> [Word8]
forall a. (Bits a, Num a) => a -> [a]
getBitPairs [Word8]
bytes
GenoLine -> Parser GenoLine
forall (m :: * -> *) a. Monad m => a -> m a
return (GenoLine -> Parser GenoLine)
-> ([Word8] -> GenoLine) -> [Word8] -> Parser GenoLine
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [GenoEntry] -> GenoLine
forall a. [a] -> Vector a
fromList ([GenoEntry] -> GenoLine)
-> ([Word8] -> [GenoEntry]) -> [Word8] -> GenoLine
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> [GenoEntry] -> [GenoEntry]
forall a. Int -> [a] -> [a]
take Int
nrInds ([GenoEntry] -> [GenoEntry])
-> ([Word8] -> [GenoEntry]) -> [Word8] -> [GenoEntry]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Word8 -> GenoEntry) -> [Word8] -> [GenoEntry]
forall a b. (a -> b) -> [a] -> [b]
map Word8 -> GenoEntry
forall a. (Eq a, Num a) => a -> GenoEntry
bitPairToGenotype ([Word8] -> Parser GenoLine) -> [Word8] -> Parser GenoLine
forall a b. (a -> b) -> a -> b
$ [Word8]
indBitPairs
where
getBitPairs :: a -> [a]
getBitPairs a
byte = (a -> a) -> [a] -> [a]
forall a b. (a -> b) -> [a] -> [b]
map (a
0b00000011 a -> a -> a
forall a. Bits a => a -> a -> a
.&.) [a
byte, a -> Int -> a
forall a. Bits a => a -> Int -> a
shiftR a
byte Int
2, a -> Int -> a
forall a. Bits a => a -> Int -> a
shiftR a
byte Int
4, a -> Int -> a
forall a. Bits a => a -> Int -> a
shiftR a
byte Int
6]
bitPairToGenotype :: a -> GenoEntry
bitPairToGenotype a
0b00000000 = GenoEntry
HomRef
bitPairToGenotype a
0b00000010 = GenoEntry
Het
bitPairToGenotype a
0b00000011 = GenoEntry
HomAlt
bitPairToGenotype a
0b00000001 = GenoEntry
Missing
bitPairToGenotype a
_ = String -> GenoEntry
forall a. HasCallStack => String -> a
error String
"This should never happen"
readPlinkBedProd :: (MonadThrow m) => Int -> Producer B.ByteString m () -> m (Producer GenoLine m ())
readPlinkBedProd :: Int -> Producer ByteString m () -> m (Producer GenoLine m ())
readPlinkBedProd Int
nrInds Producer ByteString m ()
prod = do
(Maybe (Either ParsingError ())
res, Producer ByteString m ()
rest) <- StateT
(Producer ByteString m ()) m (Maybe (Either ParsingError ()))
-> Producer ByteString m ()
-> m (Maybe (Either ParsingError ()), Producer ByteString m ())
forall s (m :: * -> *) a. StateT s m a -> s -> m (a, s)
runStateT (Parser ByteString ()
-> Parser ByteString m (Maybe (Either ParsingError ()))
forall (m :: * -> *) a b.
(Monad m, ParserInput a) =>
Parser a b -> Parser a m (Maybe (Either ParsingError b))
parse Parser ByteString ()
bedHeaderParser) Producer ByteString m ()
prod
()
_ <- case Maybe (Either ParsingError ())
res of
Maybe (Either ParsingError ())
Nothing -> ParsingError -> m ()
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM (ParsingError -> m ()) -> ParsingError -> m ()
forall a b. (a -> b) -> a -> b
$ [String] -> String -> ParsingError
ParsingError [] String
"Bed file exhausted prematurely"
Just (Left ParsingError
e) -> ParsingError -> m ()
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM ParsingError
e
Just (Right ()
h) -> () -> m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
h
Producer GenoLine m () -> m (Producer GenoLine m ())
forall (m :: * -> *) a. Monad m => a -> m a
return (Producer GenoLine m () -> m (Producer GenoLine m ()))
-> Producer GenoLine m () -> m (Producer GenoLine m ())
forall a b. (a -> b) -> a -> b
$ Parser GenoLine
-> Producer ByteString m () -> Producer GenoLine m ()
forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer (Int -> Parser GenoLine
bedGenotypeParser Int
nrInds) Producer ByteString m ()
rest
readPlinkBedFile :: (MonadSafe m) => FilePath -> Int -> m (Producer GenoLine m ())
readPlinkBedFile :: String -> Int -> m (Producer GenoLine m ())
readPlinkBedFile String
file Int
nrInds = Int -> Producer ByteString m () -> m (Producer GenoLine m ())
forall (m :: * -> *).
MonadThrow m =>
Int -> Producer ByteString m () -> m (Producer GenoLine m ())
readPlinkBedProd Int
nrInds (String -> Producer ByteString m ()
forall (m :: * -> *).
MonadSafe m =>
String -> Producer ByteString m ()
readFileProd String
file)
readBimStdIn :: (MonadThrow m, MonadIO m) => Producer EigenstratSnpEntry m ()
readBimStdIn :: Producer EigenstratSnpEntry m ()
readBimStdIn = Parser EigenstratSnpEntry
-> Producer ByteString m () -> Producer EigenstratSnpEntry m ()
forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer Parser EigenstratSnpEntry
bimParser Producer ByteString m ()
forall (m :: * -> *). MonadIO m => Producer' ByteString m ()
PB.stdin
readBimFile :: (MonadSafe m) => FilePath -> Producer EigenstratSnpEntry m ()
readBimFile :: String -> Producer EigenstratSnpEntry m ()
readBimFile = Parser EigenstratSnpEntry
-> Producer ByteString m () -> Producer EigenstratSnpEntry m ()
forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer Parser EigenstratSnpEntry
bimParser (Producer ByteString m () -> Producer EigenstratSnpEntry m ())
-> (String -> Producer ByteString m ())
-> String
-> Producer EigenstratSnpEntry m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Producer ByteString m ()
forall (m :: * -> *).
MonadSafe m =>
String -> Producer ByteString m ()
readFileProd
readFamFile :: (MonadIO m) => FilePath -> m [EigenstratIndEntry]
readFamFile :: String -> m [EigenstratIndEntry]
readFamFile String
fn =
IO [EigenstratIndEntry] -> m [EigenstratIndEntry]
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO [EigenstratIndEntry] -> m [EigenstratIndEntry])
-> ((Handle -> IO [EigenstratIndEntry]) -> IO [EigenstratIndEntry])
-> (Handle -> IO [EigenstratIndEntry])
-> m [EigenstratIndEntry]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String
-> IOMode
-> (Handle -> IO [EigenstratIndEntry])
-> IO [EigenstratIndEntry]
forall r. String -> IOMode -> (Handle -> IO r) -> IO r
withFile String
fn IOMode
ReadMode ((Handle -> IO [EigenstratIndEntry]) -> m [EigenstratIndEntry])
-> (Handle -> IO [EigenstratIndEntry]) -> m [EigenstratIndEntry]
forall a b. (a -> b) -> a -> b
$ \Handle
handle ->
Producer EigenstratIndEntry IO () -> IO [EigenstratIndEntry]
forall (m :: * -> *) a. Monad m => Producer a m () -> m [a]
P.toListM (Producer EigenstratIndEntry IO () -> IO [EigenstratIndEntry])
-> Producer EigenstratIndEntry IO () -> IO [EigenstratIndEntry]
forall a b. (a -> b) -> a -> b
$ Parser EigenstratIndEntry
-> Producer ByteString IO () -> Producer EigenstratIndEntry IO ()
forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer Parser EigenstratIndEntry
famParser (Handle -> Producer' ByteString IO ()
forall (m :: * -> *).
MonadIO m =>
Handle -> Producer' ByteString m ()
PB.fromHandle Handle
handle)
readPlink :: (MonadSafe m) => FilePath
-> FilePath
-> FilePath
-> m ([EigenstratIndEntry], Producer (EigenstratSnpEntry, GenoLine) m ())
readPlink :: String
-> String
-> String
-> m ([EigenstratIndEntry],
Producer (EigenstratSnpEntry, GenoLine) m ())
readPlink String
bedFile String
bimFile String
famFile = do
[EigenstratIndEntry]
indEntries <- String -> m [EigenstratIndEntry]
forall (m :: * -> *). MonadIO m => String -> m [EigenstratIndEntry]
readFamFile String
famFile
let nrInds :: Int
nrInds = [EigenstratIndEntry] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [EigenstratIndEntry]
indEntries
snpProd :: Producer EigenstratSnpEntry m ()
snpProd = String -> Producer EigenstratSnpEntry m ()
forall (m :: * -> *).
MonadSafe m =>
String -> Producer EigenstratSnpEntry m ()
readBimFile String
bimFile
Producer GenoLine m ()
genoProd <- String -> Int -> m (Producer GenoLine m ())
forall (m :: * -> *).
MonadSafe m =>
String -> Int -> m (Producer GenoLine m ())
readPlinkBedFile String
bedFile Int
nrInds
([EigenstratIndEntry],
Producer (EigenstratSnpEntry, GenoLine) m ())
-> m ([EigenstratIndEntry],
Producer (EigenstratSnpEntry, GenoLine) m ())
forall (m :: * -> *) a. Monad m => a -> m a
return ([EigenstratIndEntry]
indEntries, Producer EigenstratSnpEntry m ()
-> Producer GenoLine m ()
-> Producer (EigenstratSnpEntry, GenoLine) m ()
forall (m :: * -> *) a r b x' x.
Monad m =>
Producer a m r -> Producer b m r -> Proxy x' x () (a, b) m r
P.zip Producer EigenstratSnpEntry m ()
snpProd Producer GenoLine m ()
genoProd)
writeBim :: (MonadIO m) => Handle
-> Consumer EigenstratSnpEntry m ()
writeBim :: Handle -> Consumer EigenstratSnpEntry m ()
writeBim Handle
snpFileH =
let snpOutTextConsumer :: Proxy () ByteString y' y m r
snpOutTextConsumer = Handle -> Consumer' ByteString m r
forall (m :: * -> *) r.
MonadIO m =>
Handle -> Consumer' ByteString m r
PB.toHandle Handle
snpFileH
toTextPipe :: Pipe EigenstratSnpEntry ByteString m r
toTextPipe = (EigenstratSnpEntry -> ByteString)
-> Pipe EigenstratSnpEntry ByteString m r
forall (m :: * -> *) a b r. Functor m => (a -> b) -> Pipe a b m r
P.map (\(EigenstratSnpEntry Chrom
chrom Int
pos Double
gpos ByteString
gid Char
ref Char
alt) ->
let bimLine :: ByteString
bimLine = ByteString -> [ByteString] -> ByteString
B.intercalate ByteString
"\t" [Chrom -> ByteString
unChrom Chrom
chrom, ByteString
gid, String -> ByteString
B.pack (Double -> String
forall a. Show a => a -> String
show Double
gpos),
String -> ByteString
B.pack (Int -> String
forall a. Show a => a -> String
show Int
pos), Char -> ByteString
B.singleton Char
ref, Char -> ByteString
B.singleton Char
alt]
in ByteString
bimLine ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"\n")
in Pipe EigenstratSnpEntry ByteString m ()
forall r. Pipe EigenstratSnpEntry ByteString m r
toTextPipe Pipe EigenstratSnpEntry ByteString m ()
-> Proxy () ByteString () X m ()
-> Consumer EigenstratSnpEntry m ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> Proxy () ByteString () X m ()
forall y' y r. Proxy () ByteString y' y m r
snpOutTextConsumer
writeFam :: (MonadIO m) => FilePath -> [EigenstratIndEntry] -> m ()
writeFam :: String -> [EigenstratIndEntry] -> m ()
writeFam String
f [EigenstratIndEntry]
indEntries =
IO () -> m ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> m ())
-> ((Handle -> IO ()) -> IO ()) -> (Handle -> IO ()) -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> IOMode -> (Handle -> IO ()) -> IO ()
forall r. String -> IOMode -> (Handle -> IO r) -> IO r
withFile String
f IOMode
WriteMode ((Handle -> IO ()) -> m ()) -> (Handle -> IO ()) -> m ()
forall a b. (a -> b) -> a -> b
$ \Handle
h ->
[EigenstratIndEntry] -> (EigenstratIndEntry -> IO ()) -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [EigenstratIndEntry]
indEntries ((EigenstratIndEntry -> IO ()) -> IO ())
-> (EigenstratIndEntry -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \(EigenstratIndEntry String
name Sex
sex String
popName) ->
Handle -> String -> IO ()
hPutStrLn Handle
h (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String
popName String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"\t" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
name String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"\t0\t0\t" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Sex -> String
forall p. IsString p => Sex -> p
sexToStr Sex
sex String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"\t0"
where
sexToStr :: Sex -> p
sexToStr Sex
sex = case Sex
sex of
Sex
Male -> p
"1"
Sex
Female -> p
"2"
Sex
Unknown -> p
"0"
writeBed :: (MonadIO m) => Handle
-> Consumer GenoLine m ()
writeBed :: Handle -> Consumer GenoLine m ()
writeBed Handle
bedFileH = do
IO () -> Consumer GenoLine m ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> Consumer GenoLine m ())
-> IO () -> Consumer GenoLine m ()
forall a b. (a -> b) -> a -> b
$ Handle -> ByteString -> IO ()
BB.hPut Handle
bedFileH ([Word8] -> ByteString
BB.pack [Word8
0b01101100, Word8
0b00011011, Word8
0b00000001])
let bedOutConsumer :: Proxy () ByteString y' y m r
bedOutConsumer = Handle -> Consumer' ByteString m r
forall (m :: * -> *) r.
MonadIO m =>
Handle -> Consumer' ByteString m r
PB.toHandle Handle
bedFileH
toPlinkPipe :: Pipe GenoLine ByteString m r
toPlinkPipe = (GenoLine -> ByteString) -> Pipe GenoLine ByteString m r
forall (m :: * -> *) a b r. Functor m => (a -> b) -> Pipe a b m r
P.map ([Word8] -> ByteString
BB.pack ([Word8] -> ByteString)
-> (GenoLine -> [Word8]) -> GenoLine -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. GenoLine -> [Word8]
genoLineToBytes)
Pipe GenoLine ByteString m ()
forall r. Pipe GenoLine ByteString m r
toPlinkPipe Pipe GenoLine ByteString m ()
-> Proxy () ByteString () X m () -> Consumer GenoLine m ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> Proxy () ByteString () X m ()
forall y' y r. Proxy () ByteString y' y m r
bedOutConsumer
where
genoLineToBytes :: GenoLine -> [Word8]
genoLineToBytes :: GenoLine -> [Word8]
genoLineToBytes GenoLine
genoLine = [GenoEntry] -> [Word8]
go (GenoLine -> [GenoEntry]
forall a. Vector a -> [a]
toList GenoLine
genoLine)
where
go :: [GenoEntry] -> [Word8]
go :: [GenoEntry] -> [Word8]
go [] = []
go (GenoEntry
g1 : GenoEntry
g2 : GenoEntry
g3 : GenoEntry
g4 : [GenoEntry]
rest) = [GenoEntry] -> Word8
constructByte [GenoEntry
g1, GenoEntry
g2, GenoEntry
g3, GenoEntry
g4] Word8 -> [Word8] -> [Word8]
forall a. a -> [a] -> [a]
: [GenoEntry] -> [Word8]
go [GenoEntry]
rest
go [GenoEntry]
genoEntries = [[GenoEntry] -> Word8
constructByte [GenoEntry]
genoEntries]
constructByte :: [GenoEntry] -> Word8
constructByte :: [GenoEntry] -> Word8
constructByte [] = String -> Word8
forall a. HasCallStack => String -> a
error String
"constructByte - should never happen"
constructByte [GenoEntry
g] = GenoEntry -> Word8
genoEntryToByte GenoEntry
g
constructByte (GenoEntry
g:[GenoEntry]
gs) = Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
shiftL ([GenoEntry] -> Word8
constructByte [GenoEntry]
gs) Int
2 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.|. GenoEntry -> Word8
genoEntryToByte GenoEntry
g
genoEntryToByte :: GenoEntry -> Word8
genoEntryToByte :: GenoEntry -> Word8
genoEntryToByte GenoEntry
HomRef = Word8
0b00000000
genoEntryToByte GenoEntry
HomAlt = Word8
0b00000011
genoEntryToByte GenoEntry
Het = Word8
0b00000010
genoEntryToByte GenoEntry
Missing = Word8
0b00000001
writePlink :: (MonadSafe m) => FilePath
-> FilePath
-> FilePath
-> [EigenstratIndEntry]
-> Consumer (EigenstratSnpEntry, GenoLine) m ()
writePlink :: String
-> String
-> String
-> [EigenstratIndEntry]
-> Consumer (EigenstratSnpEntry, GenoLine) m ()
writePlink String
bedFile String
bimFile String
famFile [EigenstratIndEntry]
indEntries = do
IO () -> Consumer (EigenstratSnpEntry, GenoLine) m ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> Consumer (EigenstratSnpEntry, GenoLine) m ())
-> IO () -> Consumer (EigenstratSnpEntry, GenoLine) m ()
forall a b. (a -> b) -> a -> b
$ String -> [EigenstratIndEntry] -> IO ()
forall (m :: * -> *).
MonadIO m =>
String -> [EigenstratIndEntry] -> m ()
writeFam String
famFile [EigenstratIndEntry]
indEntries
let bimOutConsumer :: Proxy () EigenstratSnpEntry () X m ()
bimOutConsumer = String
-> IOMode
-> (Handle -> Proxy () EigenstratSnpEntry () X m ())
-> Proxy () EigenstratSnpEntry () X m ()
forall (m :: * -> *) r.
MonadSafe m =>
String -> IOMode -> (Handle -> m r) -> m r
PS.withFile String
bimFile IOMode
WriteMode Handle -> Proxy () EigenstratSnpEntry () X m ()
forall (m :: * -> *).
MonadIO m =>
Handle -> Consumer EigenstratSnpEntry m ()
writeBim
bedOutConsumer :: Proxy () GenoLine () X m ()
bedOutConsumer = String
-> IOMode
-> (Handle -> Proxy () GenoLine () X m ())
-> Proxy () GenoLine () X m ()
forall (m :: * -> *) r.
MonadSafe m =>
String -> IOMode -> (Handle -> m r) -> m r
PS.withFile String
bedFile IOMode
WriteMode Handle -> Proxy () GenoLine () X m ()
forall (m :: * -> *). MonadIO m => Handle -> Consumer GenoLine m ()
writeBed
Consumer (EigenstratSnpEntry, GenoLine) m ()
-> Pipe
(EigenstratSnpEntry, GenoLine) (EigenstratSnpEntry, GenoLine) m ()
forall (m :: * -> *) a r. Monad m => Consumer a m r -> Pipe a a m r
P.tee (((EigenstratSnpEntry, GenoLine) -> EigenstratSnpEntry)
-> Pipe (EigenstratSnpEntry, GenoLine) EigenstratSnpEntry m ()
forall (m :: * -> *) a b r. Functor m => (a -> b) -> Pipe a b m r
P.map (EigenstratSnpEntry, GenoLine) -> EigenstratSnpEntry
forall a b. (a, b) -> a
fst Pipe (EigenstratSnpEntry, GenoLine) EigenstratSnpEntry m ()
-> Proxy () EigenstratSnpEntry () X m ()
-> Consumer (EigenstratSnpEntry, GenoLine) m ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> Proxy () EigenstratSnpEntry () X m ()
bimOutConsumer) Pipe
(EigenstratSnpEntry, GenoLine) (EigenstratSnpEntry, GenoLine) m ()
-> Proxy () (EigenstratSnpEntry, GenoLine) () GenoLine m ()
-> Proxy () (EigenstratSnpEntry, GenoLine) () GenoLine m ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> ((EigenstratSnpEntry, GenoLine) -> GenoLine)
-> Proxy () (EigenstratSnpEntry, GenoLine) () GenoLine m ()
forall (m :: * -> *) a b r. Functor m => (a -> b) -> Pipe a b m r
P.map (EigenstratSnpEntry, GenoLine) -> GenoLine
forall a b. (a, b) -> b
snd Proxy () (EigenstratSnpEntry, GenoLine) () GenoLine m ()
-> Proxy () GenoLine () X m ()
-> Consumer (EigenstratSnpEntry, GenoLine) m ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> Proxy () GenoLine () X m ()
bedOutConsumer