module Biobase.GeneticCodes.Import where
import Control.Monad.Except
import Data.ByteString.Char8 as BS hiding (unpack,map)
import Data.ByteString.Char8 (ByteString)
import Data.Text.Encoding (decodeUtf8)
import Data.Text.IO as TIO
import Data.Text (Text,unpack)
import Data.Void
import System.Exit
import Text.Megaparsec
import Text.Megaparsec.Char as MC
import Text.Megaparsec.Char.Lexer as MCL
import Biobase.Types.Codon
import Biobase.GeneticCodes.Types
type TTParser = Parsec Void Text
fromFile
:: (MonadIO m, MonadError String m)
=> FilePath
-> m [TranslationTable Char Char]
fromFile :: FilePath -> m [TranslationTable Char Char]
fromFile FilePath
fp = (IO ByteString -> m ByteString
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO ByteString -> m ByteString) -> IO ByteString -> m ByteString
forall a b. (a -> b) -> a -> b
$ FilePath -> IO ByteString
BS.readFile FilePath
fp) m ByteString
-> (ByteString -> m [TranslationTable Char Char])
-> m [TranslationTable Char Char]
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ByteString -> m [TranslationTable Char Char]
forall (m :: * -> *).
MonadError FilePath m =>
ByteString -> m [TranslationTable Char Char]
fromByteString
fromByteString
:: (MonadError String m)
=> ByteString
-> m [TranslationTable Char Char]
fromByteString :: ByteString -> m [TranslationTable Char Char]
fromByteString ByteString
bs = case Parsec Void Text [TranslationTable Char Char]
-> FilePath
-> Text
-> Either (ParseErrorBundle Text Void) [TranslationTable Char Char]
forall e s a.
Parsec e s a -> FilePath -> s -> Either (ParseErrorBundle s e) a
runParser (ParsecT Void Text Identity (TranslationTable Char Char)
-> Parsec Void Text [TranslationTable Char Char]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
some ParsecT Void Text Identity (TranslationTable Char Char)
parseTranslationTable) FilePath
"" (ByteString -> Text
decodeUtf8 ByteString
bs) of
Left ParseErrorBundle Text Void
err -> FilePath -> m [TranslationTable Char Char]
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (FilePath -> m [TranslationTable Char Char])
-> FilePath -> m [TranslationTable Char Char]
forall a b. (a -> b) -> a -> b
$ ParseErrorBundle Text Void -> FilePath
forall s e.
(VisualStream s, TraversableStream s, ShowErrorComponent e) =>
ParseErrorBundle s e -> FilePath
errorBundlePretty ParseErrorBundle Text Void
err
Right [TranslationTable Char Char]
rs -> [TranslationTable Char Char] -> m [TranslationTable Char Char]
forall (m :: * -> *) a. Monad m => a -> m a
return [TranslationTable Char Char]
rs
parseTranslationTable :: TTParser (TranslationTable Char Char)
parseTranslationTable :: ParsecT Void Text Identity (TranslationTable Char Char)
parseTranslationTable = do
(Int
i,Text
hdr) ← TTParser (Int, Text)
parseHeader
FilePath
aas ← Text -> TTParser FilePath
parseData Text
"amino acids"
FilePath
starts' ← Text -> TTParser FilePath
parseData Text
"start codons"
FilePath
base1 ← Text -> TTParser FilePath
parseData Text
"Base 1"
FilePath
base2 ← Text -> TTParser FilePath
parseData Text
"Base 2"
FilePath
base3 ← Text -> TTParser FilePath
parseData Text
"Base 3"
let triplets :: [Codon Char]
triplets = (Char -> Char -> Char -> Codon Char)
-> FilePath -> FilePath -> FilePath -> [Codon Char]
forall a b c d. (a -> b -> c -> d) -> [a] -> [b] -> [c] -> [d]
zipWith3 Char -> Char -> Char -> Codon Char
forall c. c -> c -> c -> Codon c
Codon FilePath
base1 FilePath
base2 FilePath
base3
let starts :: [Bool]
starts = (Char -> Bool) -> FilePath -> [Bool]
forall a b. (a -> b) -> [a] -> [b]
map (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'M') FilePath
starts'
let translations :: [TranslationElement Char Char]
translations = (Codon Char -> Bool -> Char -> TranslationElement Char Char)
-> [Codon Char]
-> [Bool]
-> FilePath
-> [TranslationElement Char Char]
forall a b c d. (a -> b -> c -> d) -> [a] -> [b] -> [c] -> [d]
zipWith3 Codon Char -> Bool -> Char -> TranslationElement Char Char
forall c a. Codon c -> Bool -> a -> TranslationElement c a
TranslationElement [Codon Char]
triplets [Bool]
starts FilePath
aas
TranslationTable Char Char
-> ParsecT Void Text Identity (TranslationTable Char Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (TranslationTable Char Char
-> ParsecT Void Text Identity (TranslationTable Char Char))
-> TranslationTable Char Char
-> ParsecT Void Text Identity (TranslationTable Char Char)
forall a b. (a -> b) -> a -> b
$ Int
-> Text
-> [TranslationElement Char Char]
-> TranslationTable Char Char
forall c a.
(Ord c, Ord a) =>
Int -> Text -> [TranslationElement c a] -> TranslationTable c a
genTranslationTable Int
i Text
hdr [TranslationElement Char Char]
translations
parseHeader :: TTParser (Int,Text)
= (,) (Int -> Text -> (Int, Text))
-> ParsecT Void Text Identity Int
-> ParsecT Void Text Identity (Text -> (Int, Text))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Integer -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Integer -> Int)
-> ParsecT Void Text Identity Integer
-> ParsecT Void Text Identity Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT Void Text Identity ()
-> ParsecT Void Text Identity Integer
-> ParsecT Void Text Identity Integer
forall e s (m :: * -> *) a. MonadParsec e s m => m () -> m a -> m a
lexeme ParsecT Void Text Identity ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
MC.space ParsecT Void Text Identity Integer
forall e s (m :: * -> *) a.
(MonadParsec e s m, Token s ~ Char, Num a) =>
m a
decimal)
ParsecT Void Text Identity (Text -> (Int, Text))
-> ParsecT Void Text Identity Char
-> ParsecT Void Text Identity (Text -> (Int, Text))
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
':' ParsecT Void Text Identity (Text -> (Int, Text))
-> ParsecT Void Text Identity ()
-> ParsecT Void Text Identity (Text -> (Int, Text))
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParsecT Void Text Identity ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
MC.space
ParsecT Void Text Identity (Text -> (Int, Text))
-> ParsecT Void Text Identity Text -> TTParser (Int, Text)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Maybe FilePath
-> (Token Text -> Bool) -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe FilePath -> (Token s -> Bool) -> m (Tokens s)
takeWhileP Maybe FilePath
forall a. Maybe a
Nothing (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\n')
TTParser (Int, Text)
-> ParsecT Void Text Identity () -> TTParser (Int, Text)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParsecT Void Text Identity ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
MC.space
parseData :: Text -> TTParser String
parseData :: Text -> TTParser FilePath
parseData Text
t
= Tokens Text -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
(MonadParsec e s m, FoldCase (Tokens s)) =>
Tokens s -> m (Tokens s)
string' Text
Tokens Text
t ParsecT Void Text Identity Text
-> ParsecT Void Text Identity () -> ParsecT Void Text Identity Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParsecT Void Text Identity ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
MC.space
ParsecT Void Text Identity Text
-> TTParser FilePath -> TTParser FilePath
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Text -> FilePath
unpack (Text -> FilePath)
-> ParsecT Void Text Identity Text -> TTParser FilePath
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe FilePath -> Int -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe FilePath -> Int -> m (Tokens s)
takeP Maybe FilePath
forall a. Maybe a
Nothing Int
64)
TTParser FilePath
-> ParsecT Void Text Identity () -> TTParser FilePath
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParsecT Void Text Identity ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
MC.space