-- |
--
-- The tables imported with these functions are from ENA:
-- <https://www.ebi.ac.uk/ena/browse/translation-tables>

module Biobase.GeneticCodes.Import where

import Control.Monad.Except
import Data.ByteString.Char8 as BS hiding (unpack,map)
import Data.ByteString.Char8 (ByteString)
import Data.Text.Encoding (decodeUtf8)
import Data.Text.IO as TIO
import Data.Text (Text,unpack)
import Data.Void
import System.Exit
import Text.Megaparsec
import Text.Megaparsec.Char as MC
import Text.Megaparsec.Char.Lexer as MCL

import Biobase.Types.Codon

import Biobase.GeneticCodes.Types



type TTParser = Parsec Void Text

-- | Import translation tables from a given file. In case of parse error, print
-- the error and exit with a failure.

fromFile
  :: (MonadIO m, MonadError String m)
  => FilePath
  -> m [TranslationTable Char Char]
fromFile :: FilePath -> m [TranslationTable Char Char]
fromFile FilePath
fp = (IO ByteString -> m ByteString
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO ByteString -> m ByteString) -> IO ByteString -> m ByteString
forall a b. (a -> b) -> a -> b
$ FilePath -> IO ByteString
BS.readFile FilePath
fp) m ByteString
-> (ByteString -> m [TranslationTable Char Char])
-> m [TranslationTable Char Char]
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ByteString -> m [TranslationTable Char Char]
forall (m :: * -> *).
MonadError FilePath m =>
ByteString -> m [TranslationTable Char Char]
fromByteString

-- | Parse a ByteString with translation tables.

fromByteString
  :: (MonadError String m)
  => ByteString
  -> m [TranslationTable Char Char]
fromByteString :: ByteString -> m [TranslationTable Char Char]
fromByteString ByteString
bs = case Parsec Void Text [TranslationTable Char Char]
-> FilePath
-> Text
-> Either (ParseErrorBundle Text Void) [TranslationTable Char Char]
forall e s a.
Parsec e s a -> FilePath -> s -> Either (ParseErrorBundle s e) a
runParser (ParsecT Void Text Identity (TranslationTable Char Char)
-> Parsec Void Text [TranslationTable Char Char]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
some ParsecT Void Text Identity (TranslationTable Char Char)
parseTranslationTable) FilePath
"" (ByteString -> Text
decodeUtf8 ByteString
bs) of
    -- Left err -> throwError $ parseErrorPretty err -- megaparsec 6.x
    Left ParseErrorBundle Text Void
err -> FilePath -> m [TranslationTable Char Char]
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (FilePath -> m [TranslationTable Char Char])
-> FilePath -> m [TranslationTable Char Char]
forall a b. (a -> b) -> a -> b
$ ParseErrorBundle Text Void -> FilePath
forall s e.
(VisualStream s, TraversableStream s, ShowErrorComponent e) =>
ParseErrorBundle s e -> FilePath
errorBundlePretty ParseErrorBundle Text Void
err   -- megaparsec 7.x
    Right [TranslationTable Char Char]
rs -> [TranslationTable Char Char] -> m [TranslationTable Char Char]
forall (m :: * -> *) a. Monad m => a -> m a
return [TranslationTable Char Char]
rs

-- | Parses a single translation table.

parseTranslationTable :: TTParser (TranslationTable Char Char)
parseTranslationTable :: ParsecT Void Text Identity (TranslationTable Char Char)
parseTranslationTable = do
  (Int
i,Text
hdr)  TTParser (Int, Text)
parseHeader
  FilePath
aas      Text -> TTParser FilePath
parseData Text
"amino acids"
  FilePath
starts'  Text -> TTParser FilePath
parseData Text
"start codons"
  FilePath
base1    Text -> TTParser FilePath
parseData Text
"Base 1"
  FilePath
base2    Text -> TTParser FilePath
parseData Text
"Base 2"
  FilePath
base3    Text -> TTParser FilePath
parseData Text
"Base 3"
  let triplets :: [Codon Char]
triplets = (Char -> Char -> Char -> Codon Char)
-> FilePath -> FilePath -> FilePath -> [Codon Char]
forall a b c d. (a -> b -> c -> d) -> [a] -> [b] -> [c] -> [d]
zipWith3 Char -> Char -> Char -> Codon Char
forall c. c -> c -> c -> Codon c
Codon FilePath
base1 FilePath
base2 FilePath
base3
  let starts :: [Bool]
starts   = (Char -> Bool) -> FilePath -> [Bool]
forall a b. (a -> b) -> [a] -> [b]
map (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'M') FilePath
starts'
  let translations :: [TranslationElement Char Char]
translations = (Codon Char -> Bool -> Char -> TranslationElement Char Char)
-> [Codon Char]
-> [Bool]
-> FilePath
-> [TranslationElement Char Char]
forall a b c d. (a -> b -> c -> d) -> [a] -> [b] -> [c] -> [d]
zipWith3 Codon Char -> Bool -> Char -> TranslationElement Char Char
forall c a. Codon c -> Bool -> a -> TranslationElement c a
TranslationElement [Codon Char]
triplets [Bool]
starts FilePath
aas
  TranslationTable Char Char
-> ParsecT Void Text Identity (TranslationTable Char Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (TranslationTable Char Char
 -> ParsecT Void Text Identity (TranslationTable Char Char))
-> TranslationTable Char Char
-> ParsecT Void Text Identity (TranslationTable Char Char)
forall a b. (a -> b) -> a -> b
$ Int
-> Text
-> [TranslationElement Char Char]
-> TranslationTable Char Char
forall c a.
(Ord c, Ord a) =>
Int -> Text -> [TranslationElement c a] -> TranslationTable c a
genTranslationTable Int
i Text
hdr [TranslationElement Char Char]
translations

-- | Parse the header, returning the Identifier and the name of the table.

parseHeader :: TTParser (Int,Text)
parseHeader :: TTParser (Int, Text)
parseHeader
  = (,) (Int -> Text -> (Int, Text))
-> ParsecT Void Text Identity Int
-> ParsecT Void Text Identity (Text -> (Int, Text))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Integer -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Integer -> Int)
-> ParsecT Void Text Identity Integer
-> ParsecT Void Text Identity Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT Void Text Identity ()
-> ParsecT Void Text Identity Integer
-> ParsecT Void Text Identity Integer
forall e s (m :: * -> *) a. MonadParsec e s m => m () -> m a -> m a
lexeme ParsecT Void Text Identity ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
MC.space ParsecT Void Text Identity Integer
forall e s (m :: * -> *) a.
(MonadParsec e s m, Token s ~ Char, Num a) =>
m a
decimal)
  ParsecT Void Text Identity (Text -> (Int, Text))
-> ParsecT Void Text Identity Char
-> ParsecT Void Text Identity (Text -> (Int, Text))
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
':' ParsecT Void Text Identity (Text -> (Int, Text))
-> ParsecT Void Text Identity ()
-> ParsecT Void Text Identity (Text -> (Int, Text))
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParsecT Void Text Identity ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
MC.space
  ParsecT Void Text Identity (Text -> (Int, Text))
-> ParsecT Void Text Identity Text -> TTParser (Int, Text)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Maybe FilePath
-> (Token Text -> Bool) -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe FilePath -> (Token s -> Bool) -> m (Tokens s)
takeWhileP Maybe FilePath
forall a. Maybe a
Nothing (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\n')
  TTParser (Int, Text)
-> ParsecT Void Text Identity () -> TTParser (Int, Text)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParsecT Void Text Identity ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
MC.space

parseData :: Text -> TTParser String
parseData :: Text -> TTParser FilePath
parseData Text
t
  = Tokens Text -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
(MonadParsec e s m, FoldCase (Tokens s)) =>
Tokens s -> m (Tokens s)
string' Text
Tokens Text
t ParsecT Void Text Identity Text
-> ParsecT Void Text Identity () -> ParsecT Void Text Identity Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParsecT Void Text Identity ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
MC.space
  ParsecT Void Text Identity Text
-> TTParser FilePath -> TTParser FilePath
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Text -> FilePath
unpack (Text -> FilePath)
-> ParsecT Void Text Identity Text -> TTParser FilePath
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe FilePath -> Int -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe FilePath -> Int -> m (Tokens s)
takeP Maybe FilePath
forall a. Maybe a
Nothing Int
64)
  TTParser FilePath
-> ParsecT Void Text Identity () -> TTParser FilePath
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParsecT Void Text Identity ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
MC.space