-- | Functions for parsing Stockholm alignments
module Bio.StockholmParser (
                       readExistingStockholm,
                       parseStockholm,
                       readStockholm,
                       module Bio.StockholmData
                      ) where

import Bio.StockholmData
import Text.ParserCombinators.Parsec
import qualified Control.Exception.Base as CE
import qualified Data.Text as T
import Data.List
import System.Directory
import Data.Either.Unwrap

readExistingStockholm :: String -> IO (Either String [StockholmAlignment])
readExistingStockholm filePath =
  if null filePath
    then return (Left "")
    else do
      fileExists <- doesFileExist filePath
      if fileExists
         then do
          stockholmInput <- readStockholm filePath
          if isLeft stockholmInput
             then return (Left (show stockholmInput))
             else return (Right (fromRight stockholmInput))
         else return (Left ("Could not find stockholm alignment file with path:" ++ filePath))

-- | parse 
parseStockholm :: String -> Either ParseError [StockholmAlignment]
parseStockholm input = parse genParseStockholms "Stockholm" input

-- | parse StockholmAlignment from input filePath                      
readStockholm :: String -> IO (Either ParseError [StockholmAlignment])
readStockholm filePath = do
  parsedFile <- parseFromFile genParseStockholms filePath
  CE.evaluate parsedFile

-- | Parse the input as StockholmAlignment
genParseStockholms :: GenParser Char st [StockholmAlignment]
genParseStockholms = do
  alns <- many1 genParseStockholm
  eof
  return alns


-- | Parse the input as StockholmAlignment
genParseStockholm :: GenParser Char st StockholmAlignment
genParseStockholm = do
  string "# STOCKHOLM"
  many1 (try (string " "))
  _version <- many1 (noneOf "\n")
  many (try newline)
  _stockholmToken <- many1 genParseToken
  string "//\n"
  optional (try (string "\n"))
  return (tokenToStockholm (T.pack _version) _stockholmToken)

-- | Parse the input as StockholmAlignment datatype
genParseToken :: GenParser Char st StockholmToken
genParseToken = do
  choice [try genParseTokFileA, try genParseTokColA, try genParseTokResA, try genParseTokSeqA, try genParseTokSeq]

genParseTokFileA :: GenParser Char st StockholmToken
genParseTokFileA = do
  many newline
  string "#=GF"
  char ' '
  _tag <- many1 (oneOf "ABCDEFGHIJKLMNOPQRSTUVWXYZ*")
  many1 (char ' ')
  _info <- many1 (noneOf "\n")
  newline
  return (TokFileA (T.pack _tag) (T.pack _info))

genParseTokColA :: GenParser Char st StockholmToken
genParseTokColA = do
  many newline
  string "#=GC"
  char ' '
  _tag <- many1 (noneOf " \n")
  many1 (char ' ')
  _info <- many1 (noneOf "\n")
  newline
  return $ TokColA (T.pack _tag) (T.pack _info)

genParseTokResA :: GenParser Char st StockholmToken
genParseTokResA = do
  many newline
  string "#=GR"
  char ' '
  _id <- many1 (noneOf " \n")
  many1 (char ' ')
  _tag <- many1 (noneOf " \n")
  _info <- many1 (noneOf "\n")
  newline
  return $ TokResA (T.pack _id) (T.pack _tag) (T.pack _info)

genParseTokSeqA :: GenParser Char st StockholmToken
genParseTokSeqA = do
  many newline
  string "#=GS"
  char ' '
  _id <- many1 (noneOf " \n")
  many1 (char ' ')
  _tag <- many1 (noneOf " \n")
  _info <- many1 (noneOf "\n")
  return $ TokSeqA (T.pack _id) (T.pack  _tag) (T.pack _info)

genParseTokSeq :: GenParser Char st StockholmToken
genParseTokSeq = do
  many newline
  _sid <- many1 (noneOf " \n")
  many1 (char ' ')
  _sequence <- many1 (oneOf "ABCDEFGHIJKLMNOPQRSTUVWXYZ-.")
  newline
  return $ TokSeq (T.pack _sid) (T.pack _sequence)

tokenToStockholm :: T.Text -> [StockholmToken] -> StockholmAlignment
tokenToStockholm _version _token = StockholmAlignment _version _fileAnnotation _columnAnnotation _sequenceEntries
  where _fileAtoken = filter isFileTok _token
        _colAtoken = filter isColATok _token
        _resAtoken = filter isResATok _token
        _seqAtoken = filter isSeqATok _token
        _seqtoken = filter isSeqTok _token
        _fileAnnotation = mergeFileToken _fileAtoken
        _columnAnnotation = mergeColToken _colAtoken
        mergedSeqAToken = mergeSeqAToken _seqAtoken
        mergedRAToken = mergeResAToken _resAtoken
        _sequenceEntries = buildSeqEntries mergedSeqAToken mergedRAToken _seqtoken

isFileTok :: StockholmToken -> Bool
isFileTok (TokFileA _ _) = True
isFileTok _ = False

isColATok :: StockholmToken -> Bool
isColATok (TokColA _ _) = True
isColATok _ = False

isResATok :: StockholmToken -> Bool
isResATok (TokResA{}) = True
isResATok _ = False

isSeqATok :: StockholmToken -> Bool
isSeqATok (TokSeqA{}) = True
isSeqATok _ = False

isSeqTok :: StockholmToken -> Bool
isSeqTok (TokSeq _ _) = True
isSeqTok _ = False

mergeFileToken :: [StockholmToken] -> [AnnotationEntry]
mergeFileToken _token = entries
  where tags = nub (map fTag _token)
        entries = map (buildFEntry _token) tags

buildFEntry ::  [StockholmToken] ->  T.Text -> AnnotationEntry
buildFEntry  _token currentTag = entry
  where tagToken = filter (\t -> fTag t == currentTag) _token
        tagInfos = T.concat (map fInfo tagToken)
        entry = AnnotationEntry currentTag tagInfos

mergeColToken :: [StockholmToken] -> [AnnotationEntry]
mergeColToken _token = entries
  where tags = nub (map cTag _token)
        entries = map (buildCEntry _token) tags

buildCEntry :: [StockholmToken] -> T.Text -> AnnotationEntry
buildCEntry _token currentTag = entry
  where tagToken = filter (\t -> cTag t == currentTag) _token
        tagInfos = T.concat (map cInfo tagToken)
        entry = AnnotationEntry currentTag tagInfos

mergeSeqAToken :: [StockholmToken] -> [StockholmToken]
mergeSeqAToken _token = entries
  where aIds = nub (map aId _token)
        entries = concatMap (mergeSAIdToken _token) aIds

mergeSAIdToken :: [StockholmToken] -> T.Text -> [StockholmToken]
mergeSAIdToken _token currentId = tagIdToken
  where idToken = filter (\t -> aId t == currentId) _token
        tags = nub (map aTag idToken)
        tagIdToken = map (mergeSAIdTagToken idToken currentId) tags

mergeSAIdTagToken :: [StockholmToken] ->  T.Text -> T.Text -> StockholmToken
mergeSAIdTagToken _token currentId currentTag = entry
  where tagToken = filter (\t -> aId t == currentId) _token
        tagInfos = T.concat (map aInfo tagToken)
        entry = TokSeqA currentId currentTag tagInfos

mergeResAToken :: [StockholmToken] -> [StockholmToken]
mergeResAToken _token = entries
  where rIds = nub (map rId _token)
        entries = concatMap (mergeRAIdToken _token) rIds

mergeRAIdToken :: [StockholmToken] -> T.Text -> [StockholmToken]
mergeRAIdToken _token currentId = tagIdToken
  where idToken = filter (\t -> rId t == currentId) _token
        tags = nub (map rTag idToken)
        tagIdToken = map (mergeRAIdTagToken idToken currentId) tags

mergeRAIdTagToken :: [StockholmToken] ->  T.Text -> T.Text -> StockholmToken
mergeRAIdTagToken _token currentId currentTag= entry
  where tagToken = filter (\t -> rId t == currentId) _token
        tagInfos = T.concat (map rInfo tagToken)
        entry = TokResA currentId currentTag tagInfos

buildSeqEntries :: [StockholmToken] -> [StockholmToken] -> [StockholmToken] -> [SequenceEntry]
buildSeqEntries  seqA resA _token= entries
  where currentId = map sId _token
        entries = map (buildSeqEntry seqA resA _token) currentId

buildSeqEntry :: [StockholmToken] -> [StockholmToken] -> [StockholmToken] -> T.Text -> SequenceEntry
buildSeqEntry seqAtok resAtok _token currentId = entry
  where idToken = filter (\t -> sId t == currentId ) _token
        idSAToken = filter (\t -> aId t == currentId ) seqAtok
        idRAToken = filter (\t -> rId t == currentId ) resAtok
        seqA = map buildSAEntry idSAToken
        resA = map buildRAEntry idRAToken
        tagInfos = T.concat (map sSeq idToken)
        entry = SequenceEntry currentId tagInfos seqA resA


buildSAEntry :: StockholmToken -> AnnotationEntry
buildSAEntry tok = AnnotationEntry (aTag tok) (aInfo tok)

buildRAEntry :: StockholmToken -> AnnotationEntry
buildRAEntry tok = AnnotationEntry (rTag tok) (rInfo tok)