{-# LANGUAGE OverloadedStrings #-}
module Biobase.BLAST.HTTP ( BlastHTTPQuery (..),
blastHTTP,
blastTabularHTTP,
) where
import Network.HTTP.Conduit
import qualified Data.ByteString.Lazy.Char8 as L8
import qualified Data.ByteString.Char8 as B
import qualified Control.Monad as CM
import Text.XML.HXT.Core hiding (trace)
import Network.Socket
import Data.List
import Control.Concurrent
import Data.Maybe
import Network.HTTP.Base
import Biobase.BLAST.Import
import Biobase.BLAST.Types
import Biobase.Fasta.Strict
import qualified Data.Either.Unwrap as E
import Data.Int
import qualified Codec.Archive.Zip as CZ
data BlastHTTPQuery = BlastHTTPQuery
{ provider :: Maybe String
, program :: Maybe String
, database :: Maybe String
, querySequences :: [Fasta () ()]
, optionalArguments :: Maybe String
, optionalWalltime :: Maybe Int
}
deriving (Show, Eq)
parseHTML :: String -> IOStateArrow s0 b0 XmlTree
parseHTML = readString [withParseHTML yes, withWarnings no]
atId :: ArrowXml a => String -> a XmlTree XmlTree
atId elementId = deep (isElem >>> hasAttrValue "id" (== elementId))
startSession :: String -> String -> String -> String -> Maybe String -> IO String
startSession provider' program' database' querySequences' optionalArguments'
| provider' == "ebi" = startSessionEBI program' database' querySequences' optionalArguments'
| otherwise = startSessionNCBI program' database' querySequences' optionalArguments'
startSessionEBI :: String -> String -> String -> Maybe String -> IO String
startSessionEBI program' database' querySequences' optionalArguments' = do
requestXml <- withSocketsDo
$ sendQueryEBI program' database' querySequences' optionalArguments'
let requestID = L8.unpack requestXml
return requestID
startSessionNCBI :: String -> String -> String -> Maybe String -> IO String
startSessionNCBI program' database' querySequences' optionalArguments' = do
requestXml <- withSocketsDo
$ sendQueryNCBI program' database' querySequences' optionalArguments'
let requestXMLString = L8.unpack requestXml
CM.liftM head (runX $ parseHTML requestXMLString //> atId "rid" >>> getAttrValue "value")
sendQueryEBI :: String -> String -> String -> Maybe String -> IO L8.ByteString
sendQueryEBI program' database' querySequences' _ = do
putStrLn "Making HTTP request"
res <- do
initReq <- parseUrlThrow "http://www.ebi.ac.uk/Tools/services/rest/ncbiblast/run"
let req = (flip urlEncodedBody) initReq $
[ ("email", "florian.eggenhofer@univie.ac.at")
, ("program", (B.pack program'))
, ("database", (B.pack database'))
, ("stype", "dna")
, ("sequence", (B.pack querySequences'))
]
newManager tlsManagerSettings >>= httpLbs req
{ method = "POST" }
putStrLn "EBI Response"
print res
putStrLn "EBI Response Body"
print (responseBody res)
return (responseBody res)
sendQueryNCBI :: String -> String -> String -> Maybe String -> IO L8.ByteString
sendQueryNCBI program' database' querySequences' optionalArguments'
| isJust optionalArguments' = simpleHttp ("https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Put&PROGRAM=" ++ program' ++ "&DATABASE=" ++ database' ++ fromJust optionalArguments' ++ "&QUERY=" ++ querySequences')
| otherwise = simpleHttp ("https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Put&PROGRAM=" ++ program' ++ "&DATABASE=" ++ database' ++ "&QUERY=" ++ querySequences')
retrieveSessionStatus :: String -> String -> IO String
retrieveSessionStatus provider' rid = do
if provider' == "ebi"
then do
statusXml <- withSocketsDo $ simpleHttp ("http://www.ebi.ac.uk/Tools/services/rest/ncbiblast/status/" ++ rid)
let statusXMLString = L8.unpack statusXml
putStrLn "EBI statusXMLString"
return statusXMLString
else do
statusXml <- withSocketsDo $ simpleHttp ("https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_OBJECT=SearchInfo&RID=" ++ rid)
let statusXMLString = L8.unpack statusXml
return statusXMLString
retrieveTabularResult :: String -> String -> IO (Either String [BlastTabularResult])
retrieveTabularResult provider' rid = do
if provider' == "ebi"
then do
resultResponse <- withSocketsDo $ simpleHttp ("http://www.ebi.ac.uk/Tools/services/rest/ncbiblast/result/" ++ rid ++ "/Tabular")
let resultHeaderLessResponse = L8.drop (0 :: Int64) resultResponse
let resultTabular = parseTabularHTTPBlasts resultHeaderLessResponse
return (Right resultTabular)
else do
resultResponse <- withSocketsDo $ simpleHttp ("https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?RESULTS_FILE=on&RID=" ++ rid ++ "&FORMAT_TYPE=Tabular&FORMAT_OBJECT=Alignment&CMD=Get")
let resultHeaderLessResponse = L8.drop (60 :: Int64) resultResponse
let resultTabular = parseTabularHTTPBlasts resultHeaderLessResponse
return (Right resultTabular)
retrieveJSONResult :: String -> String -> IO (Either String BlastJSON2)
retrieveJSONResult provider' rid = do
resultResponse <- withSocketsDo $ simpleHttp ("https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?RESULTS_FILE=on&RID=" ++ rid ++ "&FORMAT_TYPE=JSON2&FORMAT_OBJECT=Alignment&CMD=Get")
let archive = CZ.toArchive resultResponse
let files = CZ.filesInArchive archive
let bs = CZ.fromEntry (fromJust (CZ.findEntryByPath (files !! 1) archive))
let eitherjson = parseJSONBlast bs
return eitherjson
checkSessionStatus :: String -> String -> Maybe Int -> Int -> IO (Either String String)
checkSessionStatus provider' rid walltime consumedTime = do
threadDelay 120000000
status <- retrieveSessionStatus provider' rid
if (isNothing walltime)
then do
waitOrRetrieve provider' status rid walltime consumedTime
else do
if (consumedTime < (fromJust walltime))
then do
waitOrRetrieve provider' status rid walltime (consumedTime + 120000000)
else do
let exceptionMessage = "BLASTHTTP: Query did not return result within walltime"
return (Left exceptionMessage)
waitOrRetrieve :: String -> String -> String -> Maybe Int -> Int -> IO (Either String String)
waitOrRetrieve provider' status rid walltime consumedTime
| provider' == "ebi" = waitOrRetrieveEBI status rid walltime consumedTime
| otherwise = waitOrRetrieveNCBI status rid walltime consumedTime
waitOrRetrieveEBI :: String -> String -> Maybe Int -> Int -> IO (Either String String)
waitOrRetrieveEBI status rid walltime consumedTime
| "FINISHED" `isInfixOf` status = return (Right rid)
| "FAILURE" `isInfixOf` status = do
let exceptionMessage = "BLASTHTTP: The EBI blast job failed."
return (Left exceptionMessage)
| "ERROR" `isInfixOf` status = do
let exceptionMessage = "BLASTHTTP: An error occurred attempting to get the EBI blast job status."
return (Left exceptionMessage)
| "NOT_FOUND" `isInfixOf` status = do
let exceptionMessage = "BLASTHTTP: The EBI blast job cannot be found."
return (Left exceptionMessage)
| otherwise = checkSessionStatus "ebi" rid walltime consumedTime
waitOrRetrieveNCBI :: String -> String -> Maybe Int -> Int -> IO (Either String String)
waitOrRetrieveNCBI status rid walltime consumedTime
| "Status=READY" `isInfixOf` status = return (Right rid)
| "Status=FAILURE" `isInfixOf` status = do
let exceptionMessage = "Search $rid failed; please report to blast-help at ncbi.nlm.nih.gov.\n"
return (Left exceptionMessage)
| "Status=UNKNOWN" `isInfixOf` status = do
let exceptionMessage = "Search $rid expired.\n"
return (Left exceptionMessage)
| "Status=WAITING" `isInfixOf` status = do
checkSessionStatus "ncbi" rid walltime consumedTime
| otherwise = do
let exceptionMessage = "Status has unexpected value " ++ status ++ " - aborting blast search\n"
return (Left exceptionMessage)
blastTabularHTTP :: BlastHTTPQuery -> IO (Either String [BlastTabularResult])
blastTabularHTTP (BlastHTTPQuery provider' program' database' querySequences' optionalArguments' walltime') = do
let defaultProvider = "ncbi"
let defaultProgram = "blastn"
let defaultDatabase = "refseq_genomic"
let defaultWalltime = Nothing
let selectedProvider = fromMaybe defaultProvider provider'
let selectedProgram = fromMaybe defaultProgram program'
let selectedDatabase = fromMaybe defaultDatabase database'
let selectedWalltime = maybe defaultWalltime Just walltime'
performTabularQuery selectedProvider selectedProgram selectedDatabase querySequences' optionalArguments' selectedWalltime
blastHTTP :: BlastHTTPQuery -> IO (Either String BlastJSON2)
blastHTTP (BlastHTTPQuery provider' program' database' querySequences' optionalArguments' walltime') = do
let defaultProvider = "ncbi"
let defaultProgram = "blastn"
let defaultDatabase = "refseq_genomic"
let defaultWalltime = Nothing
let selectedProvider = fromMaybe defaultProvider provider'
let selectedProgram = fromMaybe defaultProgram program'
let selectedDatabase = fromMaybe defaultDatabase database'
let selectedWalltime = maybe defaultWalltime Just walltime'
performJSONQuery selectedProvider selectedProgram selectedDatabase querySequences' optionalArguments' selectedWalltime
performTabularQuery :: String -> String -> String -> [Fasta () ()] -> Maybe String -> Maybe Int -> IO (Either String [BlastTabularResult])
performTabularQuery provider' program' database' querySequences' optionalArgumentMaybe walltime
| null querySequences' = do
let exceptionMessage = "Error - no query sequence provided"
return (Left exceptionMessage)
| otherwise = do
let sequenceString = urlEncode $ concatMap (convertString . fastaToByteString 999999999) querySequences'
rid <- startSession provider' program' database' sequenceString (Just (maybe "&FORMAT_TYPE=TABULAR" ("&FORMAT_TYPE=TABULAR" ++) optionalArgumentMaybe))
sessionStatus <- checkSessionStatus provider' rid walltime (0 :: Int)
if E.isRight sessionStatus
then retrieveTabularResult provider' rid
else return (Left (E.fromLeft sessionStatus))
performJSONQuery :: String -> String -> String -> [Fasta () ()] -> Maybe String -> Maybe Int -> IO (Either String BlastJSON2)
performJSONQuery provider' program' database' querySequences' optionalArgumentMaybe walltime
| null querySequences' = do
let exceptionMessage = "Error - no query sequence provided"
return (Left exceptionMessage)
| otherwise = do
let sequenceString = urlEncode $ concatMap (convertString . fastaToByteString 999999999) querySequences'
rid <- startSession provider' program' database' sequenceString (Just (maybe "" ("" ++) optionalArgumentMaybe))
sessionStatus <- checkSessionStatus provider' rid walltime (0 :: Int)
if E.isRight sessionStatus
then retrieveJSONResult provider' rid
else return (Left (E.fromLeft sessionStatus))