{-# LANGUAGE OverloadedStrings #-}
module Blast
( filterBlastBamRows
, blastBamToFasta
) where
import Data.Maybe
import qualified Data.Set as Set
import qualified Control.Foldl as Fold
import qualified Data.Text as T
import Safe
import Turtle
import Turtle.Line
import Types
getBlastSet :: Command -> Maybe Args -> Subject -> Query -> Shell BlastSet
getBlastSet (Command cmd) args (Subject sub) query = do
let queryIn (QueryFile x) = input x
queryIn (QueryShell x) = x
argsIn = maybe [] (fmap T.pack . words . unArgs) args
blastOutput = fmap fromJust
. mfilter isJust
. fmap (headMay . cut "\t" . lineToText)
. inproc cmd ( [ "-subject", format fp sub
, "-outfmt", "6"
, "-max_target_seqs", "1"
]
<> argsIn
)
. queryIn
$ query
blastSet <- fmap (BlastSet . Set.fromList) . fold blastOutput $ Fold.list
return blastSet
filterBlastBamRows :: Int -> Command -> Maybe Args -> Subject -> Query -> [BamRow] -> Shell [BamRow]
filterBlastBamRows accField cmd args sub query rows = do
blastSet <- getBlastSet cmd args sub $ query
let newRows =
filter
(flip Set.member (unBlastSet blastSet) . (`at` accField) . unBamRow)
rows
return newRows
blastBamToFasta :: BamRow -> [T.Text]
blastBamToFasta (BamRow row) =
[ ">" <> (fromMaybe (error "No accession in bam.") . headMay $ row)
, fromMaybe (error "No sequence in bam.") $ row `atMay` 9
]