module Bio.SeqLoc.Transcript
(
Junction (..)
, fromDonorAcceptor, donor, acceptor
, junctions
, Transcript(..), utr5, utr3
, cdsLocation
, sortContigs
)
where
import Control.Applicative
import Control.Monad
import qualified Data.ByteString.Char8 as BS
import Data.List
import Data.Ord
import qualified Data.Attoparsec.Zepto as ZP
import Bio.SeqLoc.LocRepr
import qualified Bio.SeqLoc.Location as Loc
import Bio.SeqLoc.OnSeq
import qualified Bio.SeqLoc.Position as Pos
import qualified Bio.SeqLoc.SpliceLocation as SpLoc
import Bio.SeqLoc.Strand
-- | A splice junction, stored as the contiguous location of the intron
-- lying between two neighbouring contigs (see 'junctions').
newtype Junction = Junction
  { intron :: Loc.ContigLoc -- ^ the wrapped contiguous location
  }
  deriving (Show)
-- | Separator placed between the donor and acceptor positions in the
-- textual representation of a 'Junction'.
slash :: BS.ByteString
slash = BS.singleton '/'
-- | A junction is rendered as the representation of its 'donor', then
-- 'slash', then the representation of its 'acceptor'.  Parsing reads the
-- same three pieces in that order and rebuilds the junction with
-- 'fromDonorAcceptor'.
instance LocRepr Junction where
  repr j = BS.intercalate slash [repr (donor j), repr (acceptor j)]
  unrepr = liftA2 fromDonorAcceptor unrepr (ZP.string slash *> unrepr)
-- | Build a 'Junction' from a donor position @d@ and an acceptor
-- position @a@.
--
-- The wrapped location is created with 'Loc.fromPosLen', starting at @d@
-- slid by one: @+1@ when @d@ is on the 'Fwd' strand and @-1@ when it is
-- on the 'RevCompl' strand.  Only the strand of @d@ is consulted; the
-- strand of @a@ is ignored.
fromDonorAcceptor :: Pos.Pos -> Pos.Pos -> Junction
fromDonorAcceptor d a = case Pos.strand d of
  Fwd      -> Junction $! Loc.fromPosLen (Pos.slide d 1) len
  -- On the reverse strand the intron begins one offset *below* the donor.
  RevCompl -> Junction $! Loc.fromPosLen (Pos.slide d (-1)) len
  where
    -- Distance between the two offsets, independent of their order.
    -- NOTE(review): if 'Loc.fromPosLen' takes the number of positions in
    -- the intron, a donor/acceptor pair flanking the intron would give
    -- @abs (...) - 1@ rather than @1 + abs (...)@ -- confirm against the
    -- 'donor' / 'acceptor' round trip before relying on this length.
    len = 1 + abs (Pos.offset a - Pos.offset d)
-- | Donor position of a junction: the start position of the intron
-- location after extending it by @(1, 0)@.
donor :: Junction -> Pos.Pos
donor j = Loc.startPos (Loc.extend (1, 0) (intron j))
-- | Acceptor position of a junction: the end position of the intron
-- location after extending it by @(0, 1)@.
acceptor :: Junction -> Pos.Pos
acceptor j = Loc.endPos (Loc.extend (0, 1) (intron j))
-- | Junctions between each pair of consecutive contigs of a spliced
-- location, in the order the contigs are returned by 'Loc.toContigs'.
--
-- A location with fewer than two contigs yields the empty list.
junctions :: SpLoc.SpliceLoc -> [Junction]
junctions sploc = zipWith junction contigs (drop 1 contigs)
  where
    contigs = Loc.toContigs sploc

    -- Junction between an upstream contig @c5@ and the next contig @c3@.
    junction c5 c3 = Junction $ Loc.fromPosLen p5 len
      where
        -- End of the upstream contig once extended by @(0, 1)@.
        p5 = Loc.endPos . Loc.extend (0, 1) $ c5
        -- Start of the downstream contig once extended by @(1, 0)@.
        p3 = Loc.startPos . Loc.extend (1, 0) $ c3
        -- Inclusive span between the two positions, regardless of order.
        len = 1 + abs (Pos.offset p3 - Pos.offset p5)
-- | A transcript: two sequence-name identifiers, a spliced location on a
-- named sequence, and an optional coding region.  All fields are strict.
data Transcript = Transcript
  { geneId   :: !SeqName
    -- ^ name of the gene (presumably the gene this transcript belongs to)
  , trxId    :: !SeqName
    -- ^ name of the transcript itself
  , location :: !SpliceSeqLoc
    -- ^ spliced location of the transcript on its sequence
  , cds      :: !(Maybe Loc.ContigLoc)
    -- ^ coding region, if any; 'cdsLocation' maps it through 'location',
    -- so it is presumably given in transcript coordinates -- confirm
  }
  deriving (Show)
-- | Region ahead of the coding region, as a forward-strand location
-- running from offset 0 up to the offset just before the CDS start.
--
-- Returns 'Nothing' when the transcript has no 'cds', when the CDS start
-- position is not on the 'Fwd' strand, or when the CDS start offset is
-- not greater than 0 (there is nothing in front of it).
utr5 :: Transcript -> Maybe Loc.ContigLoc
utr5 trx = cds trx >>= utr5loc
  where
    utr5loc cdsloc = case Loc.startPos cdsloc of
      Pos.Pos startoff Fwd
        -- The last offset of the region is the one preceding the CDS start.
        | startoff > 0 -> Just $! Loc.fromBoundsStrand 0 (startoff - 1) Fwd
      _ -> Nothing
-- | Region after the coding region, as a forward-strand location running
-- from the offset just past the CDS end up to the upper bound of the
-- transcript location.
--
-- Returns 'Nothing' when the transcript has no 'cds', when the CDS end
-- position is not on the 'Fwd' strand, or when the CDS end offset is not
-- below that upper bound.
utr3 :: Transcript -> Maybe Loc.ContigLoc
utr3 trx = do
  coding <- cds trx
  case Loc.endPos coding of
    Pos.Pos cdsEnd Fwd
      | cdsEnd < lastOff -> Just $! Loc.fromBoundsStrand (cdsEnd + 1) lastOff Fwd
    _ -> Nothing
  where
    -- Second component of the bounds of the transcript's location.
    lastOff = snd (Loc.bounds (unOnSeq (location trx)))
-- | Coding region of a transcript, obtained by passing the 'cds' through
-- 'Loc.clocOutof' with the transcript's spliced location and re-attaching
-- the sequence name.
--
-- Returns 'Nothing' when the transcript has no 'cds' or when
-- 'Loc.clocOutof' fails.
cdsLocation :: Transcript -> Maybe SpliceSeqLoc
cdsLocation trx = do
  coding <- cds trx
  OnSeq seqname <$> Loc.clocOutof coding sploc
  where
    OnSeq seqname sploc = location trx
-- | Sort contigs that all lie on the same strand.
--
-- On 'Fwd' the contigs are ordered by ascending 'Loc.offset5'; on
-- 'RevCompl' they are ordered by ascending negated 'Loc.offset5'.
-- Returns 'Nothing' for an empty list or when the contigs do not all
-- share the strand of the first one.
sortContigs :: [Loc.ContigLoc] -> Maybe [Loc.ContigLoc]
sortContigs [] = Nothing
sortContigs cs@(c0:_)
  | sameStrand = Just (sortBy ordering cs)
  | otherwise  = Nothing
  where
    strand0 = Loc.strand c0
    sameStrand = all (\c -> Loc.strand c == strand0) cs

    -- Comparator chosen by the shared strand.
    ordering :: Loc.ContigLoc -> Loc.ContigLoc -> Ordering
    ordering = case strand0 of
      Fwd      -> comparing Loc.offset5
      RevCompl -> comparing (negate . Loc.offset5)