module Bio.SeqLoc.Location
(
Location(..), overlaps
, ContigLoc, offset5, fromStartEnd, fromPosLen, fromBoundsStrand
, slide
)
where
import Prelude hiding (length)
import Control.Applicative
import Control.Monad
import qualified Data.ByteString.Char8 as BS
import qualified Data.Attoparsec.Zepto as ZP
import Bio.Core.Strand
import Bio.SeqLoc.LocRepr
import qualified Bio.SeqLoc.Position as Pos
import Bio.SeqLoc.Strand
import qualified Bio.SeqLoc.SeqLike as SeqLike
-- | Operations shared by sequence-location types.  The notes on each
-- method describe what the signatures and the 'ContigLoc' instance in
-- this module show; other instances are presumably analogous.
class Location l where
  -- | Strand of the location.
  strand :: l -> Strand
  -- | Length of the location.
  length :: l -> Pos.Offset
  -- | Pair of offsets delimiting the location.  For 'ContigLoc' these
  -- are the lowest and highest offsets covered, independent of strand.
  bounds :: l -> (Pos.Offset, Pos.Offset)
  -- | Stranded position at which the location starts.
  startPos :: l -> Pos.Pos
  -- | Stranded position at which the location ends.
  endPos :: l -> Pos.Pos
  -- | Extract the part of a sequence covered by the location, oriented
  -- to the location's strand.  The 'ContigLoc' instance delegates to
  -- 'SeqLike.subseq'; 'Nothing' presumably means the sequence does not
  -- cover the whole location (confirm against "Bio.SeqLoc.SeqLike").
  seqData :: (SeqLike.SeqLike s, Stranded s) => s -> l -> Maybe s
  -- | Total variant of 'seqData'.  The 'ContigLoc' instance delegates
  -- to 'SeqLike.subseqPad', which presumably pads missing positions.
  seqDataPad :: (SeqLike.SeqLike s, Stranded s) => s -> l -> s
  -- | Re-express a position relative to the location, or 'Nothing'
  -- when that is not possible (for 'ContigLoc': the position lies
  -- outside the location).
  posInto :: Pos.Pos -> l -> Maybe Pos.Pos
  -- | Map a location-relative position back out to the outer
  -- coordinate system, or 'Nothing' when it is out of range.
  posOutof :: Pos.Pos -> l -> Maybe Pos.Pos
  -- | Like 'posInto', for a whole contiguous span.
  clocInto :: ContigLoc -> l -> Maybe ContigLoc
  -- | Like 'posOutof', for a whole contiguous span; the result has
  -- the same type as the location it was mapped out of.
  clocOutof :: ContigLoc -> l -> Maybe l
  -- | Grow the location by a pair of amounts, one for each end.
  extend :: (Pos.Offset, Pos.Offset) -> l -> l
  -- | Does the (unstranded) offset fall inside the location?
  offsetWithin :: Pos.Offset -> l -> Bool
  -- | Does the stranded position fall inside the location?  The
  -- 'ContigLoc' instance also requires the strands to be equal.
  posWithin :: Pos.Pos -> l -> Bool
  -- | Does the contiguous span overlap the location?
  contigOverlaps :: ContigLoc -> l -> Bool
  -- | The location as a list of contiguous spans.
  toContigs :: l -> [ContigLoc]
-- | Two locations overlap when at least one contiguous piece of the
-- second one overlaps the first (as judged by 'contigOverlaps').
overlaps :: (Location l1, Location l2) => l1 -> l2 -> Bool
overlaps l1 l2 = any (`contigOverlaps` l1) (toContigs l2)
-- | A contiguous span on a sequence, described by its lowest offset,
-- its length and its strand.
data ContigLoc = ContigLoc { offset5 :: !Pos.Offset    -- ^ Lowest offset covered by the span.
                           , clocLength :: !Pos.Offset -- ^ Number of offsets covered.
                           , clocStrand :: !Strand     -- ^ Strand of the span.
                           } deriving (Eq, Ord, Show)
-- | Reverse-complementing a span flips its strand only; the offset and
-- length are left untouched.
instance Stranded ContigLoc where
  revCompl cloc = cloc { clocStrand = revCompl (clocStrand cloc) }
-- | Separator placed between the two bound offsets in the textual
-- representation of a 'ContigLoc' (used by both @repr@ and @unrepr@).
to :: BS.ByteString
to = BS.pack "to"
-- | Textual form of a span: lower bound, the separator 'to', upper
-- bound, then the strand, with nothing in between.
instance LocRepr ContigLoc where
  repr cloc = BS.concat [ repr lower, to, repr upper, repr (strand cloc) ]
    where
      (lower, upper) = bounds cloc

  -- Parses the same four pieces in the same order and rebuilds the
  -- span through 'fromBoundsStrand'.
  unrepr = fromBoundsStrand
           <$> unrepr
           <* ZP.string to
           <*> unrepr
           <*> unrepr
-- | A single contiguous span as a 'Location'.
--
-- Offsets covered are @offset5 .. offset5 + length - 1@ (inclusive), so
-- every "far end" computation below subtracts one from @seq5 + len@.
instance Location ContigLoc where
  strand = clocStrand
  length = clocLength

  -- Cut the covered region out of the sequence, then orient it to the
  -- strand of the location.
  seqData sequ (ContigLoc seq5 len str) = liftM (stranded str) . SeqLike.subseq seq5 len $ sequ
  seqDataPad sequ (ContigLoc seq5 len str) = stranded str . SeqLike.subseqPad seq5 len $ sequ

  posInto = clocPosInto
  posOutof = clocPosOutof

  -- Inclusive (lowest, highest) offsets, ignoring strand.
  bounds (ContigLoc seq5 len _) = (seq5, seq5 + len - 1)

  -- On the Plus strand the span starts at its lowest offset; on the
  -- Minus strand it starts at its highest offset.
  startPos (ContigLoc seq5 len str)
    = case str of
        Plus -> Pos.Pos seq5 str
        Minus -> Pos.Pos (seq5 + len - 1) str

  -- Mirror image of 'startPos'.
  endPos (ContigLoc seq5 len str)
    = case str of
        Plus -> Pos.Pos (seq5 + len - 1) str
        Minus -> Pos.Pos seq5 str

  clocInto = clocClocInto
  clocOutof = clocClocOutof
  extend = clocExtend

  -- Half-open test on [seq5, seq5 + len); strand is not considered.
  offsetWithin off (ContigLoc seq5 len _)
    = (off >= seq5) && (off < seq5 + len)

  -- Same range test, and additionally the strands must agree.
  posWithin (Pos.Pos pos pStrand) (ContigLoc seq5 len cStrand)
    = (pos >= seq5) && (pos < seq5 + len) && (cStrand == pStrand)

  contigOverlaps = clocOverlaps

  -- A contiguous span is its own single piece.
  toContigs = (: [])
-- | Build a span from its inclusive lower and upper offsets and a strand.
--
-- The length is @1 + seq3 - seq5@ because both bounds are included.
-- Calls 'error' when the upper bound is below the lower bound.
fromBoundsStrand :: Pos.Offset -> Pos.Offset -> Strand -> ContigLoc
fromBoundsStrand seq5 seq3 str
  | seq3 < seq5 = error "Bio.SeqLoc.Location.fromBoundsStrand: seq3 < seq5"
  | otherwise = ContigLoc seq5 (1 + seq3 - seq5) str
-- | Build a span from inclusive start and end offsets, inferring the
-- strand from their order.
--
-- When @start < end@ the span is on the 'Plus' strand; otherwise it is
-- on the 'Minus' strand.  Note that @start == end@ therefore yields a
-- length-one span on the 'Minus' strand.
fromStartEnd :: Pos.Offset -> Pos.Offset -> ContigLoc
fromStartEnd start end
  | start < end = ContigLoc start (1 + end - start) Plus
  | otherwise = ContigLoc end (1 + start - end) Minus
-- | Build a span of the given length that starts at the given stranded
-- position and takes its strand from that position.
--
-- For a 'Plus' position the position is the lowest offset of the span.
-- For a 'Minus' position the position is the highest offset, so the
-- lowest offset is @off3 - (len - 1)@.  Calls 'error' on a negative
-- length.
fromPosLen :: Pos.Pos -> Pos.Offset -> ContigLoc
fromPosLen _ len | len < 0 = error "Bio.SeqLoc.Location.fromPosLen: len < 0"
fromPosLen (Pos.Pos off5 Plus) len = ContigLoc off5 len Plus
fromPosLen (Pos.Pos off3 Minus) len = ContigLoc (off3 - (len - 1)) len Minus
-- | Shift a span along the sequence by the given amount, keeping its
-- length and strand.
slide :: Pos.Offset -> ContigLoc -> ContigLoc
slide dpos cloc = cloc { offset5 = offset5 cloc + dpos }
-- | Re-express an outer position in coordinates relative to a span.
--
-- Returns 'Nothing' when the position's offset is outside
-- @[seq5, seq5 + len)@.  For a 'Plus' span the result is simply the
-- distance from the span's lowest offset and the strand is kept.  For
-- a 'Minus' span the offset is counted back from the span's highest
-- offset and the position's strand is reverse-complemented.
clocPosInto :: Pos.Pos -> ContigLoc -> Maybe Pos.Pos
clocPosInto (Pos.Pos pos pStrand) (ContigLoc seq5 len cStrand)
  | pos < seq5 || pos >= seq5 + len = Nothing
  | otherwise = Just $ case cStrand of
      Plus -> Pos.Pos (pos - seq5) pStrand
      Minus -> Pos.Pos (seq5 + len - (pos + 1)) (revCompl pStrand)
-- | Map a span-relative position back to outer coordinates.
--
-- Returns 'Nothing' when the relative offset is outside @[0, len)@.
-- For a 'Plus' span the span's lowest offset is added and the strand is
-- kept.  For a 'Minus' span the offset is counted back from the span's
-- highest offset and the position's strand is reverse-complemented.
clocPosOutof :: Pos.Pos -> ContigLoc -> Maybe Pos.Pos
clocPosOutof (Pos.Pos pos pStrand) (ContigLoc seq5 len cStrand)
  | pos < 0 || pos >= len = Nothing
  | otherwise = Just $ case cStrand of
      Plus -> Pos.Pos (pos + seq5) pStrand
      Minus -> Pos.Pos (seq5 + len - (pos + 1)) (revCompl pStrand)
-- | Grow a span by @ext5@ at its start and @ext3@ at its end, where
-- start and end are taken relative to the span's own strand.
--
-- The length always grows by @ext5 + ext3@.  The lowest offset moves
-- down by @ext5@ on the 'Plus' strand and by @ext3@ on the 'Minus'
-- strand, because on 'Minus' the end of the span is its lowest offset.
clocExtend :: (Pos.Offset, Pos.Offset) -> ContigLoc -> ContigLoc
clocExtend (ext5, ext3) (ContigLoc seq5 len str)
  = case str of
      Plus -> ContigLoc (seq5 - ext5) (len + ext5 + ext3) str
      Minus -> ContigLoc (seq5 - ext3) (len + ext5 + ext3) str
-- | Re-express one span in coordinates relative to another.
--
-- Both the start and the end position of @subcloc@ must map into
-- @tocloc@; the result is then rebuilt from the mapped start position
-- and the unchanged length.  Otherwise the result is 'Nothing'.
clocClocInto :: ContigLoc -> ContigLoc -> Maybe ContigLoc
clocClocInto subcloc tocloc = do
  start <- posInto (startPos subcloc) tocloc
  -- The mapped end is only checked for existence, not used.
  _ <- posInto (endPos subcloc) tocloc
  return (fromPosLen start (length subcloc))
-- | Map a span given relative to @fromcloc@ back to outer coordinates.
--
-- Both the start and the end position of @subcloc@ must map out of
-- @fromcloc@; the result is then rebuilt from the mapped start position
-- and the unchanged length.  Otherwise the result is 'Nothing'.
clocClocOutof :: ContigLoc -> ContigLoc -> Maybe ContigLoc
clocClocOutof subcloc fromcloc = do
  start <- posOutof (startPos subcloc) fromcloc
  -- The mapped end is only checked for existence, not used.
  _ <- posOutof (endPos subcloc) fromcloc
  return (fromPosLen start (length subcloc))
-- | Two spans overlap when they are on the same strand and their
-- inclusive offset ranges intersect.
clocOverlaps :: ContigLoc -> ContigLoc -> Bool
clocOverlaps contig1 contig2 = sameStrand && (low1 <= high2) && (low2 <= high1)
  where
    sameStrand = strand contig1 == strand contig2
    (low1, high1) = bounds contig1
    (low2, high2) = bounds contig2