module Biobase.FR3D where
import Data.ByteString.Char8 as BS
import Data.List as L
import Biobase.Primary
import Biobase.Secondary
-- | One FR3D entry: an identifier, the chains it contains and the
-- basepair annotations between nucleotides of those chains.
data FR3D = FR3D
  { pdbid     :: ByteString
    -- ^ Identifier of the entry (presumably the PDB id -- confirm against
    -- the importer).
  , chains    :: [(ByteString, ByteString)]
    -- ^ Association list of chains. The first component is the key that
    -- 'chain1' / 'chain2' are looked up with, the second is the chain's
    -- nucleotide sequence, which 'seqpos1' / 'seqpos2' index into.
  , basepairs :: [Basepair]
    -- ^ All annotated basepairs of this entry.
  }
  deriving Show
-- | A single annotated basepair. Fields ending in @1@ describe the first
-- partner, fields ending in @2@ the second partner.
data Basepair = Basepair
  { interaction :: ExtPairAnnotation
    -- ^ Extended pair annotation of the interaction.
  , nucleotide1 :: Char
    -- ^ Nucleotide character of the first partner; 'checkFR3D' expects it
    -- to equal the character at 'seqpos1' in the sequence of 'chain1'.
  , pdbnumber1  :: Int
    -- ^ Presumably the residue number as given in the PDB file -- confirm.
  , chain1      :: ByteString
    -- ^ Key of the first partner's chain in the 'chains' list of 'FR3D'.
  , seqpos1     :: Int
    -- ^ Zero-based index of the first partner within its chain sequence.
  , nucleotide2 :: Char
    -- ^ Nucleotide character of the second partner.
  , pdbnumber2  :: Int
    -- ^ Presumably the residue number as given in the PDB file -- confirm.
  , chain2      :: ByteString
    -- ^ Key of the second partner's chain in the 'chains' list of 'FR3D'.
  , seqpos2     :: Int
    -- ^ Zero-based index of the second partner within its chain sequence.
  }
  deriving Show
-- | Linearized view of an FR3D entry: one sequence for all chains and
-- basepairs addressed by positions in that sequence.
data LinFR3D = LinFR3D
  { pdbID    :: ByteString
    -- ^ Identifier of the entry, copied from 'pdbid' by 'linearizeFR3D'.
  , sequence :: ByteString
    -- ^ All chain sequences joined into one string; 'linearizeFR3D' puts
    -- a @&@ between consecutive chains.
  , pairs    :: [(ExtPairIdx, ExtPair)]
    -- ^ For every basepair: its index pair into 'sequence' and the pair of
    -- nucleotides, each together with the interaction annotation.
  }
  deriving Show
-- | Flatten an 'FR3D' entry into a 'LinFR3D'.
--
-- The chain sequences are joined with @&@ as separator and every basepair
-- position is translated from a (chain, position-in-chain) address into an
-- index of the joined sequence.
--
-- A basepair that names a chain which is not present in 'chains' gets the
-- index @-1@ for that partner. The sentinel is deliberately negative: it
-- can never alias a real nucleotide and it is rejected by the @< 0@ test in
-- 'checkLinFR3D'.
linearizeFR3D :: FR3D -> LinFR3D
linearizeFR3D FR3D{..} = LinFR3D
  { pdbID    = pdbid
  , sequence = BS.intercalate "&" $ L.map snd chains
  , pairs    = L.map f basepairs
  }
  where
    -- Start offset of every chain within the joined sequence. Each chain
    -- advances the offset by its length plus one for the "&" separator.
    trans = snd $ L.mapAccumL
      (\acc (x, y) -> (acc + 1 + BS.length y, (x, acc)))
      0
      chains
    -- Index in the joined sequence, or -1 if the chain is unknown.
    linearPos chain pos = maybe (-1) (+ pos) $ L.lookup chain trans
    f Basepair{..} = (idx, p)
      where
        idx = ( (linearPos chain1 seqpos1, linearPos chain2 seqpos2)
              , interaction
              )
        p   = ( (mkNuc nucleotide1, mkNuc nucleotide2), interaction )
-- | Structures carrying a list of basepairs from which redundant entries
-- can be dropped.
class RemoveDuplicatePairs a where
  -- | Return the same structure with only those pairs kept whose first
  -- partner is strictly smaller than the second one (this is what the
  -- instances in this module do; presumably the input lists every pair in
  -- both orientations -- confirm against the data source).
  removeDuplicatePairs :: a -> a
-- | Keeps a basepair only if the (chain, position) address of its first
-- partner is strictly smaller than that of its second partner.
instance RemoveDuplicatePairs FR3D where
  removeDuplicatePairs entry =
    entry { basepairs = [ bp | bp <- basepairs entry, firstIsSmaller bp ] }
    where
      -- Tuple ordering: compare chain keys first, then positions.
      firstIsSmaller bp = (chain1 bp, seqpos1 bp) < (chain2 bp, seqpos2 bp)
-- | Keeps a pair only if its left index in the linear sequence is strictly
-- smaller than its right index.
instance RemoveDuplicatePairs LinFR3D where
  removeDuplicatePairs lin = lin { pairs = L.filter leftBeforeRight (pairs lin) }
    where
      -- Only the index part of each entry is inspected; the annotation and
      -- the nucleotide pair are ignored.
      leftBeforeRight (((i, j), _), _) = i < j
-- | Validate every basepair of an 'FR3D' entry against its chains.
--
-- Returns @Right@ with the unchanged entry if all basepairs are
-- consistent, otherwise @Left@ with the entry and the list of offending
-- basepairs. A basepair is offending if
--
-- * one of its chains is not present in 'chains',
-- * one of its positions is negative or beyond the end of its chain, or
-- * the nucleotide stored in the basepair differs from the character found
--   at that position in the chain sequence.
checkFR3D :: FR3D -> Either (FR3D, [Basepair]) FR3D
checkFR3D fr3d@FR3D{..}
  | L.null xs = Right fr3d
  | otherwise = Left (fr3d, xs)
  where
    xs = L.filter invalid basepairs
    invalid x =
      case (L.lookup (chain1 x) chains, L.lookup (chain2 x) chains) of
        (Just c1, Just c2) ->
          -- The range tests come first so that 'BS.index' is only reached
          -- with in-bounds positions ('||' short-circuits).
          seqpos1 x < 0
            || seqpos2 x < 0
            || seqpos1 x >= BS.length c1
            || seqpos2 x >= BS.length c2
            || nucleotide1 x /= c1 `BS.index` seqpos1 x
            || nucleotide2 x /= c2 `BS.index` seqpos2 x
        -- An unknown chain is reported as an error instead of crashing on
        -- a failed pattern match.
        _ -> True
-- | Validate every pair of a 'LinFR3D' against its linear sequence.
--
-- Returns @Right@ with the unchanged value if no pair is faulty, otherwise
-- @Left@ with the value and the faulty pairs. A pair is faulty if one of
-- its indices lies outside the sequence or if the nucleotide found at an
-- index differs from the one recorded in the pair.
checkLinFR3D :: LinFR3D -> Either (LinFR3D, [(ExtPairIdx, ExtPair)]) LinFR3D
checkLinFR3D linfr3d@LinFR3D{..} =
  case L.filter faulty pairs of
    []  -> Right linfr3d
    bad -> Left (linfr3d, bad)
  where
    seqLen = BS.length sequence
    outside i = i < 0 || i >= seqLen
    -- Nucleotide at a position of the linear sequence.
    nucAt i = mkNuc (sequence `BS.index` i)
    faulty (idx, nucs) =
      -- Both range tests precede the lookups, so 'BS.index' never sees an
      -- out-of-range position.
      outside l
        || outside r
        || nucAt l /= baseL nucs
        || nucAt r /= baseR nucs
      where
        l = baseL idx
        r = baseR idx