module Biobase.Types.Structure where
import Control.Applicative
import Control.DeepSeq
import Control.Lens
import Control.Monad.Error.Class
import Control.Monad (foldM,unless)
import Data.Attoparsec.ByteString.Char8
import Data.Attoparsec.Combinator
import Data.Bifunctor (second)
import Data.ByteString (ByteString)
import Data.Data
import Data.List (foldl1',foldl')
import Data.Monoid ((<>))
import Data.Set (Set)
import GHC.Generics (Generic)
import qualified Data.ByteString.Char8 as BS8
import qualified Data.List as L
import qualified Data.Set as S
import qualified Data.Set as Set
import qualified Data.Vector.Unboxed as VU
import qualified Test.QuickCheck as Q
import Data.Forest.StructuredPaired
newtype RNAss = RNAss { _rnass ∷ ByteString }
deriving (Eq,Ord,Show,Read,Data,Typeable,Generic,Semigroup,Monoid)
makeLenses ''RNAss
instance NFData RNAss
newtype RNAensembleStructure = RNAes { _rnaes ∷ ByteString }
deriving (Eq,Ord,Show,Read,Data,Typeable,Generic)
makeLenses ''RNAensembleStructure
instance NFData RNAensembleStructure
data RNAds = RNAds
{ _rnadsL ∷ !ByteString
, _rnadsR ∷ !ByteString
}
deriving (Eq,Ord,Show,Read,Data,Typeable,Generic)
makeLenses ''RNAds
instance NFData RNAds
rnads ∷ Prism' ByteString RNAds
rnads = prism (\(RNAds l r) → BS8.concat [l, "&", r])
(\s → case BS8.split '&' s of [l,r] → Right (RNAds l r) ; _ → Left s)
{-# Inline rnads #-}
rnads2rnassPair ∷ Iso' RNAds (RNAss, RNAss)
rnads2rnassPair = iso (\(RNAds l r) → (RNAss l, RNAss r)) (\(RNAss l, RNAss r) → RNAds l r)
{-# Inline rnads2rnassPair #-}
mkRNAds ∷ (Monad m, MonadError RNAStructureError m) ⇒ ByteString → m RNAds
mkRNAds q = BS8.split '&' q & \case
[l,r] → do
return $ RNAds
{ _rnadsL = l
, _rnadsR = r
}
_ → throwError $ RNAStructureError "mkRNAds: not a dimer" q
{-# Inline mkRNAds #-}
data RNAStructureError = RNAStructureError
{ _rnaStructureError ∷ String
, _rnaOffender ∷ ByteString
}
deriving (Show,Generic)
instance NFData RNAStructureError
verifyRNAss ∷ (Monad m, MonadError RNAStructureError m) ⇒ RNAss → m RNAss
verifyRNAss ss = do
return ss
data RNApset = RNApset
{ _rnapset ∷ !(Set (Int,Int))
, _rnapsetSLen ∷ !Int
}
deriving (Read,Show,Eq,Ord,Generic)
makeLenses ''RNApset
instance NFData RNApset
rnassPairSet
∷ (MonadError String m)
⇒ RNAss
→ m RNApset
rnassPairSet (RNAss s2) = do
let go (set,ks ) (i,'(') = return (set,i:ks)
go (set,i:is) (j,')') = return (Set.insert (i,j) set, is)
go (set,[] ) (j,')') = throwError $ "unequal brackets in \"" ++ BS8.unpack s2 ++ "\" at position: " ++ show j
go (set,ks ) (_,'.') = return (set,ks)
(set,ss) ← foldM go (Set.empty,[]) . L.zip [0..] $ BS8.unpack s2
unless (null ss) $ throwError $ "unequal brackets in \"" ++ BS8.unpack s2 ++ "\" with opening bracket(s): " ++ show ss
return $ RNApset set (BS8.length s2)
{-# Inlinable rnassPairSet #-}
rnassSPForest
∷ (MonadError String m)
⇒ RNAss
→ m (SPForest ByteString Char)
rnassSPForest (RNAss s2) = either throwError return $ parseOnly (manyElems <* endOfInput) s2
where
tree = SPT <$> char '(' <*> someElems <*> char ')' <?> "SPT"
unpaired = SPR <$> takeWhile1 (=='.') <?> "SPR"
someElems = SPJ <$> many1 (tree <|> unpaired) <?> "many1 SPT / SPR"
manyElems = (\case {[] → SPE; xs → SPJ xs}) <$> many (tree <|> unpaired) <?> "many0 SPT / SPR"
{-# Inlinable rnassSPForest #-}
compactifySPForest
∷ SPForest ByteString Char
→ SPForest ByteString ByteString
compactifySPForest = go . second BS8.singleton
where go SPE = SPE
go (SPR x) = SPR x
go (SPJ xs) = SPJ (map go xs)
go (SPT l (SPJ [x]) r) = go $ SPT l x r
go (SPT l (SPT l' t r') r) = go $ SPT (l <> l') t (r' <> r)
go (SPT l t r) = SPT l (go t) r
rnassPairSet' ∷ RNAss → RNApset
rnassPairSet' = either error id . rnassPairSet
rnapsetRNAss ∷ RNApset → RNAss
rnapsetRNAss (RNApset ps l) = RNAss $ BS8.pack $ VU.toList xs
where xs = VU.replicate l '.' VU.// ls VU.// rs
ls = L.map ((,'(') . fst) $ S.toList ps
rs = L.map ((,')') . snd) $ S.toList ps
pairDist ∷ RNApset → RNApset → Int
pairDist (RNApset p1 _) (RNApset p2 _) = Set.size z1 + Set.size z2
where i = Set.intersection p1 p2
z1 = p1 `Set.difference` i
z2 = p2 `Set.difference` i
instance Q.Arbitrary RNApset where
arbitrary = do
let go ∷ Int → Int → Q.Gen (Set (Int,Int))
go l r
| l >= r = return S.empty
| otherwise = do
c ∷ Int ← Q.oneof [ Q.choose (l+1,r)
, Q.choose (l+1, min r $ l+20)
]
z ∷ Int ← Q.choose (0,5)
let stack = S.fromList [(l+k,c-k) | k ← [0..z-1], l+k+1 < c-k]
right ← go (c+1) r
return $ S.union stack right
l ∷ Int ← Q.choose (0,199)
s ← go 0 l
return $ RNApset s (l+1)
instance Q.Arbitrary RNAss where
arbitrary = rnapsetRNAss <$> Q.arbitrary