module Biobase.Types.Shape where
import Control.DeepSeq
import Control.Lens
import Control.Monad.Error.Class
import Control.Monad (foldM,unless)
import Data.ByteString (ByteString)
import Data.Data
import Data.List (foldl1')
import Data.Monoid ((<>))
import Data.Set (Set)
import GHC.Generics (Generic)
import qualified Data.ByteString.Char8 as BS8
import qualified Data.List as L
import qualified Data.Set as Set
import Data.Forest.StructuredPaired
import qualified Biobase.Types.Structure as TS
data ShapeLevel
= SL1
| SL2
| SL3
| SL4
| SL5
deriving (Eq,Ord,Show,Read,Data,Typeable,Generic)
instance NFData ShapeLevel
data RNAshape
= RNAshape
{ _rnashapelevel ∷ !ShapeLevel
, _rnashape ∷ !ByteString
}
deriving (Eq,Ord,Show,Read,Data,Typeable,Generic)
makeLenses ''RNAshape
instance NFData RNAshape
shapeForest
∷ ShapeLevel
→ SPForest ByteString ByteString
→ SPForest Char Char
shapeForest = preStem
where
preStem s SPE = SPE
preStem s (SPT _ xs _) = SPT '[' (inStem s xs) ']'
preStem s spr@(SPR rs) = inStem s spr
preStem s (SPJ xs)
| [x] ← xs = preStem s x
| [l@SPR{},x@SPT{}] ← xs = if s <= SL2 then (SPJ [SPR '_', preStem s x]) else preStem s x
| [x@SPT{},r@SPR{}] ← xs = if s <= SL2 then (SPJ [preStem s x, SPR '_']) else preStem s x
| otherwise = SPJ $ map (preStem s) xs
inStem s SPE = SPE
inStem s (SPT _ xs _) = inStem s xs
inStem s (SPR rs)
| s == SL1 = SPR '_'
| otherwise = SPE
inStem s (SPJ xs)
| [x] ← xs = error "x"
| [l@SPR{},x] ← xs = if s <= SL3 then preStem s (SPJ xs) else inStem s x
| [x,r@SPR{}] ← xs = if s <= SL3 then preStem s (SPJ xs) else inStem s x
| [l@SPR{},x,r@SPR{}] ← xs = if s == SL5 then inStem s x else preStem s (SPJ xs)
| otherwise = SPJ $ map (preStem s) xs
rnass2shape lvl s = shapeForestshape lvl . shapeForest lvl . TS.compactifySPForest
. either (\e → error $ show (e,s)) id . TS.rnassSPForest $ s
test lvl = shapeForestshape lvl . shapeForest lvl $ TS.compactifySPForest $ either error id $ TS.rnassSPForest $ TS.RNAss "(((((...(((..(((...))))))...(((..((.....))..)))))))).."
shapeForestshape
∷ ShapeLevel
→ SPForest Char Char
→ RNAshape
shapeForestshape lvl = RNAshape lvl . go
where
go SPE = ""
go (SPT l x r) = BS8.singleton l <> go x <> BS8.singleton r
go (SPJ xs ) = mconcat $ map go xs
go (SPR x ) = BS8.singleton x
generateShape ∷ ShapeLevel → TS.RNAss → RNAshape
generateShape = undefined
data RNAshapepset = RNAshapepset { _rnashapepsetlevel ∷ ShapeLevel, _rnashapepset ∷ Set (Int,Int) }
deriving (Read,Show,Eq,Ord,Generic)
makeLenses ''RNAshapepset
instance NFData RNAshapepset
rnashapePairSet
∷ (MonadError String m)
⇒ RNAshape
→ m RNAshapepset
rnashapePairSet (RNAshape lvl s2) = do
let go (set,ks ) (i,'[') = return (set,i:ks)
go (set,i:is) (j,']') = return (Set.insert (i,j) set, is)
go (set,[] ) (j,']') = throwError $ "unequal brackets in \"" ++ BS8.unpack s2 ++ "\" at position: " ++ show j
go (set,ks ) (_,'_') = return (set,ks)
(set,ss) ← foldM go (Set.empty,[]) . L.zip [0..] $ BS8.unpack s2
unless (null ss) $ throwError $ "unequal brackets in \"" ++ BS8.unpack s2 ++ "\" with opening bracket(s): " ++ show ss
return $ RNAshapepset lvl set
{-# Inlinable rnashapePairSet #-}
rnassPairSet' ∷ RNAshape → RNAshapepset
rnassPairSet' = either error id . rnashapePairSet
shapePairDist ∷ RNAshapepset → RNAshapepset → Int
shapePairDist (RNAshapepset lvl1 p1) (RNAshapepset lvl2 p2) = Set.size z1 + Set.size z2
where i = Set.intersection p1 p2
z1 = p1 `Set.difference` i
z2 = p2 `Set.difference` i