-- | Given a sequence and a structure, evaluate the energy. -- -- TODO switch to 'SSTree [Energy]' type! module BioInf.RNAEval where import Text.Printf import qualified Data.Vector.Unboxed as VU import Biobase.RNA import Biobase.Structure import Biobase.Structure.DotBracket import Biobase.Types.Ring import Biobase.Vienna import Data.PrimitiveArray import BioInf.RNAFold.EnergyInt import BioInf.RNAFold.Functions -- | Sum up a complete (sub-) tree. rnaEval :: ViennaTables -> String -> String -> Int rnaEval vna pri' sec' = treeSum $ annotateWithEnergy vna pri sst where sec = dotbracketToPairlist sec' sst = toSSTree sec pri = mkPrimary pri' -- | Evaluate the energy of a secondary structure tree with sequence. We abuse -- the normal folding functions with a dummy table full of (one :: Energy). -- This is probably slower than another method but quickly written. -- -- TODO this is basically crapfuck ;-) Should use the FoldFunctions directly -- instead of that strange table. Should not xxxOpt either. annotateWithEnergy :: ViennaTables -> Primary -> SSTree () -> SSTree [Int] annotateWithEnergy trnr pri t = f t where -- extern part f (SSExt l _ []) = SSExt l [one] [] f (SSExt l _ xs) = let es = map ((\(i,j) -> externalLoopOpt trnr pri dummy i j) . treeIJ) xs in SSExt l es (map f xs) -- hairpin f (SSTree i j _ []) = SSTree i j [hairpinOpt trnr pri i j] [] -- 1-loop f (SSTree i j _ [x]) -- stack | i+1==k&&j-1==l = SSTree i j [stackOpt trnr pri dummy i j] [f x] | (di,dj) `VU.elem` tabbedInteriorLoopDistances i j = SSTree i j [tabbedInteriorLoopOpt trnr pri dummy i j] [f x] | di==0&&dj>1 = SSTree i j [bulgeLOpt trnr pri dummy i j] [f x] | di>1&&dj==0 = SSTree i j [bulgeROpt trnr pri dummy i j] [f x] | di==1&&dj>1 = SSTree i j [interior1xnLOpt trnr pri dummy i j] [f x] | di>1&&dj==1 = SSTree i j [interior1xnROpt trnr pri dummy i j] [f x] | ds+dl>30 = error "not handling loops with size >30" -- large interiorr loop | otherwise = SSTree i j [largeInteriorLoopOpt trnr pri dummy i j] [f x] where (k,l) = treeIJ x di = k-i-1 dj = j-l-1 ds = min di dj dl = max di dj -- multiple loop f (SSTree i j _ xs) = let cl = multibranchCloseOpt trnr pri dummy dummy i j ls = map ((\(k,l) -> multibranchIJLoopOpt trnr pri dummy k l) . treeIJ) xs in SSTree i j (cl:ls) (map f xs) dummy = fromAssocs (0,0) (n,n) one [] n = snd $ bounds pri -- | convert an annotated tree into strings that explain each score. -- -- TODO this is a stupid name explainTree :: Primary -> SSTree [Int] -> [String] explainTree inp sst = f sst where -- | exterior loop f (SSExt _ _ []) = [""] f (SSExt _ _ xs) = this : concatMap f xs where eners = map (\(SSTree _ _ [e] _) -> e) xs ener = sum eners this = printf "%-20s %6d %s" "External loop" ener (show eners) -- | hairpin f (SSTree i j [e] []) = [this] where this = printf "%-20s (%4d,%4d) %4s %6d" "Hairpin loop" (i+1) (j+1) (show $ pair inp i j) e f (SSTree i j [e] [x@(SSTree k l _ _)]) = this : f x where this = printf "%-20s (%4d,%4d) %4s (%4d,%4d) %4s %6d" "Interior" (i+1) (j+1) (show $ pair inp i j) (k+1) (l+1) (show $ pair inp k l) e f (SSTree i j es xs) = concatMap f xs ++ [this] where this = printf "%-20s (%4d,%4d) %4s %6d %6d, %s" "Multi loop" (i+1) (j+1) (show $ pair inp i j) (sum es) (head es) (show $ tail es) -- | input sequence indices treeIJ (SSTree i j _ _) = (i,j) -- | Run a sum over the tree. (foldl (+), the tree annotations) -- -- TODO that should go into Biobase.Structure as a generic walk over the tree treeSum t = f t where f (SSExt _ es xs) = ringProductL $ es ++ map f xs f (SSTree _ _ es xs) = ringProductL $ es ++ map f xs