module Biobase.Secondary.Isostericity where
import Data.ByteString.Char8 (ByteString)
import Data.FileEmbed (embedFile)
import Data.Function (on)
import Data.List
import Data.Tuple.Select
import qualified Data.ByteString.Char8 as BS
import qualified Data.Map as M
import Text.CSV
import Biobase.Primary.Nuc
import Biobase.Secondary.Basepair
class IsostericityLookup a where
getClasses :: a -> [String]
inClass :: String -> [a]
instance IsostericityLookup (ExtPair n) where
getClasses p
| Just cs <- M.lookup p defaultIsostericityMap
= cs
| otherwise = []
inClass x = map fst . filter ((x `elem`).snd) $ M.assocs defaultIsostericityMap
instance IsostericityLookup (Pair n) where
getClasses p
| Just cs <- M.lookup (p,CWW) defaultIsostericityMap
= cs
| otherwise = []
inClass x = map (sel1 . fst)
. filter ((CWW==). snd . fst)
. filter ((x `elem`).snd)
$ M.assocs defaultIsostericityMap
defaultIsostericityMap = mkIsostericityMap parsedCSV
mkIsostericityMap = M.fromListWith (\x y -> nub $ x++y) . mkIsostericityList
mkIsostericityList :: [[[String]]] -> [(ExtPair n, [String])]
mkIsostericityList gs = nubBy ((==) `on` fst) . concatMap turn . concatMap f $ gs where
f g = map (\e -> ( ( let [x,y] = fst e
in (charRNA x, charRNA y), read bpt
)
, nub $ snd e)
) $ map entry xs where
bpt = head $ head g
xs = tail g
entry x = (x!!0, map (filter (\z -> not $ z `elem` "()")) . takeWhile ('I' `elem`) . drop 2 $ x)
turn entry@(((x,y),(wc,tx,ty)), cs) = [entry, (((y,x),(wc,ty,tx)), cs)]
parsedCSV = filter (not . null) gs where
gs = map (filter ((""/=).head)) . groupBy (\x y -> ""/= (head y)) $ csv
Right csv = parseCSV "isostericity/detailed" $ BS.unpack detailedCSV
detailedCSV :: ByteString
detailedCSV = $(embedFile "sources/isostericity-detailed.csv")