{-# LANGUAGE ExistentialQuantification, TypeFamilies #-} {-# LANGUAGE RecordWildCards, FlexibleContexts #-} {-# LANGUAGE BangPatterns, ParallelListComp #-} module StatUtil where import Bio.Adna import Bio.Bam import Bio.Base import Bio.Util.Numeric ( wilson ) import Control.Applicative import Control.Monad ( mplus ) import Numeric ( showGFloat ) import Data.List ( intersperse ) import qualified Data.Vector.Unboxed as U defaultWidth, defaultHeight :: Int defaultWidth = 1024 defaultHeight = 768 data Span a = SP {-# UNPACK #-} !Int !(U.Vector a) deriving Show {-# INLINE shiftSpan #-} shiftSpan :: Span NPair -> Span NPair shiftSpan (SP _ xs) = SP i xs where i = 1 - U.length xs + U.length (U.takeWhile (isGap.snd) (U.reverse xs)) {-# INLINE spanMap #-} spanMap :: (U.Unbox a, U.Unbox b) => (a -> b) -> Span a -> Span b spanMap f (SP l xs) = SP l (U.map f xs) {-# INLINE zipWithSpan #-} zipWithSpan :: (U.Unbox a, U.Unbox b, U.Unbox c) => (a -> b -> c) -> Span a -> Span b -> Span c zipWithSpan f (SP u xs) (SP v ys) | u <= v = SP v $ U.zipWith f (U.drop (v-u) xs) ys | True = SP u $ U.zipWith f xs (U.drop (u-v) ys) {-# INLINE zipWithSpan1 #-} zipWithSpan1 :: (U.Unbox a, U.Unbox b, U.Unbox c) => (a -> c) -> (b -> c) -> (a -> b -> c) -> Span a -> Span b -> Span c zipWithSpan1 f g h (SP u xs) (SP v ys) | U.null ys = SP u (U.map f xs) | U.null xs = SP v (U.map g ys) | u + U.length xs < v || v + U.length ys < u = error "zipWithSpan1: not supposed to happen" | u <= v = SP u $ U.concat [ U.map f xleft, U.zipWith h xmid ymid, U.map f xright, U.map g yright ] where xleft = U.slice 0 (v-u) xs xmid = U.slice (v-u) midlen xs xright = U.slice (v-u + midlen) (U.length xs - (v-u) - midlen) xs ymid = U.slice 0 midlen ys yright = U.slice midlen (U.length ys - midlen) ys midlen = (U.length xs - (v-u)) `min` U.length ys zipWithSpan1 f g h (SP u xs) (SP v ys) = -- u > v SP v $ U.concat [ U.map g yleft, U.zipWith h xmid ymid, U.map f xright, U.map g yright ] where yleft = U.slice 0 (u-v) ys ymid = U.slice (u-v) midlen ys yright = U.slice (u-v + midlen) (U.length ys - (u-v) - midlen) ys xmid = U.slice 0 midlen xs xright = U.slice midlen (U.length xs - midlen) xs midlen = (U.length ys - (u-v)) `min` U.length xs data LineStyle = Solid Double Double Double | Dashed [Double] Double Double Double | Hidden solid_red, solid_green, solid_blue, solid_black :: LineStyle solid_red = Solid 1 0 0 solid_green = Solid 0 1 0 solid_blue = Solid 0.25 0.25 1 solid_black = Solid 0 0 0 type Ratio = ( Int, Int ) type Predicate a = Att (Span a) (Span Bool) data Att a b = Att { p_name :: String , p_linestyle :: Maybe LineStyle , p_func :: a -> b } data Function a c = Function { f_label :: String , f_linestyle :: Maybe LineStyle , f_fold :: Fold a c } data Curve v = Curve { c_label :: String , c_linestyle :: Maybe LineStyle , c_values :: Span v } data Fold a c = forall b . F b (b -> a -> b) (b -> c) data PairS a b = PairS !a !b runFold :: Fold a c -> [a] -> c runFold (F acc0 cons fun) = go acc0 where go ac [ ] = fun ac go ac (a:as) = (go $! cons ac a) as instance Functor (Fold a) where fmap f (F ac cons fun) = F ac cons (f . fun) instance Applicative (Fold a) where pure a = F () (\_ _ -> ()) (\_ -> a) F acc1 cons1 fun1 <*> F acc2 cons2 fun2 = F acc' cons' fun' where acc' = PairS acc1 acc2 cons' (PairS u v) a = PairS (cons1 u a) (cons2 v a) fun' (PairS u v) = fun1 u $! fun2 v {-# INLINE liftF2 #-} liftF2 :: U.Unbox a => String -> (a -> a -> a) -> Function x (Span a) -> Function x (Span a) -> Function x (Span a) liftF2 ops op f g = Function { f_label = f_label f ++ ops ++ f_label g , f_linestyle = f_linestyle f `mplus` f_linestyle g , f_fold = fold_both (f_fold f) (f_fold g) } where fold_both (F f_nil f_cons f_func) (F g_nil g_cons g_func) = F (f_nil, g_nil) (\ (acf,acg) x -> ((,) $! f_cons acf x) $! g_cons acg x) (\ (yf,yg) -> zipWithSpan1 id id op (f_func yf) (g_func yg)) {-# INLINE liftF2' #-} liftF2' :: (U.Unbox a, U.Unbox b, U.Unbox c) => String -> (a -> b -> c) -> Function x (Span a) -> Function x (Span b) -> Function x (Span c) liftF2' ops op f g = Function { f_label = f_label f ++ ops ++ f_label g , f_linestyle = f_linestyle f `mplus` f_linestyle g , f_fold = fold_both (f_fold f) (f_fold g) } where fold_both (F f_nil f_cons f_func) (F g_nil g_cons g_func) = F (f_nil, g_nil) (\ (acf,acg) x -> ((,) $! f_cons acf x) $! g_cons acg x) (\ (yf,yg) -> zipWithSpan op (f_func yf) (g_func yg)) {-# INLINE liftF #-} liftF :: (U.Unbox a, U.Unbox b) => String -> (a -> b) -> Function x (Span a) -> Function x (Span b) liftF ops op f = Function { f_label = ops ++ f_label f , f_linestyle = f_linestyle f , f_fold = case f_fold f of F nil cons func -> F nil cons (spanMap op . func) } liftA :: (a -> b) -> Att a b liftA = Att "" Nothing unstyled :: Function a b -> Function a b unstyled f = f { f_linestyle = Nothing } styled :: LineStyle -> Function a b -> Function a b styled s f = f { f_linestyle = Just s } (^+), (^-), (^*) :: (U.Unbox b, Num b) => Function a (Span b) -> Function a (Span b) -> Function a (Span b) (^+) = liftF2 "+" (+) (^-) = liftF2' "-" (-) (^*) = liftF2' "*" (*) (^/) :: (U.Unbox b, Fractional b) => Function a (Span b) -> Function a (Span b) -> Function a (Span b) (^/) = liftF2' "/" (/) (^//) :: Function a (Span Int) -> Function a (Span Int) -> Function a (Span Ratio) (^//) = liftF2' "/" (,) scale :: (U.Unbox b, Num b, Show b) => b -> Function a (Span b) -> Function a (Span b) scale a = liftF (shows a "*") (a*) hadAdapter :: Alignment -> Bool hadAdapter (ALN { a_fragment_type = Complete }) = True hadAdapter _ = False show_common, show_decimal :: Ratio -> String show_common (n,1) = show n show_common (n,d) = shows n "/" ++ show d show_decimal (n,1) = show n show_decimal (n,d) = showGFloat (Just 3) p . (" ["++) . showGFloat (Just 3) u . (".."++) . showGFloat (Just 3) v $ "]" where (u,p,v) = wilson 0.05 n d to_table :: (Ratio -> String) -> (Int,Int) -> [Curve Ratio] -> String to_table showD (xmin,xmax) curves = unlines . map concat . map (intersperse "\t") $ let labels = [ c_label c | c <- curves ] values = [ c_values c | c <- curves ] in ( [] : labels ) : [ show x : [ showD (row `atS` x) | row <- values ] | x <- [xmin..xmax] ] acc :: Att a (Span Bool) -> Function a (Span Int) acc (Att nm s pr) = Function { f_label = "p(" ++ nm ++ ")" , f_linestyle = s , f_fold = F (SP 0 U.empty) (\ac xs -> zipWithSpan1 id fromEnum (\a b -> a + fromEnum b) ac (pr xs)) id } comp :: Att b c -> Att a b -> Att a c comp f g = Att (p_name f) (p_linestyle f) (p_func f . p_func g) atS :: Span Ratio -> Int -> Ratio atS (SP u xs) n | n < u = (0,0) | n - u >= U.length xs = (0,0) | otherwise = xs U.! (n-u) nA, nC, nG, nT, nN, ngap, whatever, anything :: Predicate Nucleotides nA = Att "A" (Just solid_green) (spanMap (nucsA ==)) nC = Att "C" (Just solid_blue) (spanMap (nucsC ==)) nG = Att "G" (Just solid_black) (spanMap (nucsG ==)) nT = Att "T" (Just solid_red) (spanMap (nucsT ==)) nN = Att "N" Nothing (spanMap (nucsN ==)) ngap = Att "-" Nothing (spanMap isGap) anything = Att "?" Nothing (spanMap (const True)) whatever = Att "X" Nothing (spanMap isBase) inside, insideR :: Int -> Predicate Nucleotides inside n = Att "inside" Nothing $ \ (SP u xs) -> let n' = min n (U.length xs) in SP u $ U.replicate (U.length xs - n') True U.++ U.replicate n' False insideR n = Att "inside" Nothing $ \ (SP u xs) -> let n' = min n (U.length xs) in SP u $ U.replicate n' False U.++ U.replicate (U.length xs - n') True {-# INLINE bases #-} bases :: [Predicate Nucleotides] bases = [nA, nC, nG, nT] lbases :: [( String, Predicate Nucleotides )] lbases = zip ["A","C","G","T"] bases dinucleotides :: [Predicate Nucleotides] dinucleotides = [ a ^& before b | a <- bases, b <- bases ] lbl :: String -> Function a b -> Function a b lbl l (Function _ s p) = Function l s p no :: Predicate Nucleotides -> Predicate Nucleotides no (Att l s p) = Att ("no "++l) s (spanMap not . p) was, is :: Predicate Nucleotides -> Predicate NPair was (Att l s p) = Att ("was "++ l) s (p . spanMap fst) is (Att l s p) = Att ("is " ++ l) s (p . spanMap snd) {-# INLINE (^&) #-} {-# INLINE (^|) #-} (^&), (^|) :: Predicate a -> Predicate a -> Predicate a (^&) (Att l s p) (Att m t q) = Att (l++" "++m) (mplus s t) $ \sp -> zipWithSpan (&&) (p sp) (q sp) (^|) (Att l s p) (Att m t q) = Att (l++","++m) (mplus s t) $ \sp -> zipWithSpan (||) (p sp) (q sp) before, after :: Predicate a -> Predicate a before (Att l _ p) = Att ("before "++ l) Nothing $ \ (SP i xs) -> p $ SP (i-1) xs after (Att l _ p) = Att ("after " ++ l) Nothing $ \ (SP i xs) -> p $ SP (i+1) xs dpairs :: [a] -> [(a,a)] dpairs [] = [] dpairs (a:as) = map ((,) a) as ++ map (flip (,) a) as ++ dpairs as {-# INLINE substitutions #-} substitutions :: Att a (Span NPair) -> Predicate Nucleotides -> [Function a (Span Ratio)] substitutions = substitutions_C id {-# INLINE substitutions_C #-} substitutions_C :: (Predicate NPair -> Predicate NPair) -> Att a (Span NPair) -> Predicate Nucleotides -> [Function a (Span Ratio)] substitutions_C cond get insideP = [ styled s $ lbl l $ acc ((cond $ was x ^& is y ^& is insideP) `comp` get) ^// acc ((cond $ was x ^& is insideP) `comp` get) | x <- [ nA, nA, nA, nC, nC, nC, nG, nG, nG, nT, nT, nT ] | y <- [ nC, nG, nT, nA, nG, nT, nA, nC, nT, nA, nC, nG ] | l <- [ "A->C", "A->G", "A->T" , "C->A", "C->G", "C->T" , "G->A", "G->C", "G->T" , "T->A", "T->C", "T->G"] | s <- [ Dashed dashed intens intens 1, Dashed dashed intens intens intens, Dashed dashed intens 1 1 , Solid 1 intens 1, Solid intens intens intens, Solid 1 0 0 , Solid 0 1 0, Dashed dotted 1 0.7 intens, Dashed dotted intens 1 1 , Dashed dashdot 1 intens 1, Dashed dashdot 1 0.7 intens, Dashed dashdot intens intens intens ] ] where intens = 0.3 dashed = [3,3] dotted = [1,2] dashdot = [3,2,1,2]