module Text.PhonotacticLearner.Util.Probability (
Multicount (..),
getCounts, consMC, singleMC, fromMC,
Expectation(..),
normalizeExp,
Cdf, massToCdf, sampleCdf, uniformSample,
upperConfidenceOE
) where
import Text.PhonotacticLearner.Util.Ring
import Data.Tuple
import Data.Monoid
import qualified Data.Map as M
import System.Random
import Control.Monad.State
import qualified Data.Vector.Unboxed as V
import Control.DeepSeq
newtype Multicount = MC {unMC :: V.Vector Int} deriving (Eq, Show, NFData)
getCounts :: Multicount -> [Int]
getCounts (MC xs) = V.toList xs
instance Monoid Multicount where
mempty = MC V.empty
mappend (MC xs) (MC ys)
| V.null xs = MC ys
| V.null ys = MC xs
| lx == ly = MC (V.zipWith (+) xs ys)
| lx < ly = MC (V.zipWith (+) xs (V.take lx ys) V.++ V.drop lx ys)
| ly < lx = MC (V.zipWith (+) ys (V.take ly xs) V.++ V.drop ly xs)
where lx = V.length xs
ly = V.length ys
consMC :: Sum Int -> Multicount -> Multicount
consMC (Sum x) (MC xs) = MC (V.cons x xs)
singleMC :: Sum Int -> Multicount
singleMC (Sum x) = MC (V.singleton x)
fromMC :: Multicount -> Vec
fromMC (MC xs) = Vec (V.map fromIntegral xs)
data Expectation v = Exp {
prob :: !Double
, exps :: !v
} deriving (Eq, Show)
instance (RingModule Double v) => Additive (Expectation v) where
zero = Exp 0 zero
(Exp p1 v1) ⊕ (Exp p2 v2) = Exp (p1 + p2) (v1 ⊕ v2)
instance (RingModule Double v) => Semiring (Expectation v) where
one = Exp 1 zero
(Exp p1 v1) ⊗ (Exp p2 v2) = Exp (p1 * p2) ((p1 ⊙ v2) ⊕ (p2 ⊙ v1))
normalizeExp :: (RingModule Double v) => Expectation v -> v
normalizeExp (Exp p vs) = (1/p) ⊙ vs
newtype Cdf a = Cdf (M.Map Double a) deriving Show
massToCdf :: [(a, Double)] -> Cdf a
massToCdf xs = Cdf (M.fromList (zip partialsums (fmap fst xs')))
where
xs' = filter ((/=0) . snd) xs
totalp = sum (fmap snd xs)
partialsums = scanl (+) 0 (fmap ((/ totalp) . snd) xs')
sampleCdf :: (RandomGen g, MonadState g m) => Cdf a -> m a
sampleCdf (Cdf cdm) = do
y :: Double <- state (randomR (0,1))
let (Just (_,x)) = M.lookupLE y cdm
return x
uniformSample :: Cdf a -> Int -> [(a, Int)]
uniformSample (Cdf cmf) n = zipWith subentries (tail breaks ++ [(undefined, n)]) breaks
where
breaks = fmap (fmap (round . (n ⊙)) . swap) . M.assocs $ cmf
subentries (_,cx) (y,cy) = (y,cxcy)
upperConfidenceOE :: Double -> Double -> Double
upperConfidenceOE o e = if p >= 1 then 1 else min 1 (p + 3*v)
where
p = (o + 0.5) / (e+1)
v = sqrt (p * (1p) / (e+1))