{-# LANGUAGE TypeFamilies #-}
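{- |
Hidden Markov models with interchangeable emission distributions
(e.g. 'Discrete' or 'Gaussian'):
construction, random generation of emission sequences,
supervised and unsupervised training, and CSV (de)serialization.
-}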
module Math.HiddenMarkovModel (
   T(..),
   Discrete, DiscreteTrained,
   Gaussian, GaussianTrained,
   uniform,
   generate,
   generateLabeled,
   probabilitySequence,
   Normalized.logLikelihood,
   Normalized.reveal,

   Trained(..),
   trainSupervised,
   Normalized.trainUnsupervised,
   mergeTrained, finishTraining, trainMany,
   deviation,

   toCSV,
   fromCSV,
   ) where
import qualified Math.HiddenMarkovModel.Distribution as Distr
import qualified Math.HiddenMarkovModel.Normalized as Normalized
import qualified Math.HiddenMarkovModel.CSV as HMMCSV
import Math.HiddenMarkovModel.Private
          (T(..), Trained(..), mergeTrained, toCells, parseCSV)
import Math.HiddenMarkovModel.Utility
          (squareConstant, distance, matrixDistance,
           randomItemProp, normalizeProb, attachOnes)
import qualified Numeric.LAPACK.Matrix.Array as ArrMatrix
import qualified Numeric.LAPACK.Matrix as Matrix
import qualified Numeric.LAPACK.Vector as Vector
import Numeric.LAPACK.Matrix ((#!))
import qualified Numeric.Netlib.Class as Class
import qualified Data.Array.Comfort.Storable as StorableArray
import qualified Data.Array.Comfort.Shape as Shape
import qualified Text.CSV.Lazy.String as CSV
import qualified System.Random as Rnd
import qualified Control.Monad.Exception.Synchronous as ME
import qualified Control.Monad.Trans.State as MS
import qualified Control.Monad.HT as Monad
import qualified Data.NonEmpty as NonEmpty
import Data.Traversable (Traversable, mapAccumL)
import Data.Foldable (Foldable)

type DiscreteTrained symbol sh prob =
      Trained (Distr.Discrete symbol) sh prob
type Discrete symbol sh prob = T (Distr.Discrete symbol) sh prob

type GaussianTrained emiSh stateSh a =
      Trained (Distr.Gaussian emiSh) stateSh a
type Gaussian emiSh stateSh a = T (Distr.Gaussian emiSh) stateSh a
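
{- |
Model with a uniform initial state distribution and a uniform
transition matrix for the given emission distribution.
This can serve as a neutral starting point,
e.g. for 'Normalized.trainUnsupervised'.
-}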
uniform ::
   (Distr.Info typ, Shape.C sh, Class.Real prob) =>
   Distr.T typ sh prob -> T typ sh prob
uniform distr =
   let sh = Distr.statesShape distr
       c = recip $ fromIntegral $ Shape.size sh
   in  Cons {
          initial = Vector.constant sh c,
          transition = squareConstant sh c,
          distribution = distr
       }
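
{- |
For a sequence of (state, emission) pairs, compute the probability of
each pair: the transition probability from the previous state
(the initial probability for the first pair)
times the emission probability in that state.
The product of all returned values is the likelihood
of the labeled sequence.
-}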
probabilitySequence ::
   (Distr.EmissionProb typ, Shape.Indexed sh, Shape.Index sh ~ state,
    Class.Real prob, Distr.Emission typ prob ~ emission, Traversable f) =>
   T typ sh prob -> f (state, emission) -> f prob
probabilitySequence hmm =
   snd
   .
   mapAccumL
      (\index (s, e) ->
         ((transition hmm #!) . flip (,) s,
          index s * Distr.emissionStateProb (distribution hmm) e s))
      (initial hmm StorableArray.!)
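
-- | Generate a lazy, infinite sequence of emissions from the model.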
generate ::
   (Distr.Generate typ, Shape.Indexed sh, Class.Real prob,
    Rnd.RandomGen g, Rnd.Random prob, Distr.Emission typ prob ~ emission) =>
   T typ sh prob -> g -> [emission]
generate hmm = map snd . generateLabeled hmm
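
-- | Like 'generate', but also return the hidden state of every emission.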
generateLabeled ::
   (Distr.Generate typ, Shape.Indexed sh, Shape.Index sh ~ state,
    Rnd.RandomGen g, Rnd.Random prob,
    Class.Real prob, Distr.Emission typ prob ~ emission) =>
   T typ sh prob -> g -> [(state, emission)]
generateLabeled hmm =
   MS.evalState $
   flip MS.evalStateT (initial hmm) $
   Monad.repeat $ MS.StateT $ \v0 -> do
      s <-
         randomItemProp $
         zip (Shape.indices $ StorableArray.shape v0) (Vector.toList v0)
      x <- Distr.generate (distribution hmm) s
      return ((s, x), Matrix.takeColumn (transition hmm) s)
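
{- |
Accumulate training statistics from a manually labeled sequence
of (state, emission) pairs.
Combine the results of several sequences with 'mergeTrained'
and turn them into a usable model with 'finishTraining'.
-}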
trainSupervised ::
   (Distr.Estimate typ, Shape.Indexed sh, Shape.Index sh ~ state,
    Class.Real prob, Distr.Emission typ prob ~ emission) =>
   sh -> NonEmpty.T [] (state, emission) -> Trained typ sh prob
trainSupervised sh xs =
   let getState (s, _x) = s
   in  Trained {
          trainedInitial = Vector.unit sh $ getState $ NonEmpty.head xs,
          trainedTransition =
             Matrix.transpose $ ArrMatrix.fromVector $
             StorableArray.accumulate (+)
                (ArrMatrix.toVector $ squareConstant sh 0) $
             attachOnes $ NonEmpty.mapAdjacent (,) $ fmap getState xs,
          trainedDistribution = Distr.accumulateEmissions sh xs
       }
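
{- |
Normalize the accumulated statistics of a 'Trained' value into a model:
the initial vector, the columns of the transition matrix
and the emission distribution are each normalized.
-}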
finishTraining ::
   (Distr.Estimate typ, Shape.C sh, Eq sh, Class.Real prob) =>
   Trained typ sh prob -> T typ sh prob
finishTraining hmm =
   Cons {
      initial = normalizeProb $ trainedInitial hmm,
      transition = normalizeProbColumns $ trainedTransition hmm,
      distribution = Distr.normalize $ trainedDistribution hmm
   }
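
-- Scale every column of the matrix such that it sums up to one.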
normalizeProbColumns ::
   (Shape.C sh, Eq sh, Class.Real a) =>
   Matrix.Square sh a -> Matrix.Square sh a
normalizeProbColumns m =
   Matrix.scaleColumns (StorableArray.map recip (Matrix.columnSums m)) m
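
{- |
Train a model from multiple sequences:
map every piece of training data to a 'Trained' value,
merge all of them with 'mergeTrained' and finish the training.
-}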
trainMany ::
   (Distr.Estimate typ, Shape.C sh, Eq sh, Class.Real prob, Foldable f) =>
   (trainingData -> Trained typ sh prob) ->
   NonEmpty.T f trainingData -> T typ sh prob
trainMany train = finishTraining . NonEmpty.foldl1Map mergeTrained train
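
{- |
Maximum of the distances between the initial vectors
and between the transition matrices of two models.
The emission distributions are not compared.
This can be used, for instance, as a convergence criterion
for iterated unsupervised training.
-}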
deviation ::
   (Shape.C sh, Eq sh, Class.Real prob) =>
   T typ sh prob -> T typ sh prob -> prob
deviation hmm0 hmm1 =
   distance (initial hmm0) (initial hmm1)
   `max`
   matrixDistance (transition hmm0) (transition hmm1)
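
-- | Serialize a model to CSV text.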
toCSV ::
   (Distr.ToCSV typ, Shape.Indexed sh, Class.Real prob, Show prob) =>
   T typ sh prob -> String
toCSV hmm =
   CSV.ppCSVTable $ snd $ CSV.toCSVTable $ HMMCSV.padTable "" $ toCells hmm
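
{- |
Parse a model from CSV text.
The first argument builds the state shape
from the number of states found in the CSV data;
malformed input is reported via 'ME.Exceptional'.
-}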
fromCSV ::
   (Distr.FromCSV typ, Shape.Indexed sh, Eq sh, Class.Real prob, Read prob) =>
   (Int -> sh) -> String -> ME.Exceptional String (T typ sh prob)
fromCSV makeShape =
   MS.evalStateT (parseCSV makeShape) . map HMMCSV.fixShortRow . CSV.parseCSV