module Numeric.Regression.Logistic
  (Model, regress) where

import Control.Applicative
import Data.Foldable
import Data.Monoid
import Data.Traversable
import Numeric.AD
import Numeric.Regression.Internal

-- | A model using the given @f@ to store parameters of type @a@.
--   Can be thought of as some kind of vector throughout this
--   package.
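--
--   For instance, a (purely hypothetical) two-parameter model stored
--   in an ordinary list might look like:
--
-- > exampleTheta :: Model [] Double
-- > exampleTheta = [0.5, -1.2]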
type Model f a = f a

-- The logistic (sigmoid) function, mapping any real number into the
-- open interval (0, 1). (Despite its name, this is the logistic
-- function itself, not its inverse, the logit.)
logit :: Floating a => a -> a
logit x = 1 / (1 + exp (negate x))
{-# INLINE logit #-}
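
-- A few approximate sample values, for intuition (the usual S-shaped
-- curve squashing the real line into (0, 1)):
--
-- > logit 0     =  0.5
-- > logit 10    ~= 0.99995
-- > logit (-10) ~= 4.5e-5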

-- Log-likelihood of a single observation under the model: the output
-- y is treated as Bernoulli-distributed with success probability
-- logit (theta `dot` x).
logLikelihood :: (Applicative v, Foldable v, Floating a)
              => Model v a -- theta, the parameter vector
              -> a         -- y, the observed output (0 or 1)
              -> v a       -- x, the input vector for this observation
              -> a
logLikelihood theta y x =
  y * log (logit z) + (1 - y) * log (1 - logit z)

  where z = theta `dot` x
{-# INLINE logLikelihood #-}
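
-- For example (illustrative numbers only), with theta = [1, 0.1],
-- y = 1 and x = [1, 1], we get z = 1.1 and:
--
-- > logLikelihood [1, 0.1] 1 [1, 1]   -- roughly log 0.75 ~= -0.287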

-- Average log-likelihood of the whole data set, with an L2 penalty on
-- the parameters weighted by delta.
totalLogLikelihood :: (Applicative v, Foldable v, Applicative f, Foldable f, Floating a)
                   => a         -- delta, the regularization coefficient
                   -> Model v a -- theta, the parameter vector
                   -> f a       -- observed output for each observation
                   -> f (v a)   -- input vector for each observation
                   -> a
totalLogLikelihood delta theta ys xs =
  (a - delta * b) / fromIntegral n

  where Acc n (Sum a) = foldMap acc $ liftA2 (logLikelihood theta) ys xs
        b = (/2) . getSum $ foldMap (\x -> Sum (x^(2::Int))) theta
{-# INLINE totalLogLikelihood #-}
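
-- For reference, the quantity computed above is, informally (a sketch,
-- writing s for the logistic function and n for the number of
-- observations):
--
-- >   ( sum_i [ y_i * log (s (theta `dot` x_i))
-- >           + (1 - y_i) * log (1 - s (theta `dot` x_i)) ]
-- >     - delta * norm(theta)^2 / 2
-- >   ) / n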

-- | Given a regularization coefficient @delta@, the observed outputs @ys@,
--   the corresponding input vectors @xs@ and initial values for the model's
--   parameters @theta0@,
--
-- > regress delta ys xs theta0
--
-- returns a stream of values for the parameters that fit the data
-- increasingly well.
--
-- Example:
--
-- @
-- ys_ex :: [Double]
-- xs_ex :: [[Double]]
-- (ys_ex, xs_ex) = unzip $
--   [ (1, [1, 1])
--   , (0, [-1, -2])
--   , (1, [2, 5])
--   , (0, [-1, 1])
--   , (1, [2, -1])
--   , (1, [1, -10])
--   , (0, [-0.1, 30])
--   ]
--
-- t0 :: [Double]
-- t0 = [1, 0.1]
--
-- approxs' :: [Model [] Double]
-- approxs' = regress 0.1 ys_ex xs_ex t0
-- @
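--
-- One simple (hypothetical) way to pick a concrete model from that
-- stream is to keep at most some fixed number of gradient steps and
-- take the last one:
--
-- @
-- fitted :: Model [] Double
-- fitted = last (take 500 approxs')
-- @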
regress :: (Traversable v, Applicative v, Foldable f, Applicative f, Ord a, Floating a)
        => a           -- ^ regularization coefficient (weight of the L2 penalty)
        -> f a         -- ^ observed output for each observation
        -> f (v a)     -- ^ input data for each observation
        -> Model v a   -- ^ initial values for the model's parameters
        -> [Model v a] -- ^ stream of increasingly accurate values for
                       --   the model's parameters
regress delta ys xs =
  gradientAscent $ \theta ->
    totalLogLikelihood (auto delta) theta (fmap auto ys) (fmap (fmap auto) xs)
{-# INLINE regress #-}