module NLP.POS.AvgPerceptron
( Perceptron(..)
, Class(..)
, Weight
, Feature(..)
, emptyPerceptron
, predict
, train
, update
, averageWeights
)
where
import Data.List (foldl')
import qualified Data.Map.Strict as Map
import Data.Map.Strict (Map)
import Data.Maybe (fromMaybe)
import Data.Serialize (Serialize, put, get)
import Data.Text (Text)
import System.Random.Shuffle (shuffleM)
import GHC.Generics
import NLP.Types ()
newtype Feature = Feat Text
deriving (Read, Show, Eq, Ord, Generic)
instance Serialize Feature where
put (Feat txt) = put txt
get = fmap Feat get
newtype Class = Class String
deriving (Read, Show, Eq, Ord, Generic)
instance Serialize Class
type Weight = Double
infinity :: Weight
infinity = recip 0
emptyPerceptron :: Perceptron
emptyPerceptron = Perceptron { weights = Map.empty
, totals = Map.empty
, tstamps = Map.empty
, instances = 0 }
data Perceptron = Perceptron {
weights :: Map Feature (Map Class Weight)
, totals :: Map (Feature, Class) Weight
, tstamps :: Map (Feature, Class) Int
, instances :: Int
} deriving (Read, Show, Eq, Generic)
instance Serialize Perceptron
incrementInstances :: Perceptron -> Perceptron
incrementInstances p = p { instances = 1 + (instances p) }
getTimestamp :: Perceptron -> (Feature, Class) -> Int
getTimestamp p param = Map.findWithDefault 0 param (tstamps p)
getTotal :: Perceptron -> (Feature, Class) -> Weight
getTotal p param = Map.findWithDefault 0 param (totals p)
getFeatureWeight :: Perceptron -> Feature -> Map Class Weight
getFeatureWeight p f = Map.findWithDefault Map.empty f (weights p)
predict :: Perceptron -> Map Feature Int -> Maybe Class
predict per features =
sortedScores
where
sortedScores :: Maybe Class
sortedScores = fst $ Map.foldlWithKey ranker (Nothing, negate infinity) finalScores
ranker r@(_, ow) nc nw | nw > ow = (Just nc, nw)
| otherwise = r
finalScores :: Map Class Weight
finalScores = Map.foldlWithKey fn Map.empty features
fn :: Map Class Weight -> Feature -> Int -> Map Class Weight
fn scores f v
| v > 0 = case Map.lookup f (weights per) of
Just vec -> Map.foldlWithKey (doProd v) scores vec
Nothing -> scores
| otherwise = scores
doProd :: Int -> Map Class Weight -> Class -> Weight -> Map Class Weight
doProd value scores label weight =
Map.alter (updater (weight * (fromIntegral value))) label scores
updater :: Weight -> Maybe Weight -> Maybe Weight
updater newVal Nothing = Just newVal
updater newVal (Just v) = Just (v + newVal)
update :: Perceptron -> Class -> Class -> [Feature] -> Perceptron
update per truth guess features
| truth == guess = incrementInstances per
| otherwise = foldr loopBody per features
where
loopBody :: Feature -> Perceptron -> Perceptron
loopBody f p = let
fweights = getFeatureWeight p f
cweight c = Map.findWithDefault 0 c fweights
in upd_feat guess f (cweight guess) (1)
(upd_feat truth f (cweight truth) 1 p)
upd_feat :: Class -> Feature -> Weight -> Weight -> Perceptron -> Perceptron
upd_feat c f w v p = let
newInstances = 1 + (instances p)
paramTstamp = newInstances getTimestamp p (f, c)
tmpTotal = (getTotal p (f, c)) + ((fromIntegral paramTstamp) * w)
newTotals = Map.insert (f, c) tmpTotal (totals p)
newTstamps = Map.insert (f, c) newInstances (tstamps p)
newWeights = Map.insert f (Map.insert c (w + v) (getFeatureWeight p f)) (weights p)
in p { totals = newTotals
, tstamps = newTstamps
, weights = newWeights }
averageWeights :: Perceptron -> Perceptron
averageWeights per = per { weights = Map.mapWithKey avgWeights $ weights per }
where
avgWeights :: Feature -> Map Class Weight -> Map Class Weight
avgWeights feat ws = Map.foldlWithKey (doAvg feat) Map.empty ws
doAvg :: Feature -> Map Class Weight -> Class -> Weight -> Map Class Weight
doAvg f acc c w = let
param = (f, c)
paramTotal = instances per getTimestamp per param
total :: Weight
total = (getTotal per param) + ((fromIntegral paramTotal) * w)
averaged = roundTo 3 (total / (fromIntegral $ instances per))
in if 0 == averaged
then acc
else Map.insert c averaged acc
roundTo :: RealFrac a => Int -> a -> a
roundTo n f = (fromInteger $ round $ f * (10^n)) / (10.0^^n)
train :: Int -> Perceptron -> [(Map Feature Int, Class)] -> IO Perceptron
train itr model exs = do
trainingSet <- shuffleM $ concat $ take itr $ repeat exs
return $ averageWeights $ foldl' trainEx model trainingSet
trainEx :: Perceptron -> (Map Feature Int, Class) -> Perceptron
trainEx model (feats, truth) = let
guess = fromMaybe (Class "Unk") $ predict model feats
in update model truth guess $ Map.keys feats