chatter-0.9.1.0: A library of simple NLP algorithms.

Safe HaskellNone
LanguageHaskell2010

NLP.ML.AvgPerceptron

Description

Averaged Perceptron implementation of part-of-speech tagging, adapted for Haskell from this Python implementation, which is described in the blog post:

The Perceptron code can be found on GitHub:

Synopsis

Documentation

data Perceptron Source #

The perceptron model.

Constructors

Perceptron 

Fields

newtype Class Source #

The classes that the perceptron assigns are represented with a newtype-wrapped String.

Eventually, I think this should become a typeclass, so the classes can be defined by the users of the Perceptron (such as custom POS tag ADTs, or more complex classes).

Constructors

Class String 

Instances

Eq Class Source # 

Methods

(==) :: Class -> Class -> Bool #

(/=) :: Class -> Class -> Bool #

Ord Class Source # 

Methods

compare :: Class -> Class -> Ordering #

(<) :: Class -> Class -> Bool #

(<=) :: Class -> Class -> Bool #

(>) :: Class -> Class -> Bool #

(>=) :: Class -> Class -> Bool #

max :: Class -> Class -> Class #

min :: Class -> Class -> Class #

Read Class Source # 
Show Class Source # 

Methods

showsPrec :: Int -> Class -> ShowS #

show :: Class -> String #

showList :: [Class] -> ShowS #

Generic Class Source # 

Associated Types

type Rep Class :: * -> * #

Methods

from :: Class -> Rep Class x #

to :: Rep Class x -> Class #

Serialize Class Source # 

Methods

put :: Putter Class #

get :: Get Class #

NFData Class Source # 

Methods

rnf :: Class -> () #

type Rep Class Source # 
type Rep Class = D1 (MetaData "Class" "NLP.ML.AvgPerceptron" "chatter-0.9.1.0-CnWxxDeMROyIxVsZb3fGkc" True) (C1 (MetaCons "Class" PrefixI False) (S1 (MetaSel (Nothing Symbol) NoSourceUnpackedness NoSourceStrictness DecidedLazy) (Rec0 String)))

type Weight = Double Source #

Type alias for Double, to make the code easier to read and simple to change if necessary.

newtype Feature Source #

Constructors

Feat Text 

emptyPerceptron :: Perceptron Source #

An empty perceptron, used to start training.

predict :: Perceptron -> Map Feature Int -> Maybe Class Source #

Predict a class given a feature vector.

Ported from Python:

def predict(self, features):
    '''Dot-product the features and current weights and return the best label.'''
    scores = defaultdict(float)
    for feat, value in features.items():
        if feat not in self.weights or value == 0:
            continue
        weights = self.weights[feat]
        for label, weight in weights.items():
            scores[label] += value * weight
    # Do a secondary alphabetic sort, for stability
    return max(self.classes, key=lambda label: (scores[label], label))

update :: Perceptron -> Class -> Class -> [Feature] -> Perceptron Source #

Update the perceptron with a new example.

update(self, truth, guess, features)
   ...
        self.i += 1
        if truth == guess:
            return None
        for f in features:
            weights = self.weights.setdefault(f, {}) -- setdefault is Map.findWithDefault, and destructive.
            upd_feat(truth, f, weights.get(truth, 0.0), 1.0)
            upd_feat(guess, f, weights.get(guess, 0.0), -1.0)
        return None

averageWeights :: Perceptron -> Perceptron Source #

Average the weights over all training iterations, using the accumulated totals and timestamps.

Ported from Python:

def average_weights(self):
    for feat, weights in self.weights.items():
        new_feat_weights = {}
        for clas, weight in weights.items():
            param = (feat, clas)
            total = self._totals[param]
            total += (self.i - self._tstamps[param]) * weight
            averaged = round(total / float(self.i), 3)
            if averaged:
                new_feat_weights[clas] = averaged
        self.weights[feat] = new_feat_weights
    return None