------------------------------------------------------------------------ -- | -- Module : Data.Datamining.Clustering.Classifier -- Copyright : (c) 2012-2021 Amy de Buitléir -- License : BSD-style -- Maintainer : amy@nualeargais.ie -- Stability : experimental -- Portability : portable -- -- Tools for identifying patterns in data. -- ------------------------------------------------------------------------ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE TypeFamilies #-} module Data.Datamining.Clustering.Classifier ( Classifier(..) ) where import Data.List (minimumBy) import Data.Ord (comparing) -- | A machine which learns to classify input patterns. -- Minimal complete definition: @trainBatch@, @reportAndTrain@. class Classifier (c :: * -> * -> * -> *) v k p where -- | Returns a list of index\/model pairs. toList :: c v k p -> [(k, p)] -- | Returns the number of models this classifier can learn. numModels :: c v k p -> Int -- | Returns the current models of the classifier. models :: c v k p -> [p] -- | @'differences' c target@ returns the indices of all nodes in -- @c@, paired with the difference between @target@ and the -- node's model. differences :: c v k p -> p -> [(k, v)] -- | @classify c target@ returns the index of the node in @c@ -- whose model best matches the @target@. classify :: Ord v => c v k p -> p -> k classify c v k p c p p = [(k, v)] -> k forall b p. Ord b => [(p, b)] -> p f ([(k, v)] -> k) -> [(k, v)] -> k forall a b. (a -> b) -> a -> b $ c v k p -> p -> [(k, v)] forall (c :: * -> * -> * -> *) v k p. Classifier c v k p => c v k p -> p -> [(k, v)] differences c v k p c p p where f :: [(p, b)] -> p f [] = [Char] -> p forall a. HasCallStack => [Char] -> a error [Char] "classifier has no models" f [(p, b)] xs = (p, b) -> p forall a b. (a, b) -> a fst ((p, b) -> p) -> (p, b) -> p forall a b. (a -> b) -> a -> b $ ((p, b) -> (p, b) -> Ordering) -> [(p, b)] -> (p, b) forall (t :: * -> *) a. Foldable t => (a -> a -> Ordering) -> t a -> a minimumBy (((p, b) -> b) -> (p, b) -> (p, b) -> Ordering forall a b. Ord a => (b -> a) -> b -> b -> Ordering comparing (p, b) -> b forall a b. (a, b) -> b snd) [(p, b)] xs -- | @'train' c target@ returns a modified copy -- of the classifier @c@ that has partially learned the @target@. train :: c v k p -> p -> c v k p train c v k p c p p = c v k p c' where (k _, [(k, v)] _, c v k p c') = c v k p -> p -> (k, [(k, v)], c v k p) forall (c :: * -> * -> * -> *) v k p. Classifier c v k p => c v k p -> p -> (k, [(k, v)], c v k p) reportAndTrain c v k p c p p -- | @'trainBatch' c targets@ returns a modified copy -- of the classifier @c@ that has partially learned the @targets@. trainBatch :: c v k p -> [p] -> c v k p -- | @'classifyAndTrain' c target@ returns a tuple containing the -- index of the node in @c@ whose model best matches the input -- @target@, and a modified copy of the classifier @c@ that has -- partially learned the @target@. Invoking @classifyAndTrain c p@ -- may be faster than invoking @(p `classify` c, train c p)@, but -- they -- should give identical results. classifyAndTrain :: c v k p -> p -> (k, c v k p) classifyAndTrain c v k p c p p = (k bmu, c v k p c') where (k bmu, [(k, v)] _, c v k p c') = c v k p -> p -> (k, [(k, v)], c v k p) forall (c :: * -> * -> * -> *) v k p. Classifier c v k p => c v k p -> p -> (k, [(k, v)], c v k p) reportAndTrain c v k p c p p -- | @'diffAndTrain' c target@ returns a tuple containing: -- 1. The indices of all nodes in @c@, paired with the difference -- between @target@ and the node's model -- 2. A modified copy of the classifier @c@ that has partially -- learned the @target@. -- Invoking @diffAndTrain c p@ may be faster than invoking -- @(p `diff` c, train c p)@, but they should give identical -- results. diffAndTrain :: c v k p -> p -> ([(k, v)], c v k p) diffAndTrain c v k p c p p = ([(k, v)] ds, c v k p c') where (k _, [(k, v)] ds, c v k p c') = c v k p -> p -> (k, [(k, v)], c v k p) forall (c :: * -> * -> * -> *) v k p. Classifier c v k p => c v k p -> p -> (k, [(k, v)], c v k p) reportAndTrain c v k p c p p -- | @'reportAndTrain' c f target@ returns a tuple containing: -- 1. The index of the node in @c@ whose model best matches the -- input @target@ -- 2. The indices of all nodes in @c@, paired with the difference -- between @target@ and the node's model -- 3. A modified copy of the classifier @c@ that has partially -- learned the @target@ -- Invoking @diffAndTrain c p@ may be faster than invoking -- @(p `diff` c, train c p)@, but they should give identical -- results. reportAndTrain :: c v k p -> p -> (k, [(k, v)], c v k p)