-- | Hierarchical clustering entry points.
--
-- Exports the 'Dendrogram' type together with 'size', 'cutAt' and
-- 'members' (none of which are defined in this file; they come from the
-- imported modules), the 'Metric' linkage selector, the 'hclust' driver,
-- and the helpers 'computeDists' and 'euclidean'.
module AI.Clustering.Hierarchical
( Dendrogram(..)
, size
, cutAt
, members
, Metric(..)
, hclust
, computeDists
, euclidean
) where
import Control.Applicative ((<$>))
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U
import AI.Clustering.Hierarchical.Internal
import AI.Clustering.Hierarchical.Types
-- | Selects which linkage function 'hclust' hands to the clustering
-- routine.
data Metric
  = Single    -- ^ 'hclust' uses @single@.
  | Complete  -- ^ 'hclust' uses @complete@.
  | Average   -- ^ 'hclust' uses @average@.
  | Weighted  -- ^ 'hclust' uses @weighted@.
  | Ward      -- ^ 'hclust' uses @ward@.
  | Centroid  -- ^ Not handled by 'hclust'; selecting it hits the @error@ branch.
  | Median    -- ^ Not handled by 'hclust'; selecting it hits the @error@ branch.
-- | Cluster the elements of @xs@ hierarchically.
--
-- The pairwise distances are computed with 'computeDists' using the
-- distance function @f@, and passed to @nnChain@ together with the
-- linkage function selected by @method@. Every index held in the
-- resulting dendrogram is then replaced by the corresponding element of
-- @xs@ (looked up with the bounds-checked 'G.!').
--
-- 'Centroid' and 'Median' are not supported: choosing either one makes
-- the linkage function an @error@ call.
hclust :: G.Vector v a => Metric -> v a -> DistFn a -> Dendrogram a
hclust method xs f = (xs G.!) <$> nnChain (computeDists f xs) linkage
  where
    -- Shared failure value for the linkage methods without an implementation.
    unsupported = error "Not implemented"
    linkage = case method of
      Single   -> single
      Complete -> complete
      Average  -> average
      Weighted -> weighted
      Ward     -> ward
      Centroid -> unsupported
      Median   -> unsupported
-- | Compute the pairwise distances between the elements of @vec@.
--
-- The distance function @f@ is applied once to every index pair
-- @(i, j)@ with @i < j@. The results are laid out in the order the pairs
-- are enumerated -- all pairs with @i = 0@ first, then @i = 1@, and so
-- on -- and packed into an unboxed vector that is stored next to the
-- element count @n@ in the returned 'DistanceMat'.
--
-- For an input with fewer than two elements no pairs exist, so the
-- distance vector is empty.
computeDists :: G.Vector v a => DistFn a -> v a -> DistanceMat
computeDists f vec = DistanceMat n (U.fromList pairDists)
  where
    n = G.length vec
    -- Unchecked indexing is safe here: both indices are always drawn
    -- from [0 .. n - 1].
    at k = vec `G.unsafeIndex` k
    pairDists =
      [ f (at i) (at j)
      | i <- [0 .. n - 1]
      , j <- [i + 1 .. n - 1]
      ]
-- | Euclidean distance between two vectors of 'Double's: the square root
-- of the sum of the squared element-wise differences.
--
-- Elements are paired positionally with 'G.zipWith'.
-- NOTE(review): per the vector library documentation, 'G.zipWith' stops
-- at the shorter input, so a length mismatch is silently ignored rather
-- than reported -- confirm callers always pass equal-length vectors.
euclidean :: G.Vector v Double => DistFn (v Double)
euclidean xs ys = sqrt . G.sum $ G.zipWith squaredDiff xs ys
  where
    squaredDiff x y = (x - y) ** 2