Copyright | (c) 2015 Kai Zhang |
---|---|
License | MIT |
Maintainer | kai@kzhang.org |
Stability | experimental |
Portability | portable |
Safe Haskell | None |
Language | Haskell2010 |
High-performance agglomerative hierarchical clustering library. Example:
>>> :set -XOverloadedLists
>>> import qualified Data.Vector as V
>>> let points = [[2, 3, 4], [2, 1, 2], [2, 1, 6], [2, 4, 6], [5, 1, 2]] :: V.Vector (V.Vector Double)
>>> let dendro = hclust Average points euclidean
>>> print dendro
Branch 5 4.463747440868191 (Branch 3 2.914213562373095 (Leaf (fromList [2.0,1.0,6.0])) (Branch 2 2.23606797749979 (Leaf (fromList [2.0,3.0,4.0])) (Leaf (fromList [2.0,4.0,6.0])))) (Branch 2 3.0 (Leaf (fromList [2.0,1.0,2.0])) (Leaf (fromList [5.0,1.0,2.0])))
>>> putStr $ drawDendrogram $ fmap show dendro
h: 4.4637
|
+- h: 2.9142
|  |
|  +- fromList [2.0,1.0,6.0]
|  |
|  `- h: 2.2361
|     |
|     +- fromList [2.0,3.0,4.0]
|     |
|     `- fromList [2.0,4.0,6.0]
|
`- h: 3.0000
   |
   +- fromList [2.0,1.0,2.0]
   |
   `- fromList [5.0,1.0,2.0]
Synopsis
- data Dendrogram a
- = Leaf !a
- | Branch !Size !Distance !(Dendrogram a) !(Dendrogram a)
- size :: Dendrogram a -> Int
- data Linkage
- hclust :: Vector v a => Linkage -> v a -> DistFn a -> Dendrogram a
- normalize :: Dendrogram a -> Dendrogram a
- cutAt :: Dendrogram a -> Distance -> [Dendrogram a]
- flatten :: Dendrogram a -> [a]
- drawDendrogram :: Dendrogram String -> String
- euclidean :: Vector v Double => DistFn (v Double)
- hamming :: (Vector v a, Vector v Bool, Eq a) => DistFn (v a)
Documentation
data Dendrogram a Source #
Leaf !a | |
Branch !Size !Distance !(Dendrogram a) !(Dendrogram a) |
Instances
Functor Dendrogram Source # | |
Defined in AI.Clustering.Hierarchical.Types fmap :: (a -> b) -> Dendrogram a -> Dendrogram b # (<$) :: a -> Dendrogram b -> Dendrogram a # | |
Eq a => Eq (Dendrogram a) Source # | |
Defined in AI.Clustering.Hierarchical.Types (==) :: Dendrogram a -> Dendrogram a -> Bool # (/=) :: Dendrogram a -> Dendrogram a -> Bool # | |
Show a => Show (Dendrogram a) Source # | |
Defined in AI.Clustering.Hierarchical.Types showsPrec :: Int -> Dendrogram a -> ShowS # show :: Dendrogram a -> String # showList :: [Dendrogram a] -> ShowS # | |
Binary a => Binary (Dendrogram a) Source # | |
Defined in AI.Clustering.Hierarchical.Types |
size :: Dendrogram a -> Int Source #
O(1) Return the size of a dendrogram
data Linkage Source #
Different hierarchical clustering schemes.
Single | O(n^2) Single linkage, $d(A,B) = \min_{a \in A, b \in B} d(a,b)$. |
Complete | O(n^2) Complete linkage, $d(A,B) = \max_{a \in A, b \in B} d(a,b)$. |
Average | O(n^2) Average linkage or UPGMA, $d(A,B) = \frac{\sum_{a \in A}\sum_{b \in B} d(a,b)}{|A||B|}$. |
Weighted | O(n^2) Weighted linkage. |
Ward | O(n^2) Ward's method. |
Centroid | O(n^3) Centroid linkage, not implemented. |
Median | O(n^3) Median linkage, not implemented. |
hclust :: Vector v a => Linkage -> v a -> DistFn a -> Dendrogram a Source #
Perform hierarchical clustering.
normalize :: Dendrogram a -> Dendrogram a Source #
Normalize the tree heights so that the highest is 1.
cutAt :: Dendrogram a -> Distance -> [Dendrogram a] Source #
Cut a dendrogram at a given height.
flatten :: Dendrogram a -> [a] Source #
Return the elements of a dendrogram in pre-order.
drawDendrogram :: Dendrogram String -> String Source #
2-dimensional drawing of a dendrogram
Distance functions
euclidean :: Vector v Double => DistFn (v Double) Source #
Compute euclidean distance between two points.
References
Müllner D (2011). Modern Hierarchical, Agglomerative Clustering Algorithms. ArXiv:1109.2378 [stat.ML]. http://arxiv.org/abs/1109.2378