ConClusion-0.0.2: Cluster algorithms, PCA, and chemical conformere analysis
CopyrightPhillip Seeber 2021
LicenseAGPL-3
Maintainerphillip.seeber@googlemail.com
Stabilityexperimental
PortabilityPOSIX, Windows
Safe HaskellNone
LanguageHaskell2010

ConClusion.Numeric.Statistics

Description

 
Synopsis

PCA

data PCA Source #

Constructors

PCA 

Fields

  • x :: Matrix U Double

    Original feature matrix.

  • x' :: Matrix U Double

    Feature matrix in mean deviation form.

  • y :: Matrix U Double

    Transformed data.

  • a :: Matrix U Double

    Transformation matrix to transform feature matrix into PCA result matrix.

  • mse :: Double

    Mean squared error introduced by PCA.

  • remaining :: Double

    Percentage of the behaviour captured in the remaining dimensions.

  • allEigenValues :: Vector U Double

    All eigenvalues from the diagonalisation of the covariance matrix.

  • pcaEigenValues :: Vector U Double

    Eigenvalues that were kept for PCA.

  • allEigenVecs :: Matrix U Double

    All eigenvectors from the diagonalisation of the covariance matrix.

  • pcaEigenVecs :: Matrix U Double

    Eigenvectors that were kept for PCA.

pca Source #

Arguments

:: (Numeric r Double, Mutable r Ix2 Double, Manifest r Ix1 Double, Source (R r) Ix2 Double, Extract r Ix2 Double, MonadThrow m) 
=> Int

Dimensionalty after PCA transformation.

-> Matrix r Double

\(m \times n\) Feaute matrix \(\mathbf{X}\), with \(m\) different measurements (rows) in \(n\) different trials (columns).

-> m PCA 

Performs a PCA on the feature matrix \(\mathbf{X}\) by solving the eigenproblem of the covariance matrix. The function takes the feature matrix directly and perfoms the conversion to mean deviation form, the calculation of the covariance matrix and the eigenvalue problem automatically.

Variance

normalise :: (Ord e, Unbox e, Numeric r e, Fractional e, Source r Ix2 e, Mutable r Ix2 e) => Array r Ix2 e -> Array r Ix2 e Source #

Normalise each value so that the maximum absolute value in each row becomes one.

meanDeviation :: (Source r Ix2 e, Fractional e, Unbox e, Numeric r e, Mutable r Ix2 e) => Matrix r e -> Matrix r e Source #

Subtract the mean value of all columns from the feature matrix. Brings the feature matrix to mean deviation form.

covariance :: (Numeric r e, Mutable r Ix2 e, Fractional e) => Matrix r e -> Matrix r e Source #

Obtains the covariance matrix \(\mathbf{C_X}\) from the feature matrix \(\mathbf{X}\). \[ \mathbf{C_X} \equiv \frac{1}{n - 1} \mathbf{X} \mathbf{X}^T \] where \(n\) is the number of columns in the matrix.

The feature matrix should be in mean deviation form, see meanDeviation.

Distance Metrics

type DistFn r e = Matrix r e -> Matrix r e Source #

Distance matrix generator functions.

lpNorm :: (Mutable r Ix2 e, Floating e) => Int -> DistFn r e Source #

The \(L_p\) norm between two vectors. Generalisation of Manhattan and Euclidean distances. \[ d(\mathbf{a}, \mathbf{b}) = \left( \sum \limits_{i=1}^n \lvert \mathbf{a}_i - \mathbf{b}_i \rvert ^p \right) ^ \frac{1}{p} \]

manhattan :: (Mutable r Ix2 e, Floating e) => DistFn r e Source #

The Manhattan distance between two vectors. Specialisation of the \(L_p\) norm for \(p = 1\). \[ d(\mathbf{a}, \mathbf{b}) = \sum \limits_{i=1}^n \lvert \mathbf{a}_i - \mathbf{b}_i \rvert \]

euclidean :: (Mutable r Ix2 e, Floating e) => DistFn r e Source #

The Euclidean distance between two vectors. Specialisation of the \(L_p\) norm for \(p = 2\). \[ d(\mathbf{a}, \mathbf{b}) = \sqrt{\sum \limits_{i=1}^n (\mathbf{a}_i - \mathbf{b}_i)^2} \]

mahalanobis :: (Unbox e, Numeric r e, Mutable r Ix2 e, Mutable r Ix1 e, Field e) => DistFn r e Source #

Mahalanobis distance between points. Suitable for non correlated axes. \[ d(\mathbf{a}, \mathbf{b}) = \sqrt{(\mathbf{a} - \mathbf{b})^T \mathbf{S}^{-1} (\mathbf{a} - \mathbf{b})} \] where \(\mathbf{S}\) is the covariance matrix.

Cluster Algorithms

type Clusters = Vector B IntSet Source #

Representation of clusters.

DBScan

newtype DistanceInvalidException e Source #

Exception for invalid search distances.

Constructors

DistanceInvalidException e 

Instances

Instances details
Eq e => Eq (DistanceInvalidException e) Source # 
Instance details

Defined in ConClusion.Numeric.Statistics

Show e => Show (DistanceInvalidException e) Source # 
Instance details

Defined in ConClusion.Numeric.Statistics

Methods

showsPrec :: Int -> DistanceInvalidException e -> ShowS

show :: DistanceInvalidException e -> String

showList :: [DistanceInvalidException e] -> ShowS

(Typeable e, Show e) => Exception (DistanceInvalidException e) Source # 
Instance details

Defined in ConClusion.Numeric.Statistics

Methods

toException :: DistanceInvalidException e -> SomeException

fromException :: SomeException -> Maybe (DistanceInvalidException e)

displayException :: DistanceInvalidException e -> String

dbscan Source #

Arguments

:: (MonadThrow m, Ord e, Num e, Typeable e, Show e, Source r Ix2 e) 
=> DistFn r e

Distance measure to build the distance matrix of all points.

-> Int

Minimal number of members in a cluster.

-> e

Search radius \(\epsilon\)

-> Matrix r e

\(n\) \(m\)-dimensional data points as column vectors of a \(m \times n\) matrix.

-> m Clusters

Resulting clusters.

DBScan algorithm.

Hierarchical Cluster Analysis

data Dendrogram e Source #

A dendrogram as a binary tree.

Instances

Instances details
Eq e => Eq (Dendrogram e) Source # 
Instance details

Defined in ConClusion.Numeric.Statistics

Methods

(==) :: Dendrogram e -> Dendrogram e -> Bool

(/=) :: Dendrogram e -> Dendrogram e -> Bool

Show e => Show (Dendrogram e) Source # 
Instance details

Defined in ConClusion.Numeric.Statistics

Methods

showsPrec :: Int -> Dendrogram e -> ShowS

show :: Dendrogram e -> String

showList :: [Dendrogram e] -> ShowS

Generic (Dendrogram e) Source # 
Instance details

Defined in ConClusion.Numeric.Statistics

Associated Types

type Rep (Dendrogram e) :: Type -> Type

Methods

from :: Dendrogram e -> Rep (Dendrogram e) x

to :: Rep (Dendrogram e) x -> Dendrogram e

FromJSON e => FromJSON (Dendrogram e) Source # 
Instance details

Defined in ConClusion.Numeric.Statistics

Methods

parseJSON :: Value -> Parser (Dendrogram e)

parseJSONList :: Value -> Parser [Dendrogram e]

ToJSON e => ToJSON (Dendrogram e) Source # 
Instance details

Defined in ConClusion.Numeric.Statistics

Methods

toJSON :: Dendrogram e -> Value

toEncoding :: Dendrogram e -> Encoding

toJSONList :: [Dendrogram e] -> Value

toEncodingList :: [Dendrogram e] -> Encoding

type Rep (Dendrogram e) Source # 
Instance details

Defined in ConClusion.Numeric.Statistics

type Rep (Dendrogram e)

data JoinStrat e Source #

A strategy/distance measure for clusters.

Instances

Instances details
Eq e => Eq (JoinStrat e) Source # 
Instance details

Defined in ConClusion.Numeric.Statistics

Methods

(==) :: JoinStrat e -> JoinStrat e -> Bool

(/=) :: JoinStrat e -> JoinStrat e -> Bool

Show e => Show (JoinStrat e) Source # 
Instance details

Defined in ConClusion.Numeric.Statistics

Methods

showsPrec :: Int -> JoinStrat e -> ShowS

show :: JoinStrat e -> String

showList :: [JoinStrat e] -> ShowS

hca :: (MonadThrow m, Mutable r Ix1 e, Mutable r Ix2 e, Mutable r Ix1 (e, Ix1), Manifest (R r) Ix1 e, OuterSlice r Ix2 e, Ord e, Unbox e, Fractional e) => DistFn r e -> JoinStrat e -> Matrix r e -> m (Dendrogram e) Source #

Performance improved hierarchical clustering algorithm. GENERIC_LINKAGE from figure 3, https://arxiv.org/pdf/1109.2378.pdf.

cutDendroAt :: Ord e => Dendrogram e -> e -> Clusters Source #

Cut a Dendrogram at a given distance and obtain all clusters from it.