module Numeric.Sibe.Utils
( similarity
, ordNub
, onehot
, average
, pca
) where
import qualified Data.Vector.Storable as V
import qualified Data.Set as Set
import Numeric.LinearAlgebra
similarity :: Vector Double -> Vector Double -> Double
similarity a b = (V.sum $ a * b) / (magnitude a * magnitude b)
where
magnitude :: Vector Double -> Double
magnitude v = sqrt $ V.sum (cmap (^2) v)
onehot :: Int -> Int -> Vector Double
onehot len i = vector $ replicate i 0 ++ [1] ++ replicate (len i 1) 0
ordNub :: (Ord a) => [a] -> [a]
ordNub = go Set.empty
where
go _ [] = []
go s (x:xs) = if x `Set.member` s then go s xs
else x : go (Set.insert x s) xs
average :: Vector Double -> Vector Double
average v = cmap (/ (V.sum v)) v
pca :: Matrix Double -> Int -> Matrix Double
pca m d =
let rs = toRows m
means = map (\v -> V.sum v / fromIntegral (V.length v)) rs
meanReduced = map (\(a, b) -> V.map (+ (negate b)) a) $ zip rs means
mat = fromRows meanReduced
(u, s, v) = svd mat
diagS = diagRect 0 s (rows mat) (cols mat)
in u ?? (All, Take d) <> diagS ?? (Take d, Take d)