-- | Small numeric helpers built on hmatrix vectors and matrices:
-- cosine similarity, one-hot encoding, order-preserving de-duplication,
-- sum-normalisation and an SVD-based projection.
module Numeric.Sibe.Utils
  ( similarity
  , ordNub
  , onehot
  , average
  , pca
  ) where

-- On base >= 4.11 the Prelude exports the Semigroup operator '<>', which
-- would make the unqualified matrix product '<>' from
-- "Numeric.LinearAlgebra" (used in 'pca') ambiguous. On older base the
-- hiding clause names nothing and is harmless.
import Prelude hiding ((<>))

import qualified Data.Set as Set
import qualified Data.Vector.Storable as V
import Numeric.LinearAlgebra

-- | Cosine similarity of two vectors: the sum of their element-wise
-- products divided by the product of their Euclidean magnitudes.
--
-- If either vector has magnitude zero the denominator is zero, so the
-- result is not a finite number; callers must guard against that case.
similarity :: Vector Double -> Vector Double -> Double
similarity a b = V.sum (a * b) / (magnitude a * magnitude b)
  where
    -- Euclidean length: square root of the sum of squared elements.
    magnitude :: Vector Double -> Double
    magnitude v = sqrt (V.sum (cmap (^ (2 :: Int)) v))

-- | @onehot len i@ builds a vector with @i@ zeros, a single 1, and then
-- @len - i - 1@ zeros, i.e. a vector of length @len@ with a 1 at index
-- @i@ when @0 <= i < len@.
--
-- No bounds check is performed. Because 'replicate' yields an empty list
-- for a non-positive count, an index outside that range produces a vector
-- whose length is not @len@ (@i + 1@ elements for @i >= len@).
onehot :: Int -> Int -> Vector Double
onehot len i = vector (leading ++ [1] ++ trailing)
  where
    leading = replicate i 0
    trailing = replicate (len - i - 1) 0

-- | Remove duplicate elements while keeping the first occurrence of each
-- element in its original position. Already-seen elements are tracked in
-- a 'Set.Set', so only an 'Ord' instance is required.
ordNub :: (Ord a) => [a] -> [a]
ordNub = go Set.empty
  where
    go _ [] = []
    go seen (x : xs)
      | x `Set.member` seen = go seen xs
      | otherwise = x : go (Set.insert x seen) xs

-- | Divide every element of the vector by the sum of all its elements.
--
-- Despite the name this does not compute a mean: it rescales the vector
-- by its total. A vector whose elements sum to zero leads to a division
-- by zero in every element.
average :: Vector Double -> Vector Double
average v = cmap (/ total) v
  where
    total = V.sum v

-- | @pca m d@ centres the rows of @m@, takes the singular value
-- decomposition of the centred matrix, and returns the product of the
-- first @d@ columns of the left singular vectors with the leading
-- @d x d@ block of the (rectangular) diagonal matrix of singular values.
--
-- Each row has /its own/ mean subtracted.
-- NOTE(review): conventional PCA centres each column (feature) instead;
-- confirm that per-row centring is intended for the callers' data layout.
--
-- @d@ is not validated here; it is passed straight to the 'Take'
-- extractors.
pca :: Matrix Double -> Int -> Matrix Double
pca m d = (u ?? (All, Take d)) <> (diagS ?? (Take d, Take d))
  where
    -- Arithmetic mean of a single row.
    rowMean r = V.sum r / fromIntegral (V.length r)

    -- Subtract a row's mean from each of its elements.
    centre r = V.map (subtract (rowMean r)) r

    mat = fromRows (map centre (toRows m))

    -- Only the left singular vectors and singular values are needed; the
    -- right singular vectors are discarded.
    (u, s, _) = svd mat

    -- Singular values laid out on the diagonal of a matrix with the same
    -- shape as the centred input, zero elsewhere.
    diagS = diagRect 0 s (rows mat) (cols mat)