module Data.Algorithm.TSNE.Internals where
import Control.Applicative
import Control.DeepSeq
import Control.Exception (assert)
import Data.Default (def)
import Data.List(zipWith4)
import Data.Random.Normal (normalsIO')
import Pipes
import Data.Algorithm.TSNE.Types
import Data.Algorithm.TSNE.Utils
import Data.Algorithm.TSNE.Checks
-- | Number of observations (rows) in the input data set.
inputSize :: TSNEInput -> Int
inputSize xss = length xss
-- | Dimensionality of each input value, i.e. the width of the
-- (rectangular) input matrix.
inputValueSize :: TSNEInput -> Int
inputValueSize = fst . shape2D
-- | Validate raw input: it must be non-empty and rectangular
-- (every row the same length).
inputIsValid :: TSNEInput -> Either String ()
inputIsValid [] = Left "empty input data"
inputIsValid xss
    | isRectangular xss = Right ()
    | otherwise = Left "input data values are not all the same length"
-- | Check that a state's solution matrix has the expected shape for
-- the given input: one row per input point, three coordinates each.
isValidStateForInput :: TSNEInput -> TSNEState -> Either String ()
isValidStateForInput i st
    | has2DShape (n, 3) s = Right ()
    | otherwise = Left $ "solution is wrong shape: " ++ show (shape2D s)
    where
        s = stSolution st
        n = inputSize i
-- | Build the initial optimizer state for @n@ input points:
-- iteration 0, a small random 3D solution, unit gains and zero deltas
-- (gains/deltas are infinite lazy grids, consumed via zipWith).
initState :: Int -> IO TSNEState
initState n = do
    sol <- initSolution3D n
    let grid = repeat . repeat
    return $ TSNEState 0 sol (grid 1) (grid 0)
-- | Draw a random initial 3D embedding: three coordinate rows of
-- @n@ samples each, from a normal distribution with small variance.
initSolution3D :: Int -> IO [[Double]]
initSolution3D n = do
    xs <- normalsIO' (0, 1e-4)
    ys <- normalsIO' (0, 1e-4)
    zs <- normalsIO' (0, 1e-4)
    return [take n xs, take n ys, take n zs]
-- | Run the optimization forever, yielding one 'TSNEOutput3D' per
-- iteration. Each new state is forced to normal form to avoid
-- accumulating thunks across iterations.
runTSNE :: TSNEOptions -> TSNEInput -> [[Probability]] -> TSNEState -> Producer TSNEOutput3D IO ()
runTSNE opts vs ps = loop
    where
        loop st = do
            let st' = force (stepTSNE opts vs ps st)
            yield (output3D ps st')
            loop st'
-- | Advance the optimizer by one gradient-descent step: compute
-- gradients, update gains and deltas element-wise, apply the deltas to
-- the solution and re-center it.
stepTSNE :: TSNEOptions -> TSNEInput -> [[Probability]] -> TSNEState -> TSNEState
stepTSNE opts vs ps st = TSNEState i' s'' g' d'
    where
        i = stIteration st
        s = stSolution st
        g = stGains st
        d = stDeltas st
        gr = gradients ps st
        i' = i + 1
        s' = recenter $ z (+) s d'
        g' = z3 newGain g d gr
        d' = z3 (newDelta (tsneLearningRate opts) i) g' d gr
        z = zipWith.zipWith
        z3 = zipWith3.zipWith3
        -- Sanity check: the solution is stored as 3 coordinate rows,
        -- each holding one value per input point, so it is each ROW's
        -- length that must match the input size (comparing the row
        -- count against @length vs@ would fail for any input of
        -- size /= 3).
        s'' = assert (all ((== length vs) . length) s') s'
-- | Adaptive gain update: shrink the gain when the delta and gradient
-- agree in sign, grow it when they disagree; never below 0.01.
newGain :: Gain -> Delta -> Gradient -> Gain
newGain g d gr
    | signum d == signum gr = max 0.01 (g * 0.8)
    | otherwise             = max 0.01 (g + 0.2)
-- | Momentum update for one coordinate: keep a fraction @m@ of the
-- previous delta and subtract the gain-scaled gradient step
-- (learning rate @e@). Momentum rises from 0.5 to 0.8 after
-- iteration 250, as in the standard t-SNE schedule.
newDelta :: Double -> Int -> Gain -> Delta -> Gradient -> Delta
newDelta e i g' d gr = m * d - e * g' * gr
    where
        m = if i < 250 then 0.5 else 0.8
-- | Kullback-Leibler gradient of the embedding, one gradient value per
-- solution coordinate. Uses the standard t-SNE gradient
-- 4 * (p - q') * q * (x - y), with "early exaggeration" (p scaled by
-- 4) for the first 100 iterations.
gradients :: [[Probability]] -> TSNEState -> [[Gradient]]
gradients pss st = gradient <$> ss
    where
        gradient :: [Double] -> [Gradient]
        gradient s = zipWith4 (f s) s pss qss qss'
        ss = stSolution st
        i = stIteration st
        qss = qdist ss       -- unnormalized low-dimensional affinities
        qss' = qdist' ss     -- normalized low-dimensional affinities
        f :: [Double] -> Double -> [Double] -> [Double] -> [Double] -> Gradient
        f s x ps qs qs' = sum $ zipWith4 g s ps qs qs'
            where
                -- attraction/repulsion between point x and neighbour y
                g y p q q' = m * (x - y)
                    where
                        m = 4 * (k * p - q') * q
                -- early exaggeration factor
                k = if i < 100 then 4 else 1
-- | Transpose the 3 coordinate rows of a solution into a list of 3D
-- positions. The catch-all clause makes the previously non-exhaustive
-- pattern total, failing with a descriptive message instead of a bare
-- pattern-match error.
solution3D :: [[Double]] -> [Position3D]
solution3D (xs:ys:zs:_) = zip3 xs ys zs
solution3D _ = error "Data.Algorithm.TSNE.Internals.solution3D: solution has fewer than 3 coordinate rows"
-- | Package the current state as an output record: iteration number,
-- 3D positions and the current cost.
output3D :: [[Double]] -> TSNEState -> TSNEOutput3D
output3D pss st =
    TSNEOutput3D (stIteration st)
                 (solution3D (stSolution st))
                 (cost pss st)
-- | Cross-entropy style cost of the embedding: sum over all pairs of
-- @-p * log q@. The negation is essential — since @q <= 1@, @log q@ is
-- non-positive, and without the minus sign the cost would be negative
-- and decrease as the fit got worse.
cost :: [[Double]] -> TSNEState -> Double
cost pss st = sumsum $ (zipWith.zipWith) c pss (qdist' (stSolution st))
    where
        c p q = -p * log q
-- | Target entropy for the beta search: the log of the requested
-- perplexity.
targetEntropy :: TSNEOptions -> Entropy
targetEntropy opts = log (realToFrac (tsnePerplexity opts))
-- | Bracketing state for the per-point precision (beta) binary search.
data Beta = Beta {
    betaValue :: Double, -- ^ current beta candidate
    betaMin :: Double,   -- ^ lower bracket of the search interval
    betaMax :: Double    -- ^ upper bracket of the search interval
}
-- | Symmetrized neighbour probabilities for the whole input set.
neighbourProbabilities :: TSNEOptions -> TSNEInput -> [[Probability]]
neighbourProbabilities opts vs =
    symmetrize (rawNeighbourProbabilities opts vs)
-- | Asymmetric (conditional) neighbour probabilities: for each input
-- point, search for its beta and evaluate its row of probabilities.
rawNeighbourProbabilities :: TSNEOptions -> TSNEInput -> [[Probability]]
rawNeighbourProbabilities opts vs = map probRow vs
    where
        probRow a = aps (betaFor a) vs a
        betaFor a = betaValue (binarySearchBeta opts vs a)
-- | Row of conditional probabilities p(j|i) for input value @a@ at a
-- given precision @beta@: a Gaussian kernel over squared distances,
-- normalized over all other points (self-probability is 0).
aps :: Double -> TSNEInput -> TSNEInputValue -> [Probability]
aps beta bs a = map pj' bs
    where
        psum = sum $ map pj bs
        pj b
            | a == b = 0
            -- Gaussian kernel: the exponent must be negated so that
            -- larger distances give smaller probabilities.
            | otherwise = exp $ -(distanceSquared a b) * beta
        pj' b = pj b / psum
-- | Find the beta (precision) whose entropy matches the target
-- perplexity for one input value. The search starts from beta = 1
-- with an unbounded bracket: the lower bound must be -infinity (not
-- +infinity), otherwise the bracket is empty and the search cannot
-- move downward correctly.
binarySearchBeta :: TSNEOptions -> TSNEInput -> TSNEInputValue -> Beta
binarySearchBeta opts vs = binarySearchBeta' opts vs 1e-4 0 (Beta 1 (-infinity) infinity)
-- | Binary-search worker: stop after 50 iterations or when the entropy
-- is within @tol@ of the target; otherwise tighten the bracket and
-- recurse. While a bracket end is still infinite the step doubles or
-- halves beta; once finite, it bisects.
binarySearchBeta' :: TSNEOptions -> TSNEInput -> Double -> Int -> Beta -> TSNEInputValue -> Beta
binarySearchBeta' opts bs tol i beta a
    | i == 50 = beta
    | abs (e - t) < tol = beta
    | e > t = r $ incPrecision beta
    | otherwise = r $ decPrecision beta
    where
        t = targetEntropy opts
        e = entropyForInputValue (betaValue beta) bs a
        -- isInfinite (not equality with +infinity) is used: the lower
        -- bracket starts at -infinity, which never equals +infinity,
        -- and averaging with an infinite bound would produce NaN.
        incPrecision (Beta b _ bmax)
            | isInfinite bmax = Beta (b * 2) b bmax
            | otherwise = Beta ((b + bmax) / 2) b bmax
        decPrecision (Beta b bmin _)
            | isInfinite bmin = Beta (b / 2) bmin b
            | otherwise = Beta ((b + bmin) / 2) bmin b
        r beta' = binarySearchBeta' opts bs tol (i+1) beta' a
-- | Shannon entropy of the conditional probability distribution for
-- input value @a@ at precision @beta@. Terms below 1e-7 are dropped to
-- avoid @0 * log 0@.
entropyForInputValue :: Double -> TSNEInput -> TSNEInputValue -> Entropy
entropyForInputValue beta bs a = sum $ map h bs
    where
        -- entropy contribution: -x * log x (the negation makes each
        -- term non-negative, since 0 < x <= 1)
        h b = if x > 1e-7 then -x * log x else 0
            where x = pj' b
        psum = sum $ map pj bs
        pj b
            | a == b = 0
            -- Gaussian kernel with negated exponent, matching 'aps'
            | otherwise = exp $ -(distanceSquared a b) * beta
        pj' b = pj b / psum