{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE DeriveFunctor #-}
{-# language DeriveGeneric #-}
{-# language LambdaCase #-}
{-# language GeneralizedNewtypeDeriving #-}
-- {-# language MultiParamTypeClasses #-}
{-# LANGUAGE MultiWayIf #-}
{-# options_ghc -Wno-unused-imports #-}
{-# options_ghc -Wno-unused-top-binds #-}

{-|
Random projection trees for approximate nearest neighbor search in high-dimensional vector spaces.

== Introduction

Similarity search is a common problem in many fields (imaging, natural language processing, ..), and is often one building block of a larger data processing system.

There are many ways to /embed/ data in a vector space such that similarity search can be recast as a geometrical nearest neighbor lookup.

In turn, the efficiency and effectiveness of querying such a vector database strongly depends on how internally the data index is represented, graphs and trees being two common approaches.

The naive, all-pairs exact search becomes impractical even at moderate data sizes, which motivated research into approximate indexing methods.


== Overview

This library provides a /tree/-based approach to approximate nearest neighbor search. The database is recursively partitioned according to a series of random projections, and this partitioning is logically arranged as a tree which allows for rapid lookup.

Internally, a single random projection vector is sampled per tree level, as proposed in [1]. The projection vectors in turn can be sparse with a tunable sparsity parameter, which can help compressing the database at a small accuracy cost.

Retrieval accuracy can be improved by populating multiple trees (i.e. a /random forest/), and intersecting the results of the same query against each of them.

== Quick Start

1) Build an index with 'forest'

2) Lookup the \(k\) nearest neighbors to a query point with 'knn'

3) The database can be serialised and restored with 'serialiseRPForest' and 'deserialiseRPForest', respectively.



== References

1) Hyvonen, V., et al., Fast Nearest Neighbor Search through Sparse Random Projections and Voting,  https://www.cs.helsinki.fi/u/ttonteri/pub/bigdata2016.pdf

-}
module Data.RPTree (
  -- * Construction
  -- ** Batch
  treeBatch
  , forestBatch
  -- ** Incremental (Conduit-based)
  , tree
  , forest
  -- ** Parameters
  , rpTreeCfg, RPTreeConfig(..)
  -- , ForestParams
  -- * Query
  , knn
  , knnH
  -- * I/O
  , serialiseRPForest
  , deserialiseRPForest
  -- * Statistics
  , recallWith
  -- * Access
  , leaves, levels, points, candidates
  -- * Validation
  , treeStats, treeSize, leafSizes
  , RPTreeStats
  -- * Types
  , Embed(..)
  -- ** RPTree
  , RPTree, RPForest
  -- * Vector types
  -- ** Sparse
  , SVector, fromListSv, fromVectorSv
  -- ** Dense
  , DVector, fromListDv, fromVectorDv
  -- * Vector space typeclasses
  , Inner(..), Scale(..)
    -- ** Helpers for implementing 'Inner' instances
    -- *** Inner product
  , innerSS, innerSD, innerDD
    -- *** L2 distance
  , metricSSL2, metricSDL2
  -- *** Scale
  , scaleS, scaleD

  -- * Rendering
  -- , draw
  -- ** CSV
  , writeCsv, knnWriteCsv
  -- ** GraphViz dot
  , writeDot
  -- * Testing
  , BenchConfig(..), normalSparse2
  , liftC
  -- ** Random generation
  , randSeed
  -- *** Batch
  , dataBatch
  -- *** Conduit
  , dataSource
  , datS, datD
  -- *** Vector data
  , sparse, dense
  , normal2, circle2d
  ) where

import Control.Monad (replicateM)

import Control.Monad.IO.Class (MonadIO(..))
import Data.Foldable (Foldable(..), maximumBy, minimumBy)
import Data.Functor.Identity (Identity(..))
import Data.List (partition, sortBy)
import Data.Maybe (maybeToList)
import Data.Monoid (Sum(..))
import Data.Ord (comparing)
import Data.Semigroup (Min(..))
import GHC.Generics (Generic)
import GHC.Word (Word64)

-- containers
import Data.Sequence (Seq, (|>))
import qualified Data.Map as M (Map, fromList, toList, foldrWithKey, insert, insertWith, intersection)
import qualified Data.Set as S (Set, fromList, intersection, insert)
-- deepseq
import Control.DeepSeq (NFData(..))
-- heaps
import qualified Data.Heap as H (Heap, fromList, insert, Entry(..), empty, group, viewMin, map, union)
-- -- psqueues
-- import qualified Data.IntPSQ as PQ (IntPSQ, findMin, minView, empty, insert, fromList, toList)
-- transformers
import Control.Monad.Trans.State (StateT(..), runStateT, evalStateT, State, runState, evalState, get, put)
import Control.Monad.Trans.Class (MonadTrans(..))
-- vector
import qualified Data.Vector as V (Vector, replicateM, fromList)
import qualified Data.Vector.Generic as VG (Vector(..), unfoldrM, length, replicateM, (!), map, freeze, thaw, take, drop, unzip, foldl, fromList)
import qualified Data.Vector.Unboxed as VU (Vector, Unbox, fromList)
import qualified Data.Vector.Storable as VS (Vector)
-- vector-algorithms
import qualified Data.Vector.Algorithms.Merge as V (sortBy)

import Data.RPTree.Batch (treeBatch, forestBatch, dataBatch)
import Data.RPTree.Conduit (tree, forest, dataSource, liftC, rpTreeCfg, RPTreeConfig(..))
import Data.RPTree.Gen (sparse, dense, normal2, normalSparse2, circle2d)
import Data.RPTree.Internal (RPTree(..), RPForest, RPT(..), Embed(..), leaves, levels, points, Inner(..), Scale(..), scaleS, scaleD, (/.), innerDD, innerSD, innerSS, metricSSL2, metricSDL2, SVector(..), fromListSv, fromVectorSv, DVector(..), fromListDv, fromVectorDv, partitionAtMedian, Margin, getMargin, sortByVG, serialiseRPForest, deserialiseRPForest)
import Data.RPTree.Internal.Testing (BenchConfig(..), randSeed, datS, datD)
import Data.RPTree.Draw (writeDot, writeCsv, knnWriteCsv)




-- | Look up the \(k\) nearest neighbors to a query point
--
-- The supplied distance function @d@ must satisfy the definition of a metric, i.e.
--
-- * identity of indiscernible elements : \( d(x, y) = 0 \leftrightarrow x \equiv y \)
--
-- * symmetry : \(  d(x, y) = d(y, x)  \)
--
-- * triangle inequality : \( d(x, y) + d(y, z) \geq d(x, z) \)
knn :: (Ord p, Inner SVector v, VU.Unbox d, Real d) =>
       (u d -> v d -> p) -- ^ distance function
    -> Int -- ^ k neighbors
    -> RPForest d (V.Vector (Embed u d x)) -- ^ random projection forest
    -> v d -- ^ query point
    -> V.Vector (p, Embed u d x) -- ^ ordered in increasing distance order to the query point
knn :: (u d -> v d -> p)
-> Int
-> RPForest d (Vector (Embed u d x))
-> v d
-> Vector (p, Embed u d x)
knn u d -> v d -> p
distf Int
k RPForest d (Vector (Embed u d x))
tts v d
q = Int -> Vector (p, Embed u d x) -> Vector (p, Embed u d x)
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
VG.take Int
k (Vector (p, Embed u d x) -> Vector (p, Embed u d x))
-> Vector (p, Embed u d x) -> Vector (p, Embed u d x)
forall a b. (a -> b) -> a -> b
$ ((p, Embed u d x) -> p)
-> Vector (p, Embed u d x) -> Vector (p, Embed u d x)
forall (v :: * -> *) a b.
(Vector v a, Ord b) =>
(a -> b) -> v a -> v a
sortByVG (p, Embed u d x) -> p
forall a b. (a, b) -> a
fst Vector (p, Embed u d x)
cs
  where
    cs :: Vector (p, Embed u d x)
cs = (Embed u d x -> (p, Embed u d x))
-> Vector (Embed u d x) -> Vector (p, Embed u d x)
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map (\Embed u d x
xe -> (Embed u d x -> u d
forall (v :: * -> *) e a. Embed v e a -> v e
eEmbed Embed u d x
xe u d -> v d -> p
`distf` v d
q, Embed u d x
xe)) (Vector (Embed u d x) -> Vector (p, Embed u d x))
-> Vector (Embed u d x) -> Vector (p, Embed u d x)
forall a b. (a -> b) -> a -> b
$ IntMap (Vector (Embed u d x)) -> Vector (Embed u d x)
forall (t :: * -> *) m. (Foldable t, Monoid m) => t m -> m
fold (IntMap (Vector (Embed u d x)) -> Vector (Embed u d x))
-> IntMap (Vector (Embed u d x)) -> Vector (Embed u d x)
forall a b. (a -> b) -> a -> b
$ (RPTree d () (Vector (Embed u d x)) -> v d -> Vector (Embed u d x)
forall (v :: * -> *) d xs l.
(Inner SVector v, Unbox d, Ord d, Num d, Semigroup xs) =>
RPTree d l xs -> v d -> xs
`candidates` v d
q) (RPTree d () (Vector (Embed u d x)) -> Vector (Embed u d x))
-> RPForest d (Vector (Embed u d x))
-> IntMap (Vector (Embed u d x))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> RPForest d (Vector (Embed u d x))
tts

-- | Same as 'knn' but based on 'H.Heap'
--
-- FIXME uses 'nub' internally so might be slow
knnPQ :: (Ord p, Inner SVector v, VU.Unbox d, Real d) =>
         (u d -> v d -> p) -- ^ distance function
      -> Int -- ^ k neighbors
      -> RPForest d (V.Vector (Embed u d x)) -- ^ random projection forest
      -> v d -- ^ query point
      -> V.Vector (p, Embed u d x) -- ^ ordered in increasing distance order to the query point
knnPQ :: (u d -> v d -> p)
-> Int
-> RPForest d (Vector (Embed u d x))
-> v d
-> Vector (p, Embed u d x)
knnPQ u d -> v d -> p
distf Int
k RPForest d (Vector (Embed u d x))
tts v d
q = [(p, Embed u d x)] -> Vector (p, Embed u d x)
forall (v :: * -> *) a. Vector v a => [a] -> v a
VG.fromList ([(p, Embed u d x)] -> Vector (p, Embed u d x))
-> [(p, Embed u d x)] -> Vector (p, Embed u d x)
forall a b. (a -> b) -> a -> b
$ Int -> [(p, Embed u d x)] -> [(p, Embed u d x)]
forall a. Int -> [a] -> [a]
take Int
k ([(p, Embed u d x)] -> [(p, Embed u d x)])
-> [(p, Embed u d x)] -> [(p, Embed u d x)]
forall a b. (a -> b) -> a -> b
$ (Entry p (Embed u d x) -> (p, Embed u d x))
-> [Entry p (Embed u d x)] -> [(p, Embed u d x)]
forall a b. (a -> b) -> [a] -> [b]
map (\(H.Entry p
p Embed u d x
x) -> (p
p , Embed u d x
x)) ([Entry p (Embed u d x)] -> [(p, Embed u d x)])
-> [Entry p (Embed u d x)] -> [(p, Embed u d x)]
forall a b. (a -> b) -> a -> b
$ Heap (Entry p (Embed u d x)) -> [Entry p (Embed u d x)]
forall a. Ord a => Heap a -> [a]
nub Heap (Entry p (Embed u d x))
h
  where
    xs :: Vector (Embed u d x)
xs = IntMap (Vector (Embed u d x)) -> Vector (Embed u d x)
forall (t :: * -> *) m. (Foldable t, Monoid m) => t m -> m
fold (IntMap (Vector (Embed u d x)) -> Vector (Embed u d x))
-> IntMap (Vector (Embed u d x)) -> Vector (Embed u d x)
forall a b. (a -> b) -> a -> b
$ (RPTree d () (Vector (Embed u d x)) -> v d -> Vector (Embed u d x)
forall (v :: * -> *) d xs l.
(Inner SVector v, Unbox d, Ord d, Num d, Semigroup xs) =>
RPTree d l xs -> v d -> xs
`candidates` v d
q) (RPTree d () (Vector (Embed u d x)) -> Vector (Embed u d x))
-> RPForest d (Vector (Embed u d x))
-> IntMap (Vector (Embed u d x))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> RPForest d (Vector (Embed u d x))
tts
    h :: Heap (Entry p (Embed u d x))
h = (Heap (Entry p (Embed u d x))
 -> Embed u d x -> Heap (Entry p (Embed u d x)))
-> Heap (Entry p (Embed u d x))
-> Vector (Embed u d x)
-> Heap (Entry p (Embed u d x))
forall (v :: * -> *) b a.
Vector v b =>
(a -> b -> a) -> a -> v b -> a
VG.foldl Heap (Entry p (Embed u d x))
-> Embed u d x -> Heap (Entry p (Embed u d x))
forall a.
Heap (Entry p (Embed u d a))
-> Embed u d a -> Heap (Entry p (Embed u d a))
insf Heap (Entry p (Embed u d x))
forall a. Heap a
H.empty Vector (Embed u d x)
xs
      where
        insf :: Heap (Entry p (Embed u d a))
-> Embed u d a -> Heap (Entry p (Embed u d a))
insf Heap (Entry p (Embed u d a))
acc Embed u d a
x = Entry p (Embed u d a)
-> Heap (Entry p (Embed u d a)) -> Heap (Entry p (Embed u d a))
forall a. Ord a => a -> Heap a -> Heap a
H.insert (p -> Embed u d a -> Entry p (Embed u d a)
forall p a. p -> a -> Entry p a
H.Entry p
p Embed u d a
x) Heap (Entry p (Embed u d a))
acc
          where
            p :: p
p = Embed u d a -> u d
forall (v :: * -> *) e a. Embed v e a -> v e
eEmbed Embed u d a
x u d -> v d -> p
`distf` v d
q

-- | Same as 'knn' but based on min-'H.Heap'
--
-- Leaves are prioritized according to their margin
knnH :: (Ord p, Inner SVector v, VU.Unbox d, Fractional d, Ord d) =>
         (u d -> v d -> p) -- ^ distance function
      -> Int -- ^ k neighbors
      -> RPForest d (V.Vector (Embed u d x)) -- ^ random projection forest
      -> v d -- ^ query point
      -> V.Vector (p, Embed u d x) -- ^ ordered in increasing distance order to the query point
knnH :: (u d -> v d -> p)
-> Int
-> RPForest d (Vector (Embed u d x))
-> v d
-> Vector (p, Embed u d x)
knnH u d -> v d -> p
distf Int
k RPForest d (Vector (Embed u d x))
tts v d
q = (Embed u d x -> (p, Embed u d x))
-> Vector (Embed u d x) -> Vector (p, Embed u d x)
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map (\Embed u d x
xe -> (Embed u d x -> u d
forall (v :: * -> *) e a. Embed v e a -> v e
eEmbed Embed u d x
xe u d -> v d -> p
`distf` v d
q, Embed u d x
xe)) (Vector (Embed u d x) -> Vector (p, Embed u d x))
-> Vector (Embed u d x) -> Vector (p, Embed u d x)
forall a b. (a -> b) -> a -> b
$ Vector (Embed u d x)
-> Int
-> Heap (Entry d (Vector (Embed u d x)))
-> Vector (Embed u d x)
forall (t :: * -> *) a p.
(Semigroup (t a), Foldable t) =>
t a -> Int -> Heap (Entry p (t a)) -> t a
go Vector (Embed u d x)
forall a. Monoid a => a
mempty Int
0 Heap (Entry d (Vector (Embed u d x)))
htot
  where
    htot :: Heap (Entry d (Vector (Embed u d x)))
htot = IntMap (Heap (Entry d (Vector (Embed u d x))))
-> Heap (Entry d (Vector (Embed u d x)))
forall (t :: * -> *) a. Foldable t => t (Heap a) -> Heap a
unions (IntMap (Heap (Entry d (Vector (Embed u d x))))
 -> Heap (Entry d (Vector (Embed u d x))))
-> IntMap (Heap (Entry d (Vector (Embed u d x))))
-> Heap (Entry d (Vector (Embed u d x)))
forall a b. (a -> b) -> a -> b
$ (RPTree d () (Vector (Embed u d x))
-> v d -> Heap (Entry d (Vector (Embed u d x)))
forall (v :: * -> *) d l a.
(Inner SVector v, Unbox d, Ord d, Fractional d) =>
RPTree d l a -> v d -> Heap (Entry d a)
`candidatesH` v d
q) (RPTree d () (Vector (Embed u d x))
 -> Heap (Entry d (Vector (Embed u d x))))
-> RPForest d (Vector (Embed u d x))
-> IntMap (Heap (Entry d (Vector (Embed u d x))))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> RPForest d (Vector (Embed u d x))
tts
    go :: t a -> Int -> Heap (Entry p (t a)) -> t a
go t a
acc Int
n Heap (Entry p (t a))
hh = case Heap (Entry p (t a)) -> Maybe (Entry p (t a), Heap (Entry p (t a)))
forall a. Heap a -> Maybe (a, Heap a)
H.viewMin Heap (Entry p (t a))
hh of
          Maybe (Entry p (t a), Heap (Entry p (t a)))
Nothing -> t a
acc
          Just ((H.Entry p
_ t a
xsh), Heap (Entry p (t a))
hrest) ->
            let
              nels :: Int
nels = t a -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length t a
xsh
              ntot :: Int
ntot = Int
nels Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n
            in
              if Int
ntot Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
k Bool -> Bool -> Bool
&& Bool -> Bool
not (t a -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null t a
acc)
              then t a
acc
              else t a -> Int -> Heap (Entry p (t a)) -> t a
go (t a
xsh t a -> t a -> t a
forall a. Semigroup a => a -> a -> a
<> t a
acc) Int
ntot Heap (Entry p (t a))
hrest


unions :: Foldable t => t (H.Heap a) -> H.Heap a
unions :: t (Heap a) -> Heap a
unions = (Heap a -> Heap a -> Heap a) -> Heap a -> t (Heap a) -> Heap a
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl Heap a -> Heap a -> Heap a
forall a. Heap a -> Heap a -> Heap a
H.union Heap a
forall a. Heap a
H.empty

-- | deduplicate
nub :: (Ord a) => H.Heap a -> [a]
nub :: Heap a -> [a]
nub = (Maybe a -> [a]) -> Heap (Maybe a) -> [a]
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap Maybe a -> [a]
forall a. Maybe a -> [a]
maybeToList (Heap (Maybe a) -> [a])
-> (Heap a -> Heap (Maybe a)) -> Heap a -> [a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Heap a -> Maybe a) -> Heap (Heap a) -> Heap (Maybe a)
forall b a. Ord b => (a -> b) -> Heap a -> Heap b
H.map Heap a -> Maybe a
forall b. Heap b -> Maybe b
view (Heap (Heap a) -> Heap (Maybe a))
-> (Heap a -> Heap (Heap a)) -> Heap a -> Heap (Maybe a)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Heap a -> Heap (Heap a)
forall a. Heap a -> Heap (Heap a)
H.group
  where
    view :: Heap b -> Maybe b
view Heap b
h = (b, Heap b) -> b
forall a b. (a, b) -> a
fst ((b, Heap b) -> b) -> Maybe (b, Heap b) -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Heap b -> Maybe (b, Heap b)
forall a. Heap a -> Maybe (a, Heap a)
H.viewMin Heap b
h

type PQueue p a = H.Heap (H.Entry p a)

-- -- | Same as 'knn' but accumulating the result in low margin order (following the intuition in 'annoy').
-- --
-- -- FIXME to be verified
-- knnPQ :: (Ord p, Inner SVector v, VU.Unbox d, RealFrac d) =>
--          (u d -> v d -> p) -- ^ distance function
--       -> Int -- ^ k neighbors
--       -> RPForest d (V.Vector (Embed u d x)) -- ^ random projection forest
--       -> v d -- ^ query point
--       -> V.Vector (p, Embed u d x)
-- knnPQ distf k tts q = sortByVG fst cs
--   where
--     cs = VG.map (\xe -> (eEmbed xe `distf` q, xe)) $ fold cstt
--     cstt = (takeFromPQ nsing) . (`candidatesPQ` q) <$> tts
--     nsing = (k `div` n) `max` 1
--     n = length tts




-- | Average recall-at-k, computed over a set of trees
-- 
-- The supplied distance function @d@ must satisfy the definition of a metric, i.e.
--
-- * identity of indiscernible elements : \( d(x, y) = 0 \leftrightarrow x \equiv y \)
--
-- * symmetry : \(  d(x, y) = d(y, x)  \)
--
-- * triangle inequality : \( d(x, y) + d(y, z) \geq d(x, z) \)
recallWith :: (Inner SVector v, VU.Unbox d, Fractional b, Ord d, Ord a, Ord x, Ord (u d), Num d) =>
              (u d -> v d -> a) -- ^ distance function
           -> RPForest d (V.Vector (Embed u d x))
           -> Int -- ^ k : number of nearest neighbors to consider
           -> v d -- ^ query point
           -> b
recallWith :: (u d -> v d -> a)
-> RPForest d (Vector (Embed u d x)) -> Int -> v d -> b
recallWith u d -> v d -> a
distf RPForest d (Vector (Embed u d x))
tt Int
k v d
q = IntMap b -> b
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum IntMap b
rs b -> b -> b
forall a. Fractional a => a -> a -> a
/ Int -> b
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n
  where
    rs :: IntMap b
rs = (RPTree d () (Vector (Embed u d x)) -> b)
-> RPForest d (Vector (Embed u d x)) -> IntMap b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\RPTree d () (Vector (Embed u d x))
t -> (u d -> v d -> a)
-> RPTree d () (Vector (Embed u d x)) -> Int -> v d -> b
forall (v :: * -> *) d p a x (u :: * -> *) l.
(Inner SVector v, Ord d, Unbox d, Fractional p, Ord a, Ord x,
 Ord (u d), Num d) =>
(u d -> v d -> a)
-> RPTree d l (Vector (Embed u d x)) -> Int -> v d -> p
recallWith1 u d -> v d -> a
distf RPTree d () (Vector (Embed u d x))
t Int
k v d
q) RPForest d (Vector (Embed u d x))
tt
    n :: Int
n = RPForest d (Vector (Embed u d x)) -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length RPForest d (Vector (Embed u d x))
tt

recallWith1 :: (Inner SVector v, Ord d, VU.Unbox d, Fractional p, Ord a, Ord x, Ord (u d), Num d) =>
              (u d -> v d -> a) -- ^ distance function
           -> RPTree d l (V.Vector (Embed u d x))
           -> Int -- ^ k : number of nearest neighbors to consider
           -> v d -- ^ query point
           -> p
recallWith1 :: (u d -> v d -> a)
-> RPTree d l (Vector (Embed u d x)) -> Int -> v d -> p
recallWith1 u d -> v d -> a
distf RPTree d l (Vector (Embed u d x))
tt Int
k v d
q = Int -> p
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Set (Embed u d x) -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length Set (Embed u d x)
aintk) p -> p -> p
forall a. Fractional a => a -> a -> a
/ Int -> p
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
k
  where
    aintk :: Set (Embed u d x)
aintk = Set (Embed u d x)
aa Set (Embed u d x) -> Set (Embed u d x) -> Set (Embed u d x)
forall a. Ord a => Set a -> Set a -> Set a
`S.intersection` Set (Embed u d x)
kk
    aa :: Set (Embed u d x)
aa = Vector (Embed u d x) -> Set (Embed u d x)
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> Set a
set (Vector (Embed u d x) -> Set (Embed u d x))
-> Vector (Embed u d x) -> Set (Embed u d x)
forall a b. (a -> b) -> a -> b
$ RPTree d l (Vector (Embed u d x)) -> v d -> Vector (Embed u d x)
forall (v :: * -> *) d xs l.
(Inner SVector v, Unbox d, Ord d, Num d, Semigroup xs) =>
RPTree d l xs -> v d -> xs
candidates RPTree d l (Vector (Embed u d x))
tt v d
q
    kk :: Set (Embed u d x)
kk = [Embed u d x] -> Set (Embed u d x)
forall a. Ord a => [a] -> Set a
S.fromList ([Embed u d x] -> Set (Embed u d x))
-> [Embed u d x] -> Set (Embed u d x)
forall a b. (a -> b) -> a -> b
$ ((Embed u d x, a) -> Embed u d x)
-> [(Embed u d x, a)] -> [Embed u d x]
forall a b. (a -> b) -> [a] -> [b]
map (Embed u d x, a) -> Embed u d x
forall a b. (a, b) -> a
fst ([(Embed u d x, a)] -> [Embed u d x])
-> [(Embed u d x, a)] -> [Embed u d x]
forall a b. (a -> b) -> a -> b
$ Int -> [(Embed u d x, a)] -> [(Embed u d x, a)]
forall a. Int -> [a] -> [a]
take Int
k [(Embed u d x, a)]
dists -- first k nn's
    dists :: [(Embed u d x, a)]
dists = ((Embed u d x, a) -> (Embed u d x, a) -> Ordering)
-> [(Embed u d x, a)] -> [(Embed u d x, a)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (((Embed u d x, a) -> a)
-> (Embed u d x, a) -> (Embed u d x, a) -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (Embed u d x, a) -> a
forall a b. (a, b) -> b
snd) ([(Embed u d x, a)] -> [(Embed u d x, a)])
-> [(Embed u d x, a)] -> [(Embed u d x, a)]
forall a b. (a -> b) -> a -> b
$ Vector (Embed u d x, a) -> [(Embed u d x, a)]
forall (t :: * -> *) a. Foldable t => t a -> [a]
toList (Vector (Embed u d x, a) -> [(Embed u d x, a)])
-> Vector (Embed u d x, a) -> [(Embed u d x, a)]
forall a b. (a -> b) -> a -> b
$ (Embed u d x -> (Embed u d x, a))
-> Vector (Embed u d x) -> Vector (Embed u d x, a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\Embed u d x
x -> (Embed u d x
x, Embed u d x -> u d
forall (v :: * -> *) e a. Embed v e a -> v e
eEmbed Embed u d x
x u d -> v d -> a
`distf` v d
q)) Vector (Embed u d x)
xs
    xs :: Vector (Embed u d x)
xs = RPTree d l (Vector (Embed u d x)) -> Vector (Embed u d x)
forall m d l. Monoid m => RPTree d l m -> m
points RPTree d l (Vector (Embed u d x))
tt

set :: (Foldable t, Ord a) => t a -> S.Set a
set :: t a -> Set a
set = (Set a -> a -> Set a) -> Set a -> t a -> Set a
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl ((a -> Set a -> Set a) -> Set a -> a -> Set a
forall a b c. (a -> b -> c) -> b -> a -> c
flip a -> Set a -> Set a
forall a. Ord a => a -> Set a -> Set a
S.insert) Set a
forall a. Monoid a => a
mempty



{-# SCC candidates #-}
-- | Retrieve points nearest to the query
--
-- in case of a narrow margin, collect both branches of the tree
candidates :: (Inner SVector v, VU.Unbox d, Ord d, Num d, Semigroup xs) =>
              RPTree d l xs
           -> v d -- ^ query point
           -> xs
candidates :: RPTree d l xs -> v d -> xs
candidates (RPTree Vector (SVector d)
rvs RPT d l xs
tt) v d
x = Int -> RPT d l xs -> xs
forall a l. Semigroup a => Int -> RPT d l a -> a
go Int
0 RPT d l xs
tt
  where
    go :: Int -> RPT d l a -> a
go Int
_     (Tip l
_ a
xs)                     = a
xs
    go Int
ixLev (Bin l
_ d
thr Margin d
margin RPT d l a
ltree RPT d l a
rtree) =
      let
        (d
mglo, d
mghi) = Margin d -> (d, d)
forall a. Margin a -> (a, a)
getMargin Margin d
margin
        r :: SVector d
r = Vector (SVector d)
rvs Vector (SVector d) -> Int -> SVector d
forall (v :: * -> *) a. Vector v a => v a -> Int -> a
VG.! Int
ixLev
        proj :: d
proj = SVector d
r SVector d -> v d -> d
forall (u :: * -> *) (v :: * -> *) a.
(Inner u v, Unbox a, Num a) =>
u a -> v a -> a
`inner` v d
x
        i' :: Int
i' = Int -> Int
forall a. Enum a => a -> a
succ Int
ixLev
        dl :: d
dl = d -> d
forall a. Num a => a -> a
abs (d
mglo d -> d -> d
forall a. Num a => a -> a -> a
- d
proj) -- left margin
        dr :: d
dr = d -> d
forall a. Num a => a -> a
abs (d
mghi d -> d -> d
forall a. Num a => a -> a -> a
- d
proj) -- right margin
      in
        if | d
proj d -> d -> Bool
forall a. Ord a => a -> a -> Bool
< d
thr Bool -> Bool -> Bool
&&
             d
dl d -> d -> Bool
forall a. Ord a => a -> a -> Bool
> d
dr -> Int -> RPT d l a -> a
go Int
i' RPT d l a
ltree a -> a -> a
forall a. Semigroup a => a -> a -> a
<> Int -> RPT d l a -> a
go Int
i' RPT d l a
rtree
           | d
proj d -> d -> Bool
forall a. Ord a => a -> a -> Bool
< d
thr  -> Int -> RPT d l a -> a
go Int
i' RPT d l a
ltree
           | d
proj d -> d -> Bool
forall a. Ord a => a -> a -> Bool
> d
thr Bool -> Bool -> Bool
&&
             d
dl d -> d -> Bool
forall a. Ord a => a -> a -> Bool
< d
dr -> Int -> RPT d l a -> a
go Int
i' RPT d l a
ltree a -> a -> a
forall a. Semigroup a => a -> a -> a
<> Int -> RPT d l a -> a
go Int
i' RPT d l a
rtree
           | Bool
otherwise   -> Int -> RPT d l a -> a
go Int
i' RPT d l a
rtree


-- | RPTree margins used as search priority (using a min-heaps : lower margin leaves will be ranked highest and therefore searched first)
candidatesH :: (Inner SVector v, VU.Unbox d, Ord d, Fractional d) =>
               RPTree d l a
            -> v d -- ^ query point
            -> H.Heap (H.Entry d a)
candidatesH :: RPTree d l a -> v d -> Heap (Entry d a)
candidatesH (RPTree Vector (SVector d)
rvs RPT d l a
tt) v d
x = Int -> RPT d l a -> Heap (Entry d a) -> d -> Heap (Entry d a)
forall l a.
Int -> RPT d l a -> Heap (Entry d a) -> d -> Heap (Entry d a)
go Int
0 RPT d l a
tt Heap (Entry d a)
forall a. Heap a
H.empty d
infty
  where
    infty :: d
infty = d
1 d -> d -> d
forall a. Fractional a => a -> a -> a
/ d
0
    go :: Int -> RPT d l a -> Heap (Entry d a) -> d -> Heap (Entry d a)
go Int
_ (Tip l
_ a
xs) Heap (Entry d a)
acc d
p = d -> a -> Heap (Entry d a) -> Heap (Entry d a)
forall p a. Ord p => p -> a -> Heap (Entry p a) -> Heap (Entry p a)
insertp d
p a
xs Heap (Entry d a)
acc
    go Int
ixLev (Bin l
_ d
thr Margin d
margin RPT d l a
ltree RPT d l a
rtree) Heap (Entry d a)
acc d
p =
      let
        (d
mglo, d
mghi) = Margin d -> (d, d)
forall a. Margin a -> (a, a)
getMargin Margin d
margin
        r :: SVector d
r = Vector (SVector d)
rvs Vector (SVector d) -> Int -> SVector d
forall (v :: * -> *) a. Vector v a => v a -> Int -> a
VG.! Int
ixLev
        proj :: d
proj = SVector d
r SVector d -> v d -> d
forall (u :: * -> *) (v :: * -> *) a.
(Inner u v, Unbox a, Num a) =>
u a -> v a -> a
`inner` v d
x
        i' :: Int
i' = Int -> Int
forall a. Enum a => a -> a
succ Int
ixLev
        dl :: d
dl = d -> d
forall a. Num a => a -> a
abs (d
mglo d -> d -> d
forall a. Num a => a -> a -> a
- d
proj) -- left margin
        dr :: d
dr = d -> d
forall a. Num a => a -> a
abs (d
mghi d -> d -> d
forall a. Num a => a -> a -> a
- d
proj) -- right margin
        pl :: d
pl = d
p d -> d -> d
forall a. Ord a => a -> a -> a
`min` d
dl
        pr :: d
pr = d
p d -> d -> d
forall a. Ord a => a -> a -> a
`min` d
dr
      in if
        | d
proj d -> d -> Bool
forall a. Ord a => a -> a -> Bool
< d
thr Bool -> Bool -> Bool
&&
          d
dl d -> d -> Bool
forall a. Ord a => a -> a -> Bool
> d
dr -> Heap (Entry d a) -> Heap (Entry d a) -> Heap (Entry d a)
forall a. Heap a -> Heap a -> Heap a
H.union (Int -> RPT d l a -> Heap (Entry d a) -> d -> Heap (Entry d a)
go Int
i' RPT d l a
ltree Heap (Entry d a)
acc d
pl) (Int -> RPT d l a -> Heap (Entry d a) -> d -> Heap (Entry d a)
go Int
i' RPT d l a
rtree Heap (Entry d a)
acc d
pr)
        | d
proj d -> d -> Bool
forall a. Ord a => a -> a -> Bool
< d
thr  -> Int -> RPT d l a -> Heap (Entry d a) -> d -> Heap (Entry d a)
go Int
i' RPT d l a
ltree Heap (Entry d a)
acc d
pl
        | d
proj d -> d -> Bool
forall a. Ord a => a -> a -> Bool
> d
thr Bool -> Bool -> Bool
&&
          d
dl d -> d -> Bool
forall a. Ord a => a -> a -> Bool
< d
dr -> Heap (Entry d a) -> Heap (Entry d a) -> Heap (Entry d a)
forall a. Heap a -> Heap a -> Heap a
H.union (Int -> RPT d l a -> Heap (Entry d a) -> d -> Heap (Entry d a)
go Int
i' RPT d l a
ltree Heap (Entry d a)
acc d
pl) (Int -> RPT d l a -> Heap (Entry d a) -> d -> Heap (Entry d a)
go Int
i' RPT d l a
rtree Heap (Entry d a)
acc d
pr)
        | Bool
otherwise -> Int -> RPT d l a -> Heap (Entry d a) -> d -> Heap (Entry d a)
go Int
i' RPT d l a
rtree Heap (Entry d a)
acc d
pr

insertp :: Ord p =>
           p -> a -> H.Heap (H.Entry p a) -> H.Heap (H.Entry p a)
insertp :: p -> a -> Heap (Entry p a) -> Heap (Entry p a)
insertp p
p a
x = Entry p a -> Heap (Entry p a) -> Heap (Entry p a)
forall a. Ord a => a -> Heap a -> Heap a
H.insert (p -> a -> Entry p a
forall p a. p -> a -> Entry p a
H.Entry p
p a
x)




data RPTreeStats = RPTreeStats {
  RPTreeStats -> Int
rptsLength :: Int
                               } deriving (RPTreeStats -> RPTreeStats -> Bool
(RPTreeStats -> RPTreeStats -> Bool)
-> (RPTreeStats -> RPTreeStats -> Bool) -> Eq RPTreeStats
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: RPTreeStats -> RPTreeStats -> Bool
$c/= :: RPTreeStats -> RPTreeStats -> Bool
== :: RPTreeStats -> RPTreeStats -> Bool
$c== :: RPTreeStats -> RPTreeStats -> Bool
Eq, Int -> RPTreeStats -> ShowS
[RPTreeStats] -> ShowS
RPTreeStats -> String
(Int -> RPTreeStats -> ShowS)
-> (RPTreeStats -> String)
-> ([RPTreeStats] -> ShowS)
-> Show RPTreeStats
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [RPTreeStats] -> ShowS
$cshowList :: [RPTreeStats] -> ShowS
show :: RPTreeStats -> String
$cshow :: RPTreeStats -> String
showsPrec :: Int -> RPTreeStats -> ShowS
$cshowsPrec :: Int -> RPTreeStats -> ShowS
Show)

treeStats :: RPTree d l a -> RPTreeStats
treeStats :: RPTree d l a -> RPTreeStats
treeStats (RPTree Vector (SVector d)
_ RPT d l a
tt) = Int -> RPTreeStats
RPTreeStats Int
l
  where
    l :: Int
l = RPT d l a -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length RPT d l a
tt


-- | How many data items are stored in the 'RPTree'
treeSize :: (Foldable t) => RPTree d l (t a) -> Int
treeSize :: RPTree d l (t a) -> Int
treeSize = RPT d l Int -> Int
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (RPT d l Int -> Int)
-> (RPTree d l (t a) -> RPT d l Int) -> RPTree d l (t a) -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. RPTree d l (t a) -> RPT d l Int
forall (t :: * -> *) d l a.
Foldable t =>
RPTree d l (t a) -> RPT d l Int
leafSizes

-- | How many data items are stored in each leaf of the 'RPTree'
leafSizes :: Foldable t => RPTree d l (t a) -> RPT d l Int
leafSizes :: RPTree d l (t a) -> RPT d l Int
leafSizes (RPTree Vector (SVector d)
_ RPT d l (t a)
tt) = t a -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length (t a -> Int) -> RPT d l (t a) -> RPT d l Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> RPT d l (t a)
tt



-- candidatesPQ :: (Ord a, Ord d, Inner SVector v, VU.Unbox d, Num d) =>
--                 RPTree d l a -> v d -> H.Heap a
-- candidatesPQ (RPTree rvs tt) x = go 0 tt H.empty
--   where
--     go _ (Tip _ xs) acc = H.insert xs acc
--     go ixLev (Bin _ thr margin ltree rtree) acc =
--       let
--         (mglo, mghi) = getMargin margin
--         r = rvs VG.! ixLev
--         proj = r `inner` x
--         i' = succ ixLev
--         dl = abs (mglo - proj) -- left margin
--         dr = abs (mghi - proj) -- right margin
--       in if
--         | proj < thr &&
--           dl > dr -> H.union (go i' ltree acc) (go i' rtree acc)
--         | proj < thr  -> go i' ltree acc
--         | proj > thr &&
--           dl < dr -> H.union (go i' ltree acc) (go i' rtree acc)
--         | otherwise -> go i' rtree acc


-- -- | like 'candidates' but outputs an ordered 'IntPQ' where the margin to the median projection is interpreted as queue priority
-- candidatesPQ :: (Fractional d, Ord d, Inner SVector v, VU.Unbox d) =>
--                RPTree d l xs
--             -> v d -- ^ query point
--             -> PQ.IntPSQ d xs
-- candidatesPQ (RPTree rvs tt) x = evalS $ go 0 tt PQ.empty (1/0)
--   where
--     go _ (Tip _ xs) acc dprev =
--       insPQ dprev xs acc
--     go ixLev (Bin _ thr margin ltree rtree) acc dprev = do
--       let
--         (mglo, mghi) = getMargin margin
--         r = rvs VG.! ixLev
--         proj = r `inner` x
--         i' = succ ixLev
--         dl = abs (mglo - proj) -- left margin
--         dr = abs (mghi - proj) -- right margin
--       if | proj < thr &&
--            dl > dr -> do
--              ll <- go i' ltree acc (min dprev dl)
--              lr <- go i' rtree acc (min dprev dr)
--              pure $ PQ.fromList (PQ.toList ll <> PQ.toList lr)
--          | proj < thr  -> go i' ltree acc (min dprev dl)
--          | proj > thr &&
--            dl < dr -> do
--              ll <- go i' ltree acc (min dprev dl)
--              lr <- go i' rtree acc (min dprev dr)
--              pure $ PQ.fromList (PQ.toList ll <> PQ.toList lr)
--          | otherwise -> go i' rtree acc (min dprev dr)

-- takeFromPQ :: (Ord p, Foldable t, Monoid (t a)) =>
--               Int -- ^ number of elements to keep
--            -> PQ.IntPSQ p (t a)
--            -> t a
-- takeFromPQ n pq = foldMap snd $ reverse $ go [] 0 pq
--   where
--     go acc nacc q = case PQ.minView q of
--       Nothing -> acc
--       Just (_, p, xs, pqRest) ->
--         let
--           nxs = length xs
--           nacc' = nacc + nxs
--         in if nacc' < n
--            then go ((p, xs) : acc) nacc' pqRest
--            else acc

-- type S = State Int
-- evalS :: S a -> a
-- evalS = flip evalState 0

-- insPQ :: (Ord p) => p -> v -> PQ.IntPSQ p v -> S (PQ.IntPSQ p v)
-- insPQ p x pq = do
--   i <- get
--   let
--     pq' = PQ.insert i p x pq
--   put (succ i)
--   pure pq'






-- pqSeq :: Ord a => PQ.IntPSQ a b -> Seq (a, b)
-- pqSeq pqq = go pqq mempty
--   where
--     go pq acc = case PQ.minView pq of
--       Nothing -> acc
--       Just (_, p, v, rest) -> go rest $ acc |> (p, v)


-- newtype Counts a = Counts {
--   unCounts :: M.Map a (Sum Int) } deriving (Eq, Show, Semigroup, Monoid)
-- keepCounts :: Int -- ^ keep entry iff counts are larger than this value
--            -> Counts a
--            -> [(a, Int)]
-- keepCounts thr cs = M.foldrWithKey insf mempty c
--   where
--     insf k v acc
--       | v >= thr = (k, v) : acc
--       | otherwise = acc
--     c = getSum `fmap` unCounts cs
-- counts :: (Foldable t, Ord a) => t a -> Counts a
-- counts = foldl count mempty
-- count :: Ord a => Counts a -> a -> Counts a
-- count (Counts mm) x = Counts $ M.insertWith mappend x (Sum 1) mm


-- forest :: Inner SVector v =>
--           Int -- ^ # of trees
--        -> Int -- ^ maximum tree height
--        -> Double -- ^ nonzero density of sparse projection vectors
--        -> Int -- ^ dimension of projection vectors
--        -> V.Vector (v Double) -- ^ dataset
--        -> Gen [RPTree Double (V.Vector (v Double))]
-- forest nt maxDepth pnz dim xss =
--   replicateM nt (tree maxDepth pnz dim xss)

-- -- | Build a random projection tree
-- --
-- -- Optimization: instead of sampling one projection vector per branch, we sample one per tree level (as suggested in https://www.cs.helsinki.fi/u/ttonteri/pub/bigdata2016.pdf )
-- tree :: (Inner SVector v) =>
--          Int -- ^ maximum tree height
--       -> Double -- ^ nonzero density of sparse projection vectors
--       -> Int -- ^ dimension of projection vectors
--       -> V.Vector (v Double) -- ^ dataset
--       -> Gen (RPTree Double (V.Vector (v Double)))
-- tree maxDepth pnz dim xss = do
--   -- sample all projection vectors
--   rvs <- V.replicateM maxDepth (sparse pnz dim stdNormal)
--   let
--     loop ixLev xs = do
--       if ixLev >= maxDepth || length xs <= 100
--         then
--           pure $ Tip xs
--         else
--         do
--           let
--             r = rvs VG.! ixLev
--             (thr, margin, ll, rr) = partitionAtMedian r xs
--           treel <- loop (ixLev + 1) ll
--           treer <- loop (ixLev + 1) rr
--           pure $ Bin thr margin treel treer
--   rpt <- loop 0 xss
--   pure $ RPTree rvs rpt





-- -- | Partition at median inner product
-- treeRT :: (Monad m, Inner SVector v) =>
--            Int
--         -> Int
--         -> Double
--         -> Int
--         -> V.Vector (v Double)
--         -> GenT m (RT SVector Double (V.Vector (v Double)))
-- treeRT maxDepth minLeaf pnz dim xss = loop 0 xss
--   where
--     loop ixLev xs = do
--       if ixLev >= maxDepth || length xs <= minLeaf
--         then
--           pure $ RTip xs
--         else
--         do
--           r <- sparse pnz dim stdNormal
--           let
--             (_, mrg, ll, rr) = partitionAtMedian r xs
--           treel <- loop (ixLev + 1) ll
--           treer <- loop (ixLev + 1) rr
--           pure $ RBin r mrg treel treer







-- -- | Like 'tree' but here we partition at the median of the inner product values instead
-- tree' :: (Inner SVector v) =>
--          Int
--       -> Double
--       -> Int
--       -> V.Vector (v Double)
--       -> Gen (RPTree Double (V.Vector (v Double)))
-- tree' maxDepth pnz dim xss = do
--   -- sample all projection vectors
--   rvs <- V.replicateM maxDepth (sparse pnz dim stdNormal)
--   let
--     loop ixLev xs =
--       if ixLev >= maxDepth || length xs <= 100
--         then Tip xs
--         else
--           let
--             r = rvs VG.! ixLev
--             (thr, margin, ll, rr) = partitionAtMedian r xs
--             tl = loop (ixLev + 1) ll
--             tr = loop (ixLev + 1) rr
--           in Bin thr margin tl tr
--   let rpt = loop 0 xss
--   pure $ RPTree rvs rpt


-- -- | Partition uniformly at random between inner product extreme values
-- treeRT :: (Monad m, Inner SVector v) =>
--           Int -- ^ max tree depth
--        -> Int -- ^ min leaf size
--        -> Double -- ^ nonzero density
--        -> Int -- ^ embedding dimension
--        -> V.Vector (v Double) -- ^ data
--        -> GenT m (RT SVector Double (V.Vector (v Double)))
-- treeRT maxDepth minLeaf pnz dim xss = loop 0 xss
--   where
--     loop ixLev xs = do
--       if ixLev >= maxDepth || length xs <= minLeaf
--         then
--           pure $ RTip xs
--         else
--         do
--           -- sample projection vector
--           r <- sparse pnz dim stdNormal
--           let
--             -- project the dataset
--             projs = map (\x -> (x, r `inner` x)) xs
--             hi = snd $ maximumBy (comparing snd) projs
--             lo = snd $ minimumBy (comparing snd) projs
--           -- sample a threshold
--           thr <- uniformR lo hi
--           let
--             (ll, rr) = partition (\xp -> snd xp < thr) projs
--           treel <- loop (ixLev + 1) (map fst ll)
--           treer <- loop (ixLev + 1) (map fst rr)
--           pure $ RBin r treel treer


-- -- | Partition wrt a plane _|_ to the segment connecting two points sampled at random
-- --
-- -- (like annoy@@)
-- treeRT2 :: (Monad m, Ord d, Fractional d, Inner v v, VU.Unbox d, Num d) =>
--            Int
--         -> Int
--         -> [v d]
--         -> GenT m (RT v d [v d])
-- treeRT2 maxd minl xss = loop 0 xss
--   where
--     loop ixLev xs = do
--       if ixLev >= maxd || length xs <= minl
--         then
--           pure $ RTip xs
--         else
--         do
--           x12 <- sampleWOR 2 xs
--           let
--             (x1:x2:_) = x12
--             r = x1 ^-^ x2
--             (ll, rr) = partition (\x -> (r `inner` (x ^-^ x1) < 0)) xs
--           treel <- loop (ixLev + 1) ll
--           treer <- loop (ixLev + 1) rr
--           pure $ RBin r treel treer










-- ulid :: MonadIO m => a -> m (ULID a)
-- ulid x = ULID <$> pure x <*> liftIO UU.getULID
-- data ULID a = ULID { uData :: a , uULID :: UU.ULID } deriving (Eq, Show)
-- instance (Eq a) => Ord (ULID a) where
--   ULID _ u1 <= ULID _ u2 = u1 <= u2