rp-tree-0.1.0.0: Random projection trees
Safe HaskellNone
LanguageHaskell2010

Data.RPTree

Description

Random projection trees for approximate nearest neighbor search in high-dimensional vector spaces

Synopsis

Construction

forest Source #

Arguments

:: (MonadThrow m, Inner SVector v) 
=> Word64

random seed

-> Int

max tree depth

-> Int

min leaf size

-> Int

number of trees

-> Int

data chunk size

-> Double

nonzero density of projection vectors

-> Int

dimension of projection vectors

-> ConduitT () (v Double) m ()

data source

-> m (RPForest Double (Vector (v Double))) 

Populate a forest from a data stream

Assumptions on the data source:

  • non-empty : contains at least one value
  • stationary : each chunk is representative of the whole dataset
  • bounded : we wait until the end of the stream to produce a result

Throws EmptyResult if the conduit is empty

Query

knn Source #

Arguments

:: (Ord p, Inner SVector v, Unbox d, Real d) 
=> (v2 -> v d -> p)

distance function

-> Int

k neighbors

-> RPForest d (Vector v2)

random projection forest

-> v d

query point

-> Vector (p, v2)

ordered in increasing distance order

k nearest neighbors

Validation

recall Source #

Arguments

:: (Inner u v, Inner SVector v, Unbox a, Ord a, Ord (u a), Floating a) 
=> RPForest a (Vector (u a)) 
-> Int

k : number of nearest neighbors to consider

-> v a

query point

-> Double 

average recall-at-k, computed over a set of trees

Access

levels :: RPTree d a -> Int Source #

Number of tree levels

points :: Monoid m => RPTree d m -> m Source #

Set of data points used to construct the index

leaves :: RPTree d a -> [a] Source #

candidates Source #

Arguments

:: (Inner SVector v, Unbox d, Ord d, Num d, Semigroup xs) 
=> RPTree d xs 
-> v d

query point

-> xs 

Retrieve points nearest to the query

in case of a narrow margin, collect both branches of the tree

Types

RPTree

data RPTree d a Source #

Random projection trees

The first type parameter corresponds to a floating point scalar value, the second is the type of the data collected at the leaves of the tree (e.g. lists of vectors)

We keep them separate to leverage the Functor instance for postprocessing and visualization

One projection vector per tree level (as suggested in https://www.cs.helsinki.fi/u/ttonteri/pub/bigdata2016.pdf )

Instances

Instances details
Functor (RPTree d) Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

fmap :: (a -> b) -> RPTree d a -> RPTree d b #

(<$) :: a -> RPTree d b -> RPTree d a #

Foldable (RPTree d) Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

fold :: Monoid m => RPTree d m -> m #

foldMap :: Monoid m => (a -> m) -> RPTree d a -> m #

foldMap' :: Monoid m => (a -> m) -> RPTree d a -> m #

foldr :: (a -> b -> b) -> b -> RPTree d a -> b #

foldr' :: (a -> b -> b) -> b -> RPTree d a -> b #

foldl :: (b -> a -> b) -> b -> RPTree d a -> b #

foldl' :: (b -> a -> b) -> b -> RPTree d a -> b #

foldr1 :: (a -> a -> a) -> RPTree d a -> a #

foldl1 :: (a -> a -> a) -> RPTree d a -> a #

toList :: RPTree d a -> [a] #

null :: RPTree d a -> Bool #

length :: RPTree d a -> Int #

elem :: Eq a => a -> RPTree d a -> Bool #

maximum :: Ord a => RPTree d a -> a #

minimum :: Ord a => RPTree d a -> a #

sum :: Num a => RPTree d a -> a #

product :: Num a => RPTree d a -> a #

Traversable (RPTree d) Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

traverse :: Applicative f => (a -> f b) -> RPTree d a -> f (RPTree d b) #

sequenceA :: Applicative f => RPTree d (f a) -> f (RPTree d a) #

mapM :: Monad m => (a -> m b) -> RPTree d a -> m (RPTree d b) #

sequence :: Monad m => RPTree d (m a) -> m (RPTree d a) #

(Unbox d, Eq d, Eq a) => Eq (RPTree d a) Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

(==) :: RPTree d a -> RPTree d a -> Bool #

(/=) :: RPTree d a -> RPTree d a -> Bool #

(Unbox d, Show d, Show a) => Show (RPTree d a) Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

showsPrec :: Int -> RPTree d a -> ShowS #

show :: RPTree d a -> String #

showList :: [RPTree d a] -> ShowS #

Generic (RPTree d a) Source # 
Instance details

Defined in Data.RPTree.Internal

Associated Types

type Rep (RPTree d a) :: Type -> Type #

Methods

from :: RPTree d a -> Rep (RPTree d a) x #

to :: Rep (RPTree d a) x -> RPTree d a #

(NFData a, NFData d) => NFData (RPTree d a) Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

rnf :: RPTree d a -> () #

(Serialise d, Serialise a, Unbox d) => Serialise (RPTree d a) Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

encode :: RPTree d a -> Encoding #

decode :: Decoder s (RPTree d a) #

encodeList :: [RPTree d a] -> Encoding #

decodeList :: Decoder s [RPTree d a] #

type Rep (RPTree d a) Source # 
Instance details

Defined in Data.RPTree.Internal

type Rep (RPTree d a)

type RPForest d a = IntMap (RPTree d a) Source #

data SVector a Source #

Sparse vectors with unboxed components

Instances

Instances details
Scale SVector Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

(.*) :: (Unbox a, Num a) => a -> SVector a -> SVector a Source #

Inner SVector Vector Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

inner :: (Unbox a, Num a) => SVector a -> Vector a -> a Source #

metricL2 :: (Unbox a, Floating a) => SVector a -> Vector a -> a Source #

(^+^) :: (Unbox a, Num a) => SVector a -> Vector a -> SVector a Source #

(^-^) :: (Unbox a, Num a) => SVector a -> Vector a -> SVector a Source #

Inner SVector DVector Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

inner :: (Unbox a, Num a) => SVector a -> DVector a -> a Source #

metricL2 :: (Unbox a, Floating a) => SVector a -> DVector a -> a Source #

(^+^) :: (Unbox a, Num a) => SVector a -> DVector a -> SVector a Source #

(^-^) :: (Unbox a, Num a) => SVector a -> DVector a -> SVector a Source #

Inner SVector SVector Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

inner :: (Unbox a, Num a) => SVector a -> SVector a -> a Source #

metricL2 :: (Unbox a, Floating a) => SVector a -> SVector a -> a Source #

(^+^) :: (Unbox a, Num a) => SVector a -> SVector a -> SVector a Source #

(^-^) :: (Unbox a, Num a) => SVector a -> SVector a -> SVector a Source #

(Unbox a, Eq a) => Eq (SVector a) Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

(==) :: SVector a -> SVector a -> Bool #

(/=) :: SVector a -> SVector a -> Bool #

(Unbox a, Ord a) => Ord (SVector a) Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

compare :: SVector a -> SVector a -> Ordering #

(<) :: SVector a -> SVector a -> Bool #

(<=) :: SVector a -> SVector a -> Bool #

(>) :: SVector a -> SVector a -> Bool #

(>=) :: SVector a -> SVector a -> Bool #

max :: SVector a -> SVector a -> SVector a #

min :: SVector a -> SVector a -> SVector a #

(Unbox a, Show a) => Show (SVector a) Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

showsPrec :: Int -> SVector a -> ShowS #

show :: SVector a -> String #

showList :: [SVector a] -> ShowS #

Generic (SVector a) Source # 
Instance details

Defined in Data.RPTree.Internal

Associated Types

type Rep (SVector a) :: Type -> Type #

Methods

from :: SVector a -> Rep (SVector a) x #

to :: Rep (SVector a) x -> SVector a #

NFData (SVector a) Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

rnf :: SVector a -> () #

(Unbox a, Serialise a) => Serialise (SVector a) Source # 
Instance details

Defined in Data.RPTree.Internal

type Rep (SVector a) Source # 
Instance details

Defined in Data.RPTree.Internal

type Rep (SVector a) = D1 ('MetaData "SVector" "Data.RPTree.Internal" "rp-tree-0.1.0.0-CFkPWzCADA43VltLgcxOCH" 'False) (C1 ('MetaCons "SV" 'PrefixI 'True) (S1 ('MetaSel ('Just "svDim") 'NoSourceUnpackedness 'SourceStrict 'DecidedStrict) (Rec0 Int) :*: S1 ('MetaSel ('Just "svVec") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 (Vector (Int, a)))))

fromListSv :: Unbox a => Int -> [(Int, a)] -> SVector a Source #

data DVector a Source #

Dense vectors with unboxed components

Instances

Instances details
Scale DVector Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

(.*) :: (Unbox a, Num a) => a -> DVector a -> DVector a Source #

Inner DVector DVector Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

inner :: (Unbox a, Num a) => DVector a -> DVector a -> a Source #

metricL2 :: (Unbox a, Floating a) => DVector a -> DVector a -> a Source #

(^+^) :: (Unbox a, Num a) => DVector a -> DVector a -> DVector a Source #

(^-^) :: (Unbox a, Num a) => DVector a -> DVector a -> DVector a Source #

Inner SVector DVector Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

inner :: (Unbox a, Num a) => SVector a -> DVector a -> a Source #

metricL2 :: (Unbox a, Floating a) => SVector a -> DVector a -> a Source #

(^+^) :: (Unbox a, Num a) => SVector a -> DVector a -> SVector a Source #

(^-^) :: (Unbox a, Num a) => SVector a -> DVector a -> SVector a Source #

(Unbox a, Eq a) => Eq (DVector a) Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

(==) :: DVector a -> DVector a -> Bool #

(/=) :: DVector a -> DVector a -> Bool #

(Unbox a, Ord a) => Ord (DVector a) Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

compare :: DVector a -> DVector a -> Ordering #

(<) :: DVector a -> DVector a -> Bool #

(<=) :: DVector a -> DVector a -> Bool #

(>) :: DVector a -> DVector a -> Bool #

(>=) :: DVector a -> DVector a -> Bool #

max :: DVector a -> DVector a -> DVector a #

min :: DVector a -> DVector a -> DVector a #

(Unbox a, Show a) => Show (DVector a) Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

showsPrec :: Int -> DVector a -> ShowS #

show :: DVector a -> String #

showList :: [DVector a] -> ShowS #

Generic (DVector a) Source # 
Instance details

Defined in Data.RPTree.Internal

Associated Types

type Rep (DVector a) :: Type -> Type #

Methods

from :: DVector a -> Rep (DVector a) x #

to :: Rep (DVector a) x -> DVector a #

(Unbox a, Serialise a) => Serialise (DVector a) Source # 
Instance details

Defined in Data.RPTree.Internal

type Rep (DVector a) Source # 
Instance details

Defined in Data.RPTree.Internal

type Rep (DVector a) = D1 ('MetaData "DVector" "Data.RPTree.Internal" "rp-tree-0.1.0.0-CFkPWzCADA43VltLgcxOCH" 'True) (C1 ('MetaCons "DV" 'PrefixI 'True) (S1 ('MetaSel ('Just "dvVec") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 (Vector a))))

fromListDv :: Unbox a => [a] -> DVector a Source #

inner product

class (Scale u, Scale v) => Inner u v where Source #

Inner product spaces

This typeclass is provided as a convenience for library users to interface their own vector types.

Methods

inner :: (Unbox a, Num a) => u a -> v a -> a Source #

metricL2 :: (Unbox a, Floating a) => u a -> v a -> a Source #

(^+^) :: (Unbox a, Num a) => u a -> v a -> u a Source #

(^-^) :: (Unbox a, Num a) => u a -> v a -> u a Source #

Instances

Instances details
Inner DVector DVector Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

inner :: (Unbox a, Num a) => DVector a -> DVector a -> a Source #

metricL2 :: (Unbox a, Floating a) => DVector a -> DVector a -> a Source #

(^+^) :: (Unbox a, Num a) => DVector a -> DVector a -> DVector a Source #

(^-^) :: (Unbox a, Num a) => DVector a -> DVector a -> DVector a Source #

Inner SVector Vector Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

inner :: (Unbox a, Num a) => SVector a -> Vector a -> a Source #

metricL2 :: (Unbox a, Floating a) => SVector a -> Vector a -> a Source #

(^+^) :: (Unbox a, Num a) => SVector a -> Vector a -> SVector a Source #

(^-^) :: (Unbox a, Num a) => SVector a -> Vector a -> SVector a Source #

Inner SVector DVector Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

inner :: (Unbox a, Num a) => SVector a -> DVector a -> a Source #

metricL2 :: (Unbox a, Floating a) => SVector a -> DVector a -> a Source #

(^+^) :: (Unbox a, Num a) => SVector a -> DVector a -> SVector a Source #

(^-^) :: (Unbox a, Num a) => SVector a -> DVector a -> SVector a Source #

Inner SVector SVector Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

inner :: (Unbox a, Num a) => SVector a -> SVector a -> a Source #

metricL2 :: (Unbox a, Floating a) => SVector a -> SVector a -> a Source #

(^+^) :: (Unbox a, Num a) => SVector a -> SVector a -> SVector a Source #

(^-^) :: (Unbox a, Num a) => SVector a -> SVector a -> SVector a Source #

class Scale v where Source #

Methods

(.*) :: (Unbox a, Num a) => a -> v a -> v a Source #

Instances

Instances details
Scale Vector Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

(.*) :: (Unbox a, Num a) => a -> Vector a -> Vector a Source #

Scale DVector Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

(.*) :: (Unbox a, Num a) => a -> DVector a -> DVector a Source #

Scale SVector Source # 
Instance details

Defined in Data.RPTree.Internal

Methods

(.*) :: (Unbox a, Num a) => a -> SVector a -> SVector a Source #

Conduit

dataSource Source #

Arguments

:: Monad m 
=> Int

number of vectors to generate

-> GenT m a

random generator for the vector components

-> ConduitT i a (GenT m) () 

Source of random data points

Random generation

vector

sparse Source #

Arguments

:: (Monad m, Unbox a) 
=> Double

nonzero density

-> Int

vector dimension

-> GenT m a

random generator of vector components

-> GenT m (SVector a) 

Generate a sparse random vector with a given nonzero density and components sampled from the supplied random generator

dense Source #

Arguments

:: (Monad m, Vector Vector a) 
=> Int

vector dimension

-> GenT m a

random generator of vector components

-> GenT m (DVector a) 

Generate a dense random vector with components sampled from the supplied random generator

Rendering

draw :: (Show a, Boxed a, PrintfArg v) => RPTree v a -> IO () Source #

Render a tree to stdout

Useful for debugging

This should be called only for small trees, otherwise the printed result quickly overflows the screen and becomes hard to read.

NB : prints distance information rounded to two decimal digits

CSV

writeCsv Source #

Arguments

:: (Show a, Show b, Unbox a) 
=> FilePath 
-> [(DVector a, b)]

data point, label

-> IO () 

Encode dataset as CSV and save into file