Safe Haskell | None |
---|---|

Language | Haskell2010 |

Efficient in-memory (in-core) storage of tabular data.

## Synopsis

- type family VectorFor t :: * -> *
- type VectorMFor a = Mutable (VectorFor a)
- initialCapacity :: Int
- type family VectorMs m rs where ...
- type family Vectors rs where ...
- class RecVec (rs :: [(Symbol, Type)]) where
- allocRec :: PrimMonad m => proxy rs -> Int -> m (Record (VectorMs m rs))
- freezeRec :: PrimMonad m => proxy rs -> Int -> Record (VectorMs m rs) -> m (Record (Vectors rs))
- growRec :: PrimMonad m => proxy rs -> Record (VectorMs m rs) -> m (Record (VectorMs m rs))
- writeRec :: PrimMonad m => proxy rs -> Int -> Record (VectorMs m rs) -> Record rs -> m ()
- indexRec :: proxy rs -> Int -> Record (Vectors rs) -> Record rs
- produceRec :: proxy rs -> Record (Vectors rs) -> Rec ((->) Int :. ElField) rs

- inCoreSoA :: forall m rs. (PrimMonad m, RecVec rs) => Producer (Record rs) m () -> m (Int, Rec ((->) Int :. ElField) rs)
- inCoreAoS :: (PrimMonad m, RecVec rs) => Producer (Record rs) m () -> m (FrameRec rs)
- inCoreAoS' :: (PrimMonad m, RecVec rs) => (Rec ((->) Int :. ElField) rs -> Rec ((->) Int :. ElField) ss) -> Producer (Record rs) m () -> m (FrameRec ss)
- toAoS :: Int -> Rec ((->) Int :. ElField) rs -> FrameRec rs
- inCore :: forall m n rs. (PrimMonad m, RecVec rs, Monad n) => Producer (Record rs) m () -> m (Producer (Record rs) n ())
- toFrame :: (Foldable f, RecVec rs) => f (Record rs) -> Frame (Record rs)
- filterFrame :: RecVec rs => (Record rs -> Bool) -> FrameRec rs -> FrameRec rs
- produceFrameChunks :: forall rs m. (RecVec rs, PrimMonad m) => Int -> Producer (Record rs) m () -> Producer (FrameRec rs) m ()
- frameChunks :: Int -> FrameRec rs -> [FrameRec rs]

# Documentation

type family VectorFor t :: * -> * Source #

The most efficient vector type for each column data type.

#### Instances

type VectorFor Bool Source # | |

Defined in Frames.InCore | |

type VectorFor Double Source # | |

Defined in Frames.InCore | |

type VectorFor Float Source # | |

Defined in Frames.InCore | |

type VectorFor Int Source # | |

Defined in Frames.InCore | |

type VectorFor String Source # | |

Defined in Frames.InCore | |

type VectorFor Text Source # | |

Defined in Frames.InCore |

type VectorMFor a = Mutable (VectorFor a) Source #

The mutable version of `VectorFor`

a particular type.

initialCapacity :: Int Source #

Since we stream into the in-memory representation, we use an exponential growth strategy to resize arrays as more data is read in. This is the initial capacity of each column.

class RecVec (rs :: [(Symbol, Type)]) where Source #

Tooling to allocate, grow, write to, freeze, and index into records of vectors.

allocRec :: PrimMonad m => proxy rs -> Int -> m (Record (VectorMs m rs)) Source #

freezeRec :: PrimMonad m => proxy rs -> Int -> Record (VectorMs m rs) -> m (Record (Vectors rs)) Source #

growRec :: PrimMonad m => proxy rs -> Record (VectorMs m rs) -> m (Record (VectorMs m rs)) Source #

writeRec :: PrimMonad m => proxy rs -> Int -> Record (VectorMs m rs) -> Record rs -> m () Source #

indexRec :: proxy rs -> Int -> Record (Vectors rs) -> Record rs Source #

produceRec :: proxy rs -> Record (Vectors rs) -> Rec ((->) Int :. ElField) rs Source #

#### Instances

RecVec ('[] :: [(Symbol, Type)]) Source # | |

Defined in Frames.InCore allocRec :: PrimMonad m => proxy '[] -> Int -> m (Record (VectorMs m '[])) Source # freezeRec :: PrimMonad m => proxy '[] -> Int -> Record (VectorMs m '[]) -> m (Record (Vectors '[])) Source # growRec :: PrimMonad m => proxy '[] -> Record (VectorMs m '[]) -> m (Record (VectorMs m '[])) Source # writeRec :: PrimMonad m => proxy '[] -> Int -> Record (VectorMs m '[]) -> Record '[] -> m () Source # indexRec :: proxy '[] -> Int -> Record (Vectors '[]) -> Record '[] Source # produceRec :: proxy '[] -> Record (Vectors '[]) -> Rec ((->) Int :. ElField) '[] Source # | |

(MVector (VectorMFor a) a, Vector (VectorFor a) a, KnownSymbol s, RecVec rs) => RecVec ((s :-> a) ': rs) Source # | |

Defined in Frames.InCore allocRec :: PrimMonad m => proxy ((s :-> a) ': rs) -> Int -> m (Record (VectorMs m ((s :-> a) ': rs))) Source # freezeRec :: PrimMonad m => proxy ((s :-> a) ': rs) -> Int -> Record (VectorMs m ((s :-> a) ': rs)) -> m (Record (Vectors ((s :-> a) ': rs))) Source # growRec :: PrimMonad m => proxy ((s :-> a) ': rs) -> Record (VectorMs m ((s :-> a) ': rs)) -> m (Record (VectorMs m ((s :-> a) ': rs))) Source # writeRec :: PrimMonad m => proxy ((s :-> a) ': rs) -> Int -> Record (VectorMs m ((s :-> a) ': rs)) -> Record ((s :-> a) ': rs) -> m () Source # indexRec :: proxy ((s :-> a) ': rs) -> Int -> Record (Vectors ((s :-> a) ': rs)) -> Record ((s :-> a) ': rs) Source # produceRec :: proxy ((s :-> a) ': rs) -> Record (Vectors ((s :-> a) ': rs)) -> Rec ((->) Int :. ElField) ((s :-> a) ': rs) Source # |

inCoreSoA :: forall m rs. (PrimMonad m, RecVec rs) => Producer (Record rs) m () -> m (Int, Rec ((->) Int :. ElField) rs) Source #

Stream a finite sequence of rows into an efficient in-memory
representation for further manipulation. Each column of the input
table will be stored optimally based on its type, making use of the
resulting generators a matter of indexing into a densely packed
representation. Returns the number of rows and a record of column
indexing functions. See `toAoS`

to convert the result to a `Frame`

which provides an easier-to-use function that indexes into the
table in a row-major fashion.

inCoreAoS :: (PrimMonad m, RecVec rs) => Producer (Record rs) m () -> m (FrameRec rs) Source #

Stream a finite sequence of rows into an efficient in-memory
representation for further manipulation. Each column of the input
table will be stored optimally based on its type, making use of the
resulting generators a matter of indexing into a densely packed
representation. Returns a `Frame`

that provides a function to index
into the table.

inCoreAoS' :: (PrimMonad m, RecVec rs) => (Rec ((->) Int :. ElField) rs -> Rec ((->) Int :. ElField) ss) -> Producer (Record rs) m () -> m (FrameRec ss) Source #

toAoS :: Int -> Rec ((->) Int :. ElField) rs -> FrameRec rs Source #

Convert a structure-of-arrays to an array-of-structures. This can simplify usage of an in-memory representation.

inCore :: forall m n rs. (PrimMonad m, RecVec rs, Monad n) => Producer (Record rs) m () -> m (Producer (Record rs) n ()) Source #

Stream a finite sequence of rows into an efficient in-memory representation for further manipulation. Each column of the input table will be stored optimally based on its type, making use of the resulting generator a matter of indexing into a densely packed representation.

filterFrame :: RecVec rs => (Record rs -> Bool) -> FrameRec rs -> FrameRec rs Source #

Keep only those rows of a `FrameRec`

that satisfy a predicate.