{-# LANGUAGE BangPatterns, CPP, DataKinds, EmptyCase,
             FlexibleContexts, FlexibleInstances, PolyKinds,
             ScopedTypeVariables, TupleSections, TypeFamilies,
             TypeOperators, UndecidableInstances #-}
-- | Efficient in-memory (in-core) storage of tabular data.
module Frames.InCore where
import Control.Monad.Primitive
import Control.Monad.ST (runST)
import Data.Kind (Type)
import Data.Proxy
import Data.Text (Text)
import qualified Data.Vector as VB
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Generic.Mutable as VGM
import qualified Data.Vector.Unboxed as VU
import Data.Vinyl (Rec(..))
import qualified Data.Vinyl as V
import Data.Vinyl.Functor (Compose(..), getCompose, ElField(..), (:.))
import Frames.Col
import Frames.Frame
import Frames.Rec
import Frames.RecF
#if __GLASGOW_HASKELL__ < 800
import GHC.Prim (RealWorld)
#endif
import GHC.TypeLits (KnownSymbol)
import GHC.Types (Symbol)
import qualified Pipes as P
import qualified Pipes.Prelude as P

-- | The most efficient vector type for each column data type.
type family VectorFor t :: Type -> Type
type instance VectorFor Bool = VU.Vector
type instance VectorFor Int = VU.Vector
type instance VectorFor Float = VU.Vector
type instance VectorFor Double = VU.Vector
type instance VectorFor String = VB.Vector
type instance VectorFor Text = VB.Vector

-- | The mutable version of 'VectorFor' a particular type.
type VectorMFor a = VG.Mutable (VectorFor a)

-- | Since we stream into the in-memory representation, we use an
-- exponential growth strategy to resize arrays as more data is read
-- in. This is the initial capacity of each column.
initialCapacity :: Int
initialCapacity :: Int
initialCapacity = Int
128

-- | Mutable vector types for each column in a row.
type family VectorMs m rs where
  VectorMs m '[] = '[]
  VectorMs m (s :-> a ': rs) =
    s :-> VectorMFor a (PrimState m) a ': VectorMs m rs

-- | Immutable vector types for each column in a row.
type family Vectors rs where
  Vectors '[] = '[]
  Vectors (s :-> a ': rs) = s :-> VectorFor a a ': Vectors rs

-- | Tooling to allocate, grow, write to, freeze, and index into
-- records of vectors.
class RecVec (rs :: [(Symbol,Type)]) where
  allocRec   :: PrimMonad m
             => proxy rs -> Int -> m (Record (VectorMs m rs))
  freezeRec  :: PrimMonad m
             => proxy rs -> Int -> Record (VectorMs m rs)
             -> m (Record (Vectors rs))
  growRec    :: PrimMonad m
             => proxy rs -> Record (VectorMs m rs) -> m (Record (VectorMs m rs))
  writeRec   :: PrimMonad m
             => proxy rs -> Int -> Record (VectorMs m rs) -> Record rs -> m ()
  indexRec   :: proxy rs -> Int -> Record (Vectors rs) -> Record rs
  produceRec :: proxy rs -> Record (Vectors rs) -> V.Rec (((->) Int) :. ElField) rs

-- The use of the type families Vectors and VectorMs interferes with
-- GHC's pattern match exhaustiveness checker, so we write down dummy
-- cases to avoid warnings. This is probably a GHC bug as writing the
-- apparently missing pattern match causes a type checker error!

instance RecVec '[] where
  allocRec :: forall (m :: * -> *) (proxy :: [(Symbol, *)] -> *).
PrimMonad m =>
proxy '[] -> Int -> m (Record (VectorMs m '[]))
allocRec proxy '[]
_ Int
_ = forall (m :: * -> *) a. Monad m => a -> m a
return forall {u} (a :: u -> *). Rec a '[]
V.RNil
  {-# INLINE allocRec #-}

  freezeRec :: forall (m :: * -> *) (proxy :: [(Symbol, *)] -> *).
PrimMonad m =>
proxy '[]
-> Int -> Record (VectorMs m '[]) -> m (Record (Vectors '[]))
freezeRec proxy '[]
_ Int
_ Rec ElField (VectorMs m '[])
V.RNil = forall (m :: * -> *) a. Monad m => a -> m a
return forall {u} (a :: u -> *). Rec a '[]
V.RNil
#if __GLASGOW_HASKELL__ < 800
  freezeRec _ _ x = case x of
#endif
  {-# INLINE freezeRec #-}

  growRec :: forall (m :: * -> *) (proxy :: [(Symbol, *)] -> *).
PrimMonad m =>
proxy '[] -> Record (VectorMs m '[]) -> m (Record (VectorMs m '[]))
growRec proxy '[]
_ Rec ElField (VectorMs m '[])
V.RNil = forall (m :: * -> *) a. Monad m => a -> m a
return forall {u} (a :: u -> *). Rec a '[]
V.RNil
#if __GLASGOW_HASKELL__ < 800
  growRec _ x = case x of
#endif
  {-# INLINE growRec #-}

  indexRec :: forall (proxy :: [(Symbol, *)] -> *).
proxy '[] -> Int -> Record (Vectors '[]) -> Record '[]
indexRec proxy '[]
_ Int
_ Record (Vectors '[])
_ = forall {u} (a :: u -> *). Rec a '[]
V.RNil
  {-# INLINE indexRec #-}

  writeRec :: forall (m :: * -> *) (proxy :: [(Symbol, *)] -> *).
PrimMonad m =>
proxy '[] -> Int -> Record (VectorMs m '[]) -> Record '[] -> m ()
writeRec proxy '[]
_ Int
_ Rec ElField (VectorMs m '[])
V.RNil Record '[]
V.RNil = forall (m :: * -> *) a. Monad m => a -> m a
return ()
#if __GLASGOW_HASKELL__ < 800
  writeRec _ _ x _ = case x of
#endif
  {-# INLINE writeRec #-}

  produceRec :: forall (proxy :: [(Symbol, *)] -> *).
proxy '[] -> Record (Vectors '[]) -> Rec ((->) Int :. ElField) '[]
produceRec proxy '[]
_ Record (Vectors '[])
V.RNil = forall {u} (a :: u -> *). Rec a '[]
V.RNil
#if __GLASGOW_HASKELL__ < 800
  produceRec _ x = case x of
#endif
  {-# INLINE produceRec #-}

instance forall s a rs.
  (VGM.MVector (VectorMFor a) a,
   VG.Vector (VectorFor a) a,
   KnownSymbol s, RecVec rs)
  => RecVec (s :-> a ': rs) where
  allocRec :: forall (m :: * -> *) (proxy :: [(Symbol, *)] -> *).
PrimMonad m =>
proxy ((s :-> a) : rs)
-> Int -> m (Record (VectorMs m ((s :-> a) : rs)))
allocRec proxy ((s :-> a) : rs)
_ Int
size = forall (s :: Symbol) a (rs :: [(Symbol, *)]).
KnownSymbol s =>
a -> Record rs -> Record ((s :-> a) : rs)
(&:) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) (v :: * -> * -> *) a.
(HasCallStack, PrimMonad m, MVector v a) =>
Int -> m (v (PrimState m) a)
VGM.new Int
size forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (rs :: [(Symbol, *)]) (m :: * -> *)
       (proxy :: [(Symbol, *)] -> *).
(RecVec rs, PrimMonad m) =>
proxy rs -> Int -> m (Record (VectorMs m rs))
allocRec (forall {k} (t :: k). Proxy t
Proxy::Proxy rs) Int
size
  {-# INLINE allocRec #-}

  freezeRec :: forall (m :: * -> *) (proxy :: [(Symbol, *)] -> *).
PrimMonad m =>
proxy ((s :-> a) : rs)
-> Int
-> Record (VectorMs m ((s :-> a) : rs))
-> m (Record (Vectors ((s :-> a) : rs)))
freezeRec proxy ((s :-> a) : rs)
_ Int
n (Col Mutable (VectorFor a) (PrimState m) a
x :& Rec ElField rs
xs) =
    forall (s :: Symbol) a (rs :: [(Symbol, *)]).
KnownSymbol s =>
a -> Record rs -> Record ((s :-> a) : rs)
(&:) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (forall (m :: * -> *) (v :: * -> *) a.
(PrimMonad m, Vector v a) =>
Mutable v (PrimState m) a -> m (v a)
VG.unsafeFreeze forall a b. (a -> b) -> a -> b
$ forall (v :: * -> * -> *) a s.
MVector v a =>
Int -> Int -> v s a -> v s a
VGM.unsafeSlice Int
0 Int
n Mutable (VectorFor a) (PrimState m) a
x)
         forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (rs :: [(Symbol, *)]) (m :: * -> *)
       (proxy :: [(Symbol, *)] -> *).
(RecVec rs, PrimMonad m) =>
proxy rs
-> Int -> Record (VectorMs m rs) -> m (Record (Vectors rs))
freezeRec (forall {k} (t :: k). Proxy t
Proxy::Proxy rs) Int
n Rec ElField rs
xs
  freezeRec proxy ((s :-> a) : rs)
_ Int
_ Rec ElField (VectorMs m ((s :-> a) : rs))
_ = forall a. HasCallStack => [Char] -> a
error [Char]
"Impossible case freezeRec"
  {-# INLINE freezeRec #-}

  growRec :: forall (m :: * -> *) (proxy :: [(Symbol, *)] -> *).
PrimMonad m =>
proxy ((s :-> a) : rs)
-> Record (VectorMs m ((s :-> a) : rs))
-> m (Record (VectorMs m ((s :-> a) : rs)))
growRec proxy ((s :-> a) : rs)
_ (Col Mutable (VectorFor a) (PrimState m) a
x :& Rec ElField rs
xs) = forall (s :: Symbol) a (rs :: [(Symbol, *)]).
KnownSymbol s =>
a -> Record rs -> Record ((s :-> a) : rs)
(&:) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) (v :: * -> * -> *) a.
(HasCallStack, PrimMonad m, MVector v a) =>
v (PrimState m) a -> Int -> m (v (PrimState m) a)
VGM.grow Mutable (VectorFor a) (PrimState m) a
x (forall (v :: * -> * -> *) a s. MVector v a => v s a -> Int
VGM.length Mutable (VectorFor a) (PrimState m) a
x)
                                              forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (rs :: [(Symbol, *)]) (m :: * -> *)
       (proxy :: [(Symbol, *)] -> *).
(RecVec rs, PrimMonad m) =>
proxy rs -> Record (VectorMs m rs) -> m (Record (VectorMs m rs))
growRec (forall {k} (t :: k). Proxy t
Proxy :: Proxy rs) Rec ElField rs
xs
  growRec proxy ((s :-> a) : rs)
_ Rec ElField (VectorMs m ((s :-> a) : rs))
_ = forall a. HasCallStack => [Char] -> a
error [Char]
"Impossible case growRec"
  {-# INLINE growRec #-}

  writeRec :: forall (m :: * -> *) (proxy :: [(Symbol, *)] -> *).
PrimMonad m =>
proxy ((s :-> a) : rs)
-> Int
-> Record (VectorMs m ((s :-> a) : rs))
-> Record ((s :-> a) : rs)
-> m ()
writeRec proxy ((s :-> a) : rs)
_ !Int
i !(Col Mutable (VectorFor a) (PrimState m) a
v :& Rec ElField rs
vs) (Col a
x :& Rec ElField rs
xs) =
    forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
v (PrimState m) a -> Int -> a -> m ()
VGM.unsafeWrite Mutable (VectorFor a) (PrimState m) a
v Int
i a
x forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (rs :: [(Symbol, *)]) (m :: * -> *)
       (proxy :: [(Symbol, *)] -> *).
(RecVec rs, PrimMonad m) =>
proxy rs -> Int -> Record (VectorMs m rs) -> Record rs -> m ()
writeRec (forall {k} (t :: k). Proxy t
Proxy::Proxy rs) Int
i Rec ElField rs
vs Rec ElField rs
xs
  writeRec proxy ((s :-> a) : rs)
_ Int
_ Rec ElField (VectorMs m ((s :-> a) : rs))
_ Record ((s :-> a) : rs)
_ = forall a. HasCallStack => [Char] -> a
error [Char]
"Impossible case writeRec"
  {-# INLINE writeRec #-}

  indexRec :: forall (proxy :: [(Symbol, *)] -> *).
proxy ((s :-> a) : rs)
-> Int
-> Record (Vectors ((s :-> a) : rs))
-> Record ((s :-> a) : rs)
indexRec proxy ((s :-> a) : rs)
_ !Int
i !(Col VectorFor a a
x :& Rec ElField rs
xs) =
    VectorFor a a
x forall (v :: * -> *) a.
(HasCallStack, Vector v a) =>
v a -> Int -> a
VG.! Int
i forall (s :: Symbol) a (rs :: [(Symbol, *)]).
KnownSymbol s =>
a -> Record rs -> Record ((s :-> a) : rs)
&: forall (rs :: [(Symbol, *)]) (proxy :: [(Symbol, *)] -> *).
RecVec rs =>
proxy rs -> Int -> Record (Vectors rs) -> Record rs
indexRec (forall {k} (t :: k). Proxy t
Proxy :: Proxy rs) Int
i Rec ElField rs
xs
  indexRec proxy ((s :-> a) : rs)
_ Int
_ Record (Vectors ((s :-> a) : rs))
_ = forall a. HasCallStack => [Char] -> a
error [Char]
"Impossible case indexRec"
  {-# INLINE indexRec #-}

  produceRec :: forall (proxy :: [(Symbol, *)] -> *).
proxy ((s :-> a) : rs)
-> Record (Vectors ((s :-> a) : rs))
-> Rec ((->) Int :. ElField) ((s :-> a) : rs)
produceRec proxy ((s :-> a) : rs)
_ (Col VectorFor a a
v V.:& Rec ElField rs
vs) = forall l k (f :: l -> *) (g :: k -> l) (x :: k).
f (g x) -> Compose f g x
Compose (forall (t :: (Symbol, *)). Snd t -> ElField t
Field forall b c a. (b -> c) -> (a -> b) -> a -> c
. (VectorFor a a
v forall (v :: * -> *) a.
(HasCallStack, Vector v a) =>
v a -> Int -> a
VG.!)) forall {u} (a :: u -> *) (r :: u) (rs :: [u]).
a r -> Rec a rs -> Rec a (r : rs)
:& forall (rs :: [(Symbol, *)]) (proxy :: [(Symbol, *)] -> *).
RecVec rs =>
proxy rs -> Record (Vectors rs) -> Rec ((->) Int :. ElField) rs
produceRec (forall {k} (t :: k). Proxy t
Proxy::Proxy rs) Rec ElField rs
vs
  produceRec proxy ((s :-> a) : rs)
_ Record (Vectors ((s :-> a) : rs))
_ = forall a. HasCallStack => [Char] -> a
error [Char]
"Impossible case produceRec"
  {-# INLINE produceRec #-}

-- | Stream a finite sequence of rows into an efficient in-memory
-- representation for further manipulation. Each column of the input
-- table will be stored optimally based on its type, making use of the
-- resulting generators a matter of indexing into a densely packed
-- representation. Returns the number of rows and a record of column
-- indexing functions. See 'toAoS' to convert the result to a 'Frame'
-- which provides an easier-to-use function that indexes into the
-- table in a row-major fashion.
inCoreSoA :: forall m rs. (PrimMonad m, RecVec rs)
          => P.Producer (Record rs) m ()
          -> m (Int, V.Rec (((->) Int) :. ElField) rs)
inCoreSoA :: forall (m :: * -> *) (rs :: [(Symbol, *)]).
(PrimMonad m, RecVec rs) =>
Producer (Record rs) m () -> m (Int, Rec ((->) Int :. ElField) rs)
inCoreSoA Producer (Record rs) m ()
xs =
  do Record (VectorMs m rs)
mvs <- forall (rs :: [(Symbol, *)]) (m :: * -> *)
       (proxy :: [(Symbol, *)] -> *).
(RecVec rs, PrimMonad m) =>
proxy rs -> Int -> m (Record (VectorMs m rs))
allocRec (forall {k} (t :: k). Proxy t
Proxy :: Proxy rs) Int
initialCapacity
     let feed :: (Int, Int, Record (VectorMs m rs))
-> Record rs -> m (Int, Int, Record (VectorMs m rs))
feed (!Int
i, !Int
sz, !Record (VectorMs m rs)
mvs') Record rs
row
           | Int
i forall a. Eq a => a -> a -> Bool
== Int
sz = forall (rs :: [(Symbol, *)]) (m :: * -> *)
       (proxy :: [(Symbol, *)] -> *).
(RecVec rs, PrimMonad m) =>
proxy rs -> Record (VectorMs m rs) -> m (Record (VectorMs m rs))
growRec (forall {k} (t :: k). Proxy t
Proxy::Proxy rs) Record (VectorMs m rs)
mvs'
                       forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall a b c. (a -> b -> c) -> b -> a -> c
flip (Int, Int, Record (VectorMs m rs))
-> Record rs -> m (Int, Int, Record (VectorMs m rs))
feed Record rs
row forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int
i, Int
szforall a. Num a => a -> a -> a
*Int
2,)
           | Bool
otherwise = do forall (rs :: [(Symbol, *)]) (m :: * -> *)
       (proxy :: [(Symbol, *)] -> *).
(RecVec rs, PrimMonad m) =>
proxy rs -> Int -> Record (VectorMs m rs) -> Record rs -> m ()
writeRec (forall {k} (t :: k). Proxy t
Proxy::Proxy rs) Int
i Record (VectorMs m rs)
mvs' Record rs
row
                            forall (m :: * -> *) a. Monad m => a -> m a
return (Int
iforall a. Num a => a -> a -> a
+Int
1, Int
sz, Record (VectorMs m rs)
mvs')
         fin :: (Int, Int, Record (VectorMs m rs))
-> m (Int, Rec ((->) Int :. ElField) rs)
fin (Int
n,Int
_,Record (VectorMs m rs)
mvs') =
           do Record (Vectors rs)
vs <- forall (rs :: [(Symbol, *)]) (m :: * -> *)
       (proxy :: [(Symbol, *)] -> *).
(RecVec rs, PrimMonad m) =>
proxy rs
-> Int -> Record (VectorMs m rs) -> m (Record (Vectors rs))
freezeRec (forall {k} (t :: k). Proxy t
Proxy::Proxy rs) Int
n Record (VectorMs m rs)
mvs'
              forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int
n,) forall a b. (a -> b) -> a -> b
$ forall (rs :: [(Symbol, *)]) (proxy :: [(Symbol, *)] -> *).
RecVec rs =>
proxy rs -> Record (Vectors rs) -> Rec ((->) Int :. ElField) rs
produceRec (forall {k} (t :: k). Proxy t
Proxy::Proxy rs) Record (Vectors rs)
vs
     forall (m :: * -> *) x a b.
Monad m =>
(x -> a -> m x) -> m x -> (x -> m b) -> Producer a m () -> m b
P.foldM (Int, Int, Record (VectorMs m rs))
-> Record rs -> m (Int, Int, Record (VectorMs m rs))
feed (forall (m :: * -> *) a. Monad m => a -> m a
return (Int
0,Int
initialCapacity,Record (VectorMs m rs)
mvs)) (Int, Int, Record (VectorMs m rs))
-> m (Int, Rec ((->) Int :. ElField) rs)
fin Producer (Record rs) m ()
xs
{-# INLINE inCoreSoA #-}

-- | Stream a finite sequence of rows into an efficient in-memory
-- representation for further manipulation. Each column of the input
-- table will be stored optimally based on its type, making use of the
-- resulting generators a matter of indexing into a densely packed
-- representation. Returns a 'Frame' that provides a function to index
-- into the table.
inCoreAoS :: (PrimMonad m, RecVec rs)
          => P.Producer (Record rs) m () -> m (FrameRec rs)
inCoreAoS :: forall (m :: * -> *) (rs :: [(Symbol, *)]).
(PrimMonad m, RecVec rs) =>
Producer (Record rs) m () -> m (FrameRec rs)
inCoreAoS = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry forall (rs :: [(Symbol, *)]).
Int -> Rec ((->) Int :. ElField) rs -> FrameRec rs
toAoS) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (m :: * -> *) (rs :: [(Symbol, *)]).
(PrimMonad m, RecVec rs) =>
Producer (Record rs) m () -> m (Int, Rec ((->) Int :. ElField) rs)
inCoreSoA

-- | Like 'inCoreAoS', but applies the provided function to the record
-- of columns before building the 'Frame'.
inCoreAoS' :: (PrimMonad m, RecVec rs)
           => (V.Rec ((->) Int :. ElField) rs -> V.Rec ((->) Int :. ElField) ss)
           -> P.Producer (Record rs) m () -> m (FrameRec ss)
inCoreAoS' :: forall (m :: * -> *) (rs :: [(Symbol, *)]) (ss :: [(Symbol, *)]).
(PrimMonad m, RecVec rs) =>
(Rec ((->) Int :. ElField) rs -> Rec ((->) Int :. ElField) ss)
-> Producer (Record rs) m () -> m (FrameRec ss)
inCoreAoS' Rec ((->) Int :. ElField) rs -> Rec ((->) Int :. ElField) ss
f = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry forall (rs :: [(Symbol, *)]).
Int -> Rec ((->) Int :. ElField) rs -> FrameRec rs
toAoS forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int, Rec ((->) Int :. ElField) rs)
-> (Int, Rec ((->) Int :. ElField) ss)
aux) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (m :: * -> *) (rs :: [(Symbol, *)]).
(PrimMonad m, RecVec rs) =>
Producer (Record rs) m () -> m (Int, Rec ((->) Int :. ElField) rs)
inCoreSoA
  where aux :: (Int, Rec ((->) Int :. ElField) rs)
-> (Int, Rec ((->) Int :. ElField) ss)
aux (Int
x,Rec ((->) Int :. ElField) rs
y) = (Int
x, Rec ((->) Int :. ElField) rs -> Rec ((->) Int :. ElField) ss
f Rec ((->) Int :. ElField) rs
y)

-- | Convert a structure-of-arrays to an array-of-structures. This can
-- simplify usage of an in-memory representation.
toAoS :: Int -> V.Rec ((->) Int :. ElField) rs -> FrameRec rs
toAoS :: forall (rs :: [(Symbol, *)]).
Int -> Rec ((->) Int :. ElField) rs -> FrameRec rs
toAoS Int
n = forall r. Int -> (Int -> r) -> Frame r
Frame Int
n forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall {u} (h :: * -> *) (f :: u -> *) (g :: u -> *) (rs :: [u]).
Applicative h =>
(forall (x :: u). f x -> h (g x)) -> Rec f rs -> h (Rec g rs)
rtraverse forall l k (f :: l -> *) (g :: k -> l) (x :: k).
Compose f g x -> f (g x)
getCompose
{-# INLINE toAoS #-}

-- | Stream a finite sequence of rows into an efficient in-memory
-- representation for further manipulation. Each column of the input
-- table will be stored optimally based on its type, making use of the
-- resulting generator a matter of indexing into a densely packed
-- representation.
inCore :: forall m n rs. (PrimMonad m, RecVec rs, Monad n)
       => P.Producer (Record rs) m () -> m (P.Producer (Record rs) n ())
inCore :: forall (m :: * -> *) (n :: * -> *) (rs :: [(Symbol, *)]).
(PrimMonad m, RecVec rs, Monad n) =>
Producer (Record rs) m () -> m (Producer (Record rs) n ())
inCore Producer (Record rs) m ()
xs =
  do Record (VectorMs m rs)
mvs <- forall (rs :: [(Symbol, *)]) (m :: * -> *)
       (proxy :: [(Symbol, *)] -> *).
(RecVec rs, PrimMonad m) =>
proxy rs -> Int -> m (Record (VectorMs m rs))
allocRec (forall {k} (t :: k). Proxy t
Proxy :: Proxy rs) Int
initialCapacity
     let feed :: (Int, Int, Record (VectorMs m rs))
-> Record rs -> m (Int, Int, Record (VectorMs m rs))
feed (!Int
i,!Int
sz,!Record (VectorMs m rs)
mvs') Record rs
row
              | Int
i forall a. Eq a => a -> a -> Bool
== Int
sz = forall (rs :: [(Symbol, *)]) (m :: * -> *)
       (proxy :: [(Symbol, *)] -> *).
(RecVec rs, PrimMonad m) =>
proxy rs -> Record (VectorMs m rs) -> m (Record (VectorMs m rs))
growRec (forall {k} (t :: k). Proxy t
Proxy::Proxy rs) Record (VectorMs m rs)
mvs'
                          forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall a b c. (a -> b -> c) -> b -> a -> c
flip (Int, Int, Record (VectorMs m rs))
-> Record rs -> m (Int, Int, Record (VectorMs m rs))
feed Record rs
row forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int
i, Int
szforall a. Num a => a -> a -> a
*Int
2,)
              | Bool
otherwise = do forall (rs :: [(Symbol, *)]) (m :: * -> *)
       (proxy :: [(Symbol, *)] -> *).
(RecVec rs, PrimMonad m) =>
proxy rs -> Int -> Record (VectorMs m rs) -> Record rs -> m ()
writeRec (forall {k} (t :: k). Proxy t
Proxy::Proxy rs) Int
i Record (VectorMs m rs)
mvs' Record rs
row
                               forall (m :: * -> *) a. Monad m => a -> m a
return (Int
iforall a. Num a => a -> a -> a
+Int
1, Int
sz, Record (VectorMs m rs)
mvs')
         fin :: (Int, Int, Record (VectorMs m rs)) -> m (Producer (Record rs) n ())
fin (Int
n,Int
_,Record (VectorMs m rs)
mvs') =
           do Record (Vectors rs)
vs <- forall (rs :: [(Symbol, *)]) (m :: * -> *)
       (proxy :: [(Symbol, *)] -> *).
(RecVec rs, PrimMonad m) =>
proxy rs
-> Int -> Record (VectorMs m rs) -> m (Record (Vectors rs))
freezeRec (forall {k} (t :: k). Proxy t
Proxy::Proxy rs) Int
n Record (VectorMs m rs)
mvs'
              let spool :: Int -> Producer (Record rs) n ()
spool !Int
i
                    | Int
i forall a. Eq a => a -> a -> Bool
== Int
n = forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
                    | Bool
otherwise = forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
P.yield (forall (rs :: [(Symbol, *)]) (proxy :: [(Symbol, *)] -> *).
RecVec rs =>
proxy rs -> Int -> Record (Vectors rs) -> Record rs
indexRec forall {k} (t :: k). Proxy t
Proxy Int
i Record (Vectors rs)
vs) forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Int -> Producer (Record rs) n ()
spool (Int
iforall a. Num a => a -> a -> a
+Int
1)
              forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Int -> Producer (Record rs) n ()
spool Int
0
     forall (m :: * -> *) x a b.
Monad m =>
(x -> a -> m x) -> m x -> (x -> m b) -> Producer a m () -> m b
P.foldM (Int, Int, Record (VectorMs m rs))
-> Record rs -> m (Int, Int, Record (VectorMs m rs))
feed (forall (m :: * -> *) a. Monad m => a -> m a
return (Int
0,Int
initialCapacity,Record (VectorMs m rs)
mvs)) (Int, Int, Record (VectorMs m rs)) -> m (Producer (Record rs) n ())
fin Producer (Record rs) m ()
xs
{-# INLINE inCore #-}

-- | Build a 'Frame' from a collection of 'Record's using efficient
-- column-based storage.
toFrame :: (P.Foldable f, RecVec rs) => f (Record rs) -> Frame (Record rs)
toFrame :: forall (f :: * -> *) (rs :: [(Symbol, *)]).
(Foldable f, RecVec rs) =>
f (Record rs) -> Frame (Record rs)
toFrame f (Record rs)
xs = forall a. (forall s. ST s a) -> a
runST forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) (rs :: [(Symbol, *)]).
(PrimMonad m, RecVec rs) =>
Producer (Record rs) m () -> m (FrameRec rs)
inCoreAoS (forall (m :: * -> *) (f :: * -> *) a x' x.
(Functor m, Foldable f) =>
f a -> Proxy x' x () a m ()
P.each f (Record rs)
xs)
{-# INLINE toFrame #-}

-- | Keep only those rows of a 'FrameRec' that satisfy a predicate.
filterFrame :: RecVec rs => (Record rs -> Bool) -> FrameRec rs -> FrameRec rs
filterFrame :: forall (rs :: [(Symbol, *)]).
RecVec rs =>
(Record rs -> Bool) -> FrameRec rs -> FrameRec rs
filterFrame Record rs -> Bool
p FrameRec rs
f = forall a. (forall s. ST s a) -> a
runST forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) (rs :: [(Symbol, *)]).
(PrimMonad m, RecVec rs) =>
Producer (Record rs) m () -> m (FrameRec rs)
inCoreAoS forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) (f :: * -> *) a x' x.
(Functor m, Foldable f) =>
f a -> Proxy x' x () a m ()
P.each FrameRec rs
f forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
P.>-> forall (m :: * -> *) a r. Functor m => (a -> Bool) -> Pipe a a m r
P.filter Record rs -> Bool
p
{-# INLINE filterFrame #-}

-- | Process a stream of 'Record's into a stream of 'Frame's that each
-- contains no more than the given number of records.
produceFrameChunks :: forall rs m. (RecVec rs, PrimMonad m)
                   => Int
                   -> P.Producer (Record rs) m ()
                   -> P.Producer (FrameRec rs) m ()
produceFrameChunks :: forall (rs :: [(Symbol, *)]) (m :: * -> *).
(RecVec rs, PrimMonad m) =>
Int -> Producer (Record rs) m () -> Producer (FrameRec rs) m ()
produceFrameChunks Int
chunkSize = Producer (Record rs) m () -> Proxy X () () (FrameRec rs) m ()
go
  where go :: Producer (Record rs) m () -> Proxy X () () (FrameRec rs) m ()
go Producer (Record rs) m ()
src = do Record (VectorMs m rs)
mutVecs <- forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
P.lift (forall (rs :: [(Symbol, *)]) (m :: * -> *)
       (proxy :: [(Symbol, *)] -> *).
(RecVec rs, PrimMonad m) =>
proxy rs -> Int -> m (Record (VectorMs m rs))
allocRec (forall {k} (t :: k). Proxy t
Proxy :: Proxy rs) Int
chunkSize)
                    Producer (Record rs) m ()
-> Record (VectorMs m rs)
-> Int
-> Proxy X () () (FrameRec rs) m ()
goChunk Producer (Record rs) m ()
src Record (VectorMs m rs)
mutVecs Int
0
        goChunk :: Producer (Record rs) m ()
-> Record (VectorMs m rs)
-> Int
-> Proxy X () () (FrameRec rs) m ()
goChunk Producer (Record rs) m ()
src Record (VectorMs m rs)
mutVecs !Int
i
          | Int
i forall a. Ord a => a -> a -> Bool
>= Int
chunkSize =
              do FrameRec rs
chunk <- forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
P.lift (Int -> Record (VectorMs m rs) -> m (FrameRec rs)
freezeFrame Int
i Record (VectorMs m rs)
mutVecs)
                 forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
P.yield FrameRec rs
chunk
                 Producer (Record rs) m () -> Proxy X () () (FrameRec rs) m ()
go Producer (Record rs) m ()
src
          | Bool
otherwise =
            do Either () (Record rs, Producer (Record rs) m ())
maybeRow <- forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
P.lift (forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
P.next Producer (Record rs) m ()
src)
               case Either () (Record rs, Producer (Record rs) m ())
maybeRow of
                 Left ()
_ -> do
                   forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
P.lift (Int -> Record (VectorMs m rs) -> m (FrameRec rs)
freezeFrame Int
i Record (VectorMs m rs)
mutVecs) forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
P.yield
                 Right (Record rs
r,Producer (Record rs) m ()
src') -> do
                   forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
P.lift (forall (rs :: [(Symbol, *)]) (m :: * -> *)
       (proxy :: [(Symbol, *)] -> *).
(RecVec rs, PrimMonad m) =>
proxy rs -> Int -> Record (VectorMs m rs) -> Record rs -> m ()
writeRec (forall {k} (t :: k). Proxy t
Proxy::Proxy rs) Int
i Record (VectorMs m rs)
mutVecs Record rs
r)
                   Producer (Record rs) m ()
-> Record (VectorMs m rs)
-> Int
-> Proxy X () () (FrameRec rs) m ()
goChunk Producer (Record rs) m ()
src' Record (VectorMs m rs)
mutVecs (Int
iforall a. Num a => a -> a -> a
+Int
1)
        freezeFrame :: Int -> Record (VectorMs m rs) -> m (FrameRec rs)
        freezeFrame :: Int -> Record (VectorMs m rs) -> m (FrameRec rs)
freezeFrame Int
n =
          forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (forall (rs :: [(Symbol, *)]).
Int -> Rec ((->) Int :. ElField) rs -> FrameRec rs
toAoS Int
n forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (rs :: [(Symbol, *)]) (proxy :: [(Symbol, *)] -> *).
RecVec rs =>
proxy rs -> Record (Vectors rs) -> Rec ((->) Int :. ElField) rs
produceRec (forall {k} (t :: k). Proxy t
Proxy::Proxy rs))
          forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (rs :: [(Symbol, *)]) (m :: * -> *)
       (proxy :: [(Symbol, *)] -> *).
(RecVec rs, PrimMonad m) =>
proxy rs
-> Int -> Record (VectorMs m rs) -> m (Record (Vectors rs))
freezeRec (forall {k} (t :: k). Proxy t
Proxy::Proxy rs) Int
n
{-# INLINABLE produceFrameChunks #-}

-- | Split a 'Frame' into chunks of no more than the given number of
-- records. The underlying memory is shared with the original 'Frame'.
frameChunks :: Int -> FrameRec rs -> [FrameRec rs]
frameChunks :: forall (rs :: [(Symbol, *)]). Int -> FrameRec rs -> [FrameRec rs]
frameChunks Int
chunkSize FrameRec rs
whole = forall a b. (a -> b) -> [a] -> [b]
map Int -> FrameRec rs
aux [ Int
0, Int
chunkSize .. forall r. Frame r -> Int
frameLength FrameRec rs
whole forall a. Num a => a -> a -> a
- Int
1 ]
  where aux :: Int -> FrameRec rs
aux Int
i = forall r. Int -> (Int -> r) -> Frame r
Frame (forall a. Ord a => a -> a -> a
min (forall r. Frame r -> Int
frameLength FrameRec rs
whole forall a. Num a => a -> a -> a
- Int
i) Int
chunkSize)
                      (forall r. Frame r -> Int -> r
frameRow FrameRec rs
whole forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. Num a => a -> a -> a
+ Int
i))
{-# INLINABLE frameChunks #-}