{-# LANGUAGE DataKinds             #-}
{-# LANGUAGE FlexibleContexts      #-}
{-# LANGUAGE GADTs                 #-}
{-# LANGUAGE ScopedTypeVariables   #-}
{-# LANGUAGE TypeOperators         #-}
{-# LANGUAGE RankNTypes            #-}
{-# LANGUAGE PolyKinds             #-}
{-# LANGUAGE TypeFamilies          #-}
{-# LANGUAGE ConstraintKinds       #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FlexibleInstances     #-}
{-# LANGUAGE UndecidableInstances  #-}
{-# LANGUAGE AllowAmbiguousTypes   #-}
{-# LANGUAGE BangPatterns          #-}
{-# OPTIONS_GHC -fwarn-incomplete-patterns #-}
{-|
Module      : Control.MapReduce.Engines
Description : map-reduce-folds builders
Copyright   : (c) Adam Conner-Sax 2019
License     : BSD-3-Clause
Maintainer  : adam_conner_sax@yahoo.com
Stability   : experimental

Types and functions used by all the engines.
Notes:

  1. The provided grouping functions group elements into a 'Data.Sequence.Seq' as this is a good default choice.
  2. The <http://hackage.haskell.org/package/streamly Streamly> engine is the fastest in my benchmarks.  It's the engine used by default if you import @Control.MapReduce.Simple@.
  3. All the engines take a grouping function as a parameter and default ones are provided.  For simple map/reduce, the grouping step may be the bottleneck and I wanted to leave room for experimentation.  I've tried (and failed!) to find anything faster than using 'Map' or 'HashMap' via @toList . fromListWith (<>)@.

-}
module Control.MapReduce.Engines
  (
    -- * Fold Types
    MapReduceFold
  , MapReduceFoldM

  -- * Engine Helpers
  , reduceFunction
  , reduceFunctionM

  -- * @groupBy@ Helpers
  , fromListWithHT
  )
where

import qualified Control.MapReduce.Core        as MRC
import qualified Control.Foldl                 as FL
import           Control.Monad.ST              as ST
import           Data.Hashable                  ( Hashable )

import qualified Data.HashTable.Class          as HT


-- | Type-alias for a map-reduce-fold engine
type MapReduceFold y k c q x d = MRC.Unpack x y -> MRC.Assign k y c -> MRC.Reduce k c d -> FL.Fold x (q d)

-- | Type-alias for a monadic (effectful) map-reduce-fold engine
type MapReduceFoldM m y k c q x d = MRC.UnpackM m x y -> MRC.AssignM m k y c -> MRC.ReduceM m k c d -> FL.FoldM m x (q d)

-- | Turn @Reduce@ into a function we can apply
reduceFunction :: (Foldable h, Functor h) => MRC.Reduce k x d -> k -> h x -> d
reduceFunction :: Reduce k x d -> k -> h x -> d
reduceFunction (MRC.Reduce     k -> forall (h :: * -> *). (Foldable h, Functor h) => h x -> d
f) k
k = k -> forall (h :: * -> *). (Foldable h, Functor h) => h x -> d
f k
k
reduceFunction (MRC.ReduceFold k -> Fold x d
f) k
k = Fold x d -> h x -> d
forall (f :: * -> *) a b. Foldable f => Fold a b -> f a -> b
FL.fold (k -> Fold x d
f k
k)
{-# INLINABLE reduceFunction #-}

-- | Turn @ReduceM@ into a function we can apply
reduceFunctionM
  :: (Traversable h, Monad m) => MRC.ReduceM m k x d -> k -> h x -> m d
reduceFunctionM :: ReduceM m k x d -> k -> h x -> m d
reduceFunctionM (MRC.ReduceM     k -> forall (h :: * -> *). (Foldable h, Functor h) => h x -> m d
f) k
k = k -> forall (h :: * -> *). (Foldable h, Functor h) => h x -> m d
f k
k
reduceFunctionM (MRC.ReduceFoldM k -> FoldM m x d
f) k
k = FoldM m x d -> h x -> m d
forall (f :: * -> *) (m :: * -> *) a b.
(Foldable f, Monad m) =>
FoldM m a b -> f a -> m b
FL.foldM (k -> FoldM m x d
f k
k)
{-# INLINABLE reduceFunctionM #-}

{-
-- copied from Frames
-- which causes overlapping instances. 
instance {-# OVERLAPPABLE #-} Grouping Text where
  grouping = contramap hash grouping
-}

{- | an implementation of @fromListWith@ for mutable hashtables from the <http://hackage.haskell.org/package/hashtables-1.2.3.1 hastables>
package.  Basically a copy @fromList@ from that package using mutate instead of insert to apply the given function if the
was already in the map.  Might not be the ideal implementation.
Notes:

* This function is specific hashtable agnostic so you'll have to supply a specific implementation from the package via TypeApplication
* This function returns the hash-table in the @ST@ monad.  You can fold over it (using @foldM@ from @hashtables@)
and then use @runST@ to get the grouped structure out.
-}
fromListWithHT
  :: forall h k v s
   . (HT.HashTable h, Eq k, Hashable k)
  => (v -> v -> v)
  -> [(k, v)]
  -> ST.ST s (h s k v)
fromListWithHT :: (v -> v -> v) -> [(k, v)] -> ST s (h s k v)
fromListWithHT v -> v -> v
f [(k, v)]
l = do
  h s k v
ht <- ST s (h s k v)
forall (h :: * -> * -> * -> *) s k v. HashTable h => ST s (h s k v)
HT.new
  h s k v -> [(k, v)] -> ST s (h s k v)
go h s k v
ht [(k, v)]
l
 where
  g :: v -> Maybe v -> (Maybe v, ())
g v
x Maybe v
mx = (v -> Maybe v
forall a. a -> Maybe a
Just (v -> Maybe v) -> v -> Maybe v
forall a b. (a -> b) -> a -> b
$ v -> (v -> v) -> Maybe v -> v
forall b a. b -> (a -> b) -> Maybe a -> b
maybe v
x (v -> v -> v
`f` v
x) Maybe v
mx, ())
  go :: h s k v -> [(k, v)] -> ST s (h s k v)
go h s k v
ht = [(k, v)] -> ST s (h s k v)
go'
   where
    go' :: [(k, v)] -> ST s (h s k v)
go' []              = h s k v -> ST s (h s k v)
forall (m :: * -> *) a. Monad m => a -> m a
return h s k v
ht
    go' ((!k
k, !v
v) : [(k, v)]
xs) = do
      h s k v -> k -> (Maybe v -> (Maybe v, ())) -> ST s ()
forall (h :: * -> * -> * -> *) k s v a.
(HashTable h, Eq k, Hashable k) =>
h s k v -> k -> (Maybe v -> (Maybe v, a)) -> ST s a
HT.mutate h s k v
ht k
k (v -> Maybe v -> (Maybe v, ())
g v
v)
      [(k, v)] -> ST s (h s k v)
go' [(k, v)]
xs
{-# INLINABLE fromListWithHT #-}