```{-|
Module      : Data.Conduit.Algorithms
Copyright   : 2013-2017 Luis Pedro Coelho
Maintainer  : luis@luispedro.org

Simple algorithms packaged as Conduits
-}

{-# LANGUAGE Rank2Types #-}
module Data.Conduit.Algorithms
( uniqueOnC
, uniqueC
, mergeC
, mergeC2
) where

import qualified Data.Conduit as C
import qualified Data.Conduit.Combinators as CC
import qualified Data.Set as S

import           Data.Conduit.Algorithms.Utils (awaitJust)

-- | Unique conduit.
--
-- Note that this conduit **does not** assume that the input is sorted. Instead
-- it uses a 'Data.Set' to store previously seen elements. Thus, memory usage
-- is O(N).
uniqueOnC :: (Ord b, Monad m) => (a -> b) -> C.Conduit a m a
uniqueOnC f = checkU (S.empty :: S.Set b)
where
checkU cur = awaitJust \$ \val ->
if f val `S.member` cur
then checkU cur
else do
C.yield val
checkU (S.insert (f val) cur)
-- | See 'uniqueOnC'
uniqueC :: (Ord a, Monad m) => C.Conduit a m a
uniqueC = uniqueOnC id

-- | Merge a list of sorted sources
--
-- See 'mergeC2'
mergeC :: (Ord a, Monad m) => [C.Source m a] -> C.Source m a
mergeC [] = return ()
mergeC [s] = s
mergeC [a,b] = mergeC2 a b
mergeC args = let (a,b) = split2 args in mergeC2 (mergeC a) (mergeC b)
where
split2 :: [a] -> ([a],[a])
split2 [] = ([], [])
split2 [a] = ([a], [])
split2 [a,b] = ([a], [b])
split2 (x:y:rs) = let (xs,ys) = split2 rs in (x:xs, y:ys)

-- | Take two sorted sources and merge them.
--
-- See 'mergeC'
mergeC2 :: (Ord a, Monad m) => C.Source m a -> C.Source m a -> C.Source m a
mergeC2 s1 s2 = do
(c1', e1) <- lift \$ s1 C.\$\$+ CC.head
(c2', e2) <- lift \$ s2 C.\$\$+ CC.head
continue c1' c2' e1 e2
where
continue :: (Monad m, Ord a) => C.ResumableSource m a -> C.ResumableSource m a -> Maybe a -> Maybe a -> C.Source m a
continue _ _ Nothing Nothing = return ()
continue _ c Nothing e = continue c undefined e Nothing
continue c _ (Just e) Nothing = do
C.yield e
yieldAll c
continue c1 c2 je1@(Just e1) je2@(Just e2)
| compare e1 e2 == GT = continue c2 c1 je2 je1
| otherwise = do
C.yield e1
(c1', e1') <- lift \$ c1 C.\$\$++ CC.head
continue c1' c2 e1' (Just e2)
yieldAll :: (Monad m) => C.ResumableSource m a -> C.Source m a
yieldAll rs = do
(rs',v) <- lift \$ rs C.\$\$++ CC.head
case v of
Just v' -> do
C.yield v'
yieldAll rs'
Nothing -> return ()
```