{-| Module : Data.Conduit.Algorithms Copyright : 2013-2017 Luis Pedro Coelho License : MIT Maintainer : luis@luispedro.org Simple algorithms packaged as Conduits -} {-# LANGUAGE Rank2Types #-} module Data.Conduit.Algorithms ( uniqueOnC , uniqueC , mergeC , mergeC2 ) where import qualified Data.Conduit as C import qualified Data.Conduit.Combinators as CC import qualified Data.Set as S import Control.Monad.Trans.Class (lift) import Data.Conduit.Algorithms.Utils (awaitJust) -- | Unique conduit. -- -- Note that this conduit **does not** assume that the input is sorted. Instead -- it uses a 'Data.Set' to store previously seen elements. Thus, memory usage -- is O(N). uniqueOnC :: (Ord b, Monad m) => (a -> b) -> C.Conduit a m a uniqueOnC f = checkU (S.empty :: S.Set b) where checkU cur = awaitJust $ \val -> if f val `S.member` cur then checkU cur else do C.yield val checkU (S.insert (f val) cur) -- | See 'uniqueOnC' uniqueC :: (Ord a, Monad m) => C.Conduit a m a uniqueC = uniqueOnC id -- | Merge a list of sorted sources -- -- See 'mergeC2' mergeC :: (Ord a, Monad m) => [C.Source m a] -> C.Source m a mergeC [] = return () mergeC [s] = s mergeC [a,b] = mergeC2 a b mergeC args = let (a,b) = split2 args in mergeC2 (mergeC a) (mergeC b) where split2 :: [a] -> ([a],[a]) split2 [] = ([], []) split2 [a] = ([a], []) split2 [a,b] = ([a], [b]) split2 (x:y:rs) = let (xs,ys) = split2 rs in (x:xs, y:ys) -- | Take two sorted sources and merge them. -- -- See 'mergeC' mergeC2 :: (Ord a, Monad m) => C.Source m a -> C.Source m a -> C.Source m a mergeC2 s1 s2 = do (c1', e1) <- lift $ s1 C.$$+ CC.head (c2', e2) <- lift $ s2 C.$$+ CC.head continue c1' c2' e1 e2 where continue :: (Monad m, Ord a) => C.ResumableSource m a -> C.ResumableSource m a -> Maybe a -> Maybe a -> C.Source m a continue _ _ Nothing Nothing = return () continue _ c Nothing e = continue c undefined e Nothing continue c _ (Just e) Nothing = do C.yield e yieldAll c continue c1 c2 je1@(Just e1) je2@(Just e2) | compare e1 e2 == GT = continue c2 c1 je2 je1 | otherwise = do C.yield e1 (c1', e1') <- lift $ c1 C.$$++ CC.head continue c1' c2 e1' (Just e2) yieldAll :: (Monad m) => C.ResumableSource m a -> C.Source m a yieldAll rs = do (rs',v) <- lift $ rs C.$$++ CC.head case v of Just v' -> do C.yield v' yieldAll rs' Nothing -> return ()