{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE ParallelListComp #-} {-# LANGUAGE CPP #-} #if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ >= 702 {-# LANGUAGE Trustworthy #-} #endif ----------------------------------------------------------------------------- -- | -- Module : Data.Generator.LZ78 -- Copyright : (c) Edward Kmett 2009-2012 -- License : BSD-style -- Maintainer : ekmett@gmail.com -- Stability : experimental -- Portability : non-portable (type families) -- -- Compression algorithms are all about exploiting redundancy. When applying -- an expensive 'Reducer' to a redundant source, it may be better to -- extract the structural redundancy that is present. 'LZ78' is a compression -- algorithm that does so, without requiring the dictionary to be populated -- with all of the possible values of a data type unlike its later -- refinement LZW, and which has fewer comparison reqirements during encoding -- than its earlier counterpart LZ77. ----------------------------------------------------------------------------- module Data.Compressed.Internal.LZ78 ( -- * Lempel-Ziv 78 Token(..) , LZ78(..) -- * Encoding , encode -- /O(n)/ , encodeOrd -- /O(n log n)/ , encodeEq -- /O(n^2)/ -- * Decoding (reduce) , decode -- * Recoding , recode -- /O(n)/ , recodeOrd -- /O(n log n)/ , recodeEq -- /O(n^2)/ -- * Unsafe (exposes internal structure) , Entry(..) , entries ) where #if __GLASGOW_HASKELL__ < 710 import Control.Applicative import Data.Traversable #endif import Control.Monad.Zip import qualified Data.Sequence as Seq import Data.Sequence ((|>)) import qualified Data.Map as Map import qualified Data.HashMap.Lazy as HashMap import qualified Data.List as List import Data.Functor.Extend import Data.Generator import Data.Function (on) import Data.Key as Key import Data.Foldable import Data.Semigroup import Data.Pointed import Text.Read import Control.Comonad import Data.Hashable import Data.Semigroup.Reducer (Reducer(..), Count(..)) data Token a = Token {-# UNPACK #-} !Int a deriving (Eq, Ord) instance Functor Token where fmap f (Token i a) = Token i (f a) instance Foldable Token where foldMap f (Token _ a) = f a instance Traversable Token where traverse f (Token i a) = Token i <$> f a instance Extend Token where extended = extend instance Comonad Token where extend f t@(Token i _) = Token i (f t) duplicate t@(Token i _) = Token i t extract (Token _ a) = a instance Hashable a => Hashable (Token a) where hashWithSalt s (Token i a) = s `hashWithSalt` i `hashWithSalt` a -- | An LZ78 compressed 'Generator'. data LZ78 a = Cons {-# UNPACK #-} !(Token a) (LZ78 a) | Nil instance Show a => Show (LZ78 a) where showsPrec d xs = showParen (d > 10) $ showString "encode " . showsPrec 11 (toList xs) instance Eq a => Eq (LZ78 a) where (==) = (==) `on` decode instance Ord a => Ord (LZ78 a) where compare = compare `on` decode instance (Read a, Hashable a, Eq a) => Read (LZ78 a) where readPrec = parens $ prec 10 $ do Ident "encode" <- lexP encode <$> step readPrec instance Generator (LZ78 a) where type Elem (LZ78 a) = a mapTo = go (Seq.singleton mempty) where go _ _ m Nil = m go s f m (Cons (Token w c) ws) = m `mappend` go (s |> v) f v ws where v = Seq.index s w `mappend` unit (f c) instance Functor LZ78 where fmap f (Cons (Token i a) as) = Cons (Token i (f a)) (fmap f as) fmap _ Nil = Nil a <$ xs = go 0 (getCount (reduce xs)) where go !_ 0 = Nil go k n | n > k = Cons (Token k a) (go (k + 1) (n - k - 1)) | otherwise = Cons (Token (n - 1) a) Nil instance Pointed LZ78 where point a = Cons (Token 0 a) Nil instance Foldable LZ78 where foldMap f = unwrapMonoid . mapReduce f fold = unwrapMonoid . reduce -- | /O(n)/ Construct an LZ78-compressed 'Generator' using a 'HashMap' internally. encode :: (Hashable a, Eq a) => [a] -> LZ78 a encode = go HashMap.empty 1 0 where go _ _ _ [] = Nil go _ _ p [c] = Cons (Token p c) Nil go d f p (c:cs) = let t = Token p c in case HashMap.lookup t d of Just p' -> go d f p' cs Nothing -> Cons t (go (HashMap.insert t f d) (succ f) 0 cs) -- | /O(n log n)/ Contruct an LZ78-compressed 'Generator' using a 'Map' internally. encodeOrd :: Ord a => [a] -> LZ78 a encodeOrd = go Map.empty 1 0 where go _ _ _ [] = Nil go _ _ p [c] = Cons (Token p c) Nil go d f p (c:cs) = let t = Token p c in case Map.lookup t d of Just p' -> go d f p' cs Nothing -> Cons t (go (Map.insert t f d) (succ f) 0 cs) -- | /O(n^2)/ Contruct an LZ78-compressed 'Generator' using a list internally, requires an instance of Eq, -- less efficient than encode. encodeEq :: Eq a => [a] -> LZ78 a encodeEq = go [] 1 0 where go _ _ _ [] = Nil go _ _ p [c] = Cons (Token p c) Nil go d f p (c:cs) = let t = Token p c in case List.lookup t d of Just p' -> go d f p' cs Nothing -> Cons t (go ((t, f):d) (succ f) 0 cs) -- | A type-constrained 'reduce' operation decode :: LZ78 a -> [a] decode = reduce -- | /O(n)/. Recompress with 'Hashable' recode :: (Eq a, Hashable a) => LZ78 a -> LZ78 a recode = encode . decode -- | /O(n log n)/. Recompress with 'Ord' recodeOrd :: Ord a => LZ78 a -> LZ78 a recodeOrd = encodeOrd . decode -- | /O(n^2)/. Recompress with 'Eq' recodeEq :: Eq a => LZ78 a -> LZ78 a recodeEq = encodeEq . decode data Entry i a = Entry !i a deriving (Show,Read) instance Functor (Entry i) where fmap f (Entry i a) = Entry i (f a) instance Extend (Entry i) where extended = extend instance Comonad (Entry i) where extend f e@(Entry i _) = Entry i (f e) duplicate e@(Entry i _) = Entry i e extract (Entry _ a) = a instance Eq i => Eq (Entry i a) where Entry i _ == Entry j _ = i == j instance Ord i => Ord (Entry i a) where compare (Entry i _) (Entry j _) = compare i j instance Hashable i => Hashable (Entry i a) where hashWithSalt n (Entry i _) = hashWithSalt n i -- | exposes internal structure entries :: LZ78 a -> LZ78 (Entry Int a) entries = go 0 where go k (Cons (Token i t) xs) = Cons (Token i (Entry k t)) $ (go $! k + 1) xs go _ Nil = Nil instance Applicative LZ78 where pure a = Cons (Token 0 a) Nil fs <*> as = fmap extract $ encode $ do Entry i f <- decode (entries fs) Entry j a <- decode (entries as) return $ Entry (i,j) (f a) as *> bs = fmap extract $ encode $ Prelude.concat $ replicate (reduceWith getCount as) $ decode (entries bs) as <* bs = fmap extract $ encode $ Prelude.concat $ replicate (reduceWith getCount bs) <$> decode (entries as) instance Monad LZ78 where return a = Cons (Token 0 a) Nil (>>) = (*>) as >>= k = fmap extract $ encode $ do Entry i a <- decode (entries as) Entry j b <- decode (entries (k a)) return $ Entry (i,j) b instance MonadZip LZ78 where mzipWith = Key.zipWith munzip as = (fmap fst as, fmap snd as) instance Adjustable LZ78 where adjust f i = fmap extract . encode . adjust (Entry (-1) . f . extract) i . decode . entries type instance Key LZ78 = Int instance Lookup LZ78 where lookup i xs = Key.lookup i (decode xs) instance Indexable LZ78 where index xs i = index (decode xs) i instance FoldableWithKey LZ78 where foldMapWithKey f xs = foldMapWithKey f (decode xs) instance Zip LZ78 where zipWith f as bs = extract <$> encode [ Entry (i,j) (f a b) | Entry i a <- decode (entries as) | Entry j b <- decode (entries bs) ]