{-# LANGUAGE BangPatterns #-}
-- |
-- Module : Codec.Compression.Lzma
-- Copyright : © 2015 Herbert Valerio Riedel
-- License : BSD3
--
-- Maintainer : hvr@gnu.org
-- Stability : experimental
--
-- Compression and decompression of data streams in the lzma/xz format
--
-- See also the XZ Utils home page: <https://tukaani.org/xz/>
module Codec.Compression.Lzma
( -- * Simple (de)compression
compress
, decompress
-- * Extended API with control over parameters
, compressWith
, decompressWith
-- * Monadic incremental (de)compression API
--
-- | See the incremental API of the @zlib@ package
-- ("Codec.Compression.Zlib.Internal"), from which the stream types below
-- are derived, for more information.
-- ** Compression
, CompressStream(..)
, compressIO
, compressST
-- ** Decompression
, DecompressStream(..)
, decompressIO
, decompressST
, LzmaRet(..)
-- * Parameters
-- ** Compression parameters
, defaultCompressParams
, CompressParams
, compressIntegrityCheck
, compressLevel
, compressLevelExtreme
, IntegrityCheck(..)
, CompressionLevel(..)
-- ** Decompression parameters
, defaultDecompressParams
, DecompressParams
, decompressTellNoCheck
, decompressTellUnsupportedCheck
, decompressTellAnyCheck
, decompressConcatenated
, decompressAutoDecoder
, decompressMemLimit
) where
import Control.Exception
import Control.Monad
import Control.Monad.ST (stToIO)
import Control.Monad.ST.Lazy (ST, runST, strictToLazyST)
import qualified Control.Monad.ST.Strict as ST.Strict (ST)
import Control.Monad.ST.Unsafe (unsafeIOToST)
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BSL
import qualified Data.ByteString.Lazy.Internal as BSL
import GHC.IO (noDuplicate)
import LibLzma
-- | Decompress a lazy 'ByteString' from the @.xz@ format.
--
-- This is 'decompressWith' applied to 'defaultDecompressParams'.
decompress :: BSL.ByteString -> BSL.ByteString
decompress input = decompressWith defaultDecompressParams input
-- | Like 'decompress' but with the ability to specify various decompression
-- parameters. Typical usage:
--
-- > decompressWith defaultDecompressParams { decompress... = ... }
--
-- The result is produced by driving 'decompressST' inside the lazy 'ST'
-- monad, feeding it the chunks of the input one at a time and an empty
-- chunk once the input is exhausted.
--
-- Failures are reported by calling 'error': when the decoder reports a
-- 'DecompressStreamError', or when the stream ends while input is still
-- left over ("trailing data").
decompressWith :: DecompressParams -> BSL.ByteString -> BSL.ByteString
decompressWith parms input = runST (decompressST parms >>= pump input)
  where
    -- The pending input is inspected before the decoder state, exactly as
    -- a clause-per-equation definition would do.
    pump pending stream = case (pending, stream) of
        (BSL.Empty, DecompressStreamEnd rest)
            | BS.null rest -> return BSL.Empty
            | otherwise    ->
                error "Codec.Compression.Lzma.decompressWith: trailing data"

        (BSL.Chunk _ _, DecompressStreamEnd _) ->
            error "Codec.Compression.Lzma.decompressWith: trailing data"

        (_, DecompressStreamError e) ->
            error ("Codec.Compression.Lzma.decompressWith: decoding error " ++ show e)

        -- Input exhausted: an empty chunk tells the decoder so.
        (BSL.Empty, DecompressInputRequired supply) ->
            supply BS.empty >>= pump BSL.Empty

        (BSL.Chunk c remaining, DecompressInputRequired supply) ->
            supply c >>= pump remaining

        -- Emit the output chunk in front of whatever the rest of the
        -- (lazily run) decoding produces.
        (_, DecompressOutputAvailable oc next) ->
            fmap (BSL.chunk oc) (next >>= pump pending)
{-# NOINLINE decompressWith #-}
----------------------------------------------------------------------------
----------------------------------------------------------------------------
-- | Compress a lazy 'ByteString' into the @.xz@ format.
--
-- This is 'compressWith' applied to 'defaultCompressParams'.
compress :: BSL.ByteString -> BSL.ByteString
compress input = compressWith defaultCompressParams input
-- | Like 'compress' but with the ability to specify various compression
-- parameters. Typical usage:
--
-- > compressWith defaultCompressParams { compress... = ... }
--
-- The result is produced by driving 'compressST' inside the lazy 'ST'
-- monad, feeding it the chunks of the input one at a time and an empty
-- chunk once the input is exhausted. The flush action offered by
-- 'CompressInputRequired' is never used here.
compressWith :: CompressParams -> BSL.ByteString -> BSL.ByteString
compressWith parms input = runST (compressST parms >>= pump input)
  where
    -- The pending input is inspected before the encoder state, exactly as
    -- a clause-per-equation definition would do.
    pump pending stream = case (pending, stream) of
        (BSL.Empty, CompressStreamEnd) ->
            return BSL.Empty

        -- The encoder only finishes after it was given the empty chunk,
        -- which in turn is only sent once the input is exhausted.
        (BSL.Chunk _ _, CompressStreamEnd) ->
            error "Codec.Compression.Lzma.compressWith: the impossible happened"

        -- Input exhausted: an empty chunk tells the encoder so.
        (BSL.Empty, CompressInputRequired _ supply) ->
            supply BS.empty >>= pump BSL.Empty

        (BSL.Chunk c remaining, CompressInputRequired _ supply) ->
            supply c >>= pump remaining

        -- Emit the output chunk in front of whatever the rest of the
        -- (lazily run) encoding produces.
        (_, CompressOutputAvailable oc next) ->
            fmap (BSL.chunk oc) (next >>= pump pending)
{-# NOINLINE compressWith #-}
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
-- | State of an incremental compression, as handed back by every step of
-- 'compressIO' and 'compressST'.
--
-- Type derived from @zlib@ and augmented with flushing support.
data CompressStream m =
     CompressInputRequired {- flush -} (m (CompressStream m))
                           {- supply -} (ByteString -> m (CompressStream m))
       -- ^ Compression process requires input to proceed. You can
       -- either flush the stream (first field), supply an input chunk
       -- (second field), or signal the end of input (via empty
       -- chunk).
   | CompressOutputAvailable !ByteString (m (CompressStream m)) -- ^ Output chunk available.
   | CompressStreamEnd
       -- ^ Compression is finished. The incremental encoders in this
       -- module return this after the end of input was signalled and all
       -- remaining output has been handed out.
-- | Incremental compression in the 'IO' monad.
--
-- An encoder is created from the given 'CompressParams'; if that fails,
-- the reported error value is thrown with 'throwIO'. Otherwise the first
-- state returned is always 'CompressInputRequired', which offers:
--
-- * a /flush/ action, which runs the encoder with 'LzmaSyncFlush' until
--   it reports 'LzmaRetStreamEnd' and then asks for input again;
--
-- * a /supply/ function. A non-empty chunk is fed to the encoder; an empty
--   chunk signals the end of input, upon which the encoder is run with
--   'LzmaFinish' until drained, the stream is ended and
--   'CompressStreamEnd' is returned.
--
-- Any encoder return code that is not handled explicitly is thrown with
-- 'throwIO'. Violated internal expectations (no progress on a non-empty
-- chunk, input consumed while draining, ...) are reported via 'fail', with
-- a message prefixed by @compressIO:@.
compressIO :: CompressParams -> IO (CompressStream IO)
compressIO parms = (stToIO $ newEncodeLzmaStream parms) >>= either throwIO go
  where
    -- Size of the output buffer requested from each encoder call.
    bUFSIZ = 32752

    go :: LzmaStream -> IO (CompressStream IO)
    go ls = return inputRequired
      where
        inputRequired = CompressInputRequired goFlush (withChunk goFinish goInput)

        -- Feed a (non-empty) chunk; keeps re-feeding the unconsumed rest.
        goInput :: ByteString -> IO (CompressStream IO)
        goInput chunk = do
            (rc, used, obuf) <- stToIO $ runLzmaStream ls chunk LzmaRun bUFSIZ

            let chunk' = BS.drop used chunk

            case rc of
              LzmaRetOK
                | BS.null obuf -> do
                    -- Neither output nor consumed input would loop forever.
                    unless (used > 0) $
                        fail "compressIO: input chunk not consumed"
                    withChunk (return inputRequired) goInput chunk'
                | otherwise    -> return (CompressOutputAvailable obuf
                                          (withChunk (return inputRequired) goInput chunk'))

              _ -> throwIO rc

        goFlush, goFinish :: IO (CompressStream IO)
        goFlush  = goSync LzmaSyncFlush (return inputRequired)
        goFinish = goSync LzmaFinish retStreamEnd

        -- drain encoder till LzmaRetStreamEnd is reported
        goSync :: LzmaAction -> IO (CompressStream IO) -> IO (CompressStream IO)
        goSync LzmaRun _ = fail "compressIO: goSync called with invalid argument"
        goSync action next = goSync'
          where
            goSync' = do
                (rc, used, obuf) <- stToIO $ runLzmaStream ls BS.empty action bUFSIZ
                -- No input was supplied, so none can have been consumed.
                -- Checked explicitly (rather than via a failable pattern)
                -- so that a violation is reported with a meaningful message.
                unless (used == 0) $
                    fail "compressIO: n was not zero"
                case rc of
                  LzmaRetOK
                    | BS.null obuf -> fail ("compressIO: empty output chunk during " ++ show action)
                    | otherwise    -> return (CompressOutputAvailable obuf goSync')
                  LzmaRetStreamEnd
                    | BS.null obuf -> next
                    | otherwise    -> return (CompressOutputAvailable obuf next)
                  _ -> throwIO rc

        -- End the encoder stream before announcing the end of the output.
        retStreamEnd = do
            !() <- stToIO (endLzmaStream ls)
            return CompressStreamEnd
-- | Incremental compression in the lazy 'ST' monad.
--
-- An encoder is created from the given 'CompressParams'; if that fails,
-- the reported error value is raised with 'throw'. Otherwise the first
-- state returned is always 'CompressInputRequired', which offers:
--
-- * a /flush/ action, which runs the encoder with 'LzmaSyncFlush' until
--   it reports 'LzmaRetStreamEnd' and then asks for input again;
--
-- * a /supply/ function. A non-empty chunk is fed to the encoder; an empty
--   chunk signals the end of input, upon which the encoder is run with
--   'LzmaFinish' until drained, the stream is ended and
--   'CompressStreamEnd' is returned.
--
-- Any encoder return code that is not handled explicitly is raised with
-- 'throw'; violated internal expectations are raised with 'error', with a
-- message prefixed by @compressST:@. Every call into the encoder is
-- preceded by 'noDuplicateST'.
compressST :: CompressParams -> ST s (CompressStream (ST s))
compressST parms = strictToLazyST (newEncodeLzmaStream parms) >>=
                   either throw go
  where
    -- Size of the output buffer requested from each encoder call.
    bUFSIZ = 32752

    go ls = return inputRequired
      where
        inputRequired = CompressInputRequired goFlush (withChunk goFinish goInput)

        -- Feed a (non-empty) chunk; keeps re-feeding the unconsumed rest.
        goInput :: ByteString -> ST s (CompressStream (ST s))
        goInput chunk = do
            (rc, used, obuf) <- strictToLazyST (noDuplicateST >>
                                                runLzmaStream ls chunk LzmaRun bUFSIZ)

            let chunk' = BS.drop used chunk

            case rc of
              LzmaRetOK
                | BS.null obuf -> do
                    -- Neither output nor consumed input would loop forever.
                    unless (used > 0) $
                        error "compressST: input chunk not consumed"
                    withChunk (return inputRequired) goInput chunk'
                | otherwise    -> return (CompressOutputAvailable obuf
                                          (withChunk (return inputRequired) goInput chunk'))

              _ -> throw rc

        goFlush, goFinish :: ST s (CompressStream (ST s))
        goFlush  = goSync LzmaSyncFlush (return inputRequired)
        goFinish = goSync LzmaFinish retStreamEnd

        -- drain encoder till LzmaRetStreamEnd is reported
        goSync :: LzmaAction -> ST s (CompressStream (ST s)) -> ST s (CompressStream (ST s))
        goSync LzmaRun _ = error "compressST: goSync called with invalid argument"
        goSync action next = goSync'
          where
            goSync' = do
                (rc, n, obuf) <- strictToLazyST (noDuplicateST >>
                                                 runLzmaStream ls BS.empty action bUFSIZ)
                -- No input was supplied, so none can have been consumed.
                when (n /= 0) $ error "compressST: n was not zero"
                case rc of
                  LzmaRetOK
                    -- The message names this function (it used to say
                    -- "compressIO", pointing at the wrong code path).
                    | BS.null obuf -> error ("compressST: empty output chunk during " ++ show action)
                    | otherwise    -> return (CompressOutputAvailable obuf goSync')
                  LzmaRetStreamEnd
                    | BS.null obuf -> next
                    | otherwise    -> return (CompressOutputAvailable obuf next)
                  _ -> throw rc

        -- End the encoder stream before announcing the end of the output.
        retStreamEnd = do
            !() <- strictToLazyST (noDuplicateST >> endLzmaStream ls)
            return CompressStreamEnd
--------------------------------------------------------------------------------
-- | State of an incremental decompression, as handed back by every step
-- of 'decompressIO' and 'decompressST'.
data DecompressStream m =
     DecompressInputRequired (ByteString -> m (DecompressStream m)) -- ^ Decoding process requires input to proceed. An empty 'ByteString' chunk signals end of input.
   | DecompressOutputAvailable !ByteString (m (DecompressStream m)) -- ^ Decompressed output chunk available.
   | DecompressStreamEnd ByteString -- ^ Decoded stream is finished. Any unconsumed leftovers from the input stream are returned via the 'ByteString' field
   | DecompressStreamError !LzmaRet -- TODO define subset-enum of LzmaRet
       -- ^ Decoding could not continue. The field carries the 'LzmaRet'
       -- code reported by the decoder. The incremental decoders in this
       -- module also report 'LzmaRetOK' here when the end of input was
       -- signalled but the decoder neither produced further output nor
       -- reported the end of the stream.
-- | Incremental decompression in the 'IO' monad.
--
-- A decoder is created from the given 'DecompressParams'; if that fails,
-- the reported error value is returned as 'DecompressStreamError'.
-- Otherwise the first state returned is always 'DecompressInputRequired'.
--
-- * Supplying a non-empty chunk feeds it to the decoder. Once a chunk has
--   been consumed completely and output was produced, the decoder is run
--   without input until it stops producing output before more input is
--   requested.
--
-- * Supplying an empty chunk signals the end of input; the decoder is then
--   run with 'LzmaFinish'.
--
-- When the decoder reports 'LzmaRetStreamEnd' the stream is ended and
-- 'DecompressStreamEnd' is returned together with the unconsumed rest of
-- the current chunk. Decoder return codes that are not handled explicitly
-- are returned as 'DecompressStreamError'. Violated internal expectations
-- are reported via 'fail', with a message prefixed by @decompressIO:@.
decompressIO :: DecompressParams -> IO (DecompressStream IO)
decompressIO parms = stToIO (newDecodeLzmaStream parms) >>= either (return . DecompressStreamError) go
  where
    -- Size of the output buffer requested from each decoder call.
    bUFSIZ = 32752

    go :: LzmaStream -> IO (DecompressStream IO)
    go ls = return inputRequired
      where
        inputRequired = DecompressInputRequired goInput

        goInput :: ByteString -> IO (DecompressStream IO)
        goInput chunk
          | BS.null chunk = goFinish
          | otherwise = do
            (rc, used, obuf) <- stToIO $ runLzmaStream ls chunk LzmaRun bUFSIZ

            let chunk' = BS.drop used chunk

            case rc of
              LzmaRetOK
                | BS.null obuf -> do
                    -- Neither output nor consumed input would loop forever.
                    unless (used > 0) $
                        fail "decompressIO: input chunk not consumed"
                    withChunk (return inputRequired) goInput chunk'
                | otherwise    -> return (DecompressOutputAvailable obuf
                                          (withChunk goDrain goInput chunk'))

              LzmaRetStreamEnd
                | BS.null obuf -> retStreamEnd chunk'
                | otherwise    -> return (DecompressOutputAvailable obuf
                                          (retStreamEnd chunk'))

              _ -> return (DecompressStreamError rc)

        goDrain, goFinish :: IO (DecompressStream IO)
        -- Collect output still pending after a chunk was fully consumed.
        goDrain  = goSync LzmaRun (return inputRequired)
        -- End of input: no further output and no stream end is an error.
        goFinish = goSync LzmaFinish (return $ DecompressStreamError LzmaRetOK)

        -- Run the decoder without input until it stops producing output.
        goSync :: LzmaAction -> IO (DecompressStream IO) -> IO (DecompressStream IO)
        goSync action next = goSync'
          where
            goSync' = do
                (rc, used, obuf) <- stToIO $ runLzmaStream ls BS.empty action bUFSIZ
                -- No input was supplied, so none can have been consumed.
                -- Checked explicitly (rather than via a failable pattern)
                -- so that a violation is reported with a meaningful message.
                unless (used == 0) $
                    fail "decompressIO: n was not zero"
                case rc of
                  LzmaRetOK
                    | BS.null obuf -> next
                    | otherwise    -> return (DecompressOutputAvailable obuf goSync')

                  LzmaRetStreamEnd
                    | BS.null obuf -> eof0
                    | otherwise    -> return (DecompressOutputAvailable obuf eof0)

                  _ -> return (DecompressStreamError rc)

            eof0 = retStreamEnd BS.empty

        -- End the decoder stream and hand back the unconsumed input.
        retStreamEnd chunk' = do
            !() <- stToIO (endLzmaStream ls)
            return (DecompressStreamEnd chunk')
-- | Incremental decompression in the lazy 'ST' monad.
--
-- A decoder is created from the given 'DecompressParams'; a failure to do
-- so is returned as 'DecompressStreamError'. Otherwise the first state is
-- always 'DecompressInputRequired'. A non-empty chunk is fed to the
-- decoder, an empty one signals the end of input. Every call into the
-- decoder is preceded by 'noDuplicateST'.
decompressST :: DecompressParams -> ST s (DecompressStream (ST s))
decompressST parms =
    strictToLazyST (newDecodeLzmaStream parms)
        >>= either (return . DecompressStreamError) begin
  where
    -- Size of the output buffer requested from each decoder call.
    outBufSize = 32752

    begin :: LzmaStream -> ST s (DecompressStream (ST s))
    begin stream = return needInput
      where
        needInput = DecompressInputRequired feed

        -- A single call into the decoder, lifted into lazy 'ST'.
        step action inp =
            strictToLazyST (noDuplicateST >>
                            runLzmaStream stream inp action outBufSize)

        feed :: ByteString -> ST s (DecompressStream (ST s))
        feed chunk
          | BS.null chunk = finish
          | otherwise = do
              (rc, used, out) <- step LzmaRun chunk

              let leftover = BS.drop used chunk

              case rc of
                LzmaRetOK
                  | BS.null out -> do
                      -- Neither output nor consumed input would loop forever.
                      unless (used > 0) $
                          error "decompressST: input chunk not consumed"
                      withChunk (return needInput) feed leftover
                  | otherwise ->
                      return (DecompressOutputAvailable out
                                  (withChunk drain feed leftover))

                LzmaRetStreamEnd
                  | BS.null out -> endOfStream leftover
                  | otherwise ->
                      return (DecompressOutputAvailable out
                                  (endOfStream leftover))

                _ -> return (DecompressStreamError rc)

        drain, finish :: ST s (DecompressStream (ST s))
        -- Collect output still pending after a chunk was fully consumed.
        drain  = sync LzmaRun (return needInput)
        -- End of input: no further output and no stream end is an error.
        finish = sync LzmaFinish (return $ DecompressStreamError LzmaRetOK)

        -- Run the decoder without input until it stops producing output.
        sync :: LzmaAction -> ST s (DecompressStream (ST s)) -> ST s (DecompressStream (ST s))
        sync action next = again
          where
            again = do
                (rc, n, out) <- step action BS.empty
                -- No input was supplied, so none can have been consumed.
                when (n /= 0) $ error "decompressST: n was not zero"
                case rc of
                  LzmaRetOK
                    | BS.null out -> next
                    | otherwise   -> return (DecompressOutputAvailable out again)

                  LzmaRetStreamEnd
                    | BS.null out -> noLeftover
                    | otherwise   -> return (DecompressOutputAvailable out noLeftover)

                  _ -> return (DecompressStreamError rc)

            noLeftover = endOfStream BS.empty

        -- End the decoder stream and hand back the unconsumed input.
        endOfStream leftover = do
            !() <- strictToLazyST (noDuplicateST >> endLzmaStream stream)
            return (DecompressStreamEnd leftover)
-- | Eliminator for strict 'ByteString's in the spirit of 'maybe': yields
-- the first argument for an empty chunk, and otherwise applies the second
-- argument to the (non-empty) chunk.
withChunk :: t -> (ByteString -> t) -> ByteString -> t
withChunk emptyChunk nemptyChunk chunk =
    if BS.null chunk
        then emptyChunk
        else nemptyChunk chunk
-- | GHC's 'noDuplicate' lifted into the strict 'ST' monad via
-- 'unsafeIOToST'.
--
-- The lazy-'ST' variants in this module sequence this action in front of
-- every call into the underlying stream, presumably to keep a lazily
-- evaluated step from being run by two threads at once.
--
-- See the report of this problem against the @zlib@ package,
-- <https://github.com/haskell/zlib/issues/7>.
noDuplicateST :: ST.Strict.ST s ()
noDuplicateST = unsafeIOToST noDuplicate