module Data.Iteratee.BZip
(
enumCompress,
enumDecompress,
BZipParamsException(..),
BZipException(..),
CompressParams(..),
defaultCompressParams,
DecompressParams(..),
defaultDecompressParams,
BlockSize(..),
WorkFactor(..)
)
where
import Control.Applicative
import Control.Exception
import Control.Monad.Trans
import Data.ByteString as BS
import Data.ByteString.Internal
import Data.Iteratee
import Data.Iteratee.IO
import Data.Typeable
import Foreign
import Foreign.C
import qualified Foreign.Concurrent as C
import System.IO (stderr)
import qualified System.IO as IO
data BZipParamsException
= IncorrectBlockSize !Int
| IncorrectWorkFactor !Int
| IncorrectBufferSize !Int
deriving (Eq,Typeable)
data BZipException
= ConfigError
| MemError
| DataError
| DataErrorMagic
| Unexpected !Int
| IncorrectState
deriving (Eq,Typeable)
data BZipFlush = BZipFlush
deriving (Eq,Typeable)
instance Show BZipFlush where
show BZipFlush = "bzlib: flush requested"
instance Exception BZipFlush
fromFlush :: BZipFlush -> CInt
fromFlush BZipFlush = 1
instance Show BZipParamsException where
show (IncorrectBlockSize size)
= "bzlib: incorrect block size " ++ show size
show (IncorrectWorkFactor wf)
= "bzlib: incorrect work factor " ++ show wf
show (IncorrectBufferSize size)
= "bzlib: incorrect buffer size " ++ show size
instance Show BZipException where
show ConfigError = "bzlib: library is not configure properly"
show MemError = "bzlib: memory allocation failed"
show DataError = "bzlib: input is corrupted"
show DataErrorMagic = "bzlib: magic number does not match"
show (Unexpected n) = "bzlib: unexpected error " ++ show n
show IncorrectState = "bzlib: incorrect state"
instance Exception BZipParamsException
instance Exception BZipException
newtype BZStream = BZStream (ForeignPtr BZStream)
withBZStream :: BZStream -> (Ptr BZStream -> IO a) -> IO a
withBZStream (BZStream fptr) = withForeignPtr fptr
mallocBZStream :: IO BZStream
mallocBZStream = BZStream <$> mallocForeignPtrBytes (48)
data CompressParams = CompressParams {
compressBlockSize :: BlockSize,
compressWorkFactor :: WorkFactor,
compressBufferSize :: !Int
}
defaultCompressParams
= CompressParams DefaultBlockSize DefaultWorkFactor (8*1024)
data DecompressParams = DecompressParams {
decompressSaveMemory :: !Bool,
decompressBufferSize :: !Int
}
defaultDecompressParams = DecompressParams False (8*1024)
data BlockSize
= DefaultBlockSize
| BestSpeed
| BestCompression
| CompressionLevel !Int
data WorkFactor
= DefaultWorkFactor
| BestSpeedWorkFactor
| BestCompressionWorkFactor
| WorkFactor !Int
fromBlockSize :: BlockSize -> Either BZipParamsException CInt
fromBlockSize DefaultBlockSize = Right $! 6
fromBlockSize BestSpeed = Right $! 1
fromBlockSize BestCompression = Right $! 9
fromBlockSize (CompressionLevel lvl)
| lvl < 0 || lvl > 250 = Left $! IncorrectBlockSize $! fromIntegral lvl
| otherwise = Right $! fromIntegral lvl
fromWorkFactor :: WorkFactor -> Either BZipParamsException CInt
fromWorkFactor DefaultWorkFactor = Right $! 0
fromWorkFactor BestSpeedWorkFactor = Right $! 1
fromWorkFactor BestCompressionWorkFactor = Right $! 250
fromWorkFactor (WorkFactor wf)
| wf < 0 || wf > 250 = Left $! IncorrectWorkFactor $! fromIntegral wf
| otherwise = Right $! fromIntegral wf
fromErrno :: CInt -> Either BZipException Bool
fromErrno (0) = Right $! True
fromErrno (1) = Right $! True
fromErrno (2) = Right $! True
fromErrno (3) = Right $! True
fromErrno (4) = Right $! False
fromErrno (9) = Left $! ConfigError
fromErrno (3) = Left $! MemError
fromErrno (4) = Left $! DataError
fromErrno (5) = Left $! DataErrorMagic
fromErrno n = Left $! Unexpected $! fromIntegral n
newtype Initial = Initial BZStream
data EmptyIn = EmptyIn !BZStream !ByteString
data FullOut = FullOut !BZStream !ByteString
data Invalid = Invalid !BZStream !ByteString !ByteString
data Finishing = Finishing !BZStream !ByteString
data Flushing = Flushing !BZStream !BZipFlush !ByteString
withByteString :: ByteString -> (Ptr Word8 -> Int -> IO a) -> IO a
withByteString (PS ptr off len) f
= withForeignPtr ptr (\ptr' -> f (ptr' `plusPtr` off) len)
mkByteString :: MonadIO m => Int -> m ByteString
mkByteString s = liftIO $ do
base <- mallocForeignPtrArray s
withForeignPtr base $ \ptr -> C.addForeignPtrFinalizer base $ do
IO.hPutStrLn stderr $ "Freed buffer " ++ show ptr
IO.hPutStrLn stderr $ "Allocated buffer " ++ show base
return $! PS base 0 s
dumpZStream :: BZStream -> IO ()
dumpZStream bzstr = withBZStream bzstr $ \bzptr -> do
IO.hPutStr stderr $ "<<BZStream@"
IO.hPutStr stderr $ (show bzptr)
IO.hPutStr stderr . (" next_in=" ++) . show =<<
((\hsc_ptr -> peekByteOff hsc_ptr 0) bzptr :: IO (Ptr CChar))
IO.hPutStr stderr . (" avail_in=" ++) . show =<<
((\hsc_ptr -> peekByteOff hsc_ptr 4) bzptr :: IO CUInt)
total_in_lo <- (\hsc_ptr -> peekByteOff hsc_ptr 8) bzptr :: IO CUInt
total_in_hi <- (\hsc_ptr -> peekByteOff hsc_ptr 12) bzptr :: IO CUInt
let total_in_lo' = fromIntegral total_in_lo
total_in_hi' = fromIntegral total_in_hi `shiftL` 32
total_in = total_in_lo' + total_in_hi' :: Int64
IO.hPutStr stderr $ " total_out=" ++ show (total_in :: Int64)
IO.hPutStr stderr . (" next_out=" ++) . show =<<
((\hsc_ptr -> peekByteOff hsc_ptr 16) bzptr :: IO (Ptr CChar))
IO.hPutStr stderr . (" avail_out=" ++) . show =<<
((\hsc_ptr -> peekByteOff hsc_ptr 20) bzptr :: IO CUInt)
total_out_lo <- (\hsc_ptr -> peekByteOff hsc_ptr 24) bzptr :: IO CUInt
total_out_hi <- (\hsc_ptr -> peekByteOff hsc_ptr 28) bzptr:: IO CUInt
let total_out_lo' = fromIntegral total_out_lo
total_out_hi' = fromIntegral total_out_hi `shiftL` 32
total_out = total_out_lo' + total_out_hi'
IO.hPutStr stderr $ " total_out=" ++ show (total_out :: Int64)
IO.hPutStrLn stderr ">>"
putOutBuffer :: Int -> BZStream -> IO ByteString
putOutBuffer size bzstr = do
_out <- mkByteString size
withByteString _out $ \ptr len -> withBZStream bzstr $ \bzptr -> do
(\hsc_ptr -> pokeByteOff hsc_ptr 16) bzptr ptr
(\hsc_ptr -> pokeByteOff hsc_ptr 20) bzptr len
return _out
putInBuffer :: BZStream -> ByteString -> IO ()
putInBuffer bzstr _in
= withByteString _in $ \ptr len -> withBZStream bzstr $ \bzptr -> do
(\hsc_ptr -> pokeByteOff hsc_ptr 0) bzptr ptr
(\hsc_ptr -> pokeByteOff hsc_ptr 4) bzptr len
pullOutBuffer :: BZStream -> ByteString -> IO ByteString
pullOutBuffer bzstr _out = withByteString _out $ \ptr _ -> do
next_out <- withBZStream bzstr $ \bzptr -> (\hsc_ptr -> peekByteOff hsc_ptr 16) bzptr
return $! BS.take (next_out `minusPtr` ptr) _out
pullInBuffer :: BZStream -> ByteString -> IO ByteString
pullInBuffer bzstr _in = withByteString _in $ \ptr _ -> do
next_in <- withBZStream bzstr $ \bzptr -> (\hsc_ptr -> peekByteOff hsc_ptr 0) bzptr
return $! BS.drop (next_in `minusPtr` ptr) _in
insertOut :: MonadIO m
=> Int
-> (BZStream -> CInt -> IO CInt)
-> Initial
-> Enumerator ByteString m a
insertOut size run (Initial bzstr) iter = return $! do
_out <- liftIO $ putOutBuffer size bzstr
liftIO $ IO.hPutStrLn stderr $ "Inserted out buffer of size " ++ show size
joinIM $ fill size run (EmptyIn bzstr _out) iter
fill :: MonadIO m
=> Int
-> (BZStream -> CInt -> IO CInt)
-> EmptyIn
-> Enumerator ByteString m a
fill size run (EmptyIn bzstr _out) iter
= let fill' (Chunk _in)
| not (BS.null _in) = do
liftIO $ putInBuffer bzstr _in
liftIO $ IO.hPutStrLn stderr $
"Inserted in buffer of size " ++ show (BS.length _in)
joinIM $ doRun size run (Invalid bzstr _in _out) iter
| otherwise = fillI
fill' (EOF Nothing) = do
out <- liftIO $ pullOutBuffer bzstr _out
iter' <- lift $ enumPure1Chunk out iter
joinIM $ finish size run (Finishing bzstr BS.empty) iter'
fill' (EOF (Just err))
= case fromException err of
Just err' ->
joinIM $ flush size run (Flushing bzstr err' _out) iter
Nothing -> throwRecoverableErr err fill'
fillI = do
liftIO $ IO.hPutStrLn stderr $ "About to insert in buffer"
liftI fill'
in return $! fillI
swapOut :: MonadIO m
=> Int
-> (BZStream -> CInt -> IO CInt)
-> FullOut
-> Enumerator ByteString m a
swapOut size run (FullOut bzstr _in) iter = return $! do
_out <- liftIO $ putOutBuffer size bzstr
liftIO $ IO.hPutStrLn stderr $ "Swapped out buffer of size " ++ show size
joinIM $ doRun size run (Invalid bzstr _in _out) iter
doRun :: MonadIO m
=> Int
-> (BZStream -> CInt -> IO CInt)
-> Invalid
-> Enumerator ByteString m a
doRun size run (Invalid bzstr _in _out) iter = return $! do
liftIO $ IO.hPutStrLn stderr $ "About to run"
liftIO $ dumpZStream bzstr
status <- liftIO $ run bzstr 0
liftIO $ IO.hPutStrLn stderr $ "Runned"
case fromErrno status of
Left err -> joinIM $ enumErr err iter
Right False -> do
remaining <- liftIO $ pullInBuffer bzstr _in
out <- liftIO $ pullOutBuffer bzstr _out
iter' <- lift $ enumPure1Chunk out iter
res <- lift $ tryRun iter'
case res of
Left err@(SomeException _) -> throwErr err
Right x -> idone x (Chunk remaining)
Right True -> do
(avail_in, avail_out) <- liftIO $ withBZStream bzstr $ \bzptr -> do
avail_in <- liftIO $ (\hsc_ptr -> peekByteOff hsc_ptr 4) bzptr
avail_out <- liftIO $ (\hsc_ptr -> peekByteOff hsc_ptr 20) bzptr
return (avail_in, avail_out) :: IO (CInt, CInt)
case avail_out of
0 -> do
out <- liftIO $ pullOutBuffer bzstr _out
iter' <- lift $ enumPure1Chunk out iter
joinIM $ case avail_in of
0 -> insertOut size run (Initial bzstr) iter'
_ -> swapOut size run (FullOut bzstr _in) iter'
_ -> joinIM $ case avail_in of
0 -> fill size run (EmptyIn bzstr _out) iter
_ -> enumErr IncorrectState iter
flush :: MonadIO m
=> Int
-> (BZStream -> CInt -> IO CInt)
-> Flushing
-> Enumerator ByteString m a
flush size run fin@(Flushing bzstr _flush _out) iter = return $! do
status <- liftIO $ run bzstr (fromFlush _flush)
case fromErrno status of
Left err -> joinIM $ enumErr err iter
Right False -> do
out <- liftIO $ pullOutBuffer bzstr _out
iter' <- lift $ enumPure1Chunk out iter
res <- lift $ tryRun iter'
case res of
Left err@(SomeException _) -> throwErr err
Right x -> idone x (Chunk BS.empty)
Right True -> do
(avail_in, avail_out) <- liftIO $ withBZStream bzstr $ \bzptr -> do
avail_in <- liftIO $ (\hsc_ptr -> peekByteOff hsc_ptr 4) bzptr
avail_out <- liftIO $ (\hsc_ptr -> peekByteOff hsc_ptr 20) bzptr
return (avail_in, avail_out) :: IO (CInt, CInt)
case avail_out of
0 -> do
out <- liftIO $ pullOutBuffer bzstr _out
iter' <- lift $ enumPure1Chunk out iter
out' <- liftIO $ putOutBuffer size bzstr
joinIM $ flush size run (Flushing bzstr _flush out') iter'
_ -> joinIM $ insertOut size run (Initial bzstr) iter
finish :: MonadIO m
=> Int
-> (BZStream -> CInt -> IO CInt)
-> Finishing
-> Enumerator ByteString m a
finish size run fin@(Finishing bzstr _in) iter = return $! do
liftIO $ IO.hPutStrLn stderr $
"Finishing with out buffer of size " ++ show size
_out <- liftIO $ putOutBuffer size bzstr
status <- liftIO $ run bzstr 2
case fromErrno status of
Left err -> joinIM $ enumErr err iter
Right False -> do
remaining <- liftIO $ pullInBuffer bzstr _in
out <- liftIO $ pullOutBuffer bzstr _out
iter' <- lift $ enumPure1Chunk out iter
res <- lift $ tryRun iter'
case res of
Left err@(SomeException _) -> throwErr err
Right x -> idone x (Chunk remaining)
Right True -> do
(avail_in, avail_out) <- liftIO $ withBZStream bzstr $ \bzptr -> do
avail_in <- liftIO $ (\hsc_ptr -> peekByteOff hsc_ptr 4) bzptr
avail_out <- liftIO $ (\hsc_ptr -> peekByteOff hsc_ptr 20) bzptr
return (avail_in, avail_out) :: IO (CInt, CInt)
case avail_out of
0 -> do
out <- liftIO $ withBZStream bzstr $ \bzptr ->
pullOutBuffer bzstr _out
iter' <- lift $ enumPure1Chunk out iter
joinIM $ finish size run fin iter'
_ -> throwErr $! SomeException IncorrectState
foreign import ccall unsafe "BZ2_bzCompressInit"
compressInit :: Ptr BZStream -> CInt -> CInt
-> CInt -> IO CInt
foreign import ccall unsafe "BZ2_bzDecompressInit"
decompressInit :: Ptr BZStream -> CInt -> CInt
-> IO CInt
foreign import ccall unsafe "BZ2_bzCompress"
compress :: Ptr BZStream -> CInt -> IO CInt
foreign import ccall unsafe "BZ2_bzDecompress"
decompress :: Ptr BZStream -> IO CInt
foreign import ccall unsafe "&BZ2_bzCompressEnd"
compressEnd :: FunPtr (Ptr BZStream -> IO ())
foreign import ccall unsafe "&BZ2_bzDecompressEnd"
decompressEnd :: FunPtr (Ptr BZStream -> IO ())
compress' :: BZStream -> CInt -> IO CInt
compress' bz f = withBZStream bz $ \p -> do
IO.hPutStrLn stderr "About to run compress"
compress p f
decompress' :: BZStream -> CInt -> IO CInt
decompress' bz _ = withBZStream bz $ \p -> do
IO.hPutStrLn stderr "About to run decompress"
decompress p
verboseLevel :: CInt
verboseLevel = 3
mkCompress :: CompressParams -> IO (Either BZipParamsException Initial)
mkCompress (CompressParams blk wf _)
= case fromBlockSize blk of
Left err -> return $! Left $! err
Right blk' -> case fromWorkFactor wf of
Left err -> return $! Left $! err
Right wf' -> do
bzstr <- mallocForeignPtrBytes (48)
withForeignPtr bzstr $ \bzptr -> do
memset (castPtr bzptr) 0 (48)
compressInit bzptr blk' verboseLevel wf' `finally`
addForeignPtrFinalizer compressEnd bzstr
return $! Right $! Initial $ BZStream bzstr
mkDecompress :: DecompressParams -> IO (Either BZipParamsException Initial)
mkDecompress (DecompressParams small _) = do
bzstr <- mallocForeignPtrBytes (48)
withForeignPtr bzstr $ \bzptr -> do
memset (castPtr bzptr) 0 (48)
decompressInit bzptr verboseLevel (if small then 0 else 1) `finally`
addForeignPtrFinalizer decompressEnd bzstr
return $! Right $! Initial $ BZStream bzstr
enumCompress :: MonadIO m
=> CompressParams
-> Enumerator ByteString m a
enumCompress cp@(CompressParams _ _ size) iter = do
cmp <- liftIO $ mkCompress cp
case cmp of
Left err -> enumErr err iter
Right init -> insertOut size compress' init iter
enumDecompress :: MonadIO m
=> DecompressParams
-> Enumerator ByteString m a
enumDecompress dp@(DecompressParams _ size) iter = do
dcmp <- liftIO $ mkDecompress dp
case dcmp of
Left err -> enumErr err iter
Right init -> insertOut size decompress' init iter