module Data.Conduit.Binary
(
sourceFile
, sourceHandle
, sourceHandleUnsafe
, sourceIOHandle
, sourceFileRange
, sourceHandleRange
, sourceHandleRangeWithBuffer
, withSourceFile
, sinkFile
, sinkFileCautious
, sinkTempFile
, sinkSystemTempFile
, sinkHandle
, sinkIOHandle
, sinkHandleBuilder
, sinkHandleFlush
, withSinkFile
, withSinkFileBuilder
, withSinkFileCautious
, conduitFile
, conduitHandle
, sourceLbs
, head
, dropWhile
, take
, drop
, sinkCacheLength
, sinkLbs
, mapM_
, sinkStorable
, sinkStorableEx
, isolate
, takeWhile
, Data.Conduit.Binary.lines
) where
import qualified Data.ByteString.Builder as BB
import qualified Data.Streaming.FileRead as FR
import Prelude hiding (head, take, drop, takeWhile, dropWhile, mapM_)
import qualified Data.ByteString as S
import Data.ByteString.Unsafe (unsafeUseAsCString)
import qualified Data.ByteString.Lazy as L
import Data.Conduit
import Data.Conduit.List (sourceList, consume)
import qualified Data.Conduit.List as CL
import Control.Exception (assert, finally, bracket)
import Control.Monad (unless, when)
import Control.Monad.IO.Class (liftIO, MonadIO)
import Control.Monad.IO.Unlift
import Control.Monad.Trans.Resource (allocate, release)
import Control.Monad.Trans.Class (lift)
import qualified System.IO as IO
import Data.Word (Word8, Word64)
#if (__GLASGOW_HASKELL__ < 710)
import Control.Applicative ((<$>))
#endif
import System.Directory (getTemporaryDirectory, removeFile)
import Data.ByteString.Lazy.Internal (defaultChunkSize)
import Data.ByteString.Internal (ByteString (PS), inlinePerformIO)
import Foreign.ForeignPtr.Unsafe (unsafeForeignPtrToPtr)
import Foreign.ForeignPtr (touchForeignPtr)
import Foreign.Ptr (plusPtr, castPtr)
import Foreign.Storable (Storable, peek, sizeOf)
import GHC.ForeignPtr (mallocPlainForeignPtrBytes)
import Control.Monad.Trans.Resource (MonadResource)
import Control.Monad.Catch (MonadThrow (..))
import Control.Exception (Exception)
import Data.Typeable (Typeable)
import Foreign.Ptr (Ptr)
#ifndef ALLOW_UNALIGNED_ACCESS
import Foreign.Marshal (alloca, copyBytes)
#endif
import System.Directory (renameFile)
import System.FilePath (takeDirectory, takeFileName, (<.>))
import System.IO (hClose, openBinaryTempFile)
import Control.Exception (throwIO, catch)
import System.IO.Error (isDoesNotExistError)
import qualified Data.ByteString.Builder as BB
sourceFile :: MonadResource m
=> FilePath
-> Producer m S.ByteString
sourceFile fp =
bracketP
(FR.openFile fp)
FR.closeFile
loop
where
loop h = do
bs <- liftIO $ FR.readChunk h
unless (S.null bs) $ do
yield bs
loop h
sourceHandle :: MonadIO m
=> IO.Handle
-> Producer m S.ByteString
sourceHandle h =
loop
where
loop = do
bs <- liftIO (S.hGetSome h defaultChunkSize)
if S.null bs
then return ()
else yield bs >> loop
sourceHandleUnsafe :: MonadIO m => IO.Handle -> Source m ByteString
sourceHandleUnsafe handle = do
fptr <- liftIO $ mallocPlainForeignPtrBytes defaultChunkSize
let ptr = unsafeForeignPtrToPtr fptr
loop = do
count <- liftIO $ IO.hGetBuf handle ptr defaultChunkSize
when (count > 0) $ do
yield (PS fptr 0 count)
loop
loop
liftIO $ touchForeignPtr fptr
sourceIOHandle :: MonadResource m
=> IO IO.Handle
-> Producer m S.ByteString
sourceIOHandle alloc = bracketP alloc IO.hClose sourceHandle
sinkHandle :: MonadIO m
=> IO.Handle
-> Consumer S.ByteString m ()
sinkHandle h = awaitForever (liftIO . S.hPut h)
sinkHandleBuilder :: MonadIO m => IO.Handle -> ConduitM BB.Builder o m ()
sinkHandleBuilder h = awaitForever (liftIO . BB.hPutBuilder h)
sinkHandleFlush :: MonadIO m
=> IO.Handle
-> ConduitM (Flush S.ByteString) o m ()
sinkHandleFlush h =
awaitForever $ \mbs -> liftIO $
case mbs of
Chunk bs -> S.hPut h bs
Flush -> IO.hFlush h
sinkIOHandle :: MonadResource m
=> IO IO.Handle
-> Consumer S.ByteString m ()
sinkIOHandle alloc = bracketP alloc IO.hClose sinkHandle
sourceFileRange :: MonadResource m
=> FilePath
-> Maybe Integer
-> Maybe Integer
-> Producer m S.ByteString
sourceFileRange fp offset count = bracketP
(IO.openBinaryFile fp IO.ReadMode)
IO.hClose
(\h -> sourceHandleRange h offset count)
sourceHandleRange :: MonadIO m
=> IO.Handle
-> Maybe Integer
-> Maybe Integer
-> Producer m S.ByteString
sourceHandleRange handle offset count =
sourceHandleRangeWithBuffer handle offset count defaultChunkSize
sourceHandleRangeWithBuffer :: MonadIO m
=> IO.Handle
-> Maybe Integer
-> Maybe Integer
-> Int
-> Producer m S.ByteString
sourceHandleRangeWithBuffer handle offset count buffer = do
case offset of
Nothing -> return ()
Just off -> liftIO $ IO.hSeek handle IO.AbsoluteSeek off
case count of
Nothing -> pullUnlimited
Just c -> pullLimited (fromInteger c)
where
pullUnlimited = do
bs <- liftIO $ S.hGetSome handle buffer
if S.null bs
then return ()
else do
yield bs
pullUnlimited
pullLimited c = do
bs <- liftIO $ S.hGetSome handle (min c buffer)
let c' = c S.length bs
assert (c' >= 0) $
if S.null bs
then return ()
else do
yield bs
pullLimited c'
sinkFile :: MonadResource m
=> FilePath
-> Consumer S.ByteString m ()
sinkFile fp = sinkIOHandle (IO.openBinaryFile fp IO.WriteMode)
sinkFileCautious
:: MonadResource m
=> FilePath
-> ConduitM S.ByteString o m ()
sinkFileCautious fp =
bracketP (cautiousAcquire fp) cautiousCleanup inner
where
inner (tmpFP, h) = do
sinkHandle h
liftIO $ do
hClose h
renameFile tmpFP fp
sinkTempFile :: MonadResource m
=> FilePath
-> String
-> ConduitM ByteString o m FilePath
sinkTempFile tmpdir pattern = do
(_releaseKey, (fp, h)) <- allocate
(IO.openBinaryTempFile tmpdir pattern)
(\(fp, h) -> IO.hClose h `finally` (removeFile fp `Control.Exception.catch` \e ->
if isDoesNotExistError e
then return ()
else throwIO e))
sinkHandle h
liftIO $ IO.hClose h
return fp
sinkSystemTempFile
:: MonadResource m
=> String
-> ConduitM ByteString o m FilePath
sinkSystemTempFile pattern = do
dir <- liftIO getTemporaryDirectory
sinkTempFile dir pattern
conduitFile :: MonadResource m
=> FilePath
-> Conduit S.ByteString m S.ByteString
conduitFile fp = bracketP
(IO.openBinaryFile fp IO.WriteMode)
IO.hClose
conduitHandle
conduitHandle :: MonadIO m => IO.Handle -> Conduit S.ByteString m S.ByteString
conduitHandle h = awaitForever $ \bs -> liftIO (S.hPut h bs) >> yield bs
isolate :: Monad m
=> Int
-> Conduit S.ByteString m S.ByteString
isolate =
loop
where
loop 0 = return ()
loop count = do
mbs <- await
case mbs of
Nothing -> return ()
Just bs -> do
let (a, b) = S.splitAt count bs
case count S.length a of
0 -> do
unless (S.null b) $ leftover b
yield a
count' -> assert (S.null b) $ yield a >> loop count'
head :: Monad m => Consumer S.ByteString m (Maybe Word8)
head = do
mbs <- await
case mbs of
Nothing -> return Nothing
Just bs ->
case S.uncons bs of
Nothing -> head
Just (w, bs') -> leftover bs' >> return (Just w)
takeWhile :: Monad m => (Word8 -> Bool) -> Conduit S.ByteString m S.ByteString
takeWhile p =
loop
where
loop = await >>= maybe (return ()) go
go bs
| S.null x = next
| otherwise = yield x >> next
where
next = if S.null y then loop else leftover y
(x, y) = S.span p bs
dropWhile :: Monad m => (Word8 -> Bool) -> Consumer S.ByteString m ()
dropWhile p =
loop
where
loop = do
mbs <- await
case S.dropWhile p <$> mbs of
Nothing -> return ()
Just bs
| S.null bs -> loop
| otherwise -> leftover bs
take :: Monad m => Int -> Consumer S.ByteString m L.ByteString
take 0 = return L.empty
take n0 = go n0 id
where
go n front =
await >>= maybe (return $ L.fromChunks $ front []) go'
where
go' bs =
case S.length bs `compare` n of
LT -> go (n S.length bs) (front . (bs:))
EQ -> return $ L.fromChunks $ front [bs]
GT ->
let (x, y) = S.splitAt n bs
in assert (not $ S.null y) $ leftover y >> return (L.fromChunks $ front [x])
drop :: Monad m => Int -> Consumer S.ByteString m ()
drop 0 = return ()
drop n0 = go n0
where
go n =
await >>= maybe (return ()) go'
where
go' bs =
case S.length bs `compare` n of
LT -> go (n S.length bs)
EQ -> return ()
GT ->
let y = S.drop n bs
in assert (not $ S.null y) $ leftover y >> return ()
lines :: Monad m => Conduit S.ByteString m S.ByteString
lines =
loop []
where
loop acc = await >>= maybe (finish acc) (go acc)
finish acc =
let final = S.concat $ reverse acc
in unless (S.null final) (yield final)
go acc more =
case S.uncons second of
Just (_, second') -> yield (S.concat $ reverse $ first:acc) >> go [] second'
Nothing -> loop $ more:acc
where
(first, second) = S.break (== 10) more
sourceLbs :: Monad m => L.ByteString -> Producer m S.ByteString
sourceLbs = sourceList . L.toChunks
sinkCacheLength :: (MonadResource m1, MonadResource m2)
=> Sink S.ByteString m1 (Word64, Source m2 S.ByteString)
sinkCacheLength = do
tmpdir <- liftIO getTemporaryDirectory
(releaseKey, (fp, h)) <- allocate
(IO.openBinaryTempFile tmpdir "conduit.cache")
(\(fp, h) -> IO.hClose h `finally` removeFile fp)
len <- sinkHandleLen h
liftIO $ IO.hClose h
return (len, sourceFile fp >> release releaseKey)
where
sinkHandleLen :: MonadResource m => IO.Handle -> Sink S.ByteString m Word64
sinkHandleLen h =
loop 0
where
loop x =
await >>= maybe (return x) go
where
go bs = do
liftIO $ S.hPut h bs
loop $ x + fromIntegral (S.length bs)
sinkLbs :: Monad m => Sink S.ByteString m L.ByteString
sinkLbs = fmap L.fromChunks consume
mapM_BS :: Monad m => (Word8 -> m ()) -> S.ByteString -> m ()
mapM_BS f (PS fptr offset len) = do
let start = unsafeForeignPtrToPtr fptr `plusPtr` offset
end = start `plusPtr` len
loop ptr
| ptr >= end = inlinePerformIO (touchForeignPtr fptr) `seq` return ()
| otherwise = do
f (inlinePerformIO (peek ptr))
loop (ptr `plusPtr` 1)
loop start
mapM_ :: Monad m => (Word8 -> m ()) -> Consumer S.ByteString m ()
mapM_ f = awaitForever (lift . mapM_BS f)
sinkStorable :: (Monad m, Storable a) => Consumer S.ByteString m (Maybe a)
sinkStorable = sinkStorableHelper Just (return Nothing)
sinkStorableEx :: (MonadThrow m, Storable a) => Consumer S.ByteString m a
sinkStorableEx = sinkStorableHelper id (throwM SinkStorableInsufficientBytes)
sinkStorableHelper :: forall m a b. (Monad m, Storable a)
=> (a -> b)
-> (Consumer S.ByteString m b)
-> Consumer S.ByteString m b
sinkStorableHelper wrap failure = do
start
where
size = sizeOf (undefined :: a)
start = do
mbs <- await
case mbs of
Nothing -> failure
Just bs
| S.null bs -> start
| otherwise ->
case compare (S.length bs) size of
LT -> do
leftover bs
lbs <- take size
let bs = S.concat $ L.toChunks lbs
case compare (S.length bs) size of
LT -> do
leftover bs
failure
EQ -> process bs
GT -> assert False (process bs)
EQ -> process bs
GT -> do
let (x, y) = S.splitAt size bs
leftover y
process x
process bs = return $! wrap $! inlinePerformIO $!
unsafeUseAsCString bs (safePeek undefined . castPtr)
safePeek :: a -> Ptr a -> IO a
#ifdef ALLOW_UNALIGNED_ACCESS
safePeek _ = peek
#else
safePeek val ptr = alloca (\t -> copyBytes t ptr (sizeOf val) >> peek t)
#endif
data SinkStorableException = SinkStorableInsufficientBytes
deriving (Show, Typeable)
instance Exception SinkStorableException
withSourceFile
:: (MonadUnliftIO m, MonadIO n)
=> FilePath
-> (ConduitM i ByteString n () -> m a)
-> m a
withSourceFile fp inner =
withRunInIO $ \run ->
IO.withBinaryFile fp IO.ReadMode $
run . inner . sourceHandle
withSinkFile
:: (MonadUnliftIO m, MonadIO n)
=> FilePath
-> (ConduitM ByteString o n () -> m a)
-> m a
withSinkFile fp inner =
withRunInIO $ \run ->
IO.withBinaryFile fp IO.ReadMode $
run . inner . sinkHandle
withSinkFileBuilder
:: (MonadUnliftIO m, MonadIO n)
=> FilePath
-> (ConduitM BB.Builder o n () -> m a)
-> m a
withSinkFileBuilder fp inner =
withRunInIO $ \run ->
IO.withBinaryFile fp IO.WriteMode $ \h ->
run $ inner $ CL.mapM_ (liftIO . BB.hPutBuilder h)
withSinkFileCautious
:: (MonadUnliftIO m, MonadIO n)
=> FilePath
-> (ConduitM S.ByteString o n () -> m a)
-> m a
withSinkFileCautious fp inner =
withRunInIO $ \run -> bracket
(cautiousAcquire fp)
cautiousCleanup
(\(tmpFP, h) -> do
a <- run $ inner $ sinkHandle h
hClose h
renameFile tmpFP fp
return a)
cautiousAcquire :: FilePath -> IO (FilePath, IO.Handle)
cautiousAcquire fp = openBinaryTempFile (takeDirectory fp) (takeFileName fp <.> "tmp")
cautiousCleanup :: (FilePath, IO.Handle) -> IO ()
cautiousCleanup (tmpFP, h) = do
hClose h
removeFile tmpFP `Control.Exception.catch` \e ->
if isDoesNotExistError e
then return ()
else throwIO e