{-# language BangPatterns #-}
{-# language DerivingStrategies #-}
{-# language DuplicateRecordFields #-}
{-# language MagicHash #-}
{-# language NamedFieldPuns #-}
{-# language RankNTypes #-}
{-# language TypeApplications #-}
{-# language TypeFamilies #-}
{-# language UnboxedTuples #-}
module Data.Bytes.Chunks
(
Chunks(..)
, length
, null
, concat
, concatPinned
, concatU
, reverse
, reverseOnto
, foldl'
, split
, fnv1a32
, fnv1a64
, fromBytes
, fromByteArray
, unsafeCopy
, hGetContents
, readFile
, hPut
, writeFile
) where
import Prelude hiding (length,concat,reverse,readFile,writeFile,null)
import Control.Exception (IOException,catch)
import Control.Monad.ST.Run (runIntByteArrayST)
import Data.Bits (xor)
import Data.Bytes.Types (Bytes(Bytes))
import Data.Word (Word8,Word32,Word64)
import Data.Primitive (ByteArray(..),MutableByteArray(..))
import GHC.Exts (ByteArray#,MutableByteArray#)
import GHC.Exts (Int#,State#,Int(I#),(+#))
import GHC.ST (ST(..))
import System.IO (Handle,hFileSize,IOMode(ReadMode,WriteMode),withBinaryFile)
import qualified GHC.Exts as Exts
import qualified Data.Primitive as PM
import qualified Data.Bytes.Types as B
import qualified Data.Bytes.Pure as Bytes
import qualified Data.Bytes.Byte as Byte
import qualified Data.Bytes.IO as IO
data Chunks
= ChunksCons {-# UNPACK #-} !Bytes !Chunks
| ChunksNil
deriving stock (Show)
instance Semigroup Chunks where
ChunksNil <> a = a
cs@(ChunksCons _ _) <> ChunksNil = cs
as@(ChunksCons _ _) <> bs@(ChunksCons _ _) =
reverseOnto bs (reverse as)
instance Monoid Chunks where
mempty = ChunksNil
instance Eq Chunks where
a == b = concat a == concat b
null :: Chunks -> Bool
null = go where
go ChunksNil = True
go (ChunksCons (Bytes _ _ len) xs) = case len of
0 -> go xs
_ -> False
concatPinned :: Chunks -> Bytes
concatPinned x = case x of
ChunksNil -> Bytes.emptyPinned
ChunksCons b y -> case y of
ChunksNil -> Bytes.pin b
ChunksCons c z -> case concatPinnedFollowing2 b c z of
(# len, r #) -> Bytes (ByteArray r) 0 (I# len)
concat :: Chunks -> Bytes
concat x = case x of
ChunksNil -> Bytes.empty
ChunksCons b y -> case y of
ChunksNil -> b
ChunksCons c z -> case concatFollowing2 b c z of
(# len, r #) -> Bytes (ByteArray r) 0 (I# len)
concatU :: Chunks -> ByteArray
concatU x = case x of
ChunksNil -> mempty
ChunksCons b y -> case y of
ChunksNil -> Bytes.toByteArray b
ChunksCons c z -> case concatFollowing2 b c z of
(# _, r #) -> ByteArray r
concatFollowing2 :: Bytes -> Bytes -> Chunks -> (# Int#, ByteArray# #)
concatFollowing2 = internalConcatFollowing2 PM.newByteArray
concatPinnedFollowing2 :: Bytes -> Bytes -> Chunks -> (# Int#, ByteArray# #)
concatPinnedFollowing2 = internalConcatFollowing2 PM.newPinnedByteArray
internalConcatFollowing2 ::
(forall s. Int -> ST s (MutableByteArray s))
-> Bytes
-> Bytes
-> Chunks
-> (# Int#, ByteArray# #)
{-# inline internalConcatFollowing2 #-}
internalConcatFollowing2 allocate
(Bytes{array=c,offset=coff,length=szc})
(Bytes{array=d,offset=doff,length=szd}) ds =
let !(I# x, ByteArray y) = runIntByteArrayST $ do
let !szboth = szc + szd
!len = chunksLengthGo szboth ds
dst <- allocate len
PM.copyByteArray dst 0 c coff szc
PM.copyByteArray dst szc d doff szd
!len2 <- unsafeCopy dst szboth ds
result <- PM.unsafeFreezeByteArray dst
pure (len2,result)
in (# x, y #)
length :: Chunks -> Int
length = chunksLengthGo 0
chunksLengthGo :: Int -> Chunks -> Int
chunksLengthGo !n ChunksNil = n
chunksLengthGo !n (ChunksCons (Bytes{B.length=len}) cs) =
chunksLengthGo (n + len) cs
unsafeCopy ::
MutableByteArray s
-> Int
-> Chunks
-> ST s Int
{-# inline unsafeCopy #-}
unsafeCopy (MutableByteArray dst) (I# off) cs = ST
(\s0 -> case copy# dst off cs s0 of
(# s1, nextOff #) -> (# s1, I# nextOff #)
)
copy# :: MutableByteArray# s -> Int# -> Chunks -> State# s -> (# State# s, Int# #)
copy# _ off ChunksNil s0 = (# s0, off #)
copy# marr off (ChunksCons (Bytes{B.array,B.offset,B.length=len}) cs) s0 =
case Exts.copyByteArray# (unBa array) (unI offset) marr off (unI len) s0 of
s1 -> copy# marr (off +# unI len) cs s1
reverse :: Chunks -> Chunks
reverse = reverseOnto ChunksNil
reverseOnto :: Chunks -> Chunks -> Chunks
reverseOnto !x ChunksNil = x
reverseOnto !x (ChunksCons y ys) =
reverseOnto (ChunksCons y x) ys
unI :: Int -> Int#
unI (I# i) = i
unBa :: ByteArray -> ByteArray#
unBa (ByteArray x) = x
hGetContents :: Handle -> IO Chunks
hGetContents !h = hGetContentsCommon ChunksNil h
hGetContentsHint :: Int -> Handle -> IO Chunks
hGetContentsHint !hint !h = do
c <- IO.hGet h hint
let !r = ChunksCons c ChunksNil
if Bytes.length c == hint
then pure r
else hGetContentsCommon r h
hGetContentsCommon ::
Chunks
-> Handle
-> IO Chunks
hGetContentsCommon !acc0 !h = go acc0 where
go !acc = do
c <- IO.hGet h chunkSize
let !r = ChunksCons c acc
if Bytes.length c == chunkSize
then go r
else pure $! reverse r
readFile :: FilePath -> IO Chunks
readFile f = withBinaryFile f ReadMode $ \h -> do
filesz <- catch (hFileSize h) useZeroIfNotRegularFile
let hint = (fromIntegral filesz `max` 255) + 1
hGetContentsHint hint h
where
useZeroIfNotRegularFile :: IOException -> IO Integer
useZeroIfNotRegularFile _ = return 0
chunkSize :: Int
chunkSize = 16384 - 16
fromBytes :: Bytes -> Chunks
fromBytes !b = ChunksCons b ChunksNil
fromByteArray :: ByteArray -> Chunks
fromByteArray !b = fromBytes (Bytes.fromByteArray b)
foldl' :: (a -> Word8 -> a) -> a -> Chunks -> a
{-# inline foldl' #-}
foldl' g = go where
go !a ChunksNil = a
go !a (ChunksCons c cs) = go (Bytes.foldl' g a c) cs
fnv1a32 :: Chunks -> Word32
fnv1a32 = foldl'
(\acc w -> (fromIntegral @Word8 @Word32 w `xor` acc) * 0x01000193
) 0x811c9dc5
fnv1a64 :: Chunks -> Word64
fnv1a64 = foldl'
(\acc w -> (fromIntegral @Word8 @Word64 w `xor` acc) * 0x00000100000001B3
) 0xcbf29ce484222325
hPut :: Handle -> Chunks -> IO ()
hPut h = go where
go ChunksNil = pure ()
go (ChunksCons c cs) = IO.hPut h c *> go cs
writeFile :: FilePath -> Chunks -> IO ()
writeFile path cs = withBinaryFile path WriteMode (\h -> hPut h cs)
split :: Word8 -> Chunks -> [Bytes]
{-# inline split #-}
split !w !cs0 = Exts.build
(\g x0 ->
let go !cs = case splitOnto ChunksNil w cs of
(hd,tl) -> let !x = concat (reverse hd) in
case tl of
ChunksNil -> x0
_ -> g x (go tl)
in go cs0
)
splitOnto :: Chunks -> Word8 -> Chunks -> (Chunks,Chunks)
{-# inline splitOnto #-}
splitOnto !acc0 !w !cs0 = go acc0 cs0 where
go !acc ChunksNil = (acc,ChunksNil)
go !acc (ChunksCons b bs) = case Byte.split1 w b of
Nothing -> go (ChunksCons b acc) bs
Just (hd,tl) ->
let !r1 = ChunksCons hd acc
!r2 = ChunksCons tl bs
in (r1,r2)