{-# LANGUAGE BangPatterns        #-}
{-# LANGUAGE FlexibleInstances   #-}
{-# LANGUAGE MultiWayIf          #-}
{-# LANGUAGE ScopedTypeVariables #-}

module HaskellWorks.Data.ByteString
  ( chunkedBy
  , ToByteString(..)
  , ToByteStrings(..)
  , mmap
  , padded
  , rechunk
  , rechunkPadded
  , resegment
  , resegmentPadded
  , hGetContentsChunkedBy
  ) where

import Control.Monad.ST
import Data.Word
import Foreign.ForeignPtr

import qualified Control.Monad.ST.Unsafe       as ST
import qualified Data.ByteString               as BS
import qualified Data.ByteString.Internal      as BSI
import qualified Data.ByteString.Lazy          as LBS
import qualified Data.ByteString.Lazy.Internal as LBSI
import qualified Data.Vector.Storable          as DVS
import qualified Data.Vector.Storable.Mutable  as DVSM
import qualified System.IO                     as IO
import qualified System.IO.MMap                as IO
import qualified System.IO.Unsafe              as IO

class ToByteString a where
  toByteString :: a -> BS.ByteString

instance ToByteString BS.ByteString where
  toByteString :: ByteString -> ByteString
toByteString = forall a. a -> a
id
  {-# INLINE toByteString #-}

instance ToByteString (DVS.Vector Word8) where
  toByteString :: Vector Word8 -> ByteString
toByteString Vector Word8
v = case forall a. Vector a -> (ForeignPtr a, Int, Int)
DVS.unsafeToForeignPtr Vector Word8
v of
    (ForeignPtr Word8
fptr, Int
start, Int
offset) -> ForeignPtr Word8 -> Int -> Int -> ByteString
BSI.fromForeignPtr ForeignPtr Word8
fptr Int
start Int
offset
  {-# INLINE toByteString #-}

instance ToByteString (DVS.Vector Word16) where
  toByteString :: Vector Word16 -> ByteString
toByteString Vector Word16
v = case forall a. Vector a -> (ForeignPtr a, Int, Int)
DVS.unsafeToForeignPtr (forall a b. (Storable a, Storable b) => Vector a -> Vector b
DVS.unsafeCast Vector Word16
v :: DVS.Vector Word8) of
    (ForeignPtr Word8
fptr, Int
start, Int
offset) -> ForeignPtr Word8 -> Int -> Int -> ByteString
BSI.fromForeignPtr ForeignPtr Word8
fptr Int
start Int
offset
  {-# INLINE toByteString #-}

instance ToByteString (DVS.Vector Word32) where
  toByteString :: Vector Word32 -> ByteString
toByteString Vector Word32
v = case forall a. Vector a -> (ForeignPtr a, Int, Int)
DVS.unsafeToForeignPtr (forall a b. (Storable a, Storable b) => Vector a -> Vector b
DVS.unsafeCast Vector Word32
v :: DVS.Vector Word8) of
    (ForeignPtr Word8
fptr, Int
start, Int
offset) -> ForeignPtr Word8 -> Int -> Int -> ByteString
BSI.fromForeignPtr ForeignPtr Word8
fptr Int
start Int
offset
  {-# INLINE toByteString #-}

instance ToByteString (DVS.Vector Word64) where
  toByteString :: Vector Word64 -> ByteString
toByteString Vector Word64
v = case forall a. Vector a -> (ForeignPtr a, Int, Int)
DVS.unsafeToForeignPtr (forall a b. (Storable a, Storable b) => Vector a -> Vector b
DVS.unsafeCast Vector Word64
v :: DVS.Vector Word8) of
    (ForeignPtr Word8
fptr, Int
start, Int
offset) -> ForeignPtr Word8 -> Int -> Int -> ByteString
BSI.fromForeignPtr ForeignPtr Word8
fptr Int
start Int
offset
  {-# INLINE toByteString #-}

instance ToByteString [Word64] where
  toByteString :: [Word64] -> ByteString
toByteString = forall a. ToByteString a => a -> ByteString
toByteString forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Storable a => [a] -> Vector a
DVS.fromList
  {-# INLINE toByteString #-}

class ToByteStrings a where
  toByteStrings :: a -> [BS.ByteString]

instance ToByteStrings [BS.ByteString] where
  toByteStrings :: [ByteString] -> [ByteString]
toByteStrings = forall a. a -> a
id
  {-# INLINE toByteStrings #-}

instance ToByteStrings LBS.ByteString where
  toByteStrings :: ByteString -> [ByteString]
toByteStrings = ByteString -> [ByteString]
LBS.toChunks
  {-# INLINE toByteStrings #-}

instance ToByteStrings BS.ByteString where
  toByteStrings :: ByteString -> [ByteString]
toByteStrings = (forall a. a -> [a] -> [a]
:[])
  {-# INLINE toByteStrings #-}

instance ToByteStrings (DVS.Vector Word8) where
  toByteStrings :: Vector Word8 -> [ByteString]
toByteStrings = (forall a. a -> [a] -> [a]
:[]) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. ToByteString a => a -> ByteString
toByteString
  {-# INLINE toByteStrings #-}

instance ToByteStrings (DVS.Vector Word16) where
  toByteStrings :: Vector Word16 -> [ByteString]
toByteStrings = (forall a. a -> [a] -> [a]
:[]) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. ToByteString a => a -> ByteString
toByteString
  {-# INLINE toByteStrings #-}

instance ToByteStrings (DVS.Vector Word32) where
  toByteStrings :: Vector Word32 -> [ByteString]
toByteStrings = (forall a. a -> [a] -> [a]
:[]) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. ToByteString a => a -> ByteString
toByteString
  {-# INLINE toByteStrings #-}

instance ToByteStrings (DVS.Vector Word64) where
  toByteStrings :: Vector Word64 -> [ByteString]
toByteStrings = (forall a. a -> [a] -> [a]
:[]) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. ToByteString a => a -> ByteString
toByteString
  {-# INLINE toByteStrings #-}

instance ToByteStrings [Word64] where
  toByteStrings :: [Word64] -> [ByteString]
toByteStrings [Word64]
ws = forall a. ToByteString a => a -> ByteString
toByteString forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Word64] -> [Vector Word64]
go [Word64]
ws
    where go :: [Word64] -> [DVS.Vector Word64]
          go :: [Word64] -> [Vector Word64]
go [Word64]
us = forall (f :: * -> *) a.
(Traversable f, Storable a) =>
(forall s. ST s (f (MVector s a))) -> f (Vector a)
DVS.createT (forall s. [Word64] -> ST s [MVector s Word64]
goST [Word64]
us)
          goST :: [Word64] -> ST s [DVSM.MVector s Word64]
          goST :: forall s. [Word64] -> ST s [MVector s Word64]
goST [Word64]
us = do
            MVector s Word64
mv <- forall (m :: * -> *) a.
(PrimMonad m, Storable a) =>
Int -> m (MVector (PrimState m) a)
DVSM.new Int
defaultChunkSize
            (Int
i, [Word64]
ts) <- forall s.
Int -> Int -> [Word64] -> MVector s Word64 -> ST s (Int, [Word64])
writeWords Int
0 Int
defaultChunkSize [Word64]
us MVector s Word64
mv
            [MVector s Word64]
mvs <- forall s a. ST s a -> ST s a
ST.unsafeInterleaveST (forall s. [Word64] -> ST s [MVector s Word64]
goST [Word64]
ts)
            if forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Word64]
ts
              then forall (m :: * -> *) a. Monad m => a -> m a
return [forall a s. Storable a => Int -> MVector s a -> MVector s a
DVSM.take Int
i MVector s Word64
mv]
              else forall (m :: * -> *) a. Monad m => a -> m a
return (forall a s. Storable a => Int -> MVector s a -> MVector s a
DVSM.take Int
i MVector s Word64
mvforall a. a -> [a] -> [a]
:[MVector s Word64]
mvs)
          writeWords :: Int -> Int -> [Word64] -> DVSM.MVector s Word64 -> ST s (Int, [Word64])
          writeWords :: forall s.
Int -> Int -> [Word64] -> MVector s Word64 -> ST s (Int, [Word64])
writeWords Int
i Int
n [Word64]
us MVector s Word64
mv = if Int
i forall a. Ord a => a -> a -> Bool
< Int
n
            then case [Word64]
us of
              Word64
t:[Word64]
ts -> do
                forall (m :: * -> *) a.
(PrimMonad m, Storable a) =>
MVector (PrimState m) a -> Int -> a -> m ()
DVSM.write MVector s Word64
mv Int
i Word64
t
                forall s.
Int -> Int -> [Word64] -> MVector s Word64 -> ST s (Int, [Word64])
writeWords (Int
i forall a. Num a => a -> a -> a
+ Int
1) Int
n [Word64]
ts MVector s Word64
mv
              [] -> forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, [Word64]
us)
            else forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, [Word64]
us)
          defaultChunkSize :: Int
defaultChunkSize = (Int
LBSI.defaultChunkSize forall a. Integral a => a -> a -> a
`div` Int
64) forall a. Num a => a -> a -> a
* Int
8
  {-# INLINE toByteStrings #-}

instance ToByteStrings [DVS.Vector Word8] where
  toByteStrings :: [Vector Word8] -> [ByteString]
toByteStrings = (forall a. ToByteString a => a -> ByteString
toByteString forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>)
  {-# INLINE toByteStrings #-}

instance ToByteStrings [DVS.Vector Word16] where
  toByteStrings :: [Vector Word16] -> [ByteString]
toByteStrings = (forall a. ToByteString a => a -> ByteString
toByteString forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>)
  {-# INLINE toByteStrings #-}

instance ToByteStrings [DVS.Vector Word32] where
  toByteStrings :: [Vector Word32] -> [ByteString]
toByteStrings = (forall a. ToByteString a => a -> ByteString
toByteString forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>)
  {-# INLINE toByteStrings #-}

instance ToByteStrings [DVS.Vector Word64] where
  toByteStrings :: [Vector Word64] -> [ByteString]
toByteStrings = (forall a. ToByteString a => a -> ByteString
toByteString forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>)
  {-# INLINE toByteStrings #-}

-- | Chunk a @bs into list of smaller byte strings of no more than @n elements
chunkedBy :: Int -> BS.ByteString -> [BS.ByteString]
chunkedBy :: Int -> ByteString -> [ByteString]
chunkedBy Int
n ByteString
bs = if ByteString -> Int
BS.length ByteString
bs forall a. Eq a => a -> a -> Bool
== Int
0
  then []
  else case Int -> ByteString -> (ByteString, ByteString)
BS.splitAt Int
n ByteString
bs of
    (ByteString
as, ByteString
zs) -> ByteString
as forall a. a -> [a] -> [a]
: Int -> ByteString -> [ByteString]
chunkedBy Int
n ByteString
zs
{-# INLINE chunkedBy #-}

rechunk :: Int -> [BS.ByteString] -> [BS.ByteString]
rechunk :: Int -> [ByteString] -> [ByteString]
rechunk Int
size = [ByteString] -> [ByteString]
go
  where go :: [ByteString] -> [ByteString]
go (ByteString
bs:[ByteString]
bss) = let bsLen :: Int
bsLen = ByteString -> Int
BS.length ByteString
bs in if Int
bsLen forall a. Ord a => a -> a -> Bool
> Int
0
          then if Int
bsLen forall a. Ord a => a -> a -> Bool
< Int
size
            then let bsNeed :: Int
bsNeed = Int
size forall a. Num a => a -> a -> a
- Int
bsLen in case [ByteString]
bss of
              (ByteString
cs:[ByteString]
css) -> case ByteString -> Int
BS.length ByteString
cs of
                Int
csLen | Int
csLen forall a. Ord a => a -> a -> Bool
>  Int
bsNeed -> (ByteString
bs forall a. Semigroup a => a -> a -> a
<> Int -> ByteString -> ByteString
BS.take Int
bsNeed ByteString
cs )forall a. a -> [a] -> [a]
:[ByteString] -> [ByteString]
go (Int -> ByteString -> ByteString
BS.drop Int
bsNeed ByteString
csforall a. a -> [a] -> [a]
:[ByteString]
css)
                Int
csLen | Int
csLen forall a. Eq a => a -> a -> Bool
== Int
bsNeed -> (ByteString
bs forall a. Semigroup a => a -> a -> a
<> ByteString
cs                )forall a. a -> [a] -> [a]
:[ByteString] -> [ByteString]
go                    [ByteString]
css
                Int
_                       ->                            [ByteString] -> [ByteString]
go ((ByteString
bs forall a. Semigroup a => a -> a -> a
<> ByteString
cs)       forall a. a -> [a] -> [a]
:[ByteString]
css)
              [] -> [ByteString
bs]
            else if Int
size forall a. Eq a => a -> a -> Bool
== Int
bsLen
              then ByteString
bsforall a. a -> [a] -> [a]
:[ByteString] -> [ByteString]
go [ByteString]
bss
              else Int -> ByteString -> ByteString
BS.take Int
size ByteString
bsforall a. a -> [a] -> [a]
:[ByteString] -> [ByteString]
go (Int -> ByteString -> ByteString
BS.drop Int
size ByteString
bsforall a. a -> [a] -> [a]
:[ByteString]
bss)
          else [ByteString] -> [ByteString]
go [ByteString]
bss
        go [] = []

rechunkPadded :: Int -> [BS.ByteString] -> [BS.ByteString]
rechunkPadded :: Int -> [ByteString] -> [ByteString]
rechunkPadded Int
size = [ByteString] -> [ByteString]
go
  where go :: [ByteString] -> [ByteString]
go (ByteString
bs:[ByteString]
bss) = let bsLen :: Int
bsLen = ByteString -> Int
BS.length ByteString
bs in if Int
bsLen forall a. Ord a => a -> a -> Bool
> Int
0
          then if Int
bsLen forall a. Ord a => a -> a -> Bool
< Int
size
            then let bsNeed :: Int
bsNeed = Int
size forall a. Num a => a -> a -> a
- Int
bsLen in case [ByteString]
bss of
              (ByteString
cs:[ByteString]
css) -> case ByteString -> Int
BS.length ByteString
cs of
                Int
csLen | Int
csLen forall a. Ord a => a -> a -> Bool
>  Int
bsNeed -> (ByteString
bs forall a. Semigroup a => a -> a -> a
<> Int -> ByteString -> ByteString
BS.take Int
bsNeed ByteString
cs )forall a. a -> [a] -> [a]
:[ByteString] -> [ByteString]
go (Int -> ByteString -> ByteString
BS.drop Int
bsNeed ByteString
csforall a. a -> [a] -> [a]
:[ByteString]
css)
                Int
csLen | Int
csLen forall a. Eq a => a -> a -> Bool
== Int
bsNeed -> (ByteString
bs forall a. Semigroup a => a -> a -> a
<> ByteString
cs                )forall a. a -> [a] -> [a]
:[ByteString] -> [ByteString]
go                    [ByteString]
css
                Int
_                       ->                            [ByteString] -> [ByteString]
go ((ByteString
bs forall a. Semigroup a => a -> a -> a
<> ByteString
cs)       forall a. a -> [a] -> [a]
:[ByteString]
css)
              [] -> [ByteString
bs forall a. Semigroup a => a -> a -> a
<> Int -> Word8 -> ByteString
BS.replicate Int
bsNeed Word8
0]
            else if Int
size forall a. Eq a => a -> a -> Bool
== Int
bsLen
              then ByteString
bsforall a. a -> [a] -> [a]
:[ByteString] -> [ByteString]
go [ByteString]
bss
              else Int -> ByteString -> ByteString
BS.take Int
size ByteString
bsforall a. a -> [a] -> [a]
:[ByteString] -> [ByteString]
go (Int -> ByteString -> ByteString
BS.drop Int
size ByteString
bsforall a. a -> [a] -> [a]
:[ByteString]
bss)
          else [ByteString] -> [ByteString]
go [ByteString]
bss
        go [] = []

resegment :: Int -> [BS.ByteString] -> [BS.ByteString]
resegment :: Int -> [ByteString] -> [ByteString]
resegment Int
size = [ByteString] -> [ByteString]
go
  where go :: [ByteString] -> [ByteString]
go (ByteString
bs:[ByteString]
bss) = let bsLen :: Int
bsLen = ByteString -> Int
BS.length ByteString
bs in if Int
bsLen forall a. Ord a => a -> a -> Bool
> Int
0
          then if Int
bsLen forall a. Ord a => a -> a -> Bool
< Int
size
            then case [ByteString]
bss of
              (ByteString
cs:[ByteString]
css) -> let bsNeed :: Int
bsNeed = Int
size forall a. Num a => a -> a -> a
- Int
bsLen; csLen :: Int
csLen = ByteString -> Int
BS.length ByteString
cs in if
                | Int
csLen forall a. Ord a => a -> a -> Bool
>  Int
bsNeed -> (ByteString
bs forall a. Semigroup a => a -> a -> a
<> Int -> ByteString -> ByteString
BS.take Int
bsNeed ByteString
cs )forall a. a -> [a] -> [a]
:[ByteString] -> [ByteString]
go (Int -> ByteString -> ByteString
BS.drop Int
bsNeed ByteString
csforall a. a -> [a] -> [a]
:[ByteString]
css)
                | Int
csLen forall a. Eq a => a -> a -> Bool
== Int
bsNeed -> (ByteString
bs forall a. Semigroup a => a -> a -> a
<> ByteString
cs                )forall a. a -> [a] -> [a]
:[ByteString] -> [ByteString]
go                    [ByteString]
css
                | Bool
otherwise       ->                            [ByteString] -> [ByteString]
go ((ByteString
bs forall a. Semigroup a => a -> a -> a
<> ByteString
cs)       forall a. a -> [a] -> [a]
:[ByteString]
css)
              [] -> [ByteString
bs]
            else let bsCroppedLen :: Int
bsCroppedLen = (Int
bsLen forall a. Integral a => a -> a -> a
`div` Int
size) forall a. Num a => a -> a -> a
* Int
size in if Int
bsCroppedLen forall a. Eq a => a -> a -> Bool
== Int
bsLen
              then ByteString
bsforall a. a -> [a] -> [a]
:[ByteString] -> [ByteString]
go [ByteString]
bss
              else Int -> ByteString -> ByteString
BS.take Int
bsCroppedLen ByteString
bsforall a. a -> [a] -> [a]
:[ByteString] -> [ByteString]
go (Int -> ByteString -> ByteString
BS.drop Int
bsCroppedLen ByteString
bsforall a. a -> [a] -> [a]
:[ByteString]
bss)
          else [ByteString] -> [ByteString]
go [ByteString]
bss
        go [] = []

resegmentPadded :: Int -> [BS.ByteString] -> [BS.ByteString]
resegmentPadded :: Int -> [ByteString] -> [ByteString]
resegmentPadded Int
size = [ByteString] -> [ByteString]
go
  where go :: [ByteString] -> [ByteString]
go (ByteString
bs:[ByteString]
bss) = let bsLen :: Int
bsLen = ByteString -> Int
BS.length ByteString
bs in if Int
bsLen forall a. Ord a => a -> a -> Bool
> Int
0
          then if Int
bsLen forall a. Ord a => a -> a -> Bool
< Int
size
            then let bsNeed :: Int
bsNeed = Int
size forall a. Num a => a -> a -> a
- Int
bsLen in case [ByteString]
bss of
              (ByteString
cs:[ByteString]
css) -> case ByteString -> Int
BS.length ByteString
cs of
                Int
csLen | Int
csLen forall a. Ord a => a -> a -> Bool
>  Int
bsNeed -> (ByteString
bs forall a. Semigroup a => a -> a -> a
<> Int -> ByteString -> ByteString
BS.take Int
bsNeed ByteString
cs )forall a. a -> [a] -> [a]
:[ByteString] -> [ByteString]
go (Int -> ByteString -> ByteString
BS.drop Int
bsNeed ByteString
csforall a. a -> [a] -> [a]
:[ByteString]
css)
                Int
csLen | Int
csLen forall a. Eq a => a -> a -> Bool
== Int
bsNeed -> (ByteString
bs forall a. Semigroup a => a -> a -> a
<> ByteString
cs                )forall a. a -> [a] -> [a]
:[ByteString] -> [ByteString]
go                    [ByteString]
css
                Int
_                       ->                            [ByteString] -> [ByteString]
go ((ByteString
bs forall a. Semigroup a => a -> a -> a
<> ByteString
cs)       forall a. a -> [a] -> [a]
:[ByteString]
css)
              [] -> [ByteString
bs forall a. Semigroup a => a -> a -> a
<> Int -> Word8 -> ByteString
BS.replicate Int
bsNeed Word8
0]
            else case (Int
bsLen forall a. Integral a => a -> a -> a
`div` Int
size) forall a. Num a => a -> a -> a
* Int
size of
              Int
bsCroppedLen -> if Int
bsCroppedLen forall a. Eq a => a -> a -> Bool
== Int
bsLen
                then ByteString
bsforall a. a -> [a] -> [a]
:[ByteString] -> [ByteString]
go [ByteString]
bss
                else Int -> ByteString -> ByteString
BS.take Int
bsCroppedLen ByteString
bsforall a. a -> [a] -> [a]
:[ByteString] -> [ByteString]
go (Int -> ByteString -> ByteString
BS.drop Int
bsCroppedLen ByteString
bsforall a. a -> [a] -> [a]
:[ByteString]
bss)
          else [ByteString] -> [ByteString]
go [ByteString]
bss
        go [] = []

hGetContentsChunkedBy :: Int -> IO.Handle -> IO [BS.ByteString]
hGetContentsChunkedBy :: Int -> Handle -> IO [ByteString]
hGetContentsChunkedBy Int
k Handle
h = IO [ByteString]
lazyRead
  where lazyRead :: IO [ByteString]
lazyRead = forall a. IO a -> IO a
IO.unsafeInterleaveIO IO [ByteString]
loop
        loop :: IO [ByteString]
loop = do
          ByteString
c <- Int -> (Ptr Word8 -> IO Int) -> IO ByteString
BSI.createAndTrim Int
k forall a b. (a -> b) -> a -> b
$ \Ptr Word8
p -> forall a. Handle -> Ptr a -> Int -> IO Int
IO.hGetBuf Handle
h Ptr Word8
p Int
k
          if ByteString -> Bool
BS.null ByteString
c
            then Handle -> IO ()
IO.hClose Handle
h forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return []
            else (ByteString
cforall a. a -> [a] -> [a]
:) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> IO [ByteString]
lazyRead

mmap :: FilePath -> IO BS.ByteString
mmap :: FilePath -> IO ByteString
mmap FilePath
filepath = do
  (ForeignPtr Word8
fptr :: ForeignPtr Word8, Int
offset, Int
size) <- forall a.
FilePath
-> Mode -> Maybe (Int64, Int) -> IO (ForeignPtr a, Int, Int)
IO.mmapFileForeignPtr FilePath
filepath Mode
IO.ReadOnly forall a. Maybe a
Nothing
  let !bs :: ByteString
bs = ForeignPtr Word8 -> Int -> Int -> ByteString
BSI.fromForeignPtr (forall a b. ForeignPtr a -> ForeignPtr b
castForeignPtr ForeignPtr Word8
fptr) Int
offset Int
size
  forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
bs

padded :: Int -> BS.ByteString -> BS.ByteString
padded :: Int -> ByteString -> ByteString
padded Int
n ByteString
v = ByteString
v forall a. Semigroup a => a -> a -> a
<> Int -> Word8 -> ByteString
BS.replicate ((Int
n forall a. Num a => a -> a -> a
- ByteString -> Int
BS.length ByteString
v) forall a. Ord a => a -> a -> a
`max` Int
0) Word8
0
{-# INLINE padded #-}