{-# language BangPatterns #-}
{-# language BlockArguments #-}
{-# language DuplicateRecordFields #-}
{-# language MagicHash #-}
{-# language NamedFieldPuns #-}
{-# language RankNTypes #-}
{-# language TypeApplications #-}
{-# language UnboxedTuples #-}
module Data.Bytes
(
Bytes
, Pure.empty
, Pure.emptyPinned
, Pure.emptyPinnedU
, null
, Pure.length
, uncons
, unsnoc
, any
, all
, singleton
, doubleton
, tripleton
, replicate
, singletonU
, doubletonU
, tripletonU
, replicateU
, takeWhile
, dropWhile
, takeWhileEnd
, dropWhileEnd
, foldl
, Pure.foldl'
, foldr
, foldr'
, ifoldl'
, elem
, Byte.split
, Byte.splitU
, Byte.splitInit
, Byte.splitInitU
, Byte.splitNonEmpty
, Byte.splitStream
, Byte.split1
, Byte.split2
, Byte.split3
, Byte.split4
, Byte.splitEnd1
, intercalate
, intercalateByte2
, Byte.count
, isPrefixOf
, isSuffixOf
, stripPrefix
, stripOptionalPrefix
, stripSuffix
, stripOptionalSuffix
, longestCommonPrefix
, stripCStringPrefix
, isBytePrefixOf
, isByteSuffixOf
, equalsLatin1
, equalsLatin2
, equalsLatin3
, equalsLatin4
, equalsLatin5
, equalsLatin6
, equalsLatin7
, equalsLatin8
, equalsLatin9
, equalsLatin10
, equalsLatin11
, equalsLatin12
, equalsCString
, Pure.fnv1a32
, Pure.fnv1a64
, unsafeTake
, unsafeDrop
, unsafeIndex
, Pure.unsafeCopy
, Pure.pin
, Pure.contents
, touch
, Pure.toByteArray
, Pure.toByteArrayClone
, Pure.toPinnedByteArray
, Pure.toPinnedByteArrayClone
, fromAsciiString
, fromLatinString
, Pure.fromByteArray
, toLatinString
, fromCString#
, toByteString
, fromByteString
, fromShortByteString
, toShortByteString
, toShortByteStringClone
, toLowerAsciiByteArrayClone
, BIO.hGet
, readFile
, BIO.hPut
) where
import Prelude hiding (length,takeWhile,dropWhile,null,foldl,foldr,elem,replicate,any,all,readFile)
import Control.Monad.Primitive (PrimMonad,primitive_,unsafeIOToPrim)
import Control.Monad.ST (ST)
import Control.Monad.ST.Run (runByteArrayST)
import Data.ByteString (ByteString)
import Data.ByteString.Short.Internal (ShortByteString(SBS))
import Data.Bytes.Compat (cstringLength#)
import Data.Bytes.Pure (length,fromByteArray)
import Data.Bytes.Types (Bytes(Bytes,array,offset))
import Data.Char (ord)
import Data.Primitive (ByteArray(ByteArray))
import Foreign.C.String (CString)
import Foreign.Ptr (Ptr,plusPtr,castPtr)
import GHC.Exts (Int(I#),Char(C#),Ptr(Ptr),word2Int#,chr#)
import GHC.Exts (Addr#,Word#,Int#)
import GHC.IO (unsafeIOToST)
import GHC.Word (Word8(W8#))
import qualified Data.ByteString as ByteString
import qualified Data.ByteString.Internal as ByteString
import qualified Data.ByteString.Unsafe as ByteString
import qualified Data.Bytes.Byte as Byte
import qualified Data.Bytes.Chunks as Chunks
import qualified Data.Bytes.IO as BIO
import qualified Data.Bytes.Pure as Pure
import qualified Data.Bytes.Types as Types
import qualified Data.Foldable as F
import qualified Data.List as List
import qualified Data.Primitive as PM
import qualified Data.Primitive.Ptr as PM
import qualified GHC.Exts as Exts
import qualified GHC.ForeignPtr as ForeignPtr
null :: Bytes -> Bool
null (Bytes _ _ len) = len == 0
uncons :: Bytes -> Maybe (Word8, Bytes)
uncons b = case length b of
0 -> Nothing
_ -> Just (unsafeIndex b 0, unsafeDrop 1 b)
unsnoc :: Bytes -> Maybe (Bytes, Word8)
unsnoc b@(Bytes arr off len) = case len of
0 -> Nothing
_ -> let !len' = len - 1 in
Just (Bytes arr off len', unsafeIndex b len')
isBytePrefixOf :: Word8 -> Bytes -> Bool
isBytePrefixOf w b = case length b of
0 -> False
_ -> unsafeIndex b 0 == w
isByteSuffixOf :: Word8 -> Bytes -> Bool
isByteSuffixOf w b = case len of
0 -> False
_ -> unsafeIndex b (len - 1) == w
where
len = length b
isPrefixOf :: Bytes -> Bytes -> Bool
isPrefixOf (Bytes a aOff aLen) (Bytes b bOff bLen) =
if aLen <= bLen
then compareByteArrays a aOff b bOff aLen == EQ
else False
isSuffixOf :: Bytes -> Bytes -> Bool
isSuffixOf (Bytes a aOff aLen) (Bytes b bOff bLen) =
if aLen <= bLen
then compareByteArrays a aOff b (bOff + bLen - aLen) aLen == EQ
else False
longestCommonPrefix :: Bytes -> Bytes -> Bytes
longestCommonPrefix a b = loop 0
where
loop :: Int -> Bytes
loop !into
| into < maxLen
&& unsafeIndex a into == unsafeIndex b into
= loop (into + 1)
| otherwise = unsafeTake into a
maxLen = min (length a) (length b)
singleton :: Word8 -> Bytes
singleton !a = Bytes (singletonU a) 0 1
doubleton :: Word8 -> Word8 -> Bytes
doubleton !a !b = Bytes (doubletonU a b) 0 2
tripleton :: Word8 -> Word8 -> Word8 -> Bytes
tripleton !a !b !c = Bytes (tripletonU a b c) 0 3
singletonU :: Word8 -> ByteArray
singletonU !a = runByteArrayST do
arr <- PM.newByteArray 1
PM.writeByteArray arr 0 a
PM.unsafeFreezeByteArray arr
doubletonU :: Word8 -> Word8 -> ByteArray
doubletonU !a !b = runByteArrayST do
arr <- PM.newByteArray 2
PM.writeByteArray arr 0 a
PM.writeByteArray arr 1 b
PM.unsafeFreezeByteArray arr
tripletonU :: Word8 -> Word8 -> Word8 -> ByteArray
tripletonU !a !b !c = runByteArrayST do
arr <- PM.newByteArray 3
PM.writeByteArray arr 0 a
PM.writeByteArray arr 1 b
PM.writeByteArray arr 2 c
PM.unsafeFreezeByteArray arr
replicate ::
Int
-> Word8
-> Bytes
replicate !n !w = Bytes (replicateU n w) 0 n
replicateU :: Int -> Word8 -> ByteArray
replicateU !n !w = runByteArrayST do
arr <- PM.newByteArray n
PM.setByteArray arr 0 n w
PM.unsafeFreezeByteArray arr
stripPrefix :: Bytes -> Bytes -> Maybe Bytes
stripPrefix !pre !str = if pre `isPrefixOf` str
then Just (Bytes (array str) (offset str + length pre) (length str - length pre))
else Nothing
stripOptionalPrefix :: Bytes -> Bytes -> Bytes
stripOptionalPrefix !pre !str = if pre `isPrefixOf` str
then Bytes (array str) (offset str + length pre) (length str - length pre)
else str
stripSuffix :: Bytes -> Bytes -> Maybe Bytes
stripSuffix !suf !str = if suf `isSuffixOf` str
then Just (Bytes (array str) (offset str) (length str - length suf))
else Nothing
stripOptionalSuffix :: Bytes -> Bytes -> Bytes
stripOptionalSuffix !suf !str = if suf `isSuffixOf` str
then Bytes (array str) (offset str) (length str - length suf)
else str
elem :: Word8 -> Bytes -> Bool
elem (W8# w) b = case elemLoop 0# w b of
1# -> True
_ -> False
elemLoop :: Int# -> Word# -> Bytes -> Int#
elemLoop !r !w (Bytes arr@(ByteArray arr# ) off@(I# off# ) len) = case len of
0 -> r
_ -> elemLoop (Exts.orI# r (Exts.eqWord# w (Exts.indexWord8Array# arr# off# ) )) w (Bytes arr (off + 1) (len - 1))
takeWhile :: (Word8 -> Bool) -> Bytes -> Bytes
{-# inline takeWhile #-}
takeWhile k b = unsafeTake (countWhile k b) b
dropWhile :: (Word8 -> Bool) -> Bytes -> Bytes
{-# inline dropWhile #-}
dropWhile k b = unsafeDrop (countWhile k b) b
unsafeIndex :: Bytes -> Int -> Word8
unsafeIndex (Bytes arr off _) ix = PM.indexByteArray arr (off + ix)
dropWhileEnd :: (Word8 -> Bool) -> Bytes -> Bytes
{-# inline dropWhileEnd #-}
dropWhileEnd k !b = unsafeTake (length b - countWhileEnd k b) b
takeWhileEnd :: (Word8 -> Bool) -> Bytes -> Bytes
{-# inline takeWhileEnd #-}
takeWhileEnd k !b =
let n = countWhileEnd k b
in Bytes (array b) (offset b + length b - n) n
unsafeTake :: Int -> Bytes -> Bytes
{-# inline unsafeTake #-}
unsafeTake n (Bytes arr off _) =
Bytes arr off n
unsafeDrop :: Int -> Bytes -> Bytes
{-# inline unsafeDrop #-}
unsafeDrop n (Bytes arr off len) =
Bytes arr (off + n) (len - n)
countWhile :: (Word8 -> Bool) -> Bytes -> Int
{-# inline countWhile #-}
countWhile k (Bytes arr off0 len0) = go off0 len0 0 where
go !off !len !n = if len > 0
then if k (PM.indexByteArray arr off)
then go (off + 1) (len - 1) (n + 1)
else n
else n
countWhileEnd :: (Word8 -> Bool) -> Bytes -> Int
{-# inline countWhileEnd #-}
countWhileEnd k (Bytes arr off0 len0) = go (off0 + len0 - 1) (len0 - 1) 0 where
go !off !len !n = if len >= 0
then if k (PM.indexByteArray arr off)
then go (off - 1) (len - 1) (n + 1)
else n
else n
foldl :: (a -> Word8 -> a) -> a -> Bytes -> a
{-# inline foldl #-}
foldl f a0 (Bytes arr off0 len0) =
go (off0 + len0 - 1) (len0 - 1)
where
go !off !ix = case ix of
(-1) -> a0
_ -> f (go (off - 1) (ix - 1)) (PM.indexByteArray arr off)
foldr :: (Word8 -> a -> a) -> a -> Bytes -> a
{-# inline foldr #-}
foldr f a0 (Bytes arr off0 len0) = go off0 len0 where
go !off !len = case len of
0 -> a0
_ -> f (PM.indexByteArray arr off) (go (off + 1) (len - 1))
ifoldl' :: (a -> Int -> Word8 -> a) -> a -> Bytes -> a
{-# inline ifoldl' #-}
ifoldl' f a0 (Bytes arr off0 len0) = go a0 0 off0 len0 where
go !a !ix !off !len = case len of
0 -> a
_ -> go (f a ix (PM.indexByteArray arr off)) (ix + 1) (off + 1) (len - 1)
foldr' :: (Word8 -> a -> a) -> a -> Bytes -> a
{-# inline foldr' #-}
foldr' f a0 (Bytes arr off0 len0) =
go a0 (off0 + len0 - 1) (len0 - 1)
where
go !a !off !ix = case ix of
(-1) -> a
_ -> go (f (PM.indexByteArray arr off) a) (off - 1) (ix - 1)
fromAsciiString :: String -> Bytes
fromAsciiString = fromByteArray
. Exts.fromList
. map (\c -> let i = ord c in if i < 128 then fromIntegral @Int @Word8 i else 0)
fromLatinString :: String -> Bytes
fromLatinString =
fromByteArray . Exts.fromList . map (fromIntegral @Int @Word8 . ord)
toLatinString :: Bytes -> String
toLatinString = foldr (\(W8# w) xs -> C# (chr# (word2Int# w)) : xs) []
fromCString# :: Addr# -> Bytes
fromCString# a = Bytes
( runByteArrayST $ do
dst@(PM.MutableByteArray dst# ) <- PM.newByteArray len
PM.copyPtrToMutablePrimArray
(PM.MutablePrimArray dst# ) 0 (Ptr a :: Ptr Word8) len
PM.unsafeFreezeByteArray dst
) 0 len
where
len = I# (cstringLength# a)
compareByteArrays :: ByteArray -> Int -> ByteArray -> Int -> Int -> Ordering
{-# INLINE compareByteArrays #-}
compareByteArrays (ByteArray ba1#) (I# off1#) (ByteArray ba2#) (I# off2#) (I# n#) =
compare (I# (Exts.compareByteArrays# ba1# off1# ba2# off2# n#)) 0
equalsLatin1 :: Char -> Bytes -> Bool
equalsLatin1 !c0 (Bytes arr off len) = case len of
1 -> c0 == indexCharArray arr off
_ -> False
equalsLatin2 :: Char -> Char -> Bytes -> Bool
equalsLatin2 !c0 !c1 (Bytes arr off len) = case len of
2 -> c0 == indexCharArray arr off &&
c1 == indexCharArray arr (off + 1)
_ -> False
equalsLatin3 :: Char -> Char -> Char -> Bytes -> Bool
equalsLatin3 !c0 !c1 !c2 (Bytes arr off len) = case len of
3 -> c0 == indexCharArray arr off &&
c1 == indexCharArray arr (off + 1) &&
c2 == indexCharArray arr (off + 2)
_ -> False
equalsLatin4 :: Char -> Char -> Char -> Char -> Bytes -> Bool
equalsLatin4 !c0 !c1 !c2 !c3 (Bytes arr off len) = case len of
4 -> c0 == indexCharArray arr off &&
c1 == indexCharArray arr (off + 1) &&
c2 == indexCharArray arr (off + 2) &&
c3 == indexCharArray arr (off + 3)
_ -> False
equalsLatin5 :: Char -> Char -> Char -> Char -> Char -> Bytes -> Bool
equalsLatin5 !c0 !c1 !c2 !c3 !c4 (Bytes arr off len) = case len of
5 -> c0 == indexCharArray arr off &&
c1 == indexCharArray arr (off + 1) &&
c2 == indexCharArray arr (off + 2) &&
c3 == indexCharArray arr (off + 3) &&
c4 == indexCharArray arr (off + 4)
_ -> False
equalsLatin6 :: Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool
equalsLatin6 !c0 !c1 !c2 !c3 !c4 !c5 (Bytes arr off len) = case len of
6 -> c0 == indexCharArray arr off &&
c1 == indexCharArray arr (off + 1) &&
c2 == indexCharArray arr (off + 2) &&
c3 == indexCharArray arr (off + 3) &&
c4 == indexCharArray arr (off + 4) &&
c5 == indexCharArray arr (off + 5)
_ -> False
equalsLatin7 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool
equalsLatin7 !c0 !c1 !c2 !c3 !c4 !c5 !c6 (Bytes arr off len) = case len of
7 -> c0 == indexCharArray arr off &&
c1 == indexCharArray arr (off + 1) &&
c2 == indexCharArray arr (off + 2) &&
c3 == indexCharArray arr (off + 3) &&
c4 == indexCharArray arr (off + 4) &&
c5 == indexCharArray arr (off + 5) &&
c6 == indexCharArray arr (off + 6)
_ -> False
equalsLatin8 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool
equalsLatin8 !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 (Bytes arr off len) = case len of
8 -> c0 == indexCharArray arr off &&
c1 == indexCharArray arr (off + 1) &&
c2 == indexCharArray arr (off + 2) &&
c3 == indexCharArray arr (off + 3) &&
c4 == indexCharArray arr (off + 4) &&
c5 == indexCharArray arr (off + 5) &&
c6 == indexCharArray arr (off + 6) &&
c7 == indexCharArray arr (off + 7)
_ -> False
equalsLatin9 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool
equalsLatin9 !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 (Bytes arr off len) = case len of
9 -> c0 == indexCharArray arr off &&
c1 == indexCharArray arr (off + 1) &&
c2 == indexCharArray arr (off + 2) &&
c3 == indexCharArray arr (off + 3) &&
c4 == indexCharArray arr (off + 4) &&
c5 == indexCharArray arr (off + 5) &&
c6 == indexCharArray arr (off + 6) &&
c7 == indexCharArray arr (off + 7) &&
c8 == indexCharArray arr (off + 8)
_ -> False
equalsLatin10 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool
equalsLatin10 !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 !c9 (Bytes arr off len) = case len of
10 -> c0 == indexCharArray arr off &&
c1 == indexCharArray arr (off + 1) &&
c2 == indexCharArray arr (off + 2) &&
c3 == indexCharArray arr (off + 3) &&
c4 == indexCharArray arr (off + 4) &&
c5 == indexCharArray arr (off + 5) &&
c6 == indexCharArray arr (off + 6) &&
c7 == indexCharArray arr (off + 7) &&
c8 == indexCharArray arr (off + 8) &&
c9 == indexCharArray arr (off + 9)
_ -> False
equalsLatin11 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool
equalsLatin11 !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 !c9 !c10 (Bytes arr off len) = case len of
11 -> c0 == indexCharArray arr off &&
c1 == indexCharArray arr (off + 1) &&
c2 == indexCharArray arr (off + 2) &&
c3 == indexCharArray arr (off + 3) &&
c4 == indexCharArray arr (off + 4) &&
c5 == indexCharArray arr (off + 5) &&
c6 == indexCharArray arr (off + 6) &&
c7 == indexCharArray arr (off + 7) &&
c8 == indexCharArray arr (off + 8) &&
c9 == indexCharArray arr (off + 9) &&
c10 == indexCharArray arr (off + 10)
_ -> False
equalsLatin12 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool
equalsLatin12 !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 !c9 !c10 !c11 (Bytes arr off len) = case len of
12 -> c0 == indexCharArray arr off &&
c1 == indexCharArray arr (off + 1) &&
c2 == indexCharArray arr (off + 2) &&
c3 == indexCharArray arr (off + 3) &&
c4 == indexCharArray arr (off + 4) &&
c5 == indexCharArray arr (off + 5) &&
c6 == indexCharArray arr (off + 6) &&
c7 == indexCharArray arr (off + 7) &&
c8 == indexCharArray arr (off + 8) &&
c9 == indexCharArray arr (off + 9) &&
c10 == indexCharArray arr (off + 10) &&
c11 == indexCharArray arr (off + 11)
_ -> False
equalsCString :: CString -> Bytes -> Bool
{-# inline equalsCString #-}
equalsCString !ptr0 (Bytes arr off0 len0) = go (castPtr ptr0 :: Ptr Word8) off0 len0 where
go !ptr !off !len = case len of
0 -> PM.indexOffPtr ptr 0 == (0 :: Word8)
_ -> case PM.indexOffPtr ptr 0 of
0 -> False
c -> c == PM.indexByteArray arr off && go (plusPtr ptr 1) (off + 1) (len - 1)
stripCStringPrefix :: CString -> Bytes -> Maybe Bytes
{-# inline stripCStringPrefix #-}
stripCStringPrefix !ptr0 (Bytes arr off0 len0) = go (castPtr ptr0 :: Ptr Word8) off0 len0 where
go !ptr !off !len = case PM.indexOffPtr ptr 0 of
0 -> Just (Bytes arr off len)
c -> case len of
0 -> Nothing
_ -> case c == PM.indexByteArray arr off of
True -> go (plusPtr ptr 1) (off + 1) (len - 1)
False -> Nothing
touch :: PrimMonad m => Bytes -> m ()
touch (Bytes (ByteArray arr) _ _) = unsafeIOToPrim
(primitive_ (\s -> Exts.touch# arr s))
indexCharArray :: ByteArray -> Int -> Char
indexCharArray (ByteArray arr) (I# off) = C# (Exts.indexCharArray# arr off)
readFile :: FilePath -> IO Bytes
readFile f = Chunks.concat <$> Chunks.readFile f
intercalate ::
Bytes
-> [Bytes]
-> Bytes
intercalate !_ [] = mempty
intercalate !_ [x] = x
intercalate (Bytes sarr soff slen) (Bytes arr0 off0 len0 : bs) = Bytes r 0 fullLen
where
!fullLen = List.foldl' (\acc (Bytes _ _ len) -> acc + len + slen) 0 bs + len0
r = runByteArrayST $ do
marr <- PM.newByteArray fullLen
PM.copyByteArray marr 0 arr0 off0 len0
!_ <- F.foldlM
(\ !currLen (Bytes arr off len) -> do
PM.copyByteArray marr currLen sarr soff slen
PM.copyByteArray marr (currLen + slen) arr off len
pure (currLen + len + slen)
) len0 bs
PM.unsafeFreezeByteArray marr
intercalateByte2 ::
Word8
-> Bytes
-> Bytes
-> Bytes
intercalateByte2 !sep !a !b = Bytes
{ Types.array = runByteArrayST $ do
dst <- PM.newByteArray len
Pure.unsafeCopy dst 0 a
PM.writeByteArray dst (length a) sep
Pure.unsafeCopy dst (length a + 1) b
PM.unsafeFreezeByteArray dst
, Types.length = len
, Types.offset = 0
}
where len = length a + length b + 1
any :: (Word8 -> Bool) -> Bytes -> Bool
{-# inline any #-}
any f = foldr (\b r -> f b || r) False
all :: (Word8 -> Bool) -> Bytes -> Bool
{-# inline all #-}
all f = foldr (\b r -> f b && r) True
toByteString :: Bytes -> ByteString
toByteString !b = pinnedToByteString (Pure.pin b)
toShortByteString :: Bytes -> ShortByteString
toShortByteString !b = case Pure.toByteArray b of
PM.ByteArray x -> SBS x
toShortByteStringClone :: Bytes -> ShortByteString
toShortByteStringClone !b = case Pure.toByteArrayClone b of
PM.ByteArray x -> SBS x
fromShortByteString :: ShortByteString -> Bytes
fromShortByteString (SBS x) = fromByteArray (ByteArray x)
toLowerAsciiByteArrayClone :: Bytes -> ByteArray
toLowerAsciiByteArrayClone (Bytes src off0 len0) =
runByteArrayST action
where
action :: forall s. ST s ByteArray
action = do
dst <- PM.newByteArray len0
let go !off !ix !len = if len == 0
then pure ()
else do
let w = PM.indexByteArray src off :: Word8
w' = if w >= 0x41 && w <= 0x5A
then w + 32
else w
PM.writeByteArray dst ix w'
go (off + 1) (ix + 1) (len - 1)
go off0 0 len0
PM.unsafeFreezeByteArray dst
fromByteString :: ByteString -> Bytes
fromByteString !b = Bytes
( runByteArrayST $ unsafeIOToST $ do
dst@(PM.MutableByteArray dst# ) <- PM.newByteArray len
ByteString.unsafeUseAsCString b $ \src -> do
PM.copyPtrToMutablePrimArray (PM.MutablePrimArray dst# ) 0 src len
PM.unsafeFreezeByteArray dst
) 0 len
where
!len = ByteString.length b
pinnedToByteString :: Bytes -> ByteString
pinnedToByteString (Bytes y@(PM.ByteArray x) off len) =
ByteString.PS
(ForeignPtr.ForeignPtr
(case plusPtr (PM.byteArrayContents y) off of {Exts.Ptr p -> p})
(ForeignPtr.PlainPtr (Exts.unsafeCoerce# x))
)
0 len