module Z.Data.CBytes
( CBytes(CB)
, toPrimArray
, pack
, unpack
, null , length
, empty, append, concat, intercalate, intercalateElem
, toBytes, fromBytes, toText, toTextMaybe, fromText, toBuilder, buildCBytes
, fromCString, fromCStringN
, withCBytesUnsafe, withCBytes, allocCBytesUnsafe, allocCBytes
, CString
, V.c2w, V.w2c
) where
import Control.DeepSeq
import Control.Monad
import Control.Monad.Primitive
import Control.Monad.ST
import Data.Bits
import Data.Foldable (foldlM)
import Data.Hashable (Hashable(..))
import qualified Data.List as List
import Data.Primitive.PrimArray
import Data.Word
import Foreign.C.String
import GHC.Exts
import GHC.CString
import GHC.Ptr
import GHC.Stack
import Prelude hiding (all, any, appendFile, break,
concat, concatMap, drop, dropWhile,
elem, filter, foldl, foldl1, foldr,
foldr1, getContents, getLine, head,
init, interact, last, length, lines,
map, maximum, minimum, notElem, null,
putStr, putStrLn, readFile, replicate,
reverse, scanl, scanl1, scanr, scanr1,
span, splitAt, tail, take, takeWhile,
unlines, unzip, writeFile, zip,
zipWith)
import Z.Data.Array
import Z.Data.Array.Unaligned
import qualified Z.Data.Builder as B
import qualified Z.Data.Text as T
import qualified Z.Data.Text.ShowT as T
import Z.Data.Text.UTF8Codec (encodeCharModifiedUTF8, decodeChar)
import qualified Z.Data.Vector.Base as V
import Z.Foreign
import System.IO.Unsafe (unsafeDupablePerformIO)
import Test.QuickCheck.Arbitrary (Arbitrary(..), CoArbitrary(..))
data CBytes = CBytes
{
toPrimArray :: {-# UNPACK #-} !(PrimArray Word8)
}
pattern CB :: PrimArray Word8 -> CBytes
pattern CB arr <- CBytes arr where
CB arr = fromPrimArray arr
fromPrimArray :: PrimArray Word8 -> CBytes
{-# INLINE fromPrimArray #-}
fromPrimArray arr = runST (
case V.elemIndex 0 arr of
Just i -> do
mpa <- newPrimArray i
copyPrimArray mpa 0 arr 0 i
pa <- unsafeFreezePrimArray mpa
return (CBytes pa)
_ -> return (CBytes arr))
instance Show CBytes where
showsPrec p t = showsPrec p (unpack t)
instance Read CBytes where
readsPrec p s = [(pack x, r) | (x, r) <- readsPrec p s]
instance NFData CBytes where
{-# INLINE rnf #-}
rnf (CBytes _) = ()
instance Eq CBytes where
{-# INLINE (==) #-}
CBytes ba == CBytes bb = ba == bb
instance Ord CBytes where
{-# INLINE compare #-}
CBytes ba `compare` CBytes bb = ba `compare` bb
instance Semigroup CBytes where
(<>) = append
instance Monoid CBytes where
{-# INLINE mempty #-}
mempty = empty
{-# INLINE mappend #-}
mappend = append
{-# INLINE mconcat #-}
mconcat = concat
instance Hashable CBytes where
hashWithSalt salt (CBytes pa@(PrimArray ba#)) = unsafeDupablePerformIO $ do
V.c_fnv_hash_ba ba# 0 (sizeofPrimArray pa) salt
instance Arbitrary CBytes where
arbitrary = pack <$> arbitrary
shrink a = pack <$> shrink (unpack a)
instance CoArbitrary CBytes where
coarbitrary = coarbitrary . unpack
instance Unaligned CBytes where
{-# INLINE unalignedSize #-}
unalignedSize (CBytes arr) = sizeofPrimArray arr + 1
{-# INLINE peekMBA #-}
peekMBA mba# i = do
b <- getSizeofMutableByteArray (MutableByteArray mba#)
let rest = b-i
l <- c_memchr mba# i 0 rest
let l' = if l == -1 then rest else l
mpa <- newPrimArray l'
copyMutablePrimArray mpa 0 (MutablePrimArray mba#) i l'
pa <- unsafeFreezePrimArray mpa
return (CBytes pa)
{-# INLINE pokeMBA #-}
pokeMBA mba# i (CBytes pa) = do
let l = sizeofPrimArray pa
copyPrimArray (MutablePrimArray mba# :: MutablePrimArray RealWorld Word8) i pa 0 l
writePrimArray (MutablePrimArray mba# :: MutablePrimArray RealWorld Word8) (i+l) 0
{-# INLINE indexBA #-}
indexBA ba# i = runST (do
let b = sizeofByteArray (ByteArray ba#)
rest = b-i
l = V.c_memchr ba# i 0 rest
l' = if l == -1 then rest else l
mpa <- newPrimArray l'
copyPrimArray mpa 0 (PrimArray ba#) i l'
pa <- unsafeFreezePrimArray mpa
return (CBytes pa))
instance T.ShowT CBytes where
{-# INLINE toTextBuilder #-}
toTextBuilder _ = T.stringUTF8 . show . unpack
append :: CBytes -> CBytes -> CBytes
{-# INLINABLE append #-}
append strA@(CBytes pa) strB@(CBytes pb)
| lenA == 0 = strB
| lenB == 0 = strA
| otherwise = unsafeDupablePerformIO $ do
mpa <- newPrimArray (lenA+lenB)
copyPrimArray mpa 0 pa 0 lenA
copyPrimArray mpa lenA pb 0 lenB
pa' <- unsafeFreezePrimArray mpa
return (CBytes pa')
where
lenA = length strA
lenB = length strB
empty :: CBytes
{-# NOINLINE empty #-}
empty = CBytes (V.empty)
concat :: [CBytes] -> CBytes
{-# INLINABLE concat #-}
concat bss = case pre 0 0 bss of
(0, _) -> empty
(1, _) -> let Just b = List.find (not . null) bss in b
(_, l) -> runST $ do
buf <- newPrimArray l
copy bss 0 buf
CBytes <$> unsafeFreezePrimArray buf
where
pre :: Int -> Int -> [CBytes] -> (Int, Int)
pre !nacc !lacc [] = (nacc, lacc)
pre !nacc !lacc (b:bs)
| l <= 0 = pre nacc lacc bs
| otherwise = pre (nacc+1) (l + lacc) bs
where !l = length b
copy :: [CBytes] -> Int -> MutablePrimArray s Word8 -> ST s ()
copy [] !_ !_ = return ()
copy (b@(CBytes ba):bs) !i !mba = do
let l = length b
when (l /= 0) (copyPrimArray mba i ba 0 l)
copy bs (i+l) mba
intercalate :: CBytes -> [CBytes] -> CBytes
{-# INLINE intercalate #-}
intercalate s = concat . List.intersperse s
intercalateElem :: Word8 -> [CBytes] -> CBytes
{-# INLINABLE intercalateElem #-}
intercalateElem 0 [] = empty
intercalateElem 0 (bs:_) = bs
intercalateElem w8 bss = case len bss 0 of
0 -> empty
l -> runST $ do
buf <- newPrimArray l
copy bss 0 buf
CBytes <$> unsafeFreezePrimArray buf
where
len [] !acc = acc
len [b] !acc = length b + acc
len (b:bs) !acc = len bs (acc + length b + 1)
copy :: [CBytes] -> Int -> MutablePrimArray s Word8 -> ST s ()
copy (b@(CBytes ba):bs) !i !mba = do
let l = length b
when (l /= 0) (copyPrimArray mba i ba 0 l)
case bs of
[] -> return ()
_ -> do
let i' = i + l
writePrimArray mba i' w8
copy bs (i'+1) mba
instance IsString CBytes where
{-# INLINE fromString #-}
fromString = pack
{-# RULES
"CBytes pack/unpackCString#" forall addr# .
pack (unpackCString# addr#) = packAddr addr#
#-}
{-# RULES
"CBytes pack/unpackCStringUtf8#" forall addr# .
pack (unpackCStringUtf8# addr#) = packAddr addr#
#-}
packAddr :: Addr# -> CBytes
packAddr addr0# = go addr0#
where
len = (fromIntegral . unsafeDupablePerformIO $ V.c_strlen addr0#)
go addr# = runST $ do
marr <- newPrimArray len
copyPtrToMutablePrimArray marr 0 (Ptr addr#) len
arr <- unsafeFreezePrimArray marr
return (CBytes arr)
pack :: String -> CBytes
{-# INLINE CONLIKE [1] pack #-}
pack s = runST $ do
mba <- newPrimArray V.defaultInitSize
(SP2 i mba') <- foldlM go (SP2 0 mba) s
shrinkMutablePrimArray mba' i
ba <- unsafeFreezePrimArray mba'
return (CBytes ba)
where
go :: SP2 s -> Char -> ST s (SP2 s)
go (SP2 i mba) !c = do
siz <- getSizeofMutablePrimArray mba
if i < siz - 3
then do
i' <- encodeCharModifiedUTF8 mba i c
return (SP2 i' mba)
else do
let !siz' = siz `shiftL` 1
!mba' <- resizeMutablePrimArray mba siz'
i' <- encodeCharModifiedUTF8 mba' i c
return (SP2 i' mba')
data SP2 s = SP2 {-# UNPACK #-}!Int {-# UNPACK #-}!(MutablePrimArray s Word8)
unpack :: CBytes -> String
{-# INLINE [1] unpack #-}
unpack (CBytes arr) = go 0
where
!end = sizeofPrimArray arr
go !idx
| idx >= end = []
| otherwise = let (# c, i #) = decodeChar arr idx in c : go (idx + i)
unpackFB :: CBytes -> (Char -> a -> a) -> a -> a
{-# INLINE [0] unpackFB #-}
unpackFB (CBytes arr) k z = go 0
where
!end = sizeofPrimArray arr
go !idx
| idx >= end = z
| otherwise = let (# c, i #) = decodeChar arr idx in c `k` go (idx + i)
{-# RULES
"unpack" [~1] forall t . unpack t = build (\ k z -> unpackFB t k z)
"unpackFB" [1] forall t . unpackFB t (:) [] = unpack t
#-}
null :: CBytes -> Bool
{-# INLINE null #-}
null (CBytes pa) = sizeofPrimArray pa == 0
length :: CBytes -> Int
{-# INLINE length #-}
length (CBytes pa) = sizeofPrimArray pa
toBytes :: CBytes -> V.Bytes
{-# INLINABLE toBytes #-}
toBytes (CBytes arr) = V.PrimVector arr 0 (sizeofPrimArray arr)
fromBytes :: V.Bytes -> CBytes
{-# INLINABLE fromBytes #-}
fromBytes v@(V.PrimVector arr s l) = runST (do
case V.elemIndex 0 v of
Just i -> do
mpa <- newPrimArray i
copyPrimArray mpa 0 arr s i
pa <- unsafeFreezePrimArray mpa
return (CBytes pa)
_ | s == 0 && sizeofPrimArray arr == l -> return (CBytes arr)
| otherwise -> do
mpa <- newPrimArray l
copyPrimArray mpa 0 arr s l
pa <- unsafeFreezePrimArray mpa
return (CBytes pa))
toText :: CBytes -> T.Text
{-# INLINABLE toText #-}
toText = T.validate . toBytes
toTextMaybe :: CBytes -> Maybe T.Text
{-# INLINABLE toTextMaybe #-}
toTextMaybe = T.validateMaybe . toBytes
fromText :: T.Text -> CBytes
{-# INLINABLE fromText #-}
fromText = fromBytes . T.getUTF8Bytes
toBuilder :: CBytes -> B.Builder ()
toBuilder = B.bytes . toBytes
buildCBytes :: B.Builder a -> CBytes
buildCBytes = fromBytes . B.buildBytes
fromCString :: CString -> IO CBytes
{-# INLINABLE fromCString #-}
fromCString cstring = do
if cstring == nullPtr
then return empty
else do
len <- fromIntegral <$> c_strlen_ptr cstring
mpa <- newPrimArray len
copyPtrToMutablePrimArray mpa 0 (castPtr cstring) len
pa <- unsafeFreezePrimArray mpa
return (CBytes pa)
fromCStringN :: CString -> Int -> IO CBytes
{-# INLINABLE fromCStringN #-}
fromCStringN cstring len0 = do
if cstring == nullPtr || len0 == 0
then return empty
else do
len1 <- fromIntegral <$> c_strlen_ptr cstring
let len = min len0 len1
mpa <- newPrimArray len
copyPtrToMutablePrimArray mpa 0 (castPtr cstring) len
pa <- unsafeFreezePrimArray mpa
return (CBytes pa)
withCBytesUnsafe :: CBytes -> (BA# Word8 -> IO a) -> IO a
{-# INLINABLE withCBytesUnsafe #-}
withCBytesUnsafe (CBytes pa) f = do
let l = sizeofPrimArray pa
mpa <- newPrimArray (l+1)
copyPrimArray mpa 0 pa 0 l
writePrimArray mpa l 0
pa' <- unsafeFreezePrimArray mpa
withPrimArrayUnsafe pa' (\ p _ -> f p)
withCBytes :: CBytes -> (Ptr Word8 -> IO a) -> IO a
{-# INLINABLE withCBytes #-}
withCBytes (CBytes pa) f = do
let l = sizeofPrimArray pa
mpa <- newPinnedPrimArray (l+1)
copyPrimArray mpa 0 pa 0 l
writePrimArray mpa l 0
pa' <- unsafeFreezePrimArray mpa
withPrimArraySafe pa' (\ p _ -> f p)
allocCBytesUnsafe :: HasCallStack
=> Int
-> (MBA# Word8 -> IO a)
-> IO (CBytes, a)
{-# INLINABLE allocCBytesUnsafe #-}
allocCBytesUnsafe n fill | n <= 0 = withPrimUnsafe (0::Word8) fill >>=
\ (_, b) -> return (empty, b)
| otherwise = do
mba@(MutablePrimArray mba#) <- newPrimArray n :: IO (MutablePrimArray RealWorld Word8)
a <- fill mba#
l <- fromIntegral <$> (c_memchr mba# 0 0 n)
shrinkMutablePrimArray mba (if l == -1 then n else l)
bs <- unsafeFreezePrimArray mba
return (CBytes bs, a)
allocCBytes :: HasCallStack
=> Int
-> (CString -> IO a)
-> IO (CBytes, a)
{-# INLINABLE allocCBytes #-}
allocCBytes n fill | n <= 0 = fill nullPtr >>= \ a -> return (empty, a)
| otherwise = do
mba@(MutablePrimArray mba#) <- newPinnedPrimArray n :: IO (MutablePrimArray RealWorld Word8)
a <- withMutablePrimArrayContents mba (fill . castPtr)
l <- fromIntegral <$> (c_memchr mba# 0 0 n)
shrinkMutablePrimArray mba (if l == -1 then n else l)
bs <- unsafeFreezePrimArray mba
return (CBytes bs, a)
c_strlen_ptr :: CString -> IO CSize
{-# INLINE c_strlen_ptr #-}
c_strlen_ptr (Ptr a#) = V.c_strlen a#
foreign import ccall unsafe "hs_memchr" c_memchr :: MBA# Word8 -> Int -> Word8 -> Int -> IO Int