module Z.Data.CBytes
(
CBytes(CB)
, rawPrimArray, fromPrimArray
, toBytes, fromBytes, toText, toTextMaybe, fromText, toBuilder, buildCBytes
, pack
, unpack
, null, length
, empty, singleton, append, concat, intercalate, intercalateElem
, fromCString, fromCStringN
, withCBytesUnsafe, withCBytes, allocCBytesUnsafe, allocCBytes
, withCBytesListUnsafe, withCBytesListSafe
, CString
, V.c2w, V.w2c
) where
import Control.DeepSeq
import Control.Monad
import Control.Monad.Primitive
import Control.Monad.ST
import Data.Bits
import Data.Foldable (foldlM)
import Data.Hashable (Hashable(..))
import qualified Data.List as List
import Data.Primitive.PrimArray
import Data.Word
import Foreign.C.String
import GHC.Exts
import GHC.CString
import GHC.Ptr
import GHC.Stack
import Prelude hiding (all, any, appendFile, break,
concat, concatMap, drop, dropWhile,
elem, filter, foldl, foldl1, foldr,
foldr1, getContents, getLine, head,
init, interact, last, length, lines,
map, maximum, minimum, notElem, null,
putStr, putStrLn, readFile, replicate,
reverse, scanl, scanl1, scanr, scanr1,
span, splitAt, tail, take, takeWhile,
unlines, unzip, writeFile, zip,
zipWith)
import Z.Data.Array
import Z.Data.Array.Unaligned
import qualified Z.Data.Builder as B
import qualified Z.Data.Text as T
import qualified Z.Data.Text.ShowT as T
import qualified Z.Data.Text.UTF8Codec as T
import qualified Z.Data.JSON.Base as JSON
import Z.Data.JSON.Base ((<?>))
import Z.Data.Text.UTF8Codec (encodeCharModifiedUTF8, decodeChar)
import qualified Z.Data.Vector.Base as V
import Z.Foreign
import System.IO.Unsafe (unsafeDupablePerformIO)
import Test.QuickCheck.Arbitrary (Arbitrary(..), CoArbitrary(..))
newtype CBytes = CBytes
{
rawPrimArray :: PrimArray Word8
}
fromPrimArray :: PrimArray Word8 -> CBytes
{-# INLINE fromPrimArray #-}
fromPrimArray arr = runST (do
let l = case V.elemIndex 0 arr of
Just i -> i
_ -> sizeofPrimArray arr
mpa <- newPrimArray (l+1)
copyPrimArray mpa 0 arr 0 l
writePrimArray mpa l 0
pa <- unsafeFreezePrimArray mpa
return (CBytes pa))
pattern CB :: V.Bytes -> CBytes
{-# COMPLETE CB #-}
pattern CB bs <- (toBytes -> bs) where
CB bs = fromBytes bs
instance Show CBytes where
showsPrec p t = showsPrec p (unpack t)
instance Read CBytes where
readsPrec p s = [(pack x, r) | (x, r) <- readsPrec p s]
instance NFData CBytes where
{-# INLINE rnf #-}
rnf (CBytes _) = ()
instance Eq CBytes where
{-# INLINE (==) #-}
CBytes ba == CBytes bb = ba == bb
instance Ord CBytes where
{-# INLINE compare #-}
CBytes ba `compare` CBytes bb = ba `compare` bb
instance Semigroup CBytes where
(<>) = append
instance Monoid CBytes where
{-# INLINE mempty #-}
mempty = empty
{-# INLINE mappend #-}
mappend = append
{-# INLINE mconcat #-}
mconcat = concat
instance Hashable CBytes where
hashWithSalt salt (CBytes pa@(PrimArray ba#)) = unsafeDupablePerformIO $ do
V.c_fnv_hash_ba ba# 0 (sizeofPrimArray pa - 1) salt
instance Arbitrary CBytes where
arbitrary = pack <$> arbitrary
shrink a = pack <$> shrink (unpack a)
instance CoArbitrary CBytes where
coarbitrary = coarbitrary . unpack
instance Unaligned CBytes where
{-# INLINE unalignedSize #-}
unalignedSize (CBytes arr) = sizeofPrimArray arr
{-# INLINE peekMBA #-}
peekMBA mba# i = do
b <- getSizeofMutableByteArray (MutableByteArray mba#)
let rest = b-i
l <- c_memchr mba# i 0 rest
let l' = if l == -1 then rest else l
mpa <- newPrimArray (l'+1)
copyMutablePrimArray mpa 0 (MutablePrimArray mba#) i l'
writePrimArray mpa l' 0
pa <- unsafeFreezePrimArray mpa
return (CBytes pa)
{-# INLINE pokeMBA #-}
pokeMBA mba# i (CBytes pa) = do
let l = sizeofPrimArray pa
copyPrimArray (MutablePrimArray mba# :: MutablePrimArray RealWorld Word8) i pa 0 l
{-# INLINE indexBA #-}
indexBA ba# i = runST (do
let b = sizeofByteArray (ByteArray ba#)
rest = b-i
l = V.c_memchr ba# i 0 rest
l' = if l == -1 then rest else l
mpa <- newPrimArray (l'+1)
copyPrimArray mpa 0 (PrimArray ba#) i l'
writePrimArray mpa l' 0
pa <- unsafeFreezePrimArray mpa
return (CBytes pa))
instance T.ShowT CBytes where
{-# INLINE toTextBuilder #-}
toTextBuilder _ = T.stringUTF8 . show . unpack
instance JSON.FromValue CBytes where
{-# INLINE fromValue #-}
fromValue value =
case value of
JSON.String t ->
return (fromText t)
JSON.Array arr ->
fromBytes <$> V.traverseWithIndex
(\ k v -> JSON.fromValue v <?> JSON.Index k) arr
_ -> JSON.fail'
"converting Z.Data.CBytes.CBytes failed, expected array or string"
instance JSON.ToValue CBytes where
{-# INLINE toValue #-}
toValue cbytes = case toTextMaybe cbytes of
Just t -> JSON.toValue t
Nothing -> JSON.toValue (toBytes cbytes)
instance JSON.EncodeJSON CBytes where
{-# INLINE encodeJSON #-}
encodeJSON cbytes = case toTextMaybe cbytes of
Just t -> JSON.encodeJSON t
Nothing -> B.square . JSON.commaVec' . toBytes $ cbytes
append :: CBytes -> CBytes -> CBytes
{-# INLINABLE append #-}
append strA@(CBytes pa) strB@(CBytes pb)
| lenA == 0 = strB
| lenB == 0 = strA
| otherwise = unsafeDupablePerformIO $ do
mpa <- newPrimArray (lenA+lenB+1)
copyPrimArray mpa 0 pa 0 lenA
copyPrimArray mpa lenA pb 0 lenB
writePrimArray mpa (lenA + lenB) 0
pa' <- unsafeFreezePrimArray mpa
return (CBytes pa')
where
lenA = length strA
lenB = length strB
empty :: CBytes
{-# NOINLINE empty #-}
empty = CBytes (V.singleton 0)
singleton :: Word8 -> CBytes
{-# INLINE singleton #-}
singleton w = runST (do
buf <- newPrimArray 2
writePrimArray buf 0 w
writePrimArray buf 1 0
pa <- unsafeFreezePrimArray buf
return (CBytes pa))
concat :: [CBytes] -> CBytes
{-# INLINABLE concat #-}
concat bss = case pre 0 0 bss of
(0, _) -> empty
(1, _) -> let Just b = List.find (not . null) bss in b
(_, l) -> runST $ do
buf <- newPrimArray (l+1)
copy bss 0 buf
writePrimArray buf l 0
CBytes <$> unsafeFreezePrimArray buf
where
pre :: Int -> Int -> [CBytes] -> (Int, Int)
pre !nacc !lacc [] = (nacc, lacc)
pre !nacc !lacc (b:bs)
| l <= 0 = pre nacc lacc bs
| otherwise = pre (nacc+1) (l + lacc) bs
where !l = length b
copy :: [CBytes] -> Int -> MutablePrimArray s Word8 -> ST s ()
copy [] !_ !_ = return ()
copy (b@(CBytes ba):bs) !i !mba = do
let l = length b
when (l /= 0) (copyPrimArray mba i ba 0 l)
copy bs (i+l) mba
intercalate :: CBytes -> [CBytes] -> CBytes
{-# INLINE intercalate #-}
intercalate s = concat . List.intersperse s
intercalateElem :: Word8 -> [CBytes] -> CBytes
{-# INLINABLE intercalateElem #-}
intercalateElem 0 [] = empty
intercalateElem 0 (bs:_) = bs
intercalateElem w8 bss = case len bss 0 of
0 -> empty
l -> runST $ do
buf <- newPrimArray (l+1)
copy bss 0 buf
writePrimArray buf l 0
CBytes <$> unsafeFreezePrimArray buf
where
len [] !acc = acc
len [b] !acc = length b + acc
len (b:bs) !acc = len bs (acc + length b + 1)
copy :: [CBytes] -> Int -> MutablePrimArray s Word8 -> ST s ()
copy (b@(CBytes ba):bs) !i !mba = do
let l = length b
when (l /= 0) (copyPrimArray mba i ba 0 l)
case bs of
[] -> return ()
_ -> do
let i' = i + l
writePrimArray mba i' w8
copy bs (i'+1) mba
instance IsString CBytes where
{-# INLINE fromString #-}
fromString = pack
{-# RULES
"CBytes pack/unpackCString#" forall addr# .
pack (unpackCString# addr#) = packAddr addr#
#-}
{-# RULES
"CBytes pack/unpackCStringUtf8#" forall addr# .
pack (unpackCStringUtf8# addr#) = packAddr addr#
#-}
packAddr :: Addr# -> CBytes
packAddr addr0# = go addr0#
where
len = (fromIntegral . unsafeDupablePerformIO $ V.c_strlen addr0#) + 1
go addr# = runST $ do
marr <- newPrimArray len
copyPtrToMutablePrimArray marr 0 (Ptr addr#) len
arr <- unsafeFreezePrimArray marr
return (CBytes arr)
pack :: String -> CBytes
{-# INLINE CONLIKE [1] pack #-}
pack s = runST $ do
mba <- newPrimArray V.defaultInitSize
(SP2 i mba') <- foldlM go (SP2 0 mba) s
writePrimArray mba' i 0
shrinkMutablePrimArray mba' (i+1)
ba <- unsafeFreezePrimArray mba'
return (CBytes ba)
where
go :: SP2 s -> Char -> ST s (SP2 s)
go (SP2 i mba) !c = do
siz <- getSizeofMutablePrimArray mba
if i < siz - 4
then do
i' <- encodeCharModifiedUTF8 mba i c
return (SP2 i' mba)
else do
let !siz' = siz `shiftL` 1
!mba' <- resizeMutablePrimArray mba siz'
i' <- encodeCharModifiedUTF8 mba' i c
return (SP2 i' mba')
data SP2 s = SP2 {-# UNPACK #-}!Int {-# UNPACK #-}!(MutablePrimArray s Word8)
unpack :: CBytes -> String
{-# INLINE [1] unpack #-}
unpack (CBytes arr) = go 0
where
!end = sizeofPrimArray arr - 1
go !idx
| idx >= end = []
| idx + T.decodeCharLen arr idx > end = [T.replacementChar]
| otherwise = let (# c, i #) = decodeChar arr idx in c : go (idx + i)
unpackFB :: CBytes -> (Char -> a -> a) -> a -> a
{-# INLINE [0] unpackFB #-}
unpackFB (CBytes arr) k z = go 0
where
!end = sizeofPrimArray arr - 1
go !idx
| idx >= end = z
| idx + T.decodeCharLen arr idx > end = T.replacementChar `k` z
| otherwise = let (# c, i #) = decodeChar arr idx in c `k` go (idx + i)
{-# RULES
"unpack" [~1] forall t . unpack t = build (\ k z -> unpackFB t k z)
"unpackFB" [1] forall t . unpackFB t (:) [] = unpack t
#-}
null :: CBytes -> Bool
{-# INLINE null #-}
null (CBytes pa) = indexPrimArray pa 0 == 0
length :: CBytes -> Int
{-# INLINE length #-}
length (CBytes pa) = sizeofPrimArray pa - 1
toBytes :: CBytes -> V.Bytes
{-# INLINABLE toBytes #-}
toBytes (CBytes arr) = V.PrimVector arr 0 (sizeofPrimArray arr - 1)
fromBytes :: V.Bytes -> CBytes
{-# INLINABLE fromBytes #-}
fromBytes v@(V.PrimVector arr s l)
| s == 0 && sizeofPrimArray arr == (l+1) && indexPrimArray arr l == 0 =
CBytes arr
| otherwise = runST (do
let l' = case V.elemIndex 0 v of
Just i -> i
_ -> l
mpa <- newPrimArray (l'+1)
copyPrimArray mpa 0 arr s l'
writePrimArray mpa l' 0
pa <- unsafeFreezePrimArray mpa
return (CBytes pa))
toText :: CBytes -> T.Text
{-# INLINABLE toText #-}
toText = T.validate . toBytes
toTextMaybe :: CBytes -> Maybe T.Text
{-# INLINABLE toTextMaybe #-}
toTextMaybe = T.validateMaybe . toBytes
fromText :: T.Text -> CBytes
{-# INLINABLE fromText #-}
fromText = fromBytes . T.getUTF8Bytes
toBuilder :: CBytes -> B.Builder ()
toBuilder = B.bytes . toBytes
buildCBytes :: B.Builder a -> CBytes
buildCBytes = fromBytes . B.buildBytes
fromCString :: CString -> IO CBytes
{-# INLINABLE fromCString #-}
fromCString cstring = do
if cstring == nullPtr
then return empty
else do
len <- fromIntegral <$> c_strlen_ptr cstring
let len' = len + 1
mpa <- newPrimArray len'
copyPtrToMutablePrimArray mpa 0 (castPtr cstring) len'
pa <- unsafeFreezePrimArray mpa
return (CBytes pa)
fromCStringN :: CString -> Int -> IO CBytes
{-# INLINABLE fromCStringN #-}
fromCStringN cstring len0 = do
if cstring == nullPtr || len0 == 0
then return empty
else do
len1 <- fromIntegral <$> c_strlen_ptr cstring
let len = min len0 len1
mpa <- newPrimArray (len+1)
copyPtrToMutablePrimArray mpa 0 (castPtr cstring) len
writePrimArray mpa len 0
pa <- unsafeFreezePrimArray mpa
return (CBytes pa)
withCBytesUnsafe :: CBytes -> (BA# Word8 -> IO a) -> IO a
{-# INLINABLE withCBytesUnsafe #-}
withCBytesUnsafe (CBytes pa) f = withPrimArrayUnsafe pa (\ p _ -> f p)
withCBytesListUnsafe :: [CBytes] -> (BAArray# Word8 -> Int -> IO a) -> IO a
{-# INLINABLE withCBytesListUnsafe #-}
withCBytesListUnsafe pas = withPrimArrayListUnsafe (List.map rawPrimArray pas)
withCBytes :: CBytes -> (Ptr Word8 -> IO a) -> IO a
{-# INLINABLE withCBytes #-}
withCBytes (CBytes pa) f = withPrimArraySafe pa (\ p _ -> f p)
withCBytesListSafe :: [CBytes] -> (Ptr (Ptr Word8) -> Int -> IO a) -> IO a
{-# INLINABLE withCBytesListSafe #-}
withCBytesListSafe pas = withPrimArrayListSafe (List.map rawPrimArray pas)
allocCBytesUnsafe :: HasCallStack
=> Int
-> (MBA# Word8 -> IO a)
-> IO (CBytes, a)
{-# INLINABLE allocCBytesUnsafe #-}
allocCBytesUnsafe n fill | n <= 0 = withPrimUnsafe (0::Word8) fill >>=
\ (_, b) -> return (empty, b)
| otherwise = do
mba@(MutablePrimArray mba#) <- newPrimArray n :: IO (MutablePrimArray RealWorld Word8)
a <- fill mba#
l <- fromIntegral <$> (c_memchr mba# 0 0 n)
let l' = if l == -1 then (n-1) else l
shrinkMutablePrimArray mba (l'+1)
writePrimArray mba l' 0
bs <- unsafeFreezePrimArray mba
return (CBytes bs, a)
allocCBytes :: HasCallStack
=> Int
-> (CString -> IO a)
-> IO (CBytes, a)
{-# INLINABLE allocCBytes #-}
allocCBytes n fill | n <= 0 = fill nullPtr >>= \ a -> return (empty, a)
| otherwise = do
mba@(MutablePrimArray mba#) <- newPinnedPrimArray n :: IO (MutablePrimArray RealWorld Word8)
a <- withMutablePrimArrayContents mba (fill . castPtr)
l <- fromIntegral <$> (c_memchr mba# 0 0 n)
let l' = if l == -1 then (n-1) else l
shrinkMutablePrimArray mba (l'+1)
writePrimArray mba l' 0
bs <- unsafeFreezePrimArray mba
return (CBytes bs, a)
c_strlen_ptr :: CString -> IO CSize
{-# INLINE c_strlen_ptr #-}
c_strlen_ptr (Ptr a#) = V.c_strlen a#
foreign import ccall unsafe "hs_memchr" c_memchr :: MBA# Word8 -> Int -> Word8 -> Int -> IO Int