{-# LANGUAGE CPP #-} {-# LANGUAGE MagicHash #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE UnboxedTuples #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE ViewPatterns #-} -- | -- Module : System.AbstractFilePath.Data.ByteString.Short.Internal -- Copyright : © 2022 Julian Ospald -- License : MIT -- -- Maintainer : Julian Ospald -- Stability : experimental -- Portability : portable -- -- Internal low-level utilities mostly for 'System.AbstractFilePath.Data.ByteString.Short.Word16', -- such as byte-array operations and other stuff not meant to be exported from Word16 module. module System.AbstractFilePath.Data.ByteString.Short.Internal where import Control.Monad.ST import Control.Exception (assert, throwIO) import Data.ByteString.Short.Internal (ShortByteString(..), length) #if !MIN_VERSION_base(4,11,0) import Data.Semigroup ( Semigroup((<>)) ) #endif #if !MIN_VERSION_bytestring(0,10,9) import Foreign.Marshal.Alloc (allocaBytes) import Foreign.C.String ( CString, CStringLen ) import Foreign.C.Types ( CSize(..) ) import Foreign.Storable (pokeByteOff) #endif import Foreign.Marshal.Array (withArray0, peekArray0, newArray0, withArrayLen, peekArray) import GHC.Exts import GHC.Word import GHC.ST ( ST (ST) ) import GHC.Stack ( HasCallStack ) import Prelude hiding ( length ) import qualified Data.ByteString.Short.Internal as BS import qualified Data.Char as C import qualified Data.List as List _nul :: Word16 _nul = 0x00 isSpace :: Word16 -> Bool isSpace = C.isSpace . word16ToChar -- | Total conversion to char. word16ToChar :: Word16 -> Char word16ToChar = C.chr . fromIntegral create :: Int -> (forall s. MBA s -> ST s ()) -> ShortByteString create len fill = runST $ do mba <- newByteArray len fill mba BA# ba# <- unsafeFreezeByteArray mba return (SBS ba#) {-# INLINE create #-} asBA :: ShortByteString -> BA asBA (SBS ba#) = BA# ba# data BA = BA# ByteArray# data MBA s = MBA# (MutableByteArray# s) newPinnedByteArray :: Int -> ST s (MBA s) newPinnedByteArray (I# len#) = ST $ \s -> case newPinnedByteArray# len# s of (# s', mba# #) -> (# s', MBA# mba# #) newByteArray :: Int -> ST s (MBA s) newByteArray (I# len#) = ST $ \s -> case newByteArray# len# s of (# s', mba# #) -> (# s', MBA# mba# #) copyByteArray :: BA -> Int -> MBA s -> Int -> Int -> ST s () copyByteArray (BA# src#) (I# src_off#) (MBA# dst#) (I# dst_off#) (I# len#) = ST $ \s -> case copyByteArray# src# src_off# dst# dst_off# len# s of s' -> (# s', () #) unsafeFreezeByteArray :: MBA s -> ST s BA unsafeFreezeByteArray (MBA# mba#) = ST $ \s -> case unsafeFreezeByteArray# mba# s of (# s', ba# #) -> (# s', BA# ba# #) copyAddrToByteArray :: Ptr a -> MBA RealWorld -> Int -> Int -> ST RealWorld () copyAddrToByteArray (Ptr src#) (MBA# dst#) (I# dst_off#) (I# len#) = ST $ \s -> case copyAddrToByteArray# src# dst# dst_off# len# s of s' -> (# s', () #) -- this is a copy-paste from bytestring #if !MIN_VERSION_bytestring(0,10,9) ------------------------------------------------------------------------ -- Primop replacements -- --------------------------------------------------------------------- -- -- Standard C functions -- foreign import ccall unsafe "string.h strlen" c_strlen :: CString -> IO CSize -- --------------------------------------------------------------------- -- -- Uses our C code -- -- | /O(n)./ Construct a new @ShortByteString@ from a @CString@. The -- resulting @ShortByteString@ is an immutable copy of the original -- @CString@, and is managed on the Haskell heap. The original -- @CString@ must be null terminated. -- -- @since 0.10.10.0 packCString :: CString -> IO ShortByteString packCString cstr = do len <- c_strlen cstr packCStringLen (cstr, fromIntegral len) -- | /O(n)./ Construct a new @ShortByteString@ from a @CStringLen@. The -- resulting @ShortByteString@ is an immutable copy of the original @CStringLen@. -- The @ShortByteString@ is a normal Haskell value and will be managed on the -- Haskell heap. -- -- @since 0.10.10.0 packCStringLen :: CStringLen -> IO ShortByteString packCStringLen (cstr, len) | len >= 0 = BS.createFromPtr cstr len packCStringLen (_, len) = moduleErrorIO "packCStringLen" ("negative length: " ++ show len) -- | /O(n) construction./ Use a @ShortByteString@ with a function requiring a -- null-terminated @CString@. The @CString@ is a copy and will be freed -- automatically; it must not be stored or used after the -- subcomputation finishes. -- -- @since 0.10.10.0 useAsCString :: ShortByteString -> (CString -> IO a) -> IO a useAsCString bs action = allocaBytes (l+1) $ \buf -> do BS.copyToPtr bs 0 buf (fromIntegral l) pokeByteOff buf l (0::Word8) action buf where l = length bs -- | /O(n) construction./ Use a @ShortByteString@ with a function requiring a @CStringLen@. -- As for @useAsCString@ this function makes a copy of the original @ShortByteString@. -- It must not be stored or used after the subcomputation finishes. -- -- @since 0.10.10.0 useAsCStringLen :: ShortByteString -> (CStringLen -> IO a) -> IO a useAsCStringLen bs action = allocaBytes l $ \buf -> do BS.copyToPtr bs 0 buf (fromIntegral l) action (buf, l) where l = length bs #endif -- | /O(n)./ Construct a new @ShortByteString@ from a @CWString@. The -- resulting @ShortByteString@ is an immutable copy of the original -- @CWString@, and is managed on the Haskell heap. The original -- @CWString@ must be null terminated. -- -- @since 0.10.10.0 packCWString :: Ptr Word16 -> IO ShortByteString packCWString cwstr = do cs <- peekArray0 _nul cwstr return (packWord16 cs) -- | /O(n)./ Construct a new @ShortByteString@ from a @CWStringLen@. The -- resulting @ShortByteString@ is an immutable copy of the original @CWStringLen@. -- The @ShortByteString@ is a normal Haskell value and will be managed on the -- Haskell heap. -- -- @since 0.10.10.0 packCWStringLen :: (Ptr Word16, Int) -> IO ShortByteString packCWStringLen (cp, len) = do cs <- peekArray len cp return (packWord16 cs) -- | /O(n) construction./ Use a @ShortByteString@ with a function requiring a -- null-terminated @CWString@. The @CWString@ is a copy and will be freed -- automatically; it must not be stored or used after the -- subcomputation finishes. -- -- @since 0.10.10.0 useAsCWString :: ShortByteString -> (Ptr Word16 -> IO a) -> IO a useAsCWString = withArray0 _nul . unpackWord16 -- | /O(n) construction./ Use a @ShortByteString@ with a function requiring a @CWStringLen@. -- As for @useAsCWString@ this function makes a copy of the original @ShortByteString@. -- It must not be stored or used after the subcomputation finishes. -- -- @since 0.10.10.0 useAsCWStringLen :: ShortByteString -> ((Ptr Word16, Int) -> IO a) -> IO a useAsCWStringLen bs action = withArrayLen (unpackWord16 bs) $ \ len ptr -> action (ptr, len) -- | /O(n) construction./ Use a @ShortByteString@ with a function requiring a @CWStringLen@. -- As for @useAsCWString@ this function makes a copy of the original @ShortByteString@. -- It must not be stored or used after the subcomputation finishes. -- -- @since 0.10.10.0 newCWString :: ShortByteString -> IO (Ptr Word16) newCWString = newArray0 _nul . unpackWord16 -- --------------------------------------------------------------------- -- Internal utilities moduleErrorIO :: String -> String -> IO a moduleErrorIO fun msg = throwIO . userError $ moduleErrorMsg fun msg {-# NOINLINE moduleErrorIO #-} moduleErrorMsg :: String -> String -> String moduleErrorMsg fun msg = "System.AbstractFilePath.Data.ByteString.Short." ++ fun ++ ':':' ':msg packWord16 :: [Word16] -> ShortByteString packWord16 cs = packLenWord16 (List.length cs) cs packLenWord16 :: Int -> [Word16] -> ShortByteString packLenWord16 len ws0 = create (len * 2) (\mba -> go mba 0 ws0) where go :: MBA s -> Int -> [Word16] -> ST s () go !_ !_ [] = return () go !mba !i (w:ws) = do writeWord16Array mba i w go mba (i+2) ws unpackWord16 :: ShortByteString -> [Word16] unpackWord16 sbs = go len [] where len = length sbs go !i !acc | i < 1 = acc | otherwise = let !w = indexWord16Array (asBA sbs) (i - 2) in go (i - 2) (w:acc) packWord16Rev :: [Word16] -> ShortByteString packWord16Rev cs = packLenWord16Rev (List.length cs * 2) cs packLenWord16Rev :: Int -> [Word16] -> ShortByteString packLenWord16Rev len ws0 = create len (\mba -> go mba len ws0) where go :: MBA s -> Int -> [Word16] -> ST s () go !_ !_ [] = return () go !mba !i (w:ws) = do writeWord16Array mba (i - 2) w go mba (i - 2) ws -- | This isn't strictly Word16 array write. Instead it's two consecutive Word8 array -- writes to avoid endianness issues due to primops doing automatic alignment based -- on host platform. We want to always write LE to the byte array. writeWord16Array :: MBA s -> Int -- ^ Word8 index (not Word16) -> Word16 -> ST s () writeWord16Array (MBA# mba#) (I# i#) (W16# w#) = case encodeWord16LE# w# of (# lsb#, msb# #) -> ST (\s -> case writeWord8Array# mba# i# lsb# s of s' -> (# s', () #)) >> ST (\s -> case writeWord8Array# mba# (i# +# 1#) msb# s of s' -> (# s', () #)) -- | This isn't strictly Word16 array read. Instead it's two Word8 array reads -- to avoid endianness issues due to primops doing automatic alignment based -- on host platform. We expect the byte array to be LE always. indexWord16Array :: BA -> Int -- ^ Word8 index (not Word16) -> Word16 indexWord16Array (BA# ba#) (I# i#) = case (# indexWord8Array# ba# i#, indexWord8Array# ba# (i# +# 1#) #) of (# lsb#, msb# #) -> W16# (decodeWord16LE# (# lsb#, msb# #)) #if !MIN_VERSION_base(4,16,0) encodeWord16LE# :: Word# -- ^ Word16 -> (# Word#, Word# #) -- ^ Word8 (LSB, MSB) encodeWord16LE# x# = (# x# `and#` int2Word# 0xff# , x# `and#` int2Word# 0xff00# `shiftRL#` 8# #) decodeWord16LE# :: (# Word#, Word# #) -- ^ Word8 (LSB, MSB) -> Word# -- ^ Word16 decodeWord16LE# (# lsb#, msb# #) = msb# `shiftL#` 8# `or#` lsb# #else encodeWord16LE# :: Word16# -- ^ Word16 -> (# Word8#, Word8# #) -- ^ Word8 (LSB, MSB) encodeWord16LE# x# = (# word16ToWord8# x# , word16ToWord8# (x# `uncheckedShiftRLWord16#` 8#) #) where word16ToWord8# y = wordToWord8# (word16ToWord# y) decodeWord16LE# :: (# Word8#, Word8# #) -- ^ Word8 (LSB, MSB) -> Word16# -- ^ Word16 decodeWord16LE# (# lsb#, msb# #) = ((word8ToWord16# msb# `uncheckedShiftLWord16#` 8#) `orWord16#` word8ToWord16# lsb#) where word8ToWord16# y = wordToWord16# (word8ToWord# y) #endif setByteArray :: MBA s -> Int -> Int -> Int -> ST s () setByteArray (MBA# dst#) (I# off#) (I# len#) (I# c#) = ST $ \s -> case setByteArray# dst# off# len# c# s of s' -> (# s', () #) copyMutableByteArray :: MBA s -> Int -> MBA s -> Int -> Int -> ST s () copyMutableByteArray (MBA# src#) (I# src_off#) (MBA# dst#) (I# dst_off#) (I# len#) = ST $ \s -> case copyMutableByteArray# src# src_off# dst# dst_off# len# s of s' -> (# s', () #) -- | Given the maximum size needed and a function to make the contents -- of a ShortByteString, createAndTrim makes the 'ShortByteString'. -- The generating function is required to return the actual final size -- (<= the maximum size) and the result value. The resulting byte array -- is realloced to this size. createAndTrim :: Int -> (forall s. MBA s -> ST s (Int, a)) -> (ShortByteString, a) createAndTrim l fill = runST $ do mba <- newByteArray l (l', res) <- fill mba if assert (l' <= l) $ l' >= l then do BA# ba# <- unsafeFreezeByteArray mba return (SBS ba#, res) else do mba2 <- newByteArray l' copyMutableByteArray mba 0 mba2 0 l' BA# ba# <- unsafeFreezeByteArray mba2 return (SBS ba#, res) {-# INLINE createAndTrim #-} createAndTrim' :: Int -> (forall s. MBA s -> ST s Int) -> ShortByteString createAndTrim' l fill = runST $ do mba <- newByteArray l l' <- fill mba if assert (l' <= l) $ l' >= l then do BA# ba# <- unsafeFreezeByteArray mba return (SBS ba#) else do mba2 <- newByteArray l' copyMutableByteArray mba 0 mba2 0 l' BA# ba# <- unsafeFreezeByteArray mba2 return (SBS ba#) {-# INLINE createAndTrim' #-} createAndTrim'' :: Int -> (forall s. MBA s -> MBA s -> ST s (Int, Int)) -> (ShortByteString, ShortByteString) createAndTrim'' l fill = runST $ do mba1 <- newByteArray l mba2 <- newByteArray l (l1, l2) <- fill mba1 mba2 sbs1 <- freeze' l1 mba1 sbs2 <- freeze' l2 mba2 pure (sbs1, sbs2) where freeze' :: Int -> MBA s -> ST s ShortByteString freeze' l' mba = if assert (l' <= l) $ l' >= l then do BA# ba# <- unsafeFreezeByteArray mba return (SBS ba#) else do mba2 <- newByteArray l' copyMutableByteArray mba 0 mba2 0 l' BA# ba# <- unsafeFreezeByteArray mba2 return (SBS ba#) {-# INLINE createAndTrim'' #-} -- Returns the index of the first match or the length of the whole -- bytestring if nothing matched. findIndexOrLength :: (Word16 -> Bool) -> ShortByteString -> Int findIndexOrLength k (assertEven -> sbs) = go 0 where l = BS.length sbs ba = asBA sbs w = indexWord16Array ba go !n | n >= l = l `div` 2 | k (w n) = n `div` 2 | otherwise = go (n + 2) {-# INLINE findIndexOrLength #-} -- | Returns the length of the substring matching, not the index. -- If no match, returns 0. findFromEndUntil :: (Word16 -> Bool) -> ShortByteString -> Int findFromEndUntil k sbs = go (BS.length sbs - 2) where ba = asBA sbs w = indexWord16Array ba go !n | n < 0 = 0 | k (w n) = (n `div` 2) + 1 | otherwise = go (n - 2) {-# INLINE findFromEndUntil #-} assertEven :: ShortByteString -> ShortByteString assertEven sbs@(SBS barr#) | even (I# (sizeofByteArray# barr#)) = sbs | otherwise = error ("Uneven number of bytes: " <> show (BS.length sbs) <> ". This is not a Word16 bytestream.") -- Common up near identical calls to `error' to reduce the number -- constant strings created when compiled: errorEmptySBS :: HasCallStack => String -> a errorEmptySBS fun = moduleError fun "empty ShortByteString" {-# NOINLINE errorEmptySBS #-} moduleError :: HasCallStack => String -> String -> a moduleError fun msg = error (moduleErrorMsg fun msg) {-# NOINLINE moduleError #-}