Copyright | (c) Dong Han 2017-2018 |
---|---|
License | BSD |
Maintainer | winterland1989@gmail.com |
Stability | experimental |
Portability | non-portable |
Safe Haskell | None |
Language | Haskell2010 |
UTF-8 codecs and helpers.
Synopsis
- encodeCharLength :: Char -> Int
- encodeChar :: MutablePrimArray s Word8 -> Int -> Char -> ST s Int
- encodeChar# :: MutableByteArray# s -> Int# -> Char# -> State# s -> (#State# s, Int##)
- encodeCharModifiedUTF8 :: PrimMonad m => MutablePrimArray (PrimState m) Word8 -> Int -> Char -> m Int
- encodeCharModifiedUTF8# :: MutableByteArray# s -> Int# -> Char# -> State# s -> (#State# s, Int##)
- decodeChar :: PrimArray Word8 -> Int -> (#Char, Int#)
- decodeChar_ :: PrimArray Word8 -> Int -> Char
- decodeChar# :: ByteArray# -> Int# -> (#Char#, Int##)
- decodeCharLen :: PrimArray Word8 -> Int -> Int
- decodeCharLen# :: ByteArray# -> Int# -> Int#
- decodeCharReverse :: PrimArray Word8 -> Int -> (#Char, Int#)
- decodeCharReverse_ :: PrimArray Word8 -> Int -> Char
- decodeCharReverse# :: ByteArray# -> Int# -> (#Char#, Int##)
- decodeCharLenReverse :: PrimArray Word8 -> Int -> Int
- decodeCharLenReverse# :: ByteArray# -> Int# -> Int#
- between# :: Word# -> Word# -> Word# -> Bool
- isContinueByte# :: Word# -> Bool
- chr1# :: Word# -> Char#
- chr2# :: Word# -> Word# -> Char#
- chr3# :: Word# -> Word# -> Word# -> Char#
- chr4# :: Word# -> Word# -> Word# -> Word# -> Char#
- copyChar :: Int -> MutablePrimArray s Word8 -> Int -> PrimArray Word8 -> Int -> ST s ()
- copyChar' :: Int -> MutablePrimArray s Word8 -> Int -> MutablePrimArray s Word8 -> Int -> ST s ()
- replacementChar :: Char
Documentation
encodeCharLength :: Char -> Int Source #
Return a codepoint's encoded length in bytes
If the codepoint is invalid, we return 3(encoded bytes length of replacement char U+FFFD
).
encodeChar :: MutablePrimArray s Word8 -> Int -> Char -> ST s Int Source #
Encode a Char
into bytes, write replacementChar
for invalid unicode codepoint.
This function assumed there're enough space for encoded bytes, and return the advanced index.
encodeChar# :: MutableByteArray# s -> Int# -> Char# -> State# s -> (#State# s, Int##) Source #
The unboxed version of encodeChar
.
This function is marked as NOINLINE
to reduce code size, and stop messing up simplifier
due to too much branches.
encodeCharModifiedUTF8 :: PrimMonad m => MutablePrimArray (PrimState m) Word8 -> Int -> Char -> m Int Source #
Encode a Char
into bytes with non-standard UTF-8 encoding(Used in Data.CBytes).
'\NUL' is encoded as two bytes C0 80
, '\xD800' ~ '\xDFFF' is encoded as a three bytes normal UTF-8 codepoint.
This function assumed there're enough space for encoded bytes, and return the advanced index.
encodeCharModifiedUTF8# :: MutableByteArray# s -> Int# -> Char# -> State# s -> (#State# s, Int##) Source #
The unboxed version of encodeCharModifiedUTF8
.
decodeChar :: PrimArray Word8 -> Int -> (#Char, Int#) Source #
Decode a Char
from bytes
This function assumed all bytes are UTF-8 encoded, and the index param point to the beginning of a codepoint, the decoded character and the advancing offset are returned.
It's annoying to use unboxed tuple here but we really don't want allocation even if GHC can't optimize it away.
decodeChar# :: ByteArray# -> Int# -> (#Char#, Int##) Source #
The unboxed version of decodeChar
This function is marked as NOINLINE
to reduce code size, and stop messing up simplifier
due to too much branches.
decodeCharLen :: PrimArray Word8 -> Int -> Int Source #
Decode a codepoint's length in bytes
This function assumed all bytes are UTF-8 encoded, and the index param point to the beginning of a codepoint.
decodeCharLen# :: ByteArray# -> Int# -> Int# Source #
The unboxed version of decodeCharLen
This function is marked as NOINLINE
to reduce code size, and stop messing up simplifier
due to too much branches.
decodeCharReverse :: PrimArray Word8 -> Int -> (#Char, Int#) Source #
Decode a Char
from bytes in rerverse order.
This function assumed all bytes are UTF-8 encoded, and the index param point to the end of a codepoint, the decoded character and the backward advancing offset are returned.
decodeCharReverse# :: ByteArray# -> Int# -> (#Char#, Int##) Source #
The unboxed version of decodeCharReverse
This function is marked as NOINLINE
to reduce code size, and stop messing up simplifier
due to too much branches.
decodeCharLenReverse :: PrimArray Word8 -> Int -> Int Source #
Decode a codepoint's length in bytes in reverse order.
This function assumed all bytes are UTF-8 encoded, and the index param point to the end of a codepoint.
decodeCharLenReverse# :: ByteArray# -> Int# -> Int# Source #
The unboxed version of decodeCharLenReverse
This function is marked as NOINLINE
to reduce code size, and stop messing up simplifier
due to too much branches.
isContinueByte# :: Word# -> Bool Source #
copyChar :: Int -> MutablePrimArray s Word8 -> Int -> PrimArray Word8 -> Int -> ST s () Source #
Unrolled copy loop for copying a utf8-encoded codepoint from source array to target array.
copyChar' :: Int -> MutablePrimArray s Word8 -> Int -> MutablePrimArray s Word8 -> Int -> ST s () Source #
Unrolled copy loop for copying a utf8-encoded codepoint from source array to target array.
replacementChar :: Char Source #
xFFFD
, which will be encoded as 0xEF 0xBF 0xBD
3 bytes.