{-# LANGUAGE BangPatterns, CPP, GeneralizedNewtypeDeriving, MagicHash,
UnliftedFFITypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ViewPatterns #-}
module Data.Text.Internal.Encoding
( validateUtf8Chunk
, validateUtf8More
, decodeUtf8Chunk
, decodeUtf8More
, decodeUtf8With1
, decodeUtf8With2
, Utf8State
, startUtf8State
, StrictBuilder()
, strictBuilderToText
, textToStrictBuilder
, skipIncomplete
, getCompleteLen
, getPartialUtf8
) where
#if defined(ASSERTS)
import Control.Exception (assert)
#endif
import Data.Bits ((.&.), shiftL, shiftR)
import Data.ByteString (ByteString)
#if !MIN_VERSION_base(4,11,0)
import Data.Semigroup (Semigroup(..))
#endif
import Data.Word (Word32, Word8)
import Foreign.Storable (pokeElemOff)
import Data.Text.Encoding.Error (OnDecodeError)
import Data.Text.Internal (Text(..))
import Data.Text.Internal.Encoding.Utf8
(DecoderState, utf8AcceptState, utf8RejectState, updateDecoderState)
import Data.Text.Internal.StrictBuilder (StrictBuilder)
import qualified Data.ByteString as B
import qualified Data.ByteString.Internal as BI
import qualified Data.ByteString.Short.Internal as SBS
import qualified Data.Text.Array as A
import qualified Data.Text.Internal.StrictBuilder as SB
#if defined(ASSERTS)
import GHC.Stack (HasCallStack)
#endif
#ifdef SIMDUTF
import Data.Text.Internal.ByteStringCompat (withBS)
import Data.Text.Internal.Unsafe (unsafeWithForeignPtr)
import Data.Text.Unsafe (unsafeDupablePerformIO)
import Foreign.C.Types (CSize(..))
import Foreign.C.Types (CInt(..))
import Foreign.Ptr (Ptr)
#endif
strictBuilderToText :: StrictBuilder -> Text
strictBuilderToText :: StrictBuilder -> Text
strictBuilderToText = StrictBuilder -> Text
SB.toText
textToStrictBuilder :: Text -> StrictBuilder
textToStrictBuilder :: Text -> StrictBuilder
textToStrictBuilder = Text -> StrictBuilder
SB.fromText
data Utf8State = Utf8State
{
Utf8State -> DecoderState
utf8CodePointState :: {-# UNPACK #-} !DecoderState
, Utf8State -> PartialUtf8CodePoint
partialUtf8CodePoint :: {-# UNPACK #-} !PartialUtf8CodePoint
}
deriving (Utf8State -> Utf8State -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Utf8State -> Utf8State -> Bool
$c/= :: Utf8State -> Utf8State -> Bool
== :: Utf8State -> Utf8State -> Bool
$c== :: Utf8State -> Utf8State -> Bool
Eq, Int -> Utf8State -> ShowS
[Utf8State] -> ShowS
Utf8State -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Utf8State] -> ShowS
$cshowList :: [Utf8State] -> ShowS
show :: Utf8State -> String
$cshow :: Utf8State -> String
showsPrec :: Int -> Utf8State -> ShowS
$cshowsPrec :: Int -> Utf8State -> ShowS
Show)
startUtf8State :: Utf8State
startUtf8State :: Utf8State
startUtf8State = DecoderState -> PartialUtf8CodePoint -> Utf8State
Utf8State DecoderState
utf8AcceptState PartialUtf8CodePoint
partUtf8Empty
newtype PartialUtf8CodePoint = PartialUtf8CodePoint Word32
deriving (PartialUtf8CodePoint -> PartialUtf8CodePoint -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: PartialUtf8CodePoint -> PartialUtf8CodePoint -> Bool
$c/= :: PartialUtf8CodePoint -> PartialUtf8CodePoint -> Bool
== :: PartialUtf8CodePoint -> PartialUtf8CodePoint -> Bool
$c== :: PartialUtf8CodePoint -> PartialUtf8CodePoint -> Bool
Eq, Int -> PartialUtf8CodePoint -> ShowS
[PartialUtf8CodePoint] -> ShowS
PartialUtf8CodePoint -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [PartialUtf8CodePoint] -> ShowS
$cshowList :: [PartialUtf8CodePoint] -> ShowS
show :: PartialUtf8CodePoint -> String
$cshow :: PartialUtf8CodePoint -> String
showsPrec :: Int -> PartialUtf8CodePoint -> ShowS
$cshowsPrec :: Int -> PartialUtf8CodePoint -> ShowS
Show)
partUtf8Empty :: PartialUtf8CodePoint
partUtf8Empty :: PartialUtf8CodePoint
partUtf8Empty = Word32 -> PartialUtf8CodePoint
PartialUtf8CodePoint Word32
0
partUtf8Len :: PartialUtf8CodePoint -> Int
partUtf8Len :: PartialUtf8CodePoint -> Int
partUtf8Len (PartialUtf8CodePoint Word32
w) = forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$ Word32
w forall a. Bits a => a -> Int -> a
`shiftR` Int
24
partUtf8CompleteLen :: PartialUtf8CodePoint -> Int
partUtf8CompleteLen :: PartialUtf8CodePoint -> Int
partUtf8CompleteLen c :: PartialUtf8CodePoint
c@(PartialUtf8CodePoint Word32
w)
| PartialUtf8CodePoint -> Int
partUtf8Len PartialUtf8CodePoint
c forall a. Eq a => a -> a -> Bool
== Int
0 = Int
0
| Word32
0xf0 forall a. Ord a => a -> a -> Bool
<= Word32
firstByte = Int
4
| Word32
0xe0 forall a. Ord a => a -> a -> Bool
<= Word32
firstByte = Int
3
| Word32
0xc2 forall a. Ord a => a -> a -> Bool
<= Word32
firstByte = Int
2
| Bool
otherwise = Int
0
where
firstByte :: Word32
firstByte = (Word32
w forall a. Bits a => a -> Int -> a
`shiftR` Int
16) forall a. Bits a => a -> a -> a
.&. Word32
255
partUtf8UnsafeIndex ::
#if defined(ASSERTS)
HasCallStack =>
#endif
PartialUtf8CodePoint -> Int -> Word8
partUtf8UnsafeIndex :: PartialUtf8CodePoint -> Int -> Word8
partUtf8UnsafeIndex _c :: PartialUtf8CodePoint
_c@(PartialUtf8CodePoint Word32
w) Int
n =
#if defined(ASSERTS)
assert (0 <= n && n < partUtf8Len _c) $
#endif
forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$ Word32
w forall a. Bits a => a -> Int -> a
`shiftR` (Int
16 forall a. Num a => a -> a -> a
- Int
8 forall a. Num a => a -> a -> a
* Int
n)
partUtf8UnsafeAppend ::
#if defined(ASSERTS)
HasCallStack =>
#endif
PartialUtf8CodePoint -> ByteString -> PartialUtf8CodePoint
partUtf8UnsafeAppend :: PartialUtf8CodePoint -> ByteString -> PartialUtf8CodePoint
partUtf8UnsafeAppend c :: PartialUtf8CodePoint
c@(PartialUtf8CodePoint Word32
word) ByteString
bs =
#if defined(ASSERTS)
assert (lenc + lenbs <= 3) $
#endif
Word32 -> PartialUtf8CodePoint
PartialUtf8CodePoint forall a b. (a -> b) -> a -> b
$
forall {a}. (Num a, Bits a) => Int -> a -> a
tryPush Int
0 forall a b. (a -> b) -> a -> b
$ forall {a}. (Num a, Bits a) => Int -> a -> a
tryPush Int
1 forall a b. (a -> b) -> a -> b
$ forall {a}. (Num a, Bits a) => Int -> a -> a
tryPush Int
2 forall a b. (a -> b) -> a -> b
$ Word32
word forall a. Num a => a -> a -> a
+ (forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
lenbs forall a. Bits a => a -> Int -> a
`shiftL` Int
24)
where
lenc :: Int
lenc = PartialUtf8CodePoint -> Int
partUtf8Len PartialUtf8CodePoint
c
lenbs :: Int
lenbs = ByteString -> Int
B.length ByteString
bs
tryPush :: Int -> a -> a
tryPush Int
i a
w =
if Int
i forall a. Ord a => a -> a -> Bool
< Int
lenbs
then a
w forall a. Num a => a -> a -> a
+ (forall a b. (Integral a, Num b) => a -> b
fromIntegral (HasCallStack => ByteString -> Int -> Word8
B.index ByteString
bs Int
i) forall a. Bits a => a -> Int -> a
`shiftL` forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
16 forall a. Num a => a -> a -> a
- Int
8 forall a. Num a => a -> a -> a
* (Int
lenc forall a. Num a => a -> a -> a
+ Int
i)))
else a
w
{-# INLINE partUtf8Foldr #-}
partUtf8Foldr :: (Word8 -> a -> a) -> a -> PartialUtf8CodePoint -> a
partUtf8Foldr :: forall a. (Word8 -> a -> a) -> a -> PartialUtf8CodePoint -> a
partUtf8Foldr Word8 -> a -> a
f a
x0 PartialUtf8CodePoint
c = case PartialUtf8CodePoint -> Int
partUtf8Len PartialUtf8CodePoint
c of
Int
0 -> a
x0
Int
1 -> Int -> a -> a
build Int
0 a
x0
Int
2 -> Int -> a -> a
build Int
0 (Int -> a -> a
build Int
1 a
x0)
Int
_ -> Int -> a -> a
build Int
0 (Int -> a -> a
build Int
1 (Int -> a -> a
build Int
2 a
x0))
where
build :: Int -> a -> a
build Int
i a
x = Word8 -> a -> a
f (PartialUtf8CodePoint -> Int -> Word8
partUtf8UnsafeIndex PartialUtf8CodePoint
c Int
i) a
x
partUtf8ToByteString :: PartialUtf8CodePoint -> B.ByteString
partUtf8ToByteString :: PartialUtf8CodePoint -> ByteString
partUtf8ToByteString PartialUtf8CodePoint
c = Int -> (Ptr Word8 -> IO ()) -> ByteString
BI.unsafeCreate (PartialUtf8CodePoint -> Int
partUtf8Len PartialUtf8CodePoint
c) forall a b. (a -> b) -> a -> b
$ \Ptr Word8
ptr ->
forall a. (Word8 -> a -> a) -> a -> PartialUtf8CodePoint -> a
partUtf8Foldr (\Word8
w Int -> IO ()
k Int
i -> forall a. Storable a => Ptr a -> Int -> a -> IO ()
pokeElemOff Ptr Word8
ptr Int
i Word8
w forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Int -> IO ()
k (Int
iforall a. Num a => a -> a -> a
+Int
1)) (\Int
_ -> forall (f :: * -> *) a. Applicative f => a -> f a
pure ()) PartialUtf8CodePoint
c Int
0
getCompleteLen :: Utf8State -> Int
getCompleteLen :: Utf8State -> Int
getCompleteLen = PartialUtf8CodePoint -> Int
partUtf8CompleteLen forall b c a. (b -> c) -> (a -> b) -> a -> c
. Utf8State -> PartialUtf8CodePoint
partialUtf8CodePoint
getPartialUtf8 :: Utf8State -> B.ByteString
getPartialUtf8 :: Utf8State -> ByteString
getPartialUtf8 = PartialUtf8CodePoint -> ByteString
partUtf8ToByteString forall b c a. (b -> c) -> (a -> b) -> a -> c
. Utf8State -> PartialUtf8CodePoint
partialUtf8CodePoint
#ifdef SIMDUTF
foreign import ccall unsafe "_hs_text_is_valid_utf8" c_is_valid_utf8
:: Ptr Word8 -> CSize -> IO CInt
#endif
validateUtf8Chunk :: ByteString -> (Int, Maybe Utf8State)
validateUtf8Chunk :: ByteString -> (Int, Maybe Utf8State)
validateUtf8Chunk ByteString
bs = forall r. Int -> ByteString -> (Int -> Maybe Utf8State -> r) -> r
validateUtf8ChunkFrom Int
0 ByteString
bs (,)
{-# INLINE validateUtf8ChunkFrom #-}
validateUtf8ChunkFrom :: forall r. Int -> ByteString -> (Int -> Maybe Utf8State -> r) -> r
validateUtf8ChunkFrom :: forall r. Int -> ByteString -> (Int -> Maybe Utf8State -> r) -> r
validateUtf8ChunkFrom Int
ofs ByteString
bs Int -> Maybe Utf8State -> r
k
#if defined(SIMDUTF) || MIN_VERSION_bytestring(0,11,2)
| Int
guessUtf8Boundary forall a. Ord a => a -> a -> Bool
> Int
0 Bool -> Bool -> Bool
&&
(
#ifdef SIMDUTF
forall r. ByteString -> (ForeignPtr Word8 -> Int -> r) -> r
withBS (Int -> ByteString -> ByteString
B.drop Int
ofs ByteString
bs) forall a b. (a -> b) -> a -> b
$ \ ForeignPtr Word8
fp Int
_ -> forall a. IO a -> a
unsafeDupablePerformIO forall a b. (a -> b) -> a -> b
$
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word8
fp forall a b. (a -> b) -> a -> b
$ \Ptr Word8
ptr -> (forall a. Eq a => a -> a -> Bool
/= CInt
0) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
Ptr Word8 -> CSize -> IO CInt
c_is_valid_utf8 Ptr Word8
ptr (forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
guessUtf8Boundary)
#else
B.isValidUtf8 $ B.take guessUtf8Boundary (B.drop ofs bs)
#endif
) = Int -> r
slowValidateUtf8ChunkFrom (Int
ofs forall a. Num a => a -> a -> a
+ Int
guessUtf8Boundary)
| Bool
otherwise = Int -> r
slowValidateUtf8ChunkFrom Int
ofs
where
len :: Int
len = ByteString -> Int
B.length ByteString
bs forall a. Num a => a -> a -> a
- Int
ofs
isBoundary :: Int -> (Word8 -> Bool) -> Bool
isBoundary Int
n Word8 -> Bool
p = Int
len forall a. Ord a => a -> a -> Bool
>= Int
n Bool -> Bool -> Bool
&& Word8 -> Bool
p (HasCallStack => ByteString -> Int -> Word8
B.index ByteString
bs (Int
ofs forall a. Num a => a -> a -> a
+ Int
len forall a. Num a => a -> a -> a
- Int
n))
guessUtf8Boundary :: Int
guessUtf8Boundary
| Int -> (Word8 -> Bool) -> Bool
isBoundary Int
1 (forall a. Ord a => a -> a -> Bool
<= Word8
0x80) = Int
len
| Int -> (Word8 -> Bool) -> Bool
isBoundary Int
1 (Word8
0xc2 forall a. Ord a => a -> a -> Bool
<=) = Int
len forall a. Num a => a -> a -> a
- Int
1
| Int -> (Word8 -> Bool) -> Bool
isBoundary Int
2 (Word8
0xe0 forall a. Ord a => a -> a -> Bool
<=) = Int
len forall a. Num a => a -> a -> a
- Int
2
| Int -> (Word8 -> Bool) -> Bool
isBoundary Int
3 (Word8
0xf0 forall a. Ord a => a -> a -> Bool
<=) = Int
len forall a. Num a => a -> a -> a
- Int
3
| Bool
otherwise = Int
len
#else
= slowValidateUtf8ChunkFrom ofs
where
#endif
slowValidateUtf8ChunkFrom :: Int -> r
slowValidateUtf8ChunkFrom :: Int -> r
slowValidateUtf8ChunkFrom Int
ofs1 = Int -> Int -> DecoderState -> r
slowLoop Int
ofs1 Int
ofs1 DecoderState
utf8AcceptState
slowLoop :: Int -> Int -> DecoderState -> r
slowLoop !Int
utf8End Int
i DecoderState
s
| Int
i forall a. Ord a => a -> a -> Bool
< ByteString -> Int
B.length ByteString
bs =
case Word8 -> DecoderState -> DecoderState
updateDecoderState (HasCallStack => ByteString -> Int -> Word8
B.index ByteString
bs Int
i) DecoderState
s of
DecoderState
s' | DecoderState
s' forall a. Eq a => a -> a -> Bool
== DecoderState
utf8RejectState -> Int -> Maybe Utf8State -> r
k Int
utf8End forall a. Maybe a
Nothing
| DecoderState
s' forall a. Eq a => a -> a -> Bool
== DecoderState
utf8AcceptState -> Int -> Int -> DecoderState -> r
slowLoop (Int
i forall a. Num a => a -> a -> a
+ Int
1) (Int
i forall a. Num a => a -> a -> a
+ Int
1) DecoderState
s'
| Bool
otherwise -> Int -> Int -> DecoderState -> r
slowLoop Int
utf8End (Int
i forall a. Num a => a -> a -> a
+ Int
1) DecoderState
s'
| Bool
otherwise = Int -> Maybe Utf8State -> r
k Int
utf8End (forall a. a -> Maybe a
Just (DecoderState -> PartialUtf8CodePoint -> Utf8State
Utf8State DecoderState
s (PartialUtf8CodePoint -> ByteString -> PartialUtf8CodePoint
partUtf8UnsafeAppend PartialUtf8CodePoint
partUtf8Empty (Int -> ByteString -> ByteString
B.drop Int
utf8End ByteString
bs))))
validateUtf8More :: Utf8State -> ByteString -> (Int, Maybe Utf8State)
validateUtf8More :: Utf8State -> ByteString -> (Int, Maybe Utf8State)
validateUtf8More Utf8State
st ByteString
bs = forall r.
Utf8State -> ByteString -> (Int -> Maybe Utf8State -> r) -> r
validateUtf8MoreCont Utf8State
st ByteString
bs (,)
{-# INLINE validateUtf8MoreCont #-}
validateUtf8MoreCont :: Utf8State -> ByteString -> (Int -> Maybe Utf8State -> r) -> r
validateUtf8MoreCont :: forall r.
Utf8State -> ByteString -> (Int -> Maybe Utf8State -> r) -> r
validateUtf8MoreCont st :: Utf8State
st@(Utf8State DecoderState
s0 PartialUtf8CodePoint
part) ByteString
bs Int -> Maybe Utf8State -> r
k
| Int
len forall a. Ord a => a -> a -> Bool
> Int
0 = Int -> DecoderState -> r
loop Int
0 DecoderState
s0
| Bool
otherwise = Int -> Maybe Utf8State -> r
k (- PartialUtf8CodePoint -> Int
partUtf8Len PartialUtf8CodePoint
part) (forall a. a -> Maybe a
Just Utf8State
st)
where
len :: Int
len = ByteString -> Int
B.length ByteString
bs
loop :: Int -> DecoderState -> r
loop !Int
i DecoderState
s
| DecoderState
s forall a. Eq a => a -> a -> Bool
== DecoderState
utf8AcceptState = forall r. Int -> ByteString -> (Int -> Maybe Utf8State -> r) -> r
validateUtf8ChunkFrom Int
i ByteString
bs Int -> Maybe Utf8State -> r
k
| Int
i forall a. Ord a => a -> a -> Bool
< Int
len =
case Word8 -> DecoderState -> DecoderState
updateDecoderState (HasCallStack => ByteString -> Int -> Word8
B.index ByteString
bs Int
i) DecoderState
s of
DecoderState
s' | DecoderState
s' forall a. Eq a => a -> a -> Bool
== DecoderState
utf8RejectState -> Int -> Maybe Utf8State -> r
k (- PartialUtf8CodePoint -> Int
partUtf8Len PartialUtf8CodePoint
part) forall a. Maybe a
Nothing
| Bool
otherwise -> Int -> DecoderState -> r
loop (Int
i forall a. Num a => a -> a -> a
+ Int
1) DecoderState
s'
| Bool
otherwise = Int -> Maybe Utf8State -> r
k (- PartialUtf8CodePoint -> Int
partUtf8Len PartialUtf8CodePoint
part) (forall a. a -> Maybe a
Just (DecoderState -> PartialUtf8CodePoint -> Utf8State
Utf8State DecoderState
s (PartialUtf8CodePoint -> ByteString -> PartialUtf8CodePoint
partUtf8UnsafeAppend PartialUtf8CodePoint
part ByteString
bs)))
partUtf8ToStrictBuilder :: PartialUtf8CodePoint -> StrictBuilder
partUtf8ToStrictBuilder :: PartialUtf8CodePoint -> StrictBuilder
partUtf8ToStrictBuilder PartialUtf8CodePoint
c =
forall a. (Word8 -> a -> a) -> a -> PartialUtf8CodePoint -> a
partUtf8Foldr (forall a. Semigroup a => a -> a -> a
(<>) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> StrictBuilder
SB.unsafeFromWord8) forall a. Monoid a => a
mempty PartialUtf8CodePoint
c
utf8StateToStrictBuilder :: Utf8State -> StrictBuilder
utf8StateToStrictBuilder :: Utf8State -> StrictBuilder
utf8StateToStrictBuilder = PartialUtf8CodePoint -> StrictBuilder
partUtf8ToStrictBuilder forall b c a. (b -> c) -> (a -> b) -> a -> c
. Utf8State -> PartialUtf8CodePoint
partialUtf8CodePoint
decodeUtf8More :: Utf8State -> ByteString -> (StrictBuilder, ByteString, Maybe Utf8State)
decodeUtf8More :: Utf8State
-> ByteString -> (StrictBuilder, ByteString, Maybe Utf8State)
decodeUtf8More Utf8State
s ByteString
bs =
forall r.
Utf8State -> ByteString -> (Int -> Maybe Utf8State -> r) -> r
validateUtf8MoreCont Utf8State
s ByteString
bs forall a b. (a -> b) -> a -> b
$ \Int
len Maybe Utf8State
ms ->
let builder :: StrictBuilder
builder | Int
len forall a. Ord a => a -> a -> Bool
<= Int
0 = forall a. Monoid a => a
mempty
| Bool
otherwise = Utf8State -> StrictBuilder
utf8StateToStrictBuilder Utf8State
s
forall a. Semigroup a => a -> a -> a
<> ByteString -> StrictBuilder
SB.unsafeFromByteString (Int -> ByteString -> ByteString
B.take Int
len ByteString
bs)
in (StrictBuilder
builder, Int -> ByteString -> ByteString
B.drop Int
len ByteString
bs, Maybe Utf8State
ms)
decodeUtf8Chunk :: ByteString -> (StrictBuilder, ByteString, Maybe Utf8State)
decodeUtf8Chunk :: ByteString -> (StrictBuilder, ByteString, Maybe Utf8State)
decodeUtf8Chunk = Utf8State
-> ByteString -> (StrictBuilder, ByteString, Maybe Utf8State)
decodeUtf8More Utf8State
startUtf8State
{-# INLINE skipIncomplete #-}
skipIncomplete :: OnDecodeError -> String -> Utf8State -> StrictBuilder
skipIncomplete :: OnDecodeError -> String -> Utf8State -> StrictBuilder
skipIncomplete OnDecodeError
onErr String
msg Utf8State
s =
forall a. (Word8 -> a -> a) -> a -> PartialUtf8CodePoint -> a
partUtf8Foldr
(forall a. Semigroup a => a -> a -> a
(<>) forall b c a. (b -> c) -> (a -> b) -> a -> c
. OnDecodeError -> String -> Word8 -> StrictBuilder
handleUtf8Error OnDecodeError
onErr String
msg)
forall a. Monoid a => a
mempty (Utf8State -> PartialUtf8CodePoint
partialUtf8CodePoint Utf8State
s)
{-# INLINE handleUtf8Error #-}
handleUtf8Error :: OnDecodeError -> String -> Word8 -> StrictBuilder
handleUtf8Error :: OnDecodeError -> String -> Word8 -> StrictBuilder
handleUtf8Error OnDecodeError
onErr String
msg Word8
w = case OnDecodeError
onErr String
msg (forall a. a -> Maybe a
Just Word8
w) of
Just Char
c -> Char -> StrictBuilder
SB.fromChar Char
c
Maybe Char
Nothing -> forall a. Monoid a => a
mempty
decodeUtf8With1 ::
#if defined(ASSERTS)
HasCallStack =>
#endif
OnDecodeError -> String -> ByteString -> Text
decodeUtf8With1 :: OnDecodeError -> String -> ByteString -> Text
decodeUtf8With1 OnDecodeError
onErr String
msg ByteString
bs = forall r. Int -> ByteString -> (Int -> Maybe Utf8State -> r) -> r
validateUtf8ChunkFrom Int
0 ByteString
bs forall a b. (a -> b) -> a -> b
$ \Int
len Maybe Utf8State
ms -> case Maybe Utf8State
ms of
Just Utf8State
s
| Int
len forall a. Eq a => a -> a -> Bool
== ByteString -> Int
B.length ByteString
bs ->
let !(SBS.SBS ByteArray#
arr) = ByteString -> ShortByteString
SBS.toShort ByteString
bs in
Array -> Int -> Int -> Text
Text (ByteArray# -> Array
A.ByteArray ByteArray#
arr) Int
0 Int
len
| Bool
otherwise -> StrictBuilder -> Text
SB.toText forall a b. (a -> b) -> a -> b
$
ByteString -> StrictBuilder
SB.unsafeFromByteString (Int -> ByteString -> ByteString
B.take Int
len ByteString
bs) forall a. Semigroup a => a -> a -> a
<> OnDecodeError -> String -> Utf8State -> StrictBuilder
skipIncomplete OnDecodeError
onErr String
msg Utf8State
s
Maybe Utf8State
Nothing ->
let (StrictBuilder
builder, ByteString
_, Utf8State
s) = OnDecodeError
-> String
-> Utf8State
-> ByteString
-> (StrictBuilder, ByteString, Utf8State)
decodeUtf8With2 OnDecodeError
onErr String
msg Utf8State
startUtf8State (Int -> ByteString -> ByteString
B.drop (Int
len forall a. Num a => a -> a -> a
+ Int
1) ByteString
bs) in
StrictBuilder -> Text
SB.toText forall a b. (a -> b) -> a -> b
$
ByteString -> StrictBuilder
SB.unsafeFromByteString (Int -> ByteString -> ByteString
B.take Int
len ByteString
bs) forall a. Semigroup a => a -> a -> a
<>
OnDecodeError -> String -> Word8 -> StrictBuilder
handleUtf8Error OnDecodeError
onErr String
msg (HasCallStack => ByteString -> Int -> Word8
B.index ByteString
bs Int
len) forall a. Semigroup a => a -> a -> a
<>
StrictBuilder
builder forall a. Semigroup a => a -> a -> a
<>
OnDecodeError -> String -> Utf8State -> StrictBuilder
skipIncomplete OnDecodeError
onErr String
msg Utf8State
s
decodeUtf8With2 ::
#if defined(ASSERTS)
HasCallStack =>
#endif
OnDecodeError -> String -> Utf8State -> ByteString -> (StrictBuilder, ByteString, Utf8State)
decodeUtf8With2 :: OnDecodeError
-> String
-> Utf8State
-> ByteString
-> (StrictBuilder, ByteString, Utf8State)
decodeUtf8With2 OnDecodeError
onErr String
msg Utf8State
s0 ByteString
bs = Utf8State
-> Int -> StrictBuilder -> (StrictBuilder, ByteString, Utf8State)
loop Utf8State
s0 Int
0 forall a. Monoid a => a
mempty
where
loop :: Utf8State
-> Int -> StrictBuilder -> (StrictBuilder, ByteString, Utf8State)
loop Utf8State
s Int
i !StrictBuilder
builder =
let nonEmptyPrefix :: Int -> StrictBuilder
nonEmptyPrefix Int
len = StrictBuilder
builder
forall a. Semigroup a => a -> a -> a
<> Utf8State -> StrictBuilder
utf8StateToStrictBuilder Utf8State
s
forall a. Semigroup a => a -> a -> a
<> ByteString -> StrictBuilder
SB.unsafeFromByteString (Int -> ByteString -> ByteString
B.take Int
len (Int -> ByteString -> ByteString
B.drop Int
i ByteString
bs))
in forall r.
Utf8State -> ByteString -> (Int -> Maybe Utf8State -> r) -> r
validateUtf8MoreCont Utf8State
s (Int -> ByteString -> ByteString
B.drop Int
i ByteString
bs) forall a b. (a -> b) -> a -> b
$ \Int
len Maybe Utf8State
ms -> case Maybe Utf8State
ms of
Maybe Utf8State
Nothing ->
if Int
len forall a. Ord a => a -> a -> Bool
< Int
0
then
let builder' :: StrictBuilder
builder' = StrictBuilder
builder forall a. Semigroup a => a -> a -> a
<> OnDecodeError -> String -> Utf8State -> StrictBuilder
skipIncomplete OnDecodeError
onErr String
msg Utf8State
s
in Utf8State
-> Int -> StrictBuilder -> (StrictBuilder, ByteString, Utf8State)
loop Utf8State
startUtf8State Int
i StrictBuilder
builder'
else
let builder' :: StrictBuilder
builder' = Int -> StrictBuilder
nonEmptyPrefix Int
len
forall a. Semigroup a => a -> a -> a
<> OnDecodeError -> String -> Word8 -> StrictBuilder
handleUtf8Error OnDecodeError
onErr String
msg (HasCallStack => ByteString -> Int -> Word8
B.index ByteString
bs (Int
i forall a. Num a => a -> a -> a
+ Int
len))
in Utf8State
-> Int -> StrictBuilder -> (StrictBuilder, ByteString, Utf8State)
loop Utf8State
startUtf8State (Int
i forall a. Num a => a -> a -> a
+ Int
len forall a. Num a => a -> a -> a
+ Int
1) StrictBuilder
builder'
Just Utf8State
s' ->
let builder' :: StrictBuilder
builder' = if Int
len forall a. Ord a => a -> a -> Bool
<= Int
0 then StrictBuilder
builder else Int -> StrictBuilder
nonEmptyPrefix Int
len
undecoded :: ByteString
undecoded = if ByteString -> Int
B.length ByteString
bs forall a. Ord a => a -> a -> Bool
>= PartialUtf8CodePoint -> Int
partUtf8Len (Utf8State -> PartialUtf8CodePoint
partialUtf8CodePoint Utf8State
s')
then Int -> ByteString -> ByteString
B.drop (Int
i forall a. Num a => a -> a -> a
+ Int
len) ByteString
bs
else PartialUtf8CodePoint -> ByteString
partUtf8ToByteString (Utf8State -> PartialUtf8CodePoint
partialUtf8CodePoint Utf8State
s')
in (StrictBuilder
builder', ByteString
undecoded, Utf8State
s')