{-# LANGUAGE BangPatterns               #-}
{-# LANGUAGE CPP                        #-}
{-# LANGUAGE ForeignFunctionInterface   #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MagicHash                  #-}
{-# LANGUAGE Rank2Types                 #-}
{-# LANGUAGE ScopedTypeVariables        #-}
{-# LANGUAGE UnliftedFFITypes           #-}

--
-- Module      : Data.Text.Lazy.Encoding.Fusion
-- Copyright   : (c) 2009, 2010 Bryan O'Sullivan
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
-- /Warning/: this is an internal module, and does not have a stable
-- API or name. Functions in this module may not check or enforce
-- preconditions expected by public modules. Use at your own risk!
--
-- Fusible 'Stream'-oriented functions for converting between lazy
-- 'Text' and several common encodings.

-- | Provides a stream-based approach to decoding Unicode data. Each function
-- below works the same way: you give it a chunk of data, and it gives back a
-- @DecodeResult@. If the parse was a success, then you get a chunk of @Text@
-- (possibly empty) and a continuation parsing function. If the parse was a
-- failure, you get a chunk of successfully decoded @Text@ (possibly empty) and
-- the unconsumed bytes.
--
-- In order to indicate end of stream, you pass an empty @ByteString@ to the
-- decode function. This call may result in a failure, if there were unused
-- bytes left over from a previous step which formed part of a code sequence.
module Data.Streaming.Text
    (
    -- * Streaming
      decodeUtf8
    , decodeUtf8Pure
    , decodeUtf16LE
    , decodeUtf16BE
    , decodeUtf32LE
    , decodeUtf32BE

    -- * Type
    , DecodeResult (..)
    ) where

import           Control.Monad.ST                  (ST, runST)
import           Control.Monad.ST.Unsafe           (unsafeIOToST, unsafeSTToIO)
import           Data.Bits                         ((.|.), shiftL)
import qualified Data.ByteString                   as B
import           Data.ByteString.Internal          (ByteString (PS))
import qualified Data.ByteString.Unsafe            as B
import           Data.Text                         (Text)
import qualified Data.Text                         as T
import qualified Data.Text.Array                   as A
import           Data.Text.Internal                (text)
import qualified Data.Text.Internal.Encoding.Utf16 as U16
import qualified Data.Text.Internal.Encoding.Utf32 as U32
import qualified Data.Text.Internal.Encoding.Utf8  as U8
import           Data.Text.Internal.Unsafe.Char    (unsafeWrite, unsafeChr32,
                                                    unsafeChr8)
import           Data.Word                         (Word32, Word8)
import           Foreign.C.Types                   (CSize (..))
import           Foreign.ForeignPtr                (withForeignPtr)
import           Foreign.Marshal.Utils             (with)
import           Foreign.Ptr                       (Ptr, minusPtr, nullPtr,
                                                    plusPtr)
import           Foreign.Storable                  (Storable, peek, poke)
import           GHC.Base                          (MutableByteArray#)

#if MIN_VERSION_text(2,0,0)
import           Control.Exception                 (try, evaluate)
import qualified Data.Text.Encoding                as TE
import qualified Data.Text.Encoding.Error          as TE
import           Data.Text.Internal.Unsafe.Char    (unsafeChr16)
import           System.IO.Unsafe                  (unsafePerformIO)
#else
import           Data.Text.Internal.Unsafe.Char    (unsafeChr)
unsafeChr16 :: Word16 -> Char
unsafeChr16 = Word16 -> Char
unsafeChr
#endif

data S = S0
       | S1 {-# UNPACK #-} !Word8
       | S2 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8
       | S3 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8
    deriving Int -> S -> ShowS
[S] -> ShowS
S -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [S] -> ShowS
$cshowList :: [S] -> ShowS
show :: S -> String
$cshow :: S -> String
showsPrec :: Int -> S -> ShowS
$cshowsPrec :: Int -> S -> ShowS
Show

data DecodeResult
    = DecodeResultSuccess !Text !(B.ByteString -> DecodeResult)
    | DecodeResultFailure !Text !B.ByteString

toBS :: S -> B.ByteString
toBS :: S -> ByteString
toBS S
S0 = ByteString
B.empty
toBS (S1 Word8
a) = [Word8] -> ByteString
B.pack [Word8
a]
toBS (S2 Word8
a Word8
b) = [Word8] -> ByteString
B.pack [Word8
a, Word8
b]
toBS (S3 Word8
a Word8
b Word8
c) = [Word8] -> ByteString
B.pack [Word8
a, Word8
b, Word8
c]
{-# INLINE toBS #-}

getText :: Int -> A.MArray s -> ST s Text
getText :: forall s. Int -> MArray s -> ST s Text
getText Int
j MArray s
marr = do
    Array
arr <- forall s. MArray s -> ST s Array
A.unsafeFreeze MArray s
marr
    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Array -> Int -> Int -> Text
text Array
arr Int
0 Int
j
{-# INLINE getText #-}

#include "text_cbits.h"

foreign import ccall unsafe "_hs_streaming_commons_decode_utf8_state" c_decode_utf8_with_state
    :: MutableByteArray# s -> Ptr CSize
    -> Ptr (Ptr Word8) -> Ptr Word8
    -> Ptr CodePoint -> Ptr DecoderState -> IO (Ptr Word8)

newtype CodePoint = CodePoint Word32 deriving (CodePoint -> CodePoint -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: CodePoint -> CodePoint -> Bool
$c/= :: CodePoint -> CodePoint -> Bool
== :: CodePoint -> CodePoint -> Bool
$c== :: CodePoint -> CodePoint -> Bool
Eq, Int -> CodePoint -> ShowS
[CodePoint] -> ShowS
CodePoint -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CodePoint] -> ShowS
$cshowList :: [CodePoint] -> ShowS
show :: CodePoint -> String
$cshow :: CodePoint -> String
showsPrec :: Int -> CodePoint -> ShowS
$cshowsPrec :: Int -> CodePoint -> ShowS
Show, Integer -> CodePoint
CodePoint -> CodePoint
CodePoint -> CodePoint -> CodePoint
forall a.
(a -> a -> a)
-> (a -> a -> a)
-> (a -> a -> a)
-> (a -> a)
-> (a -> a)
-> (a -> a)
-> (Integer -> a)
-> Num a
fromInteger :: Integer -> CodePoint
$cfromInteger :: Integer -> CodePoint
signum :: CodePoint -> CodePoint
$csignum :: CodePoint -> CodePoint
abs :: CodePoint -> CodePoint
$cabs :: CodePoint -> CodePoint
negate :: CodePoint -> CodePoint
$cnegate :: CodePoint -> CodePoint
* :: CodePoint -> CodePoint -> CodePoint
$c* :: CodePoint -> CodePoint -> CodePoint
- :: CodePoint -> CodePoint -> CodePoint
$c- :: CodePoint -> CodePoint -> CodePoint
+ :: CodePoint -> CodePoint -> CodePoint
$c+ :: CodePoint -> CodePoint -> CodePoint
Num, Ptr CodePoint -> IO CodePoint
Ptr CodePoint -> Int -> IO CodePoint
Ptr CodePoint -> Int -> CodePoint -> IO ()
Ptr CodePoint -> CodePoint -> IO ()
CodePoint -> Int
forall b. Ptr b -> Int -> IO CodePoint
forall b. Ptr b -> Int -> CodePoint -> IO ()
forall a.
(a -> Int)
-> (a -> Int)
-> (Ptr a -> Int -> IO a)
-> (Ptr a -> Int -> a -> IO ())
-> (forall b. Ptr b -> Int -> IO a)
-> (forall b. Ptr b -> Int -> a -> IO ())
-> (Ptr a -> IO a)
-> (Ptr a -> a -> IO ())
-> Storable a
poke :: Ptr CodePoint -> CodePoint -> IO ()
$cpoke :: Ptr CodePoint -> CodePoint -> IO ()
peek :: Ptr CodePoint -> IO CodePoint
$cpeek :: Ptr CodePoint -> IO CodePoint
pokeByteOff :: forall b. Ptr b -> Int -> CodePoint -> IO ()
$cpokeByteOff :: forall b. Ptr b -> Int -> CodePoint -> IO ()
peekByteOff :: forall b. Ptr b -> Int -> IO CodePoint
$cpeekByteOff :: forall b. Ptr b -> Int -> IO CodePoint
pokeElemOff :: Ptr CodePoint -> Int -> CodePoint -> IO ()
$cpokeElemOff :: Ptr CodePoint -> Int -> CodePoint -> IO ()
peekElemOff :: Ptr CodePoint -> Int -> IO CodePoint
$cpeekElemOff :: Ptr CodePoint -> Int -> IO CodePoint
alignment :: CodePoint -> Int
$calignment :: CodePoint -> Int
sizeOf :: CodePoint -> Int
$csizeOf :: CodePoint -> Int
Storable)
newtype DecoderState = DecoderState Word32 deriving (DecoderState -> DecoderState -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: DecoderState -> DecoderState -> Bool
$c/= :: DecoderState -> DecoderState -> Bool
== :: DecoderState -> DecoderState -> Bool
$c== :: DecoderState -> DecoderState -> Bool
Eq, Int -> DecoderState -> ShowS
[DecoderState] -> ShowS
DecoderState -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [DecoderState] -> ShowS
$cshowList :: [DecoderState] -> ShowS
show :: DecoderState -> String
$cshow :: DecoderState -> String
showsPrec :: Int -> DecoderState -> ShowS
$cshowsPrec :: Int -> DecoderState -> ShowS
Show, Integer -> DecoderState
DecoderState -> DecoderState
DecoderState -> DecoderState -> DecoderState
forall a.
(a -> a -> a)
-> (a -> a -> a)
-> (a -> a -> a)
-> (a -> a)
-> (a -> a)
-> (a -> a)
-> (Integer -> a)
-> Num a
fromInteger :: Integer -> DecoderState
$cfromInteger :: Integer -> DecoderState
signum :: DecoderState -> DecoderState
$csignum :: DecoderState -> DecoderState
abs :: DecoderState -> DecoderState
$cabs :: DecoderState -> DecoderState
negate :: DecoderState -> DecoderState
$cnegate :: DecoderState -> DecoderState
* :: DecoderState -> DecoderState -> DecoderState
$c* :: DecoderState -> DecoderState -> DecoderState
- :: DecoderState -> DecoderState -> DecoderState
$c- :: DecoderState -> DecoderState -> DecoderState
+ :: DecoderState -> DecoderState -> DecoderState
$c+ :: DecoderState -> DecoderState -> DecoderState
Num, Ptr DecoderState -> IO DecoderState
Ptr DecoderState -> Int -> IO DecoderState
Ptr DecoderState -> Int -> DecoderState -> IO ()
Ptr DecoderState -> DecoderState -> IO ()
DecoderState -> Int
forall b. Ptr b -> Int -> IO DecoderState
forall b. Ptr b -> Int -> DecoderState -> IO ()
forall a.
(a -> Int)
-> (a -> Int)
-> (Ptr a -> Int -> IO a)
-> (Ptr a -> Int -> a -> IO ())
-> (forall b. Ptr b -> Int -> IO a)
-> (forall b. Ptr b -> Int -> a -> IO ())
-> (Ptr a -> IO a)
-> (Ptr a -> a -> IO ())
-> Storable a
poke :: Ptr DecoderState -> DecoderState -> IO ()
$cpoke :: Ptr DecoderState -> DecoderState -> IO ()
peek :: Ptr DecoderState -> IO DecoderState
$cpeek :: Ptr DecoderState -> IO DecoderState
pokeByteOff :: forall b. Ptr b -> Int -> DecoderState -> IO ()
$cpokeByteOff :: forall b. Ptr b -> Int -> DecoderState -> IO ()
peekByteOff :: forall b. Ptr b -> Int -> IO DecoderState
$cpeekByteOff :: forall b. Ptr b -> Int -> IO DecoderState
pokeElemOff :: Ptr DecoderState -> Int -> DecoderState -> IO ()
$cpokeElemOff :: Ptr DecoderState -> Int -> DecoderState -> IO ()
peekElemOff :: Ptr DecoderState -> Int -> IO DecoderState
$cpeekElemOff :: Ptr DecoderState -> Int -> IO DecoderState
alignment :: DecoderState -> Int
$calignment :: DecoderState -> Int
sizeOf :: DecoderState -> Int
$csizeOf :: DecoderState -> Int
Storable)

-- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using
-- UTF-8 encoding.
decodeUtf8 :: B.ByteString -> DecodeResult
#if MIN_VERSION_text(2,0,0)
decodeUtf8 = go mempty TE.streamDecodeUtf8
  where
    go :: B.ByteString -> (B.ByteString -> TE.Decoding) -> B.ByteString -> DecodeResult
    go prev decoder curr = case unsafePerformIO (try (evaluate (decoder curr))) of
      -- Caught exception does not allow to reconstruct 'DecodeResultFailure',
      -- so delegating this to 'decodeUtf8Pure'
      Left (_ :: TE.UnicodeException) -> decodeUtf8Pure (prev <> curr)
      Right (TE.Some decoded undecoded cont)
          -- An empty bytestring indicates end-of-input, if we still have undecoded bytes that
          -- becomes a failure.
          | B.null curr && not (B.null undecoded) -> DecodeResultFailure decoded undecoded
          | otherwise -> DecodeResultSuccess decoded (go undecoded cont)
#else
decodeUtf8 :: ByteString -> DecodeResult
decodeUtf8 = ByteString
-> CodePoint -> DecoderState -> ByteString -> DecodeResult
decodeChunk ByteString
B.empty CodePoint
0 DecoderState
0
 where
  decodeChunkCheck :: B.ByteString -> CodePoint -> DecoderState -> B.ByteString -> DecodeResult
  decodeChunkCheck :: ByteString
-> CodePoint -> DecoderState -> ByteString -> DecodeResult
decodeChunkCheck ByteString
bsOld CodePoint
codepoint DecoderState
state ByteString
bs
    | ByteString -> Bool
B.null ByteString
bs =
        if ByteString -> Bool
B.null ByteString
bsOld
            then Text -> (ByteString -> DecodeResult) -> DecodeResult
DecodeResultSuccess Text
T.empty ByteString -> DecodeResult
decodeUtf8
            else Text -> ByteString -> DecodeResult
DecodeResultFailure Text
T.empty ByteString
bsOld
    | Bool
otherwise = ByteString
-> CodePoint -> DecoderState -> ByteString -> DecodeResult
decodeChunk ByteString
bsOld CodePoint
codepoint DecoderState
state ByteString
bs
  -- We create a slightly larger than necessary buffer to accommodate a
  -- potential surrogate pair started in the last buffer
  decodeChunk :: B.ByteString -> CodePoint -> DecoderState -> B.ByteString -> DecodeResult
  decodeChunk :: ByteString
-> CodePoint -> DecoderState -> ByteString -> DecodeResult
decodeChunk ByteString
bsOld CodePoint
codepoint0 DecoderState
state0 bs :: ByteString
bs@(PS ForeignPtr Word8
fp Int
off Int
len) =
    forall a. (forall s. ST s a) -> a
runST forall a b. (a -> b) -> a -> b
$ (forall a s. IO a -> ST s a
unsafeIOToST forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall s. MArray s -> IO DecodeResult
decodeChunkToBuffer) forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< forall s. Int -> ST s (MArray s)
A.new (Int
lenforall a. Num a => a -> a -> a
+Int
1)
   where
    decodeChunkToBuffer :: A.MArray s -> IO DecodeResult
    decodeChunkToBuffer :: forall s. MArray s -> IO DecodeResult
decodeChunkToBuffer MArray s
dest = forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr Word8
fp forall a b. (a -> b) -> a -> b
$ \Ptr Word8
ptr ->
      forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b
with (CSize
0::CSize) forall a b. (a -> b) -> a -> b
$ \Ptr CSize
destOffPtr ->
      forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b
with CodePoint
codepoint0 forall a b. (a -> b) -> a -> b
$ \Ptr CodePoint
codepointPtr ->
      forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b
with DecoderState
state0 forall a b. (a -> b) -> a -> b
$ \Ptr DecoderState
statePtr ->
      forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b
with forall a. Ptr a
nullPtr forall a b. (a -> b) -> a -> b
$ \Ptr (Ptr Word8)
curPtrPtr ->
        let end :: Ptr b
end = Ptr Word8
ptr forall a b. Ptr a -> Int -> Ptr b
`plusPtr` (Int
off forall a. Num a => a -> a -> a
+ Int
len)
            loop :: Ptr Word8 -> IO DecodeResult
loop Ptr Word8
curPtr = do
              forall a. Storable a => Ptr a -> a -> IO ()
poke Ptr (Ptr Word8)
curPtrPtr Ptr Word8
curPtr
              Ptr Word8
_ <- forall s.
MutableByteArray# s
-> Ptr CSize
-> Ptr (Ptr Word8)
-> Ptr Word8
-> Ptr CodePoint
-> Ptr DecoderState
-> IO (Ptr Word8)
c_decode_utf8_with_state (forall s. MArray s -> MutableByteArray# s
A.maBA MArray s
dest) Ptr CSize
destOffPtr
                         Ptr (Ptr Word8)
curPtrPtr forall a. Ptr a
end Ptr CodePoint
codepointPtr Ptr DecoderState
statePtr
              DecoderState
state <- forall a. Storable a => Ptr a -> IO a
peek Ptr DecoderState
statePtr
              CSize
n <- forall a. Storable a => Ptr a -> IO a
peek Ptr CSize
destOffPtr
              Text
chunkText <- forall s a. ST s a -> IO a
unsafeSTToIO forall a b. (a -> b) -> a -> b
$ do
                  Array
arr <- forall s. MArray s -> ST s Array
A.unsafeFreeze MArray s
dest
                  forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Array -> Int -> Int -> Text
text Array
arr Int
0 (forall a b. (Integral a, Num b) => a -> b
fromIntegral CSize
n)
              Ptr Word8
lastPtr <- forall a. Storable a => Ptr a -> IO a
peek Ptr (Ptr Word8)
curPtrPtr
              let left :: Int
left = Ptr Word8
lastPtr forall a b. Ptr a -> Ptr b -> Int
`minusPtr` Ptr Word8
curPtr
                  -- The logic here is: if any text was generated, then the
                  -- previous leftovers were completely consumed already.
                  -- If no text was generated, then any leftovers from the
                  -- previous step are still leftovers now.
                  unused :: ByteString
unused
                    | Bool -> Bool
not forall a b. (a -> b) -> a -> b
$ Text -> Bool
T.null Text
chunkText = Int -> ByteString -> ByteString
B.unsafeDrop Int
left ByteString
bs
                    | ByteString -> Bool
B.null ByteString
bsOld = ByteString
bs
                    | Bool
otherwise = ByteString -> ByteString -> ByteString
B.append ByteString
bsOld ByteString
bs
              case ByteString
unused seq :: forall a b. a -> b -> b
`seq` DecoderState
state of
                UTF8_REJECT ->
                  -- We encountered an encoding error
                  return $! DecodeResultFailure chunkText unused
                DecoderState
_ -> do
                  CodePoint
codepoint <- forall a. Storable a => Ptr a -> IO a
peek Ptr CodePoint
codepointPtr
                  forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Text -> (ByteString -> DecodeResult) -> DecodeResult
DecodeResultSuccess Text
chunkText
                         forall a b. (a -> b) -> a -> b
$! ByteString
-> CodePoint -> DecoderState -> ByteString -> DecodeResult
decodeChunkCheck ByteString
unused CodePoint
codepoint DecoderState
state
        in Ptr Word8 -> IO DecodeResult
loop (Ptr Word8
ptr forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
off)
#endif

-- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using
-- UTF-8 encoding.
decodeUtf8Pure :: B.ByteString -> DecodeResult
decodeUtf8Pure :: ByteString -> DecodeResult
decodeUtf8Pure =
    S -> ByteString -> DecodeResult
beginChunk S
S0
  where
    beginChunk :: S -> B.ByteString -> DecodeResult
    beginChunk :: S -> ByteString -> DecodeResult
beginChunk S
s ByteString
bs | ByteString -> Bool
B.null ByteString
bs =
        case S
s of
            S
S0 -> Text -> (ByteString -> DecodeResult) -> DecodeResult
DecodeResultSuccess Text
T.empty (S -> ByteString -> DecodeResult
beginChunk S
S0)
            S
_  -> Text -> ByteString -> DecodeResult
DecodeResultFailure Text
T.empty forall a b. (a -> b) -> a -> b
$ S -> ByteString
toBS S
s
    beginChunk S
s0 ByteString
ps = forall a. (forall s. ST s a) -> a
runST forall a b. (a -> b) -> a -> b
$ do
        let initLen :: Int
initLen = ByteString -> Int
B.length ByteString
ps
#if MIN_VERSION_text(2,0,0)
        -- Worst-case scenario: the very first byte finishes a 4-byte sequence,
        -- so decoding results in 4 + (initLen - 1) bytes.
        marr <- A.new (initLen + 3)
#else
        MArray s
marr <- forall s. Int -> ST s (MArray s)
A.new (Int
initLen forall a. Num a => a -> a -> a
+ Int
1)
#endif
        let start :: Int -> Int -> ST s DecodeResult
start !Int
i !Int
j
                | Int
i forall a. Ord a => a -> a -> Bool
>= Int
len = do
                    Text
t <- forall s. Int -> MArray s -> ST s Text
getText Int
j MArray s
marr
                    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Text -> (ByteString -> DecodeResult) -> DecodeResult
DecodeResultSuccess Text
t (S -> ByteString -> DecodeResult
beginChunk S
S0)
                |                Word8 -> Bool
U8.validate1 Word8
a       = Int -> Char -> ST s DecodeResult
addChar' Int
1 (Word8 -> Char
unsafeChr8 Word8
a)
                | Int
i forall a. Num a => a -> a -> a
+ Int
1 forall a. Ord a => a -> a -> Bool
< Int
len Bool -> Bool -> Bool
&& Word8 -> Word8 -> Bool
U8.validate2 Word8
a Word8
b     = Int -> Char -> ST s DecodeResult
addChar' Int
2 (Word8 -> Word8 -> Char
U8.chr2 Word8
a Word8
b)
                | Int
i forall a. Num a => a -> a -> a
+ Int
2 forall a. Ord a => a -> a -> Bool
< Int
len Bool -> Bool -> Bool
&& Word8 -> Word8 -> Word8 -> Bool
U8.validate3 Word8
a Word8
b Word8
c   = Int -> Char -> ST s DecodeResult
addChar' Int
3 (Word8 -> Word8 -> Word8 -> Char
U8.chr3 Word8
a Word8
b Word8
c)
                | Int
i forall a. Num a => a -> a -> a
+ Int
3 forall a. Ord a => a -> a -> Bool
< Int
len Bool -> Bool -> Bool
&& Word8 -> Word8 -> Word8 -> Word8 -> Bool
U8.validate4 Word8
a Word8
b Word8
c Word8
d = Int -> Char -> ST s DecodeResult
addChar' Int
4 (Word8 -> Word8 -> Word8 -> Word8 -> Char
U8.chr4 Word8
a Word8
b Word8
c Word8
d)
                | Int
i forall a. Num a => a -> a -> a
+ Int
3 forall a. Ord a => a -> a -> Bool
< Int
len = do
                    Text
t <- forall s. Int -> MArray s -> ST s Text
getText Int
j MArray s
marr
                    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Text -> ByteString -> DecodeResult
DecodeResultFailure Text
t (Int -> ByteString -> ByteString
B.unsafeDrop Int
i ByteString
ps)
                | Int
i forall a. Num a => a -> a -> a
+ Int
2 forall a. Ord a => a -> a -> Bool
< Int
len = S -> ST s DecodeResult
continue (Word8 -> Word8 -> Word8 -> S
S3 Word8
a Word8
b Word8
c)
                | Int
i forall a. Num a => a -> a -> a
+ Int
1 forall a. Ord a => a -> a -> Bool
< Int
len = S -> ST s DecodeResult
continue (Word8 -> Word8 -> S
S2 Word8
a Word8
b)
                | Bool
otherwise   = S -> ST s DecodeResult
continue (Word8 -> S
S1 Word8
a)
                  where
                    a :: Word8
a = ByteString -> Int -> Word8
B.unsafeIndex ByteString
ps Int
i
                    b :: Word8
b = ByteString -> Int -> Word8
B.unsafeIndex ByteString
ps (Int
iforall a. Num a => a -> a -> a
+Int
1)
                    c :: Word8
c = ByteString -> Int -> Word8
B.unsafeIndex ByteString
ps (Int
iforall a. Num a => a -> a -> a
+Int
2)
                    d :: Word8
d = ByteString -> Int -> Word8
B.unsafeIndex ByteString
ps (Int
iforall a. Num a => a -> a -> a
+Int
3)
                    addChar' :: Int -> Char -> ST s DecodeResult
addChar' Int
deltai Char
char = do
                        Int
deltaj <- forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr Int
j Char
char
                        Int -> Int -> ST s DecodeResult
start (Int
i forall a. Num a => a -> a -> a
+ Int
deltai) (Int
j forall a. Num a => a -> a -> a
+ Int
deltaj)
                    continue :: S -> ST s DecodeResult
continue S
s = do
                        Text
t <- forall s. Int -> MArray s -> ST s Text
getText Int
j MArray s
marr
                        forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Text -> (ByteString -> DecodeResult) -> DecodeResult
DecodeResultSuccess Text
t (S -> ByteString -> DecodeResult
beginChunk S
s)

            checkCont :: S -> Int -> ST s DecodeResult
checkCont S
s !Int
i | Int
i forall a. Ord a => a -> a -> Bool
>= Int
len = forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Text -> (ByteString -> DecodeResult) -> DecodeResult
DecodeResultSuccess Text
T.empty (S -> ByteString -> DecodeResult
beginChunk S
s)
            checkCont S
s !Int
i =
                case S
s of
                    S
S0 -> Int -> Int -> ST s DecodeResult
start Int
i Int
0
                    S1 Word8
a
                        | Word8 -> Word8 -> Bool
U8.validate2 Word8
a Word8
x     -> Char -> ST s DecodeResult
addChar' (Word8 -> Word8 -> Char
U8.chr2 Word8
a Word8
x)
                        | Bool
otherwise            -> S -> Int -> ST s DecodeResult
checkCont (Word8 -> Word8 -> S
S2 Word8
a Word8
x) (Int
i forall a. Num a => a -> a -> a
+ Int
1)
                    S2 Word8
a Word8
b
                        | Word8 -> Word8 -> Word8 -> Bool
U8.validate3 Word8
a Word8
b Word8
x   -> Char -> ST s DecodeResult
addChar' (Word8 -> Word8 -> Word8 -> Char
U8.chr3 Word8
a Word8
b Word8
x)
                        | Bool
otherwise            -> S -> Int -> ST s DecodeResult
checkCont (Word8 -> Word8 -> Word8 -> S
S3 Word8
a Word8
b Word8
x) (Int
i forall a. Num a => a -> a -> a
+ Int
1)
                    S3 Word8
a Word8
b Word8
c
                        | Word8 -> Word8 -> Word8 -> Word8 -> Bool
U8.validate4 Word8
a Word8
b Word8
c Word8
x -> Char -> ST s DecodeResult
addChar' (Word8 -> Word8 -> Word8 -> Word8 -> Char
U8.chr4 Word8
a Word8
b Word8
c Word8
x)
                    S
_ -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Text -> ByteString -> DecodeResult
DecodeResultFailure Text
T.empty
                                forall a b. (a -> b) -> a -> b
$! ByteString -> ByteString -> ByteString
B.append (S -> ByteString
toBS S
s) (Int -> ByteString -> ByteString
B.unsafeDrop Int
i ByteString
ps)
              where
                x :: Word8
x = ByteString -> Int -> Word8
B.unsafeIndex ByteString
ps Int
i
                addChar' :: Char -> ST s DecodeResult
addChar' Char
c = do
                    Int
d <- forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr Int
0 Char
c
                    Int -> Int -> ST s DecodeResult
start (Int
i forall a. Num a => a -> a -> a
+ Int
1) Int
d

        S -> Int -> ST s DecodeResult
checkCont S
s0 Int
0
      where
        len :: Int
len = ByteString -> Int
B.length ByteString
ps
    {-# INLINE beginChunk #-}

-- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using little
-- endian UTF-16 encoding.
decodeUtf16LE :: B.ByteString -> DecodeResult
decodeUtf16LE :: ByteString -> DecodeResult
decodeUtf16LE =
    S -> ByteString -> DecodeResult
beginChunk S
S0
  where
    beginChunk :: S -> B.ByteString -> DecodeResult
    beginChunk :: S -> ByteString -> DecodeResult
beginChunk S
s ByteString
bs | ByteString -> Bool
B.null ByteString
bs =
        case S
s of
            S
S0 -> Text -> (ByteString -> DecodeResult) -> DecodeResult
DecodeResultSuccess Text
T.empty (S -> ByteString -> DecodeResult
beginChunk S
S0)
            S
_  -> Text -> ByteString -> DecodeResult
DecodeResultFailure Text
T.empty forall a b. (a -> b) -> a -> b
$ S -> ByteString
toBS S
s
    beginChunk S
s0 ByteString
ps = forall a. (forall s. ST s a) -> a
runST forall a b. (a -> b) -> a -> b
$ do
        let initLen :: Int
initLen = ByteString -> Int
B.length ByteString
ps
#if MIN_VERSION_text(2,0,0)
        -- Worst-case scenario: each Word16 in UTF16 gives three Word8 in UTF8
        -- and left-over from a previous chunk gives four Word8 in UTF8
        marr <- A.new ((initLen `div` 2) * 3 + 4) -- of Word8
#else
        MArray s
marr <- forall s. Int -> ST s (MArray s)
A.new (Int
initLen forall a. Num a => a -> a -> a
+ Int
1) -- of Word16
#endif
        let start :: Int -> Int -> ST s DecodeResult
start !Int
i !Int
j
                | Int
i forall a. Ord a => a -> a -> Bool
>= Int
len = do
                    Text
t <- forall s. Int -> MArray s -> ST s Text
getText Int
j MArray s
marr
                    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Text -> (ByteString -> DecodeResult) -> DecodeResult
DecodeResultSuccess Text
t (S -> ByteString -> DecodeResult
beginChunk S
S0)
                | Int
i forall a. Num a => a -> a -> a
+ Int
1 forall a. Ord a => a -> a -> Bool
< Int
len Bool -> Bool -> Bool
&& Word16 -> Bool
U16.validate1 Word16
x1    = Int -> Char -> ST s DecodeResult
addChar' Int
2 (Word16 -> Char
unsafeChr16 Word16
x1)
                | Int
i forall a. Num a => a -> a -> a
+ Int
3 forall a. Ord a => a -> a -> Bool
< Int
len Bool -> Bool -> Bool
&& Word16 -> Word16 -> Bool
U16.validate2 Word16
x1 Word16
x2 = Int -> Char -> ST s DecodeResult
addChar' Int
4 (Word16 -> Word16 -> Char
U16.chr2 Word16
x1 Word16
x2)
                | Int
i forall a. Num a => a -> a -> a
+ Int
3 forall a. Ord a => a -> a -> Bool
< Int
len = do
                    Text
t <- forall s. Int -> MArray s -> ST s Text
getText Int
j MArray s
marr
                    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Text -> ByteString -> DecodeResult
DecodeResultFailure Text
t (Int -> ByteString -> ByteString
B.unsafeDrop Int
i ByteString
ps)
                | Int
i forall a. Num a => a -> a -> a
+ Int
2 forall a. Ord a => a -> a -> Bool
< Int
len = S -> ST s DecodeResult
continue (Word8 -> Word8 -> Word8 -> S
S3 Word8
a Word8
b Word8
c)
                | Int
i forall a. Num a => a -> a -> a
+ Int
1 forall a. Ord a => a -> a -> Bool
< Int
len = S -> ST s DecodeResult
continue (Word8 -> Word8 -> S
S2 Word8
a Word8
b)
                | Bool
otherwise   = S -> ST s DecodeResult
continue (Word8 -> S
S1 Word8
a)
                  where
                    a :: Word8
a = ByteString -> Int -> Word8
B.unsafeIndex ByteString
ps Int
i
                    b :: Word8
b = ByteString -> Int -> Word8
B.unsafeIndex ByteString
ps (Int
iforall a. Num a => a -> a -> a
+Int
1)
                    c :: Word8
c = ByteString -> Int -> Word8
B.unsafeIndex ByteString
ps (Int
iforall a. Num a => a -> a -> a
+Int
2)
                    d :: Word8
d = ByteString -> Int -> Word8
B.unsafeIndex ByteString
ps (Int
iforall a. Num a => a -> a -> a
+Int
3)
                    x1 :: Word16
x1   = forall {a} {a} {a}.
(Bits a, Integral a, Integral a, Num a) =>
a -> a -> a
combine Word8
a Word8
b
                    x2 :: Word16
x2   = forall {a} {a} {a}.
(Bits a, Integral a, Integral a, Num a) =>
a -> a -> a
combine Word8
c Word8
d
                    addChar' :: Int -> Char -> ST s DecodeResult
addChar' Int
deltai Char
char = do
                        Int
deltaj <- forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr Int
j Char
char
                        Int -> Int -> ST s DecodeResult
start (Int
i forall a. Num a => a -> a -> a
+ Int
deltai) (Int
j forall a. Num a => a -> a -> a
+ Int
deltaj)
                    continue :: S -> ST s DecodeResult
continue S
s = do
                        Text
t <- forall s. Int -> MArray s -> ST s Text
getText Int
j MArray s
marr
                        forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Text -> (ByteString -> DecodeResult) -> DecodeResult
DecodeResultSuccess Text
t (S -> ByteString -> DecodeResult
beginChunk S
s)

            checkCont :: S -> Int -> ST s DecodeResult
checkCont S
s !Int
i | Int
i forall a. Ord a => a -> a -> Bool
>= Int
len = forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Text -> (ByteString -> DecodeResult) -> DecodeResult
DecodeResultSuccess Text
T.empty (S -> ByteString -> DecodeResult
beginChunk S
s)
            checkCont S
s !Int
i =
                case S
s of
                    S
S0 -> Int -> Int -> ST s DecodeResult
start Int
i Int
0
                    S1 Word8
a ->
                        let x1 :: Word16
x1 = forall {a} {a} {a}.
(Bits a, Integral a, Integral a, Num a) =>
a -> a -> a
combine Word8
a Word8
x
                         in if Word16 -> Bool
U16.validate1 Word16
x1
                                then Char -> ST s DecodeResult
addChar' (Word16 -> Char
unsafeChr16 Word16
x1)
                                else S -> Int -> ST s DecodeResult
checkCont (Word8 -> Word8 -> S
S2 Word8
a Word8
x) (Int
i forall a. Num a => a -> a -> a
+ Int
1)
                    S2 Word8
a Word8
b -> S -> Int -> ST s DecodeResult
checkCont (Word8 -> Word8 -> Word8 -> S
S3 Word8
a Word8
b Word8
x) (Int
i forall a. Num a => a -> a -> a
+ Int
1)
                    S3 Word8
a Word8
b Word8
c ->
                        let x1 :: Word16
x1 = forall {a} {a} {a}.
(Bits a, Integral a, Integral a, Num a) =>
a -> a -> a
combine Word8
a Word8
b
                            x2 :: Word16
x2 = forall {a} {a} {a}.
(Bits a, Integral a, Integral a, Num a) =>
a -> a -> a
combine Word8
c Word8
x
                         in if Word16 -> Word16 -> Bool
U16.validate2 Word16
x1 Word16
x2
                                then Char -> ST s DecodeResult
addChar' (Word16 -> Word16 -> Char
U16.chr2 Word16
x1 Word16
x2)
                                else forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Text -> ByteString -> DecodeResult
DecodeResultFailure Text
T.empty
                                            forall a b. (a -> b) -> a -> b
$! ByteString -> ByteString -> ByteString
B.append (S -> ByteString
toBS S
s) (Int -> ByteString -> ByteString
B.unsafeDrop Int
i ByteString
ps)
              where
                x :: Word8
x = ByteString -> Int -> Word8
B.unsafeIndex ByteString
ps Int
i
                addChar' :: Char -> ST s DecodeResult
addChar' Char
c = do
                    Int
d <- forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr Int
0 Char
c
                    Int -> Int -> ST s DecodeResult
start (Int
i forall a. Num a => a -> a -> a
+ Int
1) Int
d

        S -> Int -> ST s DecodeResult
checkCont S
s0 Int
0
      where
        len :: Int
len = ByteString -> Int
B.length ByteString
ps
        combine :: a -> a -> a
combine a
w1 a
w2 = forall a b. (Integral a, Num b) => a -> b
fromIntegral a
w1 forall a. Bits a => a -> a -> a
.|. (forall a b. (Integral a, Num b) => a -> b
fromIntegral a
w2 forall a. Bits a => a -> Int -> a
`shiftL` Int
8)
    {-# INLINE beginChunk #-}

-- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using big
-- endian UTF-16 encoding.
decodeUtf16BE :: B.ByteString -> DecodeResult
decodeUtf16BE :: ByteString -> DecodeResult
decodeUtf16BE =
    S -> ByteString -> DecodeResult
beginChunk S
S0
  where
    beginChunk :: S -> B.ByteString -> DecodeResult
    beginChunk :: S -> ByteString -> DecodeResult
beginChunk S
s ByteString
bs | ByteString -> Bool
B.null ByteString
bs =
        case S
s of
            S
S0 -> Text -> (ByteString -> DecodeResult) -> DecodeResult
DecodeResultSuccess Text
T.empty (S -> ByteString -> DecodeResult
beginChunk S
S0)
            S
_  -> Text -> ByteString -> DecodeResult
DecodeResultFailure Text
T.empty forall a b. (a -> b) -> a -> b
$ S -> ByteString
toBS S
s
    beginChunk S
s0 ByteString
ps = forall a. (forall s. ST s a) -> a
runST forall a b. (a -> b) -> a -> b
$ do
        let initLen :: Int
initLen = ByteString -> Int
B.length ByteString
ps
#if MIN_VERSION_text(2,0,0)
        -- Worst-case scenario: each Word16 in UTF16 gives three Word8 in UTF8
        -- and left-over from a previous chunk gives four Word8 in UTF8
        marr <- A.new ((initLen `div` 2) * 3 + 4) -- of Word8
#else
        MArray s
marr <- forall s. Int -> ST s (MArray s)
A.new (Int
initLen forall a. Num a => a -> a -> a
+ Int
1) -- of Word16
#endif
        let start :: Int -> Int -> ST s DecodeResult
start !Int
i !Int
j
                | Int
i forall a. Ord a => a -> a -> Bool
>= Int
len = do
                    Text
t <- forall s. Int -> MArray s -> ST s Text
getText Int
j MArray s
marr
                    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Text -> (ByteString -> DecodeResult) -> DecodeResult
DecodeResultSuccess Text
t (S -> ByteString -> DecodeResult
beginChunk S
S0)
                | Int
i forall a. Num a => a -> a -> a
+ Int
1 forall a. Ord a => a -> a -> Bool
< Int
len Bool -> Bool -> Bool
&& Word16 -> Bool
U16.validate1 Word16
x1    = Int -> Char -> ST s DecodeResult
addChar' Int
2 (Word16 -> Char
unsafeChr16 Word16
x1)
                | Int
i forall a. Num a => a -> a -> a
+ Int
3 forall a. Ord a => a -> a -> Bool
< Int
len Bool -> Bool -> Bool
&& Word16 -> Word16 -> Bool
U16.validate2 Word16
x1 Word16
x2 = Int -> Char -> ST s DecodeResult
addChar' Int
4 (Word16 -> Word16 -> Char
U16.chr2 Word16
x1 Word16
x2)
                | Int
i forall a. Num a => a -> a -> a
+ Int
3 forall a. Ord a => a -> a -> Bool
< Int
len = do
                    Text
t <- forall s. Int -> MArray s -> ST s Text
getText Int
j MArray s
marr
                    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Text -> ByteString -> DecodeResult
DecodeResultFailure Text
t (Int -> ByteString -> ByteString
B.unsafeDrop Int
i ByteString
ps)
                | Int
i forall a. Num a => a -> a -> a
+ Int
2 forall a. Ord a => a -> a -> Bool
< Int
len = S -> ST s DecodeResult
continue (Word8 -> Word8 -> Word8 -> S
S3 Word8
a Word8
b Word8
c)
                | Int
i forall a. Num a => a -> a -> a
+ Int
1 forall a. Ord a => a -> a -> Bool
< Int
len = S -> ST s DecodeResult
continue (Word8 -> Word8 -> S
S2 Word8
a Word8
b)
                | Bool
otherwise   = S -> ST s DecodeResult
continue (Word8 -> S
S1 Word8
a)
                  where
                    a :: Word8
a = ByteString -> Int -> Word8
B.unsafeIndex ByteString
ps Int
i
                    b :: Word8
b = ByteString -> Int -> Word8
B.unsafeIndex ByteString
ps (Int
iforall a. Num a => a -> a -> a
+Int
1)
                    c :: Word8
c = ByteString -> Int -> Word8
B.unsafeIndex ByteString
ps (Int
iforall a. Num a => a -> a -> a
+Int
2)
                    d :: Word8
d = ByteString -> Int -> Word8
B.unsafeIndex ByteString
ps (Int
iforall a. Num a => a -> a -> a
+Int
3)
                    x1 :: Word16
x1   = forall {a} {a} {a}.
(Bits a, Integral a, Integral a, Num a) =>
a -> a -> a
combine Word8
a Word8
b
                    x2 :: Word16
x2   = forall {a} {a} {a}.
(Bits a, Integral a, Integral a, Num a) =>
a -> a -> a
combine Word8
c Word8
d
                    addChar' :: Int -> Char -> ST s DecodeResult
addChar' Int
deltai Char
char = do
                        Int
deltaj <- forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr Int
j Char
char
                        Int -> Int -> ST s DecodeResult
start (Int
i forall a. Num a => a -> a -> a
+ Int
deltai) (Int
j forall a. Num a => a -> a -> a
+ Int
deltaj)
                    continue :: S -> ST s DecodeResult
continue S
s = do
                        Text
t <- forall s. Int -> MArray s -> ST s Text
getText Int
j MArray s
marr
                        forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Text -> (ByteString -> DecodeResult) -> DecodeResult
DecodeResultSuccess Text
t (S -> ByteString -> DecodeResult
beginChunk S
s)

            checkCont :: S -> Int -> ST s DecodeResult
checkCont S
s !Int
i | Int
i forall a. Ord a => a -> a -> Bool
>= Int
len = forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Text -> (ByteString -> DecodeResult) -> DecodeResult
DecodeResultSuccess Text
T.empty (S -> ByteString -> DecodeResult
beginChunk S
s)
            checkCont S
s !Int
i =
                case S
s of
                    S
S0 -> Int -> Int -> ST s DecodeResult
start Int
i Int
0
                    S1 Word8
a ->
                        let x1 :: Word16
x1 = forall {a} {a} {a}.
(Bits a, Integral a, Integral a, Num a) =>
a -> a -> a
combine Word8
a Word8
x
                         in if Word16 -> Bool
U16.validate1 Word16
x1
                                then Char -> ST s DecodeResult
addChar' (Word16 -> Char
unsafeChr16 Word16
x1)
                                else S -> Int -> ST s DecodeResult
checkCont (Word8 -> Word8 -> S
S2 Word8
a Word8
x) (Int
i forall a. Num a => a -> a -> a
+ Int
1)
                    S2 Word8
a Word8
b -> S -> Int -> ST s DecodeResult
checkCont (Word8 -> Word8 -> Word8 -> S
S3 Word8
a Word8
b Word8
x) (Int
i forall a. Num a => a -> a -> a
+ Int
1)
                    S3 Word8
a Word8
b Word8
c ->
                        let x1 :: Word16
x1 = forall {a} {a} {a}.
(Bits a, Integral a, Integral a, Num a) =>
a -> a -> a
combine Word8
a Word8
b
                            x2 :: Word16
x2 = forall {a} {a} {a}.
(Bits a, Integral a, Integral a, Num a) =>
a -> a -> a
combine Word8
c Word8
x
                         in if Word16 -> Word16 -> Bool
U16.validate2 Word16
x1 Word16
x2
                                then Char -> ST s DecodeResult
addChar' (Word16 -> Word16 -> Char
U16.chr2 Word16
x1 Word16
x2)
                                else forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Text -> ByteString -> DecodeResult
DecodeResultFailure Text
T.empty
                                            forall a b. (a -> b) -> a -> b
$! ByteString -> ByteString -> ByteString
B.append (S -> ByteString
toBS S
s) (Int -> ByteString -> ByteString
B.unsafeDrop Int
i ByteString
ps)
              where
                x :: Word8
x = ByteString -> Int -> Word8
B.unsafeIndex ByteString
ps Int
i
                addChar' :: Char -> ST s DecodeResult
addChar' Char
c = do
                    Int
d <- forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr Int
0 Char
c
                    Int -> Int -> ST s DecodeResult
start (Int
i forall a. Num a => a -> a -> a
+ Int
1) Int
d

        S -> Int -> ST s DecodeResult
checkCont S
s0 Int
0
      where
        len :: Int
len = ByteString -> Int
B.length ByteString
ps
        combine :: a -> a -> a
combine a
w1 a
w2 = (forall a b. (Integral a, Num b) => a -> b
fromIntegral a
w1 forall a. Bits a => a -> Int -> a
`shiftL` Int
8) forall a. Bits a => a -> a -> a
.|. forall a b. (Integral a, Num b) => a -> b
fromIntegral a
w2
    {-# INLINE beginChunk #-}

-- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using little
-- endian UTF-32 encoding.
decodeUtf32LE :: B.ByteString -> DecodeResult
decodeUtf32LE :: ByteString -> DecodeResult
decodeUtf32LE =
    S -> ByteString -> DecodeResult
beginChunk S
S0
  where
    beginChunk :: S -> B.ByteString -> DecodeResult
    beginChunk :: S -> ByteString -> DecodeResult
beginChunk S
s ByteString
bs | ByteString -> Bool
B.null ByteString
bs =
        case S
s of
            S
S0 -> Text -> (ByteString -> DecodeResult) -> DecodeResult
DecodeResultSuccess Text
T.empty (S -> ByteString -> DecodeResult
beginChunk S
S0)
            S
_  -> Text -> ByteString -> DecodeResult
DecodeResultFailure Text
T.empty forall a b. (a -> b) -> a -> b
$ S -> ByteString
toBS S
s
    beginChunk S
s0 ByteString
ps = forall a. (forall s. ST s a) -> a
runST forall a b. (a -> b) -> a -> b
$ do
        let initLen :: Int
initLen = ByteString -> Int
B.length ByteString
ps forall a. Integral a => a -> a -> a
`div` Int
2
#if MIN_VERSION_text(2,0,0)
        -- Worst-case scenario: the very first byte finishes a 4-byte UTF8 sequence,
        -- and other codepoints have 4-byte UTF8 representation as well.
        -- This gives 4 + (B.length ps - 1), or (for odd B.length) initLen * 2 + 4.
        marr <- A.new (initLen * 2 + 4) -- of Word8
#else
        MArray s
marr <- forall s. Int -> ST s (MArray s)
A.new (Int
initLen forall a. Num a => a -> a -> a
+ Int
1) -- of Word16
#endif
        let start :: Int -> Int -> ST s DecodeResult
start !Int
i !Int
j
                | Int
i forall a. Ord a => a -> a -> Bool
>= Int
len = do
                    Text
t <- forall s. Int -> MArray s -> ST s Text
getText Int
j MArray s
marr
                    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Text -> (ByteString -> DecodeResult) -> DecodeResult
DecodeResultSuccess Text
t (S -> ByteString -> DecodeResult
beginChunk S
S0)
                | Int
i forall a. Num a => a -> a -> a
+ Int
3 forall a. Ord a => a -> a -> Bool
< Int
len Bool -> Bool -> Bool
&& Word32 -> Bool
U32.validate Word32
x1 = Int -> Char -> ST s DecodeResult
addChar' Int
4 (Word32 -> Char
unsafeChr32 Word32
x1)
                | Int
i forall a. Num a => a -> a -> a
+ Int
3 forall a. Ord a => a -> a -> Bool
< Int
len = do
                    Text
t <- forall s. Int -> MArray s -> ST s Text
getText Int
j MArray s
marr
                    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Text -> ByteString -> DecodeResult
DecodeResultFailure Text
t (Int -> ByteString -> ByteString
B.unsafeDrop Int
i ByteString
ps)
                | Int
i forall a. Num a => a -> a -> a
+ Int
2 forall a. Ord a => a -> a -> Bool
< Int
len = S -> ST s DecodeResult
continue (Word8 -> Word8 -> Word8 -> S
S3 Word8
a Word8
b Word8
c)
                | Int
i forall a. Num a => a -> a -> a
+ Int
1 forall a. Ord a => a -> a -> Bool
< Int
len = S -> ST s DecodeResult
continue (Word8 -> Word8 -> S
S2 Word8
a Word8
b)
                | Bool
otherwise   = S -> ST s DecodeResult
continue (Word8 -> S
S1 Word8
a)
                  where
                    a :: Word8
a = ByteString -> Int -> Word8
B.unsafeIndex ByteString
ps Int
i
                    b :: Word8
b = ByteString -> Int -> Word8
B.unsafeIndex ByteString
ps (Int
iforall a. Num a => a -> a -> a
+Int
1)
                    c :: Word8
c = ByteString -> Int -> Word8
B.unsafeIndex ByteString
ps (Int
iforall a. Num a => a -> a -> a
+Int
2)
                    d :: Word8
d = ByteString -> Int -> Word8
B.unsafeIndex ByteString
ps (Int
iforall a. Num a => a -> a -> a
+Int
3)
                    x1 :: Word32
x1   = forall {a} {a} {a} {a} {a}.
(Bits a, Integral a, Integral a, Integral a, Integral a, Num a) =>
a -> a -> a -> a -> a
combine Word8
a Word8
b Word8
c Word8
d
                    addChar' :: Int -> Char -> ST s DecodeResult
addChar' Int
deltai Char
char = do
                        Int
deltaj <- forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr Int
j Char
char
                        Int -> Int -> ST s DecodeResult
start (Int
i forall a. Num a => a -> a -> a
+ Int
deltai) (Int
j forall a. Num a => a -> a -> a
+ Int
deltaj)
                    continue :: S -> ST s DecodeResult
continue S
s = do
                        Text
t <- forall s. Int -> MArray s -> ST s Text
getText Int
j MArray s
marr
                        forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Text -> (ByteString -> DecodeResult) -> DecodeResult
DecodeResultSuccess Text
t (S -> ByteString -> DecodeResult
beginChunk S
s)

            checkCont :: S -> Int -> ST s DecodeResult
checkCont S
s !Int
i | Int
i forall a. Ord a => a -> a -> Bool
>= Int
len = forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Text -> (ByteString -> DecodeResult) -> DecodeResult
DecodeResultSuccess Text
T.empty (S -> ByteString -> DecodeResult
beginChunk S
s)
            checkCont S
s !Int
i =
                case S
s of
                    S
S0 -> Int -> Int -> ST s DecodeResult
start Int
i Int
0
                    S1 Word8
a -> S -> Int -> ST s DecodeResult
checkCont (Word8 -> Word8 -> S
S2 Word8
a Word8
x) (Int
i forall a. Num a => a -> a -> a
+ Int
1)
                    S2 Word8
a Word8
b -> S -> Int -> ST s DecodeResult
checkCont (Word8 -> Word8 -> Word8 -> S
S3 Word8
a Word8
b Word8
x) (Int
i forall a. Num a => a -> a -> a
+ Int
1)
                    S3 Word8
a Word8
b Word8
c ->
                        let x1 :: Word32
x1 = forall {a} {a} {a} {a} {a}.
(Bits a, Integral a, Integral a, Integral a, Integral a, Num a) =>
a -> a -> a -> a -> a
combine Word8
a Word8
b Word8
c Word8
x
                         in if Word32 -> Bool
U32.validate Word32
x1
                                then Char -> ST s DecodeResult
addChar' (Word32 -> Char
unsafeChr32 Word32
x1)
                                else forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Text -> ByteString -> DecodeResult
DecodeResultFailure Text
T.empty
                                            forall a b. (a -> b) -> a -> b
$! ByteString -> ByteString -> ByteString
B.append (S -> ByteString
toBS S
s) (Int -> ByteString -> ByteString
B.unsafeDrop Int
i ByteString
ps)
              where
                x :: Word8
x = ByteString -> Int -> Word8
B.unsafeIndex ByteString
ps Int
i
                addChar' :: Char -> ST s DecodeResult
addChar' Char
c = do
                    Int
d <- forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr Int
0 Char
c
                    Int -> Int -> ST s DecodeResult
start (Int
i forall a. Num a => a -> a -> a
+ Int
1) Int
d

        S -> Int -> ST s DecodeResult
checkCont S
s0 Int
0
      where
        len :: Int
len = ByteString -> Int
B.length ByteString
ps
        combine :: a -> a -> a -> a -> a
combine a
w1 a
w2 a
w3 a
w4 =
                forall a. Bits a => a -> Int -> a
shiftL (forall a b. (Integral a, Num b) => a -> b
fromIntegral a
w4) Int
24
            forall a. Bits a => a -> a -> a
.|. forall a. Bits a => a -> Int -> a
shiftL (forall a b. (Integral a, Num b) => a -> b
fromIntegral a
w3) Int
16
            forall a. Bits a => a -> a -> a
.|. forall a. Bits a => a -> Int -> a
shiftL (forall a b. (Integral a, Num b) => a -> b
fromIntegral a
w2) Int
8
            forall a. Bits a => a -> a -> a
.|.        (forall a b. (Integral a, Num b) => a -> b
fromIntegral a
w1)
    {-# INLINE beginChunk #-}

-- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using big
-- endian UTF-32 encoding.
decodeUtf32BE :: B.ByteString -> DecodeResult
decodeUtf32BE :: ByteString -> DecodeResult
decodeUtf32BE =
    S -> ByteString -> DecodeResult
beginChunk S
S0
  where
    beginChunk :: S -> B.ByteString -> DecodeResult
    beginChunk :: S -> ByteString -> DecodeResult
beginChunk S
s ByteString
bs | ByteString -> Bool
B.null ByteString
bs =
        case S
s of
            S
S0 -> Text -> (ByteString -> DecodeResult) -> DecodeResult
DecodeResultSuccess Text
T.empty (S -> ByteString -> DecodeResult
beginChunk S
S0)
            S
_  -> Text -> ByteString -> DecodeResult
DecodeResultFailure Text
T.empty forall a b. (a -> b) -> a -> b
$ S -> ByteString
toBS S
s
    beginChunk S
s0 ByteString
ps = forall a. (forall s. ST s a) -> a
runST forall a b. (a -> b) -> a -> b
$ do
        let initLen :: Int
initLen = ByteString -> Int
B.length ByteString
ps forall a. Integral a => a -> a -> a
`div` Int
2
#if MIN_VERSION_text(2,0,0)
        -- Worst-case scenario: the very first byte finishes a 4-byte UTF8 sequence,
        -- and other codepoints have 4-byte UTF8 representation as well.
        -- This gives 4 + (B.length ps - 1), or (for odd B.length) initLen * 2 + 4.
        marr <- A.new (initLen * 2 + 4) -- of Word8
#else
        MArray s
marr <- forall s. Int -> ST s (MArray s)
A.new (Int
initLen forall a. Num a => a -> a -> a
+ Int
1) -- of Word16
#endif
        let start :: Int -> Int -> ST s DecodeResult
start !Int
i !Int
j
                | Int
i forall a. Ord a => a -> a -> Bool
>= Int
len = do
                    Text
t <- forall s. Int -> MArray s -> ST s Text
getText Int
j MArray s
marr
                    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Text -> (ByteString -> DecodeResult) -> DecodeResult
DecodeResultSuccess Text
t (S -> ByteString -> DecodeResult
beginChunk S
S0)
                | Int
i forall a. Num a => a -> a -> a
+ Int
3 forall a. Ord a => a -> a -> Bool
< Int
len Bool -> Bool -> Bool
&& Word32 -> Bool
U32.validate Word32
x1 = Int -> Char -> ST s DecodeResult
addChar' Int
4 (Word32 -> Char
unsafeChr32 Word32
x1)
                | Int
i forall a. Num a => a -> a -> a
+ Int
3 forall a. Ord a => a -> a -> Bool
< Int
len = do
                    Text
t <- forall s. Int -> MArray s -> ST s Text
getText Int
j MArray s
marr
                    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Text -> ByteString -> DecodeResult
DecodeResultFailure Text
t (Int -> ByteString -> ByteString
B.unsafeDrop Int
i ByteString
ps)
                | Int
i forall a. Num a => a -> a -> a
+ Int
2 forall a. Ord a => a -> a -> Bool
< Int
len = S -> ST s DecodeResult
continue (Word8 -> Word8 -> Word8 -> S
S3 Word8
a Word8
b Word8
c)
                | Int
i forall a. Num a => a -> a -> a
+ Int
1 forall a. Ord a => a -> a -> Bool
< Int
len = S -> ST s DecodeResult
continue (Word8 -> Word8 -> S
S2 Word8
a Word8
b)
                | Bool
otherwise   = S -> ST s DecodeResult
continue (Word8 -> S
S1 Word8
a)
                  where
                    a :: Word8
a = ByteString -> Int -> Word8
B.unsafeIndex ByteString
ps Int
i
                    b :: Word8
b = ByteString -> Int -> Word8
B.unsafeIndex ByteString
ps (Int
iforall a. Num a => a -> a -> a
+Int
1)
                    c :: Word8
c = ByteString -> Int -> Word8
B.unsafeIndex ByteString
ps (Int
iforall a. Num a => a -> a -> a
+Int
2)
                    d :: Word8
d = ByteString -> Int -> Word8
B.unsafeIndex ByteString
ps (Int
iforall a. Num a => a -> a -> a
+Int
3)
                    x1 :: Word32
x1   = forall {a} {a} {a} {a} {a}.
(Bits a, Integral a, Integral a, Integral a, Integral a, Num a) =>
a -> a -> a -> a -> a
combine Word8
a Word8
b Word8
c Word8
d
                    addChar' :: Int -> Char -> ST s DecodeResult
addChar' Int
deltai Char
char = do
                        Int
deltaj <- forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr Int
j Char
char
                        Int -> Int -> ST s DecodeResult
start (Int
i forall a. Num a => a -> a -> a
+ Int
deltai) (Int
j forall a. Num a => a -> a -> a
+ Int
deltaj)
                    continue :: S -> ST s DecodeResult
continue S
s = do
                        Text
t <- forall s. Int -> MArray s -> ST s Text
getText Int
j MArray s
marr
                        forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Text -> (ByteString -> DecodeResult) -> DecodeResult
DecodeResultSuccess Text
t (S -> ByteString -> DecodeResult
beginChunk S
s)

            checkCont :: S -> Int -> ST s DecodeResult
checkCont S
s !Int
i | Int
i forall a. Ord a => a -> a -> Bool
>= Int
len = forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Text -> (ByteString -> DecodeResult) -> DecodeResult
DecodeResultSuccess Text
T.empty (S -> ByteString -> DecodeResult
beginChunk S
s)
            checkCont S
s !Int
i =
                case S
s of
                    S
S0 -> Int -> Int -> ST s DecodeResult
start Int
i Int
0
                    S1 Word8
a -> S -> Int -> ST s DecodeResult
checkCont (Word8 -> Word8 -> S
S2 Word8
a Word8
x) (Int
i forall a. Num a => a -> a -> a
+ Int
1)
                    S2 Word8
a Word8
b -> S -> Int -> ST s DecodeResult
checkCont (Word8 -> Word8 -> Word8 -> S
S3 Word8
a Word8
b Word8
x) (Int
i forall a. Num a => a -> a -> a
+ Int
1)
                    S3 Word8
a Word8
b Word8
c ->
                        let x1 :: Word32
x1 = forall {a} {a} {a} {a} {a}.
(Bits a, Integral a, Integral a, Integral a, Integral a, Num a) =>
a -> a -> a -> a -> a
combine Word8
a Word8
b Word8
c Word8
x
                         in if Word32 -> Bool
U32.validate Word32
x1
                                then Char -> ST s DecodeResult
addChar' (Word32 -> Char
unsafeChr32 Word32
x1)
                                else forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Text -> ByteString -> DecodeResult
DecodeResultFailure Text
T.empty
                                            forall a b. (a -> b) -> a -> b
$! ByteString -> ByteString -> ByteString
B.append (S -> ByteString
toBS S
s) (Int -> ByteString -> ByteString
B.unsafeDrop Int
i ByteString
ps)
              where
                x :: Word8
x = ByteString -> Int -> Word8
B.unsafeIndex ByteString
ps Int
i
                addChar' :: Char -> ST s DecodeResult
addChar' Char
c = do
                    Int
d <- forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
marr Int
0 Char
c
                    Int -> Int -> ST s DecodeResult
start (Int
i forall a. Num a => a -> a -> a
+ Int
1) Int
d

        S -> Int -> ST s DecodeResult
checkCont S
s0 Int
0
      where
        len :: Int
len = ByteString -> Int
B.length ByteString
ps
        combine :: a -> a -> a -> a -> a
combine a
w1 a
w2 a
w3 a
w4 =
                forall a. Bits a => a -> Int -> a
shiftL (forall a b. (Integral a, Num b) => a -> b
fromIntegral a
w1) Int
24
            forall a. Bits a => a -> a -> a
.|. forall a. Bits a => a -> Int -> a
shiftL (forall a b. (Integral a, Num b) => a -> b
fromIntegral a
w2) Int
16
            forall a. Bits a => a -> a -> a
.|. forall a. Bits a => a -> Int -> a
shiftL (forall a b. (Integral a, Num b) => a -> b
fromIntegral a
w3) Int
8
            forall a. Bits a => a -> a -> a
.|.        (forall a b. (Integral a, Num b) => a -> b
fromIntegral a
w4)
    {-# INLINE beginChunk #-}