-- WARNING: This file is security sensitive as it uses unsafeWrite which does
-- not check bounds. Any changes should be made with care and we would love to
-- get informed about them, just cc us in any PR targetting this file: @eskimor @jprider63
-- We would be happy to review the changes!

-- The security check at the end (pos > length) only works if pos grows
-- monotonously, if this condition does not hold, the check is flawed.
module Data.Aeson.Parser.UnescapePure
    (
      unescapeText
    ) where

import Control.Exception (evaluate, throw, try)
import Control.Monad (when)
import Data.ByteString as B
import Data.Bits (Bits, shiftL, shiftR, (.&.), (.|.))
import Data.Text (Text)
import qualified Data.Text.Array as A
import Data.Text.Encoding.Error (UnicodeException (..))
import Data.Text.Internal.Private (runText)
import Data.Text.Unsafe (unsafeDupablePerformIO)
import Data.Word (Word8, Word16, Word32)
import GHC.ST (ST)

-- Different UTF states.
data Utf =
      UtfGround
    | UtfTail1
    | UtfU32e0
    | UtfTail2
    | UtfU32ed
    | Utf843f0
    | UtfTail3
    | Utf843f4
    deriving (Utf -> Utf -> Bool
(Utf -> Utf -> Bool) -> (Utf -> Utf -> Bool) -> Eq Utf
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Utf -> Utf -> Bool
$c/= :: Utf -> Utf -> Bool
== :: Utf -> Utf -> Bool
$c== :: Utf -> Utf -> Bool
Eq)

data State =
      StateNone
    | StateUtf !Utf !Word32
    | StateBackslash
    | StateU0
    | StateU1 !Word16
    | StateU2 !Word16
    | StateU3 !Word16
    | StateS0
    | StateS1
    | StateSU0
    | StateSU1 !Word16
    | StateSU2 !Word16
    | StateSU3 !Word16
    deriving (State -> State -> Bool
(State -> State -> Bool) -> (State -> State -> Bool) -> Eq State
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: State -> State -> Bool
$c/= :: State -> State -> Bool
== :: State -> State -> Bool
$c== :: State -> State -> Bool
Eq)

-- References:
-- http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
-- https://github.com/jwilm/vte/blob/master/utf8parse/src/table.rs.in

setByte1 :: (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte1 :: a -> b -> a
setByte1 a
point b
word = a
point a -> a -> a
forall a. Bits a => a -> a -> a
.|. b -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (b
word b -> b -> b
forall a. Bits a => a -> a -> a
.&. b
0x3f)
{-# INLINE setByte1 #-}

setByte2 :: (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte2 :: a -> b -> a
setByte2 a
point b
word = a
point a -> a -> a
forall a. Bits a => a -> a -> a
.|. (b -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (b
word b -> b -> b
forall a. Bits a => a -> a -> a
.&. b
0x3f) a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftL` Int
6)
{-# INLINE setByte2 #-}

setByte2Top :: (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte2Top :: a -> b -> a
setByte2Top a
point b
word = a
point a -> a -> a
forall a. Bits a => a -> a -> a
.|. (b -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (b
word b -> b -> b
forall a. Bits a => a -> a -> a
.&. b
0x1f) a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftL` Int
6)
{-# INLINE setByte2Top #-}

setByte3 :: (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte3 :: a -> b -> a
setByte3 a
point b
word = a
point a -> a -> a
forall a. Bits a => a -> a -> a
.|. (b -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (b
word b -> b -> b
forall a. Bits a => a -> a -> a
.&. b
0x3f) a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftL` Int
12)
{-# INLINE setByte3 #-}

setByte3Top :: (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte3Top :: a -> b -> a
setByte3Top a
point b
word = a
point a -> a -> a
forall a. Bits a => a -> a -> a
.|. (b -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (b
word b -> b -> b
forall a. Bits a => a -> a -> a
.&. b
0xf) a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftL` Int
12)
{-# INLINE setByte3Top #-}

setByte4 :: (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte4 :: a -> b -> a
setByte4 a
point b
word = a
point a -> a -> a
forall a. Bits a => a -> a -> a
.|. (b -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (b
word b -> b -> b
forall a. Bits a => a -> a -> a
.&. b
0x7) a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftL` Int
18)
{-# INLINE setByte4 #-}

decode :: Utf -> Word32 -> Word8 -> (Utf, Word32)
decode :: Utf -> Word32 -> Word8 -> (Utf, Word32)
decode Utf
UtfGround Word32
point Word8
word = case Word8
word of
    Word8
w | Word8
0x00 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
w Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0x7f -> (Utf
UtfGround, Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
word)
    Word8
w | Word8
0xc2 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
w Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0xdf -> (Utf
UtfTail1, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte2Top Word32
point Word8
word)
    Word8
0xe0                       -> (Utf
UtfU32e0, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte3Top Word32
point Word8
word)
    Word8
w | Word8
0xe1 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
w Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0xec -> (Utf
UtfTail2, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte3Top Word32
point Word8
word)
    Word8
0xed                       -> (Utf
UtfU32ed, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte3Top Word32
point Word8
word)
    Word8
w | Word8
0xee Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
w Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0xef -> (Utf
UtfTail2, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte3Top Word32
point Word8
word)
    Word8
0xf0                       -> (Utf
Utf843f0, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte4 Word32
point Word8
word)
    Word8
w | Word8
0xf1 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
w Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0xf3 -> (Utf
UtfTail3, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte4 Word32
point Word8
word)
    Word8
0xf4                       -> (Utf
Utf843f4, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte4 Word32
point Word8
word)
    Word8
_                          -> (Utf, Word32)
forall a. a
throwDecodeError

decode Utf
UtfU32e0 Word32
point Word8
word = case Word8
word of
    Word8
w | Word8
0xa0 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
w Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0xbf -> (Utf
UtfTail1, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte2 Word32
point Word8
word)
    Word8
_                          -> (Utf, Word32)
forall a. a
throwDecodeError

decode Utf
UtfU32ed Word32
point Word8
word = case Word8
word of
    Word8
w | Word8
0x80 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
w Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0x9f -> (Utf
UtfTail1, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte2 Word32
point Word8
word)
    Word8
_                          -> (Utf, Word32)
forall a. a
throwDecodeError

decode Utf
Utf843f0 Word32
point Word8
word = case Word8
word of
    Word8
w | Word8
0x90 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
w Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0xbf -> (Utf
UtfTail2, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte3 Word32
point Word8
word)
    Word8
_                          -> (Utf, Word32)
forall a. a
throwDecodeError

decode Utf
Utf843f4 Word32
point Word8
word = case Word8
word of
    Word8
w | Word8
0x80 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
w Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0x8f -> (Utf
UtfTail2, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte3 Word32
point Word8
word)
    Word8
_                          -> (Utf, Word32)
forall a. a
throwDecodeError

decode Utf
UtfTail3 Word32
point Word8
word = case Word8
word of
    Word8
w | Word8
0x80 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
w Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0xbf -> (Utf
UtfTail2, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte3 Word32
point Word8
word)
    Word8
_                          -> (Utf, Word32)
forall a. a
throwDecodeError

decode Utf
UtfTail2 Word32
point Word8
word = case Word8
word of
    Word8
w | Word8
0x80 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
w Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0xbf -> (Utf
UtfTail1, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte2 Word32
point Word8
word)
    Word8
_                          -> (Utf, Word32)
forall a. a
throwDecodeError

decode Utf
UtfTail1 Word32
point Word8
word = case Word8
word of
    Word8
w | Word8
0x80 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
w Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0xbf -> (Utf
UtfGround, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte1 Word32
point Word8
word)
    Word8
_                          -> (Utf, Word32)
forall a. a
throwDecodeError

decodeHex :: Word8 -> Word16
decodeHex :: Word8 -> Word16
decodeHex Word8
x
  | Word8
48 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
x Bool -> Bool -> Bool
&& Word8
x Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<=  Word8
57 = Word8 -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
x Word16 -> Word16 -> Word16
forall a. Num a => a -> a -> a
- Word16
48  -- 0-9
  | Word8
65 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
x Bool -> Bool -> Bool
&& Word8
x Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<=  Word8
70 = Word8 -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
x Word16 -> Word16 -> Word16
forall a. Num a => a -> a -> a
- Word16
55  -- A-F
  | Word8
97 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
x Bool -> Bool -> Bool
&& Word8
x Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
102 = Word8 -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
x Word16 -> Word16 -> Word16
forall a. Num a => a -> a -> a
- Word16
87  -- a-f
  | Bool
otherwise = Word16
forall a. a
throwDecodeError

unescapeText' :: ByteString -> Text
unescapeText' :: ByteString -> Text
unescapeText' ByteString
bs = (forall s. (MArray s -> Int -> ST s Text) -> ST s Text) -> Text
runText ((forall s. (MArray s -> Int -> ST s Text) -> ST s Text) -> Text)
-> (forall s. (MArray s -> Int -> ST s Text) -> ST s Text) -> Text
forall a b. (a -> b) -> a -> b
$ \MArray s -> Int -> ST s Text
done -> do
    MArray s
dest <- Int -> ST s (MArray s)
forall s. Int -> ST s (MArray s)
A.new Int
len

    (Int
pos, State
finalState) <- MArray s -> (Int, State) -> Int -> ST s (Int, State)
forall s. MArray s -> (Int, State) -> Int -> ST s (Int, State)
loop MArray s
dest (Int
0, State
StateNone) Int
0

    -- Check final state. Currently pos gets only increased over time, so this check should catch overflows.
    Bool -> ST s () -> ST s ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when ( State
finalState State -> State -> Bool
forall a. Eq a => a -> a -> Bool
/= State
StateNone Bool -> Bool -> Bool
|| Int
pos Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
len)
      ST s ()
forall a. a
throwDecodeError

    MArray s -> Int -> ST s Text
done MArray s
dest Int
pos -- TODO: pos, pos-1??? XXX

    where
      len :: Int
len = ByteString -> Int
B.length ByteString
bs

      runUtf :: MArray s -> Int -> Utf -> Word32 -> Word8 -> ST s (Int, State)
runUtf MArray s
dest Int
pos Utf
st Word32
point Word8
c = case Utf -> Word32 -> Word8 -> (Utf, Word32)
decode Utf
st Word32
point Word8
c of
        (Utf
UtfGround, Word32
92) -> -- Backslash
            (Int, State) -> ST s (Int, State)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
pos, State
StateBackslash)
        (Utf
UtfGround, Word32
w) | Word32
w Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word32
0xffff ->
            MArray s -> Int -> Word16 -> State -> ST s (Int, State)
forall s t. MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn MArray s
dest Int
pos (Word32 -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
w) State
StateNone
        (Utf
UtfGround, Word32
w) -> do
            MArray s -> Int -> Word16 -> ST s ()
forall s. MArray s -> Int -> Word16 -> ST s ()
write MArray s
dest Int
pos (Word16
0xd7c0 Word16 -> Word16 -> Word16
forall a. Num a => a -> a -> a
+ Word32 -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32
w Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftR` Int
10))
            MArray s -> Int -> Word16 -> State -> ST s (Int, State)
forall s t. MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn MArray s
dest (Int
pos Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Word16
0xdc00 Word16 -> Word16 -> Word16
forall a. Num a => a -> a -> a
+ Word32 -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32
w Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
.&. Word32
0x3ff)) State
StateNone
        (Utf
st', Word32
p) ->
            (Int, State) -> ST s (Int, State)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
pos, Utf -> Word32 -> State
StateUtf Utf
st' Word32
p)

      loop :: A.MArray s -> (Int, State) -> Int -> ST s (Int, State)
      loop :: MArray s -> (Int, State) -> Int -> ST s (Int, State)
loop MArray s
_ (Int, State)
ps Int
i | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
len = (Int, State) -> ST s (Int, State)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int, State)
ps
      loop MArray s
dest (Int, State)
ps Int
i = do
        let c :: Word8
c = ByteString -> Int -> Word8
B.index ByteString
bs Int
i -- JP: We can use unsafe index once we prove bounds with Liquid Haskell.
        (Int, State)
ps' <- MArray s -> (Int, State) -> Word8 -> ST s (Int, State)
forall s. MArray s -> (Int, State) -> Word8 -> ST s (Int, State)
f MArray s
dest (Int, State)
ps Word8
c
        MArray s -> (Int, State) -> Int -> ST s (Int, State)
forall s. MArray s -> (Int, State) -> Int -> ST s (Int, State)
loop MArray s
dest (Int, State)
ps' (Int -> ST s (Int, State)) -> Int -> ST s (Int, State)
forall a b. (a -> b) -> a -> b
$ Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1

      -- No pending state.
      f :: MArray s -> (Int, State) -> Word8 -> ST s (Int, State)
f MArray s
dest (Int
pos, State
StateNone) Word8
c = MArray s -> Int -> Utf -> Word32 -> Word8 -> ST s (Int, State)
forall s.
MArray s -> Int -> Utf -> Word32 -> Word8 -> ST s (Int, State)
runUtf MArray s
dest Int
pos Utf
UtfGround Word32
0 Word8
c

      -- In the middle of parsing a UTF string.
      f MArray s
dest (Int
pos, StateUtf Utf
st Word32
point) Word8
c = MArray s -> Int -> Utf -> Word32 -> Word8 -> ST s (Int, State)
forall s.
MArray s -> Int -> Utf -> Word32 -> Word8 -> ST s (Int, State)
runUtf MArray s
dest Int
pos Utf
st Word32
point Word8
c

      -- In the middle of escaping a backslash.
      f MArray s
dest (Int
pos, State
StateBackslash)  Word8
34 = MArray s -> Int -> Word16 -> State -> ST s (Int, State)
forall s t. MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn MArray s
dest Int
pos Word16
34 State
StateNone -- "
      f MArray s
dest (Int
pos, State
StateBackslash)  Word8
92 = MArray s -> Int -> Word16 -> State -> ST s (Int, State)
forall s t. MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn MArray s
dest Int
pos Word16
92 State
StateNone -- Backslash
      f MArray s
dest (Int
pos, State
StateBackslash)  Word8
47 = MArray s -> Int -> Word16 -> State -> ST s (Int, State)
forall s t. MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn MArray s
dest Int
pos Word16
47 State
StateNone -- /
      f MArray s
dest (Int
pos, State
StateBackslash)  Word8
98 = MArray s -> Int -> Word16 -> State -> ST s (Int, State)
forall s t. MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn MArray s
dest Int
pos  Word16
8 State
StateNone -- b
      f MArray s
dest (Int
pos, State
StateBackslash) Word8
102 = MArray s -> Int -> Word16 -> State -> ST s (Int, State)
forall s t. MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn MArray s
dest Int
pos Word16
12 State
StateNone -- f
      f MArray s
dest (Int
pos, State
StateBackslash) Word8
110 = MArray s -> Int -> Word16 -> State -> ST s (Int, State)
forall s t. MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn MArray s
dest Int
pos Word16
10 State
StateNone -- n
      f MArray s
dest (Int
pos, State
StateBackslash) Word8
114 = MArray s -> Int -> Word16 -> State -> ST s (Int, State)
forall s t. MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn MArray s
dest Int
pos Word16
13 State
StateNone -- r
      f MArray s
dest (Int
pos, State
StateBackslash) Word8
116 = MArray s -> Int -> Word16 -> State -> ST s (Int, State)
forall s t. MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn MArray s
dest Int
pos  Word16
9 State
StateNone -- t
      f    MArray s
_ (Int
pos, State
StateBackslash) Word8
117 = (Int, State) -> ST s (Int, State)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
pos, State
StateU0)                -- u
      f    MArray s
_ (  Int
_, State
StateBackslash) Word8
_   = ST s (Int, State)
forall a. a
throwDecodeError

      -- Processing '\u'.
      f MArray s
_ (Int
pos, State
StateU0) Word8
c =
        let w :: Word16
w = Word8 -> Word16
decodeHex Word8
c in
        (Int, State) -> ST s (Int, State)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
pos, Word16 -> State
StateU1 (Word16
w Word16 -> Int -> Word16
forall a. Bits a => a -> Int -> a
`shiftL` Int
12))

      f MArray s
_ (Int
pos, StateU1 Word16
w') Word8
c =
        let w :: Word16
w = Word8 -> Word16
decodeHex Word8
c in
        (Int, State) -> ST s (Int, State)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
pos, Word16 -> State
StateU2 (Word16
w' Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.|. (Word16
w Word16 -> Int -> Word16
forall a. Bits a => a -> Int -> a
`shiftL` Int
8)))

      f MArray s
_ (Int
pos, StateU2 Word16
w') Word8
c =
        let w :: Word16
w = Word8 -> Word16
decodeHex Word8
c in
        (Int, State) -> ST s (Int, State)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
pos, Word16 -> State
StateU3 (Word16
w' Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.|. (Word16
w Word16 -> Int -> Word16
forall a. Bits a => a -> Int -> a
`shiftL` Int
4)))

      f MArray s
dest (Int
pos, StateU3 Word16
w') Word8
c =
        let w :: Word16
w = Word8 -> Word16
decodeHex Word8
c in
        let u :: Word16
u = Word16
w' Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.|. Word16
w in

        -- Get next state based on surrogates.
        let st :: State
st
              | Word16
u Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word16
0xd800 Bool -> Bool -> Bool
&& Word16
u Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word16
0xdbff = -- High surrogate.
                State
StateS0
              | Word16
u Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word16
0xdc00 Bool -> Bool -> Bool
&& Word16
u Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word16
0xdfff = -- Low surrogate.
                State
forall a. a
throwDecodeError
              | Bool
otherwise =
                State
StateNone
        in
        MArray s -> Int -> Word16 -> State -> ST s (Int, State)
forall s t. MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn MArray s
dest Int
pos Word16
u State
st

      -- Handle surrogates.
      f MArray s
_ (Int
pos, State
StateS0) Word8
92 = (Int, State) -> ST s (Int, State)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
pos, State
StateS1) -- Backslash
      f MArray s
_ (  Int
_, State
StateS0)  Word8
_ = ST s (Int, State)
forall a. a
throwDecodeError

      f MArray s
_ (Int
pos, State
StateS1) Word8
117 = (Int, State) -> ST s (Int, State)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
pos, State
StateSU0) -- u
      f MArray s
_ (  Int
_, State
StateS1)   Word8
_ = ST s (Int, State)
forall a. a
throwDecodeError

      f MArray s
_ (Int
pos, State
StateSU0) Word8
c =
        let w :: Word16
w = Word8 -> Word16
decodeHex Word8
c in
        (Int, State) -> ST s (Int, State)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
pos, Word16 -> State
StateSU1 (Word16
w Word16 -> Int -> Word16
forall a. Bits a => a -> Int -> a
`shiftL` Int
12))

      f MArray s
_ (Int
pos, StateSU1 Word16
w') Word8
c =
        let w :: Word16
w = Word8 -> Word16
decodeHex Word8
c in
        (Int, State) -> ST s (Int, State)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
pos, Word16 -> State
StateSU2 (Word16
w' Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.|. (Word16
w Word16 -> Int -> Word16
forall a. Bits a => a -> Int -> a
`shiftL` Int
8)))

      f MArray s
_ (Int
pos, StateSU2 Word16
w') Word8
c =
        let w :: Word16
w = Word8 -> Word16
decodeHex Word8
c in
        (Int, State) -> ST s (Int, State)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
pos, Word16 -> State
StateSU3 (Word16
w' Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.|. (Word16
w Word16 -> Int -> Word16
forall a. Bits a => a -> Int -> a
`shiftL` Int
4)))

      f MArray s
dest (Int
pos, StateSU3 Word16
w') Word8
c =
        let w :: Word16
w = Word8 -> Word16
decodeHex Word8
c in
        let u :: Word16
u = Word16
w' Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.|. Word16
w in

        -- Check if not low surrogate.
        if Word16
u Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
< Word16
0xdc00 Bool -> Bool -> Bool
|| Word16
u Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
> Word16
0xdfff then
          ST s (Int, State)
forall a. a
throwDecodeError
        else
          MArray s -> Int -> Word16 -> State -> ST s (Int, State)
forall s t. MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn MArray s
dest Int
pos Word16
u State
StateNone

write :: A.MArray s -> Int -> Word16 -> ST s ()
write :: MArray s -> Int -> Word16 -> ST s ()
write MArray s
dest Int
pos Word16
char =
    MArray s -> Int -> Word16 -> ST s ()
forall s. MArray s -> Int -> Word16 -> ST s ()
A.unsafeWrite MArray s
dest Int
pos Word16
char
{-# INLINE write #-}

writeAndReturn :: A.MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn :: MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn MArray s
dest Int
pos Word16
char t
res = do
    MArray s -> Int -> Word16 -> ST s ()
forall s. MArray s -> Int -> Word16 -> ST s ()
write MArray s
dest Int
pos Word16
char
    (Int, t) -> ST s (Int, t)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
pos Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1, t
res)
{-# INLINE writeAndReturn #-}

throwDecodeError :: a
throwDecodeError :: a
throwDecodeError =
    let desc :: [Char]
desc = [Char]
"Data.Text.Internal.Encoding.decodeUtf8: Invalid UTF-8 stream" in
    UnicodeException -> a
forall a e. Exception e => e -> a
throw ([Char] -> Maybe Word8 -> UnicodeException
DecodeError [Char]
desc Maybe Word8
forall a. Maybe a
Nothing)

unescapeText :: ByteString -> Either UnicodeException Text
unescapeText :: ByteString -> Either UnicodeException Text
unescapeText = IO (Either UnicodeException Text) -> Either UnicodeException Text
forall a. IO a -> a
unsafeDupablePerformIO (IO (Either UnicodeException Text) -> Either UnicodeException Text)
-> (ByteString -> IO (Either UnicodeException Text))
-> ByteString
-> Either UnicodeException Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. IO Text -> IO (Either UnicodeException Text)
forall e a. Exception e => IO a -> IO (Either e a)
try (IO Text -> IO (Either UnicodeException Text))
-> (ByteString -> IO Text)
-> ByteString
-> IO (Either UnicodeException Text)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> IO Text
forall a. a -> IO a
evaluate (Text -> IO Text) -> (ByteString -> Text) -> ByteString -> IO Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Text
unescapeText'