{-# LANGUAGE BangPatterns     #-}
{-# LANGUAGE CPP              #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE PatternSynonyms  #-}
{-# LANGUAGE RankNTypes       #-}
{-# LANGUAGE RecordWildCards  #-}

-- |
-- Module      : Streamly.Data.Internal.Unicode.Stream
-- Copyright   : (c) 2018 Composewell Technologies
--               (c) Bjoern Hoehrmann 2008-2009
--
-- License     : BSD3
-- Maintainer  : streamly@composewell.com
-- Stability   : experimental
-- Portability : GHC
--

#include "inline.hs"

module Streamly.Internal.Data.Unicode.Stream
    (
    -- * Construction (Decoding)
      decodeLatin1
    , decodeUtf8
    , decodeUtf8Lax
    , DecodeError(..)
    , DecodeState
    , CodePoint
    , decodeUtf8Either
    , resumeDecodeUtf8Either
    , decodeUtf8Arrays
    , decodeUtf8ArraysLenient

    -- * Elimination (Encoding)
    , encodeLatin1
    , encodeLatin1Lax
    , encodeUtf8
    {-
    -- * Operations on character strings
    , strip -- (dropAround isSpace)
    , stripEnd
    -}

    -- * StreamD UTF8 Encoding / Decoding transformations.
    , decodeUtf8D
    , encodeUtf8D
    , decodeUtf8LenientD
    , decodeUtf8EitherD
    , resumeDecodeUtf8EitherD
    , decodeUtf8ArraysD
    , decodeUtf8ArraysLenientD

    -- * Transformation
    , stripStart
    , lines
    , words
    , unlines
    , unwords
    )
where

import Control.Monad.IO.Class (MonadIO, liftIO)
import Data.Bits (shiftR, shiftL, (.|.), (.&.))
import Data.Char (ord)
import Data.Word (Word8)
import Foreign.ForeignPtr (touchForeignPtr)
import Foreign.ForeignPtr.Unsafe (unsafeForeignPtrToPtr)
import Foreign.Storable (Storable(..))
import GHC.Base (assert, unsafeChr)
import GHC.ForeignPtr (ForeignPtr (..))
import GHC.IO.Encoding.Failure (isSurrogate)
import GHC.Ptr (Ptr (..), plusPtr)
import Prelude hiding (String, lines, words, unlines, unwords)
import System.IO.Unsafe (unsafePerformIO)

import Streamly (IsStream)
import Streamly.Data.Fold (Fold)
import Streamly.Memory.Array (Array)
import Streamly.Internal.Data.Unfold (Unfold)
import Streamly.Internal.Data.SVar (adaptState)
import Streamly.Internal.Data.Stream.StreamD (Stream(..), Step (..))
import Streamly.Internal.Data.Strict (Tuple'(..))

#if __GLASGOW_HASKELL__ < 800
import Streamly.Internal.Data.Stream.StreamD (pattern Stream)
#endif

import qualified Streamly.Internal.Memory.Array.Types as A
import qualified Streamly.Internal.Prelude as S
import qualified Streamly.Internal.Data.Stream.StreamD as D

-------------------------------------------------------------------------------
-- Encoding/Decoding Unicode (UTF-8) Characters
-------------------------------------------------------------------------------

-- UTF-8 primitives, Lifted from GHC.IO.Encoding.UTF8.

data WList = WCons !Word8 !WList | WNil

{-# INLINE ord2 #-}
ord2 :: Char -> WList
ord2 :: Char -> WList
ord2 Char
c = Bool -> WList -> WList
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0x80 Bool -> Bool -> Bool
&& Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x07ff) (Word8 -> WList -> WList
WCons Word8
x1 (Word8 -> WList -> WList
WCons Word8
x2 WList
WNil))
  where
    n :: Int
n = Char -> Int
ord Char
c
    x1 :: Word8
x1 = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ (Int
n Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
6) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
0xC0
    x2 :: Word8
x2 = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ (Int
n Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3F) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
0x80

{-# INLINE ord3 #-}
ord3 :: Char -> WList
ord3 :: Char -> WList
ord3 Char
c = Bool -> WList -> WList
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0x0800 Bool -> Bool -> Bool
&& Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0xffff) (Word8 -> WList -> WList
WCons Word8
x1 (Word8 -> WList -> WList
WCons Word8
x2 (Word8 -> WList -> WList
WCons Word8
x3 WList
WNil)))
  where
    n :: Int
n = Char -> Int
ord Char
c
    x1 :: Word8
x1 = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ (Int
n Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
12) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
0xE0
    x2 :: Word8
x2 = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ ((Int
n Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
6) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3F) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
0x80
    x3 :: Word8
x3 = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ (Int
n Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3F) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
0x80

{-# INLINE ord4 #-}
ord4 :: Char -> WList
ord4 :: Char -> WList
ord4 Char
c = Bool -> WList -> WList
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0x10000)  (Word8 -> WList -> WList
WCons Word8
x1 (Word8 -> WList -> WList
WCons Word8
x2 (Word8 -> WList -> WList
WCons Word8
x3 (Word8 -> WList -> WList
WCons Word8
x4 WList
WNil))))
  where
    n :: Int
n = Char -> Int
ord Char
c
    x1 :: Word8
x1 = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ (Int
n Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
18) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
0xF0
    x2 :: Word8
x2 = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ ((Int
n Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
12) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3F) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
0x80
    x3 :: Word8
x3 = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ ((Int
n Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
6) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3F) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
0x80
    x4 :: Word8
x4 = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ (Int
n Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3F) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
0x80

data CodingFailureMode
    = TransliterateCodingFailure
    | ErrorOnCodingFailure
    deriving (Int -> CodingFailureMode -> ShowS
[CodingFailureMode] -> ShowS
CodingFailureMode -> String
(Int -> CodingFailureMode -> ShowS)
-> (CodingFailureMode -> String)
-> ([CodingFailureMode] -> ShowS)
-> Show CodingFailureMode
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CodingFailureMode] -> ShowS
$cshowList :: [CodingFailureMode] -> ShowS
show :: CodingFailureMode -> String
$cshow :: CodingFailureMode -> String
showsPrec :: Int -> CodingFailureMode -> ShowS
$cshowsPrec :: Int -> CodingFailureMode -> ShowS
Show)

{-# INLINE replacementChar #-}
replacementChar :: Char
replacementChar :: Char
replacementChar = Char
'\xFFFD'

-- Int helps in cheaper conversion from Int to Char
type CodePoint = Int
type DecodeState = Word8

-- See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.

-- XXX Use names decodeSuccess = 0, decodeFailure = 12

decodeTable :: [Word8]
decodeTable :: [Word8]
decodeTable = [
   -- The first part of the table maps bytes to character classes that
   -- to reduce the size of the transition table and create bitmasks.
   Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,  Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,
   Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,  Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,
   Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,  Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,
   Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,  Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,Word8
0,
   Word8
1,Word8
1,Word8
1,Word8
1,Word8
1,Word8
1,Word8
1,Word8
1,Word8
1,Word8
1,Word8
1,Word8
1,Word8
1,Word8
1,Word8
1,Word8
1,  Word8
9,Word8
9,Word8
9,Word8
9,Word8
9,Word8
9,Word8
9,Word8
9,Word8
9,Word8
9,Word8
9,Word8
9,Word8
9,Word8
9,Word8
9,Word8
9,
   Word8
7,Word8
7,Word8
7,Word8
7,Word8
7,Word8
7,Word8
7,Word8
7,Word8
7,Word8
7,Word8
7,Word8
7,Word8
7,Word8
7,Word8
7,Word8
7,  Word8
7,Word8
7,Word8
7,Word8
7,Word8
7,Word8
7,Word8
7,Word8
7,Word8
7,Word8
7,Word8
7,Word8
7,Word8
7,Word8
7,Word8
7,Word8
7,
   Word8
8,Word8
8,Word8
2,Word8
2,Word8
2,Word8
2,Word8
2,Word8
2,Word8
2,Word8
2,Word8
2,Word8
2,Word8
2,Word8
2,Word8
2,Word8
2,  Word8
2,Word8
2,Word8
2,Word8
2,Word8
2,Word8
2,Word8
2,Word8
2,Word8
2,Word8
2,Word8
2,Word8
2,Word8
2,Word8
2,Word8
2,Word8
2,
  Word8
10,Word8
3,Word8
3,Word8
3,Word8
3,Word8
3,Word8
3,Word8
3,Word8
3,Word8
3,Word8
3,Word8
3,Word8
3,Word8
4,Word8
3,Word8
3, Word8
11,Word8
6,Word8
6,Word8
6,Word8
5,Word8
8,Word8
8,Word8
8,Word8
8,Word8
8,Word8
8,Word8
8,Word8
8,Word8
8,Word8
8,Word8
8,

   -- The second part is a transition table that maps a combination
   -- of a state of the automaton and a character class to a state.
   Word8
0,Word8
12,Word8
24,Word8
36,Word8
60,Word8
96,Word8
84,Word8
12,Word8
12,Word8
12,Word8
48,Word8
72, Word8
12,Word8
12,Word8
12,Word8
12,Word8
12,Word8
12,Word8
12,Word8
12,Word8
12,Word8
12,Word8
12,Word8
12,
  Word8
12, Word8
0,Word8
12,Word8
12,Word8
12,Word8
12,Word8
12, Word8
0,Word8
12, Word8
0,Word8
12,Word8
12, Word8
12,Word8
24,Word8
12,Word8
12,Word8
12,Word8
12,Word8
12,Word8
24,Word8
12,Word8
24,Word8
12,Word8
12,
  Word8
12,Word8
12,Word8
12,Word8
12,Word8
12,Word8
12,Word8
12,Word8
24,Word8
12,Word8
12,Word8
12,Word8
12, Word8
12,Word8
24,Word8
12,Word8
12,Word8
12,Word8
12,Word8
12,Word8
12,Word8
12,Word8
24,Word8
12,Word8
12,
  Word8
12,Word8
12,Word8
12,Word8
12,Word8
12,Word8
12,Word8
12,Word8
36,Word8
12,Word8
36,Word8
12,Word8
12, Word8
12,Word8
36,Word8
12,Word8
12,Word8
12,Word8
12,Word8
12,Word8
36,Word8
12,Word8
36,Word8
12,Word8
12,
  Word8
12,Word8
36,Word8
12,Word8
12,Word8
12,Word8
12,Word8
12,Word8
12,Word8
12,Word8
12,Word8
12,Word8
12
  ]

utf8d :: A.Array Word8
utf8d :: Array Word8
utf8d =
      IO (Array Word8) -> Array Word8
forall a. IO a -> a
unsafePerformIO
    -- Aligning to cacheline makes a barely noticeable difference
    -- XXX currently alignment is not implemented for unmanaged allocation
    (IO (Array Word8) -> Array Word8)
-> IO (Array Word8) -> Array Word8
forall a b. (a -> b) -> a -> b
$ Fold IO Word8 (Array Word8) -> Stream IO Word8 -> IO (Array Word8)
forall (m :: * -> *) a b.
Monad m =>
Fold m a b -> Stream m a -> m b
D.runFold (Int -> Int -> Fold IO Word8 (Array Word8)
forall (m :: * -> *) a.
(MonadIO m, Storable a) =>
Int -> Int -> Fold m a (Array a)
A.writeNAlignedUnmanaged Int
64 ([Word8] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Word8]
decodeTable))
              ([Word8] -> Stream IO Word8
forall (m :: * -> *) a. Applicative m => [a] -> Stream m a
D.fromList [Word8]
decodeTable)

-- | Return element at the specified index without checking the bounds.
-- and without touching the foreign ptr.
{-# INLINE_NORMAL unsafePeekElemOff #-}
unsafePeekElemOff :: forall a. Storable a => Ptr a -> Int -> a
unsafePeekElemOff :: Ptr a -> Int -> a
unsafePeekElemOff Ptr a
p Int
i = let !x :: a
x = IO a -> a
forall a. IO a -> a
A.unsafeInlineIO (IO a -> a) -> IO a -> a
forall a b. (a -> b) -> a -> b
$ Ptr a -> Int -> IO a
forall a. Storable a => Ptr a -> Int -> IO a
peekElemOff Ptr a
p Int
i in a
x

-- decode is split into two separate cases to avoid branching instructions.
-- From the higher level flow we already know which case we are in so we can
-- call the appropriate decode function.
--
-- When the state is 0
{-# INLINE decode0 #-}
decode0 :: Ptr Word8 -> Word8 -> Tuple' DecodeState CodePoint
decode0 :: Ptr Word8 -> Word8 -> Tuple' Word8 Int
decode0 Ptr Word8
table Word8
byte =
    let !t :: Word8
t = Ptr Word8
table Ptr Word8 -> Int -> Word8
forall a. Storable a => Ptr a -> Int -> a
`unsafePeekElemOff` Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
byte
        !codep' :: Int
codep' = (Int
0xff Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
t)) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
byte
        !state' :: Word8
state' = Ptr Word8
table Ptr Word8 -> Int -> Word8
forall a. Storable a => Ptr a -> Int -> a
`unsafePeekElemOff` (Int
256 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
t)
     in Bool -> Tuple' Word8 Int -> Tuple' Word8 Int
forall a. (?callStack::CallStack) => Bool -> a -> a
assert ((Word8
byte Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
> Word8
0x7f Bool -> Bool -> Bool
|| String -> Bool
forall a. (?callStack::CallStack) => String -> a
error String
showByte)
                Bool -> Bool -> Bool
&& (Word8
state' Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0 Bool -> Bool -> Bool
|| String -> Bool
forall a. (?callStack::CallStack) => String -> a
error (String
showByte String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
showTable)))
               (Word8 -> Int -> Tuple' Word8 Int
forall a b. a -> b -> Tuple' a b
Tuple' Word8
state' Int
codep')

    where

    utf8table :: Array Word8
utf8table =
        let !(Ptr Addr#
addr) = Ptr Word8
table
            end :: Ptr b
end = Ptr Word8
table Ptr Word8 -> Int -> Ptr b
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
364
        in ForeignPtr Word8 -> Ptr Word8 -> Ptr Word8 -> Array Word8
forall a. ForeignPtr a -> Ptr a -> Ptr a -> Array a
A.Array (Addr# -> ForeignPtrContents -> ForeignPtr Word8
forall a. Addr# -> ForeignPtrContents -> ForeignPtr a
ForeignPtr Addr#
addr ForeignPtrContents
forall a. (?callStack::CallStack) => a
undefined) Ptr Word8
forall b. Ptr b
end Ptr Word8
forall b. Ptr b
end :: A.Array Word8
    showByte :: String
showByte = String
"Streamly: decode0: byte: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Word8 -> String
forall a. Show a => a -> String
show Word8
byte
    showTable :: String
showTable = String
" table: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Array Word8 -> String
forall a. Show a => a -> String
show Array Word8
utf8table

-- When the state is not 0
{-# INLINE decode1 #-}
decode1
    :: Ptr Word8
    -> DecodeState
    -> CodePoint
    -> Word8
    -> Tuple' DecodeState CodePoint
decode1 :: Ptr Word8 -> Word8 -> Int -> Word8 -> Tuple' Word8 Int
decode1 Ptr Word8
table Word8
state Int
codep Word8
byte =
    -- Remember codep is Int type!
    -- Can it be unsafe to convert the resulting Int to Char?
    let !t :: Word8
t = Ptr Word8
table Ptr Word8 -> Int -> Word8
forall a. Storable a => Ptr a -> Int -> a
`unsafePeekElemOff` Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
byte
        !codep' :: Int
codep' = (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
byte Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3f) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. (Int
codep Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
6)
        !state' :: Word8
state' = Ptr Word8
table Ptr Word8 -> Int -> Word8
forall a. Storable a => Ptr a -> Int -> a
`unsafePeekElemOff`
                    (Int
256 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
state Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
t)
     in Bool -> Tuple' Word8 Int -> Tuple' Word8 Int
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Int
codep' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x10FFFF
                    Bool -> Bool -> Bool
|| String -> Bool
forall a. (?callStack::CallStack) => String -> a
error (String
showByte String -> ShowS
forall a. [a] -> [a] -> [a]
++ Word8 -> Int -> String
forall a a. (Show a, Show a) => a -> a -> String
showState Word8
state Int
codep))
               (Word8 -> Int -> Tuple' Word8 Int
forall a b. a -> b -> Tuple' a b
Tuple' Word8
state' Int
codep')
    where

    utf8table :: Array Word8
utf8table =
        let !(Ptr Addr#
addr) = Ptr Word8
table
            end :: Ptr b
end = Ptr Word8
table Ptr Word8 -> Int -> Ptr b
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
364
        in ForeignPtr Word8 -> Ptr Word8 -> Ptr Word8 -> Array Word8
forall a. ForeignPtr a -> Ptr a -> Ptr a -> Array a
A.Array (Addr# -> ForeignPtrContents -> ForeignPtr Word8
forall a. Addr# -> ForeignPtrContents -> ForeignPtr a
ForeignPtr Addr#
addr ForeignPtrContents
forall a. (?callStack::CallStack) => a
undefined) Ptr Word8
forall b. Ptr b
end Ptr Word8
forall b. Ptr b
end :: A.Array Word8
    showByte :: String
showByte = String
"Streamly: decode1: byte: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Word8 -> String
forall a. Show a => a -> String
show Word8
byte
    showState :: a -> a -> String
showState a
st a
cp =
        String
" state: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ a -> String
forall a. Show a => a -> String
show a
st String -> ShowS
forall a. [a] -> [a] -> [a]
++
        String
" codepoint: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ a -> String
forall a. Show a => a -> String
show a
cp String -> ShowS
forall a. [a] -> [a] -> [a]
++
        String
" table: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Array Word8 -> String
forall a. Show a => a -> String
show Array Word8
utf8table

-- We can divide the errors in three general categories:
-- * A non-starter was encountered in a begin state
-- * A starter was encountered without completing a codepoint
-- * The last codepoint was not complete (input underflow)
--
data DecodeError = DecodeError !DecodeState !CodePoint deriving Int -> DecodeError -> ShowS
[DecodeError] -> ShowS
DecodeError -> String
(Int -> DecodeError -> ShowS)
-> (DecodeError -> String)
-> ([DecodeError] -> ShowS)
-> Show DecodeError
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [DecodeError] -> ShowS
$cshowList :: [DecodeError] -> ShowS
show :: DecodeError -> String
$cshow :: DecodeError -> String
showsPrec :: Int -> DecodeError -> ShowS
$cshowsPrec :: Int -> DecodeError -> ShowS
Show

data FreshPoint s a
    = FreshPointDecodeInit s
    | FreshPointDecodeInit1 s Word8
    | FreshPointDecodeFirst s Word8
    | FreshPointDecoding s !DecodeState !CodePoint
    | YieldAndContinue a (FreshPoint s a)
    | Done

-- XXX Add proper error messages
-- XXX Implement this in terms of decodeUtf8Either
{-# INLINE_NORMAL decodeUtf8WithD #-}
decodeUtf8WithD :: Monad m => CodingFailureMode -> Stream m Word8 -> Stream m Char
decodeUtf8WithD :: CodingFailureMode -> Stream m Word8 -> Stream m Char
decodeUtf8WithD CodingFailureMode
cfm (Stream State Stream m Word8 -> s -> m (Step s Word8)
step s
state) =
    let A.Array ForeignPtr Word8
p Ptr Word8
_ Ptr Word8
_ = Array Word8
utf8d
        !ptr :: Ptr Word8
ptr = (ForeignPtr Word8 -> Ptr Word8
forall a. ForeignPtr a -> Ptr a
unsafeForeignPtrToPtr ForeignPtr Word8
p)
    in (State Stream m Char
 -> FreshPoint s Char -> m (Step (FreshPoint s Char) Char))
-> FreshPoint s Char -> Stream m Char
forall (m :: * -> *) a s.
(State Stream m a -> s -> m (Step s a)) -> s -> Stream m a
Stream (Ptr Word8
-> State Stream m Char
-> FreshPoint s Char
-> m (Step (FreshPoint s Char) Char)
forall (m :: * -> *) a.
Ptr Word8
-> State Stream m a
-> FreshPoint s Char
-> m (Step (FreshPoint s Char) Char)
step' Ptr Word8
ptr) (s -> FreshPoint s Char
forall s a. s -> FreshPoint s a
FreshPointDecodeInit s
state)
  where
    {-# INLINE transliterateOrError #-}
    transliterateOrError :: String -> FreshPoint s Char -> FreshPoint s Char
transliterateOrError String
e FreshPoint s Char
s =
        case CodingFailureMode
cfm of
            CodingFailureMode
ErrorOnCodingFailure -> String -> FreshPoint s Char
forall a. (?callStack::CallStack) => String -> a
error String
e
            CodingFailureMode
TransliterateCodingFailure -> Char -> FreshPoint s Char -> FreshPoint s Char
forall s a. a -> FreshPoint s a -> FreshPoint s a
YieldAndContinue Char
replacementChar FreshPoint s Char
s
    {-# INLINE inputUnderflow #-}
    inputUnderflow :: FreshPoint s Char
inputUnderflow =
        case CodingFailureMode
cfm of
            CodingFailureMode
ErrorOnCodingFailure ->
                String -> FreshPoint s Char
forall a. (?callStack::CallStack) => String -> a
error String
"Streamly.Internal.Data.Stream.StreamD.decodeUtf8With: Input Underflow"
            CodingFailureMode
TransliterateCodingFailure -> Char -> FreshPoint s Char -> FreshPoint s Char
forall s a. a -> FreshPoint s a -> FreshPoint s a
YieldAndContinue Char
replacementChar FreshPoint s Char
forall s a. FreshPoint s a
Done
    {-# INLINE_LATE step' #-}
    step' :: Ptr Word8
-> State Stream m a
-> FreshPoint s Char
-> m (Step (FreshPoint s Char) Char)
step' Ptr Word8
_ State Stream m a
gst (FreshPointDecodeInit s
st) = do
        Step s Word8
r <- State Stream m Word8 -> s -> m (Step s Word8)
step (State Stream m a -> State Stream m Word8
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a (n :: * -> *) b.
State t m a -> State t n b
adaptState State Stream m a
gst) s
st
        Step (FreshPoint s Char) Char -> m (Step (FreshPoint s Char) Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Step (FreshPoint s Char) Char
 -> m (Step (FreshPoint s Char) Char))
-> Step (FreshPoint s Char) Char
-> m (Step (FreshPoint s Char) Char)
forall a b. (a -> b) -> a -> b
$ case Step s Word8
r of
            Yield Word8
x s
s -> FreshPoint s Char -> Step (FreshPoint s Char) Char
forall s a. s -> Step s a
Skip (s -> Word8 -> FreshPoint s Char
forall s a. s -> Word8 -> FreshPoint s a
FreshPointDecodeInit1 s
s Word8
x)
            Skip s
s -> FreshPoint s Char -> Step (FreshPoint s Char) Char
forall s a. s -> Step s a
Skip (s -> FreshPoint s Char
forall s a. s -> FreshPoint s a
FreshPointDecodeInit s
s)
            Step s Word8
Stop   -> FreshPoint s Char -> Step (FreshPoint s Char) Char
forall s a. s -> Step s a
Skip FreshPoint s Char
forall s a. FreshPoint s a
Done

    step' Ptr Word8
_ State Stream m a
_ (FreshPointDecodeInit1 s
st Word8
x) = do
        -- Note: It is important to use a ">" instead of a "<=" test
        -- here for GHC to generate code layout for default branch
        -- prediction for the common case. This is fragile and might
        -- change with the compiler versions, we need a more reliable
        -- "likely" primitive to control branch predication.
        case Word8
x Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
> Word8
0x7f of
            Bool
False ->
                Step (FreshPoint s Char) Char -> m (Step (FreshPoint s Char) Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Step (FreshPoint s Char) Char
 -> m (Step (FreshPoint s Char) Char))
-> Step (FreshPoint s Char) Char
-> m (Step (FreshPoint s Char) Char)
forall a b. (a -> b) -> a -> b
$ FreshPoint s Char -> Step (FreshPoint s Char) Char
forall s a. s -> Step s a
Skip (FreshPoint s Char -> Step (FreshPoint s Char) Char)
-> FreshPoint s Char -> Step (FreshPoint s Char) Char
forall a b. (a -> b) -> a -> b
$ Char -> FreshPoint s Char -> FreshPoint s Char
forall s a. a -> FreshPoint s a -> FreshPoint s a
YieldAndContinue
                    (Int -> Char
unsafeChr (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
x))
                    (s -> FreshPoint s Char
forall s a. s -> FreshPoint s a
FreshPointDecodeInit s
st)
            -- Using a separate state here generates a jump to a
            -- separate code block in the core which seems to perform
            -- slightly better for the non-ascii case.
            Bool
True -> Step (FreshPoint s Char) Char -> m (Step (FreshPoint s Char) Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Step (FreshPoint s Char) Char
 -> m (Step (FreshPoint s Char) Char))
-> Step (FreshPoint s Char) Char
-> m (Step (FreshPoint s Char) Char)
forall a b. (a -> b) -> a -> b
$ FreshPoint s Char -> Step (FreshPoint s Char) Char
forall s a. s -> Step s a
Skip (FreshPoint s Char -> Step (FreshPoint s Char) Char)
-> FreshPoint s Char -> Step (FreshPoint s Char) Char
forall a b. (a -> b) -> a -> b
$ s -> Word8 -> FreshPoint s Char
forall s a. s -> Word8 -> FreshPoint s a
FreshPointDecodeFirst s
st Word8
x

    -- XXX should we merge it with FreshPointDecodeInit1?
    step' Ptr Word8
table State Stream m a
_ (FreshPointDecodeFirst s
st Word8
x) = do
        let (Tuple' Word8
sv Int
cp) = Ptr Word8 -> Word8 -> Tuple' Word8 Int
decode0 Ptr Word8
table Word8
x
        Step (FreshPoint s Char) Char -> m (Step (FreshPoint s Char) Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Step (FreshPoint s Char) Char
 -> m (Step (FreshPoint s Char) Char))
-> Step (FreshPoint s Char) Char
-> m (Step (FreshPoint s Char) Char)
forall a b. (a -> b) -> a -> b
$
            case Word8
sv of
                Word8
12 ->
                    FreshPoint s Char -> Step (FreshPoint s Char) Char
forall s a. s -> Step s a
Skip (FreshPoint s Char -> Step (FreshPoint s Char) Char)
-> FreshPoint s Char -> Step (FreshPoint s Char) Char
forall a b. (a -> b) -> a -> b
$
                    String -> FreshPoint s Char -> FreshPoint s Char
forall s. String -> FreshPoint s Char -> FreshPoint s Char
transliterateOrError
                        String
"Streamly.Internal.Data.Stream.StreamD.decodeUtf8With: Invalid UTF8 codepoint encountered"
                        (s -> FreshPoint s Char
forall s a. s -> FreshPoint s a
FreshPointDecodeInit s
st)
                Word8
0 -> String -> Step (FreshPoint s Char) Char
forall a. (?callStack::CallStack) => String -> a
error String
"unreachable state"
                Word8
_ -> FreshPoint s Char -> Step (FreshPoint s Char) Char
forall s a. s -> Step s a
Skip (s -> Word8 -> Int -> FreshPoint s Char
forall s a. s -> Word8 -> Int -> FreshPoint s a
FreshPointDecoding s
st Word8
sv Int
cp)

    -- We recover by trying the new byte x a starter of a new codepoint.
    -- XXX need to use the same recovery in array decoding routine as well
    step' Ptr Word8
table State Stream m a
gst (FreshPointDecoding s
st Word8
statePtr Int
codepointPtr) = do
        Step s Word8
r <- State Stream m Word8 -> s -> m (Step s Word8)
step (State Stream m a -> State Stream m Word8
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a (n :: * -> *) b.
State t m a -> State t n b
adaptState State Stream m a
gst) s
st
        case Step s Word8
r of
            Yield Word8
x s
s -> do
                let (Tuple' Word8
sv Int
cp) = Ptr Word8 -> Word8 -> Int -> Word8 -> Tuple' Word8 Int
decode1 Ptr Word8
table Word8
statePtr Int
codepointPtr Word8
x
                Step (FreshPoint s Char) Char -> m (Step (FreshPoint s Char) Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Step (FreshPoint s Char) Char
 -> m (Step (FreshPoint s Char) Char))
-> Step (FreshPoint s Char) Char
-> m (Step (FreshPoint s Char) Char)
forall a b. (a -> b) -> a -> b
$
                    case Word8
sv of
                        Word8
0 -> FreshPoint s Char -> Step (FreshPoint s Char) Char
forall s a. s -> Step s a
Skip (FreshPoint s Char -> Step (FreshPoint s Char) Char)
-> FreshPoint s Char -> Step (FreshPoint s Char) Char
forall a b. (a -> b) -> a -> b
$ Char -> FreshPoint s Char -> FreshPoint s Char
forall s a. a -> FreshPoint s a -> FreshPoint s a
YieldAndContinue (Int -> Char
unsafeChr Int
cp)
                                        (s -> FreshPoint s Char
forall s a. s -> FreshPoint s a
FreshPointDecodeInit s
s)
                        Word8
12 ->
                            FreshPoint s Char -> Step (FreshPoint s Char) Char
forall s a. s -> Step s a
Skip (FreshPoint s Char -> Step (FreshPoint s Char) Char)
-> FreshPoint s Char -> Step (FreshPoint s Char) Char
forall a b. (a -> b) -> a -> b
$
                            String -> FreshPoint s Char -> FreshPoint s Char
forall s. String -> FreshPoint s Char -> FreshPoint s Char
transliterateOrError
                                String
"Streamly.Internal.Data.Stream.StreamD.decodeUtf8With: Invalid UTF8 codepoint encountered"
                                (s -> Word8 -> FreshPoint s Char
forall s a. s -> Word8 -> FreshPoint s a
FreshPointDecodeInit1 s
s Word8
x)
                        Word8
_ -> FreshPoint s Char -> Step (FreshPoint s Char) Char
forall s a. s -> Step s a
Skip (s -> Word8 -> Int -> FreshPoint s Char
forall s a. s -> Word8 -> Int -> FreshPoint s a
FreshPointDecoding s
s Word8
sv Int
cp)
            Skip s
s -> Step (FreshPoint s Char) Char -> m (Step (FreshPoint s Char) Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Step (FreshPoint s Char) Char
 -> m (Step (FreshPoint s Char) Char))
-> Step (FreshPoint s Char) Char
-> m (Step (FreshPoint s Char) Char)
forall a b. (a -> b) -> a -> b
$ FreshPoint s Char -> Step (FreshPoint s Char) Char
forall s a. s -> Step s a
Skip (s -> Word8 -> Int -> FreshPoint s Char
forall s a. s -> Word8 -> Int -> FreshPoint s a
FreshPointDecoding s
s Word8
statePtr Int
codepointPtr)
            Step s Word8
Stop -> Step (FreshPoint s Char) Char -> m (Step (FreshPoint s Char) Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Step (FreshPoint s Char) Char
 -> m (Step (FreshPoint s Char) Char))
-> Step (FreshPoint s Char) Char
-> m (Step (FreshPoint s Char) Char)
forall a b. (a -> b) -> a -> b
$ FreshPoint s Char -> Step (FreshPoint s Char) Char
forall s a. s -> Step s a
Skip FreshPoint s Char
forall s. FreshPoint s Char
inputUnderflow

    step' Ptr Word8
_ State Stream m a
_ (YieldAndContinue Char
c FreshPoint s Char
s) = Step (FreshPoint s Char) Char -> m (Step (FreshPoint s Char) Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Step (FreshPoint s Char) Char
 -> m (Step (FreshPoint s Char) Char))
-> Step (FreshPoint s Char) Char
-> m (Step (FreshPoint s Char) Char)
forall a b. (a -> b) -> a -> b
$ Char -> FreshPoint s Char -> Step (FreshPoint s Char) Char
forall s a. a -> s -> Step s a
Yield Char
c FreshPoint s Char
s
    step' Ptr Word8
_ State Stream m a
_ FreshPoint s Char
Done = Step (FreshPoint s Char) Char -> m (Step (FreshPoint s Char) Char)
forall (m :: * -> *) a. Monad m => a -> m a
return Step (FreshPoint s Char) Char
forall s a. Step s a
Stop

{-# INLINE decodeUtf8D #-}
decodeUtf8D :: Monad m => Stream m Word8 -> Stream m Char
decodeUtf8D :: Stream m Word8 -> Stream m Char
decodeUtf8D = CodingFailureMode -> Stream m Word8 -> Stream m Char
forall (m :: * -> *).
Monad m =>
CodingFailureMode -> Stream m Word8 -> Stream m Char
decodeUtf8WithD CodingFailureMode
ErrorOnCodingFailure

{-# INLINE decodeUtf8LenientD #-}
decodeUtf8LenientD :: Monad m => Stream m Word8 -> Stream m Char
decodeUtf8LenientD :: Stream m Word8 -> Stream m Char
decodeUtf8LenientD = CodingFailureMode -> Stream m Word8 -> Stream m Char
forall (m :: * -> *).
Monad m =>
CodingFailureMode -> Stream m Word8 -> Stream m Char
decodeUtf8WithD CodingFailureMode
TransliterateCodingFailure

{-# INLINE_NORMAL resumeDecodeUtf8EitherD #-}
resumeDecodeUtf8EitherD
    :: Monad m
    => DecodeState
    -> CodePoint
    -> Stream m Word8
    -> Stream m (Either DecodeError Char)
resumeDecodeUtf8EitherD :: Word8
-> Int -> Stream m Word8 -> Stream m (Either DecodeError Char)
resumeDecodeUtf8EitherD Word8
dst Int
codep (Stream State Stream m Word8 -> s -> m (Step s Word8)
step s
state) =
    let A.Array ForeignPtr Word8
p Ptr Word8
_ Ptr Word8
_ = Array Word8
utf8d
        !ptr :: Ptr Word8
ptr = (ForeignPtr Word8 -> Ptr Word8
forall a. ForeignPtr a -> Ptr a
unsafeForeignPtrToPtr ForeignPtr Word8
p)
        stt :: FreshPoint s a
stt =
            if Word8
dst Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0
            then s -> FreshPoint s a
forall s a. s -> FreshPoint s a
FreshPointDecodeInit s
state
            else s -> Word8 -> Int -> FreshPoint s a
forall s a. s -> Word8 -> Int -> FreshPoint s a
FreshPointDecoding s
state Word8
dst Int
codep
    in (State Stream m (Either DecodeError Char)
 -> FreshPoint s (Either DecodeError Char)
 -> m (Step
         (FreshPoint s (Either DecodeError Char))
         (Either DecodeError Char)))
-> FreshPoint s (Either DecodeError Char)
-> Stream m (Either DecodeError Char)
forall (m :: * -> *) a s.
(State Stream m a -> s -> m (Step s a)) -> s -> Stream m a
Stream (Ptr Word8
-> State Stream m (Either DecodeError Char)
-> FreshPoint s (Either DecodeError Char)
-> m (Step
        (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char))
forall (m :: * -> *) a.
Ptr Word8
-> State Stream m a
-> FreshPoint s (Either DecodeError Char)
-> m (Step
        (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char))
step' Ptr Word8
ptr) FreshPoint s (Either DecodeError Char)
forall a. FreshPoint s a
stt
  where
    {-# INLINE_LATE step' #-}
    step' :: Ptr Word8
-> State Stream m a
-> FreshPoint s (Either DecodeError Char)
-> m (Step
        (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char))
step' Ptr Word8
_ State Stream m a
gst (FreshPointDecodeInit s
st) = do
        Step s Word8
r <- State Stream m Word8 -> s -> m (Step s Word8)
step (State Stream m a -> State Stream m Word8
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a (n :: * -> *) b.
State t m a -> State t n b
adaptState State Stream m a
gst) s
st
        Step
  (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
-> m (Step
        (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char))
forall (m :: * -> *) a. Monad m => a -> m a
return (Step
   (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
 -> m (Step
         (FreshPoint s (Either DecodeError Char))
         (Either DecodeError Char)))
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
-> m (Step
        (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char))
forall a b. (a -> b) -> a -> b
$ case Step s Word8
r of
            Yield Word8
x s
s -> FreshPoint s (Either DecodeError Char)
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
forall s a. s -> Step s a
Skip (s -> Word8 -> FreshPoint s (Either DecodeError Char)
forall s a. s -> Word8 -> FreshPoint s a
FreshPointDecodeInit1 s
s Word8
x)
            Skip s
s -> FreshPoint s (Either DecodeError Char)
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
forall s a. s -> Step s a
Skip (s -> FreshPoint s (Either DecodeError Char)
forall s a. s -> FreshPoint s a
FreshPointDecodeInit s
s)
            Step s Word8
Stop   -> FreshPoint s (Either DecodeError Char)
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
forall s a. s -> Step s a
Skip FreshPoint s (Either DecodeError Char)
forall s a. FreshPoint s a
Done

    step' Ptr Word8
_ State Stream m a
_ (FreshPointDecodeInit1 s
st Word8
x) = do
        -- Note: It is important to use a ">" instead of a "<=" test
        -- here for GHC to generate code layout for default branch
        -- prediction for the common case. This is fragile and might
        -- change with the compiler versions, we need a more reliable
        -- "likely" primitive to control branch predication.
        case Word8
x Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
> Word8
0x7f of
            Bool
False ->
                Step
  (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
-> m (Step
        (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char))
forall (m :: * -> *) a. Monad m => a -> m a
return (Step
   (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
 -> m (Step
         (FreshPoint s (Either DecodeError Char))
         (Either DecodeError Char)))
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
-> m (Step
        (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char))
forall a b. (a -> b) -> a -> b
$ FreshPoint s (Either DecodeError Char)
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
forall s a. s -> Step s a
Skip (FreshPoint s (Either DecodeError Char)
 -> Step
      (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char))
-> FreshPoint s (Either DecodeError Char)
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
forall a b. (a -> b) -> a -> b
$ Either DecodeError Char
-> FreshPoint s (Either DecodeError Char)
-> FreshPoint s (Either DecodeError Char)
forall s a. a -> FreshPoint s a -> FreshPoint s a
YieldAndContinue
                    (Char -> Either DecodeError Char
forall a b. b -> Either a b
Right (Char -> Either DecodeError Char)
-> Char -> Either DecodeError Char
forall a b. (a -> b) -> a -> b
$ Int -> Char
unsafeChr (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
x))
                    (s -> FreshPoint s (Either DecodeError Char)
forall s a. s -> FreshPoint s a
FreshPointDecodeInit s
st)
            -- Using a separate state here generates a jump to a
            -- separate code block in the core which seems to perform
            -- slightly better for the non-ascii case.
            Bool
True -> Step
  (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
-> m (Step
        (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char))
forall (m :: * -> *) a. Monad m => a -> m a
return (Step
   (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
 -> m (Step
         (FreshPoint s (Either DecodeError Char))
         (Either DecodeError Char)))
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
-> m (Step
        (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char))
forall a b. (a -> b) -> a -> b
$ FreshPoint s (Either DecodeError Char)
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
forall s a. s -> Step s a
Skip (FreshPoint s (Either DecodeError Char)
 -> Step
      (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char))
-> FreshPoint s (Either DecodeError Char)
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
forall a b. (a -> b) -> a -> b
$ s -> Word8 -> FreshPoint s (Either DecodeError Char)
forall s a. s -> Word8 -> FreshPoint s a
FreshPointDecodeFirst s
st Word8
x

    -- XXX should we merge it with FreshPointDecodeInit1?
    step' Ptr Word8
table State Stream m a
_ (FreshPointDecodeFirst s
st Word8
x) = do
        let (Tuple' Word8
sv Int
cp) = Ptr Word8 -> Word8 -> Tuple' Word8 Int
decode0 Ptr Word8
table Word8
x
        Step
  (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
-> m (Step
        (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char))
forall (m :: * -> *) a. Monad m => a -> m a
return (Step
   (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
 -> m (Step
         (FreshPoint s (Either DecodeError Char))
         (Either DecodeError Char)))
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
-> m (Step
        (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char))
forall a b. (a -> b) -> a -> b
$
            case Word8
sv of
                Word8
12 ->
                    FreshPoint s (Either DecodeError Char)
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
forall s a. s -> Step s a
Skip (FreshPoint s (Either DecodeError Char)
 -> Step
      (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char))
-> FreshPoint s (Either DecodeError Char)
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
forall a b. (a -> b) -> a -> b
$ Either DecodeError Char
-> FreshPoint s (Either DecodeError Char)
-> FreshPoint s (Either DecodeError Char)
forall s a. a -> FreshPoint s a -> FreshPoint s a
YieldAndContinue (DecodeError -> Either DecodeError Char
forall a b. a -> Either a b
Left (DecodeError -> Either DecodeError Char)
-> DecodeError -> Either DecodeError Char
forall a b. (a -> b) -> a -> b
$ Word8 -> Int -> DecodeError
DecodeError Word8
0 (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
x))
                                            (s -> FreshPoint s (Either DecodeError Char)
forall s a. s -> FreshPoint s a
FreshPointDecodeInit s
st)
                Word8
0 -> String
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
forall a. (?callStack::CallStack) => String -> a
error String
"unreachable state"
                Word8
_ -> FreshPoint s (Either DecodeError Char)
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
forall s a. s -> Step s a
Skip (s -> Word8 -> Int -> FreshPoint s (Either DecodeError Char)
forall s a. s -> Word8 -> Int -> FreshPoint s a
FreshPointDecoding s
st Word8
sv Int
cp)

    -- We recover by trying the new byte x a starter of a new codepoint.
    -- XXX need to use the same recovery in array decoding routine as well
    step' Ptr Word8
table State Stream m a
gst (FreshPointDecoding s
st Word8
statePtr Int
codepointPtr) = do
        Step s Word8
r <- State Stream m Word8 -> s -> m (Step s Word8)
step (State Stream m a -> State Stream m Word8
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a (n :: * -> *) b.
State t m a -> State t n b
adaptState State Stream m a
gst) s
st
        case Step s Word8
r of
            Yield Word8
x s
s -> do
                let (Tuple' Word8
sv Int
cp) = Ptr Word8 -> Word8 -> Int -> Word8 -> Tuple' Word8 Int
decode1 Ptr Word8
table Word8
statePtr Int
codepointPtr Word8
x
                Step
  (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
-> m (Step
        (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char))
forall (m :: * -> *) a. Monad m => a -> m a
return (Step
   (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
 -> m (Step
         (FreshPoint s (Either DecodeError Char))
         (Either DecodeError Char)))
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
-> m (Step
        (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char))
forall a b. (a -> b) -> a -> b
$
                    case Word8
sv of
                        Word8
0 -> FreshPoint s (Either DecodeError Char)
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
forall s a. s -> Step s a
Skip (FreshPoint s (Either DecodeError Char)
 -> Step
      (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char))
-> FreshPoint s (Either DecodeError Char)
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
forall a b. (a -> b) -> a -> b
$ Either DecodeError Char
-> FreshPoint s (Either DecodeError Char)
-> FreshPoint s (Either DecodeError Char)
forall s a. a -> FreshPoint s a -> FreshPoint s a
YieldAndContinue (Char -> Either DecodeError Char
forall a b. b -> Either a b
Right (Char -> Either DecodeError Char)
-> Char -> Either DecodeError Char
forall a b. (a -> b) -> a -> b
$ Int -> Char
unsafeChr Int
cp)
                                        (s -> FreshPoint s (Either DecodeError Char)
forall s a. s -> FreshPoint s a
FreshPointDecodeInit s
s)
                        Word8
12 ->
                            FreshPoint s (Either DecodeError Char)
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
forall s a. s -> Step s a
Skip (FreshPoint s (Either DecodeError Char)
 -> Step
      (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char))
-> FreshPoint s (Either DecodeError Char)
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
forall a b. (a -> b) -> a -> b
$ Either DecodeError Char
-> FreshPoint s (Either DecodeError Char)
-> FreshPoint s (Either DecodeError Char)
forall s a. a -> FreshPoint s a -> FreshPoint s a
YieldAndContinue (DecodeError -> Either DecodeError Char
forall a b. a -> Either a b
Left (DecodeError -> Either DecodeError Char)
-> DecodeError -> Either DecodeError Char
forall a b. (a -> b) -> a -> b
$ Word8 -> Int -> DecodeError
DecodeError Word8
statePtr Int
codepointPtr)
                                        (s -> Word8 -> FreshPoint s (Either DecodeError Char)
forall s a. s -> Word8 -> FreshPoint s a
FreshPointDecodeInit1 s
s Word8
x)
                        Word8
_ -> FreshPoint s (Either DecodeError Char)
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
forall s a. s -> Step s a
Skip (s -> Word8 -> Int -> FreshPoint s (Either DecodeError Char)
forall s a. s -> Word8 -> Int -> FreshPoint s a
FreshPointDecoding s
s Word8
sv Int
cp)
            Skip s
s -> Step
  (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
-> m (Step
        (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char))
forall (m :: * -> *) a. Monad m => a -> m a
return (Step
   (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
 -> m (Step
         (FreshPoint s (Either DecodeError Char))
         (Either DecodeError Char)))
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
-> m (Step
        (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char))
forall a b. (a -> b) -> a -> b
$ FreshPoint s (Either DecodeError Char)
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
forall s a. s -> Step s a
Skip (s -> Word8 -> Int -> FreshPoint s (Either DecodeError Char)
forall s a. s -> Word8 -> Int -> FreshPoint s a
FreshPointDecoding s
s Word8
statePtr Int
codepointPtr)
            Step s Word8
Stop -> Step
  (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
-> m (Step
        (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char))
forall (m :: * -> *) a. Monad m => a -> m a
return (Step
   (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
 -> m (Step
         (FreshPoint s (Either DecodeError Char))
         (Either DecodeError Char)))
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
-> m (Step
        (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char))
forall a b. (a -> b) -> a -> b
$ FreshPoint s (Either DecodeError Char)
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
forall s a. s -> Step s a
Skip (FreshPoint s (Either DecodeError Char)
 -> Step
      (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char))
-> FreshPoint s (Either DecodeError Char)
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
forall a b. (a -> b) -> a -> b
$ Either DecodeError Char
-> FreshPoint s (Either DecodeError Char)
-> FreshPoint s (Either DecodeError Char)
forall s a. a -> FreshPoint s a -> FreshPoint s a
YieldAndContinue (DecodeError -> Either DecodeError Char
forall a b. a -> Either a b
Left (DecodeError -> Either DecodeError Char)
-> DecodeError -> Either DecodeError Char
forall a b. (a -> b) -> a -> b
$ Word8 -> Int -> DecodeError
DecodeError Word8
statePtr Int
codepointPtr) FreshPoint s (Either DecodeError Char)
forall s a. FreshPoint s a
Done

    step' Ptr Word8
_ State Stream m a
_ (YieldAndContinue Either DecodeError Char
c FreshPoint s (Either DecodeError Char)
s) = Step
  (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
-> m (Step
        (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char))
forall (m :: * -> *) a. Monad m => a -> m a
return (Step
   (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
 -> m (Step
         (FreshPoint s (Either DecodeError Char))
         (Either DecodeError Char)))
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
-> m (Step
        (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char))
forall a b. (a -> b) -> a -> b
$ Either DecodeError Char
-> FreshPoint s (Either DecodeError Char)
-> Step
     (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
forall s a. a -> s -> Step s a
Yield Either DecodeError Char
c FreshPoint s (Either DecodeError Char)
s
    step' Ptr Word8
_ State Stream m a
_ FreshPoint s (Either DecodeError Char)
Done = Step
  (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
-> m (Step
        (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char))
forall (m :: * -> *) a. Monad m => a -> m a
return Step
  (FreshPoint s (Either DecodeError Char)) (Either DecodeError Char)
forall s a. Step s a
Stop

{-# INLINE_NORMAL decodeUtf8EitherD #-}
decodeUtf8EitherD :: Monad m
    => Stream m Word8 -> Stream m (Either DecodeError Char)
decodeUtf8EitherD :: Stream m Word8 -> Stream m (Either DecodeError Char)
decodeUtf8EitherD = Word8
-> Int -> Stream m Word8 -> Stream m (Either DecodeError Char)
forall (m :: * -> *).
Monad m =>
Word8
-> Int -> Stream m Word8 -> Stream m (Either DecodeError Char)
resumeDecodeUtf8EitherD Word8
0 Int
0

data FlattenState s a
    = OuterLoop s !(Maybe (DecodeState, CodePoint))
    | InnerLoopDecodeInit s (ForeignPtr a) !(Ptr a) !(Ptr a)
    | InnerLoopDecodeFirst s (ForeignPtr a) !(Ptr a) !(Ptr a) Word8
    | InnerLoopDecoding s (ForeignPtr a) !(Ptr a) !(Ptr a)
        !DecodeState !CodePoint
    | YAndC !Char (FlattenState s a) -- These constructors can be
                                     -- encoded in the FreshPoint
                                     -- type, I prefer to keep these
                                     -- flat even though that means
                                     -- coming up with new names
    | D

-- The normal decodeUtf8 above should fuse with flattenArrays
-- to create this exact code but it doesn't for some reason, as of now this
-- remains the fastest way I could figure out to decodeUtf8.
--
-- XXX Add Proper error messages
{-# INLINE_NORMAL decodeUtf8ArraysWithD #-}
decodeUtf8ArraysWithD ::
       MonadIO m
    => CodingFailureMode
    -> Stream m (A.Array Word8)
    -> Stream m Char
decodeUtf8ArraysWithD :: CodingFailureMode -> Stream m (Array Word8) -> Stream m Char
decodeUtf8ArraysWithD CodingFailureMode
cfm (Stream State Stream m (Array Word8) -> s -> m (Step s (Array Word8))
step s
state) =
    let A.Array ForeignPtr Word8
p Ptr Word8
_ Ptr Word8
_ = Array Word8
utf8d
        !ptr :: Ptr Word8
ptr = (ForeignPtr Word8 -> Ptr Word8
forall a. ForeignPtr a -> Ptr a
unsafeForeignPtrToPtr ForeignPtr Word8
p)
    in (State Stream m Char
 -> FlattenState s Word8 -> m (Step (FlattenState s Word8) Char))
-> FlattenState s Word8 -> Stream m Char
forall (m :: * -> *) a s.
(State Stream m a -> s -> m (Step s a)) -> s -> Stream m a
Stream (Ptr Word8
-> State Stream m Char
-> FlattenState s Word8
-> m (Step (FlattenState s Word8) Char)
forall (m :: * -> *) a.
Ptr Word8
-> State Stream m a
-> FlattenState s Word8
-> m (Step (FlattenState s Word8) Char)
step' Ptr Word8
ptr) (s -> Maybe (Word8, Int) -> FlattenState s Word8
forall s a. s -> Maybe (Word8, Int) -> FlattenState s a
OuterLoop s
state Maybe (Word8, Int)
forall a. Maybe a
Nothing)
  where
    {-# INLINE transliterateOrError #-}
    transliterateOrError :: String -> FlattenState s a -> FlattenState s a
transliterateOrError String
e FlattenState s a
s =
        case CodingFailureMode
cfm of
            CodingFailureMode
ErrorOnCodingFailure -> String -> FlattenState s a
forall a. (?callStack::CallStack) => String -> a
error String
e
            CodingFailureMode
TransliterateCodingFailure -> Char -> FlattenState s a -> FlattenState s a
forall s a. Char -> FlattenState s a -> FlattenState s a
YAndC Char
replacementChar FlattenState s a
s
    {-# INLINE inputUnderflow #-}
    inputUnderflow :: FlattenState s a
inputUnderflow =
        case CodingFailureMode
cfm of
            CodingFailureMode
ErrorOnCodingFailure ->
                String -> FlattenState s a
forall a. (?callStack::CallStack) => String -> a
error
                    String
"Streamly.Internal.Data.Stream.StreamD.decodeUtf8ArraysWith: Input Underflow"
            CodingFailureMode
TransliterateCodingFailure -> Char -> FlattenState s a -> FlattenState s a
forall s a. Char -> FlattenState s a -> FlattenState s a
YAndC Char
replacementChar FlattenState s a
forall s a. FlattenState s a
D
    {-# INLINE_LATE step' #-}
    step' :: Ptr Word8
-> State Stream m a
-> FlattenState s Word8
-> m (Step (FlattenState s Word8) Char)
step' Ptr Word8
_ State Stream m a
gst (OuterLoop s
st Maybe (Word8, Int)
Nothing) = do
        Step s (Array Word8)
r <- State Stream m (Array Word8) -> s -> m (Step s (Array Word8))
step (State Stream m a -> State Stream m (Array Word8)
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a (n :: * -> *) b.
State t m a -> State t n b
adaptState State Stream m a
gst) s
st
        Step (FlattenState s Word8) Char
-> m (Step (FlattenState s Word8) Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Step (FlattenState s Word8) Char
 -> m (Step (FlattenState s Word8) Char))
-> Step (FlattenState s Word8) Char
-> m (Step (FlattenState s Word8) Char)
forall a b. (a -> b) -> a -> b
$
            case Step s (Array Word8)
r of
                Yield A.Array {Ptr Word8
ForeignPtr Word8
aBound :: forall a. Array a -> Ptr a
aEnd :: forall a. Array a -> Ptr a
aStart :: forall a. Array a -> ForeignPtr a
aBound :: Ptr Word8
aEnd :: Ptr Word8
aStart :: ForeignPtr Word8
..} s
s ->
                    let p :: Ptr Word8
p = ForeignPtr Word8 -> Ptr Word8
forall a. ForeignPtr a -> Ptr a
unsafeForeignPtrToPtr ForeignPtr Word8
aStart
                     in FlattenState s Word8 -> Step (FlattenState s Word8) Char
forall s a. s -> Step s a
Skip (s
-> ForeignPtr Word8
-> Ptr Word8
-> Ptr Word8
-> FlattenState s Word8
forall s a. s -> ForeignPtr a -> Ptr a -> Ptr a -> FlattenState s a
InnerLoopDecodeInit s
s ForeignPtr Word8
aStart Ptr Word8
p Ptr Word8
aEnd)
                Skip s
s -> FlattenState s Word8 -> Step (FlattenState s Word8) Char
forall s a. s -> Step s a
Skip (s -> Maybe (Word8, Int) -> FlattenState s Word8
forall s a. s -> Maybe (Word8, Int) -> FlattenState s a
OuterLoop s
s Maybe (Word8, Int)
forall a. Maybe a
Nothing)
                Step s (Array Word8)
Stop -> FlattenState s Word8 -> Step (FlattenState s Word8) Char
forall s a. s -> Step s a
Skip FlattenState s Word8
forall s a. FlattenState s a
D
    step' Ptr Word8
_ State Stream m a
gst (OuterLoop s
st dst :: Maybe (Word8, Int)
dst@(Just (Word8
ds, Int
cp))) = do
        Step s (Array Word8)
r <- State Stream m (Array Word8) -> s -> m (Step s (Array Word8))
step (State Stream m a -> State Stream m (Array Word8)
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a (n :: * -> *) b.
State t m a -> State t n b
adaptState State Stream m a
gst) s
st
        Step (FlattenState s Word8) Char
-> m (Step (FlattenState s Word8) Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Step (FlattenState s Word8) Char
 -> m (Step (FlattenState s Word8) Char))
-> Step (FlattenState s Word8) Char
-> m (Step (FlattenState s Word8) Char)
forall a b. (a -> b) -> a -> b
$
            case Step s (Array Word8)
r of
                Yield A.Array {Ptr Word8
ForeignPtr Word8
aBound :: Ptr Word8
aEnd :: Ptr Word8
aStart :: ForeignPtr Word8
aBound :: forall a. Array a -> Ptr a
aEnd :: forall a. Array a -> Ptr a
aStart :: forall a. Array a -> ForeignPtr a
..} s
s ->
                    let p :: Ptr Word8
p = ForeignPtr Word8 -> Ptr Word8
forall a. ForeignPtr a -> Ptr a
unsafeForeignPtrToPtr ForeignPtr Word8
aStart
                     in FlattenState s Word8 -> Step (FlattenState s Word8) Char
forall s a. s -> Step s a
Skip (s
-> ForeignPtr Word8
-> Ptr Word8
-> Ptr Word8
-> Word8
-> Int
-> FlattenState s Word8
forall s a.
s
-> ForeignPtr a
-> Ptr a
-> Ptr a
-> Word8
-> Int
-> FlattenState s a
InnerLoopDecoding s
s ForeignPtr Word8
aStart Ptr Word8
p Ptr Word8
aEnd Word8
ds Int
cp)
                Skip s
s -> FlattenState s Word8 -> Step (FlattenState s Word8) Char
forall s a. s -> Step s a
Skip (s -> Maybe (Word8, Int) -> FlattenState s Word8
forall s a. s -> Maybe (Word8, Int) -> FlattenState s a
OuterLoop s
s Maybe (Word8, Int)
dst)
                Step s (Array Word8)
Stop -> FlattenState s Word8 -> Step (FlattenState s Word8) Char
forall s a. s -> Step s a
Skip FlattenState s Word8
forall s a. FlattenState s a
inputUnderflow
    step' Ptr Word8
_ State Stream m a
_ (InnerLoopDecodeInit s
st ForeignPtr Word8
startf Ptr Word8
p Ptr Word8
end)
        | Ptr Word8
p Ptr Word8 -> Ptr Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Ptr Word8
end = do
            IO () -> m ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> m ()) -> IO () -> m ()
forall a b. (a -> b) -> a -> b
$ ForeignPtr Word8 -> IO ()
forall a. ForeignPtr a -> IO ()
touchForeignPtr ForeignPtr Word8
startf
            Step (FlattenState s Word8) Char
-> m (Step (FlattenState s Word8) Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Step (FlattenState s Word8) Char
 -> m (Step (FlattenState s Word8) Char))
-> Step (FlattenState s Word8) Char
-> m (Step (FlattenState s Word8) Char)
forall a b. (a -> b) -> a -> b
$ FlattenState s Word8 -> Step (FlattenState s Word8) Char
forall s a. s -> Step s a
Skip (FlattenState s Word8 -> Step (FlattenState s Word8) Char)
-> FlattenState s Word8 -> Step (FlattenState s Word8) Char
forall a b. (a -> b) -> a -> b
$ s -> Maybe (Word8, Int) -> FlattenState s Word8
forall s a. s -> Maybe (Word8, Int) -> FlattenState s a
OuterLoop s
st Maybe (Word8, Int)
forall a. Maybe a
Nothing
    step' Ptr Word8
_ State Stream m a
_ (InnerLoopDecodeInit s
st ForeignPtr Word8
startf Ptr Word8
p Ptr Word8
end) = do
        Word8
x <- IO Word8 -> m Word8
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Word8 -> m Word8) -> IO Word8 -> m Word8
forall a b. (a -> b) -> a -> b
$ Ptr Word8 -> IO Word8
forall a. Storable a => Ptr a -> IO a
peek Ptr Word8
p
        -- Note: It is important to use a ">" instead of a "<=" test here for
        -- GHC to generate code layout for default branch prediction for the
        -- common case. This is fragile and might change with the compiler
        -- versions, we need a more reliable "likely" primitive to control
        -- branch predication.
        case Word8
x Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
> Word8
0x7f of
            Bool
False ->
                Step (FlattenState s Word8) Char
-> m (Step (FlattenState s Word8) Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Step (FlattenState s Word8) Char
 -> m (Step (FlattenState s Word8) Char))
-> Step (FlattenState s Word8) Char
-> m (Step (FlattenState s Word8) Char)
forall a b. (a -> b) -> a -> b
$ FlattenState s Word8 -> Step (FlattenState s Word8) Char
forall s a. s -> Step s a
Skip (FlattenState s Word8 -> Step (FlattenState s Word8) Char)
-> FlattenState s Word8 -> Step (FlattenState s Word8) Char
forall a b. (a -> b) -> a -> b
$ Char -> FlattenState s Word8 -> FlattenState s Word8
forall s a. Char -> FlattenState s a -> FlattenState s a
YAndC
                    (Int -> Char
unsafeChr (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
x))
                    (s
-> ForeignPtr Word8
-> Ptr Word8
-> Ptr Word8
-> FlattenState s Word8
forall s a. s -> ForeignPtr a -> Ptr a -> Ptr a -> FlattenState s a
InnerLoopDecodeInit s
st ForeignPtr Word8
startf (Ptr Word8
p Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
1) Ptr Word8
end)
            -- Using a separate state here generates a jump to a separate code
            -- block in the core which seems to perform slightly better for the
            -- non-ascii case.
            Bool
True -> Step (FlattenState s Word8) Char
-> m (Step (FlattenState s Word8) Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Step (FlattenState s Word8) Char
 -> m (Step (FlattenState s Word8) Char))
-> Step (FlattenState s Word8) Char
-> m (Step (FlattenState s Word8) Char)
forall a b. (a -> b) -> a -> b
$ FlattenState s Word8 -> Step (FlattenState s Word8) Char
forall s a. s -> Step s a
Skip (FlattenState s Word8 -> Step (FlattenState s Word8) Char)
-> FlattenState s Word8 -> Step (FlattenState s Word8) Char
forall a b. (a -> b) -> a -> b
$ s
-> ForeignPtr Word8
-> Ptr Word8
-> Ptr Word8
-> Word8
-> FlattenState s Word8
forall s a.
s -> ForeignPtr a -> Ptr a -> Ptr a -> Word8 -> FlattenState s a
InnerLoopDecodeFirst s
st ForeignPtr Word8
startf Ptr Word8
p Ptr Word8
end Word8
x

    step' Ptr Word8
table State Stream m a
_ (InnerLoopDecodeFirst s
st ForeignPtr Word8
startf Ptr Word8
p Ptr Word8
end Word8
x) = do
        let (Tuple' Word8
sv Int
cp) = Ptr Word8 -> Word8 -> Tuple' Word8 Int
decode0 Ptr Word8
table Word8
x
        Step (FlattenState s Word8) Char
-> m (Step (FlattenState s Word8) Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Step (FlattenState s Word8) Char
 -> m (Step (FlattenState s Word8) Char))
-> Step (FlattenState s Word8) Char
-> m (Step (FlattenState s Word8) Char)
forall a b. (a -> b) -> a -> b
$
            case Word8
sv of
                Word8
12 ->
                    FlattenState s Word8 -> Step (FlattenState s Word8) Char
forall s a. s -> Step s a
Skip (FlattenState s Word8 -> Step (FlattenState s Word8) Char)
-> FlattenState s Word8 -> Step (FlattenState s Word8) Char
forall a b. (a -> b) -> a -> b
$
                    String -> FlattenState s Word8 -> FlattenState s Word8
forall s a. String -> FlattenState s a -> FlattenState s a
transliterateOrError
                        String
"Streamly.Internal.Data.Stream.StreamD.decodeUtf8ArraysWith: Invalid UTF8 codepoint encountered"
                        (s
-> ForeignPtr Word8
-> Ptr Word8
-> Ptr Word8
-> FlattenState s Word8
forall s a. s -> ForeignPtr a -> Ptr a -> Ptr a -> FlattenState s a
InnerLoopDecodeInit s
st ForeignPtr Word8
startf (Ptr Word8
p Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
1) Ptr Word8
end)
                Word8
0 -> String -> Step (FlattenState s Word8) Char
forall a. (?callStack::CallStack) => String -> a
error String
"unreachable state"
                Word8
_ -> FlattenState s Word8 -> Step (FlattenState s Word8) Char
forall s a. s -> Step s a
Skip (s
-> ForeignPtr Word8
-> Ptr Word8
-> Ptr Word8
-> Word8
-> Int
-> FlattenState s Word8
forall s a.
s
-> ForeignPtr a
-> Ptr a
-> Ptr a
-> Word8
-> Int
-> FlattenState s a
InnerLoopDecoding s
st ForeignPtr Word8
startf (Ptr Word8
p Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
1) Ptr Word8
end Word8
sv Int
cp)
    step' Ptr Word8
_ State Stream m a
_ (InnerLoopDecoding s
st ForeignPtr Word8
startf Ptr Word8
p Ptr Word8
end Word8
sv Int
cp)
        | Ptr Word8
p Ptr Word8 -> Ptr Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Ptr Word8
end = do
            IO () -> m ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> m ()) -> IO () -> m ()
forall a b. (a -> b) -> a -> b
$ ForeignPtr Word8 -> IO ()
forall a. ForeignPtr a -> IO ()
touchForeignPtr ForeignPtr Word8
startf
            Step (FlattenState s Word8) Char
-> m (Step (FlattenState s Word8) Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Step (FlattenState s Word8) Char
 -> m (Step (FlattenState s Word8) Char))
-> Step (FlattenState s Word8) Char
-> m (Step (FlattenState s Word8) Char)
forall a b. (a -> b) -> a -> b
$ FlattenState s Word8 -> Step (FlattenState s Word8) Char
forall s a. s -> Step s a
Skip (FlattenState s Word8 -> Step (FlattenState s Word8) Char)
-> FlattenState s Word8 -> Step (FlattenState s Word8) Char
forall a b. (a -> b) -> a -> b
$ s -> Maybe (Word8, Int) -> FlattenState s Word8
forall s a. s -> Maybe (Word8, Int) -> FlattenState s a
OuterLoop s
st ((Word8, Int) -> Maybe (Word8, Int)
forall a. a -> Maybe a
Just (Word8
sv, Int
cp))
    step' Ptr Word8
table State Stream m a
_ (InnerLoopDecoding s
st ForeignPtr Word8
startf Ptr Word8
p Ptr Word8
end Word8
statePtr Int
codepointPtr) = do
        Word8
x <- IO Word8 -> m Word8
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Word8 -> m Word8) -> IO Word8 -> m Word8
forall a b. (a -> b) -> a -> b
$ Ptr Word8 -> IO Word8
forall a. Storable a => Ptr a -> IO a
peek Ptr Word8
p
        let (Tuple' Word8
sv Int
cp) = Ptr Word8 -> Word8 -> Int -> Word8 -> Tuple' Word8 Int
decode1 Ptr Word8
table Word8
statePtr Int
codepointPtr Word8
x
        Step (FlattenState s Word8) Char
-> m (Step (FlattenState s Word8) Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Step (FlattenState s Word8) Char
 -> m (Step (FlattenState s Word8) Char))
-> Step (FlattenState s Word8) Char
-> m (Step (FlattenState s Word8) Char)
forall a b. (a -> b) -> a -> b
$
            case Word8
sv of
                Word8
0 ->
                    FlattenState s Word8 -> Step (FlattenState s Word8) Char
forall s a. s -> Step s a
Skip (FlattenState s Word8 -> Step (FlattenState s Word8) Char)
-> FlattenState s Word8 -> Step (FlattenState s Word8) Char
forall a b. (a -> b) -> a -> b
$
                    Char -> FlattenState s Word8 -> FlattenState s Word8
forall s a. Char -> FlattenState s a -> FlattenState s a
YAndC
                        (Int -> Char
unsafeChr Int
cp)
                        (s
-> ForeignPtr Word8
-> Ptr Word8
-> Ptr Word8
-> FlattenState s Word8
forall s a. s -> ForeignPtr a -> Ptr a -> Ptr a -> FlattenState s a
InnerLoopDecodeInit s
st ForeignPtr Word8
startf (Ptr Word8
p Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
1) Ptr Word8
end)
                Word8
12 ->
                    FlattenState s Word8 -> Step (FlattenState s Word8) Char
forall s a. s -> Step s a
Skip (FlattenState s Word8 -> Step (FlattenState s Word8) Char)
-> FlattenState s Word8 -> Step (FlattenState s Word8) Char
forall a b. (a -> b) -> a -> b
$
                    String -> FlattenState s Word8 -> FlattenState s Word8
forall s a. String -> FlattenState s a -> FlattenState s a
transliterateOrError
                        String
"Streamly.Internal.Data.Stream.StreamD.decodeUtf8ArraysWith: Invalid UTF8 codepoint encountered"
                        (s
-> ForeignPtr Word8
-> Ptr Word8
-> Ptr Word8
-> FlattenState s Word8
forall s a. s -> ForeignPtr a -> Ptr a -> Ptr a -> FlattenState s a
InnerLoopDecodeInit s
st ForeignPtr Word8
startf (Ptr Word8
p Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
1) Ptr Word8
end)
                Word8
_ -> FlattenState s Word8 -> Step (FlattenState s Word8) Char
forall s a. s -> Step s a
Skip (s
-> ForeignPtr Word8
-> Ptr Word8
-> Ptr Word8
-> Word8
-> Int
-> FlattenState s Word8
forall s a.
s
-> ForeignPtr a
-> Ptr a
-> Ptr a
-> Word8
-> Int
-> FlattenState s a
InnerLoopDecoding s
st ForeignPtr Word8
startf (Ptr Word8
p Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
1) Ptr Word8
end Word8
sv Int
cp)
    step' Ptr Word8
_ State Stream m a
_ (YAndC Char
c FlattenState s Word8
s) = Step (FlattenState s Word8) Char
-> m (Step (FlattenState s Word8) Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Step (FlattenState s Word8) Char
 -> m (Step (FlattenState s Word8) Char))
-> Step (FlattenState s Word8) Char
-> m (Step (FlattenState s Word8) Char)
forall a b. (a -> b) -> a -> b
$ Char -> FlattenState s Word8 -> Step (FlattenState s Word8) Char
forall s a. a -> s -> Step s a
Yield Char
c FlattenState s Word8
s
    step' Ptr Word8
_ State Stream m a
_ FlattenState s Word8
D = Step (FlattenState s Word8) Char
-> m (Step (FlattenState s Word8) Char)
forall (m :: * -> *) a. Monad m => a -> m a
return Step (FlattenState s Word8) Char
forall s a. Step s a
Stop

{-# INLINE decodeUtf8ArraysD #-}
decodeUtf8ArraysD ::
       MonadIO m
    => Stream m (A.Array Word8)
    -> Stream m Char
decodeUtf8ArraysD :: Stream m (Array Word8) -> Stream m Char
decodeUtf8ArraysD = CodingFailureMode -> Stream m (Array Word8) -> Stream m Char
forall (m :: * -> *).
MonadIO m =>
CodingFailureMode -> Stream m (Array Word8) -> Stream m Char
decodeUtf8ArraysWithD CodingFailureMode
ErrorOnCodingFailure

{-# INLINE decodeUtf8ArraysLenientD #-}
decodeUtf8ArraysLenientD ::
       MonadIO m
    => Stream m (A.Array Word8)
    -> Stream m Char
decodeUtf8ArraysLenientD :: Stream m (Array Word8) -> Stream m Char
decodeUtf8ArraysLenientD = CodingFailureMode -> Stream m (Array Word8) -> Stream m Char
forall (m :: * -> *).
MonadIO m =>
CodingFailureMode -> Stream m (Array Word8) -> Stream m Char
decodeUtf8ArraysWithD CodingFailureMode
TransliterateCodingFailure

data EncodeState s = EncodeState s !WList

-- More yield points improve performance, but I am not sure if they can cause
-- too much code bloat or some trouble with fusion. So keeping only two yield
-- points for now, one for the ascii chars (fast path) and one for all other
-- paths (slow path).
{-# INLINE_NORMAL encodeUtf8D #-}
encodeUtf8D :: Monad m => Stream m Char -> Stream m Word8
encodeUtf8D :: Stream m Char -> Stream m Word8
encodeUtf8D (Stream State Stream m Char -> s -> m (Step s Char)
step s
state) = (State Stream m Word8
 -> EncodeState s -> m (Step (EncodeState s) Word8))
-> EncodeState s -> Stream m Word8
forall (m :: * -> *) a s.
(State Stream m a -> s -> m (Step s a)) -> s -> Stream m a
Stream State Stream m Word8
-> EncodeState s -> m (Step (EncodeState s) Word8)
forall (m :: * -> *) a.
State Stream m a -> EncodeState s -> m (Step (EncodeState s) Word8)
step' (s -> WList -> EncodeState s
forall s. s -> WList -> EncodeState s
EncodeState s
state WList
WNil)
  where
    {-# INLINE_LATE step' #-}
    step' :: State Stream m a -> EncodeState s -> m (Step (EncodeState s) Word8)
step' State Stream m a
gst (EncodeState s
st WList
WNil) = do
        Step s Char
r <- State Stream m Char -> s -> m (Step s Char)
step (State Stream m a -> State Stream m Char
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a (n :: * -> *) b.
State t m a -> State t n b
adaptState State Stream m a
gst) s
st
        Step (EncodeState s) Word8 -> m (Step (EncodeState s) Word8)
forall (m :: * -> *) a. Monad m => a -> m a
return (Step (EncodeState s) Word8 -> m (Step (EncodeState s) Word8))
-> Step (EncodeState s) Word8 -> m (Step (EncodeState s) Word8)
forall a b. (a -> b) -> a -> b
$
            case Step s Char
r of
                Yield Char
c s
s ->
                    case Char -> Int
ord Char
c of
                        Int
x
                            | Int
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x7F ->
                                Word8 -> EncodeState s -> Step (EncodeState s) Word8
forall s a. a -> s -> Step s a
Yield (Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
x) (s -> WList -> EncodeState s
forall s. s -> WList -> EncodeState s
EncodeState s
s WList
WNil)
                            | Int
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x7FF -> EncodeState s -> Step (EncodeState s) Word8
forall s a. s -> Step s a
Skip (s -> WList -> EncodeState s
forall s. s -> WList -> EncodeState s
EncodeState s
s (Char -> WList
ord2 Char
c))
                            | Int
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0xFFFF ->
                                if Char -> Bool
isSurrogate Char
c
                                    then String -> Step (EncodeState s) Word8
forall a. (?callStack::CallStack) => String -> a
error
                                             String
"Streamly.Internal.Data.Stream.StreamD.encodeUtf8: Encountered a surrogate"
                                    else EncodeState s -> Step (EncodeState s) Word8
forall s a. s -> Step s a
Skip (s -> WList -> EncodeState s
forall s. s -> WList -> EncodeState s
EncodeState s
s (Char -> WList
ord3 Char
c))
                            | Bool
otherwise -> EncodeState s -> Step (EncodeState s) Word8
forall s a. s -> Step s a
Skip (s -> WList -> EncodeState s
forall s. s -> WList -> EncodeState s
EncodeState s
s (Char -> WList
ord4 Char
c))
                Skip s
s -> EncodeState s -> Step (EncodeState s) Word8
forall s a. s -> Step s a
Skip (s -> WList -> EncodeState s
forall s. s -> WList -> EncodeState s
EncodeState s
s WList
WNil)
                Step s Char
Stop -> Step (EncodeState s) Word8
forall s a. Step s a
Stop
    step' State Stream m a
_ (EncodeState s
s (WCons Word8
x WList
xs)) = Step (EncodeState s) Word8 -> m (Step (EncodeState s) Word8)
forall (m :: * -> *) a. Monad m => a -> m a
return (Step (EncodeState s) Word8 -> m (Step (EncodeState s) Word8))
-> Step (EncodeState s) Word8 -> m (Step (EncodeState s) Word8)
forall a b. (a -> b) -> a -> b
$ Word8 -> EncodeState s -> Step (EncodeState s) Word8
forall s a. a -> s -> Step s a
Yield Word8
x (s -> WList -> EncodeState s
forall s. s -> WList -> EncodeState s
EncodeState s
s WList
xs)


-- | Decode a stream of bytes to Unicode characters by mapping each byte to a
-- corresponding Unicode 'Char' in 0-255 range.
--
-- /Since: 0.7.0/
{-# INLINE decodeLatin1 #-}
decodeLatin1 :: (IsStream t, Monad m) => t m Word8 -> t m Char
decodeLatin1 :: t m Word8 -> t m Char
decodeLatin1 = (Word8 -> Char) -> t m Word8 -> t m Char
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a b.
(IsStream t, Monad m) =>
(a -> b) -> t m a -> t m b
S.map (Int -> Char
unsafeChr (Int -> Char) -> (Word8 -> Int) -> Word8 -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral)

-- | Encode a stream of Unicode characters to bytes by mapping each character
-- to a byte in 0-255 range. Throws an error if the input stream contains
-- characters beyond 255.
--
-- /Since: 0.7.0/
{-# INLINE encodeLatin1 #-}
encodeLatin1 :: (IsStream t, Monad m) => t m Char -> t m Word8
encodeLatin1 :: t m Char -> t m Word8
encodeLatin1 = (Char -> Word8) -> t m Char -> t m Word8
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a b.
(IsStream t, Monad m) =>
(a -> b) -> t m a -> t m b
S.map Char -> Word8
forall p. Num p => Char -> p
convert
    where
    convert :: Char -> p
convert Char
c =
        let codepoint :: Int
codepoint = Char -> Int
ord Char
c
        in if Int
codepoint Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
255
           then String -> p
forall a. (?callStack::CallStack) => String -> a
error (String -> p) -> String -> p
forall a b. (a -> b) -> a -> b
$ String
"Streamly.String.encodeLatin1 invalid " String -> ShowS
forall a. [a] -> [a] -> [a]
++
                      String
"input char codepoint " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
codepoint
           else Int -> p
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
codepoint

-- | Like 'encodeLatin1' but silently truncates and maps input characters beyond
-- 255 to (incorrect) chars in 0-255 range. No error or exception is thrown
-- when such truncation occurs.
--
-- /Since: 0.7.0/
{-# INLINE encodeLatin1Lax #-}
encodeLatin1Lax :: (IsStream t, Monad m) => t m Char -> t m Word8
encodeLatin1Lax :: t m Char -> t m Word8
encodeLatin1Lax = (Char -> Word8) -> t m Char -> t m Word8
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a b.
(IsStream t, Monad m) =>
(a -> b) -> t m a -> t m b
S.map (Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> (Char -> Int) -> Char -> Word8
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
ord)

-- | Decode a UTF-8 encoded bytestream to a stream of Unicode characters.
-- The incoming stream is truncated if an invalid codepoint is encountered.
--
-- /Since: 0.7.0/
{-# INLINE decodeUtf8 #-}
decodeUtf8 :: (Monad m, IsStream t) => t m Word8 -> t m Char
decodeUtf8 :: t m Word8 -> t m Char
decodeUtf8 = Stream m Char -> t m Char
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(IsStream t, Monad m) =>
Stream m a -> t m a
D.fromStreamD (Stream m Char -> t m Char)
-> (t m Word8 -> Stream m Char) -> t m Word8 -> t m Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Stream m Word8 -> Stream m Char
forall (m :: * -> *). Monad m => Stream m Word8 -> Stream m Char
decodeUtf8D (Stream m Word8 -> Stream m Char)
-> (t m Word8 -> Stream m Word8) -> t m Word8 -> Stream m Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. t m Word8 -> Stream m Word8
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(IsStream t, Monad m) =>
t m a -> Stream m a
D.toStreamD

-- |
--
-- /Internal/
{-# INLINE decodeUtf8Arrays #-}
decodeUtf8Arrays :: (MonadIO m, IsStream t) => t m (Array Word8) -> t m Char
decodeUtf8Arrays :: t m (Array Word8) -> t m Char
decodeUtf8Arrays = Stream m Char -> t m Char
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(IsStream t, Monad m) =>
Stream m a -> t m a
D.fromStreamD (Stream m Char -> t m Char)
-> (t m (Array Word8) -> Stream m Char)
-> t m (Array Word8)
-> t m Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Stream m (Array Word8) -> Stream m Char
forall (m :: * -> *).
MonadIO m =>
Stream m (Array Word8) -> Stream m Char
decodeUtf8ArraysD (Stream m (Array Word8) -> Stream m Char)
-> (t m (Array Word8) -> Stream m (Array Word8))
-> t m (Array Word8)
-> Stream m Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. t m (Array Word8) -> Stream m (Array Word8)
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(IsStream t, Monad m) =>
t m a -> Stream m a
D.toStreamD

-- | Decode a UTF-8 encoded bytestream to a stream of Unicode characters.
-- Any invalid codepoint encountered is replaced with the unicode replacement
-- character.
--
-- /Since: 0.7.0/
{-# INLINE decodeUtf8Lax #-}
decodeUtf8Lax :: (Monad m, IsStream t) => t m Word8 -> t m Char
decodeUtf8Lax :: t m Word8 -> t m Char
decodeUtf8Lax = Stream m Char -> t m Char
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(IsStream t, Monad m) =>
Stream m a -> t m a
D.fromStreamD (Stream m Char -> t m Char)
-> (t m Word8 -> Stream m Char) -> t m Word8 -> t m Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Stream m Word8 -> Stream m Char
forall (m :: * -> *). Monad m => Stream m Word8 -> Stream m Char
decodeUtf8LenientD (Stream m Word8 -> Stream m Char)
-> (t m Word8 -> Stream m Word8) -> t m Word8 -> Stream m Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. t m Word8 -> Stream m Word8
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(IsStream t, Monad m) =>
t m a -> Stream m a
D.toStreamD

-- |
--
-- /Internal/
{-# INLINE decodeUtf8Either #-}
decodeUtf8Either :: (Monad m, IsStream t)
    => t m Word8 -> t m (Either DecodeError Char)
decodeUtf8Either :: t m Word8 -> t m (Either DecodeError Char)
decodeUtf8Either = Stream m (Either DecodeError Char) -> t m (Either DecodeError Char)
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(IsStream t, Monad m) =>
Stream m a -> t m a
D.fromStreamD (Stream m (Either DecodeError Char)
 -> t m (Either DecodeError Char))
-> (t m Word8 -> Stream m (Either DecodeError Char))
-> t m Word8
-> t m (Either DecodeError Char)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Stream m Word8 -> Stream m (Either DecodeError Char)
forall (m :: * -> *).
Monad m =>
Stream m Word8 -> Stream m (Either DecodeError Char)
decodeUtf8EitherD (Stream m Word8 -> Stream m (Either DecodeError Char))
-> (t m Word8 -> Stream m Word8)
-> t m Word8
-> Stream m (Either DecodeError Char)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. t m Word8 -> Stream m Word8
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(IsStream t, Monad m) =>
t m a -> Stream m a
D.toStreamD

-- |
--
-- /Internal/
{-# INLINE resumeDecodeUtf8Either #-}
resumeDecodeUtf8Either
    :: (Monad m, IsStream t)
    => DecodeState
    -> CodePoint
    -> t m Word8
    -> t m (Either DecodeError Char)
resumeDecodeUtf8Either :: Word8 -> Int -> t m Word8 -> t m (Either DecodeError Char)
resumeDecodeUtf8Either Word8
st Int
cp =
    Stream m (Either DecodeError Char) -> t m (Either DecodeError Char)
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(IsStream t, Monad m) =>
Stream m a -> t m a
D.fromStreamD (Stream m (Either DecodeError Char)
 -> t m (Either DecodeError Char))
-> (t m Word8 -> Stream m (Either DecodeError Char))
-> t m Word8
-> t m (Either DecodeError Char)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8
-> Int -> Stream m Word8 -> Stream m (Either DecodeError Char)
forall (m :: * -> *).
Monad m =>
Word8
-> Int -> Stream m Word8 -> Stream m (Either DecodeError Char)
resumeDecodeUtf8EitherD Word8
st Int
cp (Stream m Word8 -> Stream m (Either DecodeError Char))
-> (t m Word8 -> Stream m Word8)
-> t m Word8
-> Stream m (Either DecodeError Char)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. t m Word8 -> Stream m Word8
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(IsStream t, Monad m) =>
t m a -> Stream m a
D.toStreamD

-- |
--
-- /Internal/
{-# INLINE decodeUtf8ArraysLenient #-}
decodeUtf8ArraysLenient ::
       (MonadIO m, IsStream t) => t m (Array Word8) -> t m Char
decodeUtf8ArraysLenient :: t m (Array Word8) -> t m Char
decodeUtf8ArraysLenient =
    Stream m Char -> t m Char
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(IsStream t, Monad m) =>
Stream m a -> t m a
D.fromStreamD (Stream m Char -> t m Char)
-> (t m (Array Word8) -> Stream m Char)
-> t m (Array Word8)
-> t m Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Stream m (Array Word8) -> Stream m Char
forall (m :: * -> *).
MonadIO m =>
Stream m (Array Word8) -> Stream m Char
decodeUtf8ArraysLenientD (Stream m (Array Word8) -> Stream m Char)
-> (t m (Array Word8) -> Stream m (Array Word8))
-> t m (Array Word8)
-> Stream m Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. t m (Array Word8) -> Stream m (Array Word8)
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(IsStream t, Monad m) =>
t m a -> Stream m a
D.toStreamD

-- | Encode a stream of Unicode characters to a UTF-8 encoded bytestream.
--
-- /Since: 0.7.0/
{-# INLINE encodeUtf8 #-}
encodeUtf8 :: (Monad m, IsStream t) => t m Char -> t m Word8
encodeUtf8 :: t m Char -> t m Word8
encodeUtf8 = Stream m Word8 -> t m Word8
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(IsStream t, Monad m) =>
Stream m a -> t m a
D.fromStreamD (Stream m Word8 -> t m Word8)
-> (t m Char -> Stream m Word8) -> t m Char -> t m Word8
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Stream m Char -> Stream m Word8
forall (m :: * -> *). Monad m => Stream m Char -> Stream m Word8
encodeUtf8D (Stream m Char -> Stream m Word8)
-> (t m Char -> Stream m Char) -> t m Char -> Stream m Word8
forall b c a. (b -> c) -> (a -> b) -> a -> c
. t m Char -> Stream m Char
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(IsStream t, Monad m) =>
t m a -> Stream m a
D.toStreamD

{-
-------------------------------------------------------------------------------
-- Utility operations on strings
-------------------------------------------------------------------------------

strip :: IsStream t => t m Char -> t m Char
strip = undefined

stripEnd :: IsStream t => t m Char -> t m Char
stripEnd = undefined
-}

-- | Remove leading whitespace from a string.
--
-- > stripStart = S.dropWhile isSpace
--
-- /Internal/
{-# INLINE stripStart #-}
stripStart :: (Monad m, IsStream t) => t m Char -> t m Char
stripStart :: t m Char -> t m Char
stripStart = (Char -> Bool) -> t m Char -> t m Char
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(IsStream t, Monad m) =>
(a -> Bool) -> t m a -> t m a
S.dropWhile Char -> Bool
isSpace

-- | Fold each line of the stream using the supplied 'Fold'
-- and stream the result.
--
-- >>> S.toList $ lines FL.toList (S.fromList "lines\nthis\nstring\n\n\n")
-- ["lines", "this", "string", "", ""]
--
-- > lines = S.splitOnSuffix (== '\n')
--
-- /Internal/
{-# INLINE lines #-}
lines :: (Monad m, IsStream t) => Fold m Char b -> t m Char -> t m b
lines :: Fold m Char b -> t m Char -> t m b
lines = (Char -> Bool) -> Fold m Char b -> t m Char -> t m b
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a b.
(IsStream t, Monad m) =>
(a -> Bool) -> Fold m a b -> t m a -> t m b
S.splitOnSuffix (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n')

foreign import ccall unsafe "u_iswspace"
  iswspace :: Int -> Int

-- | Code copied from base/Data.Char to INLINE it
{-# INLINE isSpace #-}
isSpace :: Char -> Bool
isSpace :: Char -> Bool
isSpace Char
c
  | Word
uc Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
0x377 = Word
uc Word -> Word -> Bool
forall a. Eq a => a -> a -> Bool
== Word
32 Bool -> Bool -> Bool
|| Word
uc Word -> Word -> Word
forall a. Num a => a -> a -> a
- Word
0x9 Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
4 Bool -> Bool -> Bool
|| Word
uc Word -> Word -> Bool
forall a. Eq a => a -> a -> Bool
== Word
0xa0
  | Bool
otherwise = Int -> Int
iswspace (Char -> Int
ord Char
c) Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
0
  where
    uc :: Word
uc = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c) :: Word

-- | Fold each word of the stream using the supplied 'Fold'
-- and stream the result.
--
-- >>>  S.toList $ words FL.toList (S.fromList "fold these     words")
-- ["fold", "these", "words"]
--
-- > words = S.wordsBy isSpace
--
-- /Internal/
{-# INLINE words #-}
words :: (Monad m, IsStream t) => Fold m Char b -> t m Char -> t m b
words :: Fold m Char b -> t m Char -> t m b
words = (Char -> Bool) -> Fold m Char b -> t m Char -> t m b
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a b.
(IsStream t, Monad m) =>
(a -> Bool) -> Fold m a b -> t m a -> t m b
S.wordsBy Char -> Bool
isSpace

-- | Unfold a stream to character streams using the supplied 'Unfold'
-- and concat the results suffixing a newline character @\\n@ to each stream.
--
-- /Internal/
{-# INLINE unlines #-}
unlines :: (MonadIO m, IsStream t) => Unfold m a Char -> t m a -> t m Char
unlines :: Unfold m a Char -> t m a -> t m Char
unlines = Char -> Unfold m a Char -> t m a -> t m Char
forall (t :: (* -> *) -> * -> *) (m :: * -> *) c b.
(IsStream t, Monad m) =>
c -> Unfold m b c -> t m b -> t m c
S.interposeSuffix Char
'\n'

-- | Unfold the elements of a stream to character streams using the supplied
-- 'Unfold' and concat the results with a whitespace character infixed between
-- the streams.
--
-- /Internal/
{-# INLINE unwords #-}
unwords :: (MonadIO m, IsStream t) => Unfold m a Char -> t m a -> t m Char
unwords :: Unfold m a Char -> t m a -> t m Char
unwords = Char -> Unfold m a Char -> t m a -> t m Char
forall (t :: (* -> *) -> * -> *) (m :: * -> *) c b.
(IsStream t, Monad m) =>
c -> Unfold m b c -> t m b -> t m c
S.interpose Char
' '