{-|
Description:    Character translation functions to and from the UTF-8 encoding scheme.

Copyright:      (c) 2020 Sam May
License:        MPL-2.0
Maintainer:     ag.eitilt@gmail.com

Stability:      experimental
Portability:    portable
-}
module Web.Willow.Common.Encoding.Utf8
    ( decoder
    , encoder
    , byteOrderMark
    ) where


import qualified Control.Applicative as A

import qualified Data.Bifunctor as F.B
import qualified Data.Bits as B
import qualified Data.ByteString as BS
import qualified Data.ByteString.Short as BS.SH
import qualified Data.Char as C
import qualified Data.Word as W

import Data.Functor ( ($>) )
import Data.Bits ( (.&.), (.|.) )

import Web.Willow.Common.Encoding.Common
import Web.Willow.Common.Parser


-- | The binary encoding of a byte-order mark character in UTF-8.
-- 
-- Fails if the stream does not start with the exact sequence @[0xEF, 0xBB,
-- 0xBF]@.  If successful, always returns 'Utf8'.
byteOrderMark
    :: (A.Alternative gather, Monad gather)
    => ParserT BS.ByteString gather Encoding
byteOrderMark :: ParserT ByteString gather Encoding
byteOrderMark = ByteString -> ParserT ByteString gather ByteString
forall (trans :: * -> *) stream token.
(MonadParser trans stream token, Eq stream) =>
stream -> trans stream
chunk ([Word8] -> ByteString
BS.pack [Word8
0xEF, Word8
0xBB, Word8
0xBF]) ParserT ByteString gather ByteString
-> Encoding -> ParserT ByteString gather Encoding
forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> Encoding
Utf8


-- | __Encoding:__
--      @[UTF-8 decoder]
--      (https://encoding.spec.whatwg.org/#utf-8-decoder)@
-- 
-- Decodes a 'Char' from a binary stream encoded with the 'Utf8' encoding
-- scheme, or returns 'Left' if the stream starts with an invalid byte
-- sequence.
decoder :: TextBuilder
decoder :: TextBuilder
decoder = do
    Word8
byte <- StateT (Confidence, ()) (Parser ByteString) Word8
forall (m :: * -> *) stream token.
MonadParser m stream token =>
m token
next
    Either ShortByteString Word
c <- Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
decoder' (Word
 -> StateT
      (Confidence, ()) (Parser ByteString) (Either ShortByteString Word))
-> Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall a b. (a -> b) -> a -> b
$ Word8 -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
byte
    case Either ShortByteString Word
c of
        Left ShortByteString
err -> [Word8] -> TextBuilder
forall state. [Word8] -> StateTextBuilder state
decoderFailure ([Word8] -> TextBuilder)
-> ([Word8] -> [Word8]) -> [Word8] -> TextBuilder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Word8
byte Word8 -> [Word8] -> [Word8]
forall a. a -> [a] -> [a]
:) ([Word8] -> TextBuilder) -> [Word8] -> TextBuilder
forall a b. (a -> b) -> a -> b
$ ShortByteString -> [Word8]
BS.SH.unpack ShortByteString
err
        Right Word
code -> [Word8] -> Char -> TextBuilder
forall state. [Word8] -> Char -> StateTextBuilder state
emit [Word8
byte] (Char -> TextBuilder) -> (Int -> Char) -> Int -> TextBuilder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Char
forall a. Enum a => Int -> a
toEnum (Int -> TextBuilder) -> Int -> TextBuilder
forall a b. (a -> b) -> a -> b
$ Word -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
code
  where decoder' :: Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
decoder' Word
b
            | Word
b Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
0x7F = Either ShortByteString Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall (m :: * -> *) a. Monad m => a -> m a
return (Either ShortByteString Word
 -> StateT
      (Confidence, ()) (Parser ByteString) (Either ShortByteString Word))
-> Either ShortByteString Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall a b. (a -> b) -> a -> b
$ Word -> Either ShortByteString Word
forall a b. b -> Either a b
Right Word
b
            | Word
b Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
0xC1 = Either ShortByteString Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall (m :: * -> *) a. Monad m => a -> m a
return (Either ShortByteString Word
 -> StateT
      (Confidence, ()) (Parser ByteString) (Either ShortByteString Word))
-> Either ShortByteString Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall a b. (a -> b) -> a -> b
$ ShortByteString -> Either ShortByteString Word
forall a b. a -> Either a b
Left ShortByteString
BS.SH.empty
            | Word
b Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
0xDF = StateT (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall c.
StateT (Confidence, ()) (Parser ByteString) (Either [Word8] c)
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString c)
packError (StateT (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
 -> StateT
      (Confidence, ()) (Parser ByteString) (Either ShortByteString Word))
-> (Word
    -> StateT
         (Confidence, ()) (Parser ByteString) (Either [Word8] Word))
-> Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word
-> Word8
-> Word8
-> Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
decodeChar Word
1 Word8
defL Word8
defH (Word
 -> StateT
      (Confidence, ()) (Parser ByteString) (Either ShortByteString Word))
-> Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall a b. (a -> b) -> a -> b
$ Word
b Word -> Word -> Word
forall a. Bits a => a -> a -> a
.&. Word
0x1F
            | Word
b Word -> Word -> Bool
forall a. Eq a => a -> a -> Bool
== Word
0xE0 = StateT (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall c.
StateT (Confidence, ()) (Parser ByteString) (Either [Word8] c)
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString c)
packError (StateT (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
 -> StateT
      (Confidence, ()) (Parser ByteString) (Either ShortByteString Word))
-> (Word
    -> StateT
         (Confidence, ()) (Parser ByteString) (Either [Word8] Word))
-> Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word
-> Word8
-> Word8
-> Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
decodeChar Word
2 Word8
0xA0 Word8
defH (Word
 -> StateT
      (Confidence, ()) (Parser ByteString) (Either ShortByteString Word))
-> Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall a b. (a -> b) -> a -> b
$ Word
b Word -> Word -> Word
forall a. Bits a => a -> a -> a
.&. Word
0xF
            | Word
b Word -> Word -> Bool
forall a. Eq a => a -> a -> Bool
== Word
0xED = StateT (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall c.
StateT (Confidence, ()) (Parser ByteString) (Either [Word8] c)
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString c)
packError (StateT (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
 -> StateT
      (Confidence, ()) (Parser ByteString) (Either ShortByteString Word))
-> (Word
    -> StateT
         (Confidence, ()) (Parser ByteString) (Either [Word8] Word))
-> Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word
-> Word8
-> Word8
-> Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
decodeChar Word
2 Word8
defL Word8
0x9F (Word
 -> StateT
      (Confidence, ()) (Parser ByteString) (Either ShortByteString Word))
-> Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall a b. (a -> b) -> a -> b
$ Word
b Word -> Word -> Word
forall a. Bits a => a -> a -> a
.&. Word
0xF
            | Word
b Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
0xEF = StateT (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall c.
StateT (Confidence, ()) (Parser ByteString) (Either [Word8] c)
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString c)
packError (StateT (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
 -> StateT
      (Confidence, ()) (Parser ByteString) (Either ShortByteString Word))
-> (Word
    -> StateT
         (Confidence, ()) (Parser ByteString) (Either [Word8] Word))
-> Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word
-> Word8
-> Word8
-> Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
decodeChar Word
2 Word8
defL Word8
defH (Word
 -> StateT
      (Confidence, ()) (Parser ByteString) (Either ShortByteString Word))
-> Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall a b. (a -> b) -> a -> b
$ Word
b Word -> Word -> Word
forall a. Bits a => a -> a -> a
.&. Word
0xF
            | Word
b Word -> Word -> Bool
forall a. Eq a => a -> a -> Bool
== Word
0xF0 = StateT (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall c.
StateT (Confidence, ()) (Parser ByteString) (Either [Word8] c)
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString c)
packError (StateT (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
 -> StateT
      (Confidence, ()) (Parser ByteString) (Either ShortByteString Word))
-> (Word
    -> StateT
         (Confidence, ()) (Parser ByteString) (Either [Word8] Word))
-> Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word
-> Word8
-> Word8
-> Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
decodeChar Word
3 Word8
0x90 Word8
defH (Word
 -> StateT
      (Confidence, ()) (Parser ByteString) (Either ShortByteString Word))
-> Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall a b. (a -> b) -> a -> b
$ Word
b Word -> Word -> Word
forall a. Bits a => a -> a -> a
.&. Word
0x7
            | Word
b Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
0xF3 = StateT (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall c.
StateT (Confidence, ()) (Parser ByteString) (Either [Word8] c)
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString c)
packError (StateT (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
 -> StateT
      (Confidence, ()) (Parser ByteString) (Either ShortByteString Word))
-> (Word
    -> StateT
         (Confidence, ()) (Parser ByteString) (Either [Word8] Word))
-> Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word
-> Word8
-> Word8
-> Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
decodeChar Word
3 Word8
defL Word8
defH (Word
 -> StateT
      (Confidence, ()) (Parser ByteString) (Either ShortByteString Word))
-> Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall a b. (a -> b) -> a -> b
$ Word
b Word -> Word -> Word
forall a. Bits a => a -> a -> a
.&. Word
0x7
            | Word
b Word -> Word -> Bool
forall a. Eq a => a -> a -> Bool
== Word
0xF4 = StateT (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall c.
StateT (Confidence, ()) (Parser ByteString) (Either [Word8] c)
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString c)
packError (StateT (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
 -> StateT
      (Confidence, ()) (Parser ByteString) (Either ShortByteString Word))
-> (Word
    -> StateT
         (Confidence, ()) (Parser ByteString) (Either [Word8] Word))
-> Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word
-> Word8
-> Word8
-> Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
decodeChar Word
3 Word8
defL Word8
0x8F (Word
 -> StateT
      (Confidence, ()) (Parser ByteString) (Either ShortByteString Word))
-> Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall a b. (a -> b) -> a -> b
$ Word
b Word -> Word -> Word
forall a. Bits a => a -> a -> a
.&. Word
0x7
            | Bool
otherwise = Either ShortByteString Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall (m :: * -> *) a. Monad m => a -> m a
return (Either ShortByteString Word
 -> StateT
      (Confidence, ()) (Parser ByteString) (Either ShortByteString Word))
-> Either ShortByteString Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString Word)
forall a b. (a -> b) -> a -> b
$ ShortByteString -> Either ShortByteString Word
forall a b. a -> Either a b
Left ShortByteString
BS.SH.empty
        defL :: Word8
defL = Word8
0x80
        defH :: Word8
defH = Word8
0xBF
        packError :: StateT (Confidence, ()) (Parser ByteString) (Either [Word8] c)
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString c)
packError = (Either [Word8] c -> Either ShortByteString c)
-> StateT (Confidence, ()) (Parser ByteString) (Either [Word8] c)
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString c)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((Either [Word8] c -> Either ShortByteString c)
 -> StateT (Confidence, ()) (Parser ByteString) (Either [Word8] c)
 -> StateT
      (Confidence, ()) (Parser ByteString) (Either ShortByteString c))
-> (Either [Word8] c -> Either ShortByteString c)
-> StateT (Confidence, ()) (Parser ByteString) (Either [Word8] c)
-> StateT
     (Confidence, ()) (Parser ByteString) (Either ShortByteString c)
forall a b. (a -> b) -> a -> b
$ ([Word8] -> ShortByteString)
-> Either [Word8] c -> Either ShortByteString c
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
F.B.first [Word8] -> ShortByteString
BS.SH.pack

-- | __Encoding:__
--      @[UTF-8 decoder]
--      (https://encoding.spec.whatwg.org/#utf-8-decoder)@
--      steps 4-11
-- 
-- Decodes the remainder of a 'Utf8' encoded 'Char', given its remaining length,
-- the allowed range of continuation bytes, and the so-far-parsed portion
-- (right-justified; i.e. when called for the second byte of a two-byte
-- character, the first byte would be trimmed with @'.&.' 0x1F@ /not/
-- 'B.shiftR').
decodeChar
    :: Word
        -- ^ The remaining length of the binary sequence.
    -> W.Word8
        -- ^ The lowest allowed value for this byte.
    -> W.Word8
        -- ^ The highest allowed value for this byte.
    -> Word
        -- ^ The accumulated code point bits so far.
    -> Decoder (Either [W.Word8] Word)
decodeChar :: Word
-> Word8
-> Word8
-> Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
decodeChar Word
0 Word8
_ Word8
_ Word
code = Either [Word8] Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
forall (m :: * -> *) a. Monad m => a -> m a
return (Either [Word8] Word
 -> StateT
      (Confidence, ()) (Parser ByteString) (Either [Word8] Word))
-> Either [Word8] Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
forall a b. (a -> b) -> a -> b
$ Word -> Either [Word8] Word
forall a b. b -> Either a b
Right Word
code
decodeChar Word
len Word8
low Word8
high Word
code = do
    Word8
byte <- StateT (Confidence, ()) (Parser ByteString) Word8
forall (m :: * -> *) stream token.
MonadParser m stream token =>
m token
next
    if Word8 -> Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> a -> Bool
range Word8
low Word8
high Word8
byte
        then ([Word8] -> [Word8]) -> Either [Word8] Word -> Either [Word8] Word
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
F.B.first (Word8
byte Word8 -> [Word8] -> [Word8]
forall a. a -> [a] -> [a]
:) (Either [Word8] Word -> Either [Word8] Word)
-> StateT
     (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
-> StateT
     (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
            Word
-> Word8
-> Word8
-> Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
decodeChar (Word
len Word -> Word -> Word
forall a. Num a => a -> a -> a
- Word
1) Word8
0x80 Word8
0xBF (Word -> Int -> Word
forall a. Bits a => a -> Int -> a
B.shiftL Word
code Int
6 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. (Word8 -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
byte Word -> Word -> Word
forall a. Bits a => a -> a -> a
.&. Word
0x3F))
        else Word8 -> StateT (Confidence, ()) (Parser ByteString) ()
forall (m :: * -> *) stream token.
MonadParser m stream token =>
token -> m ()
push Word8
byte StateT (Confidence, ()) (Parser ByteString) ()
-> Either [Word8] Word
-> StateT
     (Confidence, ()) (Parser ByteString) (Either [Word8] Word)
forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> [Word8] -> Either [Word8] Word
forall a b. a -> Either a b
Left []


-- | __Encoding:__
--      @[UTF-8 encoder]
--      (https://encoding.spec.whatwg.org/#utf-8-encoder)@
-- 
-- Encode the first 'Char' in a string according to the 'Utf8' encoding scheme,
-- or return that same character if that scheme doesn't define a binary
-- representation for it.
encoder :: BinaryBuilder
encoder :: BinaryBuilder
encoder = do
    Char
char <- StateT () (Parser Text) Char
forall (m :: * -> *) stream token.
MonadParser m stream token =>
m token
next
    (Int
len, Int
offset) <- Char -> StateT () (Parser Text) (Int, Int)
forall (f :: * -> *) a b.
(Num a, Num b, Alternative f) =>
Char -> f (a, b)
countOffset Char
char
    let first :: Word8
first = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ Int -> Int -> Int
forall a. Bits a => a -> Int -> a
B.shiftR (Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
char) (Int
6 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
len) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
offset
    Either Char ShortByteString -> BinaryBuilder
forall (m :: * -> *) a. Monad m => a -> m a
return (Either Char ShortByteString -> BinaryBuilder)
-> ([Word8] -> Either Char ShortByteString)
-> [Word8]
-> BinaryBuilder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortByteString -> Either Char ShortByteString
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ShortByteString -> Either Char ShortByteString)
-> ([Word8] -> ShortByteString)
-> [Word8]
-> Either Char ShortByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Word8] -> ShortByteString
BS.SH.pack ([Word8] -> BinaryBuilder) -> [Word8] -> BinaryBuilder
forall a b. (a -> b) -> a -> b
$ Word8
first Word8 -> [Word8] -> [Word8]
forall a. a -> [a] -> [a]
: Int -> Int -> [Word8]
forall t a. (Integral t, Bits t, Num a) => t -> Int -> [a]
trail (Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
char) Int
len
  where countOffset :: Char -> f (a, b)
countOffset Char
char
            | Char -> Bool
C.isAscii Char
char = (a, b) -> f (a, b)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (a
0, b
0)
            | Char
char Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x07FF' = (a, b) -> f (a, b)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (a
1, b
0xC0)
            | Char
char Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xFFFF' = (a, b) -> f (a, b)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (a
2, b
0xE0)
            | Char
char Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x10FFFF' = (a, b) -> f (a, b)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (a
3, b
0xF0)
            -- 'Char' can't even be initialized this big, but include to make
            -- the pattern match complete.
            | Bool
otherwise = f (a, b)
forall (f :: * -> *) a. Alternative f => f a
A.empty
        trail :: t -> Int -> [a]
trail t
_ Int
0 = []
        trail t
code Int
count = t -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (t
0x80 t -> t -> t
forall a. Bits a => a -> a -> a
.|. (t -> Int -> t
forall a. Bits a => a -> Int -> a
B.shiftR t
code (Int
6 Int -> Int -> Int
forall a. Num a => a -> a -> a
* (Int
count Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)) t -> t -> t
forall a. Bits a => a -> a -> a
.&. t
0x3F)) a -> [a] -> [a]
forall a. a -> [a] -> [a]
:
            t -> Int -> [a]
trail t
code (Int
count Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)