{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE Safe #-}
module Data.YAML.Token.Encoding
( decode
, Encoding(..)
) where
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.Char8 as BLC
import Util
data Encoding = UTF8
| UTF16LE
| UTF16BE
| UTF32LE
| UTF32BE
deriving (Encoding -> Encoding -> Bool
(Encoding -> Encoding -> Bool)
-> (Encoding -> Encoding -> Bool) -> Eq Encoding
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Encoding -> Encoding -> Bool
$c/= :: Encoding -> Encoding -> Bool
== :: Encoding -> Encoding -> Bool
$c== :: Encoding -> Encoding -> Bool
Eq,(forall x. Encoding -> Rep Encoding x)
-> (forall x. Rep Encoding x -> Encoding) -> Generic Encoding
forall x. Rep Encoding x -> Encoding
forall x. Encoding -> Rep Encoding x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Encoding x -> Encoding
$cfrom :: forall x. Encoding -> Rep Encoding x
Generic)
instance Show Encoding where
show :: Encoding -> String
show UTF8 = "UTF-8"
show UTF16LE = "UTF-16LE"
show UTF16BE = "UTF-16BE"
show UTF32LE = "UTF-32LE"
show UTF32BE = "UTF-32BE"
instance NFData Encoding where rnf :: Encoding -> ()
rnf !Encoding
_ = ()
decode :: BLC.ByteString -> (Encoding, [(Int, Char)])
decode :: ByteString -> (Encoding, [(Int, Char)])
decode text :: ByteString
text = (Encoding
encoding, Encoding -> ByteString -> [(Int, Char)]
undoEncoding Encoding
encoding ByteString
text)
where encoding :: Encoding
encoding = String -> Encoding
detectEncoding (String -> Encoding) -> String -> Encoding
forall a b. (a -> b) -> a -> b
$ ByteString -> String
BLC.unpack (ByteString -> String) -> ByteString -> String
forall a b. (a -> b) -> a -> b
$ Int64 -> ByteString -> ByteString
BLC.take 4 ByteString
text
detectEncoding :: [Char] -> Encoding
detectEncoding :: String -> Encoding
detectEncoding text :: String
text =
case String
text of
'\x00' : '\x00' : '\xFE' : '\xFF' : _ -> Encoding
UTF32BE
'\x00' : '\x00' : '\x00' : _ : _ -> Encoding
UTF32BE
'\xFF' : '\xFE' : '\x00' : '\x00' : _ -> Encoding
UTF32LE
_ : '\x00' : '\x00' : '\x00' : _ -> Encoding
UTF32LE
'\xFE' : '\xFF' : _ -> Encoding
UTF16BE
'\x00' : _ : _ -> Encoding
UTF16BE
'\xFF' : '\xFE' : _ -> Encoding
UTF16LE
_ : '\x00' : _ -> Encoding
UTF16LE
'\xEF' : '\xBB' : '\xBF' : _ -> Encoding
UTF8
_ -> Encoding
UTF8
undoEncoding :: Encoding -> BLC.ByteString -> [(Int, Char)]
undoEncoding :: Encoding -> ByteString -> [(Int, Char)]
undoEncoding encoding :: Encoding
encoding bytes :: ByteString
bytes =
case Encoding
encoding of
UTF8 -> ByteString -> Int -> [(Int, Char)]
undoUTF8 ByteString
bytes 0
UTF16LE -> [(Int, Char)] -> [(Int, Char)]
combinePairs ([(Int, Char)] -> [(Int, Char)]) -> [(Int, Char)] -> [(Int, Char)]
forall a b. (a -> b) -> a -> b
$ ByteString -> Int -> [(Int, Char)]
undoUTF16LE ByteString
bytes 0
UTF16BE -> [(Int, Char)] -> [(Int, Char)]
combinePairs ([(Int, Char)] -> [(Int, Char)]) -> [(Int, Char)] -> [(Int, Char)]
forall a b. (a -> b) -> a -> b
$ ByteString -> Int -> [(Int, Char)]
undoUTF16BE ByteString
bytes 0
UTF32LE -> [(Int, Char)] -> [(Int, Char)]
forall a. [(a, Char)] -> [(a, Char)]
validateScalars ([(Int, Char)] -> [(Int, Char)]) -> [(Int, Char)] -> [(Int, Char)]
forall a b. (a -> b) -> a -> b
$ ByteString -> Int -> [(Int, Char)]
undoUTF32LE ByteString
bytes 0
UTF32BE -> [(Int, Char)] -> [(Int, Char)]
forall a. [(a, Char)] -> [(a, Char)]
validateScalars ([(Int, Char)] -> [(Int, Char)]) -> [(Int, Char)] -> [(Int, Char)]
forall a b. (a -> b) -> a -> b
$ ByteString -> Int -> [(Int, Char)]
undoUTF32BE ByteString
bytes 0
where
validateScalars :: [(a, Char)] -> [(a, Char)]
validateScalars [] = []
validateScalars (x :: (a, Char)
x@(_,c :: Char
c):rest :: [(a, Char)]
rest)
| '\xD800' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c, Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '\xDFFF' = String -> [(a, Char)]
forall a. HasCallStack => String -> a
error "UTF-32 stream contains invalid surrogate code-point"
| Bool
otherwise = (a, Char)
x (a, Char) -> [(a, Char)] -> [(a, Char)]
forall a. a -> [a] -> [a]
: [(a, Char)] -> [(a, Char)]
validateScalars [(a, Char)]
rest
hasFewerThan :: Int -> BLC.ByteString -> Bool
hasFewerThan :: Int -> ByteString -> Bool
hasFewerThan n :: Int
n bytes :: ByteString
bytes
| Int
n Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 1 = ByteString -> Bool
BLC.null ByteString
bytes
| Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 1 = ByteString -> Bool
BLC.null ByteString
bytes Bool -> Bool -> Bool
|| Int -> ByteString -> Bool
hasFewerThan (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1) (ByteString -> ByteString
BLC.tail ByteString
bytes)
| Bool
otherwise = Bool
False
undoUTF32LE :: BLC.ByteString -> Int -> [(Int, Char)]
undoUTF32LE :: ByteString -> Int -> [(Int, Char)]
undoUTF32LE bytes :: ByteString
bytes offset :: Int
offset
| ByteString -> Bool
BLC.null ByteString
bytes = []
| Int -> ByteString -> Bool
hasFewerThan 4 ByteString
bytes = String -> [(Int, Char)]
forall a. HasCallStack => String -> a
error "UTF-32LE input contains invalid number of bytes"
| Bool
otherwise = let first :: Char
first = ByteString -> Char
BLC.head ByteString
bytes
bytes' :: ByteString
bytes' = ByteString -> ByteString
BLC.tail ByteString
bytes
second :: Char
second = ByteString -> Char
BLC.head ByteString
bytes'
bytes'' :: ByteString
bytes'' = ByteString -> ByteString
BLC.tail ByteString
bytes'
third :: Char
third = ByteString -> Char
BLC.head ByteString
bytes''
bytes''' :: ByteString
bytes''' = ByteString -> ByteString
BLC.tail ByteString
bytes''
fourth :: Char
fourth = ByteString -> Char
BLC.head ByteString
bytes'''
rest :: ByteString
rest = ByteString -> ByteString
BLC.tail ByteString
bytes'''
in (Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 4,
Int -> Char
chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ Char -> Int
ord Char
first
Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 256 Int -> Int -> Int
forall a. Num a => a -> a -> a
* (Char -> Int
ord Char
second
Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 256 Int -> Int -> Int
forall a. Num a => a -> a -> a
* (Char -> Int
ord Char
third
Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 256 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Char -> Int
ord Char
fourth)))(Int, Char) -> [(Int, Char)] -> [(Int, Char)]
forall a. a -> [a] -> [a]
:(ByteString -> Int -> [(Int, Char)]
undoUTF32LE ByteString
rest (Int -> [(Int, Char)]) -> Int -> [(Int, Char)]
forall a b. (a -> b) -> a -> b
$ Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 4)
undoUTF32BE :: BLC.ByteString -> Int -> [(Int, Char)]
undoUTF32BE :: ByteString -> Int -> [(Int, Char)]
undoUTF32BE bytes :: ByteString
bytes offset :: Int
offset
| ByteString -> Bool
BLC.null ByteString
bytes = []
| Int -> ByteString -> Bool
hasFewerThan 4 ByteString
bytes = String -> [(Int, Char)]
forall a. HasCallStack => String -> a
error "UTF-32BE input contains invalid number of bytes"
| Bool
otherwise = let first :: Char
first = ByteString -> Char
BLC.head ByteString
bytes
bytes' :: ByteString
bytes' = ByteString -> ByteString
BLC.tail ByteString
bytes
second :: Char
second = ByteString -> Char
BLC.head ByteString
bytes'
bytes'' :: ByteString
bytes'' = ByteString -> ByteString
BLC.tail ByteString
bytes'
third :: Char
third = ByteString -> Char
BLC.head ByteString
bytes''
bytes''' :: ByteString
bytes''' = ByteString -> ByteString
BLC.tail ByteString
bytes''
fourth :: Char
fourth = ByteString -> Char
BLC.head ByteString
bytes'''
rest :: ByteString
rest = ByteString -> ByteString
BLC.tail ByteString
bytes'''
in (Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 4,
Int -> Char
chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ Char -> Int
ord Char
fourth
Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 256 Int -> Int -> Int
forall a. Num a => a -> a -> a
* (Char -> Int
ord Char
third
Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 256 Int -> Int -> Int
forall a. Num a => a -> a -> a
* (Char -> Int
ord Char
second
Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 256 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Char -> Int
ord Char
first)))(Int, Char) -> [(Int, Char)] -> [(Int, Char)]
forall a. a -> [a] -> [a]
:(ByteString -> Int -> [(Int, Char)]
undoUTF32BE ByteString
rest (Int -> [(Int, Char)]) -> Int -> [(Int, Char)]
forall a b. (a -> b) -> a -> b
$ Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 4)
combinePairs :: [(Int, Char)] -> [(Int, Char)]
combinePairs :: [(Int, Char)] -> [(Int, Char)]
combinePairs [] = []
combinePairs (head' :: (Int, Char)
head'@(_, head_char :: Char
head_char):tail' :: [(Int, Char)]
tail')
| '\xD800' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
head_char Bool -> Bool -> Bool
&& Char
head_char Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '\xDBFF' = (Int, Char) -> [(Int, Char)] -> [(Int, Char)]
combineLead (Int, Char)
head' [(Int, Char)]
tail'
| '\xDC00' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
head_char Bool -> Bool -> Bool
&& Char
head_char Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '\xDFFF' = String -> [(Int, Char)]
forall a. HasCallStack => String -> a
error "UTF-16 contains trail surrogate without lead surrogate"
| Bool
otherwise = (Int, Char)
head'(Int, Char) -> [(Int, Char)] -> [(Int, Char)]
forall a. a -> [a] -> [a]
:[(Int, Char)] -> [(Int, Char)]
combinePairs [(Int, Char)]
tail'
combineLead :: (Int, Char) -> [(Int, Char)] -> [(Int, Char)]
combineLead :: (Int, Char) -> [(Int, Char)] -> [(Int, Char)]
combineLead _lead :: (Int, Char)
_lead [] = String -> [(Int, Char)]
forall a. HasCallStack => String -> a
error "UTF-16 contains lead surrogate as final character"
combineLead (_, lead_char :: Char
lead_char) ((trail_offset :: Int
trail_offset, trail_char :: Char
trail_char):rest :: [(Int, Char)]
rest)
| '\xDC00' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
trail_char Bool -> Bool -> Bool
&& Char
trail_char Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '\xDFFF' = (Int
trail_offset, Char -> Char -> Char
combineSurrogates Char
lead_char Char
trail_char)(Int, Char) -> [(Int, Char)] -> [(Int, Char)]
forall a. a -> [a] -> [a]
:[(Int, Char)] -> [(Int, Char)]
combinePairs [(Int, Char)]
rest
| Bool
otherwise = String -> [(Int, Char)]
forall a. HasCallStack => String -> a
error "UTF-16 contains lead surrogate without trail surrogate"
surrogateOffset :: Int
surrogateOffset :: Int
surrogateOffset = 0x10000 Int -> Int -> Int
forall a. Num a => a -> a -> a
- (0xD800 Int -> Int -> Int
forall a. Num a => a -> a -> a
* 1024) Int -> Int -> Int
forall a. Num a => a -> a -> a
- 0xDC00
combineSurrogates :: Char -> Char -> Char
combineSurrogates :: Char -> Char -> Char
combineSurrogates lead :: Char
lead trail :: Char
trail = Int -> Char
chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ Char -> Int
ord Char
lead Int -> Int -> Int
forall a. Num a => a -> a -> a
* 1024 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Char -> Int
ord Char
trail Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
surrogateOffset
undoUTF16LE :: BLC.ByteString -> Int -> [(Int, Char)]
undoUTF16LE :: ByteString -> Int -> [(Int, Char)]
undoUTF16LE bytes :: ByteString
bytes offset :: Int
offset
| ByteString -> Bool
BLC.null ByteString
bytes = []
| Int -> ByteString -> Bool
hasFewerThan 2 ByteString
bytes = String -> [(Int, Char)]
forall a. HasCallStack => String -> a
error "UTF-16LE input contains odd number of bytes"
| Bool
otherwise = let low :: Char
low = ByteString -> Char
BLC.head ByteString
bytes
bytes' :: ByteString
bytes' = ByteString -> ByteString
BLC.tail ByteString
bytes
high :: Char
high = ByteString -> Char
BLC.head ByteString
bytes'
rest :: ByteString
rest = ByteString -> ByteString
BLC.tail ByteString
bytes'
in (Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 2, Int -> Char
chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ Char -> Int
ord Char
low Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Char -> Int
ord Char
high Int -> Int -> Int
forall a. Num a => a -> a -> a
* 256)(Int, Char) -> [(Int, Char)] -> [(Int, Char)]
forall a. a -> [a] -> [a]
:(ByteString -> Int -> [(Int, Char)]
undoUTF16LE ByteString
rest (Int -> [(Int, Char)]) -> Int -> [(Int, Char)]
forall a b. (a -> b) -> a -> b
$ Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 2)
undoUTF16BE :: BLC.ByteString -> Int -> [(Int, Char)]
undoUTF16BE :: ByteString -> Int -> [(Int, Char)]
undoUTF16BE bytes :: ByteString
bytes offset :: Int
offset
| ByteString -> Bool
BLC.null ByteString
bytes = []
| Int -> ByteString -> Bool
hasFewerThan 2 ByteString
bytes = String -> [(Int, Char)]
forall a. HasCallStack => String -> a
error "UTF-16BE input contains odd number of bytes"
| Bool
otherwise = let high :: Char
high = ByteString -> Char
BLC.head ByteString
bytes
bytes' :: ByteString
bytes' = ByteString -> ByteString
BLC.tail ByteString
bytes
low :: Char
low = ByteString -> Char
BLC.head ByteString
bytes'
rest :: ByteString
rest = ByteString -> ByteString
BLC.tail ByteString
bytes'
in (Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 2, Int -> Char
chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ Char -> Int
ord Char
low Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Char -> Int
ord Char
high Int -> Int -> Int
forall a. Num a => a -> a -> a
* 256)(Int, Char) -> [(Int, Char)] -> [(Int, Char)]
forall a. a -> [a] -> [a]
:(ByteString -> Int -> [(Int, Char)]
undoUTF16BE ByteString
rest (Int -> [(Int, Char)]) -> Int -> [(Int, Char)]
forall a b. (a -> b) -> a -> b
$ Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 2)
undoUTF8 :: BLC.ByteString -> Int -> [(Int, Char)]
undoUTF8 :: ByteString -> Int -> [(Int, Char)]
undoUTF8 bytes :: ByteString
bytes = [Word8] -> Int -> [(Int, Char)]
undoUTF8' (ByteString -> [Word8]
BL.unpack ByteString
bytes)
w2c :: Word8 -> Char
w2c :: Word8 -> Char
w2c = Int -> Char
chr (Int -> Char) -> (Word8 -> Int) -> Word8 -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral
w2i :: Word8 -> Int
w2i :: Word8 -> Int
w2i = Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral
undoUTF8' :: [Word8] -> Int -> [(Int, Char)]
undoUTF8' :: [Word8] -> Int -> [(Int, Char)]
undoUTF8' [] _ = []
undoUTF8' (first :: Word8
first:rest :: [Word8]
rest) !Int
offset
| Word8
first Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< 0x80 = (Int
offset', Char
c) (Int, Char) -> [(Int, Char)] -> [(Int, Char)]
forall a. a -> [a] -> [a]
: [Word8] -> Int -> [(Int, Char)]
undoUTF8' [Word8]
rest Int
offset'
where
!offset' :: Int
offset' = Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1
!c :: Char
c = Word8 -> Char
w2c Word8
first
undoUTF8' (first :: Word8
first:rest :: [Word8]
rest) !Int
offset
| Word8
first Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< 0xC0 = String -> [(Int, Char)]
forall a. HasCallStack => String -> a
error "UTF-8 input contains invalid first byte"
| Word8
first Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< 0xE0 = Word8 -> Int -> [Word8] -> [(Int, Char)]
decodeTwoUTF8 Word8
first Int
offset [Word8]
rest
| Word8
first Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< 0xF0 = Word8 -> Int -> [Word8] -> [(Int, Char)]
decodeThreeUTF8 Word8
first Int
offset [Word8]
rest
| Word8
first Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< 0xF8 = Word8 -> Int -> [Word8] -> [(Int, Char)]
decodeFourUTF8 Word8
first Int
offset [Word8]
rest
| Bool
otherwise = String -> [(Int, Char)]
forall a. HasCallStack => String -> a
error "UTF-8 input contains invalid first byte"
decodeTwoUTF8 :: Word8 -> Int -> [Word8] -> [(Int, Char)]
decodeTwoUTF8 :: Word8 -> Int -> [Word8] -> [(Int, Char)]
decodeTwoUTF8 first :: Word8
first offset :: Int
offset (second :: Word8
second:rest :: [Word8]
rest)
| Word8
second Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< 0x80 Bool -> Bool -> Bool
|| 0xBF Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
second = String -> [(Int, Char)]
forall a. HasCallStack => String -> a
error "UTF-8 double byte char has invalid second byte"
| Bool
otherwise = (Int
offset', Char
c) (Int, Char) -> [(Int, Char)] -> [(Int, Char)]
forall a. a -> [a] -> [a]
: [Word8] -> Int -> [(Int, Char)]
undoUTF8' [Word8]
rest Int
offset'
where
!offset' :: Int
offset' = Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 2
!c :: Char
c = Int -> Char
chr ((Word8 -> Int
w2i Word8
first Int -> Int -> Int
forall a. Num a => a -> a -> a
- 0xc0) Int -> Int -> Int
forall a. Num a => a -> a -> a
* 0x40 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Word8 -> Int
w2i Word8
second Int -> Int -> Int
forall a. Num a => a -> a -> a
- 0x80))
decodeTwoUTF8 _ _ [] = String -> [(Int, Char)]
forall a. HasCallStack => String -> a
error "UTF-8 double byte char is missing second byte at eof"
decodeThreeUTF8 :: Word8 -> Int -> [Word8] -> [(Int, Char)]
decodeThreeUTF8 :: Word8 -> Int -> [Word8] -> [(Int, Char)]
decodeThreeUTF8 first :: Word8
first offset :: Int
offset (second :: Word8
second:third :: Word8
third:rest :: [Word8]
rest)
| Word8
second Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< 0x80 Bool -> Bool -> Bool
|| 0xBF Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
second = String -> [(Int, Char)]
forall a. HasCallStack => String -> a
error "UTF-8 triple byte char has invalid second byte"
| Word8
third Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< 0x80 Bool -> Bool -> Bool
|| 0xBF Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
third = String -> [(Int, Char)]
forall a. HasCallStack => String -> a
error "UTF-8 triple byte char has invalid third byte"
| Bool
otherwise = (Int
offset', Char
c)(Int, Char) -> [(Int, Char)] -> [(Int, Char)]
forall a. a -> [a] -> [a]
: [Word8] -> Int -> [(Int, Char)]
undoUTF8' [Word8]
rest Int
offset'
where
!offset' :: Int
offset' = Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 3
!c :: Char
c = Int -> Char
chr((Word8 -> Int
w2i Word8
first Int -> Int -> Int
forall a. Num a => a -> a -> a
- 0xE0) Int -> Int -> Int
forall a. Num a => a -> a -> a
* 0x1000 Int -> Int -> Int
forall a. Num a => a -> a -> a
+
(Word8 -> Int
w2i Word8
second Int -> Int -> Int
forall a. Num a => a -> a -> a
- 0x80) Int -> Int -> Int
forall a. Num a => a -> a -> a
* 0x40 Int -> Int -> Int
forall a. Num a => a -> a -> a
+
(Word8 -> Int
w2i Word8
third Int -> Int -> Int
forall a. Num a => a -> a -> a
- 0x80))
decodeThreeUTF8 _ _ _ =String -> [(Int, Char)]
forall a. HasCallStack => String -> a
error "UTF-8 triple byte char is missing bytes at eof"
decodeFourUTF8 :: Word8 -> Int -> [Word8] -> [(Int, Char)]
decodeFourUTF8 :: Word8 -> Int -> [Word8] -> [(Int, Char)]
decodeFourUTF8 first :: Word8
first offset :: Int
offset (second :: Word8
second:third :: Word8
third:fourth :: Word8
fourth:rest :: [Word8]
rest)
| Word8
second Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< 0x80 Bool -> Bool -> Bool
|| 0xBF Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
second = String -> [(Int, Char)]
forall a. HasCallStack => String -> a
error "UTF-8 quad byte char has invalid second byte"
| Word8
third Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< 0x80 Bool -> Bool -> Bool
|| 0xBF Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
third = String -> [(Int, Char)]
forall a. HasCallStack => String -> a
error "UTF-8 quad byte char has invalid third byte"
| Word8
third Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< 0x80 Bool -> Bool -> Bool
|| 0xBF Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
third = String -> [(Int, Char)]
forall a. HasCallStack => String -> a
error "UTF-8 quad byte char has invalid fourth byte"
| Bool
otherwise = (Int
offset', Char
c) (Int, Char) -> [(Int, Char)] -> [(Int, Char)]
forall a. a -> [a] -> [a]
: [Word8] -> Int -> [(Int, Char)]
undoUTF8' [Word8]
rest Int
offset'
where
!offset' :: Int
offset' = Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 4
!c :: Char
c = Int -> Char
chr((Word8 -> Int
w2i Word8
first Int -> Int -> Int
forall a. Num a => a -> a -> a
- 0xF0) Int -> Int -> Int
forall a. Num a => a -> a -> a
* 0x40000 Int -> Int -> Int
forall a. Num a => a -> a -> a
+
(Word8 -> Int
w2i Word8
second Int -> Int -> Int
forall a. Num a => a -> a -> a
- 0x80) Int -> Int -> Int
forall a. Num a => a -> a -> a
* 0x1000 Int -> Int -> Int
forall a. Num a => a -> a -> a
+
(Word8 -> Int
w2i Word8
third Int -> Int -> Int
forall a. Num a => a -> a -> a
- 0x80) Int -> Int -> Int
forall a. Num a => a -> a -> a
* 0x40 Int -> Int -> Int
forall a. Num a => a -> a -> a
+
(Word8 -> Int
w2i Word8
fourth Int -> Int -> Int
forall a. Num a => a -> a -> a
- 0x80))
decodeFourUTF8 _ _ _ = String -> [(Int, Char)]
forall a. HasCallStack => String -> a
error "UTF-8 quad byte char is missing bytes at eof"