{-# language BangPatterns #-}
{-# language BinaryLiterals #-}
{-# language DataKinds #-}
{-# language DeriveFunctor #-}
{-# language DerivingStrategies #-}
{-# language GADTSyntax #-}
{-# language KindSignatures #-}
{-# language LambdaCase #-}
{-# language MagicHash #-}
{-# language MultiWayIf #-}
{-# language PolyKinds #-}
{-# language RankNTypes #-}
{-# language ScopedTypeVariables #-}
{-# language StandaloneDeriving #-}
{-# language TypeApplications #-}
{-# language UnboxedSums #-}
{-# language UnboxedTuples #-}
{-# language CPP #-}
module Data.Bytes.Parser.Utf8
(
any#
, shortText
) where
import Prelude hiding (length,any,fail,takeWhile)
import Data.Bits ((.&.),(.|.),unsafeShiftL,xor)
import Data.Bytes.Parser.Internal (Parser(..))
import Data.Text.Short (ShortText)
import GHC.Exts (Int(I#),Char(C#),Int#,Char#,(-#),(+#),(>#),chr#)
import GHC.Word (Word8(W8#))
import qualified Data.ByteString.Short.Internal as BSS
import qualified Data.Bytes.Parser as Parser
import qualified Data.Primitive as PM
import qualified Data.Text.Short as TS
import qualified GHC.Exts as Exts
any# :: e -> Parser e s Char#
{-# noinline any# #-}
any# :: e -> Parser e s Char#
any# e
e = ((# ByteArray#, Int#, Int# #) -> ST# s (Result# e Char#))
-> Parser e s Char#
forall e s a.
((# ByteArray#, Int#, Int# #) -> ST# s (Result# e a))
-> Parser e s a
Parser
(\(# ByteArray#
arr, Int#
off, Int#
len #) State# s
s0 -> case Int#
len Int# -> Int# -> Int#
># Int#
0# of
Int#
1# ->
let !w0 :: Word#
w0 = ByteArray# -> Int# -> Word#
Exts.indexWord8Array# ByteArray#
arr Int#
off
in if | Word8 -> Bool
oneByteChar (Word# -> Word8
W8# Word#
w0) ->
(# State# s
s0, (# | (# Int# -> Char#
chr# (Word# -> Int#
Exts.word2Int# (
#if MIN_VERSION_base(4,16,0)
Exts.word8ToWord#
#endif
Word#
w0)), Int#
off Int# -> Int# -> Int#
+# Int#
1#, Int#
len Int# -> Int# -> Int#
-# Int#
1# #) #) #)
| Word8 -> Bool
twoByteChar (Word# -> Word8
W8# Word#
w0) ->
if | Int# -> Int
I# Int#
len Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
1
, Word#
w1 <- ByteArray# -> Int# -> Word#
Exts.indexWord8Array# ByteArray#
arr (Int#
off Int# -> Int# -> Int#
+# Int#
1#)
, Word8 -> Bool
followingByte (Word# -> Word8
W8# Word#
w1)
, C# Char#
c <- Word8 -> Word8 -> Char
codepointFromTwoBytes (Word# -> Word8
W8# Word#
w0) (Word# -> Word8
W8# Word#
w1)
-> (# State# s
s0, (# | (# Char#
c, Int#
off Int# -> Int# -> Int#
+# Int#
2#, Int#
len Int# -> Int# -> Int#
-# Int#
2# #) #) #)
| Bool
otherwise -> (# State# s
s0, (# e
e | #) #)
| Word8 -> Bool
threeByteChar (Word# -> Word8
W8# Word#
w0) ->
if | Int# -> Int
I# Int#
len Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
2
, Word#
w1 <- ByteArray# -> Int# -> Word#
Exts.indexWord8Array# ByteArray#
arr (Int#
off Int# -> Int# -> Int#
+# Int#
1# )
, Word#
w2 <- ByteArray# -> Int# -> Word#
Exts.indexWord8Array# ByteArray#
arr (Int#
off Int# -> Int# -> Int#
+# Int#
2# )
, Word8 -> Bool
followingByte (Word# -> Word8
W8# Word#
w1)
, !c :: Char
c@(C# Char#
c#) <- Word8 -> Word8 -> Word8 -> Char
codepointFromThreeBytes (Word# -> Word8
W8# Word#
w0) (Word# -> Word8
W8# Word#
w1) (Word# -> Word8
W8# Word#
w2)
, Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
< Char
'\xD800' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Char
'\xDFFF'
-> (# State# s
s0, (# | (# Char#
c#, Int#
off Int# -> Int# -> Int#
+# Int#
3#, Int#
len Int# -> Int# -> Int#
-# Int#
3# #) #) #)
| Bool
otherwise -> (# State# s
s0, (# e
e | #) #)
| Word8 -> Bool
fourByteChar (Word# -> Word8
W8# Word#
w0) ->
if | Int# -> Int
I# Int#
len Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
3
, Word#
w1 <- ByteArray# -> Int# -> Word#
Exts.indexWord8Array# ByteArray#
arr (Int#
off Int# -> Int# -> Int#
+# Int#
1# )
, Word#
w2 <- ByteArray# -> Int# -> Word#
Exts.indexWord8Array# ByteArray#
arr (Int#
off Int# -> Int# -> Int#
+# Int#
2# )
, Word#
w3 <- ByteArray# -> Int# -> Word#
Exts.indexWord8Array# ByteArray#
arr (Int#
off Int# -> Int# -> Int#
+# Int#
3# )
, Word8 -> Bool
followingByte (Word# -> Word8
W8# Word#
w1)
, !(C# Char#
c#) <- Word8 -> Word8 -> Word8 -> Word8 -> Char
codepointFromFourBytes (Word# -> Word8
W8# Word#
w0) (Word# -> Word8
W8# Word#
w1) (Word# -> Word8
W8# Word#
w2) (Word# -> Word8
W8# Word#
w3)
-> (# State# s
s0, (# | (# Char#
c#, Int#
off Int# -> Int# -> Int#
+# Int#
4#, Int#
len Int# -> Int# -> Int#
-# Int#
4# #) #) #)
| Bool
otherwise -> (# State# s
s0, (# e
e | #) #)
| Bool
otherwise -> (# State# s
s0, (# e
e | #) #)
Int#
_ -> (# State# s
s0, (# e
e | #) #)
)
codepointFromFourBytes :: Word8 -> Word8 -> Word8 -> Word8 -> Char
codepointFromFourBytes :: Word8 -> Word8 -> Word8 -> Word8 -> Char
codepointFromFourBytes Word8
w1 Word8
w2 Word8
w3 Word8
w4 = Char# -> Char
C#
( Int# -> Char#
chr#
( Int -> Int#
unI (Int -> Int#) -> Int -> Int#
forall a b. (a -> b) -> a -> b
$ Word -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral
( Word -> Int -> Word
forall a. Bits a => a -> Int -> a
unsafeShiftL (Word8 -> Word
word8ToWord Word8
w1 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.&. Word
0b00001111) Int
18 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|.
Word -> Int -> Word
forall a. Bits a => a -> Int -> a
unsafeShiftL (Word8 -> Word
word8ToWord Word8
w2 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.&. Word
0b00111111) Int
12 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|.
Word -> Int -> Word
forall a. Bits a => a -> Int -> a
unsafeShiftL (Word8 -> Word
word8ToWord Word8
w3 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.&. Word
0b00111111) Int
6 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|.
(Word8 -> Word
word8ToWord Word8
w4 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.&. Word
0b00111111)
)
)
)
codepointFromThreeBytes :: Word8 -> Word8 -> Word8 -> Char
codepointFromThreeBytes :: Word8 -> Word8 -> Word8 -> Char
codepointFromThreeBytes Word8
w1 Word8
w2 Word8
w3 = Char# -> Char
C#
( Int# -> Char#
chr#
( Int -> Int#
unI (Int -> Int#) -> Int -> Int#
forall a b. (a -> b) -> a -> b
$ Word -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral
( Word -> Int -> Word
forall a. Bits a => a -> Int -> a
unsafeShiftL (Word8 -> Word
word8ToWord Word8
w1 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.&. Word
0b00001111) Int
12 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|.
Word -> Int -> Word
forall a. Bits a => a -> Int -> a
unsafeShiftL (Word8 -> Word
word8ToWord Word8
w2 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.&. Word
0b00111111) Int
6 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|.
(Word8 -> Word
word8ToWord Word8
w3 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.&. Word
0b00111111)
)
)
)
codepointFromTwoBytes :: Word8 -> Word8 -> Char
codepointFromTwoBytes :: Word8 -> Word8 -> Char
codepointFromTwoBytes Word8
w1 Word8
w2 = Char# -> Char
C#
( Int# -> Char#
chr#
( Int -> Int#
unI (Int -> Int#) -> Int -> Int#
forall a b. (a -> b) -> a -> b
$ Word -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral @Word @Int
( Word -> Int -> Word
forall a. Bits a => a -> Int -> a
unsafeShiftL (Word8 -> Word
word8ToWord Word8
w1 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.&. Word
0b00011111) Int
6 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|.
(Word8 -> Word
word8ToWord Word8
w2 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.&. Word
0b00111111)
)
)
)
oneByteChar :: Word8 -> Bool
oneByteChar :: Word8 -> Bool
oneByteChar !Word8
w = Word8
w Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0b10000000 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0
twoByteChar :: Word8 -> Bool
twoByteChar :: Word8 -> Bool
twoByteChar !Word8
w = Word8
w Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0b11100000 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0b11000000
threeByteChar :: Word8 -> Bool
threeByteChar :: Word8 -> Bool
threeByteChar !Word8
w = Word8
w Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0b11110000 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0b11100000
fourByteChar :: Word8 -> Bool
fourByteChar :: Word8 -> Bool
fourByteChar !Word8
w = Word8
w Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0b11111000 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0b11110000
followingByte :: Word8 -> Bool
followingByte :: Word8 -> Bool
followingByte !Word8
w = Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
xor Word8
w Word8
0b01000000 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0b11000000 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0b11000000
word8ToWord :: Word8 -> Word
word8ToWord :: Word8 -> Word
word8ToWord = Word8 -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral
unI :: Int -> Int#
unI :: Int -> Int#
unI (I# Int#
w) = Int#
w
shortText :: e -> ShortText -> Parser e s ()
shortText :: e -> ShortText -> Parser e s ()
shortText e
e !ShortText
t = e -> ByteArray -> Parser e s ()
forall e s. e -> ByteArray -> Parser e s ()
Parser.byteArray e
e
(ShortByteString -> ByteArray
shortByteStringToByteArray (ShortText -> ShortByteString
TS.toShortByteString ShortText
t))
shortByteStringToByteArray ::
BSS.ShortByteString
-> PM.ByteArray
shortByteStringToByteArray :: ShortByteString -> ByteArray
shortByteStringToByteArray (BSS.SBS ByteArray#
x) = ByteArray# -> ByteArray
PM.ByteArray ByteArray#
x