{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UnboxedTuples #-}
module Text.PariPari.Internal.Chunk (
Chunk(..)
, showByte
, showByteString
, unsafeAsciiToChar
, asc_0, asc_9, asc_A, asc_E, asc_P, asc_a, asc_e, asc_p,
asc_minus, asc_plus, asc_point, asc_newline
) where
import Data.Bits (unsafeShiftL, (.|.), (.&.))
import Data.ByteString (ByteString)
import Data.Foldable (foldl')
import Data.String (fromString)
import Data.Text (Text)
import GHC.Base
import GHC.Word
import GHC.ForeignPtr
import GHC.Show (showLitChar)
import Numeric (showHex)
import qualified Data.ByteString as B
import qualified Data.ByteString.Internal as B
import qualified Data.Text.Array as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.Internal as T
class Ord k => Chunk k where
type Buffer k
matchChunk :: Buffer k -> Int# -> k -> Int#
packChunk :: Buffer k -> Int# -> Int# -> k
unpackChunk :: k -> (# Buffer k, Int# #)
showChunk :: k -> String
matchChar :: Buffer k -> Int# -> Char# -> Int#
indexChar :: Buffer k -> Int# -> (# Char#, Int# #)
indexByte :: Buffer k -> Int# -> Word#
stringToChunk :: String -> k
instance Chunk ByteString where
type Buffer ByteString = ForeignPtr Word8
matchChunk (ForeignPtr p _) i (B.PS (ForeignPtr p' _) (I# j) (I# n)) = go 0#
where go :: Int# -> Int#
go k | 1# <- k >=# n = n
| w <- indexWord8OffAddr# p (i +# k),
w' <- indexWord8OffAddr# p' (j +# k),
1# <- w `neWord#` int2Word# 0#,
1# <- w `eqWord#` w' = go (k +# 1#)
| otherwise = -1#
{-# INLINE matchChunk #-}
packChunk b i n = B.PS b (I# i) (I# n)
{-# INLINE packChunk #-}
unpackChunk k =
let !(B.PS b (I# i) _) = k <> fromString "\0\0\0"
in (# b, i #)
{-# INLINE unpackChunk #-}
showChunk = showByteString
indexByte (ForeignPtr p _) i = indexWord8OffAddr# p i
{-# INLINE indexByte #-}
matchChar p i m
| C# m <= '\x7F' =
if | unsafeChr (at p i) == C# m -> 1#
| otherwise -> -1#
| C# m <= '\x7FF' =
if | a1 <- at p i, a2 <- at p (i +# 1#),
unsafeChr (((a1 .&. 31) `unsafeShiftL` 6)
.|. (a2 .&. 0x3F)) == C# m -> 2#
| otherwise -> -1#
| C# m <= '\xFFFF' =
if | a1 <- at p i, a2 <- at p (i +# 1#), a3 <- at p (i +# 2#),
unsafeChr (((a1 .&. 15) `unsafeShiftL` 12)
.|. ((a2 .&. 0x3F) `unsafeShiftL` 6)
.|. (a3 .&. 0x3F)) == C# m -> 3#
| otherwise -> -1#
| otherwise =
if | a1 <- at p i, a2 <- at p (i +# 1#), a3 <- at p (i +# 2#), a4 <- at p (i +# 3#),
unsafeChr (((a1 .&. 7) `unsafeShiftL` 18)
.|. ((a2 .&. 0x3F) `unsafeShiftL` 12)
.|. ((a3 .&. 0x3F) `unsafeShiftL` 6)
.|. (a4 .&. 0x3F)) == C# m -> 4#
| otherwise -> -1#
{-# INLINE matchChar #-}
indexChar p i
| a1 <- at p i,
a1 <= 0x7F =
(# unsafeChr# a1, 1# #)
| a1 <- at p i, a2 <- at p (i +# 1#),
(a1 .&. 0xE0) == 0xC0,
(a2 .&. 0xC0) == 0x80 =
(# unsafeChr# (((a1 .&. 31) `unsafeShiftL` 6)
.|. (a2 .&. 0x3F)), 2# #)
| a1 <- at p i, a2 <- at p (i +# 1#), a3 <- at p (i +# 2#),
(a1 .&. 0xF0) == 0xE0,
(a2 .&. 0xC0) == 0x80,
(a3 .&. 0xC0) == 0x80 =
(# unsafeChr# (((a1 .&. 15) `unsafeShiftL` 12)
.|. ((a2 .&. 0x3F) `unsafeShiftL` 6)
.|. (a3 .&. 0x3F)), 3# #)
| a1 <- at p i, a2 <- at p (i +# 1#), a3 <- at p (i +# 2#), a4 <- at p (i +# 3#),
(a1 .&. 0xF8) == 0xF0,
(a2 .&. 0xC0) == 0x80,
(a3 .&. 0xC0) == 0x80,
(a4 .&. 0xC0) == 0x80 =
(# unsafeChr# (((a1 .&. 7) `unsafeShiftL` 18)
.|. ((a2 .&. 0x3F) `unsafeShiftL` 12)
.|. ((a3 .&. 0x3F) `unsafeShiftL` 6)
.|. (a4 .&. 0x3F)), 4# #)
| otherwise = (# '\0'#, 0# #)
{-# INLINE indexChar #-}
stringToChunk t = T.encodeUtf8 $ fromString t
{-# INLINE stringToChunk #-}
instance Chunk Text where
type Buffer Text = T.Array
matchChunk a i (T.Text a' (I# j) (I# n)) = go 0#
where go :: Int# -> Int#
go k | 1# <- k >=# n = n
| w <- T.unsafeIndex a (I# (i +# k)),
w' <- T.unsafeIndex a' (I# (j +# k)),
w /= 0, w == w' = go (k +# 1#)
| otherwise = -1#
{-# INLINE matchChunk #-}
packChunk b i n = T.Text b (I# i) (I# n)
{-# INLINE packChunk #-}
unpackChunk k =
let !(T.Text b (I# i) _) = k <> fromString "\0"
in (# b, i #)
{-# INLINE unpackChunk #-}
showChunk = show
indexByte a i
| W16# c <- T.unsafeIndex a (I# i), 1# <- c `leWord#` (int2Word# 0xFF#) = c
| otherwise = int2Word# 0#
{-# INLINE indexByte #-}
indexChar a i
| hi <- T.unsafeIndex a (I# i), lo <- T.unsafeIndex a (I# (i +# 1#)) =
if hi < 0xD800 || hi > 0xDFFF then
(# unsafeChr# (fromIntegral hi), 1# #)
else
(# unsafeChr# (0x10000 + ((fromIntegral $ hi - 0xD800) `unsafeShiftL` 10) + (fromIntegral lo - 0xDC00)), 2# #)
{-# INLINE indexChar #-}
matchChar a i m
| C# m <= '\xFFFF' =
if | unsafeChr (fromIntegral (T.unsafeIndex a (I# i))) == C# m -> 1#
| otherwise -> -1#
| otherwise =
if | hi <- T.unsafeIndex a (I# i), lo <- T.unsafeIndex a (I# (i +# 1#)),
hi >= 0xD800, hi <= 0xDFFF,
C# m == unsafeChr (0x10000 + ((fromIntegral $ hi - 0xD800) `unsafeShiftL` 10) +
(fromIntegral lo - 0xDC00)) -> 2#
| otherwise -> -1#
stringToChunk t = fromString t
{-# INLINE stringToChunk #-}
at :: ForeignPtr Word8 -> Int# -> Int
at p i = fromIntegral $ W8# (indexByte @ByteString p i)
{-# INLINE at #-}
asc_0, asc_9, asc_A, asc_E, asc_P, asc_a, asc_e, asc_p,
asc_minus, asc_plus, asc_point, asc_newline :: Word8
asc_0 = 48
asc_9 = 57
asc_A = 65
asc_E = 69
asc_P = 80
asc_a = 97
asc_e = 101
asc_p = 112
asc_minus = 45
asc_plus = 43
asc_point = 46
asc_newline = 10
unsafeAsciiToChar :: Word8 -> Char
unsafeAsciiToChar x = unsafeChr (fromIntegral x)
{-# INLINE unsafeAsciiToChar #-}
unsafeChr# :: Int -> Char#
unsafeChr# (I# i) = chr# i
{-# INLINE unsafeChr# #-}
byteS :: Word8 -> ShowS
byteS b
| b < 128 = showLitChar $ unsafeAsciiToChar b
| otherwise = ("\\x" <>) . showHex b
bytesS :: ByteString -> ShowS
bytesS b | B.length b == 1 = byteS $ B.head b
| otherwise = foldl' ((. byteS) . (.)) id $ B.unpack b
showByte :: Word8 -> String
showByte b = ('\'':) . byteS b . ('\'':) $ ""
showByteString :: ByteString -> String
showByteString b = ('"':) . bytesS b . ('"':) $ ""