{-# language BangPatterns #-}
{-# language BinaryLiterals #-}
{-# language DataKinds #-}
{-# language DeriveFunctor #-}
{-# language DerivingStrategies #-}
{-# language GADTSyntax #-}
{-# language KindSignatures #-}
{-# language LambdaCase #-}
{-# language MagicHash #-}
{-# language MultiWayIf #-}
{-# language PolyKinds #-}
{-# language RankNTypes #-}
{-# language ScopedTypeVariables #-}
{-# language StandaloneDeriving #-}
{-# language TypeApplications #-}
{-# language UnboxedSums #-}
{-# language UnboxedTuples #-}

-- | Parse input as ASCII-encoded text. Some parsers in this module,
-- like 'any' and 'peek', fail if they encounter a byte above @0x7F@.
-- Others, like numeric parsers and skipping parsers, leave the cursor
-- at the position of the offending byte without failing.
module Data.Bytes.Parser.Ascii
  ( -- * Matching
    Latin.char
  , Latin.char2
  , Latin.char3
  , Latin.char4
    -- * Case-Insensitive Matching
  , charInsensitive
    -- * Get Character
  , any
  , any#
  , peek
  , opt
    -- * Match Many
  , shortTrailedBy
  , takeShortWhile
    -- * Skip
  , Latin.skipDigits
  , Latin.skipDigits1
  , Latin.skipChar
  , Latin.skipChar1
  , skipAlpha
  , skipAlpha1
  , skipTrailedBy
  , skipWhile
    -- * Numbers
  , Latin.decWord
  , Latin.decWord8
  , Latin.decWord16
  , Latin.decWord32
  ) where

import Prelude hiding (length,any,fail,takeWhile)

import Data.Bits (clearBit)
import Data.Bytes.Types (Bytes(..))
import Data.Bytes.Parser.Internal (Parser(..),uneffectful,Result#,uneffectful#)
import Data.Bytes.Parser.Internal (Result(..),indexLatinCharArray,upcastUnitSuccess)
import Data.Char (ord)
import Data.Word (Word8)
import Data.Text.Short (ShortText)
import Control.Monad.ST.Run (runByteArrayST)
import GHC.Exts (Int(I#),Char(C#),Int#,Char#,(-#),(+#),(<#),ord#,indexCharArray#,chr#)
import GHC.Exts (gtChar#)

import qualified Data.ByteString.Short.Internal as BSS
import qualified Data.Text.Short.Unsafe as TS
import qualified Data.Bytes as Bytes
import qualified Data.Bytes.Parser.Latin as Latin
import qualified Data.Bytes.Parser.Unsafe as Unsafe
import qualified Data.Primitive as PM

-- | Consume the next character, failing if it does not match the expected
-- value or if there is no more input. This check for equality is case
-- insensitive.
--
-- Precondition: The argument must be a letter (@[a-zA-Z]@). Behavior is
-- undefined if it is not.
charInsensitive :: e -> Char -> Parser e s ()
{-# inline charInsensitive #-}
charInsensitive :: e -> Char -> Parser e s ()
charInsensitive e
e !Char
c = (Bytes -> Result e ()) -> Parser e s ()
forall e a s. (Bytes -> Result e a) -> Parser e s a
uneffectful ((Bytes -> Result e ()) -> Parser e s ())
-> (Bytes -> Result e ()) -> Parser e s ()
forall a b. (a -> b) -> a -> b
$ \Bytes
chunk -> if Bytes -> Int
length Bytes
chunk Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0
  then if Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
clearBit (ByteArray -> Int -> Word8
forall a. Prim a => ByteArray -> Int -> a
PM.indexByteArray (Bytes -> ByteArray
array Bytes
chunk) (Bytes -> Int
offset Bytes
chunk) :: Word8) Int
5 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
w
    then () -> Int -> Int -> Result e ()
forall e a. a -> Int -> Int -> Result e a
Success () (Bytes -> Int
offset Bytes
chunk Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Bytes -> Int
length Bytes
chunk Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
    else e -> Result e ()
forall e a. e -> Result e a
Failure e
e
  else e -> Result e ()
forall e a. e -> Result e a
Failure e
e
  where
  w :: Word8
w = Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
clearBit (Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral @Int @Word8 (Char -> Int
ord Char
c)) Int
5

-- | Consume input until the trailer is found. Then, consume
-- the trailer as well. This fails if the trailer is not
-- found or if any non-ASCII characters are encountered.
skipTrailedBy :: e -> Char -> Parser e s ()
{-# inline skipTrailedBy #-}
skipTrailedBy :: e -> Char -> Parser e s ()
skipTrailedBy e
e !Char
c = do
  let go :: Parser e s ()
go = do
        !Char
d <- e -> Parser e s Char
forall e s. e -> Parser e s Char
any e
e
        if Char
d Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
c
          then () -> Parser e s ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
          else Parser e s ()
go
  Parser e s ()
forall s. Parser e s ()
go

-- | Consume characters matching the predicate. The stops when it
-- encounters a non-matching character or when it encounters a byte
-- above @0x7F@. This never fails.
takeShortWhile :: (Char -> Bool) -> Parser e s ShortText
{-# inline takeShortWhile #-}
takeShortWhile :: (Char -> Bool) -> Parser e s ShortText
takeShortWhile Char -> Bool
p = do
  !Int
start <- Parser e s Int
forall e s. Parser e s Int
Unsafe.cursor
  (Char -> Bool) -> Parser e s ()
forall e s. (Char -> Bool) -> Parser e s ()
skipWhile Char -> Bool
p
  Int
end <- Parser e s Int
forall e s. Parser e s Int
Unsafe.cursor
  ByteArray
src <- Parser e s ByteArray
forall e s. Parser e s ByteArray
Unsafe.expose
  let len :: Int
len = Int
end Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
start
      !r :: ByteArray
r = (forall s. ST s ByteArray) -> ByteArray
runByteArrayST ((forall s. ST s ByteArray) -> ByteArray)
-> (forall s. ST s ByteArray) -> ByteArray
forall a b. (a -> b) -> a -> b
$ do
        MutableByteArray s
marr <- Int -> ST s (MutableByteArray (PrimState (ST s)))
forall (m :: * -> *).
PrimMonad m =>
Int -> m (MutableByteArray (PrimState m))
PM.newByteArray Int
len
        MutableByteArray (PrimState (ST s))
-> Int -> ByteArray -> Int -> Int -> ST s ()
forall (m :: * -> *).
PrimMonad m =>
MutableByteArray (PrimState m)
-> Int -> ByteArray -> Int -> Int -> m ()
PM.copyByteArray MutableByteArray s
MutableByteArray (PrimState (ST s))
marr Int
0 ByteArray
src Int
start Int
len
        MutableByteArray (PrimState (ST s)) -> ST s ByteArray
forall (m :: * -> *).
PrimMonad m =>
MutableByteArray (PrimState m) -> m ByteArray
PM.unsafeFreezeByteArray MutableByteArray s
MutableByteArray (PrimState (ST s))
marr
  ShortText -> Parser e s ShortText
forall (f :: * -> *) a. Applicative f => a -> f a
pure
    (ShortText -> Parser e s ShortText)
-> ShortText -> Parser e s ShortText
forall a b. (a -> b) -> a -> b
$ ShortByteString -> ShortText
TS.fromShortByteStringUnsafe
    (ShortByteString -> ShortText) -> ShortByteString -> ShortText
forall a b. (a -> b) -> a -> b
$ ByteArray -> ShortByteString
byteArrayToShortByteString
    (ByteArray -> ShortByteString) -> ByteArray -> ShortByteString
forall a b. (a -> b) -> a -> b
$ ByteArray
r

-- | Consume input through the next occurrence of the target
-- character and return the consumed input, excluding the
-- target character, as a 'ShortText'. This fails if it
-- encounters any bytes above @0x7F@.
shortTrailedBy :: e -> Char -> Parser e s ShortText
shortTrailedBy :: e -> Char -> Parser e s ShortText
shortTrailedBy e
e !Char
c = do
  !Int
start <- Parser e s Int
forall e s. Parser e s Int
Unsafe.cursor
  e -> Char -> Parser e s ()
forall e s. e -> Char -> Parser e s ()
skipTrailedBy e
e Char
c
  Int
end <- Parser e s Int
forall e s. Parser e s Int
Unsafe.cursor
  ByteArray
src <- Parser e s ByteArray
forall e s. Parser e s ByteArray
Unsafe.expose
  let len :: Int
len = Int
end Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
start Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
      !r :: ByteArray
r = (forall s. ST s ByteArray) -> ByteArray
runByteArrayST ((forall s. ST s ByteArray) -> ByteArray)
-> (forall s. ST s ByteArray) -> ByteArray
forall a b. (a -> b) -> a -> b
$ do
        MutableByteArray s
marr <- Int -> ST s (MutableByteArray (PrimState (ST s)))
forall (m :: * -> *).
PrimMonad m =>
Int -> m (MutableByteArray (PrimState m))
PM.newByteArray Int
len
        MutableByteArray (PrimState (ST s))
-> Int -> ByteArray -> Int -> Int -> ST s ()
forall (m :: * -> *).
PrimMonad m =>
MutableByteArray (PrimState m)
-> Int -> ByteArray -> Int -> Int -> m ()
PM.copyByteArray MutableByteArray s
MutableByteArray (PrimState (ST s))
marr Int
0 ByteArray
src Int
start Int
len
        MutableByteArray (PrimState (ST s)) -> ST s ByteArray
forall (m :: * -> *).
PrimMonad m =>
MutableByteArray (PrimState m) -> m ByteArray
PM.unsafeFreezeByteArray MutableByteArray s
MutableByteArray (PrimState (ST s))
marr
  ShortText -> Parser e s ShortText
forall (f :: * -> *) a. Applicative f => a -> f a
pure
    (ShortText -> Parser e s ShortText)
-> ShortText -> Parser e s ShortText
forall a b. (a -> b) -> a -> b
$ ShortByteString -> ShortText
TS.fromShortByteStringUnsafe
    (ShortByteString -> ShortText) -> ShortByteString -> ShortText
forall a b. (a -> b) -> a -> b
$ ByteArray -> ShortByteString
byteArrayToShortByteString
    (ByteArray -> ShortByteString) -> ByteArray -> ShortByteString
forall a b. (a -> b) -> a -> b
$ ByteArray
r


-- | Consumes and returns the next character in the input.
any :: e -> Parser e s Char
{-# inline any #-}
any :: e -> Parser e s Char
any e
e = (Bytes -> Result e Char) -> Parser e s Char
forall e a s. (Bytes -> Result e a) -> Parser e s a
uneffectful ((Bytes -> Result e Char) -> Parser e s Char)
-> (Bytes -> Result e Char) -> Parser e s Char
forall a b. (a -> b) -> a -> b
$ \Bytes
chunk -> if Bytes -> Int
length Bytes
chunk Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0
  then
    let c :: Char
c = ByteArray -> Int -> Char
indexLatinCharArray (Bytes -> ByteArray
array Bytes
chunk) (Bytes -> Int
offset Bytes
chunk)
     in if Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
< Char
'\128'
          then Char -> Int -> Int -> Result e Char
forall e a. a -> Int -> Int -> Result e a
Success Char
c (Bytes -> Int
offset Bytes
chunk Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Bytes -> Int
length Bytes
chunk Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
          else e -> Result e Char
forall e a. e -> Result e a
Failure e
e
  else e -> Result e Char
forall e a. e -> Result e a
Failure e
e

-- | Variant of 'any' with unboxed result.
any# :: e -> Parser e s Char#
{-# inline any# #-}
any# :: e -> Parser e s Char#
any# e
e = ((# ByteArray#, Int#, Int# #) -> ST# s (Result# e Char#))
-> Parser e s Char#
forall e s a.
((# ByteArray#, Int#, Int# #) -> ST# s (Result# e a))
-> Parser e s a
Parser
  (\(# ByteArray#
arr, Int#
off, Int#
len #) State# s
s0 -> case Int#
len of
    Int#
0# -> (# State# s
s0, (# e
e | #) #)
    Int#
_ ->
      let !w :: Char#
w = ByteArray# -> Int# -> Char#
indexCharArray# ByteArray#
arr Int#
off
       in case Char# -> Int#
ord# Char#
w Int# -> Int# -> Int#
<# Int#
128# of
            Int#
1# -> (# State# s
s0, (# | (# Char#
w, Int#
off Int# -> Int# -> Int#
+# Int#
1#, Int#
len Int# -> Int# -> Int#
-# Int#
1# #) #) #)
            Int#
_ -> (# State# s
s0, (# e
e | #) #)
  )

unI :: Int -> Int#
{-# inline unI #-}
unI :: Int -> Int#
unI (I# Int#
w) = Int#
w

-- | Examine the next byte without consuming it, interpret it as an
-- ASCII-encoded character. This fails if the byte is above @0x7F@ or
-- if the end of input has been reached.
peek :: e -> Parser e s Char
{-# inline peek #-}
peek :: e -> Parser e s Char
peek e
e = (Bytes -> Result e Char) -> Parser e s Char
forall e a s. (Bytes -> Result e a) -> Parser e s a
uneffectful ((Bytes -> Result e Char) -> Parser e s Char)
-> (Bytes -> Result e Char) -> Parser e s Char
forall a b. (a -> b) -> a -> b
$ \Bytes
chunk -> if Bytes -> Int
length Bytes
chunk Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0
  then
    let w :: Word8
w = ByteArray -> Int -> Word8
forall a. Prim a => ByteArray -> Int -> a
PM.indexByteArray (Bytes -> ByteArray
array Bytes
chunk) (Bytes -> Int
offset Bytes
chunk) :: Word8
     in if Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
128
          then Char -> Int -> Int -> Result e Char
forall e a. a -> Int -> Int -> Result e a
Success
                 (Char# -> Char
C# (Int# -> Char#
chr# (Int -> Int#
unI (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w))))
                 (Bytes -> Int
offset Bytes
chunk)
                 (Bytes -> Int
length Bytes
chunk)
          else e -> Result e Char
forall e a. e -> Result e a
Failure e
e
  else e -> Result e Char
forall e a. e -> Result e a
Failure e
e

-- | Consume the next byte, interpreting it as an ASCII-encoded character.
-- Fails if the byte is above @0x7F@. Returns @Nothing@ if the
-- end of the input has been reached.
opt :: e -> Parser e s (Maybe Char)
{-# inline opt #-}
opt :: e -> Parser e s (Maybe Char)
opt e
e = (Bytes -> Result e (Maybe Char)) -> Parser e s (Maybe Char)
forall e a s. (Bytes -> Result e a) -> Parser e s a
uneffectful ((Bytes -> Result e (Maybe Char)) -> Parser e s (Maybe Char))
-> (Bytes -> Result e (Maybe Char)) -> Parser e s (Maybe Char)
forall a b. (a -> b) -> a -> b
$ \Bytes
chunk -> if Bytes -> Int
length Bytes
chunk Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0
  then
    let w :: Word8
w = ByteArray -> Int -> Word8
forall a. Prim a => ByteArray -> Int -> a
PM.indexByteArray (Bytes -> ByteArray
array Bytes
chunk) (Bytes -> Int
offset Bytes
chunk) :: Word8
     in if Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
128
          then Maybe Char -> Int -> Int -> Result e (Maybe Char)
forall e a. a -> Int -> Int -> Result e a
Success
                 (Char -> Maybe Char
forall a. a -> Maybe a
Just (Char# -> Char
C# (Int# -> Char#
chr# (Int -> Int#
unI (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w)))))
                 (Bytes -> Int
offset Bytes
chunk Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
                 (Bytes -> Int
length Bytes
chunk Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
          else e -> Result e (Maybe Char)
forall e a. e -> Result e a
Failure e
e
  else Maybe Char -> Int -> Int -> Result e (Maybe Char)
forall e a. a -> Int -> Int -> Result e a
Success Maybe Char
forall a. Maybe a
Nothing (Bytes -> Int
offset Bytes
chunk) (Bytes -> Int
length Bytes
chunk)

-- | Consume characters matching the predicate. The stops when it
-- encounters a non-matching character or when it encounters a byte
-- above @0x7F@. This never fails.
skipWhile :: (Char -> Bool) -> Parser e s ()
{-# inline skipWhile #-}
skipWhile :: (Char -> Bool) -> Parser e s ()
skipWhile Char -> Bool
p = ((# ByteArray#, Int#, Int# #) -> ST# s (Result# e ()))
-> Parser e s ()
forall e s a.
((# ByteArray#, Int#, Int# #) -> ST# s (Result# e a))
-> Parser e s a
Parser
  ( \(# ByteArray#
arr, Int#
off0, Int#
len0 #) State# s
s0 ->
    let go :: Int# -> Int# -> (# (), Int#, Int# #)
go Int#
off Int#
len = case Int#
len of
          Int#
0# -> (# (), Int#
off, Int#
0# #)
          Int#
_ -> let c :: Char#
c = ByteArray# -> Int# -> Char#
indexCharArray# ByteArray#
arr Int#
off in
            case Char -> Bool
p (Char# -> Char
C# Char#
c) of
              Bool
True -> case Char# -> Char# -> Int#
gtChar# Char#
c Char#
'\x7F'# of
                Int#
1# -> (# (), Int#
off, Int#
len #)
                Int#
_ -> Int# -> Int# -> (# (), Int#, Int# #)
go (Int#
off Int# -> Int# -> Int#
+# Int#
1# ) (Int#
len Int# -> Int# -> Int#
-# Int#
1# )
              Bool
False -> (# (), Int#
off, Int#
len #)
     in (# State# s
s0, (# | Int# -> Int# -> (# (), Int#, Int# #)
go Int#
off0 Int#
len0 #) #)
  )

-- | Skip uppercase and lowercase letters until a non-alpha
-- character is encountered.
skipAlpha :: Parser e s ()
{-# inline skipAlpha #-}
skipAlpha :: Parser e s ()
skipAlpha = (Bytes -> Result# e ()) -> Parser e s ()
forall e a s. (Bytes -> Result# e a) -> Parser e s a
uneffectful# ((Bytes -> Result# e ()) -> Parser e s ())
-> (Bytes -> Result# e ()) -> Parser e s ()
forall a b. (a -> b) -> a -> b
$ \Bytes
c ->
  (# Int#, Int# #) -> Result# e ()
forall e. (# Int#, Int# #) -> Result# e ()
upcastUnitSuccess (Bytes -> (# Int#, Int# #)
skipAlphaAsciiLoop Bytes
c)

-- | Skip uppercase and lowercase letters until a non-alpha
-- character is encountered.
skipAlpha1 :: e -> Parser e s ()
{-# inline skipAlpha1 #-}
skipAlpha1 :: e -> Parser e s ()
skipAlpha1 e
e = (Bytes -> Result# e ()) -> Parser e s ()
forall e a s. (Bytes -> Result# e a) -> Parser e s a
uneffectful# ((Bytes -> Result# e ()) -> Parser e s ())
-> (Bytes -> Result# e ()) -> Parser e s ()
forall a b. (a -> b) -> a -> b
$ \Bytes
c ->
  e -> Bytes -> Result# e ()
forall e. e -> Bytes -> Result# e ()
skipAlphaAsciiLoop1Start e
e Bytes
c

skipAlphaAsciiLoop ::
     Bytes -- Chunk
  -> (# Int#, Int# #)
{-# inline skipAlphaAsciiLoop #-}
skipAlphaAsciiLoop :: Bytes -> (# Int#, Int# #)
skipAlphaAsciiLoop !Bytes
c = if Bytes -> Int
length Bytes
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0
  then
    let w :: Char
w = ByteArray -> Int -> Char
indexLatinCharArray (Bytes -> ByteArray
array Bytes
c) (Bytes -> Int
offset Bytes
c)
     in if (Char
w Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char
w Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'z') Bool -> Bool -> Bool
|| (Char
w Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char
w Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'Z')
          then Bytes -> (# Int#, Int# #)
skipAlphaAsciiLoop (Int -> Bytes -> Bytes
Bytes.unsafeDrop Int
1 Bytes
c)
          else (# Int -> Int#
unI (Bytes -> Int
offset Bytes
c), Int -> Int#
unI (Bytes -> Int
length Bytes
c) #)
  else (# Int -> Int#
unI (Bytes -> Int
offset Bytes
c), Int -> Int#
unI (Bytes -> Int
length Bytes
c) #)

skipAlphaAsciiLoop1Start ::
     e
  -> Bytes -- chunk
  -> Result# e ()
{-# inline skipAlphaAsciiLoop1Start #-}
skipAlphaAsciiLoop1Start :: e -> Bytes -> Result# e ()
skipAlphaAsciiLoop1Start e
e !Bytes
c = if Bytes -> Int
length Bytes
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0
  then 
    let w :: Char
w = ByteArray -> Int -> Char
indexLatinCharArray (Bytes -> ByteArray
array Bytes
c) (Bytes -> Int
offset Bytes
c)
     in if (Char
w Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char
w Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'z') Bool -> Bool -> Bool
|| (Char
w Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char
w Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'Z')
          then (# Int#, Int# #) -> Result# e ()
forall e. (# Int#, Int# #) -> Result# e ()
upcastUnitSuccess (Bytes -> (# Int#, Int# #)
skipAlphaAsciiLoop (Int -> Bytes -> Bytes
Bytes.unsafeDrop Int
1 Bytes
c))
          else (# e
e | #)
  else (# e
e | #)

byteArrayToShortByteString :: PM.ByteArray -> BSS.ShortByteString
{-# inline byteArrayToShortByteString #-}
byteArrayToShortByteString :: ByteArray -> ShortByteString
byteArrayToShortByteString (PM.ByteArray ByteArray#
x) = ByteArray# -> ShortByteString
BSS.SBS ByteArray#
x