{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE BinaryLiterals #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE GADTSyntax #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE PolyKinds #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE UnboxedTuples #-}

{- | Parse input as ASCII-encoded text. Some parsers in this module,
like 'any' and 'peek', fail if they encounter a byte above @0x7F@.
Others, like numeric parsers and skipping parsers, leave the cursor
at the position of the offending byte without failing.
-}
module Data.Bytes.Parser.Ascii
  ( -- * Matching
    Latin.char
  , Latin.char2
  , Latin.char3
  , Latin.char4

    -- * Case-Insensitive Matching
  , charInsensitive

    -- * Get Character
  , any
  , any#
  , peek
  , opt

    -- * Match Many
  , shortTrailedBy
  , takeShortWhile

    -- * Skip
  , Latin.skipDigits
  , Latin.skipDigits1
  , Latin.skipChar
  , Latin.skipChar1
  , skipAlpha
  , skipAlpha1
  , skipTrailedBy
  , skipWhile

    -- * Numbers
  , Latin.decWord
  , Latin.decWord8
  , Latin.decWord16
  , Latin.decWord32
  ) where

import Prelude hiding (any, fail, length, takeWhile)

import Control.Monad.ST (runST)
import Data.Bits (clearBit)
import Data.Bytes.Parser.Internal (Parser (..), Result (..), Result#, indexLatinCharArray, uneffectful, uneffectful#, upcastUnitSuccess)
import Data.Bytes.Types (Bytes (..))
import Data.Char (ord)
import Data.Text.Short (ShortText)
import Data.Word (Word8)
import GHC.Exts (Char (C#), Char#, Int (I#), Int#, chr#, gtChar#, indexCharArray#, ord#, (+#), (-#), (<#))

import qualified Data.ByteString.Short.Internal as BSS
import qualified Data.Bytes as Bytes
import qualified Data.Bytes.Parser.Latin as Latin
import qualified Data.Bytes.Parser.Unsafe as Unsafe
import qualified Data.Primitive as PM
import qualified Data.Text.Short.Unsafe as TS

{- | Consume the next character, failing if it does not match the expected
value or if there is no more input. This check for equality is case
insensitive.

Precondition: The argument must be a letter (@[a-zA-Z]@). Behavior is
undefined if it is not.
-}
charInsensitive :: e -> Char -> Parser e s ()
{-# INLINE charInsensitive #-}
charInsensitive :: forall e s. e -> Char -> Parser e s ()
charInsensitive e
e !Char
c = (Bytes -> Result e ()) -> Parser e s ()
forall e a s. (Bytes -> Result e a) -> Parser e s a
uneffectful ((Bytes -> Result e ()) -> Parser e s ())
-> (Bytes -> Result e ()) -> Parser e s ()
forall a b. (a -> b) -> a -> b
$ \Bytes
chunk ->
  if Bytes -> Int
length Bytes
chunk Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0
    then
      if Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
clearBit (ByteArray -> Int -> Word8
forall a. Prim a => ByteArray -> Int -> a
PM.indexByteArray (Bytes -> ByteArray
array Bytes
chunk) (Bytes -> Int
offset Bytes
chunk) :: Word8) Int
5 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
w
        then () -> Int -> Int -> Result e ()
forall e a. a -> Int -> Int -> Result e a
Success () (Bytes -> Int
offset Bytes
chunk Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Bytes -> Int
length Bytes
chunk Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
        else e -> Result e ()
forall e a. e -> Result e a
Failure e
e
    else e -> Result e ()
forall e a. e -> Result e a
Failure e
e
 where
  w :: Word8
w = Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
clearBit (forall a b. (Integral a, Num b) => a -> b
fromIntegral @Int @Word8 (Char -> Int
ord Char
c)) Int
5

{- | Consume input until the trailer is found. Then, consume
the trailer as well. This fails if the trailer is not
found or if any non-ASCII characters are encountered.
-}
skipTrailedBy :: e -> Char -> Parser e s ()
{-# INLINE skipTrailedBy #-}
skipTrailedBy :: forall e s. e -> Char -> Parser e s ()
skipTrailedBy e
e !Char
c = do
  let go :: Parser e s ()
go = do
        !Char
d <- e -> Parser e s Char
forall e s. e -> Parser e s Char
any e
e
        if Char
d Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
c
          then () -> Parser e s ()
forall a. a -> Parser e s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
          else Parser e s ()
go
  Parser e s ()
forall {s}. Parser e s ()
go

{- | Consume characters matching the predicate. The stops when it
encounters a non-matching character or when it encounters a byte
above @0x7F@. This never fails.
-}
takeShortWhile :: (Char -> Bool) -> Parser e s ShortText
{-# INLINE takeShortWhile #-}
takeShortWhile :: forall e s. (Char -> Bool) -> Parser e s ShortText
takeShortWhile Char -> Bool
p = do
  !Int
start <- Parser e s Int
forall e s. Parser e s Int
Unsafe.cursor
  (Char -> Bool) -> Parser e s ()
forall e s. (Char -> Bool) -> Parser e s ()
skipWhile Char -> Bool
p
  Int
end <- Parser e s Int
forall e s. Parser e s Int
Unsafe.cursor
  ByteArray
src <- Parser e s ByteArray
forall e s. Parser e s ByteArray
Unsafe.expose
  let len :: Int
len = Int
end Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
start
      !r :: ByteArray
r = (forall s. ST s ByteArray) -> ByteArray
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s ByteArray) -> ByteArray)
-> (forall s. ST s ByteArray) -> ByteArray
forall a b. (a -> b) -> a -> b
$ do
        MutableByteArray s
marr <- Int -> ST s (MutableByteArray (PrimState (ST s)))
forall (m :: * -> *).
PrimMonad m =>
Int -> m (MutableByteArray (PrimState m))
PM.newByteArray Int
len
        MutableByteArray (PrimState (ST s))
-> Int -> ByteArray -> Int -> Int -> ST s ()
forall (m :: * -> *).
PrimMonad m =>
MutableByteArray (PrimState m)
-> Int -> ByteArray -> Int -> Int -> m ()
PM.copyByteArray MutableByteArray s
MutableByteArray (PrimState (ST s))
marr Int
0 ByteArray
src Int
start Int
len
        MutableByteArray (PrimState (ST s)) -> ST s ByteArray
forall (m :: * -> *).
PrimMonad m =>
MutableByteArray (PrimState m) -> m ByteArray
PM.unsafeFreezeByteArray MutableByteArray s
MutableByteArray (PrimState (ST s))
marr
  ShortText -> Parser e s ShortText
forall a. a -> Parser e s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ShortText -> Parser e s ShortText)
-> ShortText -> Parser e s ShortText
forall a b. (a -> b) -> a -> b
$
    ShortByteString -> ShortText
TS.fromShortByteStringUnsafe (ShortByteString -> ShortText) -> ShortByteString -> ShortText
forall a b. (a -> b) -> a -> b
$
      ByteArray -> ShortByteString
byteArrayToShortByteString (ByteArray -> ShortByteString) -> ByteArray -> ShortByteString
forall a b. (a -> b) -> a -> b
$
        ByteArray
r

{- | Consume input through the next occurrence of the target
character and return the consumed input, excluding the
target character, as a 'ShortText'. This fails if it
encounters any bytes above @0x7F@.
-}
shortTrailedBy :: e -> Char -> Parser e s ShortText
shortTrailedBy :: forall e s. e -> Char -> Parser e s ShortText
shortTrailedBy e
e !Char
c = do
  !Int
start <- Parser e s Int
forall e s. Parser e s Int
Unsafe.cursor
  e -> Char -> Parser e s ()
forall e s. e -> Char -> Parser e s ()
skipTrailedBy e
e Char
c
  Int
end <- Parser e s Int
forall e s. Parser e s Int
Unsafe.cursor
  ByteArray
src <- Parser e s ByteArray
forall e s. Parser e s ByteArray
Unsafe.expose
  let len :: Int
len = Int
end Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
start Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
      !r :: ByteArray
r = (forall s. ST s ByteArray) -> ByteArray
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s ByteArray) -> ByteArray)
-> (forall s. ST s ByteArray) -> ByteArray
forall a b. (a -> b) -> a -> b
$ do
        MutableByteArray s
marr <- Int -> ST s (MutableByteArray (PrimState (ST s)))
forall (m :: * -> *).
PrimMonad m =>
Int -> m (MutableByteArray (PrimState m))
PM.newByteArray Int
len
        MutableByteArray (PrimState (ST s))
-> Int -> ByteArray -> Int -> Int -> ST s ()
forall (m :: * -> *).
PrimMonad m =>
MutableByteArray (PrimState m)
-> Int -> ByteArray -> Int -> Int -> m ()
PM.copyByteArray MutableByteArray s
MutableByteArray (PrimState (ST s))
marr Int
0 ByteArray
src Int
start Int
len
        MutableByteArray (PrimState (ST s)) -> ST s ByteArray
forall (m :: * -> *).
PrimMonad m =>
MutableByteArray (PrimState m) -> m ByteArray
PM.unsafeFreezeByteArray MutableByteArray s
MutableByteArray (PrimState (ST s))
marr
  ShortText -> Parser e s ShortText
forall a. a -> Parser e s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ShortText -> Parser e s ShortText)
-> ShortText -> Parser e s ShortText
forall a b. (a -> b) -> a -> b
$
    ShortByteString -> ShortText
TS.fromShortByteStringUnsafe (ShortByteString -> ShortText) -> ShortByteString -> ShortText
forall a b. (a -> b) -> a -> b
$
      ByteArray -> ShortByteString
byteArrayToShortByteString (ByteArray -> ShortByteString) -> ByteArray -> ShortByteString
forall a b. (a -> b) -> a -> b
$
        ByteArray
r

-- | Consumes and returns the next character in the input.
any :: e -> Parser e s Char
{-# INLINE any #-}
any :: forall e s. e -> Parser e s Char
any e
e = (Bytes -> Result e Char) -> Parser e s Char
forall e a s. (Bytes -> Result e a) -> Parser e s a
uneffectful ((Bytes -> Result e Char) -> Parser e s Char)
-> (Bytes -> Result e Char) -> Parser e s Char
forall a b. (a -> b) -> a -> b
$ \Bytes
chunk ->
  if Bytes -> Int
length Bytes
chunk Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0
    then
      let c :: Char
c = ByteArray -> Int -> Char
indexLatinCharArray (Bytes -> ByteArray
array Bytes
chunk) (Bytes -> Int
offset Bytes
chunk)
       in if Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
< Char
'\128'
            then Char -> Int -> Int -> Result e Char
forall e a. a -> Int -> Int -> Result e a
Success Char
c (Bytes -> Int
offset Bytes
chunk Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Bytes -> Int
length Bytes
chunk Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
            else e -> Result e Char
forall e a. e -> Result e a
Failure e
e
    else e -> Result e Char
forall e a. e -> Result e a
Failure e
e

-- | Variant of 'any' with unboxed result.
any# :: e -> Parser e s Char#
{-# INLINE any# #-}
any# :: forall e s. e -> Parser e s Char#
any# e
e =
  ((# ByteArray#, Int#, Int# #) -> ST# s (Result# e Char#))
-> Parser e s Char#
forall a b c.
((# ByteArray#, Int#, Int# #) -> ST# b (Result# a c))
-> Parser a b c
Parser
    ( \(# ByteArray#
arr, Int#
off, Int#
len #) State# s
s0 -> case Int#
len of
        Int#
0# -> (# State# s
s0, (# e
e | #) #)
        Int#
_ ->
          let !w :: Char#
w = ByteArray# -> Int# -> Char#
indexCharArray# ByteArray#
arr Int#
off
           in case Char# -> Int#
ord# Char#
w Int# -> Int# -> Int#
<# Int#
128# of
                Int#
1# -> (# State# s
s0, (# | (# Char#
w, Int#
off Int# -> Int# -> Int#
+# Int#
1#, Int#
len Int# -> Int# -> Int#
-# Int#
1# #) #) #)
                Int#
_ -> (# State# s
s0, (# e
e | #) #)
    )

unI :: Int -> Int#
{-# INLINE unI #-}
unI :: Int -> Int#
unI (I# Int#
w) = Int#
w

{- | Examine the next byte without consuming it, interpret it as an
ASCII-encoded character. This fails if the byte is above @0x7F@ or
if the end of input has been reached.
-}
peek :: e -> Parser e s Char
{-# INLINE peek #-}
peek :: forall e s. e -> Parser e s Char
peek e
e = (Bytes -> Result e Char) -> Parser e s Char
forall e a s. (Bytes -> Result e a) -> Parser e s a
uneffectful ((Bytes -> Result e Char) -> Parser e s Char)
-> (Bytes -> Result e Char) -> Parser e s Char
forall a b. (a -> b) -> a -> b
$ \Bytes
chunk ->
  if Bytes -> Int
length Bytes
chunk Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0
    then
      let w :: Word8
w = ByteArray -> Int -> Word8
forall a. Prim a => ByteArray -> Int -> a
PM.indexByteArray (Bytes -> ByteArray
array Bytes
chunk) (Bytes -> Int
offset Bytes
chunk) :: Word8
       in if Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
128
            then
              Char -> Int -> Int -> Result e Char
forall e a. a -> Int -> Int -> Result e a
Success
                (Char# -> Char
C# (Int# -> Char#
chr# (Int -> Int#
unI (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w))))
                (Bytes -> Int
offset Bytes
chunk)
                (Bytes -> Int
length Bytes
chunk)
            else e -> Result e Char
forall e a. e -> Result e a
Failure e
e
    else e -> Result e Char
forall e a. e -> Result e a
Failure e
e

{- | Consume the next byte, interpreting it as an ASCII-encoded character.
Fails if the byte is above @0x7F@. Returns @Nothing@ if the
end of the input has been reached.
-}
opt :: e -> Parser e s (Maybe Char)
{-# INLINE opt #-}
opt :: forall e s. e -> Parser e s (Maybe Char)
opt e
e = (Bytes -> Result e (Maybe Char)) -> Parser e s (Maybe Char)
forall e a s. (Bytes -> Result e a) -> Parser e s a
uneffectful ((Bytes -> Result e (Maybe Char)) -> Parser e s (Maybe Char))
-> (Bytes -> Result e (Maybe Char)) -> Parser e s (Maybe Char)
forall a b. (a -> b) -> a -> b
$ \Bytes
chunk ->
  if Bytes -> Int
length Bytes
chunk Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0
    then
      let w :: Word8
w = ByteArray -> Int -> Word8
forall a. Prim a => ByteArray -> Int -> a
PM.indexByteArray (Bytes -> ByteArray
array Bytes
chunk) (Bytes -> Int
offset Bytes
chunk) :: Word8
       in if Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
128
            then
              Maybe Char -> Int -> Int -> Result e (Maybe Char)
forall e a. a -> Int -> Int -> Result e a
Success
                (Char -> Maybe Char
forall a. a -> Maybe a
Just (Char# -> Char
C# (Int# -> Char#
chr# (Int -> Int#
unI (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w)))))
                (Bytes -> Int
offset Bytes
chunk Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
                (Bytes -> Int
length Bytes
chunk Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
            else e -> Result e (Maybe Char)
forall e a. e -> Result e a
Failure e
e
    else Maybe Char -> Int -> Int -> Result e (Maybe Char)
forall e a. a -> Int -> Int -> Result e a
Success Maybe Char
forall a. Maybe a
Nothing (Bytes -> Int
offset Bytes
chunk) (Bytes -> Int
length Bytes
chunk)

{- | Consume characters matching the predicate. The stops when it
encounters a non-matching character or when it encounters a byte
above @0x7F@. This never fails.
-}
skipWhile :: (Char -> Bool) -> Parser e s ()
{-# INLINE skipWhile #-}
skipWhile :: forall e s. (Char -> Bool) -> Parser e s ()
skipWhile Char -> Bool
p =
  ((# ByteArray#, Int#, Int# #) -> ST# s (Result# e ()))
-> Parser e s ()
forall a b c.
((# ByteArray#, Int#, Int# #) -> ST# b (Result# a c))
-> Parser a b c
Parser
    ( \(# ByteArray#
arr, Int#
off0, Int#
len0 #) State# s
s0 ->
        let go :: Int# -> Int# -> (# (), Int#, Int# #)
go Int#
off Int#
len = case Int#
len of
              Int#
0# -> (# (), Int#
off, Int#
0# #)
              Int#
_ ->
                let c :: Char#
c = ByteArray# -> Int# -> Char#
indexCharArray# ByteArray#
arr Int#
off
                 in case Char -> Bool
p (Char# -> Char
C# Char#
c) of
                      Bool
True -> case Char# -> Char# -> Int#
gtChar# Char#
c Char#
'\x7F'# of
                        Int#
1# -> (# (), Int#
off, Int#
len #)
                        Int#
_ -> Int# -> Int# -> (# (), Int#, Int# #)
go (Int#
off Int# -> Int# -> Int#
+# Int#
1#) (Int#
len Int# -> Int# -> Int#
-# Int#
1#)
                      Bool
False -> (# (), Int#
off, Int#
len #)
         in (# State# s
s0, (# | Int# -> Int# -> (# (), Int#, Int# #)
go Int#
off0 Int#
len0 #) #)
    )

{- | Skip uppercase and lowercase letters until a non-alpha
character is encountered.
-}
skipAlpha :: Parser e s ()
{-# INLINE skipAlpha #-}
skipAlpha :: forall e s. Parser e s ()
skipAlpha = (Bytes -> Result# e ()) -> Parser e s ()
forall e a s. (Bytes -> Result# e a) -> Parser e s a
uneffectful# ((Bytes -> Result# e ()) -> Parser e s ())
-> (Bytes -> Result# e ()) -> Parser e s ()
forall a b. (a -> b) -> a -> b
$ \Bytes
c ->
  (# Int#, Int# #) -> Result# e ()
forall e. (# Int#, Int# #) -> Result# e ()
upcastUnitSuccess (Bytes -> (# Int#, Int# #)
skipAlphaAsciiLoop Bytes
c)

{- | Skip uppercase and lowercase letters until a non-alpha
character is encountered.
-}
skipAlpha1 :: e -> Parser e s ()
{-# INLINE skipAlpha1 #-}
skipAlpha1 :: forall e s. e -> Parser e s ()
skipAlpha1 e
e = (Bytes -> Result# e ()) -> Parser e s ()
forall e a s. (Bytes -> Result# e a) -> Parser e s a
uneffectful# ((Bytes -> Result# e ()) -> Parser e s ())
-> (Bytes -> Result# e ()) -> Parser e s ()
forall a b. (a -> b) -> a -> b
$ \Bytes
c ->
  e -> Bytes -> Result# e ()
forall e. e -> Bytes -> Result# e ()
skipAlphaAsciiLoop1Start e
e Bytes
c

skipAlphaAsciiLoop ::
  Bytes -> -- Chunk
  (# Int#, Int# #)
{-# INLINE skipAlphaAsciiLoop #-}
skipAlphaAsciiLoop :: Bytes -> (# Int#, Int# #)
skipAlphaAsciiLoop !Bytes
c =
  if Bytes -> Int
length Bytes
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0
    then
      let w :: Char
w = ByteArray -> Int -> Char
indexLatinCharArray (Bytes -> ByteArray
array Bytes
c) (Bytes -> Int
offset Bytes
c)
       in if (Char
w Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char
w Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'z') Bool -> Bool -> Bool
|| (Char
w Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char
w Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'Z')
            then Bytes -> (# Int#, Int# #)
skipAlphaAsciiLoop (Int -> Bytes -> Bytes
Bytes.unsafeDrop Int
1 Bytes
c)
            else (# Int -> Int#
unI (Bytes -> Int
offset Bytes
c), Int -> Int#
unI (Bytes -> Int
length Bytes
c) #)
    else (# Int -> Int#
unI (Bytes -> Int
offset Bytes
c), Int -> Int#
unI (Bytes -> Int
length Bytes
c) #)

skipAlphaAsciiLoop1Start ::
  e ->
  Bytes -> -- chunk
  Result# e ()
{-# INLINE skipAlphaAsciiLoop1Start #-}
skipAlphaAsciiLoop1Start :: forall e. e -> Bytes -> Result# e ()
skipAlphaAsciiLoop1Start e
e !Bytes
c =
  if Bytes -> Int
length Bytes
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0
    then
      let w :: Char
w = ByteArray -> Int -> Char
indexLatinCharArray (Bytes -> ByteArray
array Bytes
c) (Bytes -> Int
offset Bytes
c)
       in if (Char
w Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char
w Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'z') Bool -> Bool -> Bool
|| (Char
w Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char
w Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'Z')
            then (# Int#, Int# #) -> (# e | (# (), Int#, Int# #) #)
forall e. (# Int#, Int# #) -> Result# e ()
upcastUnitSuccess (Bytes -> (# Int#, Int# #)
skipAlphaAsciiLoop (Int -> Bytes -> Bytes
Bytes.unsafeDrop Int
1 Bytes
c))
            else (# e
e | #)
    else (# e
e | #)

byteArrayToShortByteString :: PM.ByteArray -> BSS.ShortByteString
{-# INLINE byteArrayToShortByteString #-}
byteArrayToShortByteString :: ByteArray -> ShortByteString
byteArrayToShortByteString (PM.ByteArray ByteArray#
x) = ByteArray# -> ShortByteString
BSS.SBS ByteArray#
x