{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE BinaryLiterals #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE GADTSyntax #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE PolyKinds #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE UnboxedTuples #-}

{- | Parse input as ASCII-encoded text. Some parsers in this module,
like 'any' and 'peek', fail if they encounter a byte above @0x7F@.
Others, like numeric parsers and skipping parsers, leave the cursor
at the position of the offending byte without failing.
-}
module Data.Bytes.Parser.Ascii
  ( -- * Matching
    Latin.char
  , Latin.char2
  , Latin.char3
  , Latin.char4

    -- * Case-Insensitive Matching
  , charInsensitive

    -- * Get Character
  , any
  , any#
  , peek
  , opt

    -- * Match Many
  , shortTrailedBy
  , takeShortWhile

    -- * Skip
  , Latin.skipDigits
  , Latin.skipDigits1
  , Latin.skipChar
  , Latin.skipChar1
  , skipAlpha
  , skipAlpha1
  , skipTrailedBy
  , skipWhile

    -- * Numbers
  , Latin.decWord
  , Latin.decWord8
  , Latin.decWord16
  , Latin.decWord32
  ) where

import Prelude hiding (any, fail, length, takeWhile)

import Control.Monad.ST (runST)
import Data.Bits (clearBit)
import Data.Bytes.Parser.Internal (Parser (..), Result (..), Result#, indexLatinCharArray, uneffectful, uneffectful#, upcastUnitSuccess)
import Data.Bytes.Types (Bytes (..))
import Data.Char (ord)
import Data.Text.Short (ShortText)
import Data.Word (Word8)
import GHC.Exts (Char (C#), Char#, Int (I#), Int#, chr#, gtChar#, indexCharArray#, ord#, (+#), (-#), (<#))

import qualified Data.ByteString.Short.Internal as BSS
import qualified Data.Bytes as Bytes
import qualified Data.Bytes.Parser.Latin as Latin
import qualified Data.Bytes.Parser.Unsafe as Unsafe
import qualified Data.Primitive as PM
import qualified Data.Text.Short.Unsafe as TS

{- | Consume the next character, failing if it does not match the expected
value or if there is no more input. This check for equality is case
insensitive.

Precondition: The argument must be a letter (@[a-zA-Z]@). Behavior is
undefined if it is not.
-}
charInsensitive :: e -> Char -> Parser e s ()
{-# INLINE charInsensitive #-}
charInsensitive e !c = uneffectful $ \chunk ->
  if length chunk > 0
    then
      if clearBit (PM.indexByteArray (array chunk) (offset chunk) :: Word8) 5 == w
        then Success () (offset chunk + 1) (length chunk - 1)
        else Failure e
    else Failure e
 where
  w = clearBit (fromIntegral @Int @Word8 (ord c)) 5

{- | Consume input until the trailer is found. Then, consume
the trailer as well. This fails if the trailer is not
found or if any non-ASCII characters are encountered.
-}
skipTrailedBy :: e -> Char -> Parser e s ()
{-# INLINE skipTrailedBy #-}
skipTrailedBy e !c = do
  let go = do
        !d <- any e
        if d == c
          then pure ()
          else go
  go

{- | Consume characters matching the predicate. The stops when it
encounters a non-matching character or when it encounters a byte
above @0x7F@. This never fails.
-}
takeShortWhile :: (Char -> Bool) -> Parser e s ShortText
{-# INLINE takeShortWhile #-}
takeShortWhile p = do
  !start <- Unsafe.cursor
  skipWhile p
  end <- Unsafe.cursor
  src <- Unsafe.expose
  let len = end - start
      !r = runST $ do
        marr <- PM.newByteArray len
        PM.copyByteArray marr 0 src start len
        PM.unsafeFreezeByteArray marr
  pure $
    TS.fromShortByteStringUnsafe $
      byteArrayToShortByteString $
        r

{- | Consume input through the next occurrence of the target
character and return the consumed input, excluding the
target character, as a 'ShortText'. This fails if it
encounters any bytes above @0x7F@.
-}
shortTrailedBy :: e -> Char -> Parser e s ShortText
shortTrailedBy e !c = do
  !start <- Unsafe.cursor
  skipTrailedBy e c
  end <- Unsafe.cursor
  src <- Unsafe.expose
  let len = end - start - 1
      !r = runST $ do
        marr <- PM.newByteArray len
        PM.copyByteArray marr 0 src start len
        PM.unsafeFreezeByteArray marr
  pure $
    TS.fromShortByteStringUnsafe $
      byteArrayToShortByteString $
        r

-- | Consumes and returns the next character in the input.
any :: e -> Parser e s Char
{-# INLINE any #-}
any e = uneffectful $ \chunk ->
  if length chunk > 0
    then
      let c = indexLatinCharArray (array chunk) (offset chunk)
       in if c < '\128'
            then Success c (offset chunk + 1) (length chunk - 1)
            else Failure e
    else Failure e

-- | Variant of 'any' with unboxed result.
any# :: e -> Parser e s Char#
{-# INLINE any# #-}
any# e =
  Parser
    ( \(# arr, off, len #) s0 -> case len of
        0# -> (# s0, (# e | #) #)
        _ ->
          let !w = indexCharArray# arr off
           in case ord# w <# 128# of
                1# -> (# s0, (# | (# w, off +# 1#, len -# 1# #) #) #)
                _ -> (# s0, (# e | #) #)
    )

unI :: Int -> Int#
{-# INLINE unI #-}
unI (I# w) = w

{- | Examine the next byte without consuming it, interpret it as an
ASCII-encoded character. This fails if the byte is above @0x7F@ or
if the end of input has been reached.
-}
peek :: e -> Parser e s Char
{-# INLINE peek #-}
peek e = uneffectful $ \chunk ->
  if length chunk > 0
    then
      let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8
       in if w < 128
            then
              Success
                (C# (chr# (unI (fromIntegral w))))
                (offset chunk)
                (length chunk)
            else Failure e
    else Failure e

{- | Consume the next byte, interpreting it as an ASCII-encoded character.
Fails if the byte is above @0x7F@. Returns @Nothing@ if the
end of the input has been reached.
-}
opt :: e -> Parser e s (Maybe Char)
{-# INLINE opt #-}
opt e = uneffectful $ \chunk ->
  if length chunk > 0
    then
      let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8
       in if w < 128
            then
              Success
                (Just (C# (chr# (unI (fromIntegral w)))))
                (offset chunk + 1)
                (length chunk - 1)
            else Failure e
    else Success Nothing (offset chunk) (length chunk)

{- | Consume characters matching the predicate. The stops when it
encounters a non-matching character or when it encounters a byte
above @0x7F@. This never fails.
-}
skipWhile :: (Char -> Bool) -> Parser e s ()
{-# INLINE skipWhile #-}
skipWhile p =
  Parser
    ( \(# arr, off0, len0 #) s0 ->
        let go off len = case len of
              0# -> (# (), off, 0# #)
              _ ->
                let c = indexCharArray# arr off
                 in case p (C# c) of
                      True -> case gtChar# c '\x7F'# of
                        1# -> (# (), off, len #)
                        _ -> go (off +# 1#) (len -# 1#)
                      False -> (# (), off, len #)
         in (# s0, (# | go off0 len0 #) #)
    )

{- | Skip uppercase and lowercase letters until a non-alpha
character is encountered.
-}
skipAlpha :: Parser e s ()
{-# INLINE skipAlpha #-}
skipAlpha = uneffectful# $ \c ->
  upcastUnitSuccess (skipAlphaAsciiLoop c)

{- | Skip uppercase and lowercase letters until a non-alpha
character is encountered.
-}
skipAlpha1 :: e -> Parser e s ()
{-# INLINE skipAlpha1 #-}
skipAlpha1 e = uneffectful# $ \c ->
  skipAlphaAsciiLoop1Start e c

skipAlphaAsciiLoop ::
  Bytes -> -- Chunk
  (# Int#, Int# #)
{-# INLINE skipAlphaAsciiLoop #-}
skipAlphaAsciiLoop !c =
  if length c > 0
    then
      let w = indexLatinCharArray (array c) (offset c)
       in if (w >= 'a' && w <= 'z') || (w >= 'A' && w <= 'Z')
            then skipAlphaAsciiLoop (Bytes.unsafeDrop 1 c)
            else (# unI (offset c), unI (length c) #)
    else (# unI (offset c), unI (length c) #)

skipAlphaAsciiLoop1Start ::
  e ->
  Bytes -> -- chunk
  Result# e ()
{-# INLINE skipAlphaAsciiLoop1Start #-}
skipAlphaAsciiLoop1Start e !c =
  if length c > 0
    then
      let w = indexLatinCharArray (array c) (offset c)
       in if (w >= 'a' && w <= 'z') || (w >= 'A' && w <= 'Z')
            then upcastUnitSuccess (skipAlphaAsciiLoop (Bytes.unsafeDrop 1 c))
            else (# e | #)
    else (# e | #)

byteArrayToShortByteString :: PM.ByteArray -> BSS.ShortByteString
{-# INLINE byteArrayToShortByteString #-}
byteArrayToShortByteString (PM.ByteArray x) = BSS.SBS x