-- |
-- Module      :  Data.ByteString.Parser.Char8
-- License     :  CC0-1.0
--
-- Maintainer  :  mordae@anilinux.org
-- Stability   :  unstable
-- Portability :  non-portable (ghc)
--
-- This module provides a parser for ASCII 'ByteString'.
--
--   * If you\'d like to parse Unicode text, look instead at the
--     "Data.Text.Parser". It is slower, but in a way more correct.
--
--   * If you\'d like to parse byte sequences, look instead at the
--     "Data.ByteString.Parser". It reuses the same 'Parser', but
--     provides functions working with 'Word8' instead of 'Char'.
--

module Data.ByteString.Parser.Char8
  ( Parser(..)
  , Result(..)
  , parseOnly

    -- * Characters
  , char
  , notChar
  , anyChar
  , satisfy
  , space
  , isSpace
  , skipSpace
  , peekChar

    -- * Strings
  , string
  , stringCI
  , Data.ByteString.Parser.Char8.take
  , scan
  , runScanner
  , inRange
  , notInRange
  , Data.ByteString.Parser.Char8.takeWhile
  , takeWhile1
  , takeTill
  , takeTill1

    -- * Numbers
  , signed
  , decimal
  , hexadecimal
  , octal
  , fractional

    -- * Combinators
  , provided
  , choice
  , branch
  , Data.ByteString.Parser.count
  , optional
  , eitherP
  , option
  , many
  , many1
  , manyTill
  , sepBy
  , sepBy1
  , wrap
  , match
  , label
  , unlabel
  , validate

    -- * End Of Input
  , takeByteString
  , peekByteString
  , endOfInput
  , atEnd

    -- * Position
  , offset
  , position
  , explain
  , Explanation(..)

    -- * Miscellaneous
    -- |
    -- These are all generic methods, but since I sometimes forget about them,
    -- it is nice to have them listed here for reference when writing parsers.
  , Control.Applicative.empty
  , pure
  , guard
  , when
  , unless
  , void
  )
where
  import Prelude hiding (null, length, splitAt, take)

  import Control.Applicative
  import Control.Monad

  import Data.Maybe
  import Data.Word
  import Data.List qualified as List
  import GHC.Base (unsafeChr)

  import Data.ByteString as BS
  import Data.ByteString.Unsafe as BS

  import Snack.Combinators

  import Data.ByteString.Parser ( Parser(..), Result(..), parseOnly
                                , string, count, match, label, unlabel
                                , validate, branch
                                , takeByteString, peekByteString
                                , endOfInput, atEnd, offset
                                )

  import Data.ByteString.Lex.Fractional qualified as LF
  import Data.ByteString.Lex.Integral qualified as LI


  -- |
  -- Accepts exactly the given ASCII character and nothing else.
  --
  -- The parser is labelled with the 'show' rendering of the character.
  --
  {-# INLINE CONLIKE char #-}
  char :: Char -> Parser Char
  char c = label (show c) (satisfy (== c))


  -- |
  -- Accepts any single ASCII character other than the given one.
  --
  {-# INLINE CONLIKE notChar #-}
  notChar :: Char -> Parser Char
  notChar c = satisfy (/= c)


  -- |
  -- Accepts whatever character comes next.
  --
  -- Fails only on empty input, expecting @any character@.
  --
  {-# INLINE anyChar #-}
  anyChar :: Parser Char
  anyChar = Parser next
    where
      next inp
        | null inp  = Failure ["any character"] inp
        | otherwise = Success (w2c (unsafeHead inp)) (unsafeTail inp)


  -- |
  -- Accepts a single character for which the predicate holds.
  --
  -- On empty input the failure expects @more input@. When the predicate
  -- rejects the character, the failure carries no expectations and the
  -- input is left untouched.
  --
  {-# INLINE CONLIKE satisfy #-}
  satisfy :: (Char -> Bool) -> Parser Char
  satisfy isOk = Parser check
    where
      check inp
        | null inp  = Failure ["more input"] inp
        | isOk c    = Success c (unsafeTail inp)
        | otherwise = Failure [] inp
        where
          -- Only demanded once the input is known to be non-empty.
          c = w2c (unsafeHead inp)


  -- |
  -- Accepts one ASCII white space character, as decided by 'isSpace'.
  --
  {-# INLINE space #-}
  space :: Parser Char
  space = label "space" (satisfy isSpace)


  -- |
  -- Consumes a (possibly empty) run of ASCII white space characters and
  -- throws it away. See 'isSpace' for what counts as white space.
  --
  {-# INLINE skipSpace #-}
  skipSpace :: Parser ()
  skipSpace = () <$ Data.ByteString.Parser.Char8.takeWhile isSpace


  -- |
  -- True for any of the @[' ', '\\t', '\\n', '\\v', '\\f', '\\r']@ characters,
  -- that is for the space and for the contiguous range from the horizontal
  -- tab up to the carriage return.
  --
  -- Please note that "Data.Text.Parser" re-exports 'Data.Char.isSpace', that
  -- considers more unicode codepoints, making it significantly slower.
  --
  {-# INLINE isSpace #-}
  isSpace :: Char -> Bool
  isSpace c
    | c == ' '  = True
    | otherwise = c >= '\t' && c <= '\r'


  -- |
  -- Returns the next character without consuming it.
  --
  -- Be careful, peeking beyond the end of the input fails.
  -- You might want to check using 'atEnd' beforehand.
  --
  {-# INLINE peekChar #-}
  peekChar :: Parser Char
  peekChar = Parser look
    where
      look inp
        | null inp  = Failure ["more input"] inp
        | otherwise = Success (w2c (unsafeHead inp)) inp


  -- |
  -- Accepts a string equal to the given one up to ASCII letter case.
  --
  -- On success the bytes are returned exactly as they appeared in the input,
  -- not as they were spelled in the pattern.
  --
  {-# INLINE CONLIKE stringCI #-}
  stringCI :: ByteString -> Parser ByteString
  stringCI str = Parser attempt
    where
      attempt inp
        | toCaseFold pfx == toCaseFold str = Success pfx sfx
        | otherwise                        = Failure [show str] inp
        where
          -- Shorter input yields a shorter prefix, which then fails to match.
          (pfx, sfx) = splitAt (length str) inp


  -- |
  -- Simple ASCII case folding: bytes 65 to 90 (@A@ to @Z@) are shifted
  -- by 32 to their lower case counterparts, all other bytes are kept.
  --
  {-# INLINE toCaseFold #-}
  toCaseFold :: ByteString -> ByteString
  toCaseFold = BS.map lower
    where
      lower w
        | w >= 65 && w <= 90 = w + 32
        | otherwise          = w


  -- |
  -- Accepts given number of bytes.
  -- Fails when not enough bytes are available.
  --
  -- A negative count is treated as zero, in which case the parser succeeds
  -- with an empty string and consumes nothing.
  --
  {-# INLINE CONLIKE take #-}
  take :: Int -> Parser ByteString
  take n = Parser \inp ->
    -- 'unsafeTake' and 'unsafeDrop' perform no bounds checking, so a negative
    -- count must never reach them.
    let k = max 0 n
     in if k > length inp
           then Failure [show n <> " more bytes"] inp
           else Success (unsafeTake k inp) (unsafeDrop k inp)


  -- |
  -- Walks the input with a stateful scanner and accepts the bytes it liked.
  -- The scanner signals the end of the acceptable extent with 'Nothing'.
  --
  -- This is 'runScanner' with the final state thrown away.
  --
  {-# INLINE CONLIKE scan #-}
  scan :: s -> (s -> Char -> Maybe s) -> Parser ByteString
  scan state scanner = fmap fst (runScanner state scanner)


  -- |
  -- Like 'scan', but the final scanner state is returned as well.
  --
  -- The scanner is fed one character at a time together with the current
  -- state. Scanning stops at the first character it answers 'Nothing' to,
  -- or at the end of the input. It never fails.
  --
  {-# INLINE CONLIKE runScanner #-}
  runScanner :: s -> (s -> Char -> Maybe s) -> Parser (ByteString, s)
  runScanner state scanner = Parser (go state 0)
    where
      go !st !n inp
        -- Ran out of input: everything was accepted.
        | n >= length inp = Success (inp, st) mempty
        | otherwise =
            case scanner st (w2c (unsafeIndex inp n)) of
              Just st' -> go st' (succ n) inp
              Nothing  -> Success (unsafeTake n inp, st) (unsafeDrop n inp)


  -- |
  -- Efficiently consumes the longest prefix whose characters all satisfy
  -- the predicate. Implemented as 'takeTill' with the predicate negated.
  --
  {-# INLINE CONLIKE takeWhile #-}
  takeWhile :: (Char -> Bool) -> Parser ByteString
  takeWhile test = takeTill \c -> not (test c)


  -- |
  -- Same as 'Data.ByteString.Parser.Char8.takeWhile', except that the
  -- result is additionally required to be non-empty.
  --
  {-# INLINE CONLIKE takeWhile1 #-}
  takeWhile1 :: (Char -> Bool) -> Parser ByteString
  takeWhile1 test = provided (Data.ByteString.Parser.Char8.takeWhile test) nonEmpty
    where
      nonEmpty bs = not (null bs)


  -- |
  -- Efficiently consumes input up to, but not including, the first character
  -- that satisfies the predicate. Consumes everything if there is none.
  -- An inverse of 'Data.ByteString.Parser.Char8.takeWhile'.
  --
  {-# INLINE CONLIKE takeTill #-}
  takeTill :: (Char -> Bool) -> Parser ByteString
  takeTill test = Parser cut
    where
      cut inp = Success (unsafeTake n inp) (unsafeDrop n inp)
        where
          -- Index of the first matching byte, or the input length if none.
          n = maybe (length inp) id (findIndex (\w -> test (w2c w)) inp)


  -- |
  -- Same as 'takeTill', except that the result is additionally required
  -- to be non-empty.
  --
  {-# INLINE CONLIKE takeTill1 #-}
  takeTill1 :: (Char -> Bool) -> Parser ByteString
  takeTill1 test = provided (Data.ByteString.Parser.Char8.takeTill test) nonEmpty
    where
      nonEmpty bs = not (null bs)


  -- |
  -- Accepts an optional @\'+\'@ or @\'-\'@ character in front of the given
  -- number parser. A minus sign negates the parsed number, a plus sign or
  -- no sign at all leaves it as it is.
  --
  {-# INLINE signed #-}
  signed :: (Num a) => Parser a -> Parser a
  signed runNumber = negative <|> positive <|> runNumber
    where
      negative = char '-' *> (negate <$> runNumber)
      positive = char '+' *> runNumber


  -- |
  -- Accepts an integral number in the decimal format.
  --
  -- Delegates to 'LI.readDecimal'. When that finds no number, the parser
  -- fails expecting @decimal@ and leaves the input untouched.
  --
  {-# INLINE decimal #-}
  decimal :: (Integral a) => Parser a
  decimal = Parser \inp ->
    maybe (Failure ["decimal"] inp)
          (\(value, rest) -> Success value rest)
          (LI.readDecimal inp)


  -- |
  -- Accepts an integral number in the hexadecimal format in either case.
  -- Does not look for @0x@ or similar prefixes.
  --
  -- Delegates to 'LI.readHexadecimal'. When that finds no number, the parser
  -- fails expecting @hexadecimal@ and leaves the input untouched.
  --
  {-# INLINE hexadecimal #-}
  hexadecimal :: (Integral a) => Parser a
  hexadecimal = Parser \inp ->
    maybe (Failure ["hexadecimal"] inp)
          (\(value, rest) -> Success value rest)
          (LI.readHexadecimal inp)


  -- |
  -- Accepts an integral number in the octal format.
  --
  -- Delegates to 'LI.readOctal'. When that finds no number, the parser
  -- fails expecting @octal@ and leaves the input untouched.
  --
  {-# INLINE octal #-}
  octal :: (Integral a) => Parser a
  octal = Parser \inp ->
    maybe (Failure ["octal"] inp)
          (\(value, rest) -> Success value rest)
          (LI.readOctal inp)


  -- |
  -- Accepts a fractional number as a decimal optionally followed by a dot
  -- and the fractional part. Does not support exponentiation.
  --
  -- Delegates to 'LF.readDecimal'. When that finds no number, the parser
  -- fails expecting @fractional@ and leaves the input untouched.
  --
  {-# INLINE fractional #-}
  fractional :: (Fractional a) => Parser a
  fractional = Parser \inp ->
    maybe (Failure ["fractional"] inp)
          (\(value, rest) -> Success value rest)
          (LF.readDecimal inp)


  -- |
  -- Reinterpret a byte as the character with the same code point.
  --
  {-# INLINE w2c #-}
  w2c :: Word8 -> Char
  w2c byte = unsafeChr (fromIntegral byte)


  -- |
  -- Determine the 1-based @(line, column)@ reached so far, given the
  -- original input and the remainder that has not been consumed yet.
  --
  -- Counts line feed characters leading to the 'offset', so only use it
  -- on your slow path. For example when describing parsing errors.
  --
  position :: ByteString -> ByteString -> (Int, Int)
  position inp more = (succ newlines, succ (length tailLine))
    where
      -- Everything that has been consumed before the remainder.
      consumed = dropEnd (length more) inp
      newlines = BS.count lineFeed consumed
      -- The consumed part of the line the parser currently stands on.
      tailLine = takeWhileEnd (/= lineFeed) consumed

      lineFeed :: Word8
      lineFeed = 10


  -- |
  -- A user-presentable description of a 'Result', as produced by 'explain':
  -- where in the source it applies and what to say about it.
  --
  data Explanation = Explanation
    { exSource   :: String
      -- ^ Name of the source, as handed to 'explain'.
    , exSpanFrom :: (Int, Int)
      -- ^ Line and column where the described span begins.
    , exSpanTo   :: (Int, Int)
      -- ^ Line and column where the described span ends.
    , exMessage  :: String
      -- ^ Human readable message for the span.
    }
    deriving (Eq, Show)


  -- |
  -- Turn a parsing 'Result' into an 'Explanation' fit for showing to the user.
  --
  -- Takes the name of the source, the complete original input and the
  -- result to describe.
  --
  --   * A 'Success' and a 'Failure' both point at the single position where
  --     the remaining input starts. The 'Failure' message lists the
  --     expectations, if there are any.
  --
  --   * An 'Error' spans from the start of its remaining input over the
  --     reported length and carries the reason as the message.
  --
  explain :: String -> ByteString -> Result a -> Explanation
  explain src inp result =
    case result of
      Success _ more ->
        pointAt more "Parsed successfully up to this point."

      Failure expected more ->
        pointAt more (describe expected)

      Error reason more len ->
        Explanation { exSource   = src
                    , exSpanFrom = locate more
                    , exSpanTo   = locate (BS.drop len more)
                    , exMessage  = reason
                    }
    where
      -- Line and column at which the given remainder starts.
      locate = position inp

      -- Explanation whose span starts and ends at the very same position.
      pointAt more message =
        let pos = locate more
         in Explanation { exSource   = src
                        , exSpanFrom = pos
                        , exSpanTo   = pos
                        , exMessage  = message
                        }

      describe []     = "Unexpected input."
      describe wanted = "Expected " <> List.intercalate ", " wanted <> "."


-- vim:set ft=haskell sw=2 ts=2 et: