{-# LANGUAGE BangPatterns, CPP, GADTs, Rank2Types, OverloadedStrings #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
-- |
-- Module      :  Data.Picoparsec.Monoid.Internal
-- Copyright   :  Bryan O'Sullivan 2007-2011, Mario Blažević <blamario@yahoo.com> 2014
-- License     :  BSD3
--
-- Maintainer  :  Mario Blažević
-- Stability   :  experimental
-- Portability :  unknown
--
-- Simple, efficient combinator parsing for
-- 'Data.Monoid.Cancellative.LeftGCDMonoid' and
-- 'Data.Monoid.Factorial.FactorialMonoid' inputs, loosely based on
-- the Parsec library.

module Data.Picoparsec.Monoid.Internal
    (
    -- * Parser types
      Parser
    , Result

    -- * Running parsers
    , parse
    , parseOnly

    -- * Combinators
    , module Data.Picoparsec.Combinator

    -- * Parsing individual tokens
    , anyToken
    , notToken
    , satisfy
    , satisfyWith
    , skip
    , peekToken
    
    -- ** Parsing individual characters
    , anyChar
    , char
    , notChar
    , satisfyChar
    , peekChar
    , peekChar'

    -- * Efficient string handling
    , scan
    , skipWhile
    , string
    , stringTransform
    , take
    , takeWhile
    , takeWhile1
    , takeWith
    , takeTill

    -- ** Efficient character string handling
    , scanChars
    , skipCharsWhile
    , takeCharsWhile
    , takeCharsWhile1
    , takeCharsTill
    , takeTillChar
    , takeTillChar1

    -- ** Consume all remaining input
    , takeRest

    -- * Utilities
    , endOfLine
    , ensureOne
    ) where

import Prelude hiding (getChar, null, span, take, takeWhile)

import Control.Applicative ((<|>), (<$>))
import Control.Monad (when)
import Data.Picoparsec.Combinator
import Data.Picoparsec.Internal.Types
import Data.Picoparsec.Internal (demandInput, get, prompt, put, wantInput)
import Data.Monoid (Monoid(..), (<>))
import Data.Monoid.Cancellative (LeftGCDMonoid(..))
import Data.Monoid.Null (MonoidNull(null))
import qualified Data.Monoid.Factorial as Factorial
import Data.Monoid.Factorial (FactorialMonoid)
import Data.Monoid.Textual (TextualMonoid)
import qualified Data.Monoid.Textual as Textual
import Data.String (IsString(..))
import qualified Data.Picoparsec.Internal.Types as T

type Result = IResult

instance (IsString a, LeftGCDMonoid a, MonoidNull a, a ~ b) => IsString (Parser a b) where
    fromString = string . fromString

-- | If at least one token of input is available, return the current
-- input, otherwise fail.
ensureOne :: MonoidNull t => Parser t t
ensureOne = T.Parser $ \i0 a0 m0 kf ks ->
    if null (unI i0)
    then T.runParser (demandInput >> get) i0 a0 m0 kf ks
    else ks i0 a0 m0 (unI i0)
{-# INLINE ensureOne #-}

-- | This parser always succeeds.  It returns 'True' if any input is
-- available on demand, and 'False' if the end of all input has been reached.
wantMoreInput :: MonoidNull t => Parser t Bool
wantMoreInput = T.Parser $ \i0 a0 m0 _kf ks ->
  if m0 == Complete  
  then ks i0 a0 m0 False
  else let kf' i a m = ks i a m False
           ks' i a m = ks i a m True
       in prompt i0 a0 m0 kf' ks'

-- | The parser @satisfy p@ succeeds for any prime input token for
-- which the predicate @p@ returns 'True'. Returns the token that is
-- actually parsed.
--
-- >digit = satisfy isDigit
-- >    where isDigit w = w >= "0" && w <= "9"
satisfy :: FactorialMonoid t => (t -> Bool) -> Parser t t
satisfy p = do
  s <- ensureOne
  let Just (first, rest) = Factorial.splitPrimePrefix s
  if p first then put rest >> return first else fail "satisfy"
{-# INLINE satisfy #-}

-- | The parser @satisfy p@ succeeds for any input character for
-- which the predicate @p@ returns 'True'. Returns the character that 
-- is actually parsed.
--
-- >digit = satisfy isDigit
-- >    where isDigit w = w >= "0" && w <= "9"
satisfyChar :: TextualMonoid t => (Char -> Bool) -> Parser t Char
satisfyChar p = do
  s <- ensureOne
  case Textual.splitCharacterPrefix s 
     of Just (first, rest) | p first -> put rest >> return first 
        _ -> fail "satisfy"
{-# INLINE satisfyChar #-}

-- | The parser @skip p@ succeeds for any prime input token for which
-- the predicate @p@ returns 'True'.
--
-- >skipDigit = skip isDigit
-- >    where isDigit w = w >= "0" && w <= "9"
skip :: FactorialMonoid t => (t -> Bool) -> Parser t ()
skip p = do
  s <- ensureOne
  let Just (first, rest) = Factorial.splitPrimePrefix s
  if p first then put rest else fail "skip"

-- | The parser @satisfyWith f p@ transforms an input token, and
-- succeeds if the predicate @p@ returns 'True' on the transformed
-- value. The parser returns the transformed token that was parsed.
satisfyWith :: FactorialMonoid t => (t -> a) -> (a -> Bool) -> Parser t a
satisfyWith f p = do
  s <- ensureOne
  let Just (first, rest) = Factorial.splitPrimePrefix s
      c = f $! first
  if p c then put rest >> return c else fail "satisfyWith"
{-# INLINE satisfyWith #-}

-- | Consume @n@ tokens of input, but succeed only if the predicate
-- returns 'True'.
takeWith :: FactorialMonoid t => Int -> (t -> Bool) -> Parser t t
takeWith n0 p =
  get >>= \i->
  let !(h, t) = Factorial.splitAt n0 i
      n1 = Factorial.length h
  in if null t && n1 < n0
     then put mempty
          >> demandInput
          >> takeWith' h n1 p
     else if p h
          then put t
               >> return h
          else fail "takeWith"
{-# INLINABLE takeWith #-}

-- The uncommon case
takeWith' :: FactorialMonoid t => t -> Int -> (t -> Bool) -> Parser t t
takeWith' h0 n0 p =
  get >>= \i->
  let !(h, t) = Factorial.splitAt n0 i
      n1 = Factorial.length h
      h1 = h0 <> h
  in if null t && n1 < n0
     then put mempty
          >> demandInput
          >> takeWith' h1 n1 p
     else if p h1
          then put t
               >> return h1
          else fail "takeWith"
{-# INLINABLE takeWith' #-}

-- | Consume exactly @n@ prime input tokens.
take :: FactorialMonoid t => Int -> Parser t t
take n = takeWith n (const True)
{-# INLINE take #-}

-- | @string s@ parses a prefix of input that identically matches
-- @s@. Returns the parsed string (i.e. @s@).  This parser consumes no
-- input if it fails (even if a partial match).
--
-- /Note/: The behaviour of this parser is different to that of the
-- similarly-named parser in Parsec, as this one is all-or-nothing.
-- To illustrate the difference, the following parser will fail under
-- Parsec given an input of @\"for\"@:
--
-- >string "foo" <|> string "for"
--
-- The reason for its failure is that the first branch is a
-- partial match, and will consume the letters @\'f\'@ and @\'o\'@
-- before failing.  In Attoparsec, the above parser will /succeed/ on
-- that input, because the failed first branch will consume nothing.
string :: (LeftGCDMonoid t, MonoidNull t) => t -> Parser t t
string s =
   get >>= \i->
   let !(p, s', i') = stripCommonPrefix s i
   in if null s'
      then put i' >> return s
      else if null i'
           then put mempty
                >> demandInput
                >> string' p s'
           else fail "string"
{-# INLINE string #-}

-- The uncommon case
string' :: (LeftGCDMonoid t, MonoidNull t) => t -> t -> Parser t t
string' consumed rest =
   get >>= \i->
   let !(p, s', i') = stripCommonPrefix rest i
   in if null s'
      then put i' >> return (consumed <> rest)
      else if null i'
           then put mempty
                >> demandInput
                >> string' (consumed <> p) s'
           else put (consumed <> i) 
                >> fail "string"

stringTransform :: (FactorialMonoid t, Eq t) => (t -> t) -> t
                -> Parser t t
stringTransform f s = takeWith (Factorial.length s) ((==f s) . f)
{-# INLINE stringTransform #-}

-- | Skip past input for as long as the predicate returns 'True'.
skipWhile :: FactorialMonoid t => (t -> Bool) -> Parser t ()
skipWhile p = go
 where
  go = do
    t <- Factorial.dropWhile p <$> get
    put t
    when (null t) $ do
      input <- wantMoreInput
      when input go
{-# INLINE skipWhile #-}

-- | Skip past input characters for as long as the predicate returns 'True'.
skipCharsWhile :: TextualMonoid t => (Char -> Bool) -> Parser t ()
skipCharsWhile p = go
 where
  go = do
    t <- Textual.dropWhile_ False p <$> get
    put t
    when (null t) $ do
      input <- wantMoreInput
      when input go
{-# INLINE skipCharsWhile #-}

-- | Consume input as long as the predicate returns 'False'
-- (i.e. until it returns 'True'), and return the consumed input.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'True' on the first input token.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
takeTill :: FactorialMonoid t => (t -> Bool) -> Parser t t
takeTill p = takeWhile (not . p)
{-# INLINE takeTill #-}

-- | Consume input characters as long as the predicate returns 'False'
-- (i.e. until it returns 'True'), and return the consumed input.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'True' on the first input token.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
takeCharsTill :: TextualMonoid t => (Char -> Bool) -> Parser t t
takeCharsTill p = takeCharsWhile (not . p)

-- | Consume all input until the character for which the predicate 
-- returns 'True' and return the consumed input.
--
-- The only difference between 'takeCharsTill' and 'takeTillChar' is
-- in their handling of non-character data: The former never consumes
-- it, the latter always does.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'True' on the first input token.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
takeTillChar :: TextualMonoid t => (Char -> Bool) -> Parser t t
takeTillChar p = go id
 where
  go acc = do
    (h,t) <- Textual.break_ False p <$> get
    put t
    if null t
      then do
        input <- wantInput
        if input
          then go (acc . mappend h)
          else return (acc h)
      else return (acc h)
{-# INLINE takeTillChar #-}

-- | Consume all input until the character for which the predicate 
-- returns 'True' and return the consumed input.
--
-- This parser always consumes at least one token: it will fail if the 
-- input starts with a character for which the predicate returns 
-- 'True' or if there is no input left.
takeTillChar1 :: TextualMonoid t => (Char -> Bool) -> Parser t t
takeTillChar1 p = do
  (`when` demandInput) =<< null <$> get
  (h,t) <- Textual.break_ False p <$> get
  when (null h && maybe True p (Textual.characterPrefix t)) $ fail "takeTillChar1"
  put t
  if null t
    then (h<>) <$> takeTillChar p
    else return h
{-# INLINE takeTillChar1 #-}

-- | Consume input as long as the predicate returns 'True', and return
-- the consumed input.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'False' on the first input token.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
takeWhile :: FactorialMonoid t => (t -> Bool) -> Parser t t
takeWhile p = go id
 where
  go acc = do
    (h,t) <- Factorial.span p <$> get
    put t
    if null t
      then do
        input <- wantMoreInput
        if input
          then go (acc . mappend h)
          else return (acc h)
      else return (acc h)
{-# INLINE takeWhile #-}

-- | Consume input characters as long as the predicate returns 'True', 
-- and return the consumed input.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'False' on the first input token.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
takeCharsWhile :: TextualMonoid t => (Char -> Bool) -> Parser t t
takeCharsWhile p = {-# SCC takeCharsWhile #-} go id
 where
  go acc = do
    (h,t) <- Textual.span_ False p <$> get
    put t
    if null t
      then do
        input <- wantMoreInput
        if input
          then go (acc . mappend h)
          else return (acc h)
      else return (acc h)
{-# INLINE takeCharsWhile #-}

-- | Consume all remaining input and return it as a single string.
takeRest :: MonoidNull t => Parser t t
takeRest = go []
 where
  go acc = do
    input <- wantInput
    if input
      then do
        s <- get
        put mempty
        go (s:acc)
      else return (mconcat $ reverse acc)
{-# INLINABLE takeRest #-}

-- | Consume input as long as the predicate returns 'True', and return
-- the consumed input.
--
-- This parser requires the predicate to succeed on at least one input
-- token: it will fail if the predicate never returns 'True'
-- or if there is no input left.
takeWhile1 :: FactorialMonoid t => (t -> Bool) -> Parser t t
takeWhile1 p = do
  (`when` demandInput) =<< null <$> get
  (h,t) <- Factorial.span p <$> get
  when (null h) $ fail "takeWhile1"
  put t
  if null t
    then (h<>) `fmap` takeWhile p
    else return h
{-# INLINE takeWhile1 #-}

takeCharsWhile1 :: TextualMonoid t => (Char -> Bool) -> Parser t t
takeCharsWhile1 p = do
  (`when` demandInput) =<< null <$> get
  (h,t) <- Textual.span_ False p <$> get
  when (null h) $ fail "takeCharsWhile1"
  put t
  if null t
    then (h<>) `fmap` takeCharsWhile p
    else return h
{-# INLINE takeCharsWhile1 #-}

-- | A stateful scanner.  The predicate consumes and transforms a
-- state argument, and each transformed state is passed to successive
-- invocations of the predicate on each token of the input until one
-- returns 'Nothing' or the input ends.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'Nothing' on the first prime input factor.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
scan :: FactorialMonoid t => s -> (s -> t -> Maybe s) -> Parser t t
scan s0 f = go s0 id
 where
  go s acc = do
    (h,t,s') <- Factorial.spanMaybe' s f <$> get
    put t
    if null t
      then do
        input <- wantMoreInput
        if input
          then go s' (acc . mappend h)
          else return (acc h)
      else return (acc h)
{-# INLINE scan #-}

-- | A stateful scanner.  The predicate consumes and transforms a
-- state argument, and each transformed state is passed to successive
-- invocations of the predicate on each token of the input until one
-- returns 'Nothing' or the input ends.
--
-- This parser does not fail.  It will return an empty string if the
-- predicate returns 'Nothing' on the first prime input factor.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
scanChars :: TextualMonoid t => s -> (s -> Char -> Maybe s) -> Parser t t
scanChars s0 fc = go s0 id
 where
  go s acc = do
    (h,t,s') <- Textual.spanMaybe_' s fc <$> get
    put t
    if null t
      then do
        input <- wantMoreInput
        if input
          then go s' (acc . mappend h)
          else return (acc h)
      else return (acc h)
{-# INLINE scanChars #-}

-- | Match any prime input token.
anyToken :: FactorialMonoid t => Parser t t
anyToken = satisfy $ const True
{-# INLINE anyToken #-}

-- | Match any prime input token except the given one.
notToken :: (Eq t, FactorialMonoid t) => t -> Parser t t
notToken t = satisfy (/= t)
{-# INLINE notToken #-}

-- | Match any prime input token. Returns 'mempty' if end of input
-- has been reached. Does not consume any input.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
peekToken :: FactorialMonoid t => Parser t t
peekToken = T.Parser $ \i0 a0 m0 _kf ks ->
            if null (unI i0)
            then if m0 == Complete
                 then ks i0 a0 m0 mempty
                 else let k' i a m = ks i a m $! Factorial.primePrefix (unI i)
                      in prompt i0 a0 m0 k' k'
            else ks i0 a0 m0 $! Factorial.primePrefix (unI i0)
{-# INLINE peekToken #-}

-- | Match any character.
anyChar :: TextualMonoid t => Parser t Char
anyChar = satisfyChar $ const True
{-# INLINE anyChar #-}

-- | Match a specific character.
char :: TextualMonoid t => Char -> Parser t Char
char c = satisfyChar (== c) <?> show c
{-# INLINE char #-}

-- | Match any character except the given one.
notChar :: TextualMonoid t => Char -> Parser t Char
notChar c = satisfyChar (/= c) <?> "not" ++ show c
{-# INLINE notChar #-}

-- | Match any input character, if available. Does not consume any input.
--
-- /Note/: Because this parser does not fail, do not use it with
-- combinators such as 'many', because such parsers loop until a
-- failure occurs.  Careless use will thus result in an infinite loop.
peekChar :: TextualMonoid t => Parser t (Maybe Char)
peekChar = T.Parser $ \i0 a0 m0 _kf ks ->
           if null (unI i0)
           then if m0 == Complete
                then ks i0 a0 m0 Nothing
                else let k' i a m = ks i a m $! Textual.characterPrefix (unI i)
                     in prompt i0 a0 m0 k' k'
           else ks i0 a0 m0 $! Textual.characterPrefix (unI i0)
{-# INLINE peekChar #-}

-- | Match any input character, failing if the input doesn't start
-- with any. Does not consume any input.
peekChar' :: TextualMonoid t => Parser t Char
peekChar' = do
  s <- ensureOne
  case Textual.characterPrefix s 
     of Just c -> return c
        _ -> fail "peekChar'"
{-# INLINE peekChar' #-}

-- | Match either a single newline character @\'\\n\'@, or a carriage
-- return followed by a newline character @\"\\r\\n\"@.
endOfLine :: (Eq t, TextualMonoid t) => Parser t ()
endOfLine = (char '\n' >> return ()) <|> (string "\r\n" >> return ())

-- | Terminal failure continuation.
failK :: Failure t a
failK i0 _a0 _m0 stack msg = Fail (unI i0) stack msg
{-# INLINE failK #-}

-- | Terminal success continuation.
successK :: Success t a a
successK i0 _a0 _m0 a = Done (unI i0) a
{-# INLINE successK #-}

-- | Run a parser.
parse :: Monoid t => Parser t a -> t -> IResult t a
parse m s = T.runParser m (I s) mempty Incomplete failK successK
{-# INLINE parse #-}

-- | Run a parser that cannot be resupplied via a 'Partial' result.
parseOnly :: Monoid t => Parser t a -> t -> Either String a
parseOnly m s = case T.runParser m (I s) mempty Complete failK successK of
                  Fail _ _ err -> Left err
                  Done _ a     -> Right a
                  _            -> error "parseOnly: impossible error!"
{-# INLINE parseOnly #-}