-- |
-- Module      :  Text.Megaparsec
-- Copyright   :  © 2015–2018 Megaparsec contributors
--                © 2007 Paolo Martini
--                © 1999–2001 Daan Leijen
-- License     :  FreeBSD
--
-- Maintainer  :  Mark Karpov <markkarpov92@gmail.com>
-- Stability   :  experimental
-- Portability :  portable
--
-- This module includes everything you need to get started writing a parser.
-- If you are new to Megaparsec and don't know where to begin, take a look
-- at the tutorials <https://markkarpov.com/learn-haskell.html#megaparsec-tutorials>.
--
-- In addition to the "Text.Megaparsec" module, which exports and re-exports
-- most everything that you may need, we advise to import
-- "Text.Megaparsec.Char" if you plan to work with a stream of 'Char' tokens
-- or "Text.Megaparsec.Byte" if you intend to parse binary data.
--
-- It is common to start working with the library by defining a type synonym
-- like this:
--
-- > type Parser = Parsec Void Text
-- >                      ^    ^
-- >                      |    |
-- > Custom error component    Input stream type
--
-- Then you can write type signatures like @Parser 'Int'@—for a parser that
-- returns an 'Int' for example.
--
-- Similarly (since it's known to cause confusion), you should use
-- 'ParseErrorBundle' type parametrized like this:
--
-- > ParseErrorBundle Text Void
-- >                  ^    ^
-- >                  |    |
-- >  Input stream type    Custom error component (the same you used in Parser)
--
-- Megaparsec uses some type-level machinery to provide flexibility without
-- compromising on type safety. Thus type signatures are sometimes necessary
-- to avoid ambiguous types. If you're seeing a error message that reads
-- like “Type variable @e0@ is ambiguous …”, you need to give an explicit
-- signature to your parser to resolve the ambiguity. It's a good idea to
-- provide type signatures for all top-level definitions.

{-# LANGUAGE FlexibleContexts      #-}
{-# LANGUAGE FlexibleInstances     #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE RankNTypes            #-}
{-# LANGUAGE ScopedTypeVariables   #-}
{-# LANGUAGE TypeFamilies          #-}
{-# LANGUAGE UndecidableInstances  #-}

module Text.Megaparsec
  ( -- * Re-exports
    -- $reexports
    module Text.Megaparsec.Pos
  , module Text.Megaparsec.Error
  , module Text.Megaparsec.Stream
  , module Control.Monad.Combinators
    -- * Data types
  , State (..)
  , PosState (..)
  , Parsec
  , ParsecT
    -- * Running parser
  , parse
  , parseMaybe
  , parseTest
  , runParser
  , runParser'
  , runParserT
  , runParserT'
    -- * Primitive combinators
  , MonadParsec (..)
    -- * Derivatives of primitive combinators
  , single
  , satisfy
  , anySingle
  , anySingleBut
  , oneOf
  , noneOf
  , chunk
  , (<?>)
  , unexpected
  , customFailure
  , match
  , region
  , takeRest
  , atEnd
    -- * Parser state combinators
  , getInput
  , setInput
  , getSourcePos
  , getOffset
  , setOffset
  , setParserState )
where

import Control.Monad
import Control.Monad.Combinators
import Control.Monad.Identity
import Data.List.NonEmpty (NonEmpty (..))
import Data.Maybe (fromJust)
import Text.Megaparsec.Class
import Text.Megaparsec.Error
import Text.Megaparsec.Internal
import Text.Megaparsec.Pos
import Text.Megaparsec.State
import Text.Megaparsec.Stream
import qualified Data.Set as E

-- $reexports
--
-- Note that we re-export monadic combinators from
-- "Control.Monad.Combinators" because these are more efficient than
-- 'Applicative'-based ones. Thus 'many' and 'some' may clash with the
-- functions from "Control.Applicative". You need to hide the functions like
-- this:
--
-- > import Control.Applicative hiding (many, some)
--
-- Also note that you can import "Control.Monad.Combinators.NonEmpty" if you
-- wish that combinators like 'some' return 'NonEmpty' lists. The module
-- lives in the @parser-combinators@ package (you need at least version
-- /0.4.0/).
--
-- This module is intended to be imported qualified:
--
-- > import qualified Control.Monad.Combinators.NonEmpty as NE
--
-- Other modules of interest are:
--
--     * "Control.Monad.Combinators.Expr" for parsing of expressions.
--     * "Control.Applicative.Permutations" for parsing of permutations
--       phrases.

----------------------------------------------------------------------------
-- Data types

-- | 'Parsec' is a non-transformer variant of the more general 'ParsecT'
-- monad transformer.

type Parsec e s = ParsecT e s Identity

----------------------------------------------------------------------------
-- Running a parser

-- | @'parse' p file input@ runs parser @p@ over 'Identity' (see
-- 'runParserT' if you're using the 'ParsecT' monad transformer; 'parse'
-- itself is just a synonym for 'runParser'). It returns either a
-- 'ParseErrorBundle' ('Left') or a value of type @a@ ('Right').
-- 'errorBundlePretty' can be used to turn 'ParseErrorBundle' into the
-- string representation of the error message. See "Text.Megaparsec.Error"
-- if you need to do more advanced error analysis.
--
-- > main = case parse numbers "" "11,2,43" of
-- >          Left bundle -> putStr (errorBundlePretty bundle)
-- >          Right xs -> print (sum xs)
-- >
-- > numbers = decimal `sepBy` char ','

parse
  :: Parsec e s a -- ^ Parser to run
  -> String       -- ^ Name of source file
  -> s            -- ^ Input for parser
  -> Either (ParseErrorBundle s e) a
parse = runParser

-- | @'parseMaybe' p input@ runs the parser @p@ on @input@ and returns the
-- result inside 'Just' on success and 'Nothing' on failure. This function
-- also parses 'eof', so if the parser doesn't consume all of its input, it
-- will fail.
--
-- The function is supposed to be useful for lightweight parsing, where
-- error messages (and thus file name) are not important and entire input
-- should be parsed. For example, it can be used when parsing of a single
-- number according to a specification of its format is desired.

parseMaybe :: (Ord e, Stream s) => Parsec e s a -> s -> Maybe a
parseMaybe p s =
  case parse (p <* eof) "" s of
    Left  _ -> Nothing
    Right x -> Just x

-- | The expression @'parseTest' p input@ applies the parser @p@ against
-- input @input@ and prints the result to stdout. Useful for testing.

parseTest :: ( ShowErrorComponent e
             , Show a
             , Stream s
             )
  => Parsec e s a -- ^ Parser to run
  -> s            -- ^ Input for parser
  -> IO ()
parseTest p input =
  case parse p "" input of
    Left  e -> putStr (errorBundlePretty e)
    Right x -> print x

-- | @'runParser' p file input@ runs parser @p@ on the input stream of
-- tokens @input@, obtained from source @file@. The @file@ is only used in
-- error messages and may be the empty string. Returns either a 'ParseError'
-- ('Left') or a value of type @a@ ('Right').
--
-- > parseFromFile p file = runParser p file <$> readFile file

runParser
  :: Parsec e s a -- ^ Parser to run
  -> String     -- ^ Name of source file
  -> s          -- ^ Input for parser
  -> Either (ParseErrorBundle s e) a
runParser p name s = snd $ runParser' p (initialState name s)

-- | The function is similar to 'runParser' with the difference that it
-- accepts and returns parser state. This allows to specify arbitrary
-- textual position at the beginning of parsing, for example. This is the
-- most general way to run a parser over the 'Identity' monad.
--
-- @since 4.2.0

runParser'
  :: Parsec e s a -- ^ Parser to run
  -> State s    -- ^ Initial state
  -> (State s, Either (ParseErrorBundle s e) a)
runParser' p = runIdentity . runParserT' p

-- | @'runParserT' p file input@ runs parser @p@ on the input list of tokens
-- @input@, obtained from source @file@. The @file@ is only used in error
-- messages and may be the empty string. Returns a computation in the
-- underlying monad @m@ that returns either a 'ParseError' ('Left') or a
-- value of type @a@ ('Right').

runParserT :: Monad m
  => ParsecT e s m a -- ^ Parser to run
  -> String        -- ^ Name of source file
  -> s             -- ^ Input for parser
  -> m (Either (ParseErrorBundle s e) a)
runParserT p name s = snd `liftM` runParserT' p (initialState name s)

-- | This function is similar to 'runParserT', but like 'runParser'' it
-- accepts and returns parser state. This is thus the most general way to
-- run a parser.
--
-- @since 4.2.0

runParserT' :: Monad m
  => ParsecT e s m a -- ^ Parser to run
  -> State s       -- ^ Initial state
  -> m (State s, Either (ParseErrorBundle s e) a)
runParserT' p s = do
  (Reply s' _ result) <- runParsecT p s
  return $ case result of
    OK    x -> (s', Right x)
    Error e ->
      let bundle = ParseErrorBundle
            { bundleErrors = e :| []
            , bundlePosState = statePosState s
            }
      in (s', Left bundle)

-- | Given name of source file and input construct initial state for parser.

initialState :: String -> s -> State s
initialState name s = State
  { stateInput  = s
  , stateOffset = 0
  , statePosState = PosState
    { pstateInput = s
    , pstateOffset = 0
    , pstateSourcePos = initialPos name
    , pstateTabWidth = defaultTabWidth
    , pstateLinePrefix = ""
    }
  }

----------------------------------------------------------------------------
-- Derivatives of primitive combinators

-- | @'single' t@ only matches the single token @t@.
--
-- > semicolon = single ';'
--
-- See also: 'token', 'anySingle', 'Text.Megaparsec.Byte.char',
-- 'Text.Megaparsec.Char.char'.
--
-- @since 7.0.0

single :: MonadParsec e s m
  => Token s           -- ^ Token to match
  -> m (Token s)
single t = token testToken expected
  where
    testToken x = if x == t then Just x else Nothing
    expected    = E.singleton (Tokens (t:|[]))
{-# INLINE single #-}

-- | The parser @'satisfy' f@ succeeds for any token for which the supplied
-- function @f@ returns 'True'. Returns the character that is actually
-- parsed.
--
-- > digitChar = satisfy isDigit <?> "digit"
-- > oneOf cs  = satisfy (`elem` cs)
--
-- See also: 'anySingle', 'anySingleBut', 'oneOf', 'noneOf'.
--
-- @since 7.0.0

satisfy :: MonadParsec e s m
  => (Token s -> Bool) -- ^ Predicate to apply
  -> m (Token s)
satisfy f = token testChar E.empty
  where
    testChar x = if f x then Just x else Nothing
{-# INLINE satisfy #-}

-- | Parse and return a single token. It's a good idea to attach a 'label'
-- to this parser manually.
--
-- > anySingle = satisfy (const True)
--
-- See also: 'satisfy', 'anySingleBut'.
--
-- @since 7.0.0

anySingle :: MonadParsec e s m => m (Token s)
anySingle = satisfy (const True)
{-# INLINE anySingle #-}

-- | Match any token but the given one. It's a good idea to attach a 'label'
-- to this parser manually.
--
-- > anySingleBut t = satisfy (/= t)
--
-- See also: 'single', 'anySingle', 'satisfy'.
--
-- @since 7.0.0

anySingleBut :: MonadParsec e s m
  => Token s           -- ^ Token we should not match
  -> m (Token s)
anySingleBut t = satisfy (/= t)
{-# INLINE anySingleBut #-}

-- | @'oneOf' ts@ succeeds if the current token is in the supplied
-- collection of tokens @ts@. Returns the parsed token. Note that this
-- parser cannot automatically generate the “expected” component of error
-- message, so usually you should label it manually with 'label' or ('<?>').
--
-- > oneOf cs = satisfy (`elem` cs)
--
-- See also: 'satisfy'.
--
-- > digit = oneOf ['0'..'9'] <?> "digit"
--
-- __Performance note__: prefer 'satisfy' when you can because it's faster
-- when you have only a couple of tokens to compare to:
--
-- > quoteFast = satisfy (\x -> x == '\'' || x == '\"')
-- > quoteSlow = oneOf "'\""
--
-- @since 7.0.0

oneOf :: (Foldable f, MonadParsec e s m)
  => f (Token s)       -- ^ Collection of matching tokens
  -> m (Token s)
oneOf cs = satisfy (`elem` cs)
{-# INLINE oneOf #-}

-- | As the dual of 'oneOf', @'noneOf' ts@ succeeds if the current token
-- /not/ in the supplied list of tokens @ts@. Returns the parsed character.
-- Note that this parser cannot automatically generate the “expected”
-- component of error message, so usually you should label it manually with
-- 'label' or ('<?>').
--
-- > noneOf cs = satisfy (`notElem` cs)
--
-- See also: 'satisfy'.
--
-- __Performance note__: prefer 'satisfy' and 'singleBut' when you can
-- because it's faster.
--
-- @since 7.0.0

noneOf :: (Foldable f, MonadParsec e s m)
  => f (Token s)       -- ^ Collection of taken we should not match
  -> m (Token s)
noneOf cs = satisfy (`notElem` cs)
{-# INLINE noneOf #-}

-- | @'chunk' chk@ only matches the chunk @chk@.
--
-- > divOrMod = chunk "div" <|> chunk "mod"
--
-- See also: 'tokens', 'Text.Megaparsec.Char.string',
-- 'Text.Megaparsec.Byte.string'.
--
-- @since 7.0.0

chunk :: MonadParsec e s m
  => Tokens s          -- ^ Chunk to match
  -> m (Tokens s)
chunk = tokens (==)
{-# INLINE chunk #-}

-- | A synonym for 'label' in the form of an operator.

infix 0 <?>

(<?>) :: MonadParsec e s m => m a -> String -> m a
(<?>) = flip label
{-# INLINE (<?>) #-}

-- | The parser @'unexpected' item@ fails with an error message telling
-- about unexpected item @item@ without consuming any input.
--
-- > unexpected item = failure (Just item) Set.empty

unexpected :: MonadParsec e s m => ErrorItem (Token s) -> m a
unexpected item = failure (Just item) E.empty
{-# INLINE unexpected #-}

-- | Report a custom parse error. For a more general version, see
-- 'fancyFailure'.
--
-- > customFailure = fancyFailure . E.singleton . ErrorCustom
--
-- @since 6.3.0

customFailure :: MonadParsec e s m => e -> m a
customFailure = fancyFailure . E.singleton . ErrorCustom
{-# INLINE customFailure #-}

-- | Return both the result of a parse and a chunk of input that was
-- consumed during parsing. This relies on the change of the 'stateOffset'
-- value to evaluate how many tokens were consumed. If you mess with it
-- manually in the argument parser, prepare for troubles.
--
-- @since 5.3.0

match :: MonadParsec e s m => m a -> m (Tokens s, a)
match p = do
  o  <- getOffset
  s  <- getInput
  r  <- p
  o' <- getOffset
  -- NOTE The 'fromJust' call here should never fail because if the stream
  -- is empty before 'p' (the only case when 'takeN_' can return 'Nothing'
  -- as per its invariants), (tp' - tp) won't be greater than 0, and in that
  -- case 'Just' is guaranteed to be returned as per another invariant of
  -- 'takeN_'.
  return ((fst . fromJust) (takeN_ (o' - o) s), r)
{-# INLINEABLE match #-}

-- | Specify how to process 'ParseError's that happen inside of this
-- wrapper. As a side effect of the current implementation changing
-- 'errorOffset' with this combinator will also change the final
-- 'stateOffset' in the parser state (try to avoid that because
-- 'stateOffset' will go out of sync with factual position in the input
-- stream and pretty-printing of parse errors afterwards will be incorrect).
--
-- @since 5.3.0

region :: MonadParsec e s m
  => (ParseError s e -> ParseError s e)
     -- ^ How to process 'ParseError's
  -> m a               -- ^ The “region” that the processing applies to
  -> m a
region f m = do
  r <- observing m
  case r of
    Left err ->
      case f err of
        TrivialError o us ps -> do
          updateParserState $ \st -> st { stateOffset = o }
          failure us ps
        FancyError o xs -> do
          updateParserState $ \st -> st { stateOffset = o }
          fancyFailure xs
    Right x -> return x
{-# INLINEABLE region #-}

-- | Consume the rest of the input and return it as a chunk. This parser
-- never fails, but may return the empty chunk.
--
-- > takeRest = takeWhileP Nothing (const True)
--
-- @since 6.0.0

takeRest :: MonadParsec e s m => m (Tokens s)
takeRest = takeWhileP Nothing (const True)
{-# INLINE takeRest #-}

-- | Return 'True' when end of input has been reached.
--
-- > atEnd = option False (True <$ hidden eof)
--
-- @since 6.0.0

atEnd :: MonadParsec e s m => m Bool
atEnd = option False (True <$ hidden eof)
{-# INLINE atEnd #-}

----------------------------------------------------------------------------
-- Parser state combinators

-- | Return the current input.

getInput :: MonadParsec e s m => m s
getInput = stateInput <$> getParserState
{-# INLINE getInput #-}

-- | @'setInput' input@ continues parsing with @input@.

setInput :: MonadParsec e s m => s -> m ()
setInput s = updateParserState (\(State _ o pst) -> State s o pst)
{-# INLINE setInput #-}

-- | Return the current source position. This function /is not cheap/, do
-- not call it e.g. on matching of every token, that's a bad idea. Still you
-- can use it to get 'SourcePos' to attach to things that you parse.
--
-- The function works under the assumption that we move in input stream only
-- forward and never backward, which is always true unless the user abuses
-- the library on purpose.
--
-- @since 7.0.0

getSourcePos :: MonadParsec e s m => m SourcePos
getSourcePos = do
  st <- getParserState
  let (pos, pst) = reachOffsetNoLine (stateOffset st) (statePosState st)
  setParserState st { statePosState = pst }
  return pos
{-# INLINE getSourcePos #-}

-- | Get the number of tokens processed so far.
--
-- See also: 'setOffset'.
--
-- @since 7.0.0

getOffset :: MonadParsec e s m => m Int
getOffset = stateOffset <$> getParserState
{-# INLINE getOffset #-}

-- | Set the number of tokens processed so far.
--
-- See also: 'getOffset'.
--
-- @since 7.0.0

setOffset :: MonadParsec e s m => Int -> m ()
setOffset o = updateParserState $ \(State s _ pst) ->
  State s o pst
{-# INLINE setOffset #-}

-- | @'setParserState' st@ sets the parser state to @st@.
--
-- See also: 'getParserState', 'updateParserState'.

setParserState :: MonadParsec e s m => State s -> m ()
setParserState st = updateParserState (const st)
{-# INLINE setParserState #-}