{-# LANGUAGE Safe #-}
{-# OPTIONS_GHC -Wno-missing-import-lists #-}
{-|
Module      : Text.Gigaparsec.Errors.Combinator
Description : This module contains combinators that can be used to directly influence error
              messages of parsers.
License     : BSD-3-Clause
Maintainer  : Jamie Willis, Gigaparsec Maintainers
Stability   : stable

Error messages are, by default, not /particularly/ descriptive. However, the combinators in this
module can be used to improve the generation of error messages by providing labels for expected
items, explanations for why things went wrong, custom error messages, custom unexpected error messages,
as well as correcting the offsets that error messages actually occurred at.

==== Terminology

__Observably consumes input__: a parser is said to /observably/ consume input when error messages generated by a parser @p@ occur at a deeper
offset than @p@ originally started at. While this sounds like it is the same as "having consumed input" for the
purposes of backtracking, they are disjoint concepts:

  1. in @atomic p@, @p@ can /observably/ consume input even though the wider parser does not consume input due to the @atomic@.
  2. in @amend p@, @p@ can consume input and may not backtrack even though the consumption is not /observable/ in the error
     message due to the @amend@.

@since 0.2.0.0
-}
module Text.Gigaparsec.Errors.Combinator (
  -- * Error Enrichment Combinators
  -- | These combinators add additional information - or refine the existing information within - to
  -- an error message that has been generated within the scope of the parser they have been called on.
  -- These are a very basic, but effective, way of improving the quality of error messages generated
  -- by gigaparsec.
    label, (<?>), hide, explain,
  -- * Failure Combinators
  -- | These combinators immediately fail the parser, with a more bespoke message.
    emptyWide,
    fail, failWide,
    unexpected, unexpectedWide,
  -- * Error Adjustment Combinators
  -- | These combinators can affect at what position an error is caused at. They are
  -- opposites: where 'amend' will ensure an error message is said to have generated
  -- at the position on entry to the combinator, 'entrench' will resist these changes.
    amend, partialAmend, entrench, dislodge, dislodgeBy,
    amendThenDislodge, amendThenDislodgeBy, partialAmendThenDislodge, partialAmendThenDislodgeBy,
    markAsToken
  ) where

{-
Future doc headings:

Filtering Combinators
=====================
These combinators perform filtering on a parser, with particular emphasis on generating meaningful
error messages if the filtering fails. This is particularly useful for data validation within the
parser, as very instructive error messages describing what went wrong can be generated. These combinators
often filter using a `PartialFunction`: this may be because they combine filtering with mapping (in which
case, the error message is provided separately), or the function may produce a `String`.

In these cases, the partial function is producing the error messages: if the input to the function is
defined, this means that it is invalid and the filtering will fail using the message obtained from the
successful partial function invocation.

Generic Filtering Combinators
=============================
These combinators generalise the combinators from above, which are all special cases of them. Each of these
takes the characteristic predicate or function of the regular variants, but takes an `errGen` object that
can be used to fine-tune the error messages. These offer some flexibility not offered by the specialised
filtering combinators, but are a little more verbose to use.
-}

import Prelude hiding (fail)

import Text.Gigaparsec (Parsec)
-- We want to use this to make the docs point to the right definition for users.
import Text.Gigaparsec.Internal qualified as Internal (Parsec(Parsec), line, col, emptyErr, specialisedErr, raise, unexpectedErr, hints, consumed, useHints, adjustErr, hints, hintsValidOffset)
import Text.Gigaparsec.Internal.Errors (ParseError, CaretWidth(FlexibleCaret, RigidCaret), ExpectItem(ExpectNamed))
import Text.Gigaparsec.Internal.Errors qualified as Internal (setLexical, amendErr, entrenchErr, dislodgeErr, partialAmendErr, labelErr, explainErr)
import Text.Gigaparsec.Internal.Require (require)

import Data.Set (Set)
import Data.Set qualified as Set (empty, map)
import Data.List.NonEmpty (NonEmpty)
import Data.List.NonEmpty qualified as NonEmpty (toList)

{-|
This combinator changes the expected component of any errors generated by this parser.

When this parser fails having not /observably/ consumed input, the expected component of the generated
error message is set to be the given items.

The set of labels must be non-empty and must not contain the empty string: this precondition
is checked with 'require' before the parser is constructed.
-}
label :: Set String -- ^ the names to give to the expected component of any qualifying errors.
      -> Parsec a   -- ^ the parser to apply the labels to
      -> Parsec a
label ls (Internal.Parsec p) =
  require (not (null ls) && not (any null ls)) "Text.Gigaparsec.Errors.Combinator.label"
                                               "labels cannot be empty" $
    Internal.Parsec $ \st good bad ->
      let -- the consumption counter on entry, used to tell whether @p@ moved on
          !origConsumed = Internal.consumed st
          good' x st'
            -- the consumption counter changed: pass the state through untouched
            | Internal.consumed st' /= origConsumed = good x st'
            -- consumption counter unchanged: the hints are replaced by the labels
            | otherwise = good x st' { Internal.hints = Set.map ExpectNamed ls }
          -- on failure, relabel the error relative to the entry offset
          bad' err = Internal.useHints bad (Internal.labelErr origConsumed ls err)
      in p st good' bad'

{-|
This combinator suppresses the entire error message generated by a given parser.

When this parser fails having not /observably/ consumed input, this combinator
replaces any error generated by the given parser to match the 'Text.Gigaparsec.empty' combinator.

This can be useful, say, for hiding whitespace labels, which are not normally useful
information to include in an error message for whitespace insensitive grammars.
-}
hide :: Parsec a -> Parsec a
hide (Internal.Parsec p) =
  Internal.Parsec $ \st good bad ->
    let -- the consumption counter on entry, compared against on failure
        !origConsumed = Internal.consumed st
        -- on success, the hints are always cleared
        good' x st' = good x st' { Internal.hints = Set.empty }
        bad' err st'
          -- the consumption counter changed: the error is passed on as-is
          | Internal.consumed st' /= origConsumed = bad err st'
          -- otherwise the error is swapped for an empty error of caret width 0
          | otherwise = Internal.useHints bad (Internal.emptyErr st' 0) st'
    in p st good' bad'

{-|
This combinator adds a reason to error messages generated by this parser.

When this parser fails having not /observably/ consumed input, this combinator adds
a reason to the error message, which should justify why the error occurred. Unlike error
labels, which may persist if more progress is made having not consumed input, reasons
are not carried forward in the error message, and are lost.
-}
explain :: String   -- ^ the reason why a parser failed.
        -> Parsec a -- ^ the parser to apply the reason to
        -> Parsec a
explain reason (Internal.Parsec p) =
  Internal.Parsec $ \st good bad ->
    let -- the consumption counter on entry, handed to 'Internal.explainErr'
        !origConsumed = Internal.consumed st
        -- only the failure path is altered; the success continuation is untouched
        bad' err = Internal.useHints bad (Internal.explainErr origConsumed reason err)
    in p st good bad'

{-|
This combinator fails immediately, with a caret of the given width and no other information.

By producing basically no information, this combinator is principally for adjusting the
caret-width of another error, rather than the value 'Text.Gigaparsec.empty', which is used to fail with
no effect on error content.
-}
emptyWide :: Word     -- ^ the width of the caret for the error produced by this combinator.
          -> Parsec a
emptyWide width = Internal.raise (`Internal.emptyErr` width)

{-|
This combinator consumes no input and fails immediately with the given error messages.

Produces a /specialised/ error message where all the lines of the error are the
given @msgs@ in order of appearance. The error is raised with a flexible caret of width 1.

==== __Examples__
>>> let failing = fail ["hello,", "this is an error message", "broken across multiple lines"]

-}
fail :: NonEmpty String -- ^ the messages that will make up the error message.
     -> Parsec a
fail = _fail (FlexibleCaret 1)

{-|
This combinator consumes no input and fails immediately with the given error messages.

Produces a /specialised/ error message where all the lines of the error are the
given @msgs@ in order of appearance. The caret width of the message is set to the
given value (as a rigid caret).

==== __Examples__
>>> let failing = failWide 3 ["hello,", "this is an error message", "broken across multiple lines"]

-}
failWide :: Word            -- ^ the width of the caret for the error produced by this combinator.
         -> NonEmpty String -- ^ the messages that will make up the error message.
         -> Parsec a
failWide width = _fail (RigidCaret width)

{-# INLINE _fail #-}
-- Shared implementation of 'fail' and 'failWide': raises a specialised error
-- built from the given messages (in order) with the given caret width.
_fail :: CaretWidth -> NonEmpty String -> Parsec a
_fail width msgs = Internal.raise (\st -> Internal.specialisedErr st (NonEmpty.toList msgs) width)

{-|
This combinator consumes no input and fails immediately, setting the unexpected component
to the given item.

Produces a /trivial/ error message where the unexpected component of the error is
replaced with the given item. The error is raised with a flexible caret of width 1.
-}
unexpected :: String   -- ^ the unexpected message for the error generated.
           -> Parsec a
unexpected = _unexpected (FlexibleCaret 1)

{-|
This combinator consumes no input and fails immediately, setting the unexpected component
to the given item.

Produces a /trivial/ error message where the unexpected component of the error is
replaced with the given item. The caret width of the message is set to the
given value (as a rigid caret).
-}
unexpectedWide :: Word     -- ^ the width of the caret for the error produced by this combinator.
               -> String   -- ^ the unexpected message for the error generated.
               -> Parsec a
unexpectedWide width = _unexpected (RigidCaret width)

{-# INLINE _unexpected #-}
-- Shared implementation of 'unexpected' and 'unexpectedWide': raises an
-- unexpected-item error with an empty set of expected items, the given name
-- as the unexpected component, and the given caret width.
_unexpected :: CaretWidth -> String -> Parsec a
_unexpected width name = Internal.raise $ \st -> Internal.unexpectedErr st Set.empty name width

{-|
This combinator adjusts any error messages generated by the given parser so that they
occur at the position recorded on entry to this combinator (effectively as if no
input were consumed).

This is useful if validation work is done
on the output of a parser that may render it invalid, but the error should point to the
beginning of the structure. This combinator's effect can be cancelled with 'entrench'.

==== __Examples__
>>> let greeting = string "hello world" <* char '!'
>>> parseRepl (greeting <?> ["greeting"]) "hello world."
(line 1, column 12):
  unexpected "."
  expected "!"
  >hello world.
              ^
>>> parseRepl (amend greeting <?> ["greeting"]) "hello world."
(line 1, column 1):
  unexpected "h"
  expected greeting
  >hello world.
   ^
-}
amend :: Parsec a -> Parsec a
amend = _amend Internal.amendErr

--TODO: examples
{-|
This combinator adjusts any error messages generated by the given parser so that they
occur at the position recorded on entry to this combinator, but retains the original offset.

Similar to 'amend', but retains the original offset the error occurred at. This is known
as its /underlying offset/ as opposed to the visual /presentation offset/. To the reader, the
error message appears as if no input was consumed, but for the purposes of error message merging
the error is still deeper. A key thing to note is that two errors can only merge if they are at
the same presentation /and/ underlying offsets: if they are not, the deeper of the two /dominates/.

The ability for an error to still dominate others after partial amendment can be useful for allowing
it to avoid being lost when merging with errors that are deeper than the presentation offset but
shallower than the underlying.
-}
partialAmend :: Parsec a -> Parsec a
partialAmend = _amend Internal.partialAmendErr

{-# INLINE _amend #-}
-- Shared implementation of 'amend' and 'partialAmend'. The given function is
-- applied to the consumption counter, line and column recorded on entry, along
-- with the error produced by the parser. On failure, the hints and their valid
-- offset are also reset to the values they had on entry. The success
-- continuation is passed through unchanged.
_amend :: (Word -> Word -> Word -> ParseError -> ParseError) -> Parsec a -> Parsec a
_amend f (Internal.Parsec p) =
  Internal.Parsec $ \st good bad ->
    let -- snapshot of the entry state, forced eagerly by the bang patterns
        !origConsumed = Internal.consumed st
        !origLine = Internal.line st
        !origCol = Internal.col st
        !origHints = Internal.hints st
        !origHintsValidOffset = Internal.hintsValidOffset st
    in p st good $ \err st' -> bad (f origConsumed origLine origCol err)
                                   st' { Internal.hints = origHints
                                       , Internal.hintsValidOffset = origHintsValidOffset }

--TODO: examples
{-|
This combinator prevents the action of any enclosing 'amend' on the errors generated by the given
parser.

Sometimes, the error adjustments performed by 'amend' should only affect errors generated
within a certain part of a parser and not the whole thing; in this case, 'entrench' can be used
to protect sub-parsers from having their errors adjusted, providing a much more fine-grained
scope for error adjustment.
-}
entrench :: Parsec a -> Parsec a
entrench = Internal.adjustErr Internal.entrenchErr

{-|
This combinator undoes the action of any 'entrench' combinators on the given parser.

Entrenchment is important for preventing the incorrect amendment of certain parts of sub-errors
for a parser, but it may be then undesirable to block further amendments from elsewhere in the
parser. This combinator can be used to cancel all entrenchment after the critical section has
passed.

This is 'dislodgeBy' applied to 'maxBound'.
-}
dislodge :: Parsec a -> Parsec a
dislodge = dislodgeBy maxBound

{-|
This combinator undoes the action of the given number of 'entrench' combinators on the given parser.

Entrenchment is important for preventing the incorrect amendment of certain parts of sub-errors
for a parser, but it may be then undesirable to block further amendments from elsewhere in the
parser. This combinator can be used to cancel the given amount of entrenchment after the critical
section has passed.
-}
dislodgeBy :: Word     -- ^ the number of 'entrench' combinators to undo.
           -> Parsec a -- ^ the parser whose errors should be dislodged.
           -> Parsec a
dislodgeBy by = Internal.adjustErr (Internal.dislodgeErr by)

{-|
This combinator first tries to amend the position of any error generated by the given parser,
and if the error was entrenched will dislodge it instead.

This is the composition of 'dislodge' after 'amend'.
-}
amendThenDislodge :: Parsec a -> Parsec a
amendThenDislodge = dislodge . amend

{-|
This combinator first tries to amend the position of any error generated by the given parser,
and if the error was entrenched will dislodge it the given number of times instead.

This is the composition of 'dislodgeBy' (with the given count) after 'amend'.
-}
amendThenDislodgeBy :: Word -> Parsec a -> Parsec a
amendThenDislodgeBy n = dislodgeBy n . amend

{-|
This combinator first tries to partially amend the position of any error generated by the given parser,
and if the error was entrenched will dislodge it instead.

This is the composition of 'dislodge' after 'partialAmend'.
-}
partialAmendThenDislodge :: Parsec a -> Parsec a
partialAmendThenDislodge = dislodge . partialAmend

{-|
This combinator first tries to partially amend the position of any error generated by the given parser,
and if the error was entrenched will dislodge it the given number of times instead.

This is the composition of 'dislodgeBy' (with the given count) after 'partialAmend'.
-}
partialAmendThenDislodgeBy :: Word -> Parsec a -> Parsec a
partialAmendThenDislodgeBy n = dislodgeBy n . partialAmend

{-|
This combinator marks any errors within the given parser as being /lexical errors/.

When an error is marked as a /lexical error/, it sets a flag within the error that is
passed to 'Text.Gigaparsec.Errors.ErrorBuilder.unexpectedToken': this
should be used to prevent @Lexer@-based token extraction from being performed on an error,
since lexing errors cannot be the result of unexpected tokens.
-}
markAsToken :: Parsec a -> Parsec a
markAsToken = Internal.adjustErr Internal.setLexical

{-|
This combinator changes the expected component of any errors generated by this parser.

This is just an alias for the 'label' combinator, with the arguments flipped so that
the labels follow the parser.
-}
{-# INLINE (<?>) #-}
infix 0 <?>
(<?>) :: Parsec a -> Set String -> Parsec a
(<?>) = flip label