-- |
-- Module      :  Text.Megaparsec.Error
-- Copyright   :  © 2015–2018 Megaparsec contributors
-- License     :  FreeBSD
--
-- Maintainer  :  Mark Karpov <markkarpov92@gmail.com>
-- Stability   :  experimental
-- Portability :  portable
--
-- Parse errors. The current version of Megaparsec supports well-typed
-- errors instead of 'String'-based ones. This gives a lot of flexibility in
-- describing what exactly went wrong as well as a way to return arbitrary
-- data in case of failure.
--
-- You probably do not want to import this module directly because
-- "Text.Megaparsec" re-exports it anyway.

{-# LANGUAGE BangPatterns        #-}
{-# LANGUAGE CPP                 #-}
{-# LANGUAGE DeriveDataTypeable  #-}
{-# LANGUAGE DeriveFunctor       #-}
{-# LANGUAGE DeriveGeneric       #-}
{-# LANGUAGE FlexibleContexts    #-}
{-# LANGUAGE FlexibleInstances   #-}
{-# LANGUAGE LambdaCase          #-}
{-# LANGUAGE RecordWildCards     #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE StandaloneDeriving  #-}
{-# LANGUAGE UndecidableInstances#-}

module Text.Megaparsec.Error
  ( -- * Parse error type
    ErrorItem (..)
  , ErrorFancy (..)
  , ParseError (..)
  , mapParseError
  , errorOffset
  , ParseErrorBundle (..)
  , attachSourcePos
    -- * Pretty-printing
  , ShowErrorComponent (..)
  , errorBundlePretty
  , parseErrorPretty
  , parseErrorTextPretty )
where

import Control.DeepSeq
import Control.Exception
import Control.Monad.State.Strict
import Data.Data (Data)
import Data.List (intercalate)
import Data.List.NonEmpty (NonEmpty (..))
import Data.Maybe (isNothing)
import Data.Proxy
import Data.Set (Set)
import Data.Typeable (Typeable)
import Data.Void
import GHC.Generics
import Text.Megaparsec.Pos
import Text.Megaparsec.State
import Text.Megaparsec.Stream
import qualified Data.List.NonEmpty as NE
import qualified Data.Set           as E

#if !MIN_VERSION_base(4,11,0)
import Data.Semigroup
#endif

----------------------------------------------------------------------------
-- Parse error type

-- | Data type that is used to represent “unexpected\/expected” items in
-- 'ParseError'. The data type is parametrized over the token type @t@.
--
-- @since 5.0.0

data ErrorItem t
  = Tokens (NonEmpty t)      -- ^ Non-empty stream of tokens
  | Label (NonEmpty Char)    -- ^ Label (cannot be empty)
  | EndOfInput               -- ^ End of input
  deriving (Show, Read, Eq, Ord, Data, Typeable, Generic, Functor)

instance NFData t => NFData (ErrorItem t)

-- | Additional error data, extendable by user. When no custom data is
-- necessary, the type is typically indexed by 'Void' to “cancel” the
-- 'ErrorCustom' constructor.
--
-- @since 6.0.0

data ErrorFancy e
  = ErrorFail String
    -- ^ 'fail' has been used in parser monad
  | ErrorIndentation Ordering Pos Pos
    -- ^ Incorrect indentation error: desired ordering between reference
    -- level and actual level, reference indentation level, actual
    -- indentation level
  | ErrorCustom e
    -- ^ Custom error data, can be conveniently disabled by indexing
    -- 'ErrorFancy' by 'Void'
  deriving (Show, Read, Eq, Ord, Data, Typeable, Generic, Functor)

instance NFData a => NFData (ErrorFancy a) where
  rnf (ErrorFail str) = rnf str
  rnf (ErrorIndentation ord ref act) = ord `seq` rnf ref `seq` rnf act
  rnf (ErrorCustom a) = rnf a

-- | @'ParseError' s e@ represents a parse error parametrized over the
-- stream type @s@ and the custom data @e@.
--
-- 'Semigroup' and 'Monoid' instances of the data type allow to merge parse
-- errors from different branches of parsing. When merging two
-- 'ParseError's, the longest match is preferred; if positions are the same,
-- custom data sets and collections of message items are combined. Note that
-- fancy errors take precedence over trivial errors in merging.
--
-- @since 7.0.0

data ParseError s e
  = TrivialError Int (Maybe (ErrorItem (Token s))) (Set (ErrorItem (Token s)))
    -- ^ Trivial errors, generated by Megaparsec's machinery. The data
    -- constructor includes the offset of error, unexpected token (if any),
    -- and expected tokens.
    --
    -- Type of the first argument was changed in the version /7.0.0/.
  | FancyError Int (Set (ErrorFancy e))
    -- ^ Fancy, custom errors.
    --
    -- Type of the first argument was changed in the version /7.0.0/.
  deriving (Typeable, Generic)

deriving instance ( Show (Token s)
                  , Show e
                  ) => Show (ParseError s e)

deriving instance ( Eq (Token s)
                  , Eq e
                  ) => Eq (ParseError s e)

deriving instance ( Data s
                  , Data (Token s)
                  , Ord (Token s)
                  , Data e
                  , Ord e
                  ) => Data (ParseError s e)

instance ( NFData (Token s)
         , NFData e
         ) => NFData (ParseError s e)

instance (Stream s, Ord e) => Semigroup (ParseError s e) where
  (<>) = mergeError
  {-# INLINE (<>) #-}

instance (Stream s, Ord e) => Monoid (ParseError s e) where
  mempty  = TrivialError 0 Nothing E.empty
  mappend = (<>)
  {-# INLINE mappend #-}

instance ( Show s
         , Show (Token s)
         , Show e
         , ShowErrorComponent e
         , Stream s
         , Typeable s
         , Typeable e )
  => Exception (ParseError s e) where
  displayException = parseErrorPretty

-- | Modify the custom data component in a parse error. This could be done
-- via 'fmap' if not for the 'Ord' constraint.
--
-- @since 7.0.0

mapParseError :: Ord e'
  => (e -> e')
  -> ParseError s e
  -> ParseError s e'
mapParseError _ (TrivialError o u p) = TrivialError o u p
mapParseError f (FancyError o x) = FancyError o (E.map (fmap f) x)

-- | Get offset of given 'ParseError'.
--
-- @since 7.0.0

errorOffset :: ParseError s e -> Int
errorOffset (TrivialError o _ _) = o
errorOffset (FancyError   o _)   = o

-- | Merge two error data structures into one joining their collections of
-- message items and preferring the longest match. In other words, earlier
-- error message is discarded. This may seem counter-intuitive, but
-- 'mergeError' is only used to merge error messages of alternative branches
-- of parsing and in this case longest match should be preferred.

mergeError :: (Stream s, Ord e)
  => ParseError s e
  -> ParseError s e
  -> ParseError s e
mergeError e1 e2 =
  case errorOffset e1 `compare` errorOffset e2 of
    LT -> e2
    EQ ->
      case (e1, e2) of
        (TrivialError s1 u1 p1, TrivialError _ u2 p2) ->
          TrivialError s1 (n u1 u2) (E.union p1 p2)
        (FancyError {}, TrivialError {}) -> e1
        (TrivialError {}, FancyError {}) -> e2
        (FancyError s1 x1, FancyError _ x2) ->
          FancyError s1 (E.union x1 x2)
    GT -> e1
  where
    -- NOTE The logic behind this merging is that since we only combine
    -- parse errors that happen at exactly the same position, all the
    -- unexpected items will be prefixes of input stream at that position or
    -- labels referring to the same thing. Our aim here is to choose the
    -- longest prefix (merging with labels and end of input is somewhat
    -- arbitrary, but is necessary because otherwise we can't make
    -- ParseError lawful Monoid and have nice parse errors at the same
    -- time).
    n Nothing  Nothing = Nothing
    n (Just x) Nothing = Just x
    n Nothing (Just y) = Just y
    n (Just x) (Just y) = Just (max x y)
{-# INLINE mergeError #-}

-- | A non-empty collection of 'ParseError's equipped with 'PosState' that
-- allows to pretty-print the errors efficiently and correctly.
--
-- @since 7.0.0

data ParseErrorBundle s e = ParseErrorBundle
  { bundleErrors :: NonEmpty (ParseError s e)
    -- ^ A collection of 'ParseError's that is sorted by parse error offsets
  , bundlePosState :: PosState s
    -- ^ State that is used for line\/column calculation
  } deriving (Generic)

deriving instance ( Show s
                  , Show (Token s)
                  , Show e
                  ) => Show (ParseErrorBundle s e)

deriving instance ( Eq s
                  , Eq (Token s)
                  , Eq e
                  ) => Eq (ParseErrorBundle s e)

deriving instance ( Typeable s
                  , Typeable (Token s)
                  , Typeable e
                  ) => Typeable (ParseErrorBundle s e)

deriving instance ( Data s
                  , Data (Token s)
                  , Ord (Token s)
                  , Data e
                  , Ord e
                  ) => Data (ParseErrorBundle s e)

instance ( NFData s
         , NFData (Token s)
         , NFData e
         ) => NFData (ParseErrorBundle s e)

instance ( Show s
         , Show (Token s)
         , Show e
         , ShowErrorComponent e
         , Stream s
         , Typeable s
         , Typeable e
         ) => Exception (ParseErrorBundle s e) where
  displayException = errorBundlePretty

-- | Attach 'SourcePos'es to items in a 'Traversable' container given that
-- there is a projection allowing to get an offset per item.
--
-- Items must be in ascending order with respect to their offsets.
--
-- @since 7.0.0

attachSourcePos
  :: (Traversable t, Stream s)
  => (a -> Int) -- ^ How to project offset from an item (e.g. 'errorOffset')
  -> t a               -- ^ The collection of items
  -> PosState s        -- ^ Initial 'PosState'
  -> (t (a, SourcePos), PosState s) -- ^ The collection with 'SourcePos'es
                                    -- added and the final 'PosState'
attachSourcePos projectOffset xs = runState (traverse f xs)
  where
    f a = do
      pst <- get
      let (spos, pst') = reachOffsetNoLine (projectOffset a) pst
      put pst'
      return (a, spos)
{-# INLINEABLE attachSourcePos #-}

----------------------------------------------------------------------------
-- Pretty-printing

-- | The type class defines how to print a custom component of 'ParseError'.
--
-- @since 5.0.0

class Ord a => ShowErrorComponent a where

  -- | Pretty-print a component of 'ParseError'.

  showErrorComponent :: a -> String

  -- | Length of the error component in characters, used for highlighting of
  -- parse errors in input string.
  --
  -- @since 7.0.0

  errorComponentLen :: a -> Int
  errorComponentLen _ = 1

instance ShowErrorComponent Void where
  showErrorComponent = absurd

-- | Pretty-print a 'ParseErrorBundle'. All 'ParseError's in the bundle will
-- be pretty-printed in order together with the corresponding offending
-- lines by doing a single efficient pass over the input stream. The
-- rendered 'String' always ends with a newline.
--
-- @since 7.0.0

errorBundlePretty
  :: forall s e. ( Stream s
                 , ShowErrorComponent e
                 )
  => ParseErrorBundle s e -- ^ Parse error bundle to display
  -> String               -- ^ Textual rendition of the bundle
errorBundlePretty ParseErrorBundle {..} =
  let (r, _) = foldl f (id, bundlePosState) bundleErrors
  in drop 1 (r "")
  where
    f :: (ShowS, PosState s)
      -> ParseError s e
      -> (ShowS, PosState s)
    f (o, !pst) e = (o . (outChunk ++), pst')
      where
        (epos, sline, pst') = reachOffset (errorOffset e) pst
        outChunk =
          "\n" <> sourcePosPretty epos <> ":\n" <>
          padding <> "|\n" <>
          lineNumber <> " | " <> sline <> "\n" <>
          padding <> "| " <> rpadding <> pointer <> "\n" <>
          parseErrorTextPretty e
        lineNumber = (show . unPos . sourceLine) epos
        padding = replicate (length lineNumber + 1) ' '
        rpadding =
          if pointerLen > 0
            then replicate rpshift ' '
            else ""
        rpshift = unPos (sourceColumn epos) - 1
        pointer = replicate pointerLen '^'
        pointerLen =
          if rpshift + elen > slineLen
            then slineLen - rpshift + 1
            else elen
        slineLen = length sline
        elen =
          case e of
            TrivialError _ Nothing _ -> 1
            TrivialError _ (Just x) _ -> errorItemLength x
            FancyError _ xs ->
              E.foldl' (\a b -> max a (errorFancyLength b)) 1 xs

-- | Pretty-print a 'ParseError'. The rendered 'String' always ends with a
-- newline.
--
-- @since 5.0.0

parseErrorPretty
  :: (Stream s, ShowErrorComponent e)
  => ParseError s e    -- ^ Parse error to render
  -> String            -- ^ Result of rendering
parseErrorPretty e =
  "offset=" <> show (errorOffset e) <> ":\n" <> parseErrorTextPretty e

-- | Pretty-print a textual part of a 'ParseError', that is, everything
-- except for its position. The rendered 'String' always ends with a
-- newline.
--
-- @since 5.1.0

parseErrorTextPretty
  :: forall s e. (Stream s, ShowErrorComponent e)
  => ParseError s e    -- ^ Parse error to render
  -> String            -- ^ Result of rendering
parseErrorTextPretty (TrivialError _ us ps) =
  if isNothing us && E.null ps
    then "unknown parse error\n"
    else messageItemsPretty "unexpected " (showErrorItem pxy `E.map` maybe E.empty E.singleton us) <>
         messageItemsPretty "expecting "  (showErrorItem pxy `E.map` ps)
  where
    pxy = Proxy :: Proxy s
parseErrorTextPretty (FancyError _ xs) =
  if E.null xs
    then "unknown fancy parse error\n"
    else unlines (showErrorFancy <$> E.toAscList xs)

----------------------------------------------------------------------------
-- Helpers

-- | Pretty-print an 'ErrorItem'.

showErrorItem :: Stream s => Proxy s -> ErrorItem (Token s) -> String
showErrorItem pxy = \case
    Tokens   ts -> showTokens pxy ts
    Label label -> NE.toList label
    EndOfInput  -> "end of input"

-- | Get length of the “pointer” to display under a given 'ErrorItem'.

errorItemLength :: ErrorItem t -> Int
errorItemLength = \case
  Tokens ts -> NE.length ts
  _         -> 1

-- | Pretty-print an 'ErrorFancy'.

showErrorFancy :: ShowErrorComponent e => ErrorFancy e -> String
showErrorFancy = \case
  ErrorFail msg -> msg
  ErrorIndentation ord ref actual ->
    "incorrect indentation (got " <> show (unPos actual) <>
    ", should be " <> p <> show (unPos ref) <> ")"
    where
      p = case ord of
            LT -> "less than "
            EQ -> "equal to "
            GT -> "greater than "
  ErrorCustom a -> showErrorComponent a

-- | Get length of the “pointer” to display under a given 'ErrorFancy'.

errorFancyLength :: ShowErrorComponent e => ErrorFancy e -> Int
errorFancyLength = \case
  ErrorCustom a -> errorComponentLen a
  _             -> 1

-- | Transforms a list of error messages into their textual representation.

messageItemsPretty
  :: String            -- ^ Prefix to prepend
  -> Set String        -- ^ Collection of messages
  -> String            -- ^ Result of rendering
messageItemsPretty prefix ts
  | E.null ts = ""
  | otherwise =
    prefix <> (orList . NE.fromList . E.toAscList) ts <> "\n"

-- | Print a pretty list where items are separated with commas and the word
-- “or” according to the rules of English punctuation.

orList :: NonEmpty String -> String
orList (x:|[])  = x
orList (x:|[y]) = x <> " or " <> y
orList xs       = intercalate ", " (NE.init xs) <> ", or " <> NE.last xs