-- SPDX-FileCopyrightText: 2021 Oxhead Alpha
-- SPDX-License-Identifier: LicenseRef-MIT-OA

-- TODO [#712]: Remove this next major release
{-# OPTIONS_GHC -Wno-deprecations #-}

-- TODO [#712]: Remove this next major release
{-# OPTIONS_GHC -Wno-redundant-constraints #-}

-- | Parsing of untyped Michelson values.

module Morley.Michelson.Parser.Value
  ( value'
  , mkLetVal

  -- * For tests
  , stringLiteral
  , bytesLiteral
  , intLiteral
  ) where

import Prelude hiding (many, note, try)

import Data.Char qualified as Char
import Data.Map qualified as Map
import Text.Hex qualified as Hex

import Text.Megaparsec (anySingle, choice, customFailure, label, manyTill, satisfy, takeWhileP, try)
import Text.Megaparsec.Char (char, string)
import Text.Megaparsec.Char.Lexer qualified as L

import Morley.Michelson.Let (LetValue(..))
import Morley.Michelson.Macro (ParsedOp, ParsedValue)
import Morley.Michelson.Parser.Error
import Morley.Michelson.Parser.Helpers
import Morley.Michelson.Parser.Lexer
import Morley.Michelson.Parser.Types (LetEnv, Parser, Parser', isLetEnv, letValues, withLetEnv)
import Morley.Michelson.Text (isMChar, mkMText)
import Morley.Michelson.Untyped qualified as U

{-
Note [Exponential backtracking]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Consider the following input string:

@
(Pair 1 (Pair 2 (Pair 3 (Pair 4 5))))
@

until we've parsed it completely, we can't decide whether the first
opening parenthesis is starting a tuple, or a simple expression.
The same applies to all subsequent parentheses. At one point, our parser
first tried to parse the whole expression as a tuple, then backtracked,
then tried to parse the same expression as a value (which includes
a tuple). Hence it incurred the exponential backtracking.

To avoid that, we first try to parse anything that starts with @(@, i.e.
a comma-separated tuple, or a plain value in parentheses. The choice
between the two is trivial: if there's one value it's just value, if
there are multiple, it's a tuple.

If we don't find @(@ we then try to parse anything that /doesn't/ start
with a parenthesis, i.e. everything else.
-}

-- | Parse untyped 'ParsedValue'. Take instruction parser as argument
-- to avoid cyclic dependencies between modules, hence ' in its name.
value' :: Parser le ParsedOp -> Parser le ParsedValue
value' opParser = parensOrTuple opParser <|> valueInnerWithoutParens opParser

parensOrTuple :: forall le. Parser le ParsedOp -> Parser le ParsedValue
parensOrTuple opParser = parens $
  case isLetEnv @le of
    Just{} -> foldr1 U.ValuePair <$> value' opParser `sepBy1` comma
    Nothing -> value' opParser

valueInnerWithoutParens :: Parser le ParsedOp -> Parser le ParsedValue
valueInnerWithoutParens opParser = label "value" $ choice $
  [ stringLiteral, bytesLiteral, intLiteral, unitValue
  , trueValue, falseValue, pairValueCore opParser, leftValue opParser
  , rightValue opParser, someValue opParser, noneValue, nilValue
  , seqOrLambda opParser, mapValue opParser, withLetEnv dataLetValue
  ]

seqOrLambda :: Parser le ParsedOp -> Parser le ParsedValue
seqOrLambda opParser = try (lambdaValue opParser) <|> seqValue opParser

stringLiteral :: forall le. Parser le ParsedValue
stringLiteral = lexeme $ U.ValueString . unsafe . mkMText . toText <$> do
  _ <- try $ string "\""
  manyTill validChar (string "\"")
  where
      validChar :: Parser le Char
      validChar = choice
        [ strEscape
        , satisfy (\x -> x /= '"' && isMChar x)
        , anySingle >>= stringLiteralFailure . InvalidChar
        ]

      strEscape :: Parser le Char
      strEscape = try (char '\\') >> esc
        where
          esc = choice
            [ char '\\'
            , char '"'
            , char 'n' $> '\n'
            , anySingle >>= stringLiteralFailure . InvalidEscapeSequence
            ]
      stringLiteralFailure = customFailure . StringLiteralException

-- It is safe not to use `try` here because bytesLiteral is the only
-- thing that starts from 0x (at least for now)
bytesLiteral :: Parser le (U.Value' op)
bytesLiteral = lexeme $ do
  string "0x"
  hexdigits <- takeWhileP Nothing Char.isHexDigit
  let mBytes = Hex.decodeHex hexdigits
  maybe
    (customFailure OddNumberBytesException)
    (return . U.ValueBytes . U.InternalByteString)
    mBytes

intLiteral :: Parser le (U.Value' op)
intLiteral = lexeme $ try $ U.ValueInt <$> L.signed pass L.decimal

unitValue :: Parser le ParsedValue
unitValue = word "Unit" U.ValueUnit

trueValue :: Parser le ParsedValue
trueValue = word "True" U.ValueTrue

falseValue :: Parser le ParsedValue
falseValue = word "False" U.ValueFalse

pairValueCore :: Parser le ParsedOp -> Parser le ParsedValue
pairValueCore opParser = symbol1 "Pair" *> pairInner
  where
    pairInner = U.ValuePair
      <$> value' opParser
      <*> (foldr1 U.ValuePair <$> some' (value' opParser))

leftValue :: Parser le ParsedOp -> Parser le ParsedValue
leftValue opParser = word "Left" U.ValueLeft <*> value' opParser

rightValue :: Parser le ParsedOp -> Parser le ParsedValue
rightValue opParser = word "Right" U.ValueRight <*> value' opParser

someValue :: Parser le ParsedOp -> Parser le ParsedValue
someValue opParser = word "Some" U.ValueSome <*> value' opParser

noneValue :: Parser le ParsedValue
noneValue = word "None" U.ValueNone

nilValue :: Parser le ParsedValue
nilValue = U.ValueNil <$ (try $ braces pass)

lambdaValue :: forall le. Parser le ParsedOp -> Parser le ParsedValue
lambdaValue opParser = U.ValueLambda <$> ops1
  where
    ops1 :: Parser le (NonEmpty ParsedOp)
    ops1 = braces $ sepEndBy1 opParser semicolon

seqValue :: Parser le ParsedOp -> Parser le ParsedValue
seqValue opParser =
  U.ValueSeq <$> (try $ braces $ sepEndBy1 (value' opParser) semicolon)

eltValue :: Parser le ParsedOp -> Parser le (U.Elt ParsedOp)
eltValue opParser = word "Elt" U.Elt <*> value' opParser <*> value' opParser

mapValue :: Parser le ParsedOp -> Parser le ParsedValue
mapValue opParser =
  U.ValueMap <$> (try $ braces $ sepEndBy1 (eltValue opParser) semicolon)

dataLetValue :: Parser' LetEnv ParsedValue
dataLetValue = do
  lvs <- asks letValues
  lvVal <$> (mkLetVal lvs)

mkLetVal :: Map Text LetValue -> Parser le LetValue
mkLetVal lvs = choice $ mkParser lvName <$> Map.elems lvs