-- SPDX-FileCopyrightText: 2020 Tocqueville Group
--
-- SPDX-License-Identifier: LicenseRef-MIT-TQ

-- | Parsing of untyped Michelson values.

module Morley.Michelson.Parser.Value
  ( value'
  , mkLetVal

  -- * For tests
  , stringLiteral
  , bytesLiteral
  , intLiteral
  ) where

import Prelude hiding (many, note, try)

import qualified Data.Char as Char
import qualified Data.Map as Map
import qualified Text.Hex as Hex

import Text.Megaparsec (anySingle, choice, customFailure, label, manyTill, satisfy, takeWhileP, try)
import Text.Megaparsec.Char (char, string)
import qualified Text.Megaparsec.Char.Lexer as L

import Morley.Michelson.Let (LetValue(..))
import Morley.Michelson.Macro (ParsedOp, ParsedValue)
import Morley.Michelson.Parser.Error
import Morley.Michelson.Parser.Helpers
import Morley.Michelson.Parser.Lexer
import Morley.Michelson.Parser.Types (Parser, letValues)
import Morley.Michelson.Text (isMChar, unsafeMkMText)
import qualified Morley.Michelson.Untyped as U

{-
Note [Exponential backtracking]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Consider the following input string:

@
(Pair 1 (Pair 2 (Pair 3 (Pair 4 5))))
@

until we've parsed it completely, we can't decide whether the first
opening parenthesis is starting a tuple, or a simple expression.
The same applies to all subsequent parentheses. At one point, our parser
first tried to parse the whole expression as a tuple, then backtracked,
then tried to parse the same expression as a value (which includes
a tuple). Hence it incurred the exponential backtracking.

To avoid that, we first try to parse anything that starts with @(@, i.e.
a comma-separated tuple, or a plain value in parentheses. The choice
between the two is trivial: if there's one value it's just value, if
there are multiple, it's a tuple.

If we don't find @(@ we then try to parse anything that /doesn't/ start
with a parenthesis, i.e. everything else.
-}

-- | Parse untyped 'ParsedValue'. Take instruction parser as argument
-- to avoid cyclic dependencies between modules, hence ' in its name.
value' :: Parser ParsedOp -> Parser ParsedValue
value' opParser = lexeme $ parensOrTuple opParser <|> valueInnerWithoutParens opParser

parensOrTuple :: Parser ParsedOp -> Parser ParsedValue
parensOrTuple opParser = parens $ foldr1 U.ValuePair <$> value' opParser `sepBy1` comma

valueInnerWithoutParens :: Parser ParsedOp -> Parser ParsedValue
valueInnerWithoutParens opParser = label "value" $ choice $
  [ stringLiteral, bytesLiteral, intLiteral, unitValue
  , trueValue, falseValue, pairValueCore opParser, leftValue opParser
  , rightValue opParser, someValue opParser, noneValue, nilValue
  , seqOrLambda opParser, mapValue opParser, dataLetValue
  ]

seqOrLambda :: Parser ParsedOp -> Parser ParsedValue
seqOrLambda opParser = try (lambdaValue opParser) <|> seqValue opParser

stringLiteral :: Parser ParsedValue
stringLiteral = U.ValueString . unsafeMkMText . toText <$> do
  _ <- try $ string "\""
  manyTill validChar (string "\"")
  where
      validChar :: Parser Char
      validChar = choice
        [ strEscape
        , satisfy (\x -> x /= '"' && isMChar x)
        , anySingle >>= stringLiteralFailure . InvalidChar
        ]

      strEscape :: Parser Char
      strEscape = try (char '\\') >> esc
        where
          esc = choice
            [ char '\\'
            , char '"'
            , char 'n' $> '\n'
            , anySingle >>= stringLiteralFailure . InvalidEscapeSequence
            ]
      stringLiteralFailure = customFailure . StringLiteralException

-- It is safe not to use `try` here because bytesLiteral is the only
-- thing that starts from 0x (at least for now)
bytesLiteral :: Parser (U.Value' op)
bytesLiteral = do
  string "0x"
  hexdigits <- takeWhileP Nothing Char.isHexDigit
  let mBytes = Hex.decodeHex hexdigits
  maybe
    (customFailure OddNumberBytesException)
    (return . U.ValueBytes . U.InternalByteString)
    mBytes

intLiteral :: Parser (U.Value' op)
intLiteral = try $ U.ValueInt <$> L.signed pass L.decimal

unitValue :: Parser ParsedValue
unitValue = word "Unit" U.ValueUnit

trueValue :: Parser ParsedValue
trueValue = word "True" U.ValueTrue

falseValue :: Parser ParsedValue
falseValue = word "False" U.ValueFalse

pairValueCore :: Parser ParsedOp -> Parser ParsedValue
pairValueCore opParser = symbol "Pair" *> pairInner
  where
    pairInner = U.ValuePair
      <$> value' opParser
      <*> (foldr1 U.ValuePair <$> some' (value' opParser))

leftValue :: Parser ParsedOp -> Parser ParsedValue
leftValue opParser = word "Left" U.ValueLeft <*> value' opParser

rightValue :: Parser ParsedOp -> Parser ParsedValue
rightValue opParser = word "Right" U.ValueRight <*> value' opParser

someValue :: Parser ParsedOp -> Parser ParsedValue
someValue opParser = word "Some" U.ValueSome <*> value' opParser

noneValue :: Parser ParsedValue
noneValue = word "None" U.ValueNone

nilValue :: Parser ParsedValue
nilValue = U.ValueNil <$ (try $ braces pass)

lambdaValue :: Parser ParsedOp -> Parser ParsedValue
lambdaValue opParser = U.ValueLambda <$> ops1
  where
    ops1 :: Parser (NonEmpty ParsedOp)
    ops1 = braces $ sepEndBy1 opParser semicolon

seqValue :: Parser ParsedOp -> Parser ParsedValue
seqValue opParser =
  U.ValueSeq <$> (try $ braces $ sepEndBy1 (value' opParser) semicolon)

eltValue :: Parser ParsedOp -> Parser (U.Elt ParsedOp)
eltValue opParser = word "Elt" U.Elt <*> value' opParser <*> value' opParser

mapValue :: Parser ParsedOp -> Parser ParsedValue
mapValue opParser =
  U.ValueMap <$> (try $ braces $ sepEndBy1 (eltValue opParser) semicolon)

dataLetValue :: Parser ParsedValue
dataLetValue = do
  lvs <- asks letValues
  lvVal <$> (mkLetVal lvs)

mkLetVal :: Map Text LetValue -> Parser LetValue
mkLetVal lvs = choice $ mkParser lvName <$> Map.elems lvs