-- SPDX-FileCopyrightText: 2021 Oxhead Alpha -- SPDX-License-Identifier: LicenseRef-MIT-OA -- TODO [#712]: Remove this next major release {-# OPTIONS_GHC -Wno-deprecations #-} -- TODO [#712]: Remove this next major release {-# OPTIONS_GHC -Wno-redundant-constraints #-} -- | Parsing of untyped Michelson values. module Morley.Michelson.Parser.Value ( value' , mkLetVal -- * For tests , stringLiteral , bytesLiteral , intLiteral ) where import Prelude hiding (many, note, try) import Data.Char qualified as Char import Data.Map qualified as Map import Text.Hex qualified as Hex import Text.Megaparsec (anySingle, choice, customFailure, label, manyTill, satisfy, takeWhileP, try) import Text.Megaparsec.Char (char, string) import Text.Megaparsec.Char.Lexer qualified as L import Morley.Michelson.Let (LetValue(..)) import Morley.Michelson.Macro (ParsedOp, ParsedValue) import Morley.Michelson.Parser.Error import Morley.Michelson.Parser.Helpers import Morley.Michelson.Parser.Lexer import Morley.Michelson.Parser.Types (LetEnv, Parser, Parser', isLetEnv, letValues, withLetEnv) import Morley.Michelson.Text (isMChar, mkMText) import Morley.Michelson.Untyped qualified as U {- Note [Exponential backtracking] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Consider the following input string: @ (Pair 1 (Pair 2 (Pair 3 (Pair 4 5)))) @ until we've parsed it completely, we can't decide whether the first opening parenthesis is starting a tuple, or a simple expression. The same applies to all subsequent parentheses. At one point, our parser first tried to parse the whole expression as a tuple, then backtracked, then tried to parse the same expression as a value (which includes a tuple). Hence it incurred the exponential backtracking. To avoid that, we first try to parse anything that starts with @(@, i.e. a comma-separated tuple, or a plain value in parentheses. The choice between the two is trivial: if there's one value it's just value, if there are multiple, it's a tuple. If we don't find @(@ we then try to parse anything that /doesn't/ start with a parenthesis, i.e. everything else. -} -- | Parse untyped 'ParsedValue'. Take instruction parser as argument -- to avoid cyclic dependencies between modules, hence ' in its name. value' :: Parser le ParsedOp -> Parser le ParsedValue value' opParser = parensOrTuple opParser <|> valueInnerWithoutParens opParser parensOrTuple :: forall le. Parser le ParsedOp -> Parser le ParsedValue parensOrTuple opParser = parens $ case isLetEnv @le of Just{} -> foldr1 U.ValuePair <$> value' opParser `sepBy1` comma Nothing -> value' opParser valueInnerWithoutParens :: Parser le ParsedOp -> Parser le ParsedValue valueInnerWithoutParens opParser = label "value" $ choice $ [ stringLiteral, bytesLiteral, intLiteral, unitValue , trueValue, falseValue, pairValueCore opParser, leftValue opParser , rightValue opParser, someValue opParser, noneValue, nilValue , seqOrLambda opParser, mapValue opParser, withLetEnv dataLetValue ] seqOrLambda :: Parser le ParsedOp -> Parser le ParsedValue seqOrLambda opParser = try (lambdaValue opParser) <|> seqValue opParser stringLiteral :: forall le. Parser le ParsedValue stringLiteral = lexeme $ U.ValueString . unsafe . mkMText . toText <$> do _ <- try $ string "\"" manyTill validChar (string "\"") where validChar :: Parser le Char validChar = choice [ strEscape , satisfy (\x -> x /= '"' && isMChar x) , anySingle >>= stringLiteralFailure . InvalidChar ] strEscape :: Parser le Char strEscape = try (char '\\') >> esc where esc = choice [ char '\\' , char '"' , char 'n' $> '\n' , anySingle >>= stringLiteralFailure . InvalidEscapeSequence ] stringLiteralFailure = customFailure . StringLiteralException -- It is safe not to use `try` here because bytesLiteral is the only -- thing that starts from 0x (at least for now) bytesLiteral :: Parser le (U.Value' op) bytesLiteral = lexeme $ do string "0x" hexdigits <- takeWhileP Nothing Char.isHexDigit let mBytes = Hex.decodeHex hexdigits maybe (customFailure OddNumberBytesException) (return . U.ValueBytes . U.InternalByteString) mBytes intLiteral :: Parser le (U.Value' op) intLiteral = lexeme $ try $ U.ValueInt <$> L.signed pass L.decimal unitValue :: Parser le ParsedValue unitValue = word "Unit" U.ValueUnit trueValue :: Parser le ParsedValue trueValue = word "True" U.ValueTrue falseValue :: Parser le ParsedValue falseValue = word "False" U.ValueFalse pairValueCore :: Parser le ParsedOp -> Parser le ParsedValue pairValueCore opParser = symbol1 "Pair" *> pairInner where pairInner = U.ValuePair <$> value' opParser <*> (foldr1 U.ValuePair <$> some' (value' opParser)) leftValue :: Parser le ParsedOp -> Parser le ParsedValue leftValue opParser = word "Left" U.ValueLeft <*> value' opParser rightValue :: Parser le ParsedOp -> Parser le ParsedValue rightValue opParser = word "Right" U.ValueRight <*> value' opParser someValue :: Parser le ParsedOp -> Parser le ParsedValue someValue opParser = word "Some" U.ValueSome <*> value' opParser noneValue :: Parser le ParsedValue noneValue = word "None" U.ValueNone nilValue :: Parser le ParsedValue nilValue = U.ValueNil <$ (try $ braces pass) lambdaValue :: forall le. Parser le ParsedOp -> Parser le ParsedValue lambdaValue opParser = U.ValueLambda <$> ops1 where ops1 :: Parser le (NonEmpty ParsedOp) ops1 = braces $ sepEndBy1 opParser semicolon seqValue :: Parser le ParsedOp -> Parser le ParsedValue seqValue opParser = U.ValueSeq <$> (try $ braces $ sepEndBy1 (value' opParser) semicolon) eltValue :: Parser le ParsedOp -> Parser le (U.Elt ParsedOp) eltValue opParser = word "Elt" U.Elt <*> value' opParser <*> value' opParser mapValue :: Parser le ParsedOp -> Parser le ParsedValue mapValue opParser = U.ValueMap <$> (try $ braces $ sepEndBy1 (eltValue opParser) semicolon) dataLetValue :: Parser' LetEnv ParsedValue dataLetValue = do lvs <- asks letValues lvVal <$> (mkLetVal lvs) mkLetVal :: Map Text LetValue -> Parser le LetValue mkLetVal lvs = choice $ mkParser lvName <$> Map.elems lvs