-- SPDX-FileCopyrightText: 2020 Tocqueville Group -- -- SPDX-License-Identifier: LicenseRef-MIT-TQ -- | Parsing of untyped Michelson values. module Morley.Michelson.Parser.Value ( value' , mkLetVal -- * For tests , stringLiteral , bytesLiteral , intLiteral ) where import Prelude hiding (many, note, try) import qualified Data.Char as Char import qualified Data.Map as Map import qualified Text.Hex as Hex import Text.Megaparsec (anySingle, choice, customFailure, label, manyTill, satisfy, takeWhileP, try) import Text.Megaparsec.Char (char, string) import qualified Text.Megaparsec.Char.Lexer as L import Morley.Michelson.Let (LetValue(..)) import Morley.Michelson.Macro (ParsedOp, ParsedValue) import Morley.Michelson.Parser.Error import Morley.Michelson.Parser.Helpers import Morley.Michelson.Parser.Lexer import Morley.Michelson.Parser.Types (Parser, letValues) import Morley.Michelson.Text (isMChar, unsafeMkMText) import qualified Morley.Michelson.Untyped as U {- Note [Exponential backtracking] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Consider the following input string: @ (Pair 1 (Pair 2 (Pair 3 (Pair 4 5)))) @ until we've parsed it completely, we can't decide whether the first opening parenthesis is starting a tuple, or a simple expression. The same applies to all subsequent parentheses. At one point, our parser first tried to parse the whole expression as a tuple, then backtracked, then tried to parse the same expression as a value (which includes a tuple). Hence it incurred the exponential backtracking. To avoid that, we first try to parse anything that starts with @(@, i.e. a comma-separated tuple, or a plain value in parentheses. The choice between the two is trivial: if there's one value it's just value, if there are multiple, it's a tuple. If we don't find @(@ we then try to parse anything that /doesn't/ start with a parenthesis, i.e. everything else. -} -- | Parse untyped 'ParsedValue'. Take instruction parser as argument -- to avoid cyclic dependencies between modules, hence ' in its name. value' :: Parser ParsedOp -> Parser ParsedValue value' opParser = lexeme $ parensOrTuple opParser <|> valueInnerWithoutParens opParser parensOrTuple :: Parser ParsedOp -> Parser ParsedValue parensOrTuple opParser = parens $ foldr1 U.ValuePair <$> value' opParser `sepBy1` comma valueInnerWithoutParens :: Parser ParsedOp -> Parser ParsedValue valueInnerWithoutParens opParser = label "value" $ choice $ [ stringLiteral, bytesLiteral, intLiteral, unitValue , trueValue, falseValue, pairValueCore opParser, leftValue opParser , rightValue opParser, someValue opParser, noneValue, nilValue , seqOrLambda opParser, mapValue opParser, dataLetValue ] seqOrLambda :: Parser ParsedOp -> Parser ParsedValue seqOrLambda opParser = try (lambdaValue opParser) <|> seqValue opParser stringLiteral :: Parser ParsedValue stringLiteral = U.ValueString . unsafeMkMText . toText <$> do _ <- try $ string "\"" manyTill validChar (string "\"") where validChar :: Parser Char validChar = choice [ strEscape , satisfy (\x -> x /= '"' && isMChar x) , anySingle >>= stringLiteralFailure . InvalidChar ] strEscape :: Parser Char strEscape = try (char '\\') >> esc where esc = choice [ char '\\' , char '"' , char 'n' $> '\n' , anySingle >>= stringLiteralFailure . InvalidEscapeSequence ] stringLiteralFailure = customFailure . StringLiteralException -- It is safe not to use `try` here because bytesLiteral is the only -- thing that starts from 0x (at least for now) bytesLiteral :: Parser (U.Value' op) bytesLiteral = do string "0x" hexdigits <- takeWhileP Nothing Char.isHexDigit let mBytes = Hex.decodeHex hexdigits maybe (customFailure OddNumberBytesException) (return . U.ValueBytes . U.InternalByteString) mBytes intLiteral :: Parser (U.Value' op) intLiteral = try $ U.ValueInt <$> L.signed pass L.decimal unitValue :: Parser ParsedValue unitValue = word "Unit" U.ValueUnit trueValue :: Parser ParsedValue trueValue = word "True" U.ValueTrue falseValue :: Parser ParsedValue falseValue = word "False" U.ValueFalse pairValueCore :: Parser ParsedOp -> Parser ParsedValue pairValueCore opParser = symbol "Pair" *> pairInner where pairInner = U.ValuePair <$> value' opParser <*> (foldr1 U.ValuePair <$> some' (value' opParser)) leftValue :: Parser ParsedOp -> Parser ParsedValue leftValue opParser = word "Left" U.ValueLeft <*> value' opParser rightValue :: Parser ParsedOp -> Parser ParsedValue rightValue opParser = word "Right" U.ValueRight <*> value' opParser someValue :: Parser ParsedOp -> Parser ParsedValue someValue opParser = word "Some" U.ValueSome <*> value' opParser noneValue :: Parser ParsedValue noneValue = word "None" U.ValueNone nilValue :: Parser ParsedValue nilValue = U.ValueNil <$ (try $ braces pass) lambdaValue :: Parser ParsedOp -> Parser ParsedValue lambdaValue opParser = U.ValueLambda <$> ops1 where ops1 :: Parser (NonEmpty ParsedOp) ops1 = braces $ sepEndBy1 opParser semicolon seqValue :: Parser ParsedOp -> Parser ParsedValue seqValue opParser = U.ValueSeq <$> (try $ braces $ sepEndBy1 (value' opParser) semicolon) eltValue :: Parser ParsedOp -> Parser (U.Elt ParsedOp) eltValue opParser = word "Elt" U.Elt <*> value' opParser <*> value' opParser mapValue :: Parser ParsedOp -> Parser ParsedValue mapValue opParser = U.ValueMap <$> (try $ braces $ sepEndBy1 (eltValue opParser) semicolon) dataLetValue :: Parser ParsedValue dataLetValue = do lvs <- asks letValues lvVal <$> (mkLetVal lvs) mkLetVal :: Map Text LetValue -> Parser LetValue mkLetVal lvs = choice $ mkParser lvName <$> Map.elems lvs