-- SPDX-FileCopyrightText: 2021 Oxhead Alpha -- SPDX-License-Identifier: LicenseRef-MIT-OA module Morley.Michelson.Parser ( -- * Main parser type Parser -- * Parsers , program , value -- * Errors , CustomParserException (..) , ParseErrorBundle , ParserException (..) , StringLiteralParserException (..) -- * Additional helpers , MichelsonSource (..) , codeSrc , parseNoEnv , parseValue , parseExpandValue , parseType , rawOpsSequence -- * For tests , codeEntry , ops , type_ , stringLiteral , bytesLiteral , intLiteral , parsedOp , cbParameterBare -- * Quoters , utypeQ , uparamTypeQ , notes -- * Re-exports , errorBundlePretty ) where import Prelude hiding (try) import Data.Default (def) import Fmt (nameF, pretty, unlinesF, (+|), (|+)) import Language.Haskell.TH qualified as TH import Language.Haskell.TH.Quote qualified as TH import Language.Haskell.TH.Syntax qualified as TH import Text.Megaparsec (Parsec, choice, eitherP, eof, errorBundlePretty, getSourcePos, hidden, parse, sepEndBy, try) import Text.Megaparsec.Pos (SourcePos(..), unPos) import Morley.Michelson.ErrorPos (SrcPos(..), mkPos) import Morley.Michelson.Macro import Morley.Michelson.Parser.Common import Morley.Michelson.Parser.Error import Morley.Michelson.Parser.Instr import Morley.Michelson.Parser.Lexer import Morley.Michelson.Parser.Macro import Morley.Michelson.Parser.Type import Morley.Michelson.Parser.Types import Morley.Michelson.Parser.Value import Morley.Michelson.Typed.Extract (withUType) import Morley.Michelson.Untyped import Morley.Michelson.Untyped qualified as U {- $setup >>> import Morley.Michelson.Parser.Lexer >>> import Text.Megaparsec (sepEndBy) -} ---------------------------------------------------------------------------- -- Helpers ---------------------------------------------------------------------------- -- | Parse with empty environment parseNoEnv :: Parser a -> MichelsonSource -> Text -> Either (ParseErrorBundle Text CustomParserException) a parseNoEnv p src = parse (p <* eof) (pretty src) ------------------------------------------------------------------------------- -- Parsers ------------------------------------------------------------------------------- -- Contract ------------------ -- | Michelson contract program :: Parsec CustomParserException Text (Contract' ParsedOp) program = programInner <* eof programInner :: Parser (Contract' ParsedOp) programInner = mSpace *> contract cbParameter :: Parser ParameterType cbParameter = symbol1 "parameter" *> cbParameterBare cbParameterBare :: Parser ParameterType cbParameterBare = uncurry ParameterType . swap <$> field cbStorage :: Parser Ty cbStorage = symbol1 "storage" *> type_ cbCode :: Parser ParsedOp cbCode = symbol "code" *> codeEntry cbView :: Parser (View' ParsedOp) cbView = do symbol "view" viewName <- viewName_ viewArgument <- type_ viewReturn <- type_ viewCode <- codeEntry return View{..} contractBlock :: Parser (ContractBlock ParsedOp) contractBlock = choice [ (CBParam <$> cbParameter) , (CBStorage <$> cbStorage) , (CBCode <$> cbCode) , (CBView <$> cbView) ] -- | This ensures that the error message will point to the correct line. ensureNotDuplicate :: [ContractBlock ParsedOp] -> ContractBlock ParsedOp -> Parser () ensureNotDuplicate blocks result = let failDuplicateField a = fail $ "Duplicate contract field: " +| a |+ "" in case (result, blocks) of (CBParam _, CBParam _ : _) -> failDuplicateField result (CBStorage _, CBStorage _: _) -> failDuplicateField result (CBCode _, CBCode _: _) -> failDuplicateField result (CBView View{viewName=n1}, CBView View{viewName=n2} : _) | n1 == n2 -> failDuplicateField result (_, _:xs) -> ensureNotDuplicate xs result (_, []) -> pure () -- | Michelson contract contract :: Parser (Contract' ParsedOp) contract = do mSpace result <- braces contractTuple <|> contractTuple either (fail . pretty . nameF "Contract parsing error" . unlinesF) pure $ orderContractBlock result where -- @ensureNotDuplicate@ provides a better message and point to the correct line -- when the parser fails. contractTuple = fmap reverse . executingStateT [] $ do (`sepEndBy` lift semicolon) $ do r <- lift contractBlock get >>= \prev -> lift $ ensureNotDuplicate prev r modify (r :) -- Value ------------------ value :: Parser ParsedValue value = value' ops -- | Parse untyped value from text which comes from something that is -- not a file (which is often the case). So we assume it does not need -- any parsing environment. -- -- >>> parseValue MSUnspecified "{PUSH int aaa}" & either (putStrLn . displayException) (const $ pure ()) -- 1:11: -- | -- 1 | {PUSH int aaa} -- | ^^^^ -- unexpected "aaa}" -- expecting value -- parseValue :: MichelsonSource -> Text -> Either ParserException ParsedValue parseValue = first ParserException ... parseNoEnv value parseType :: MichelsonSource -> Text -> Either ParserException Ty parseType = first ParserException ... parseNoEnv type_ -- | Like 'parseValue', but also expands macros. parseExpandValue :: MichelsonSource -> Text -> Either ParserException U.Value parseExpandValue = fmap expandValue ... parseValue -- Primitive instruction ------------------ prim :: Parser ParsedInstr prim = primInstr contract ops -- Parsed operations (primitive instructions, macros, extras, etc.) ------------------ -- | Parses code block after "code" keyword of a contract, or code in a view -- block. codeEntry :: Parser ParsedOp codeEntry = flip Seq <$> getSrcPos <*> bracewrappedOps <|> parsedOp bracewrappedOps :: Parser [ParsedOp] bracewrappedOps = symbol "{" *> rawOpsSequence (symbol "}") {-| Michelson sequence of instructions, separated with a semicolon. Last semicolon is optional, semicolon after @}@ is optional. The first argument is the sequence terminator, that is to say, usually @}@. This might look mysterious, until one considers the alternatives. For example: >>> let fmt = either (putStrLn . displayException . ParserException) (const $ pure ()) >>> parseNoEnv (braces (sepEndBy parsedOp semicolon)) "" "{ DIIIP CMPEQ }" & fmt ... 1 | { DIIIP CMPEQ } | ^ unexpected 'D' expecting '}' ... >>> parseNoEnv (symbol "{" *> rawOpsSequence (symbol "}")) "" "{ DIIIP CMPEQ }" & fmt ... 1 | { DIIIP CMPEQ } | ^ unexpected 'C' ... This happens because @braces . sepEndBy@ backtracks a bit too far. Note that @braces . sepEndBy@ doesn't match Michelson syntax exactly, it's used as an example only. -} rawOpsSequence :: Parser a -> Parser [ParsedOp] rawOpsSequence endP = inner <|> end where end = [] <$ endP inner = do op <- parsedOp let sep = case op of Seq{} -> void $ optional semicolon -- semicolon optional after } _ -> semicolon (op :) <$> ((sep *> rawOpsSequence endP) <|> end) {- | >>> let fmt = either (putStrLn . displayException . ParserException) (const $ pure ()) >>> parseNoEnv parsedOp "" "{a}" & fmt 1:2: | 1 | {a} | ^^ unexpected "a}" expecting '{', '}', macro, or primitive instruction >>> parseNoEnv parsedOp "" "{ UNIT; DIIIP CMPEQ }" & fmt 1:15: | 1 | { UNIT; DIIIP CMPEQ } | ^ unexpected 'C' -} parsedOp :: Parser ParsedOp parsedOp = do pos <- getSrcPos choice [ flip Prim pos <$> prim , flip Mac pos <$> macro ops , hidden primOrMac , flip Seq pos <$> bracewrappedOps ] getSrcPos :: Parser SrcPos getSrcPos = do sp <- getSourcePos let l = unPos $ sourceLine sp let c = unPos $ sourceColumn sp -- reindexing starting from 0 pure . unsafe $ SrcPos <$> (mkPos $ l - 1) <*> (mkPos $ c - 1) primWithPos :: Parser ParsedInstr -> Parser ParsedOp primWithPos act = do srcPos <- getSrcPos flip Prim srcPos <$> act macWithPos :: Parser Macro -> Parser ParsedOp macWithPos act = do srcPos <- getSrcPos flip Mac srcPos <$> act ops :: Parser (ParsedSeq ParsedOp) ops = do pos <- getSrcPos choice [ PSSequence <$> bracewrappedOps , PSSingleMacro pos <$> ( parens (allMacros ops) <|> allSingleTokenMacros ) ] ------------------------------------------------------------------------------- -- Mixed parsers -- These are needed for better error messages ------------------------------------------------------------------------------- ifOrIfX :: Parser ParsedOp ifOrIfX = do pos <- getSrcPos symbol "IF" a <- eitherP (try $ cmpOp $ pure def) ops case a of Left cmp -> flip Mac pos <$> (IFX cmp <$> ops <*> ops) Right op -> flip Prim pos . IF op <$> ops -- Some of the operations and macros have the same prefixes in their names -- So this case should be handled separately primOrMac :: Parser ParsedOp primOrMac = choice [ macWithPos (ifCmpMac ops), ifOrIfX , macWithPos (mapCadrMac ops), primWithPos (mapOp ops) , try (primWithPos pairOp), try (primWithPos pairNOp), macWithPos pairMac , try (macWithPos duupMac), primWithPos dupOp , try (macWithPos (diipMac bracewrappedOps)), primWithPos (dipOp ops) , try (macWithPos carnMac), try (macWithPos cdrnMac), try (macWithPos cadrMac) , primWithPos carOp , primWithPos cdrOp ] ------------------------------------------------------------------------------- -- Safe construction of Haskell values ------------------------------------------------------------------------------- parserToQuasiQuoter :: Parser (TH.Q TH.Exp) -> TH.QuasiQuoter parserToQuasiQuoter parser = TH.QuasiQuoter { TH.quoteExp = \s -> case parseNoEnv (mSpace *> parser) "quasi-quoter" (toText s) of Left err -> fail $ errorBundlePretty err Right qexp -> qexp , TH.quotePat = \_ -> fail "Cannot be used as pattern" , TH.quoteType = \_ -> fail "Cannot be used as type" , TH.quoteDec = \_ -> fail "Cannot be used as declaration" } -- | Creates 'U.Ty' by its Morley representation. -- -- >>> [utypeQ| or (int :a) (nat :b) |] -- Ty (TOr (UnsafeAnnotation @FieldTag "") (UnsafeAnnotation @FieldTag "") (Ty TInt (UnsafeAnnotation @TypeTag "a")) (Ty TNat (UnsafeAnnotation @TypeTag "b"))) (UnsafeAnnotation @TypeTag "") -- -- >>> [utypeQ|a|] -- -- ... -- | -- 1 | a -- | ^ -- unexpected 'a' -- expecting type -- ... utypeQ :: TH.QuasiQuoter utypeQ = parserToQuasiQuoter (TH.lift <$> type_) -- | Creates 'U.ParameterType' by its Morley representation. uparamTypeQ :: TH.QuasiQuoter uparamTypeQ = parserToQuasiQuoter (TH.lift <$> cbParameterBare) -- | Parses and typechecks a 'Morley.Michelson.Typed.Notes'. -- -- >>> [notes|int :ty|] -- NTInt (UnsafeAnnotation @TypeTag "ty") notes :: TH.QuasiQuoter notes = parserToQuasiQuoter do t <- type_ pure $ withUType t TH.lift