{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} module Typst.Parse ( parseTypst, ) where import Data.List (sortOn) import Control.Applicative (some) import Control.Monad (MonadPlus (mzero), guard, void, when) import Control.Monad.Identity (Identity) import Data.Char hiding (Space) import Data.Maybe (isJust, isNothing) import Data.Text (Text) import qualified Data.Text as T import Text.Parsec hiding (string) import qualified Text.Parsec as P import Text.Parsec.Expr import Text.Read (readMaybe) import Typst.Syntax import Typst.Shorthands (mathSymbolShorthands) -- import Debug.Trace -- | Parse text into a list of 'Markup' (or a Parsec @ParseError@). parseTypst :: -- | Filepath to Typst source text, only used for error messages FilePath -> -- | The Typst source text Text -> Either ParseError [Markup] parseTypst fp inp = case runParser (spaces *> many pMarkup <* pEndOfContent) initialState fp inp of Left e -> Left e Right r -> Right r data PState = PState { stIndent :: [Int], stLineStartCol :: !Int, stAllowNewlines :: !Int, -- allow newlines if > 0 stBeforeSpace :: Maybe (SourcePos, Text), stContentBlockNesting :: Int } deriving (Show) initialState :: PState initialState = PState { stIndent = [], stLineStartCol = 1, stAllowNewlines = 0, stBeforeSpace = Nothing, stContentBlockNesting = 0 } type P = Parsec Text PState string :: String -> P String string = try . P.string ws :: P () ws = do p1 <- getPosition inp <- getInput allowNewlines <- stAllowNewlines <$> getState let isSp c | allowNewlines > 0 = isSpace c | otherwise = isSpace c && c /= '\r' && c /= '\n' ( skipMany1 (void (satisfy isSp) <|> void pComment) *> updateState (\st -> st {stBeforeSpace = Just (p1, inp)}) ) <|> updateState (\st -> st {stBeforeSpace = Nothing}) lexeme :: P a -> P a lexeme pa = pa <* ws sym :: String -> P String sym = lexeme . string op :: String -> P () op s = try $ lexeme $ do void $ string s when ( s == "+" || s == "-" || s == "*" || s == "/" || s == "=" || s == "<" || s == ">" || s == "!" ) $ notFollowedBy (char '=') when (s == "-") $ notFollowedBy (char '>') -- arrows when (s == "<") $ notFollowedBy (char '-' <|> char '=') -- arrows when (s == "=") $ notFollowedBy (char '>' <|> char '=') withNewlines :: P a -> P a withNewlines pa = do updateState $ \st -> st {stAllowNewlines = stAllowNewlines st + 1} res <- pa updateState $ \st -> st {stAllowNewlines = stAllowNewlines st - 1} pure res inParens :: P a -> P a inParens pa = withNewlines (between (sym "(") (char ')') pa) <* ws inBraces :: P a -> P a inBraces pa = withNewlines (between (sym "{") (char '}') pa) <* ws pMarkup :: P Markup pMarkup = pSpace <|> pHeading <|> pComment <|> pEol <|> pHardbreak <|> pStrong <|> pEmph <|> pEquation <|> pListItem <|> pUrl <|> pText <|> pRawBlock <|> pRawInline <|> pEscaped <|> pNbsp <|> pDash <|> pEllipsis <|> pQuote <|> pLabelInContent <|> pRef <|> pHash <|> pBracketed <|> pSymbol -- We need to group paired brackets or the closing bracketed may be -- taken to close a pContent block: pBracketed :: P Markup pBracketed = Bracketed <$> try (between (char '[') (char ']') (many pMarkup)) pSymbol :: P Markup pSymbol = do blockNesting <- stContentBlockNesting <$> getState let isSpecial' c = isSpecial c && (c /= ']' || blockNesting == 0) Text . T.singleton <$> satisfy isSpecial' -- equation ::= ('$' math* '$') | ('$ ' math* ' $') pEquation :: P Markup pEquation = do void $ char '$' withNewlines $ do display <- option False $ True <$ lookAhead space ws maths <- many pMath void $ char '$' pure $ Equation display maths mathOperatorTable :: [[Operator Text PState Identity Markup]] mathOperatorTable = [ -- precedence 7 -- attachment with number, e.g. a_1 (see #17) [ Infix (attachBottom <$ (try (op "_" *> lookAhead mNumber))) AssocLeft, Infix (attachTop <$ (try (op "^" *> lookAhead mNumber))) AssocLeft ], -- precedence 6 [ Postfix ( try $ do mbBeforeSpace <- stBeforeSpace <$> getState -- NOTE: can't have space before () or [] arg in a -- function call! to prevent bugs with e.g. 'if 2<3 [...]'. guard $ isNothing mbBeforeSpace args <- mGrouped '(' ')' True pure $ \expr -> MGroup Nothing Nothing [expr, args] ) ], -- precedence 5 -- attachment with non-number, e.g. a_x [ Infix (attachBottom <$ op "_") AssocLeft, Infix (attachTop <$ op "^") AssocLeft ], -- precedence 4 -- factorial needs to take precedence over fraction [ Postfix (try $ do mbBeforeSpace <- stBeforeSpace <$> getState guard $ isNothing mbBeforeSpace lexeme $ char '!' *> notFollowedBy (char '=') pure (\expr -> MGroup Nothing Nothing [expr, Text "!"])) ], -- precedence 3 [ Infix (makeFrac <$ op "/") AssocLeft ] ] -- MAttach (Maybe bottom) (Maybe top) base attachBottom :: Markup -> Markup -> Markup attachBottom (MAttach Nothing y x) z = MAttach (Just (hideOuterParens z)) y x attachBottom z (MAttach Nothing y x) = MAttach (Just (hideOuterParens x)) y z attachBottom base x = MAttach (Just (hideOuterParens x)) Nothing base attachTop :: Markup -> Markup -> Markup attachTop (MAttach x Nothing y) z = MAttach x (Just (hideOuterParens z)) y attachTop z (MAttach x Nothing y) = MAttach x (Just (hideOuterParens y)) z attachTop base x = MAttach Nothing (Just (hideOuterParens x)) base makeFrac :: Markup -> Markup -> Markup makeFrac x y = MFrac x (hideOuterParens y) hideOuterParens :: Markup -> Markup hideOuterParens (MGroup (Just "(") (Just ")") x) = MGroup Nothing Nothing x hideOuterParens x = x mathExpressionTable :: [[Operator Text PState Identity Expr]] mathExpressionTable = take 16 (cycle [[mathFieldAccess], [mathFunctionCall]]) mathFieldAccess :: Operator Text PState Identity Expr mathFieldAccess = Postfix (FieldAccess <$> try (sym "." *> (Ident <$> pMathField))) where pMathField = lexeme $ do d <- satisfy (\c -> isIdentStart c && c /= '_') ds <- many $ satisfy (\c -> isIdentContinue c && c /= '_' && c /= '-') pure $ Identifier $ T.pack (d : ds) mathFunctionCall :: Operator Text PState Identity Expr mathFunctionCall = Postfix ( do mbBeforeSpace <- stBeforeSpace <$> getState -- NOTE: can't have space before () or [] arg in a -- function call! to prevent bugs with e.g. 'if 2<3 [...]'. guard $ isNothing mbBeforeSpace args <- mArgs pure $ \expr -> FuncCall expr args ) mExpr :: P Markup mExpr = Code <$> getPosition <*> pMathExpr pMathExpr :: P Expr pMathExpr = buildExpressionParser mathExpressionTable (pMathIdent <|> pMathLiteral) where pMathLiteral :: P Expr pMathLiteral = Block . Content <$> many1 (mLiteral <|> mEscaped <|> mShorthand) pMathIdent :: P Expr pMathIdent = (Ident <$> pMathIdentifier) <|> ( do void $ char '√' (Ident (Identifier "root") <$ lookAhead (char '(')) <|> ( do x <- pMath pure $ FuncCall (Ident (Identifier "root")) [NormalArg (Block (Content [x]))] ) ) pMathIdentifier :: P Identifier pMathIdentifier = lexeme $ try $ do d <- satisfy (\c -> isIdentStart c && c /= '_') ds <- many1 $ satisfy (\c -> isIdentContinue c && c /= '_' && c /= '-') pure $ Identifier $ T.pack (d : ds) pMath :: P Markup pMath = buildExpressionParser mathOperatorTable pBaseMath pBaseMath :: P Markup pBaseMath = mNumber <|> mLiteral <|> mEscaped <|> mShorthand <|> mBreak <|> mAlignPoint <|> mExpr <|> mGroup <|> mCode <|> mMid <|> mSymbol mGroup :: P Markup mGroup = mGrouped '(' ')' False <|> mGrouped '{' '}' False <|> mGrouped '[' ']' False mGrouped :: Char -> Char -> Bool -> P Markup mGrouped op' cl requireMatch = withNewlines $ try $ do void $ sym [op'] res <- many (notFollowedBy (char cl) *> pMath) (MGroup (Just (T.singleton op')) (Just (T.singleton cl)) res <$ void (sym [cl])) <|> (MGroup (Just (T.singleton op')) Nothing res <$ guard (not requireMatch)) mNumber :: P Markup mNumber = lexeme $ do ds <- T.pack <$> many1 digit opt <- option mempty ( do e <- char '.' es <- many1 digit pure $ T.pack (e : es) ) pure $ Text (ds <> opt) mLiteral :: P Markup mLiteral = do mbBeforeSpace <- stBeforeSpace <$> getState String t <- pStr -- ensure space in e.g. x "is natural": mbAfterSpace <- stBeforeSpace <$> getState pure $ Text $ (maybe "" (const " ") mbBeforeSpace) <> t <> (maybe "" (const " ") mbAfterSpace) mEscaped :: P Markup mEscaped = Text . T.singleton <$> lexeme (try pEsc) mBreak :: P Markup mBreak = HardBreak <$ lexeme (char '\\' *> skipMany (satisfy (isSpace))) -- we don't need to check for following whitespace, because -- anything else would have been parsed by mEsc. -- but we do skip following whitespace, since \160 wouldn't be gobbled by lexeme... mAlignPoint :: P Markup mAlignPoint = MAlignPoint <$ sym "&" -- Math args can't have a content block; they can use semicolons -- to separate array args. mArgs :: P [Arg] mArgs = inParens $ many (mKeyValArg <|> mArrayArg <|> mNormArg <|> mMathArg) where sep = void (sym ",") <|> void (lookAhead (char ')')) mNormArg = try $ NormalArg <$> (char '#' *> pExpr <* sep) mKeyValArg = do ident <- try $ pIdentifier <* sym ":" KeyValArg ident <$> ( (char '#' *> pExpr <* sep) <|> Block . Content <$> mathContent ) mathContent = do xs <- maths if null xs then void $ sym "," else sep pure xs mMathArg = BlockArg <$> mathContent mArrayArg = try $ do let pRow = sepBy' (toGroup <$> maths) (sym ",") rows <- many1 $ try (pRow <* sym ";") -- parse any regular items and form a last row lastrow <- many (toGroup <$> mathContent) let rows' = if null lastrow then rows else rows ++ [lastrow] pure $ ArrayArg rows' maths = many (notFollowedBy (oneOf ",;)") *> notFollowedBy mKeyValArg *> pMath) toGroup [m] = m toGroup ms = MGroup Nothing Nothing ms -- special sepBy' with an added try: sepBy' p s = sepBy1' p s <|> pure [] sepBy1' p s = do x <- p xs <- many (try (s *> p)) pure (x : xs) mCode :: P Markup mCode = lexeme $ char '#' *> (Code <$> getPosition <*> pBasicExpr) mMid :: P Markup mMid = try $ do getState >>= guard . isJust . stBeforeSpace void $ char '|' *> space *> ws pure $ MGroup Nothing Nothing [Nbsp, Text "|", Nbsp] mShorthand :: P Markup mShorthand = getPosition >>= \pos -> lexeme (Code pos <$> choice (map toShorthandParser shorthands)) where shorthands = reverse (sortOn (T.length . fst) mathSymbolShorthands) toShorthandParser (short, symname) = toSym symname <$ try (string (T.unpack short)) toSym name = case map (Ident . Identifier) $ T.split (== '.') name of [] -> Literal None [i] -> i (i:is) -> foldr FieldAccess i is mSymbol :: P Markup mSymbol = lexeme ( Text . T.singleton <$> satisfy (\c -> not (isSpace c) && c /= '$' && c /= '\\')) withIndent :: Int -> P a -> P a withIndent indent pa = do oldIndent <- stIndent <$> getState updateState $ \st -> st {stIndent = indent : oldIndent} ms <- pa updateState $ \st -> st {stIndent = oldIndent} pure ms -- list ::= '-' space markup -- enum ::= (digit+ '.' | '+') space markup -- desc ::= '/' space markup ':' space markup pListItem :: P Markup pListItem = do col <- sourceColumn <$> getPosition startLine <- stLineStartCol <$> getState guard (col == startLine) try ( do void $ char '-' void (char ' ') <|> pBlankline BulletListItem <$> withIndent col (many pMarkup) ) <|> try ( do start <- (Nothing <$ char '+') <|> (Just <$> enumListStart) void (char ' ') <|> pBlankline EnumListItem start <$> withIndent col (many pMarkup) ) <|> try ( do -- desc list void (char '/') void (many1 (char ' ')) term <- manyTill pMarkup (char ':') skipMany spaceChar optional pBlankline DescListItem term <$> withIndent col (many pMarkup) ) enumListStart :: P Int enumListStart = do ds <- many1 digit void $ char '.' case readMaybe ds of Nothing -> fail $ "could not read " <> ds <> " as digits" Just x -> pure x -- line-comment = '//' (!unicode(Newline))* -- block-comment = '/*' (. | block-comment)* '*/' pComment :: P Markup pComment = Comment <$ (pLineComment <|> pBlockComment) pLineComment :: P () pLineComment = do void $ string "//" skipMany (satisfy (\c -> c /= '\n' && c /= '\r')) void endOfLine <|> eof pBlockComment :: P () pBlockComment = do void $ string "/*" void $ manyTill ( pBlockComment <|> pLineComment <|> void anyChar ) (string "*/") pSpace :: P Markup pSpace = Space <$ some (satisfy (\c -> isSpace c && c /= '\r' && c /= '\n')) pEol :: P Markup pEol = do pBaseEol (ParBreak <$ many1 pBaseEol) <|> (ParBreak <$ pEndOfContent) <|> pure SoftBreak pBaseEol :: P () pBaseEol = try $ do void endOfLine -- fail if we can't indent enough indents <- stIndent <$> getState case indents of (i : _) -> void (try (count i (char ' '))) <|> pBlankline [] -> pure () eatPrefixSpaces eatPrefixSpaces :: P () eatPrefixSpaces = do skipMany spaceChar col <- sourceColumn <$> getPosition updateState $ \st -> st {stLineStartCol = col} spaceChar :: P Char spaceChar = satisfy (\c -> c == ' ' || c == '\t') pHardbreak :: P Markup pHardbreak = HardBreak <$ try (char '\\' *> (void spaceChar <|> pBaseEol) *> skipMany spaceChar) pBlankline :: P () pBlankline = try $ do skipMany spaceChar void (lookAhead endOfLine) <|> pEndOfContent pRawInline :: P Markup pRawInline = RawInline . T.pack <$> (char '`' *> manyTill anyChar (void (char '`') <|> eof)) pRawBlock :: P Markup pRawBlock = do void $ string "```" numticks <- (+ 3) . length <$> many (char '`') lang <- T.pack <$> (many alphaNum <* optional (char ' ')) optional $ try $ skipMany (char ' ') *> pEol let nl = newline <* optionalGobbleIndent code <- T.pack <$> manyTill (nl <|> anyChar) (string (replicate numticks '`')) skipMany (char '`') pure $ RawBlock lang code optionalGobbleIndent :: P () optionalGobbleIndent = do indents <- stIndent <$> getState case indents of (i : _) -> gobble i [] -> pure () where gobble :: Int -> P () gobble 0 = pure () gobble n = (char ' ' *> gobble (n - 1)) <|> pure () pStrong :: P Markup pStrong = Strong <$> (char '*' *> manyTill pMarkup (char '*')) pEmph :: P Markup pEmph = Emph <$> (char '_' *> manyTill pMarkup (char '_')) pHeading :: P Markup pHeading = try $ do col <- sourceColumn <$> getPosition lineStartCol <- stLineStartCol <$> getState guard (col == lineStartCol) lev <- length <$> many1 (char '=') void (many1 (char ' ')) <|> void (lookAhead endOfLine) -- Note: == hi _foo -- bar_ is parsed as a heading with "hi emph(foobar)" ms <- manyTill pMarkup ( void pEol <|> pEndOfContent <|> void (lookAhead (try (spaces *> pLabel))) <|> void (lookAhead (char ']'))) skipMany spaceChar pure $ Heading lev ms pUrl :: P Markup pUrl = try $ do prot <- T.pack <$> (string "http://" <|> string "https://") rest <- T.pack <$> pNonspaceWithBalancedBrackets 0 0 0 pure $ Url $ prot <> rest pNonspaceWithBalancedBrackets :: Int -> Int -> Int -> P [Char] pNonspaceWithBalancedBrackets parens brackets braces = ((:) <$> char '(' <*> pNonspaceWithBalancedBrackets (parens + 1) brackets braces) <|> ((:) <$> (guard (parens > 0) *> char ')') <*> pNonspaceWithBalancedBrackets (parens - 1) brackets braces) <|> ((:) <$> char '[' <*> pNonspaceWithBalancedBrackets parens (brackets + 1) braces) <|> ((:) <$> (guard (brackets > 0) *> char ']') <*> pNonspaceWithBalancedBrackets parens (brackets - 1) braces) <|> ((:) <$> char '{' <*> pNonspaceWithBalancedBrackets parens brackets (braces + 1)) <|> ((:) <$> (guard (braces > 0) *> char '}') *> pNonspaceWithBalancedBrackets parens brackets (braces - 1)) <|> (:) <$> noneOf " \t\r\n()[]{}" <*> pNonspaceWithBalancedBrackets parens brackets braces <|> pure [] pText :: P Markup pText = Text . mconcat <$> some ((do xs <- some alphaNum T.pack . (xs <>) <$> try (some (char '*' <|> char '_') <* lookAhead alphaNum) <|> pure (T.pack xs)) <|> (T.pack <$> some (satisfy (\c -> not (isSpace c || isSpecial c)))) ) pEscaped :: P Markup pEscaped = Text . T.singleton <$> pEsc pEsc :: P Char pEsc = char '\\' *> (uniEsc <|> satisfy (not . isSpace)) pStrEsc :: P Char pStrEsc = try $ char '\\' *> ( uniEsc <|> ('\\' <$ char '\\') <|> ('"' <$ char '"') <|> ('\n' <$ char 'n') <|> ('\t' <$ char 't') <|> ('\r' <$ char 'r') ) uniEsc :: P Char uniEsc = chr <$> (char 'u' *> char '{' *> hexnum <* char '}') where hexnum :: P Int hexnum = do ds <- many1 hexDigit case readMaybe ("0x" ++ ds) of Just i | i <= 1114112 -> pure i | otherwise -> pure 0xFFFD Nothing -> fail $ "Could not read hex number " ++ ds pNbsp :: P Markup pNbsp = Nbsp <$ char '~' pDash :: P Markup pDash = do void $ char '-' (Shy <$ char '?') <|> (char '-' *> ((EmDash <$ char '-') <|> pure EnDash)) <|> pure (Text "-") pEllipsis :: P Markup pEllipsis = do void $ char '.' (Ellipsis <$ string "..") <|> pure (Text ".") pQuote :: P Markup pQuote = Quote <$> (char '\'' <|> char '"') pLabelInContent :: P Markup pLabelInContent = Code <$> getPosition <*> pLabel pLabel :: P Expr pLabel = Label . T.pack <$> try ( char '<' *> many1 (satisfy isIdentContinue <|> char '_' <|> char '.' <|> char ':') <* char '>' ) pRef :: P Markup pRef = Ref <$> (char '@' *> (T.pack <$> many1 (satisfy isIdentContinue <|> char '_'))) <*> option (Literal Auto) (Block <$> pContent) -- "If a character would continue the expression but should be interpreted as -- text, the expression can forcibly be ended with a semicolon (;)." -- "A few kinds of expressions are not compatible with the hashtag syntax -- (e.g. binary operator expressions). To embed these into markup, you -- can use parentheses, as in #(1 + 2)." Hence pBasicExpr not pExpr. pHash :: P Markup pHash = do void $ char '#' res <- Code <$> getPosition <*> pBasicExpr <* optional (sym ";") -- rewind if we gobbled space: mbBeforeSpace <- stBeforeSpace <$> getState case mbBeforeSpace of Nothing -> pure () Just (pos, inp) -> do setPosition pos setInput inp pure res isSpecial :: Char -> Bool isSpecial '\\' = True isSpecial '[' = True isSpecial ']' = True isSpecial '#' = True isSpecial '-' = True isSpecial '.' = True isSpecial '"' = True isSpecial '\'' = True isSpecial '*' = True isSpecial '_' = True isSpecial '`' = True isSpecial '$' = True isSpecial '<' = True isSpecial '>' = True isSpecial '@' = True isSpecial '/' = True isSpecial ':' = True isSpecial '~' = True isSpecial '=' = True isSpecial '(' = True -- so we don't gobble ( before URLs isSpecial _ = False pIdentifier :: P Identifier pIdentifier = lexeme $ try $ do c <- satisfy isIdentStart cs <- many $ satisfy isIdentContinue pure $ Identifier $ T.pack (c : cs) -- ident_start ::= unicode(XID_Start) -- ID_Start characters are derived from the Unicode General_Category of -- uppercase letters, lowercase letters, titlecase letters, modifier letters, -- other letters, letter numbers, plus Other_ID_Start, minus Pattern_Syntax and -- Pattern_White_Space code points. isIdentStart :: Char -> Bool isIdentStart c = c == '_' || case generalCategory c of UppercaseLetter -> True LowercaseLetter -> True TitlecaseLetter -> True ModifierLetter -> True OtherLetter -> True LetterNumber -> True _ -> False -- ident_continue ::= unicode(XID_Continue) | '-' -- ID_Continue characters include ID_Start characters, plus characters having -- the Unicode General_Category of nonspacing marks, spacing combining marks, -- decimal number, connector punctuation, plus Other_ID_Continue, minus -- Pattern_Syntax and Pattern_White_Space code points. isIdentContinue :: Char -> Bool isIdentContinue c = isIdentStart c || c == '-' || c == '_' || case generalCategory c of NonSpacingMark -> True SpacingCombiningMark -> True DecimalNumber -> True ConnectorPunctuation -> True _ -> False pKeyword :: String -> P () pKeyword t = lexeme $ try $ string t *> notFollowedBy (satisfy isIdentContinue) -- NOTE: there can be field access lookups that require identifiers like -- 'not'. -- keywords :: [Text] -- keywords = ["none", "auto", "true", "false", "not", "and", "or", "let", -- "set", "show", "wrap", "if", "else", "for", "in", "as", "while", -- "break", "continue", "return", "import", "include", "from"] pExpr :: P Expr pExpr = buildExpressionParser operatorTable pBasicExpr -- A basic expression excludes the unary and binary operators outside of parens, -- but includes field access and function application. Needed for pHash. pBasicExpr :: P Expr pBasicExpr = buildExpressionParser basicOperatorTable pBaseExpr pQualifiedIdentifier :: P Expr pQualifiedIdentifier = buildExpressionParser (replicate 4 [fieldAccess]) pIdent pBaseExpr :: P Expr pBaseExpr = pLiteral <|> pKeywordExpr <|> pFuncExpr <|> pIdent <|> pArrayExpr <|> pDictExpr <|> inParens pExpr <|> pLabel <|> (Block . Content . (: []) <$> lexeme (pRawBlock <|> pRawInline <|> pEquation)) <|> pBlock pLiteral :: P Expr pLiteral = Literal <$> ( pNone <|> pAuto <|> pBoolean <|> pNumeric <|> pStr ) fieldAccess :: Operator Text PState Identity Expr fieldAccess = Postfix (FieldAccess <$> try (sym "." *> pIdent)) -- don't allow space after . restrictedFieldAccess :: Operator Text PState Identity Expr restrictedFieldAccess = Postfix (FieldAccess <$> try (char '.' *> pIdent)) functionCall :: Operator Text PState Identity Expr functionCall = Postfix ( do mbBeforeSpace <- stBeforeSpace <$> getState -- NOTE: can't have space before () or [] arg in a -- function call! to prevent bugs with e.g. 'if 2<3 [...]'. guard $ isNothing mbBeforeSpace args <- pArgs pure $ \expr -> FuncCall expr args ) -- The reason we cycle field access and function call -- is that a postfix operator will not -- be repeatable at the same precedence level...see docs for -- buildExpressionParser. basicOperatorTable :: [[Operator Text PState Identity Expr]] basicOperatorTable = take 16 (cycle [[restrictedFieldAccess], [functionCall]]) operatorTable :: [[Operator Text PState Identity Expr]] operatorTable = -- precedence 8 (real field access, perhaps with space after .) take 12 (cycle [[fieldAccess], [functionCall]]) ++ -- precedence 7 (repeated because of parsec's quirks with postfix, prefix) replicate 6 [Postfix (ToPower <$> try (char 'e' *> notFollowedBy letter *> pExpr))] ++ replicate 6 [Prefix (Negated <$ op "-"), Prefix (id <$ op "+")] ++ [ -- precedence 6 [ Infix (Times <$ op "*") AssocLeft, Infix (Divided <$ op "/") AssocLeft ], -- precedence 5 [ Infix (Plus <$ op "+") AssocLeft, Infix (Minus <$ op "-") AssocLeft ], -- precedence 4 [ Infix (Equals <$ op "==") AssocLeft, Infix ((\x y -> Not (Equals x y)) <$ op "!=") AssocLeft, Infix (LessThan <$ op "<") AssocLeft, Infix (LessThanOrEqual <$ op "<=") AssocLeft, Infix (GreaterThan <$ op ">") AssocLeft, Infix (GreaterThanOrEqual <$ op ">=") AssocLeft, Infix (InCollection <$ pKeyword "in") AssocLeft, Infix ( (\x y -> Not (InCollection x y)) <$ try (pKeyword "not" *> pKeyword "in") ) AssocLeft ], -- precedence 3 [ Prefix (Not <$ pKeyword "not"), Infix (And <$ pKeyword "and") AssocLeft ], -- precedence 2 [ Infix (Or <$ pKeyword "or") AssocLeft ], -- precedence 1 [ Infix (Assign <$ op "=") AssocRight, Infix ((\x y -> Assign x (Plus x y)) <$ op "+=") AssocRight, Infix ((\x y -> Assign x (Minus x y)) <$ op "-=") AssocRight, Infix ((\x y -> Assign x (Times x y)) <$ op "*=") AssocRight, Infix ((\x y -> Assign x (Divided x y)) <$ op "/=") AssocRight ] ] pNone :: P Literal pNone = None <$ pKeyword "none" pAuto :: P Literal pAuto = Auto <$ pKeyword "auto" pBoolean :: P Literal pBoolean = (Boolean True <$ pKeyword "true") <|> (Boolean False <$ pKeyword "false") pNumber :: P (Either Integer Double) pNumber = try $ do pref <- string "0b" <|> string "0x" <|> string "0o" <|> pure "" case pref of "0b" -> do nums <- many1 ((1 <$ char '1') <|> (0 <$ char '0')) pure $ Left $ sum $ zipWith (*) (reverse nums) (map (2 ^) [(0 :: Integer) ..]) "0x" -> do num <- many1 hexDigit case readMaybe ("0x" ++ num) of Just (i :: Integer) -> pure $ Left i _ -> fail $ "could not read " <> num <> " as hex digits" "0o" -> do num <- many1 octDigit case readMaybe ("0o" ++ num) of Just (i :: Integer) -> pure $ Left i _ -> fail $ "could not read " <> num <> " as octal digits" _ -> do as <- many1 digit <|> ("0" <$ lookAhead (try (char '.' *> digit))) pe <- option [] $ string "." bs <- many digit es <- option "" ( do void $ try $ char 'e' *> lookAhead (digit <|> char '-') minus <- option [] $ count 1 (char '-') ds <- many1 digit pure ("e" ++ minus ++ ds) ) let num = pref ++ as ++ pe ++ bs ++ es case readMaybe num of Just (i :: Integer) -> pure $ Left i Nothing -> case readMaybe num of Just (d :: Double) -> pure $ Right d Nothing -> fail $ "could not read " <> num <> " as integer" pNumeric :: P Literal pNumeric = lexeme $ do result <- pNumber ( do unit <- pUnit case result of Left i -> pure $ Numeric (fromIntegral i) unit Right d -> pure $ Numeric d unit ) <|> case result of Left i -> pure $ Int i Right d -> pure $ Float d pStr :: P Literal pStr = lexeme $ do void $ char '"' String . T.pack <$> manyTill (pStrEsc <|> satisfy (/= '"')) (char '"') pUnit :: P Unit pUnit = (Percent <$ sym "%") <|> (Pt <$ pKeyword "pt") <|> (Mm <$ pKeyword "mm") <|> (Cm <$ pKeyword "cm") <|> (In <$ pKeyword "in") <|> (Deg <$ pKeyword "deg") <|> (Rad <$ pKeyword "rad") <|> (Em <$ pKeyword "em") <|> (Fr <$ pKeyword "fr") pIdent :: P Expr pIdent = Ident <$> pIdentifier pBlock :: P Expr pBlock = Block <$> (pCodeBlock <|> pContent) pCodeBlock :: P Block pCodeBlock = CodeBlock <$> inBraces pCode pCode :: P [Expr] pCode = sepEndBy pExpr (void (sym ";") <|> ws) -- content-block ::= '[' markup ']' pContent :: P Block pContent = do void $ char '[' col <- sourceColumn <$> getPosition oldLineStartCol <- stLineStartCol <$> getState updateState $ \st -> st { stLineStartCol = col, stContentBlockNesting = stContentBlockNesting st + 1 } ms <- manyTill pMarkup (char ']') ws updateState $ \st -> st { stLineStartCol = oldLineStartCol, stContentBlockNesting = stContentBlockNesting st - 1 } pure $ Content ms pEndOfContent :: P () pEndOfContent = eof <|> do blockNesting <- stContentBlockNesting <$> getState if blockNesting > 0 then void (lookAhead (char ']')) else mzero -- array-expr ::= '(' ((expr ',') | (expr (',' expr)+ ','?))? ')' pArrayExpr :: P Expr pArrayExpr = try $ inParens $ ( do v <- pSpread <|> (Reg <$> pExpr) vs <- many $ try $ sym "," *> (pSpread <|> (Reg <$> pExpr)) if null vs then void $ sym "," else optional $ void $ sym "," pure $ Array (v : vs) ) <|> (Array [] <$ optional (void $ sym ",")) -- dict-expr ::= '(' (':' | ':'? (pair (',' pair)* ','?)) ')' -- pair ::= (ident | str) ':' expr pDictExpr :: P Expr pDictExpr = try $ inParens (sym ":" *> (pNonemptyDict <|> pure (Dict mempty)) <|> pNonemptyDict) where pNonemptyDict = Dict <$> sepEndBy1 (pSpread <|> pPair) (sym ",") pPair = Reg <$> ((,) <$> pExpr <*> try (sym ":" *> pExpr)) pSpread :: P (Spreadable a) pSpread = try $ string ".." *> (Spr <$> pExpr) -- func-expr ::= (params | ident) '=>' expr pFuncExpr :: P Expr pFuncExpr = try $ FuncExpr <$> pParamsOrIdent <*> (sym "=>" *> pExpr) where pParamsOrIdent = pParams <|> (do i <- pIdentifier if i == "_" then pure [SkipParam] else pure [NormalParam i]) pKeywordExpr :: P Expr pKeywordExpr = pLetExpr <|> pSetExpr <|> pShowExpr <|> pIfExpr <|> pWhileExpr <|> pForExpr <|> pImportExpr <|> pIncludeExpr <|> pBreakExpr <|> pContinueExpr <|> pReturnExpr -- args ::= ('(' (arg (',' arg)* ','?)? ')' content-block*) | content-block+ pArgs :: P [Arg] pArgs = do void $ lookAhead (char '(' <|> char '[') args <- option [] $ inParens $ sepEndBy pArg (sym ",") blocks <- many $ do -- make sure we haven't had a space skippedSpaces <- isJust . stBeforeSpace <$> getState if skippedSpaces then mzero else do Content ms <- pContent pure ms pure $ args ++ map BlockArg blocks -- arg ::= (ident ':')? expr pArg :: P Arg pArg = pKeyValArg <|> pSpreadArg <|> pNormalArg where pKeyValArg = KeyValArg <$> try (pIdentifier <* sym ":") <*> pExpr pNormalArg = NormalArg <$> pExpr pSpreadArg = SpreadArg <$> try (string ".." *> pExpr) -- params ::= '(' (param (',' param)* ','?)? ')' pParams :: P [Param] pParams = inParens $ sepEndBy pParam (sym ",") -- param ::= ident (':' expr)? pParam :: P Param pParam = pSinkParam <|> pDestructuringParam <|> pNormalOrDefaultParam <|> pSkipParam where pSinkParam = SinkParam <$> try ( sym ".." *> option Nothing (Just <$> pIdentifier) ) pSkipParam = SkipParam <$ sym "_" pNormalOrDefaultParam = do i <- pIdentifier (DefaultParam i <$> (sym ":" *> pExpr)) <|> pure (NormalParam i) pDestructuringParam = do DestructuringBind parts <- pDestructuringBind pure $ DestructuringParam parts pBind :: P Bind pBind = pBasicBind <|> pDestructuringBind pBasicBind :: P Bind pBasicBind = BasicBind <$> try (pBindIdentifier <|> inParens pBindIdentifier) pBindIdentifier :: P (Maybe Identifier) pBindIdentifier = do ident <- pIdentifier if ident == "_" then pure Nothing else pure $ Just ident pDestructuringBind :: P Bind pDestructuringBind = inParens $ DestructuringBind <$> (pBindPart `sepEndBy` (sym ",")) where pBindPart = do sink <- option False $ True <$ string ".." if sink then do ident <- option Nothing pBindIdentifier -- .. pure $ Sink ident else do ident <- pBindIdentifier case ident of Nothing -> pure (Simple ident) Just key -> (WithKey key <$> (sym ":" *> pBindIdentifier)) <|> pure (Simple ident) -- let-expr ::= 'let' ident params? '=' expr pLetExpr :: P Expr pLetExpr = do pKeyword "let" bind <- pBind case bind of BasicBind mbname -> do mbparams <- option Nothing $ Just <$> pParams mbexpr <- option Nothing $ Just <$> (sym "=" *> pExpr) case (mbparams, mbexpr, mbname) of (Nothing, Nothing, _) -> pure $ Let bind (Literal None) (Nothing, Just expr, _) -> pure $ Let bind expr (Just params, Just expr, Just name) -> pure $ LetFunc name params expr (Just _, Just _, Nothing) -> fail "expected name for function" (Just _, Nothing, _) -> fail "expected expression for let binding" _ -> Let bind <$> (sym "=" *> pExpr) -- set-expr ::= 'set' expr args pSetExpr :: P Expr pSetExpr = do oldAllowNewlines <- stAllowNewlines <$> getState -- see #23 -- 'set' doesn't go with 'if' unless it's on the same line updateState $ \st -> st {stAllowNewlines = 0} set <- pKeyword "set" *> (Set <$> pQualifiedIdentifier <*> pArgs) updateState $ \st -> st {stAllowNewlines = oldAllowNewlines} addCondition <- option id $ pKeyword "if" *> ((\c x -> If [(c, x)]) <$> pExpr) pure $ addCondition set pShowExpr :: P Expr pShowExpr = do pKeyword "show" from <- (Nothing <$ sym ":") <|> Just <$> (pBasicExpr <* sym ":") to <- pBasicExpr pure $ Show from to -- if-expr ::= 'if' expr block ('else' 'if' expr block)* ('else' block)? pIfExpr :: P Expr pIfExpr = do a <- pIf as <- many $ try (pKeyword "else" *> pIf) finalElse <- option [] $ -- we represent the final "else" as a conditional with expr True: (: []) . (Literal (Boolean True),) <$> (pKeyword "else" *> pBlock) return $ If (a : as ++ finalElse) where pIf = pKeyword "if" *> ((,) <$> pExpr <*> pBlock) -- while-expr ::= 'while' expr block pWhileExpr :: P Expr pWhileExpr = pKeyword "while" *> (While <$> pExpr <*> pBlock) -- for-expr ::= 'for' bind 'in' expr block pForExpr :: P Expr pForExpr = pKeyword "for" *> (For <$> pBind <*> (pKeyword "in" *> pExpr) <*> pBlock) pImportExpr :: P Expr pImportExpr = pKeyword "import" *> (Import <$> pExpr <*> pImportItems) where pImportItems = (sym ":" *> ( (AllIdentifiers <$ sym "*") <|> (SomeIdentifiers <$> sepEndBy pIdentifierAs (sym ",")) ) ) <|> (NoIdentifiers <$> pAs) pIdentifierAs = do ident <- pIdentifier mbAs <- pAs pure (ident, mbAs) pAs = option Nothing $ Just <$> (pKeyword "as" *> pIdentifier) pBreakExpr :: P Expr pBreakExpr = Break <$ pKeyword "break" pContinueExpr :: P Expr pContinueExpr = Continue <$ pKeyword "continue" pReturnExpr :: P Expr pReturnExpr = do pos <- getPosition pKeyword "return" pos' <- getPosition if sourceLine pos' > sourceLine pos then pure $ Return Nothing else Return <$> (option Nothing (Just <$> pExpr)) pIncludeExpr :: P Expr pIncludeExpr = Include <$> (pKeyword "include" *> pExpr)