module Text.BBCode.Internal.Parser ( Parser , runParserEnv , runParserMaybeEnv , parseTestEnv , parsers , bbcode , bbcode1 , hr , br , clear , bold , italic , underline , strikethrough , indent , nfo , oneline , everythingInElement , code , preformatted , box , boxAlign , image , imageAlign , quote , quoteNamed , spoiler , spoilerNamed , listElement , list , listFlavor , colorName , colorHex , url , size , align , font , plaintext ) where import Control.Applicative (Applicative (liftA2)) import Control.Monad.Reader import Data.Char (isAsciiLower, isAsciiUpper, isDigit, isSpace) import Data.Functor (($>)) import Data.Map (Map) import Data.Map qualified as M import Data.Maybe (isNothing) import Data.Text qualified as T import Data.Text.IO qualified as T import Data.Void (Void) import Text.BBCode.Internal.Helper hiding (closing, opening, openingArg, surround, wrap, wrapArg) import Text.Megaparsec import Text.Megaparsec.Char {-| Parser with reader transformer inside. Reader environment contains current context for element. It is used for 'oneline' because @"[oneline][oneline][\/oneline][\/oneline]"@ should produce error. Same with 'nfo' Also used by 'plaintext' to end parsing on closing element of current environment. >>> parseTestEnv bbcode "[i]Bread[/b][/i]" ElSimple Italic (ElText "Bread[/b]") Notice it parsed @"[\/b]"@. 'plaintext' finishes parsing only on closing element that matches last parsed opening -} type Parser a = ParsecT Void Text (Reader [El]) a -- | 'runParser' specialized for 'Parser' runParserEnv :: ParsecT e s (Reader r) a -> r -> s -> Either (ParseErrorBundle s e) a runParserEnv p env input = runReader (runParserT p "" input) env {-# INLINEABLE runParserEnv #-} -- | 'parseMaybe' specialized for 'Parser' runParserMaybeEnv :: ParsecT e s (Reader r) a -> r -> s -> Maybe a runParserMaybeEnv p env input = runParserEnv p env input & \case (Right x) -> Just x _ -> Nothing {-# INLINEABLE runParserMaybeEnv #-} -- | 'parseTest' specialized for 'Parser'. Passes empty list to inner reader monad parseTestEnv :: Show a => Parser a -> Text -> IO () parseTestEnv p input = T.putStrLn . fromString . either errorBundlePretty show $ runParserEnv p [] input {-# INLINEABLE parseTestEnv #-} -- | Same as 'parseTestEnv' but works with HLS's codelenses mp :: Show a => Parser a -> Text -> IO String mp parser text = error . either errorBundlePretty show $ runParserEnv parser [] text opening :: El -> Parser () opening el = void . string' $ mconcat ["[", elName el, "]"] {-# INLINEABLE opening #-} openingArg :: El -> Parser a -> Parser a openingArg el p = do void . try . string' $ mconcat ["[", elName el, "="] res <- p void . char $ ']' pure res {-# INLINEABLE openingArg #-} closing :: El -> Parser () closing el = void . string' $ mconcat ["[/", elName el, "]"] {-# INLINEABLE closing #-} localAddEl :: MonadReader [r] m => r -> m a -> m a localAddEl el = local (<> [el]) {-# INLINEABLE localAddEl #-} elVoid :: El -> Parser BBCode elVoid el = opening el $> ElVoid el {-# INLINEABLE elVoid #-} elSimple :: El -> Parser BBCode -> Parser BBCode elSimple el body = do opening el res <- localAddEl el body closing el pure $ ElSimple el res {-# INLINEABLE elSimple #-} elArg :: El -> Parser Text -> Parser BBCode -> Parser BBCode elArg el arg body = do a <- openingArg el arg b <- localAddEl el body closing el pure $ ElArg el a b {-# INLINEABLE elArg #-} {-|All parsers used by 'bbcode' and 'bbcode1' Keys are element names and values are associated element parsers. Values are lists because some elements have optional element. That means they are two separate parsers, e.g. 'Box' element parsers are 'box' (simple element) and 'boxAlign' (element with parameter) -} parsers :: Map El [Parser BBCode] parsers = M.fromList [ (HR, [hr]) , (BR, [br]) , (Clear, [clear]) , (ListElement, [listElement]) , (Bold, [bold]) , (Italic, [italic]) , (Underline, [underline]) , (Strikethrough, [strikethrough]) , (Indent, [indent]) , (NFO, [nfo]) , (Oneline, [oneline]) , (Code, [code]) , (Preformatted, [preformatted]) , (Box, [box, boxAlign]) , (Image, [image, imageAlign]) , (Quote, [quote, quoteNamed]) , (Spoiler, [spoiler, spoilerNamed]) , (List, [list, listFlavor]) , (Color, [colorName, colorHex]) , (Size, [size]) , (URL, [url]) , (Align, [align]) , (Font, [font]) ] {-# NOINLINE parsers #-} {-| Parse zero or more BBCode elements Doesn't necessarily return value wrapped in 'ElDocument', it returns @('mempty' :: BBCode)@ if it parses no elements, or just element if parses just one element. Otherwise it is 'ElDocument' -} bbcode :: Parser BBCode bbcode = do many (choice $ concat parsers <> [plaintext]) <&> \case [] -> mempty [x] -> x xs -> ElDocument xs {-# NOINLINE bbcode #-} -- | Similar to 'bbcode' but parses one or more elements bbcode1 :: Parser BBCode bbcode1 = some (choice $ concat parsers) <&> \case [x] -> x xs -> ElDocument xs {-# NOINLINE bbcode1 #-} hr :: Parser BBCode hr = elVoid HR {-# INLINEABLE hr #-} br :: Parser BBCode br = elVoid BR {-# INLINEABLE br #-} clear :: Parser BBCode clear = elVoid Clear {-# INLINEABLE clear #-} bold :: Parser BBCode bold = elSimple Bold bbcode italic :: Parser BBCode italic = elSimple Italic bbcode underline :: Parser BBCode underline = elSimple Underline bbcode strikethrough :: Parser BBCode strikethrough = elSimple Strikethrough bbcode indent :: Parser BBCode indent = elSimple Indent bbcode nfo :: Parser BBCode nfo = do insideNFO <- preview (folded . filtered (== NFO)) if isNothing insideNFO then elSimple NFO bbcode else fail "Can't parse [nfo] inside [nfo]" oneline :: Parser BBCode oneline = do insideOneline <- preview (folded . filtered (== Oneline)) if isNothing insideOneline then elSimple Oneline bbcode else fail "Can't parse [oneline] inside [oneline]" -- BUG: in a string "[code][code][/code][/code]" parser only does -- -- >>> mp (everythingInElement Code) "[code][code][/code][/code]" -- ElSimple Code (ElText "[code]") -- >>> mp (everythingInElement Code <* eof) "[code][code][/code][/code]" -- 1:20: -- | -- 1 | [code][code][/code][/code] -- | ^ -- unexpected '[' -- expecting end of input everythingInElement :: El -> Parser BBCode everythingInElement el = elSimple el $ do let bc = do r <- takeWhileP Nothing (/= '[') end <- lookAhead . optional $ closing el if isNothing end then (r <>) <$> liftA2 (<>) (takeP Nothing 1) bc else pure r ElText <$> bc {-# INLINEABLE everythingInElement #-} -- >>> mp (code <* eof) "[code][code][/code][/code]" code :: Parser BBCode code = everythingInElement Code {-# INLINEABLE code #-} preformatted :: Parser BBCode preformatted = everythingInElement Preformatted {-# INLINEABLE preformatted #-} box :: Parser BBCode box = elSimple Box bbcode image :: Parser BBCode image = elSimple Image $ ElText <$> takeWhile1P (Just "URL") (/= '[') {-# INLINEABLE image #-} quote :: Parser BBCode quote = elSimple Quote bbcode spoiler :: Parser BBCode spoiler = elSimple Spoiler bbcode listElement :: Parser BBCode listElement = do isInsideList <- has (_Just . _List) <$> preview _last unless isInsideList $ fail "Not inside list" res <- elVoid ListElement void $ takeWhileP Nothing isSpace pure res list :: Parser BBCode list = elSimple List bbcode boxAlign :: Parser BBCode boxAlign = elArg Box (string "left" <|> string "center" <|> string "right") bbcode imageAlign :: Parser BBCode imageAlign = elArg Image (string "left" <|> string "right") $ ElText <$> takeWhile1P (Just "URL") (/= '[') {-# INLINEABLE imageAlign #-} {-| Can give cryptic error message if quote is ill-formed. >>>> parseTestEnv quoteNamed "[quote=\"QQ\"\"a][/quote]" 1:24: | 1 | [quote="QQ""a][/quote] | ^ unexpected end of input expecting quote author in quotes(e.g. [quote="Dan"]) Issue here is letter \'a\' before closing bracket -} quoteNamed :: Parser BBCode quoteNamed = elArg Quote arg bbcode where go :: Parser Text = do text <- takeWhileP (Just "\"]") (/= '"') end <- lookAhead . optional $ string "\"]" if isNothing end then (text <>) <$> liftA2 (<>) (hidden $ string "\"") go else pure text arg = label "quote author in quotes(e.g. [quote=\"Dan\"])" $ try $ char '"' *> go <* string "\"]" -- | See 'quoteNamed' spoilerNamed :: Parser BBCode spoilerNamed = elArg Spoiler arg bbcode where go :: Parser Text = do text <- takeWhileP (Just "\"]") (/= '"') end <- lookAhead . optional $ string "\"]" if isNothing end then (text <>) <$> liftA2 (<>) (hidden $ string "\"") go else pure text arg = label "spoiler name in quotes (e.g. [spoiler=\"The ending is\"])" $ try $ char '"' *> go <* string "\"]" listFlavor :: Parser BBCode listFlavor = elArg List (choice $ string <$> ["a", "A", "i", "I", "1"]) bbcode -- >>> mp colorName "[color=gray][/color]" -- ElArg Color "GreEn" (ElText "") colorName :: Parser BBCode colorName = elArg Color ( takeWhile1P (Just "Color name") (\c -> isAsciiLower c || isAsciiUpper c || isDigit c) ) bbcode -- >>> mp colorHex "[color=#333][/color]" -- ElArg Color "#333" (ElText "") -- >>> mp colorHex "[color=#333Faa][/color]" -- ElArg Color "#333Faa" (ElText "") colorHex :: Parser BBCode colorHex = elArg Color ( label "hex color(e.g. #333 or #123456)" $ do hash :: Char <- char '#' name :: String <- try (count 6 hexDigitChar) <|> count 3 hexDigitChar pure . T.pack $ hash : name ) bbcode url :: Parser BBCode url = elArg URL (takeWhileP (Just "URL") (/= ']')) bbcode size :: Parser BBCode size = elArg Size (choice $ string . fromString . show <$> ([10 .. 29] :: [Int])) bbcode align :: Parser BBCode align = elArg Align (choice $ string <$> ["left", "right", "center", "justify"]) bbcode font :: Parser BBCode font = elArg Font ( takeWhile1P (Just "font name(digits and latin letters)") (isAsciiLower ||| isAsciiUpper ||| isDigit) ) bbcode plaintext :: Parser BBCode plaintext = do currentElement <- preview _last let bc = do res <- takeWhile1P Nothing (/= '[') end <- -- Attempts to find element that will end parsing lookAhead . optional . choice . concat $ [ -- Opening element ends parsing text(only simple elements) elementOpenings ^.. folded . to elName . to (\el -> mconcat ["[", el, "]"]) . to string' , -- Opening element ends parsing text. -- Note that it produces opening element without closing bracket, -- because we can't predict what an argument will be elementArgOpenings ^.. folded . to elName . to ("[" <>) . to string' , -- Current closing element (last element in reader environment) currentElement ^.. _Just . to closing . to ($> "") , [eof $> ""] ] if isNothing end then -- continue parsing text (res <>) <$> liftA2 (<>) (takeP Nothing 1) bc else -- end parsing pure res ElText <$> bc {-# INLINEABLE plaintext #-}