-- This file is part of Diohsc -- Copyright (C) 2020 Martin Bays -- -- This program is free software: you can redistribute it and/or modify -- it under the terms of version 3 of the GNU General Public License as -- published by the Free Software Foundation, or any later version. -- -- You should have received a copy of the GNU General Public License -- along with this program. If not, see http://www.gnu.org/licenses/. {-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE Safe #-} module TextGemini where import Control.Monad (forM) import Control.Monad.State (State, evalState, gets, modify) import Data.Maybe (catMaybes, isJust, mapMaybe) import ANSIColour import qualified Data.Text.Lazy as T import URI data Link = Link { linkUri :: URIRef, linkDescription :: T.Text } deriving (Eq,Ord,Show) data GeminiLine = TextLine T.Text | LinkLine { linkLineIndex :: Int, linkLineLink :: Link } | AltTextLine T.Text | PreformatToggleLine | PreformattedLine T.Text T.Text | HeadingLine Int T.Text | ItemLine T.Text | QuoteLine T.Text | ErrorLine T.Text deriving (Eq,Ord,Show) newtype GeminiDocument = GeminiDocument { geminiDocumentLines :: [GeminiLine] } deriving (Eq,Ord,Show) extractLinks :: GeminiDocument -> [Link] extractLinks (GeminiDocument ls) = mapMaybe linkOfLine ls where linkOfLine (LinkLine _ link) = Just link linkOfLine _ = Nothing data PreOpt = PreOptAlt | PreOptPre | PreOptBoth deriving (Eq,Ord,Show) showPreOpt :: PreOpt -> String showPreOpt PreOptAlt = "alt" showPreOpt PreOptPre = "pre" showPreOpt PreOptBoth = "both" data GemRenderOpts = GemRenderOpts { grOptsAnsi :: Bool , grOptsPre :: PreOpt , grOptsWrapWidth :: Int , grOptsLinkDescFirst :: Bool } deriving (Eq,Ord,Show) printGemDoc :: GemRenderOpts -> (URIRef -> T.Text) -> GeminiDocument -> [T.Text] printGemDoc (GemRenderOpts ansi preOpt width linkDescFirst) showUri (GeminiDocument doc) = concatMap printLine doc where printLine (TextLine line) = wrapWith "" False $ stripControl line printLine (AltTextLine line) | preOpt == PreOptPre || T.null line = [] | otherwise = (:[]) $ applyIf ansi withBoldStr "`` " <> stripControl line printLine PreformatToggleLine = [] printLine (PreformattedLine alt line) | preOpt == PreOptAlt && not (T.null alt) = [] | otherwise = -- We allow \t and CSI escape sequences in preformatted text only let sanitise = applyIf (not ansi) (stripCSI .) sanitiseForDisplay in (:[]) $ applyIf ansi ((resetCode <>) . withBoldStr) "` " <> sanitise line printLine (HeadingLine level line) = wrapWith (T.take (fromIntegral level) (T.repeat '#') <> " ") False . applyIf ansi ( applyIf (level /= 2) withUnderlineStr . applyIf (level < 3) withBoldStr ) $ stripControl line printLine (ItemLine line) = wrapWith "* " False $ stripControl line printLine (QuoteLine line) = wrapWith "> " True $ stripControl line printLine (ErrorLine line) = (:[]) $ applyIf ansi (withColourStr Red) "! Formatting error in text/gemini: " <> line printLine (LinkLine n (Link uri desc)) = wrapWith (T.pack $ '[' : show (n+1) ++ if n+1 < 10 then "] " else "] ") False $ (if T.null desc then id else (if linkDescFirst then id else flip) (\a b -> a <> " " <> b) $ applyIf ansi (withColourStr Cyan) (stripControl desc)) (showUri uri) wrapWith :: T.Text -> Bool -> T.Text -> [T.Text] wrapWith pre onAll line = concat . zipWith prependHeader lineHeaders $ wrap (width - n) line where n = visibleLength pre lineHeaders = (pre:) . repeat $ if onAll then pre else T.replicate (fromIntegral n) " " splitWordHeader = if n > 0 then "|" <> T.replicate (fromIntegral n - 1) " " else "" prependHeader header (l:ls) = header <> l : ((splitWordHeader <>) <$> ls) prependHeader _ [] = [] wrap :: Int -> T.Text -> [[T.Text]] wrap wrapWidth line = wrap' [] "" 0 $ T.words line where maxWCWidth = 2 ww = max maxWCWidth wrapWidth wrap' ls l _ [] = [ls <> [l] ] wrap' ls l n (w:ws) = let nw = visibleLength w l' = if T.null l then w else l <> " " <> w n' = n + nw + (if T.null l then 0 else 1) in if nw > ww && n + 1 < ww then let (a,b) = splitAtVisible ww l' in wrap' (ls <> [a]) "" 0 $ b:ws else if n' > ww then (ls <> [l]:) $ wrap' [] "" 0 $ w:ws else wrap' ls l' n' ws data GeminiParseState = GeminiParseState { numLinks :: Int, preformatted :: Maybe T.Text } initialParseState :: GeminiParseState initialParseState = GeminiParseState 0 Nothing parseGemini :: T.Text -> GeminiDocument parseGemini text = GeminiDocument . catMaybes $ evalState (forM (T.lines text) (parseLine . stripTrailingCR)) initialParseState where stripTrailingCR = T.dropWhileEnd (== '\r') parseLine :: T.Text -> State GeminiParseState (Maybe GeminiLine) parseLine line = do pre <- gets preformatted case T.take 1 line of "`" | T.take 3 line == "```", isJust pre -> do modify $ \s -> s { preformatted = Nothing } return $ case T.strip $ T.drop 3 line of "" -> Nothing _ -> Just . ErrorLine $ "Illegal non-empty text after closing '```'" -- ^The spec says we MUST ignore any text on a "```" line closing a preformatted -- block. This seems like a gaping extensibility hole to me, so I'm interpreting it -- as not disallowing an error message. "`" | T.take 3 line == "```" -> let alt = T.strip $ T.drop 3 line in do modify $ \s -> s { preformatted = Just alt } return . Just . AltTextLine $ alt _ | Just alt <- pre -> return . Just $ PreformattedLine alt line "=" | T.take 2 line == "=>" -> case parseLink . T.dropWhile isGemWhitespace $ T.drop 2 line of Nothing -> return . Just . ErrorLine $ "Unparseable link line: " <> line Just link -> do n <- gets numLinks modify $ \s -> s { numLinks = n + 1 } return . Just $ LinkLine n link "#" | headers <- T.length . T.takeWhile (== '#') $ line, headers > 0 && headers < 4 -> return . Just . HeadingLine (fromIntegral headers) . T.dropWhile isGemWhitespace . T.dropWhile (== '#') $ line "*" | T.take 2 line == "* " -> return . Just . ItemLine $ T.drop 2 line ">" -> return . Just . QuoteLine $ T.drop 1 line _ -> return . Just $ TextLine line parseLink :: T.Text -> Maybe Link parseLink linkInfo = let uriText = T.takeWhile (not . isGemWhitespace) linkInfo desc = T.dropWhile isGemWhitespace . T.dropWhile (not . isGemWhitespace) $ linkInfo in (`Link` desc) <$> parseUriReference ( #ifdef IRILinks escapeIRI $ #endif T.unpack uriText) isGemWhitespace :: Char -> Bool isGemWhitespace = (`elem` (" \t"::String))