{-# LANGUAGE CPP #-} {-# LANGUAGE ViewPatterns #-} -- | Function for generating highlighted and aligned LaTeX from literate -- Agda source. module Agda.Interaction.Highlighting.LaTeX.Base ( generateLaTeXIO , prepareCommonAssets , runLogLaTeXTWith , logMsgToText , LogMessage(..) , MonadLogLaTeX , LogLaTeXT , LaTeXOptions(..) ) where import Prelude hiding (log) import Data.Bifunctor (second) import Data.Char import Data.Maybe import Data.Function import Data.Foldable (toList) #if !(MIN_VERSION_base(4,11,0)) import Data.Semigroup (Semigroup(..)) #endif import Control.Monad (forM_, mapM_, unless, when) import Control.Monad.Trans.Reader as R ( ReaderT(runReaderT)) import Control.Monad.RWS.Strict ( RWST(runRWST) , MonadReader(..), asks , MonadState(..), gets, modify , lift, tell ) import Control.Monad.IO.Class ( MonadIO(..) ) import System.Directory import System.Exit import System.FilePath import System.Process import Data.Text (Text) import qualified Data.Text as T #ifdef COUNT_CLUSTERS import qualified Data.Text.ICU as ICU #endif import qualified Data.Text.Lazy as L import qualified Data.Text.Lazy.Encoding as E import qualified Data.ByteString.Lazy as BS import Data.HashSet (HashSet) import qualified Data.HashSet as Set import qualified Data.IntMap as IntMap import qualified Data.List as List import Paths_Agda import Agda.Syntax.Abstract (toTopLevelModuleName) import Agda.Syntax.Common import Agda.Syntax.Concrete (TopLevelModuleName, moduleNameParts) import Agda.Syntax.Parser.Literate (literateTeX, LayerRole, atomizeLayers) import qualified Agda.Syntax.Parser.Literate as L import Agda.Syntax.Position (startPos) import Agda.Interaction.Highlighting.Precise hiding (toList) import Agda.TypeChecking.Monad (Interface(..)) import Agda.Utils.FileName (AbsolutePath) import Agda.Utils.Function (applyWhen) import Agda.Utils.Functor ((<&>)) import Agda.Utils.List (last1, updateHead, updateLast) import Agda.Utils.Maybe (whenJust) import qualified Agda.Utils.List1 as List1 import Agda.Utils.Impossible ------------------------------------------------------------------------ -- * Logging class Monad m => MonadLogLaTeX m where logLaTeX :: LogMessage -> m () -- | Log LaTeX messages using a provided action. -- -- This could be accomplished by putting logs into the RWST output and splitting it -- into a WriterT, but that becomes slightly more complicated to reason about in -- the presence of IO exceptions. -- -- We want the logging to be reasonably polymorphic, avoid space leaks that can occur -- with WriterT, and also be usable during outer phases such as directory preparation. -- -- I'm not certain this is the best way to do it, but whatever. type LogLaTeXT m = ReaderT (LogMessage -> m ()) m instance Monad m => MonadLogLaTeX (LogLaTeXT m) where logLaTeX message = do doLog <- ask lift $ doLog message runLogLaTeXTWith :: Monad m => (LogMessage -> m ()) -> LogLaTeXT m a -> m a runLogLaTeXTWith = flip runReaderT -- Not currently used, but for example: -- runLogLaTeXWithIO :: MonadIO m => LogLaTeXT m a -> m a -- runLogLaTeXWithIO = runLogLaTeXTWith $ liftIO . T.putStrLn . logMsgToText ------------------------------------------------------------------------ -- * Datatypes. -- | The @LaTeX@ monad is a combination of @ExceptT@, @RWST@ and -- a logger @m@. The error part is just used to keep track whether we finished or -- not, the reader part contains static options used, the writer is where the -- output goes and the state is for keeping track of the tokens and some -- other useful info, and the MonadLogLaTeX part is used for printing debugging info. type LaTeX a = forall m. MonadLogLaTeX m => RWST Env [Output] State m a -- | Output items. data LogMessage = LogMessage Debug Text [Text] deriving Show data Output = Text !Text -- ^ A piece of text. | MaybeColumn !AlignmentColumn -- ^ A column. If it turns out to be an indentation column that is -- not used to indent or align something, then no column will be -- generated, only whitespace ('agdaSpace'). deriving Show -- | Column kinds. data Kind = Indentation -- ^ Used only for indentation (the placement of the first token -- on a line, relative to tokens on previous lines). | Alignment -- ^ Used both for indentation and for alignment. deriving (Eq, Show) -- | Unique identifiers for indentation columns. type IndentationColumnId = Int -- | Alignment and indentation columns. data AlignmentColumn = AlignmentColumn { columnCodeBlock :: !Int -- ^ The column's code block. , columnColumn :: !Int -- ^ The column number. , columnKind :: Maybe IndentationColumnId -- ^ The column kind. 'Nothing' for alignment columns and @'Just' -- i@ for indentation columns, where @i@ is the column's unique -- identifier. } deriving Show -- | Type of function for estimating column width of text. type TextWidthEstimator = Text -> Int data Env = Env { estimateTextWidth :: !TextWidthEstimator -- ^ How to estimate the column width of text (i.e. Count extended grapheme clusters vs. code points). , debugs :: [Debug] -- ^ Says what debug information should printed. } data State = State { codeBlock :: !Int -- ^ The number of the current code block. , column :: !Int -- ^ The current column number. , columns :: [AlignmentColumn] -- ^ All alignment columns found on the -- current line (so far), in reverse -- order. , columnsPrev :: [AlignmentColumn] -- ^ All alignment columns found in -- previous lines (in any code block), -- with larger columns coming first. , nextId :: !IndentationColumnId -- ^ The next indentation column -- identifier. , usedColumns :: HashSet IndentationColumnId -- ^ Indentation columns that have -- actually -- been used. } type Tokens = [Token] data Token = Token { text :: !Text , info :: Aspects } deriving Show withTokenText :: (Text -> Text) -> Token -> Token withTokenText f tok@Token{text = t} = tok{text = f t} data Debug = MoveColumn | NonCode | Code | Spaces | Output | FileSystem deriving (Eq, Show) -- | Run function for the @LaTeX@ monad. runLaTeX :: MonadLogLaTeX m => LaTeX a -> Env -> State -> m (a, State, [Output]) -- ASR (2021-02-07). The eta-expansion is required by GHC >= 9.0.1 -- (see Issue #4955). runLaTeX l = runRWST l emptyState :: State emptyState = State { codeBlock = 0 , column = 0 , columns = [] , columnsPrev = [] , nextId = 0 , usedColumns = Set.empty } emptyEnv :: TextWidthEstimator -- ^ Count extended grapheme clusters? -> Env emptyEnv twe = Env twe [] ------------------------------------------------------------------------ -- * Some helpers. -- | Gives the size of the string. If cluster counting is enabled, -- then the number of extended grapheme clusters is computed (the root -- locale is used), and otherwise the number of code points. size :: Text -> LaTeX Int size t = asks estimateTextWidth <&> ($ t) -- | Does the string consist solely of whitespace? isSpaces :: Text -> Bool isSpaces = T.all isSpace -- | Is the character a whitespace character distinct from '\n'? isSpaceNotNewline :: Char -> Bool isSpaceNotNewline c = isSpace c && c /= '\n' -- | Replaces all forms of whitespace, except for new-line characters, -- with spaces. replaceSpaces :: Text -> Text replaceSpaces = T.map (\c -> if isSpaceNotNewline c then ' ' else c) -- | If the `Token` consists of spaces, the internal column counter is advanced -- by the length of the token. Otherwise, `moveColumnForToken` is a no-op. moveColumnForToken :: Token -> LaTeX () moveColumnForToken Token{ text = t } = do unless (isSpaces t) $ do log MoveColumn t -- ASR (2021-02-07). The eta-expansion is required by GHC >= 9.0.1 -- (see Issue #4955). n <- size t moveColumn n -- | Merges 'columns' into 'columnsPrev', resets 'column' and -- 'columns' resetColumn :: LaTeX () resetColumn = modify $ \s -> s { column = 0 , columnsPrev = mergeCols (columns s) (columnsPrev s) , columns = [] } where -- Remove shadowed columns from old. mergeCols [] old = old mergeCols new@(n:ns) old = new ++ filter ((< leastNew) . columnColumn) old where leastNew = columnColumn (last1 n ns) moveColumn :: Int -> LaTeX () moveColumn i = modify $ \s -> s { column = i + column s } -- | Registers a column of the given kind. The column is returned. registerColumn :: Kind -> LaTeX AlignmentColumn registerColumn kind = do column <- gets column codeBlock <- gets codeBlock colKind <- case kind of Alignment -> return Nothing Indentation -> do nextId <- gets nextId modify $ \s -> s { nextId = succ nextId } return (Just nextId) let c = AlignmentColumn { columnColumn = column , columnCodeBlock = codeBlock , columnKind = colKind } modify $ \s -> s { columns = c : columns s } return c -- | Registers the given column as used (if it is an indentation -- column). useColumn :: AlignmentColumn -> LaTeX () useColumn c = whenJust (columnKind c) $ \ i -> modify $ \ s -> s { usedColumns = Set.insert i (usedColumns s) } -- | Alignment column zero in the current code block. columnZero :: LaTeX AlignmentColumn columnZero = do codeBlock <- gets codeBlock return $ AlignmentColumn { columnColumn = 0 , columnCodeBlock = codeBlock , columnKind = Nothing } -- | Registers column zero as an alignment column. registerColumnZero :: LaTeX () registerColumnZero = do c <- columnZero modify $ \s -> s { columns = [c] } -- | Changes to the state that are performed at the start of a code -- block. enterCode :: LaTeX () enterCode = do resetColumn modify $ \s -> s { codeBlock = codeBlock s + 1 } -- | Changes to the state that are performed at the end of a code -- block. leaveCode :: LaTeX () leaveCode = return () tshow :: Show a => a -> Text tshow = T.pack . show logMsgToText :: LogMessage -> Text logMsgToText (LogMessage messageLabel text extra) = T.concat $ [ tshow messageLabel, ": '", text, "' " ] ++ if null extra then [] else ["(", T.unwords extra, ")"] logHelper :: Debug -> Text -> [Text] -> LaTeX () logHelper debug text extra = do logLevels <- debugs <$> ask when (debug `elem` logLevels) $ do lift $ logLaTeX (LogMessage debug text extra) log :: Debug -> Text -> LaTeX () log MoveColumn text = do cols <- gets columns logHelper MoveColumn text ["columns=", tshow cols] log Code text = do cols <- gets columns col <- gets column logHelper Code text ["columns=", tshow cols, "col=", tshow col] log debug text = logHelper debug text [] output :: Output -> LaTeX () output item = do log Output $ tshow item tell [item] ------------------------------------------------------------------------ -- * LaTeX and polytable strings. -- Polytable, http://www.ctan.org/pkg/polytable, is used for code -- alignment, similar to lhs2TeX's approach. nl :: Text nl = "%\n" -- | A command that is used when two tokens are put next to each other -- in the same column. agdaSpace :: Text agdaSpace = cmdPrefix <> "Space" <> cmdArg T.empty <> nl -- | The column's name. -- -- Indentation columns have unique names, distinct from all alignment -- column names. columnName :: AlignmentColumn -> Text columnName c = T.pack $ case columnKind c of Nothing -> show (columnColumn c) Just i -> show i ++ "I" -- | Opens a column with the given name. ptOpen' :: Text -> Text ptOpen' name = "\\>[" <> name <> "]" -- | Opens the given column. ptOpen :: AlignmentColumn -> Text ptOpen c = ptOpen' (columnName c) -- | Opens a special column that is only used at the beginning of -- lines. ptOpenBeginningOfLine :: Text ptOpenBeginningOfLine = ptOpen' "." <> "[@{}l@{}]" -- | Opens the given column, and inserts an indentation instruction -- with the given argument at the end of it. ptOpenIndent :: AlignmentColumn -> Int -- ^ Indentation instruction argument. -> Text ptOpenIndent c delta = ptOpen c <> "[@{}l@{" <> cmdPrefix <> "Indent" <> cmdArg (T.pack $ show delta) <> "}]" ptClose :: Text ptClose = "\\<" ptClose' :: AlignmentColumn -> Text ptClose' c = ptClose <> "[" <> columnName c <> "]" ptNL :: Text ptNL = nl <> "\\\\\n" ptEmptyLine :: Text ptEmptyLine = nl <> "\\\\[" <> cmdPrefix <> "EmptyExtraSkip" <> "]%\n" cmdPrefix :: Text cmdPrefix = "\\Agda" cmdArg :: Text -> Text cmdArg x = "{" <> x <> "}" ------------------------------------------------------------------------ -- * Output generation from a stream of labelled tokens. processLayers :: [(LayerRole, Tokens)] -> LaTeX () -- ASR (2021-02-07). The eta-expansion on @lt@ is required by GHC >= -- 9.0.1 (see Issue #4955). processLayers lt = forM_ lt $ \ (layerRole,toks) -> do case layerRole of L.Markup -> processMarkup toks L.Comment -> processComment toks L.Code -> processCode toks -- | Deals with markup, which is output verbatim. processMarkup :: Tokens -> LaTeX () -- ASR (2021-02-07). The eta-expansion on @ts@ is required by GHC >= -- 9.0.1 (see Issue #4955). processMarkup ts = forM_ ts $ \ t' -> do moveColumnForToken t' output (Text (text t')) -- | Deals with literate text, which is output verbatim processComment :: Tokens -> LaTeX () -- ASR (2021-02-07). The eta-expansion with @ts@ is required by GHC >= -- 9.0.1 (see Issue #4955). processComment ts = forM_ ts $ \ t' -> do unless ("%" == T.take 1 (T.stripStart (text t'))) $ do moveColumnForToken t' output (Text (text t')) -- | Deals with code blocks. Every token, except spaces, is pretty -- printed as a LaTeX command. processCode :: Tokens -> LaTeX () processCode toks' = do output $ Text nl enterCode mapM_ go toks' ptOpenWhenColumnZero =<< gets column output $ Text $ ptClose <> nl leaveCode where go tok'@Token{ text = tok } = do -- Get the column information before grabbing the token, since -- grabbing (possibly) moves the column. col <- gets column moveColumnForToken tok' log Code tok unless (T.null tok) $ if (isSpaces tok) then do spaces $ T.group $ replaceSpaces tok else do ptOpenWhenColumnZero col output $ Text $ -- we return the escaped token wrapped in commands corresponding -- to its aspect (if any) and other aspects (e.g. error, unsolved meta) foldr (\c t -> cmdPrefix <> T.pack c <> cmdArg t) (escape tok) $ map fromOtherAspect (toList $ otherAspects $ info tok') ++ concatMap fromAspect (toList $ aspect $ info tok') -- Non-whitespace tokens at the start of a line trigger an -- alignment column. ptOpenWhenColumnZero col = when (col == 0) $ do registerColumnZero -- ASR (2021-02-07). The eta-expansion @\o -> output o@ is -- required by GHC >= 9.0.1 (see Issue #4955). (\o -> output o). Text . ptOpen =<< columnZero -- Translation from OtherAspect to command strings. So far it happens -- to correspond to @show@ but it does not have to (cf. fromAspect) fromOtherAspect :: OtherAspect -> String fromOtherAspect = show fromAspect :: Aspect -> [String] fromAspect a = let s = [show a] in case a of Comment -> s Keyword -> s Hole -> s String -> s Number -> s Symbol -> s PrimitiveType -> s Pragma -> s Background -> s Markup -> s Name Nothing isOp -> fromAspect (Name (Just Postulate) isOp) -- At the time of writing the case above can be encountered in -- --only-scope-checking mode, for instance for the token "Size" -- in the following code: -- -- {-# BUILTIN SIZE Size #-} -- -- The choice of "Postulate" works for this example, but might -- be less appropriate for others. Name (Just kind) isOp -> (\c -> if isOp then ["Operator", c] else [c]) $ case kind of Bound -> sk Generalizable -> sk Constructor Inductive -> "InductiveConstructor" Constructor CoInductive -> "CoinductiveConstructor" Datatype -> sk Field -> sk Function -> sk Module -> sk Postulate -> sk Primitive -> sk Record -> sk Argument -> sk Macro -> sk where sk = show kind -- | Escapes special characters. escape :: Text -> Text escape (T.uncons -> Nothing) = T.empty escape (T.uncons -> Just (c, s)) = T.pack (replace c) <> escape s where replace :: Char -> String replace char = case char of '_' -> "\\AgdaUnderscore{}" '{' -> "\\{" '}' -> "\\}" '#' -> "\\#" '$' -> "\\$" '&' -> "\\&" '%' -> "\\%" '~' -> "\\textasciitilde{}" '^' -> "\\textasciicircum{}" '\\' -> "\\textbackslash{}" ' ' -> "\\ " _ -> [ char ] #if __GLASGOW_HASKELL__ < 810 escape _ = __IMPOSSIBLE__ #endif -- | Every element in the list should consist of either one or more -- newline characters, or one or more space characters. Two adjacent -- list elements must not contain the same character. -- -- If the final element of the list consists of spaces, then these -- spaces are assumed to not be trailing whitespace. spaces :: [Text] -> LaTeX () spaces [] = return () -- Newlines. spaces (s@(T.uncons -> Just ('\n', _)) : ss) = do col <- gets column when (col == 0) $ -- ASR (2021-02-07). The eta-expansion @\o -> output o@ is -- required by GHC >= 9.0.1 (see Issue #4955). (\o -> output o) . Text . ptOpen =<< columnZero output $ Text $ ptClose <> ptNL <> T.replicate (T.length s - 1) ptEmptyLine resetColumn spaces ss -- Spaces followed by a newline character. spaces (_ : ss@(_ : _)) = spaces ss -- Spaces that are not followed by a newline character. spaces [ s ] = do col <- gets column let len = T.length s kind = if col /= 0 && len == 1 then Indentation else Alignment moveColumn len column <- registerColumn kind if col /= 0 then log Spaces "col /= 0" else do columns <- gets columnsPrev codeBlock <- gets codeBlock log Spaces $ "col == 0: " <> T.pack (show (len, columns)) case filter ((<= len) . columnColumn) columns of c : _ | columnColumn c == len, isJust (columnKind c) -> do -- Align. (This happens automatically if the column is an -- alignment column, but c is an indentation column.) useColumn c output $ Text $ ptOpenBeginningOfLine output $ Text $ ptClose' c c : _ | columnColumn c < len -> do -- Indent. useColumn c output $ Text $ ptOpenIndent c (codeBlock - columnCodeBlock c) _ -> return () output $ MaybeColumn column -- | Split multi-lines string literals into multiple string literals -- Isolating leading spaces for the alignment machinery to work -- properly stringLiteral :: Token -> Tokens stringLiteral t | aspect (info t) == Just String = map (\ x -> t { text = x }) $ concatMap leadingSpaces $ List.intersperse "\n" $ T.lines (text t) where leadingSpaces :: Text -> [Text] leadingSpaces tok = [pre, suf] where (pre , suf) = T.span isSpaceNotNewline tok stringLiteral t = [t] -- | Split multi-line comments into several tokens. -- See issue #5398. multiLineComment :: Token -> Tokens multiLineComment Token{ text = s, info = i } | aspect i == Just Comment = map (`Token` i) $ List.intersperse "\n" $ T.lines s -- multiLineComment Token{ text = s, info = i } | aspect i == Just Comment = -- map emptyToPar -- $ List1.groupBy ((==) `on` T.null) -- $ T.lines s -- where -- emptyToPar :: List1 Text -> Token -- emptyToPar ts@(t :| _) -- | T.null t = Token{ text = "\n", info = mempty } -- | otherwise = Token{ text = sconcat $ List1.intersperse "\n" ts, info = i } multiLineComment t = [t] ------------------------------------------------------------------------ -- * Main. -- | The Agda data directory containing the files for the LaTeX backend. latexDataDir :: FilePath latexDataDir = "latex" defaultStyFile :: String defaultStyFile = "agda.sty" data LaTeXOptions = LaTeXOptions { latexOptOutDir :: FilePath , latexOptSourceFileName :: Maybe AbsolutePath -- ^ The parser uses a @Position@ which includes a source filename for -- error reporting and such. We don't actually get the source filename -- with an @Interface@, and it isn't necessary to look it up. -- This is a "nice-to-have" parameter. , latexOptCountClusters :: Bool -- ^ Count extended grapheme clusters rather than code points when -- generating LaTeX. } getTextWidthEstimator :: Bool -> TextWidthEstimator getTextWidthEstimator _countClusters = #ifdef COUNT_CLUSTERS if _countClusters then length . (ICU.breaks (ICU.breakCharacter ICU.Root)) else T.length #else T.length #endif -- | Create the common base output directory and check for/install the style file. prepareCommonAssets :: (MonadLogLaTeX m, MonadIO m) => FilePath -> m () prepareCommonAssets dir = do liftIO $ createDirectoryIfMissing True dir (code, _, _) <- liftIO $ readProcessWithExitCode "kpsewhich" ["--path=" ++ dir, defaultStyFile] "" when (code /= ExitSuccess) $ do logLaTeX $ LogMessage FileSystem (T.pack $ unwords [defaultStyFile, "was not found. Copying a default version of", defaultStyFile, "into", dir]) [] liftIO $ do styFile <- getDataFileName $ latexDataDir defaultStyFile copyFile styFile (dir defaultStyFile) -- | Generates a LaTeX file for the given interface. generateLaTeXIO :: (MonadLogLaTeX m, MonadIO m) => LaTeXOptions -> Interface -> m () generateLaTeXIO opts i = do let textWidthEstimator = getTextWidthEstimator (latexOptCountClusters opts) let baseDir = latexOptOutDir opts let outPath = baseDir (latexOutRelativeFilePath $ toTopLevelModuleName $ iModuleName i) latex <- E.encodeUtf8 <$> toLaTeX (emptyEnv textWidthEstimator) (latexOptSourceFileName opts) (iSource i) (iHighlighting i) liftIO $ do createDirectoryIfMissing True (takeDirectory outPath) BS.writeFile outPath latex latexOutRelativeFilePath :: TopLevelModuleName -> FilePath latexOutRelativeFilePath m = List.intercalate [pathSeparator] (List1.toList $ moduleNameParts m) <.> "tex" groupByFst :: forall a b. Eq a => [(a,b)] -> [(a,[b])] groupByFst = map (\xs -> case xs of -- Float the grouping to the top level [] -> __IMPOSSIBLE__ (tag, _): _ -> (tag, map snd xs)) . List.groupBy ((==) `on` fst) -- Group together characters in the same -- role. -- | Transforms the source code into LaTeX. toLaTeX :: MonadLogLaTeX m => Env -> Maybe AbsolutePath -> L.Text -> HighlightingInfo -> m L.Text toLaTeX env path source hi = processTokens env . map ( ( \(role, tokens) -> (role,) $ -- This bit fixes issue 954 ( applyWhen (L.isCode role) $ -- Remove trailing whitespace from the -- final line; the function spaces -- expects trailing whitespace to be -- followed by a newline character. whenMoreThanOne ( updateLast $ withTokenText $ \suf -> maybe suf (T.dropWhileEnd isSpaceNotNewline) (T.stripSuffix "\n" suf) ) . updateLast (withTokenText $ T.dropWhileEnd isSpaceNotNewline) . updateHead ( withTokenText $ \pre -> fromMaybe pre $ T.stripPrefix "\n" $ T.dropWhile isSpaceNotNewline pre ) ) tokens ) . ( second ( -- Split tokens at newlines concatMap stringLiteral . concatMap multiLineComment . map ( \(mi, cs) -> Token {text = T.pack cs, info = fromMaybe mempty mi} ) . groupByFst ) ) ) . groupByFst -- Look up the meta info at each position in the highlighting info. . zipWith (\pos (role, char) -> (role, (IntMap.lookup pos infoMap, char))) [1..] . -- Add position in file to each character. -- Map each character to its role atomizeLayers . literateTeX (startPos path) $ L.unpack source where infoMap = toMap hi whenMoreThanOne :: ([a] -> [a]) -> [a] -> [a] whenMoreThanOne f xs@(_:_:_) = f xs whenMoreThanOne _ xs = xs processTokens :: MonadLogLaTeX m => Env -> [(LayerRole, Tokens)] -> m L.Text processTokens env ts = do ((), s, os) <- runLaTeX (processLayers ts) env emptyState return $ L.fromChunks $ map (render s) os where render _ (Text s) = s render s (MaybeColumn c) | Just i <- columnKind c, not (i `Set.member` usedColumns s) = agdaSpace | otherwise = nl <> ptOpen c