Copyright	Copyright (C) 2006-2023 John MacFarlane
License	GNU GPL, version 2 or above
Maintainer	John MacFarlane <jgm@berkeley.edu>
Stability	alpha
Portability	portable
Safe Haskell	Safe-Inferred
Language	Haskell2010

Text.Pandoc.Shared

Contents

List processing
Text processing
Date/time
Pandoc block and inline list processing
TagSoup HTML handling
File handling
for squashing blocks
Safe read

Description

Utility functions and definitions used by the various Pandoc modules.

Synopsis

splitBy :: (a -> Bool) -> [a] -> [[a]]
splitTextBy :: (Char -> Bool) -> Text -> [Text]
splitTextByIndices :: [Int] -> Text -> [Text]
inquotes :: Text -> Text
tshow :: Show a => a -> Text
stripTrailingNewlines :: Text -> Text
trim :: Text -> Text
triml :: Text -> Text
trimr :: Text -> Text
trimMath :: Text -> Text
stripFirstAndLast :: Text -> Text
camelCaseToHyphenated :: Text -> Text
camelCaseStrToHyphenated :: String -> String
toRomanNumeral :: Int -> Text
tabFilter :: Int -> Text -> Text
normalizeDate :: Text -> Maybe Text
orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [Text]
extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines
removeFormatting :: Walkable Inline a => a -> [Inline]
deNote :: Inline -> Inline
stringify :: Walkable Inline a => a -> Text
capitalize :: Walkable Inline a => a -> a
compactify :: [Blocks] -> [Blocks]
compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
linesToPara :: [[Inline]] -> Block
figureDiv :: Attr -> Caption -> [Block] -> Block
makeSections :: Bool -> Maybe Int -> [Block] -> [Block]
uniqueIdent :: Extensions -> [Inline] -> Set Text -> Text
inlineListToIdentifier :: Extensions -> [Inline] -> Text
textToIdentifier :: Extensions -> Text -> Text
isHeaderBlock :: Block -> Bool
headerShift :: Int -> Pandoc -> Pandoc
stripEmptyParagraphs :: Pandoc -> Pandoc
onlySimpleTableCells :: [[[Block]]] -> Bool
isTightList :: [[Block]] -> Bool
taskListItemFromAscii :: Extensions -> [Block] -> [Block]
taskListItemToAscii :: Extensions -> [Block] -> [Block]
handleTaskListItem :: ([Inline] -> [Inline]) -> Extensions -> [Block] -> [Block]
addMetaField :: ToMetaValue a => Text -> a -> Meta -> Meta
eastAsianLineBreakFilter :: Pandoc -> Pandoc
htmlSpanLikeElements :: Set Text
filterIpynbOutput :: Maybe Format -> Pandoc -> Pandoc
formatCode :: Attr -> Inlines -> Inlines
renderTags' :: [Tag Text] -> Text
inDirectory :: FilePath -> IO a -> IO a
makeCanonical :: FilePath -> FilePath
collapseFilePath :: FilePath -> FilePath
filteredFilesFromArchive :: Archive -> (FilePath -> Bool) -> [(FilePath, ByteString)]
blocksToInlines :: [Block] -> [Inline]
blocksToInlines' :: [Block] -> Inlines
blocksToInlinesWithSep :: Inlines -> [Block] -> Inlines
defaultBlocksSeparator :: Inlines
safeRead :: (MonadPlus m, Read a) => Text -> m a
safeStrRead :: (MonadPlus m, Read a) => String -> m a

List processing

splitBy :: (a -> Bool) -> [a] -> [[a]] Source #

Split a list at groups of one or more separator elements (elements satisfying the predicate).

splitTextBy :: (Char -> Bool) -> Text -> [Text] Source #

Split text by groups of one or more separator.

splitTextByIndices :: [Int] -> Text -> [Text] Source #

Split text at the given widths. Note that the break points are not indices but text widths, which will be different for East Asian characters, emojis, etc.

Text processing

inquotes :: Text -> Text Source #

Wrap double quotes around a Text

tshow :: Show a => a -> Text Source #

Like show, but returns a Text instead of a String.

stripTrailingNewlines :: Text -> Text Source #

Strip trailing newlines from string.

trim :: Text -> Text Source #

Remove leading and trailing space (including newlines) from string.

triml :: Text -> Text Source #

Remove leading space (including newlines) from string.

trimr :: Text -> Text Source #

Remove trailing space (including newlines) from string.

trimMath :: Text -> Text Source #

Trim leading space and trailing space unless after a backslash (\).

stripFirstAndLast :: Text -> Text Source #

Strip the first and last characters from a string.

camelCaseToHyphenated :: Text -> Text Source #

Change CamelCase word to hyphenated lowercase (e.g., camel-case).

camelCaseStrToHyphenated :: String -> String Source #

toRomanNumeral :: Int -> Text Source #

Convert number < 4000 to uppercase roman numeral.

tabFilter Source #

Arguments

:: Int	Tab stop
-> Text	Input
-> Text

Convert tabs to spaces. Tabs will be preserved if tab stop is set to 0.

Date/time

normalizeDate :: Text -> Maybe Text Source #

Parse a date and convert (if possible) to "YYYY-MM-DD" format. We limit years to the range 1601-9999 (ISO 8601 accepts greater than or equal to 1583, but MS Word only accepts dates starting 1601).

Pandoc block and inline list processing

orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [Text] Source #

Generate infinite lazy list of markers for an ordered list, depending on list attributes.

extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines Source #

Extract the leading and trailing spaces from inside an inline element and place them outside the element. SoftBreaks count as Spaces for these purposes.

removeFormatting :: Walkable Inline a => a -> [Inline] Source #

Extract inlines, removing formatting.

deNote :: Inline -> Inline Source #

Replaces Note elements with empty strings.

stringify :: Walkable Inline a => a -> Text Source #

Convert pandoc structure to a string with formatting removed. Footnotes are skipped (since we don't want their contents in link labels).

capitalize :: Walkable Inline a => a -> a Source #

Bring all regular text in a pandoc structure to uppercase.

This function correctly handles cases where a lowercase character doesn't match to a single uppercase character – e.g. “Straße” would be converted to “STRASSE”, not “STRAßE”.

compactify Source #

Arguments

:: [Blocks]	List of list items (each a list of blocks)
-> [Blocks]

Change final list item from Para to Plain if the list contains no other Para blocks. Otherwise (if the list items contain Para blocks besides possibly at the end), turn any Plains into Paras (#5285).

compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])] Source #

Like compactify, but acts on items of definition lists.

linesToPara :: [[Inline]] -> Block Source #

Convert a list of lines into a paragraph with hard line breaks. This is useful e.g. for rudimentary support of LineBlock elements in writers.

figureDiv :: Attr -> Caption -> [Block] -> Block Source #

Creates a Div block from figure components. The intended use is in writers of formats that do not have markup support for figures.

The resulting div is given the class figure and contains the figure body and the figure caption. The latter is wrapped in a Div of class caption, with the stringified short-caption as attribute.

makeSections :: Bool -> Maybe Int -> [Block] -> [Block] Source #

Put a list of Pandoc blocks into a hierarchical structure: a list of sections (each a Div with class "section" and first element a Header). If the numbering parameter is True, Header numbers are added via the number attribute on the header. If the baseLevel parameter is Just n, Header levels are adjusted to be gapless starting at level n.

uniqueIdent :: Extensions -> [Inline] -> Set Text -> Text Source #

Generate a unique identifier from a list of inlines. The third argument is the set of already used identifiers.

inlineListToIdentifier :: Extensions -> [Inline] -> Text Source #

Convert Pandoc inline list to plain text identifier.

textToIdentifier :: Extensions -> Text -> Text Source #

Convert string to plain text identifier.

isHeaderBlock :: Block -> Bool Source #

True if block is a Header block.

headerShift :: Int -> Pandoc -> Pandoc Source #

Shift header levels up or down.

stripEmptyParagraphs :: Pandoc -> Pandoc Source #

Remove empty paragraphs.

onlySimpleTableCells :: [[[Block]]] -> Bool Source #

Detect if table rows contain only cells consisting of a single paragraph that has no LineBreak.

isTightList :: [[Block]] -> Bool Source #

Detect if a list is tight.

taskListItemFromAscii :: Extensions -> [Block] -> [Block] Source #

Convert a list item containing tasklist syntax (e.g. [x]) to use U+2610 BALLOT BOX or U+2612 BALLOT BOX WITH X instead.

taskListItemToAscii :: Extensions -> [Block] -> [Block] Source #

Convert a list item containing text starting with U+2610 BALLOT BOX or U+2612 BALLOT BOX WITH X to tasklist syntax (e.g. [x]).

handleTaskListItem :: ([Inline] -> [Inline]) -> Extensions -> [Block] -> [Block] Source #

addMetaField :: ToMetaValue a => Text -> a -> Meta -> Meta Source #

Set a field of a Meta object. If the field already has a value, convert it into a list with the new value appended to the old value(s).

eastAsianLineBreakFilter :: Pandoc -> Pandoc Source #

Remove soft breaks between East Asian characters.

htmlSpanLikeElements :: Set Text Source #

Set of HTML elements that are represented as a Span with a class equal to the element tag itself.

filterIpynbOutput :: Maybe Format -> Pandoc -> Pandoc Source #

Process ipynb output cells. If mode is Nothing, remove all output. If mode is Just format, select best output for the format. If format is not ipynb, strip out ANSI escape sequences from CodeBlocks (see #5633).

formatCode :: Attr -> Inlines -> Inlines Source #

Reformat Inlines as code, putting the stringlike parts in Code elements while bringing other inline formatting outside. The idea is that e.g. `[Str "a",Space,Strong [Str "b"]]` should turn into `[Code ("",[],[]) "a ", Strong [Code ("",[],[]) "b"]]`. This helps work around the limitation that pandoc's Code element can only contain string content (see issue #7525).

TagSoup HTML handling

renderTags' :: [Tag Text] -> Text Source #

Render HTML tags.

File handling

inDirectory :: FilePath -> IO a -> IO a Source #

Perform an IO action in a directory, returning to starting directory.

makeCanonical :: FilePath -> FilePath Source #

Canonicalizes a file path by removing redundant "." and ".." components.

collapseFilePath :: FilePath -> FilePath Source #

Remove intermediate "." and ".." directories from a path.

collapseFilePath "./foo" == "foo"
collapseFilePath "/bar/../baz" == "/baz"
collapseFilePath "/../baz" == "/../baz"
collapseFilePath "parent/foo/baz/../bar" ==  "parent/foo/bar"
collapseFilePath "parent/foo/baz/../../bar" ==  "parent/bar"
collapseFilePath "parent/foo/.." ==  "parent"
collapseFilePath "/parent/foo/../../bar" ==  "/bar"

filteredFilesFromArchive :: Archive -> (FilePath -> Bool) -> [(FilePath, ByteString)] Source #

for squashing blocks

blocksToInlines :: [Block] -> [Inline] Source #

blocksToInlines' :: [Block] -> Inlines Source #

blocksToInlinesWithSep :: Inlines -> [Block] -> Inlines Source #

defaultBlocksSeparator :: Inlines Source #

Inline elements used to separate blocks when squashing blocks into inlines.

Safe read

safeRead :: (MonadPlus m, Read a) => Text -> m a Source #

safeStrRead :: (MonadPlus m, Read a) => String -> m a Source #