module Text.Pandoc.Readers.RST (
readRST
) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
import Text.ParserCombinators.Parsec
import Control.Monad ( when )
import Data.List ( findIndex, intercalate, transpose, sort, deleteFirstsBy )
import qualified Data.Map as M
import Text.Printf ( printf )
import Data.Maybe ( catMaybes )
readRST :: ParserState
-> String
-> Pandoc
readRST state s = (readWith parseRST) state (s ++ "\n\n")
bulletListMarkers :: [Char]
bulletListMarkers = "*+-"
underlineChars :: [Char]
underlineChars = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
specialChars :: [Char]
specialChars = "\\`|*_<>$:[-.\"'\8216\8217\8220\8221"
isHeader :: Int -> Block -> Bool
isHeader n (Header x _) = x == n
isHeader _ _ = False
promoteHeaders :: Int -> [Block] -> [Block]
promoteHeaders num ((Header level text):rest) =
(Header (level num) text):(promoteHeaders num rest)
promoteHeaders num (other:rest) = other:(promoteHeaders num rest)
promoteHeaders _ [] = []
titleTransform :: [Block]
-> ([Block], [Inline])
titleTransform ((Header 1 head1):(Header 2 head2):rest) |
not (any (isHeader 1) rest || any (isHeader 2) rest) =
(promoteHeaders 2 rest, head1 ++ [Str ":", Space] ++ head2)
titleTransform ((Header 1 head1):rest) |
not (any (isHeader 1) rest) =
(promoteHeaders 1 rest, head1)
titleTransform blocks = (blocks, [])
parseRST :: GenParser Char ParserState Pandoc
parseRST = do
optional blanklines
startPos <- getPosition
docMinusKeys <- manyTill (referenceKey <|> noteBlock <|> lineClump) eof >>=
return . concat
setInput docMinusKeys
setPosition startPos
st' <- getState
let reversedNotes = stateNotes st'
updateState $ \s -> s { stateNotes = reverse reversedNotes }
blocks <- parseBlocks
let blocks' = filter (/= Null) blocks
state <- getState
let (blocks'', title) = if stateStandalone state
then titleTransform blocks'
else (blocks', [])
let authors = stateAuthors state
let date = stateDate state
let title' = if (null title) then (stateTitle state) else title
return $ Pandoc (Meta title' authors date) blocks''
parseBlocks :: GenParser Char ParserState [Block]
parseBlocks = manyTill block eof
block :: GenParser Char ParserState Block
block = choice [ codeBlock
, rawBlock
, blockQuote
, fieldList
, imageBlock
, customCodeBlock
, unknownDirective
, header
, hrule
, lineBlock
, table
, list
, lhsCodeBlock
, para
, plain
, nullBlock ] <?> "block"
rawFieldListItem :: String -> GenParser Char ParserState (String, String)
rawFieldListItem indent = try $ do
string indent
char ':'
name <- many1 $ alphaNum <|> spaceChar
string ": "
skipSpaces
first <- manyTill anyChar newline
rest <- option "" $ try $ do lookAhead (string indent >> spaceChar)
indentedBlock
let raw = first ++ "\n" ++ rest ++ "\n"
return (name, raw)
fieldListItem :: String
-> GenParser Char ParserState (Maybe ([Inline], [[Block]]))
fieldListItem indent = try $ do
(name, raw) <- rawFieldListItem indent
let term = [Str name]
contents <- parseFromString (many block) raw
case (name, contents) of
("Author", x) -> do
updateState $ \st ->
st{ stateAuthors = stateAuthors st ++ [extractContents x] }
return Nothing
("Authors", [BulletList auths]) -> do
updateState $ \st -> st{ stateAuthors = map extractContents auths }
return Nothing
("Date", x) -> do
updateState $ \st -> st{ stateDate = extractContents x }
return Nothing
("Title", x) -> do
updateState $ \st -> st{ stateTitle = extractContents x }
return Nothing
_ -> return $ Just (term, [contents])
extractContents :: [Block] -> [Inline]
extractContents [Plain auth] = auth
extractContents [Para auth] = auth
extractContents _ = []
fieldList :: GenParser Char ParserState Block
fieldList = try $ do
indent <- lookAhead $ many spaceChar
items <- many1 $ fieldListItem indent
blanklines
if null items
then return Null
else return $ DefinitionList $ catMaybes items
lineBlockLine :: GenParser Char ParserState [Inline]
lineBlockLine = try $ do
string "| "
white <- many spaceChar
line <- many $ (notFollowedBy newline >> inline) <|> (try $ endline >>~ char ' ')
optional endline
return $ normalizeSpaces $ (if null white then [] else [Str white]) ++ line
lineBlock :: GenParser Char ParserState Block
lineBlock = try $ do
lines' <- many1 lineBlockLine
blanklines
return $ Para (intercalate [LineBreak] lines')
para :: GenParser Char ParserState Block
para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph"
codeBlockStart :: GenParser Char st Char
codeBlockStart = string "::" >> blankline >> blankline
paraBeforeCodeBlock :: GenParser Char ParserState Block
paraBeforeCodeBlock = try $ do
result <- many1 (notFollowedBy' codeBlockStart >> inline)
lookAhead (string "::")
return $ Para $ if last result == Space
then normalizeSpaces result
else (normalizeSpaces result) ++ [Str ":"]
paraNormal :: GenParser Char ParserState Block
paraNormal = try $ do
result <- many1 inline
newline
blanklines
return $ Para $ normalizeSpaces result
plain :: GenParser Char ParserState Block
plain = many1 inline >>= return . Plain . normalizeSpaces
imageBlock :: GenParser Char ParserState Block
imageBlock = try $ do
string ".. image:: "
src <- manyTill anyChar newline
fields <- try $ do indent <- lookAhead $ many (oneOf " /t")
many $ rawFieldListItem indent
optional blanklines
case lookup "alt" fields of
Just alt -> return $ Plain [Image [Str $ removeTrailingSpace alt]
(src, "")]
Nothing -> return $ Plain [Image [Str "image"] (src, "")]
header :: GenParser Char ParserState Block
header = doubleHeader <|> singleHeader <?> "header"
doubleHeader :: GenParser Char ParserState Block
doubleHeader = try $ do
c <- oneOf underlineChars
rest <- many (char c)
let lenTop = length (c:rest)
skipSpaces
newline
txt <- many1 (notFollowedBy blankline >> inline)
pos <- getPosition
let len = (sourceColumn pos) 1
if (len > lenTop) then fail "title longer than border" else return ()
blankline
count lenTop (char c)
blanklines
state <- getState
let headerTable = stateHeaderTable state
let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of
Just ind -> (headerTable, ind + 1)
Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
setState (state { stateHeaderTable = headerTable' })
return $ Header level (normalizeSpaces txt)
singleHeader :: GenParser Char ParserState Block
singleHeader = try $ do
notFollowedBy' whitespace
txt <- many1 (do {notFollowedBy blankline; inline})
pos <- getPosition
let len = (sourceColumn pos) 1
blankline
c <- oneOf underlineChars
count (len 1) (char c)
many (char c)
blanklines
state <- getState
let headerTable = stateHeaderTable state
let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of
Just ind -> (headerTable, ind + 1)
Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
setState (state { stateHeaderTable = headerTable' })
return $ Header level (normalizeSpaces txt)
hrule :: GenParser Char st Block
hrule = try $ do
chr <- oneOf underlineChars
count 3 (char chr)
skipMany (char chr)
blankline
blanklines
return HorizontalRule
indentedLine :: String -> GenParser Char st [Char]
indentedLine indents = try $ do
string indents
manyTill anyChar newline
indentedBlock :: GenParser Char st [Char]
indentedBlock = try $ do
indents <- lookAhead $ many1 spaceChar
lns <- many $ choice $ [ indentedLine indents,
try $ do b <- blanklines
l <- indentedLine indents
return (b ++ l) ]
optional blanklines
return $ unlines lns
codeBlock :: GenParser Char st Block
codeBlock = try $ do
codeBlockStart
result <- indentedBlock
return $ CodeBlock ("",[],[]) $ stripTrailingNewlines result
customCodeBlock :: GenParser Char st Block
customCodeBlock = try $ do
string ".. code-block:: "
language <- manyTill anyChar newline
blanklines
result <- indentedBlock
return $ CodeBlock ("", ["sourceCode", language], []) $ stripTrailingNewlines result
lhsCodeBlock :: GenParser Char ParserState Block
lhsCodeBlock = try $ do
failUnlessLHS
optional codeBlockStart
pos <- getPosition
when (sourceColumn pos /= 1) $ fail "Not in first column"
lns <- many1 birdTrackLine
let lns' = if all (\ln -> null ln || take 1 ln == " ") lns
then map (drop 1) lns
else lns
blanklines
return $ CodeBlock ("", ["sourceCode", "literate", "haskell"], []) $ intercalate "\n" lns'
birdTrackLine :: GenParser Char st [Char]
birdTrackLine = do
char '>'
manyTill anyChar newline
rawBlock :: GenParser Char st Block
rawBlock = try $ do
string ".. raw:: "
lang <- many1 (letter <|> digit)
blanklines
result <- indentedBlock
return $ RawBlock lang result
blockQuote :: GenParser Char ParserState Block
blockQuote = do
raw <- indentedBlock
contents <- parseFromString parseBlocks $ raw ++ "\n\n"
return $ BlockQuote contents
list :: GenParser Char ParserState Block
list = choice [ bulletList, orderedList, definitionList ] <?> "list"
definitionListItem :: GenParser Char ParserState ([Inline], [[Block]])
definitionListItem = try $ do
notFollowedBy (try $ char '.' >> char '.')
term <- many1Till inline endline
raw <- indentedBlock
contents <- parseFromString parseBlocks $ raw ++ "\n"
return (normalizeSpaces term, [contents])
definitionList :: GenParser Char ParserState Block
definitionList = many1 definitionListItem >>= return . DefinitionList
bulletListStart :: GenParser Char st Int
bulletListStart = try $ do
notFollowedBy' hrule
marker <- oneOf bulletListMarkers
white <- many1 spaceChar
return $ length (marker:white)
orderedListStart :: ListNumberStyle
-> ListNumberDelim
-> GenParser Char ParserState Int
orderedListStart style delim = try $ do
(_, markerLen) <- withHorizDisplacement (orderedListMarker style delim)
white <- many1 spaceChar
return $ markerLen + length white
listLine :: Int -> GenParser Char ParserState [Char]
listLine markerLength = try $ do
notFollowedBy blankline
indentWith markerLength
line <- manyTill anyChar newline
return $ line ++ "\n"
indentWith :: Int -> GenParser Char ParserState [Char]
indentWith num = do
state <- getState
let tabStop = stateTabStop state
if (num < tabStop)
then count num (char ' ')
else choice [ try (count num (char ' ')),
(try (char '\t' >> count (num tabStop) (char ' '))) ]
rawListItem :: GenParser Char ParserState Int
-> GenParser Char ParserState (Int, [Char])
rawListItem start = try $ do
markerLength <- start
firstLine <- manyTill anyChar newline
restLines <- many (listLine markerLength)
return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))
listContinuation :: Int -> GenParser Char ParserState [Char]
listContinuation markerLength = try $ do
blanks <- many1 blankline
result <- many1 (listLine markerLength)
return $ blanks ++ concat result
listItem :: GenParser Char ParserState Int
-> GenParser Char ParserState [Block]
listItem start = try $ do
(markerLength, first) <- rawListItem start
rest <- many (listContinuation markerLength)
blanks <- choice [ try (many blankline >>~ lookAhead start),
many1 blankline ]
state <- getState
let oldContext = stateParserContext state
setState $ state {stateParserContext = ListItemState}
parsed <- parseFromString parseBlocks $ concat (first:rest) ++ blanks
updateState (\st -> st {stateParserContext = oldContext})
return parsed
orderedList :: GenParser Char ParserState Block
orderedList = try $ do
(start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar)
items <- many1 (listItem (orderedListStart style delim))
let items' = compactify items
return $ OrderedList (start, style, delim) items'
bulletList :: GenParser Char ParserState Block
bulletList = many1 (listItem bulletListStart) >>=
return . BulletList . compactify
unknownDirective :: GenParser Char st Block
unknownDirective = try $ do
string ".."
notFollowedBy (noneOf " \t\n")
manyTill anyChar newline
many $ blanklines <|> (spaceChar >> manyTill anyChar newline)
return Null
noteBlock :: GenParser Char ParserState [Char]
noteBlock = try $ do
startPos <- getPosition
string ".."
spaceChar >> skipMany spaceChar
ref <- noteMarker
spaceChar >> skipMany spaceChar
first <- anyLine
blanks <- option "" blanklines
rest <- option "" indentedBlock
endPos <- getPosition
let raw = first ++ "\n" ++ blanks ++ rest ++ "\n"
let newnote = (ref, raw)
st <- getState
let oldnotes = stateNotes st
updateState $ \s -> s { stateNotes = newnote : oldnotes }
return $ replicate (sourceLine endPos sourceLine startPos) '\n'
noteMarker :: GenParser Char ParserState [Char]
noteMarker = char '[' >> (many1 digit <|> count 1 (oneOf "#*")) >>~ char ']'
quotedReferenceName :: GenParser Char ParserState [Inline]
quotedReferenceName = try $ do
char '`' >> notFollowedBy (char '`')
label' <- many1Till inline (char '`')
return label'
unquotedReferenceName :: GenParser Char ParserState [Inline]
unquotedReferenceName = try $ do
label' <- many1Till inline (lookAhead $ char ':')
return label'
isolated :: Char -> GenParser Char st Char
isolated ch = try $ char ch >>~ notFollowedBy (char ch)
simpleReferenceName :: GenParser Char st [Inline]
simpleReferenceName = do
raw <- many1 (alphaNum <|> isolated '-' <|> isolated '.' <|>
(try $ char '_' >>~ lookAhead alphaNum))
return [Str raw]
referenceName :: GenParser Char ParserState [Inline]
referenceName = quotedReferenceName <|>
(try $ simpleReferenceName >>~ lookAhead (char ':')) <|>
unquotedReferenceName
referenceKey :: GenParser Char ParserState [Char]
referenceKey = do
startPos <- getPosition
(key, target) <- choice [imageKey, anonymousKey, regularKey]
st <- getState
let oldkeys = stateKeys st
updateState $ \s -> s { stateKeys = M.insert key target oldkeys }
optional blanklines
endPos <- getPosition
return $ replicate (sourceLine endPos sourceLine startPos) '\n'
targetURI :: GenParser Char st [Char]
targetURI = do
skipSpaces
optional newline
contents <- many1 (try (many spaceChar >> newline >>
many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n")
blanklines
return $ escapeURI $ removeLeadingTrailingSpace $ contents
imageKey :: GenParser Char ParserState (Key, Target)
imageKey = try $ do
string ".. |"
ref <- manyTill inline (char '|')
skipSpaces
string "image::"
src <- targetURI
return (toKey (normalizeSpaces ref), (src, ""))
anonymousKey :: GenParser Char st (Key, Target)
anonymousKey = try $ do
oneOfStrings [".. __:", "__"]
src <- targetURI
pos <- getPosition
return (toKey [Str $ "_" ++ printf "%09d" (sourceLine pos)], (src, ""))
regularKey :: GenParser Char ParserState (Key, Target)
regularKey = try $ do
string ".. _"
ref <- referenceName
char ':'
src <- targetURI
return (toKey (normalizeSpaces ref), (src, ""))
dashedLine :: Char -> GenParser Char st (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many (char ' ')
return (length dashes, length $ dashes ++ sp)
simpleDashedLines :: Char -> GenParser Char st [(Int,Int)]
simpleDashedLines ch = try $ many1 (dashedLine ch)
simpleTableSep :: Char -> GenParser Char ParserState Char
simpleTableSep ch = try $ simpleDashedLines ch >> newline
simpleTableFooter :: GenParser Char ParserState [Char]
simpleTableFooter = try $ simpleTableSep '=' >> blanklines
simpleTableRawLine :: [Int] -> GenParser Char ParserState [String]
simpleTableRawLine indices = do
line <- many1Till anyChar newline
return (simpleTableSplitLine indices line)
simpleTableRow :: [Int] -> GenParser Char ParserState [[Block]]
simpleTableRow indices = do
notFollowedBy' simpleTableFooter
firstLine <- simpleTableRawLine indices
colLines <- return []
let cols = map unlines . transpose $ firstLine : colLines
mapM (parseFromString (many plain)) cols
simpleTableSplitLine :: [Int] -> String -> [String]
simpleTableSplitLine indices line =
map removeLeadingTrailingSpace
$ tail $ splitByIndices (init indices) line
simpleTableHeader :: Bool
-> GenParser Char ParserState ([[Block]], [Alignment], [Int])
simpleTableHeader headless = try $ do
optional blanklines
rawContent <- if headless
then return ""
else simpleTableSep '=' >> anyLine
dashes <- simpleDashedLines '='
newline
let lines' = map snd dashes
let indices = scanl (+) 0 lines'
let aligns = replicate (length lines') AlignDefault
let rawHeads = if headless
then replicate (length dashes) ""
else simpleTableSplitLine indices rawContent
heads <- mapM (parseFromString (many plain)) $
map removeLeadingTrailingSpace rawHeads
return (heads, aligns, indices)
simpleTable :: Bool
-> GenParser Char ParserState Block
simpleTable headless = do
Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter (return [])
return $ Table c a (replicate (length a) 0) h l
where
sep = return ()
gridTable :: Bool
-> GenParser Char ParserState Block
gridTable = gridTableWith block (return [])
table :: GenParser Char ParserState Block
table = gridTable False <|> simpleTable False <|>
gridTable True <|> simpleTable True <?> "table"
inline :: GenParser Char ParserState Inline
inline = choice [ whitespace
, link
, str
, endline
, strong
, emph
, code
, image
, superscript
, subscript
, note
, smartPunctuation inline
, hyphens
, escapedChar
, symbol ] <?> "inline"
hyphens :: GenParser Char ParserState Inline
hyphens = do
result <- many1 (char '-')
option Space endline
return $ Str result
escapedChar :: GenParser Char st Inline
escapedChar = escaped anyChar
symbol :: GenParser Char ParserState Inline
symbol = do
result <- oneOf specialChars
return $ Str [result]
code :: GenParser Char ParserState Inline
code = try $ do
string "``"
result <- manyTill anyChar (try (string "``"))
return $ Code nullAttr
$ removeLeadingTrailingSpace $ intercalate " " $ lines result
emph :: GenParser Char ParserState Inline
emph = enclosed (char '*') (char '*') inline >>=
return . Emph . normalizeSpaces
strong :: GenParser Char ParserState Inline
strong = enclosed (string "**") (try $ string "**") inline >>=
return . Strong . normalizeSpaces
interpreted :: [Char] -> GenParser Char st [Inline]
interpreted role = try $ do
optional $ try $ string "\\ "
result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar
try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "")
return [Str result]
superscript :: GenParser Char ParserState Inline
superscript = interpreted "sup" >>= (return . Superscript)
subscript :: GenParser Char ParserState Inline
subscript = interpreted "sub" >>= (return . Subscript)
whitespace :: GenParser Char ParserState Inline
whitespace = many1 spaceChar >> return Space <?> "whitespace"
str :: GenParser Char ParserState Inline
str = many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str
endline :: GenParser Char ParserState Inline
endline = try $ do
newline
notFollowedBy blankline
st <- getState
if (stateParserContext st) == ListItemState
then notFollowedBy (anyOrderedListMarker >> spaceChar) >>
notFollowedBy' bulletListStart
else return ()
return Space
link :: GenParser Char ParserState Inline
link = choice [explicitLink, referenceLink, autoLink] <?> "link"
explicitLink :: GenParser Char ParserState Inline
explicitLink = try $ do
char '`'
notFollowedBy (char '`')
label' <- manyTill (notFollowedBy (char '`') >> inline)
(try (spaces >> char '<'))
src <- manyTill (noneOf ">\n") (char '>')
skipSpaces
string "`_"
return $ Link (normalizeSpaces label')
(escapeURI $ removeLeadingTrailingSpace src, "")
referenceLink :: GenParser Char ParserState Inline
referenceLink = try $ do
label' <- (quotedReferenceName <|> simpleReferenceName) >>~ char '_'
state <- getState
let keyTable = stateKeys state
let isAnonKey x = case fromKey x of
[Str ('_':_)] -> True
_ -> False
key <- option (toKey label') $
do char '_'
let anonKeys = sort $ filter isAnonKey $ M.keys keyTable
if null anonKeys
then pzero
else return (head anonKeys)
(src,tit) <- case lookupKeySrc keyTable key of
Nothing -> fail "no corresponding key"
Just target -> return target
when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable }
return $ Link (normalizeSpaces label') (src, tit)
autoURI :: GenParser Char ParserState Inline
autoURI = do
(orig, src) <- uri
return $ Link [Str orig] (src, "")
autoEmail :: GenParser Char ParserState Inline
autoEmail = do
(orig, src) <- emailAddress
return $ Link [Str orig] (src, "")
autoLink :: GenParser Char ParserState Inline
autoLink = autoURI <|> autoEmail
image :: GenParser Char ParserState Inline
image = try $ do
char '|'
ref <- manyTill inline (char '|')
state <- getState
let keyTable = stateKeys state
(src,tit) <- case lookupKeySrc keyTable (toKey ref) of
Nothing -> fail "no corresponding key"
Just target -> return target
return $ Image (normalizeSpaces ref) (src, tit)
note :: GenParser Char ParserState Inline
note = try $ do
ref <- noteMarker
char '_'
state <- getState
let notes = stateNotes state
case lookup ref notes of
Nothing -> fail "note not found"
Just raw -> do
contents <- parseFromString parseBlocks raw
when (ref == "*" || ref == "#") $ do
let newnotes = deleteFirstsBy (==) notes [(ref,raw)]
updateState $ \st -> st{ stateNotes = newnotes }
return $ Note contents