module Text.Pandoc.Readers.Textile ( readTextile) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag )
import Text.ParserCombinators.Parsec
import Text.HTML.TagSoup.Match
import Data.Char ( digitToInt, isLetter )
import Control.Monad ( guard, liftM )
readTextile :: ParserState
-> String
-> Pandoc
readTextile state s = (readWith parseTextile) state (s ++ "\n\n")
specialChars :: [Char]
specialChars = "\\[]<>*#_@~-+^&,.;:!?|\"'%()"
parseTextile :: GenParser Char ParserState Pandoc
parseTextile = do
updateState (\state -> state { stateParseRaw = True, stateSmart = True })
many blankline
startPos <- getPosition
let firstPassParser = noteBlock <|> lineClump
manyTill firstPassParser eof >>= setInput . concat
setPosition startPos
st' <- getState
let reversedNotes = stateNotes st'
updateState $ \s -> s { stateNotes = reverse reversedNotes }
blocks <- parseBlocks
return $ Pandoc (Meta [] [] []) blocks
noteMarker :: GenParser Char ParserState [Char]
noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.')
noteBlock :: GenParser Char ParserState [Char]
noteBlock = try $ do
startPos <- getPosition
ref <- noteMarker
optional blankline
contents <- liftM unlines $ many1Till anyLine (blanklines <|> noteBlock)
endPos <- getPosition
let newnote = (ref, contents ++ "\n")
st <- getState
let oldnotes = stateNotes st
updateState $ \s -> s { stateNotes = newnote : oldnotes }
return $ replicate (sourceLine endPos sourceLine startPos) '\n'
parseBlocks :: GenParser Char ParserState [Block]
parseBlocks = manyTill block eof
blockParsers :: [GenParser Char ParserState Block]
blockParsers = [ codeBlock
, header
, blockQuote
, hrule
, anyList
, rawHtmlBlock
, maybeExplicitBlock "table" table
, maybeExplicitBlock "p" para
, nullBlock ]
block :: GenParser Char ParserState Block
block = choice blockParsers <?> "block"
codeBlock :: GenParser Char ParserState Block
codeBlock = codeBlockBc <|> codeBlockPre
codeBlockBc :: GenParser Char ParserState Block
codeBlockBc = try $ do
string "bc. "
contents <- manyTill anyLine blanklines
return $ CodeBlock ("",[],[]) $ unlines contents
codeBlockPre :: GenParser Char ParserState Block
codeBlockPre = try $ do
htmlTag (tagOpen (=="pre") null)
result' <- manyTill anyChar (try $ htmlTag (tagClose (=="pre")) >> blockBreak)
let result'' = case result' of
'\n':xs -> xs
_ -> result'
let result''' = case reverse result'' of
'\n':_ -> init result''
_ -> result''
return $ CodeBlock ("",[],[]) result'''
header :: GenParser Char ParserState Block
header = try $ do
char 'h'
level <- oneOf "123456" >>= return . digitToInt
optional attributes
char '.'
whitespace
name <- manyTill inline blockBreak
return $ Header level (normalizeSpaces name)
blockQuote :: GenParser Char ParserState Block
blockQuote = try $ do
string "bq"
optional attributes
char '.'
whitespace
para >>= return . BlockQuote . (:[])
hrule :: GenParser Char st Block
hrule = try $ do
skipSpaces
start <- oneOf "-*"
count 2 (skipSpaces >> char start)
skipMany (spaceChar <|> char start)
newline
optional blanklines
return HorizontalRule
anyList :: GenParser Char ParserState Block
anyList = try $ do
l <- anyListAtDepth 1
blanklines
return l
anyListAtDepth :: Int -> GenParser Char ParserState Block
anyListAtDepth depth = choice [ bulletListAtDepth depth,
orderedListAtDepth depth,
definitionList ]
bulletListAtDepth :: Int -> GenParser Char ParserState Block
bulletListAtDepth depth = try $ do
items <- many1 (bulletListItemAtDepth depth)
return (BulletList items)
bulletListItemAtDepth :: Int -> GenParser Char ParserState [Block]
bulletListItemAtDepth depth = try $ do
count depth (char '*')
optional attributes
whitespace
p <- inlines >>= return . Plain
sublist <- option [] (anyListAtDepth (depth + 1) >>= return . (:[]))
return (p:sublist)
orderedListAtDepth :: Int -> GenParser Char ParserState Block
orderedListAtDepth depth = try $ do
items <- many1 (orderedListItemAtDepth depth)
return (OrderedList (1, DefaultStyle, DefaultDelim) items)
orderedListItemAtDepth :: Int -> GenParser Char ParserState [Block]
orderedListItemAtDepth depth = try $ do
count depth (char '#')
optional attributes
whitespace
p <- inlines >>= return . Plain
sublist <- option [] (anyListAtDepth (depth + 1) >>= return . (:[]))
return (p:sublist)
definitionList :: GenParser Char ParserState Block
definitionList = try $ do
items <- many1 definitionListItem
return $ DefinitionList items
definitionListItem :: GenParser Char ParserState ([Inline], [[Block]])
definitionListItem = try $ do
string "- "
term <- many1Till inline (try (whitespace >> string ":="))
def <- inlineDef <|> multilineDef
return (term, def)
where inlineDef :: GenParser Char ParserState [[Block]]
inlineDef = liftM (\d -> [[Plain d]]) $ try (whitespace >> inlines)
multilineDef :: GenParser Char ParserState [[Block]]
multilineDef = try $ do
optional whitespace >> newline
s <- many1Till anyChar (try (string "=:" >> newline))
ds <- parseFromString parseBlocks (s ++ "\n\n")
return [ds]
blockBreak :: GenParser Char ParserState ()
blockBreak = try (newline >> blanklines >> return ()) <|>
(lookAhead rawHtmlBlock >> return ())
rawHtmlBlock :: GenParser Char ParserState Block
rawHtmlBlock = try $ do
(_,b) <- htmlTag isBlockTag
optional blanklines
return $ RawBlock "html" b
para :: GenParser Char ParserState Block
para = try $ do
content <- manyTill inline blockBreak
return $ Para $ normalizeSpaces content
tableCell :: GenParser Char ParserState TableCell
tableCell = do
c <- many1 (noneOf "|\n")
content <- parseFromString (many1 inline) c
return $ [ Plain $ normalizeSpaces content ]
tableRow :: GenParser Char ParserState [TableCell]
tableRow = try $ do
char '|'
cells <- endBy1 tableCell (char '|')
newline
return cells
tableRows :: GenParser Char ParserState [[TableCell]]
tableRows = many1 tableRow
tableHeaders :: GenParser Char ParserState [TableCell]
tableHeaders = try $ do
let separator = (try $ string "|_.")
separator
headers <- sepBy1 tableCell separator
char '|'
newline
return headers
table :: GenParser Char ParserState Block
table = try $ do
headers <- option [] tableHeaders
rows <- tableRows
blanklines
let nbOfCols = max (length headers) (length $ head rows)
return $ Table []
(replicate nbOfCols AlignDefault)
(replicate nbOfCols 0.0)
headers
rows
maybeExplicitBlock :: String
-> GenParser Char ParserState Block
-> GenParser Char ParserState Block
maybeExplicitBlock name blk = try $ do
optional $ try $ string name >> optional attributes >> char '.' >>
((try whitespace) <|> endline)
blk
inline :: GenParser Char ParserState Inline
inline = choice inlineParsers <?> "inline"
inlines :: GenParser Char ParserState [Inline]
inlines = manyTill inline newline
inlineParsers :: [GenParser Char ParserState Inline]
inlineParsers = [ autoLink
, str
, whitespace
, endline
, code
, htmlSpan
, rawHtmlInline
, note
, simpleInline (string "??") (Cite [])
, simpleInline (string "**") Strong
, simpleInline (string "__") Emph
, simpleInline (char '*') Strong
, simpleInline (char '_') Emph
, simpleInline (char '-') Strikeout
, simpleInline (char '^') Superscript
, simpleInline (char '~') Subscript
, link
, image
, mark
, smartPunctuation inline
, symbol
]
mark :: GenParser Char st Inline
mark = try $ char '(' >> (try tm <|> try reg <|> copy)
reg :: GenParser Char st Inline
reg = do
oneOf "Rr"
char ')'
return $ Str "\174"
tm :: GenParser Char st Inline
tm = do
oneOf "Tt"
oneOf "Mm"
char ')'
return $ Str "\8482"
copy :: GenParser Char st Inline
copy = do
oneOf "Cc"
char ')'
return $ Str "\169"
note :: GenParser Char ParserState Inline
note = try $ do
char '['
ref <- many1 digit
char ']'
state <- getState
let notes = stateNotes state
case lookup ref notes of
Nothing -> fail "note not found"
Just raw -> liftM Note $ parseFromString parseBlocks raw
str :: GenParser Char ParserState Inline
str = do
xs <- many1 (noneOf (specialChars ++ "\t\n "))
optional $ try $ do
lookAhead (char '(')
notFollowedBy' mark
getInput >>= setInput . (' ':)
result <- option xs $ try $ do
char '-'
next <- lookAhead letter
guard $ isLetter (last xs) || isLetter next
return $ xs ++ "-"
return $ Str result
htmlSpan :: GenParser Char ParserState Inline
htmlSpan = try $ do
char '%'
_ <- attributes
content <- manyTill anyChar (char '%')
return $ Str content
whitespace :: GenParser Char ParserState Inline
whitespace = many1 spaceChar >> return Space <?> "whitespace"
endline :: GenParser Char ParserState Inline
endline = try $ do
newline >> notFollowedBy blankline
return LineBreak
rawHtmlInline :: GenParser Char ParserState Inline
rawHtmlInline = liftM (RawInline "html" . snd)
$ htmlTag isInlineTag
link :: GenParser Char ParserState Inline
link = try $ do
name <- surrounded (char '"') inline
char ':'
url <- manyTill (anyChar) (lookAhead $ (space <|> try (oneOf ".;," >> (space <|> newline))))
return $ Link name (url, "")
autoLink :: GenParser Char ParserState Inline
autoLink = do
(orig, src) <- (try uri <|> try emailAddress)
return $ Link [Str orig] (src, "")
image :: GenParser Char ParserState Inline
image = try $ do
char '!' >> notFollowedBy space
src <- manyTill anyChar (lookAhead $ oneOf "!(")
alt <- option "" (try $ (char '(' >> manyTill anyChar (char ')')))
char '!'
return $ Image [Str alt] (src, alt)
symbol :: GenParser Char ParserState Inline
symbol = do
result <- oneOf specialChars
return $ Str [result]
code :: GenParser Char ParserState Inline
code = code1 <|> code2
code1 :: GenParser Char ParserState Inline
code1 = surrounded (char '@') anyChar >>= return . Code nullAttr
code2 :: GenParser Char ParserState Inline
code2 = do
htmlTag (tagOpen (=="tt") null)
result' <- manyTill anyChar (try $ htmlTag $ tagClose (=="tt"))
return $ Code nullAttr result'
attributes :: GenParser Char ParserState String
attributes = choice [ enclosed (char '(') (char ')') anyChar,
enclosed (char '{') (char '}') anyChar,
enclosed (char '[') (char ']') anyChar]
surrounded :: GenParser Char st t
-> GenParser Char st a
-> GenParser Char st [a]
surrounded border = enclosed border border
simpleInline :: GenParser Char ParserState t
-> ([Inline] -> Inline)
-> GenParser Char ParserState Inline
simpleInline border construct = surrounded border (inlineWithAttribute) >>=
return . construct . normalizeSpaces
where inlineWithAttribute = (try $ optional attributes) >> inline