module Text.Pandoc.Readers.Textile ( readTextile) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag )
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
import Text.HTML.TagSoup.Match
import Data.List ( intercalate )
import Data.Char ( digitToInt, isUpper )
import Control.Monad ( guard, liftM )
import Control.Applicative ((<$>), (*>), (<*))
readTextile :: ReaderOptions
-> String
-> Pandoc
readTextile opts s =
(readWith parseTextile) def{ stateOptions = opts } (s ++ "\n\n")
parseTextile :: Parser [Char] ParserState Pandoc
parseTextile = do
oldOpts <- stateOptions `fmap` getState
updateState $ \state -> state{ stateOptions =
oldOpts{ readerSmart = True
, readerParseRaw = True
, readerOldDashes = True
} }
many blankline
startPos <- getPosition
let firstPassParser = noteBlock <|> lineClump
manyTill firstPassParser eof >>= setInput . concat
setPosition startPos
st' <- getState
let reversedNotes = stateNotes st'
updateState $ \s -> s { stateNotes = reverse reversedNotes }
blocks <- parseBlocks
return $ Pandoc (Meta [] [] []) blocks
noteMarker :: Parser [Char] ParserState [Char]
noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.')
noteBlock :: Parser [Char] ParserState [Char]
noteBlock = try $ do
startPos <- getPosition
ref <- noteMarker
optional blankline
contents <- liftM unlines $ many1Till anyLine (blanklines <|> noteBlock)
endPos <- getPosition
let newnote = (ref, contents ++ "\n")
st <- getState
let oldnotes = stateNotes st
updateState $ \s -> s { stateNotes = newnote : oldnotes }
return $ replicate (sourceLine endPos sourceLine startPos) '\n'
parseBlocks :: Parser [Char] ParserState [Block]
parseBlocks = manyTill block eof
blockParsers :: [Parser [Char] ParserState Block]
blockParsers = [ codeBlock
, header
, blockQuote
, hrule
, commentBlock
, anyList
, rawHtmlBlock
, rawLaTeXBlock'
, maybeExplicitBlock "table" table
, maybeExplicitBlock "p" para
]
block :: Parser [Char] ParserState Block
block = choice blockParsers <?> "block"
commentBlock :: Parser [Char] ParserState Block
commentBlock = try $ do
string "###."
manyTill anyLine blanklines
return Null
codeBlock :: Parser [Char] ParserState Block
codeBlock = codeBlockBc <|> codeBlockPre
codeBlockBc :: Parser [Char] ParserState Block
codeBlockBc = try $ do
string "bc. "
contents <- manyTill anyLine blanklines
return $ CodeBlock ("",[],[]) $ unlines contents
codeBlockPre :: Parser [Char] ParserState Block
codeBlockPre = try $ do
htmlTag (tagOpen (=="pre") null)
result' <- manyTill anyChar (try $ htmlTag (tagClose (=="pre")) >> blockBreak)
let result'' = case result' of
'\n':xs -> xs
_ -> result'
let result''' = case reverse result'' of
'\n':_ -> init result''
_ -> result''
return $ CodeBlock ("",[],[]) result'''
header :: Parser [Char] ParserState Block
header = try $ do
char 'h'
level <- digitToInt <$> oneOf "123456"
attr <- option "" attributes
let ident = case attr of
'#':xs -> xs
_ -> ""
char '.'
whitespace
name <- normalizeSpaces <$> manyTill inline blockBreak
return $ Header level (ident,[],[]) name
blockQuote :: Parser [Char] ParserState Block
blockQuote = try $ do
string "bq" >> optional attributes >> char '.' >> whitespace
BlockQuote . singleton <$> para
hrule :: Parser [Char] st Block
hrule = try $ do
skipSpaces
start <- oneOf "-*"
count 2 (skipSpaces >> char start)
skipMany (spaceChar <|> char start)
newline
optional blanklines
return HorizontalRule
anyList :: Parser [Char] ParserState Block
anyList = try $ anyListAtDepth 1 <* blanklines
anyListAtDepth :: Int -> Parser [Char] ParserState Block
anyListAtDepth depth = choice [ bulletListAtDepth depth,
orderedListAtDepth depth,
definitionList ]
bulletListAtDepth :: Int -> Parser [Char] ParserState Block
bulletListAtDepth depth = try $ BulletList <$> many1 (bulletListItemAtDepth depth)
bulletListItemAtDepth :: Int -> Parser [Char] ParserState [Block]
bulletListItemAtDepth = genericListItemAtDepth '*'
orderedListAtDepth :: Int -> Parser [Char] ParserState Block
orderedListAtDepth depth = try $ do
items <- many1 (orderedListItemAtDepth depth)
return (OrderedList (1, DefaultStyle, DefaultDelim) items)
orderedListItemAtDepth :: Int -> Parser [Char] ParserState [Block]
orderedListItemAtDepth = genericListItemAtDepth '#'
genericListItemAtDepth :: Char -> Int -> Parser [Char] ParserState [Block]
genericListItemAtDepth c depth = try $ do
count depth (char c) >> optional attributes >> whitespace
p <- many listInline
newline
sublist <- option [] (singleton <$> anyListAtDepth (depth + 1))
return (Plain p : sublist)
definitionList :: Parser [Char] ParserState Block
definitionList = try $ DefinitionList <$> many1 definitionListItem
listStart :: Parser [Char] st Char
listStart = oneOf "*#-"
listInline :: Parser [Char] ParserState Inline
listInline = try (notFollowedBy newline >> inline)
<|> try (endline <* notFollowedBy listStart)
definitionListItem :: Parser [Char] ParserState ([Inline], [[Block]])
definitionListItem = try $ do
string "- "
term <- many1Till inline (try (whitespace >> string ":="))
def' <- multilineDef <|> inlineDef
return (term, def')
where inlineDef :: Parser [Char] ParserState [[Block]]
inlineDef = liftM (\d -> [[Plain d]])
$ optional whitespace >> many listInline <* newline
multilineDef :: Parser [Char] ParserState [[Block]]
multilineDef = try $ do
optional whitespace >> newline
s <- many1Till anyChar (try (string "=:" >> newline))
ds <- parseFromString parseBlocks (s ++ "\n\n")
return [ds]
blockBreak :: Parser [Char] ParserState ()
blockBreak = try (newline >> blanklines >> return ()) <|>
(lookAhead rawHtmlBlock >> return ())
rawHtmlBlock :: Parser [Char] ParserState Block
rawHtmlBlock = try $ do
(_,b) <- htmlTag isBlockTag
optional blanklines
return $ RawBlock "html" b
rawLaTeXBlock' :: Parser [Char] ParserState Block
rawLaTeXBlock' = do
guardEnabled Ext_raw_tex
RawBlock "latex" <$> (rawLaTeXBlock <* spaces)
para :: Parser [Char] ParserState Block
para = try $ Para . normalizeSpaces <$> manyTill inline blockBreak
tableCell :: Parser [Char] ParserState TableCell
tableCell = do
c <- many1 (noneOf "|\n")
content <- parseFromString (many1 inline) c
return $ [ Plain $ normalizeSpaces content ]
tableRow :: Parser [Char] ParserState [TableCell]
tableRow = try $ ( char '|' *>
(endBy1 tableCell (optional blankline *> char '|')) <* newline)
tableRows :: Parser [Char] ParserState [[TableCell]]
tableRows = many1 tableRow
tableHeaders :: Parser [Char] ParserState [TableCell]
tableHeaders = let separator = (try $ string "|_.") in
try $ ( separator *> (sepBy1 tableCell separator) <* char '|' <* newline )
table :: Parser [Char] ParserState Block
table = try $ do
headers <- option [] tableHeaders
rows <- tableRows
blanklines
let nbOfCols = max (length headers) (length $ head rows)
return $ Table []
(replicate nbOfCols AlignDefault)
(replicate nbOfCols 0.0)
headers
rows
maybeExplicitBlock :: String
-> Parser [Char] ParserState Block
-> Parser [Char] ParserState Block
maybeExplicitBlock name blk = try $ do
optional $ try $ string name >> optional attributes >> char '.' >>
optional whitespace >> optional endline
blk
inline :: Parser [Char] ParserState Inline
inline = choice inlineParsers <?> "inline"
inlineParsers :: [Parser [Char] ParserState Inline]
inlineParsers = [ str
, whitespace
, endline
, code
, escapedInline
, htmlSpan
, rawHtmlInline
, rawLaTeXInline'
, note
, try $ (char '[' *> inlineMarkup <* char ']')
, inlineMarkup
, link
, image
, mark
, smartPunctuation inline
, symbol
]
inlineMarkup :: Parser [Char] ParserState Inline
inlineMarkup = choice [ simpleInline (string "??") (Cite [])
, simpleInline (string "**") Strong
, simpleInline (string "__") Emph
, simpleInline (char '*') Strong
, simpleInline (char '_') Emph
, simpleInline (char '+') Emph
, simpleInline (char '-' <* notFollowedBy (char '-')) Strikeout
, simpleInline (char '^') Superscript
, simpleInline (char '~') Subscript
]
mark :: Parser [Char] st Inline
mark = try $ char '(' >> (try tm <|> try reg <|> copy)
reg :: Parser [Char] st Inline
reg = do
oneOf "Rr"
char ')'
return $ Str "\174"
tm :: Parser [Char] st Inline
tm = do
oneOf "Tt"
oneOf "Mm"
char ')'
return $ Str "\8482"
copy :: Parser [Char] st Inline
copy = do
oneOf "Cc"
char ')'
return $ Str "\169"
note :: Parser [Char] ParserState Inline
note = try $ do
ref <- (char '[' *> many1 digit <* char ']')
notes <- stateNotes <$> getState
case lookup ref notes of
Nothing -> fail "note not found"
Just raw -> liftM Note $ parseFromString parseBlocks raw
markupChars :: [Char]
markupChars = "\\*#_@~-+^|%=[]"
stringBreakers :: [Char]
stringBreakers = " \t\n\r.,\"'?!;:<>«»„“”‚‘’()[]"
wordBoundaries :: [Char]
wordBoundaries = markupChars ++ stringBreakers
hyphenedWords :: Parser [Char] ParserState String
hyphenedWords = do
x <- wordChunk
xs <- many (try $ char '-' >> wordChunk)
return $ intercalate "-" (x:xs)
wordChunk :: Parser [Char] ParserState String
wordChunk = try $ do
hd <- noneOf wordBoundaries
tl <- many ( (noneOf wordBoundaries) <|>
try (notFollowedBy' note *> oneOf markupChars
<* lookAhead (noneOf wordBoundaries) ) )
return $ hd:tl
str :: Parser [Char] ParserState Inline
str = do
baseStr <- hyphenedWords
fullStr <- option baseStr $ try $ do
guard $ all isUpper baseStr
acro <- enclosed (char '(') (char ')') anyChar
return $ concat [baseStr, " (", acro, ")"]
updateLastStrPos
return $ Str fullStr
htmlSpan :: Parser [Char] ParserState Inline
htmlSpan = try $ Str <$> ( char '%' *> attributes *> manyTill anyChar (char '%') )
whitespace :: Parser [Char] ParserState Inline
whitespace = many1 spaceChar >> return Space <?> "whitespace"
endline :: Parser [Char] ParserState Inline
endline = try $ do
newline >> notFollowedBy blankline
return LineBreak
rawHtmlInline :: Parser [Char] ParserState Inline
rawHtmlInline = RawInline "html" . snd <$> htmlTag isInlineTag
rawLaTeXInline' :: Parser [Char] ParserState Inline
rawLaTeXInline' = try $ do
guardEnabled Ext_raw_tex
rawLaTeXInline
link :: Parser [Char] ParserState Inline
link = linkB <|> linkNoB
linkNoB :: Parser [Char] ParserState Inline
linkNoB = try $ do
name <- surrounded (char '"') inline
char ':'
let stopChars = "!.,;:"
url <- manyTill nonspaceChar (lookAhead $ space <|> try (oneOf stopChars >> (space <|> newline)))
let name' = if name == [Str "$"] then [Str url] else name
return $ Link name' (url, "")
linkB :: Parser [Char] ParserState Inline
linkB = try $ do
char '['
name <- surrounded (char '"') inline
char ':'
url <- manyTill nonspaceChar (char ']')
let name' = if name == [Str "$"] then [Str url] else name
return $ Link name' (url, "")
image :: Parser [Char] ParserState Inline
image = try $ do
char '!' >> notFollowedBy space
src <- manyTill anyChar (lookAhead $ oneOf "!(")
alt <- option "" (try $ (char '(' >> manyTill anyChar (char ')')))
char '!'
return $ Image [Str alt] (src, alt)
escapedInline :: Parser [Char] ParserState Inline
escapedInline = escapedEqs <|> escapedTag
escapedEqs :: Parser [Char] ParserState Inline
escapedEqs = Str <$> (try $ string "==" *> manyTill anyChar (try $ string "=="))
escapedTag :: Parser [Char] ParserState Inline
escapedTag = Str <$>
(try $ string "<notextile>" *> manyTill anyChar (try $ string "</notextile>"))
symbol :: Parser [Char] ParserState Inline
symbol = Str . singleton <$> (oneOf wordBoundaries <|> oneOf markupChars)
code :: Parser [Char] ParserState Inline
code = code1 <|> code2
code1 :: Parser [Char] ParserState Inline
code1 = Code nullAttr <$> surrounded (char '@') anyChar
code2 :: Parser [Char] ParserState Inline
code2 = do
htmlTag (tagOpen (=="tt") null)
Code nullAttr <$> manyTill anyChar (try $ htmlTag $ tagClose (=="tt"))
attributes :: Parser [Char] ParserState String
attributes = choice [ enclosed (char '(') (char ')') anyChar,
enclosed (char '{') (char '}') anyChar,
enclosed (char '[') (char ']') anyChar]
surrounded :: Parser [Char] st t
-> Parser [Char] st a
-> Parser [Char] st [a]
surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
simpleInline :: Parser [Char] ParserState t
-> ([Inline] -> Inline)
-> Parser [Char] ParserState Inline
simpleInline border construct = surrounded border (inlineWithAttribute) >>=
return . construct . normalizeSpaces
where inlineWithAttribute = (try $ optional attributes) >> inline
singleton :: a -> [a]
singleton x = [x]