module Text.Pandoc.Readers.TikiWiki ( readTikiWiki
) where
import Control.Monad
import Control.Monad.Except (throwError)
import qualified Data.Foldable as F
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (CommonState (..), PandocMonad (..))
import Text.Pandoc.Definition
import Text.Pandoc.Logging (Verbosity (..))
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed, nested)
import Text.Pandoc.Shared (crFilter)
import Text.Pandoc.XML (fromEntities)
import Text.Printf (printf)
readTikiWiki :: PandocMonad m
=> ReaderOptions
-> Text
-> m Pandoc
readTikiWiki opts s = do
res <- readWithM parseTikiWiki def{ stateOptions = opts }
(T.unpack (crFilter s) ++ "\n\n")
case res of
Left e -> throwError e
Right d -> return d
type TikiWikiParser = ParserT [Char] ParserState
tryMsg :: PandocMonad m => String -> TikiWikiParser m a -> TikiWikiParser m a
tryMsg msg p = try p <?> msg
skip :: PandocMonad m => TikiWikiParser m a -> TikiWikiParser m ()
skip parser = Control.Monad.void parser
nested :: PandocMonad m => TikiWikiParser m a -> TikiWikiParser m a
nested p = do
nestlevel <- stateMaxNestingLevel <$> getState
guard $ nestlevel > 0
updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st 1 }
res <- p
updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
return res
parseTikiWiki :: PandocMonad m => TikiWikiParser m Pandoc
parseTikiWiki = do
bs <- mconcat <$> many block
spaces
eof
return $ B.doc bs
block :: PandocMonad m => TikiWikiParser m B.Blocks
block = do
verbosity <- getsCommonState stVerbosity
pos <- getPosition
res <- mempty <$ skipMany1 blankline
<|> blockElements
<|> para
skipMany blankline
when (verbosity >= INFO) $
trace (printf "line %d: %s" (sourceLine pos) (take 60 $ show $ B.toList res))
return res
blockElements :: PandocMonad m => TikiWikiParser m B.Blocks
blockElements = choice [ table
, hr
, header
, mixedList
, definitionList
, codeMacro
]
hr :: PandocMonad m => TikiWikiParser m B.Blocks
hr = try $ do
string "----"
many (char '-')
newline
return B.horizontalRule
header :: PandocMonad m => TikiWikiParser m B.Blocks
header = tryMsg "header" $ do
level <- fmap length (many1 (char '!'))
guard $ level <= 6
skipSpaces
content <- B.trimInlines . mconcat <$> manyTill inline newline
attr <- registerHeader nullAttr content
return $B.headerWith attr level content
tableRow :: PandocMonad m => TikiWikiParser m [B.Blocks]
tableRow = try $ do
row <- sepBy1 (many1 (noneOf "\n|") >>= parseColumn) (try $ string "|" <* notFollowedBy (oneOf "|\n"))
return $ map B.plain row
where
parseColumn x = do
parsed <- parseFromString (many1 inline) x
return $ mconcat parsed
table :: PandocMonad m => TikiWikiParser m B.Blocks
table = try $ do
string "||"
rows <- sepBy1 tableRow (try $ string "\n" <|> (string "||" <* notFollowedBy (string "\n")))
string "||"
newline
return $B.simpleTable (headers rows) rows
where
headers rows = map (B.plain . B.str) $replicate (length $ rows !! 0) ""
para :: PandocMonad m => TikiWikiParser m B.Blocks
para = fmap (result . mconcat) ( many1Till inline endOfParaElement)
where
endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement
endOfInput = try $ skipMany blankline >> skipSpaces >> eof
endOfPara = try $ blankline >> skipMany1 blankline
newBlockElement = try $ blankline >> skip blockElements
result content = if F.all (==Space) content
then mempty
else B.para $ B.trimInlines content
definitionList :: PandocMonad m => TikiWikiParser m B.Blocks
definitionList = tryMsg "definitionList" $ do
elements <-many1 parseDefinitionListItem
return $ B.definitionList elements
where
parseDefinitionListItem :: PandocMonad m => TikiWikiParser m (B.Inlines, [B.Blocks])
parseDefinitionListItem = do
skipSpaces >> char ';' <* skipSpaces
term <- many1Till inline $ char ':' <* skipSpaces
line <- listItemLine 1
return (mconcat term, [B.plain line])
data ListType = None | Numbered | Bullet deriving (Ord, Eq, Show)
data ListNesting = LN { lntype :: ListType, lnnest :: Int } deriving (Ord, Eq, Show)
mixedList :: PandocMonad m => TikiWikiParser m B.Blocks
mixedList = try $ do
items <- try $ many1 listItem
return $ mconcat $ fixListNesting $ spanFoldUpList (LN None 0) items
fixListNesting :: [B.Blocks] -> [B.Blocks]
fixListNesting [] = []
fixListNesting [first] = [recurseOnList first]
fixListNesting (first:second:rest) =
let secondBlock = head $ B.toList second in
case secondBlock of
BulletList _ -> fixListNesting $ (mappend (recurseOnList first) (recurseOnList second)) : rest
OrderedList _ _ -> fixListNesting $ (mappend (recurseOnList first) (recurseOnList second)) : rest
_ -> recurseOnList first : fixListNesting (second:rest)
recurseOnList :: B.Blocks -> B.Blocks
recurseOnList items
| length (B.toList items) == 1 =
let itemBlock = head $ B.toList items in
case itemBlock of
BulletList listItems -> B.bulletList $ fixListNesting $ map B.fromList listItems
OrderedList _ listItems -> B.orderedList $ fixListNesting $ map B.fromList listItems
_ -> items
| otherwise = items
spanFoldUpList :: ListNesting -> [(ListNesting, B.Blocks)] -> [B.Blocks]
spanFoldUpList _ [] = []
spanFoldUpList ln [first] =
listWrap ln (fst first) [snd first]
spanFoldUpList ln (first:rest) =
let (span1, span2) = span (splitListNesting (fst first)) rest
newTree1 = listWrap ln (fst first) $ snd first : spanFoldUpList (fst first) span1
newTree2 = spanFoldUpList ln span2
in
newTree1 ++ newTree2
splitListNesting :: ListNesting -> (ListNesting, B.Blocks) -> Bool
splitListNesting ln1 (ln2, _)
| (lnnest ln1) < (lnnest ln2) =
True
| ln1 == ln2 =
True
| otherwise =
False
listWrap :: ListNesting -> ListNesting -> [B.Blocks] -> [B.Blocks]
listWrap upperLN curLN retTree =
if upperLN == curLN then
retTree
else
case lntype curLN of
None -> []
Bullet -> [B.bulletList retTree]
Numbered -> [B.orderedList retTree]
listItem :: PandocMonad m => TikiWikiParser m (ListNesting, B.Blocks)
listItem = choice [
bulletItem
, numberedItem
]
bulletItem :: PandocMonad m => TikiWikiParser m (ListNesting, B.Blocks)
bulletItem = try $ do
prefix <- many1 $ char '*'
many1 $ char ' '
content <- listItemLine (length prefix)
return (LN Bullet (length prefix), B.plain content)
numberedItem :: PandocMonad m => TikiWikiParser m (ListNesting, B.Blocks)
numberedItem = try $ do
prefix <- many1 $ char '#'
many1 $ char ' '
content <- listItemLine (length prefix)
return (LN Numbered (length prefix), B.plain content)
listItemLine :: PandocMonad m => Int -> TikiWikiParser m B.Inlines
listItemLine nest = lineContent >>= parseContent
where
lineContent = do
content <- anyLine
continuation <- optionMaybe listContinuation
return $ filterSpaces content ++ "\n" ++ maybe "" id continuation
filterSpaces = reverse . dropWhile (== ' ') . reverse
listContinuation = string (replicate nest '+') >> lineContent
parseContent x = do
parsed <- parseFromString (many1 inline) x
return $ mconcat parsed
mungeAttrs :: [(String, String)] -> (String, [String], [(String, String)])
mungeAttrs rawAttrs = ("", classes, rawAttrs)
where
color = fromMaybe "" $ lookup "colors" rawAttrs
lnRaw = fromMaybe "1" $ lookup "ln" rawAttrs
ln = if lnRaw == "0" then
""
else
"numberLines"
classes = filter (/= "") [color, ln]
codeMacro :: PandocMonad m => TikiWikiParser m B.Blocks
codeMacro = try $ do
string "{CODE("
rawAttrs <- macroAttrs
string ")}"
body <- manyTill anyChar (try (string "{CODE}"))
newline
if not (null rawAttrs)
then
return $ B.codeBlockWith (mungeAttrs rawAttrs) body
else
return $ B.codeBlock body
inline :: PandocMonad m => TikiWikiParser m B.Inlines
inline = choice [ whitespace
, noparse
, strong
, emph
, nbsp
, image
, htmlComment
, strikeout
, code
, wikiLink
, notExternalLink
, externalLink
, superTag
, superMacro
, subTag
, subMacro
, escapedChar
, colored
, centered
, underlined
, boxed
, breakChars
, str
, symbol
] <?> "inline"
whitespace :: PandocMonad m => TikiWikiParser m B.Inlines
whitespace = (lb <|> regsp)
where lb = try $ skipMany spaceChar >> linebreak >> return B.space
regsp = try $ skipMany1 spaceChar >> return B.space
nbsp :: PandocMonad m => TikiWikiParser m B.Inlines
nbsp = try $ do
string "~hs~"
return $ B.str " NOT SUPPORTED BEGIN: ~hs~ (non-breaking space) :END "
htmlComment :: PandocMonad m => TikiWikiParser m B.Inlines
htmlComment = try $ do
string "~hc~"
inner <- many1 $ noneOf "~"
string "~/hc~"
return $ B.str $ " NOT SUPPORTED: ~hc~ (html comment opener) BEGIN: " ++ inner ++ " ~/hc~ :END "
linebreak :: PandocMonad m => TikiWikiParser m B.Inlines
linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline)
where lastNewline = eof >> return mempty
innerNewline = return B.space
between :: (Monoid c, PandocMonad m, Show b) => TikiWikiParser m a -> TikiWikiParser m b -> (TikiWikiParser m b -> TikiWikiParser m c) -> TikiWikiParser m c
between start end p =
mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end)
enclosed :: (Monoid b, PandocMonad m, Show a) => TikiWikiParser m a -> (TikiWikiParser m a -> TikiWikiParser m b) -> TikiWikiParser m b
enclosed sep p = between sep (try $ sep <* endMarker) p
where
endMarker = lookAhead $ skip endSpace <|> skip (oneOf ".,!?:)|'_") <|> eof
endSpace = (spaceChar <|> newline) >> return B.space
nestedInlines :: (Show a, PandocMonad m) => TikiWikiParser m a -> TikiWikiParser m B.Inlines
nestedInlines end = innerSpace <|> nestedInline
where
innerSpace = try $ whitespace <* notFollowedBy end
nestedInline = notFollowedBy whitespace >> nested inline
image :: PandocMonad m => TikiWikiParser m B.Inlines
image = try $ do
string "{img "
rawAttrs <- sepEndBy1 imageAttr spaces
string "}"
let src = fromMaybe "" $ lookup "src" rawAttrs
let title = fromMaybe src $ lookup "desc" rawAttrs
let alt = fromMaybe title $ lookup "alt" rawAttrs
let classes = map fst $ filter (\(_,b) -> b == "" || b == "y") rawAttrs
if not (null src)
then
return $ B.imageWith ("", classes, rawAttrs) src title (B.str alt)
else
return $ B.str $ " NOT SUPPORTED: image without src attribute BEGIN: {img " ++ printAttrs rawAttrs ++ "} :END "
where
printAttrs attrs = unwords $ map (\(a, b) -> a ++ "=\"" ++ b ++ "\"") attrs
imageAttr :: PandocMonad m => TikiWikiParser m (String, String)
imageAttr = try $ do
key <- many1 (noneOf "=} \t\n")
char '='
optional $ char '"'
value <- many1 (noneOf "}\"\n")
optional $ char '"'
optional $ char ','
return (key, value)
strong :: PandocMonad m => TikiWikiParser m B.Inlines
strong = try $ fmap B.strong (enclosed (string "__") nestedInlines)
emph :: PandocMonad m => TikiWikiParser m B.Inlines
emph = try $ fmap B.emph (enclosed (string "''") nestedInlines)
escapedChar :: PandocMonad m => TikiWikiParser m B.Inlines
escapedChar = try $ do
string "~"
inner <- many1 $ oneOf "0123456789"
string "~"
return $B.str [(toEnum (read inner :: Int)) :: Char]
centered :: PandocMonad m => TikiWikiParser m B.Inlines
centered = try $ do
string "::"
inner <- many1 $ noneOf ":\n"
string "::"
return $ B.str $ " NOT SUPPORTED: :: (centered) BEGIN: ::" ++ inner ++ ":: :END "
colored :: PandocMonad m => TikiWikiParser m B.Inlines
colored = try $ do
string "~~"
inner <- many1 $ noneOf "~\n"
string "~~"
return $ B.str $ " NOT SUPPORTED: ~~ (colored) BEGIN: ~~" ++ inner ++ "~~ :END "
underlined :: PandocMonad m => TikiWikiParser m B.Inlines
underlined = try $ do
string "==="
inner <- many1 $ noneOf "=\n"
string "==="
return $ B.str $ " NOT SUPPORTED: ==== (underlined) BEGIN: ===" ++ inner ++ "=== :END "
boxed :: PandocMonad m => TikiWikiParser m B.Inlines
boxed = try $ do
string "^"
inner <- many1 $ noneOf "^\n"
string "^"
return $ B.str $ " NOT SUPPORTED: ^ (boxed) BEGIN: ^" ++ inner ++ "^ :END "
strikeout :: PandocMonad m => TikiWikiParser m B.Inlines
strikeout = try $ fmap B.strikeout (enclosed (string "--") nestedInlines)
nestedString :: (Show a, PandocMonad m) => TikiWikiParser m a -> TikiWikiParser m String
nestedString end = innerSpace <|> count 1 nonspaceChar
where
innerSpace = try $ many1 spaceChar <* notFollowedBy end
breakChars :: PandocMonad m => TikiWikiParser m B.Inlines
breakChars = try $ string "%%%" >> return B.linebreak
superTag :: PandocMonad m => TikiWikiParser m B.Inlines
superTag = try $ fmap (B.superscript . B.text . fromEntities) ( between (string "{TAG(tag=>sup)}") (string "{TAG}") nestedString)
superMacro :: PandocMonad m => TikiWikiParser m B.Inlines
superMacro = try $ do
string "{SUP("
manyTill anyChar (string ")}")
body <- manyTill anyChar (string "{SUP}")
return $ B.superscript $ B.text body
subTag :: PandocMonad m => TikiWikiParser m B.Inlines
subTag = try $ fmap (B.subscript . B.text . fromEntities) ( between (string "{TAG(tag=>sub)}") (string "{TAG}") nestedString)
subMacro :: PandocMonad m => TikiWikiParser m B.Inlines
subMacro = try $ do
string "{SUB("
manyTill anyChar (string ")}")
body <- manyTill anyChar (string "{SUB}")
return $ B.subscript $ B.text body
code :: PandocMonad m => TikiWikiParser m B.Inlines
code = try $ fmap (B.code . fromEntities) ( between (string "-+") (string "+-") nestedString)
macroAttr :: PandocMonad m => TikiWikiParser m (String, String)
macroAttr = try $ do
key <- many1 (noneOf "=)")
char '='
optional $ char '"'
value <- many1 (noneOf " )\"")
optional $ char '"'
return (key, value)
macroAttrs :: PandocMonad m => TikiWikiParser m [(String, String)]
macroAttrs = try $ sepEndBy macroAttr spaces
noparse :: PandocMonad m => TikiWikiParser m B.Inlines
noparse = try $ do
string "~np~"
body <- manyTill anyChar (string "~/np~")
return $ B.str body
str :: PandocMonad m => TikiWikiParser m B.Inlines
str = fmap B.str (many1 alphaNum <|> count 1 characterReference)
symbol :: PandocMonad m => TikiWikiParser m B.Inlines
symbol = fmap B.str (count 1 nonspaceChar)
notExternalLink :: PandocMonad m => TikiWikiParser m B.Inlines
notExternalLink = try $ do
start <- string "[["
body <- many (noneOf "\n[]")
end <- string "]"
return $ B.text (start ++ body ++ end)
makeLink :: PandocMonad m => String -> String -> String -> TikiWikiParser m B.Inlines
makeLink start middle end = try $ do
st <- getState
guard $ stateAllowLinks st
setState $ st{ stateAllowLinks = False }
(url, title, anchor) <- wikiLinkText start middle end
parsedTitle <- parseFromString (many1 inline) title
setState $ st{ stateAllowLinks = True }
return $ B.link (url++anchor) "" $mconcat parsedTitle
wikiLinkText :: PandocMonad m => String -> String -> String -> TikiWikiParser m (String, String, String)
wikiLinkText start middle end = do
string start
url <- many1 (noneOf $ middle ++ "\n")
seg1 <- option url linkContent
seg2 <- option "" linkContent
string end
if seg2 /= ""
then
return (url, seg2, seg1)
else
return (url, seg1, "")
where
linkContent = do
char '|'
many (noneOf middle)
externalLink :: PandocMonad m => TikiWikiParser m B.Inlines
externalLink = makeLink "[" "]|" "]"
wikiLink :: PandocMonad m => TikiWikiParser m B.Inlines
wikiLink = makeLink "((" ")|" "))"