module Text.Pandoc.Writers.FB2 (writeFB2) where
import Control.Monad.State (StateT, evalStateT, get, modify)
import Control.Monad.State (liftM, liftM2, liftIO)
import Data.ByteString.Base64 (encode)
import Data.Char (toLower, isSpace, isAscii, isControl)
import Data.List (intersperse, intercalate, isPrefixOf, stripPrefix)
import Data.Either (lefts, rights)
import Network.Browser (browse, request, setAllowRedirects, setOutHandler)
import Network.HTTP (catchIO_, getRequest, getHeaders, getResponseBody)
import Network.HTTP (lookupHeader, HeaderName(..), urlEncode)
import Network.URI (isURI, unEscapeString)
import System.FilePath (takeExtension)
import Text.XML.Light
import qualified Control.Exception as E
import qualified Data.ByteString as B
import qualified Text.XML.Light as X
import qualified Text.XML.Light.Cursor as XC
import Text.Pandoc.Definition
import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def)
import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock, capitalize)
data FbRenderState = FbRenderState
{ footnotes :: [ (Int, String, [Content]) ]
, imagesToFetch :: [ (String, String) ]
, parentListMarker :: String
, parentBulletLevel :: Int
, writerOptions :: WriterOptions
} deriving (Show)
type FBM = StateT FbRenderState IO
newFB :: FbRenderState
newFB = FbRenderState { footnotes = [], imagesToFetch = []
, parentListMarker = "", parentBulletLevel = 0
, writerOptions = def }
data ImageMode = NormalImage | InlineImage deriving (Eq)
instance Show ImageMode where
show NormalImage = "imageType"
show InlineImage = "inlineImageType"
writeFB2 :: WriterOptions
-> Pandoc
-> IO String
writeFB2 opts (Pandoc meta blocks) = flip evalStateT newFB $ do
modify (\s -> s { writerOptions = opts { writerStandalone = True } })
desc <- description meta
fp <- frontpage meta
secs <- renderSections 1 blocks
let body = el "body" $ fp ++ secs
notes <- renderFootnotes
(imgs,missing) <- liftM imagesToFetch get >>= \s -> liftIO (fetchImages s)
let body' = replaceImagesWithAlt missing body
let fb2_xml = el "FictionBook" (fb2_attrs, [desc, body'] ++ notes ++ imgs)
return $ xml_head ++ (showContent fb2_xml) ++ "\n"
where
xml_head = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
fb2_attrs =
let xmlns = "http://www.gribuser.ru/xml/fictionbook/2.0"
xlink = "http://www.w3.org/1999/xlink"
in [ uattr "xmlns" xmlns
, attr ("xmlns", "l") xlink ]
frontpage :: Meta -> FBM [Content]
frontpage meta' = do
t <- cMapM toXml . docTitle $ meta'
return $
[ el "title" (el "p" t)
, el "annotation" (map (el "p" . cMap plain)
(docAuthors meta' ++ [docDate meta']))
]
description :: Meta -> FBM Content
description meta' = do
bt <- booktitle meta'
let as = authors meta'
dd <- docdate meta'
return $ el "description"
[ el "title-info" (bt ++ as ++ dd)
, el "document-info" [ el "program-used" "pandoc" ]
]
booktitle :: Meta -> FBM [Content]
booktitle meta' = do
t <- cMapM toXml . docTitle $ meta'
return $ if null t
then []
else [ el "book-title" t ]
authors :: Meta -> [Content]
authors meta' = cMap author (docAuthors meta')
author :: [Inline] -> [Content]
author ss =
let ws = words . cMap plain $ ss
email = (el "email") `fmap` (take 1 $ filter ('@' `elem`) ws)
ws' = filter ('@' `notElem`) ws
names = case ws' of
(nickname:[]) -> [ el "nickname" nickname ]
(fname:lname:[]) -> [ el "first-name" fname
, el "last-name" lname ]
(fname:rest) -> [ el "first-name" fname
, el "middle-name" (concat . init $ rest)
, el "last-name" (last rest) ]
([]) -> []
in list $ el "author" (names ++ email)
docdate :: Meta -> FBM [Content]
docdate meta' = do
let ss = docDate meta'
d <- cMapM toXml ss
return $ if null d
then []
else [el "date" d]
renderSections :: Int -> [Block] -> FBM [Content]
renderSections level blocks = do
let secs = splitSections level blocks
mapM (renderSection level) secs
renderSection :: Int -> ([Inline], [Block]) -> FBM Content
renderSection level (ttl, body) = do
title <- if null ttl
then return []
else return . list . el "title" . formatTitle $ ttl
content <- if (hasSubsections body)
then renderSections (level + 1) body
else cMapM blockToXml body
return $ el "section" (title ++ content)
where
hasSubsections = any isHeaderBlock
formatTitle :: [Inline] -> [Content]
formatTitle inlines =
let lns = split isLineBreak inlines
lns' = map (el "p" . cMap plain) lns
in intersperse (el "empty-line" ()) lns'
split :: (a -> Bool) -> [a] -> [[a]]
split _ [] = []
split cond xs = let (b,a) = break cond xs
in (b:split cond (drop 1 a))
isLineBreak :: Inline -> Bool
isLineBreak LineBreak = True
isLineBreak _ = False
splitSections :: Int -> [Block] -> [([Inline], [Block])]
splitSections level blocks = reverse $ revSplit (reverse blocks)
where
revSplit [] = []
revSplit rblocks =
let (lastsec, before) = break sameLevel rblocks
(header, prevblocks) =
case before of
((Header n _ title):prevblocks') ->
if n == level
then (title, prevblocks')
else ([], before)
_ -> ([], before)
in (header, reverse lastsec) : revSplit prevblocks
sameLevel (Header n _ _) = n == level
sameLevel _ = False
renderFootnotes :: FBM [Content]
renderFootnotes = do
fns <- footnotes `liftM` get
if null fns
then return []
else return . list $
el "body" ([uattr "name" "notes"], map renderFN (reverse fns))
where
renderFN (n, idstr, cs) =
let fn_texts = (el "title" (el "p" (show n))) : cs
in el "section" ([uattr "id" idstr], fn_texts)
fetchImages :: [(String,String)] -> IO ([Content],[String])
fetchImages links = do
imgs <- mapM (uncurry fetchImage) links
return $ (rights imgs, lefts imgs)
fetchImage :: String -> String -> IO (Either String Content)
fetchImage href link = do
mbimg <-
case (isURI link, readDataURI link) of
(True, Just (mime,_,True,base64)) ->
let mime' = map toLower mime
in if mime' == "image/png" || mime' == "image/jpeg"
then return (Just (mime',base64))
else return Nothing
(True, Just _) -> return Nothing
(True, Nothing) -> fetchURL link
(False, _) -> do
d <- nothingOnError $ B.readFile (unEscapeString link)
let t = case map toLower (takeExtension link) of
".png" -> Just "image/png"
".jpg" -> Just "image/jpeg"
".jpeg" -> Just "image/jpeg"
".jpe" -> Just "image/jpeg"
_ -> Nothing
return $ liftM2 (,) t (liftM (toStr . encode) d)
case mbimg of
Just (imgtype, imgdata) -> do
return . Right $ el "binary"
( [uattr "id" href
, uattr "content-type" imgtype]
, txt imgdata )
_ -> return (Left ('#':href))
where
nothingOnError :: (IO B.ByteString) -> (IO (Maybe B.ByteString))
nothingOnError action = liftM Just action `E.catch` omnihandler
omnihandler :: E.SomeException -> IO (Maybe B.ByteString)
omnihandler _ = return Nothing
readDataURI :: String
-> Maybe (String,String,Bool,String)
readDataURI uri =
case stripPrefix "data:" uri of
Nothing -> Nothing
Just rest ->
let meta = takeWhile (/= ',') rest
uridata = drop (length meta + 1) rest
parts = split (== ';') meta
(mime,cs,enc)=foldr upd ("text/plain","US-ASCII",False) parts
in Just (mime,cs,enc,uridata)
where
upd str m@(mime,cs,enc)
| isMimeType str = (str,cs,enc)
| Just str' <- stripPrefix "charset=" str = (mime,str',enc)
| str == "base64" = (mime,cs,True)
| otherwise = m
isMimeType :: String -> Bool
isMimeType s =
case split (=='/') s of
[mtype,msubtype] ->
((map toLower mtype) `elem` types
|| "x-" `isPrefixOf` (map toLower mtype))
&& all valid mtype
&& all valid msubtype
_ -> False
where
types = ["text","image","audio","video","application","message","multipart"]
valid c = isAscii c && not (isControl c) && not (isSpace c) &&
c `notElem` "()<>@,;:\\\"/[]?="
fetchURL :: String -> IO (Maybe (String, String))
fetchURL url = do
flip catchIO_ (return Nothing) $ do
r <- browse $ do
setOutHandler (const (return ()))
setAllowRedirects True
liftM snd . request . getRequest $ url
let content_type = lookupHeader HdrContentType (getHeaders r)
content <- liftM (Just . toStr . encode . toBS) . getResponseBody $ Right r
return $ liftM2 (,) content_type content
toBS :: String -> B.ByteString
toBS = B.pack . map (toEnum . fromEnum)
toStr :: B.ByteString -> String
toStr = map (toEnum . fromEnum) . B.unpack
footnoteID :: Int -> String
footnoteID i = "n" ++ (show i)
linkID :: Int -> String
linkID i = "l" ++ (show i)
blockToXml :: Block -> FBM [Content]
blockToXml (Plain ss) = cMapM toXml ss
blockToXml (Para [Math DisplayMath formula]) = insertMath NormalImage formula
blockToXml (Para [Image atr alt (src,'f':'i':'g':':':tit)]) =
insertImage NormalImage (Image atr alt (src,tit))
blockToXml (Para ss) = liftM (list . el "p") $ cMapM toXml ss
blockToXml (CodeBlock _ s) = return . spaceBeforeAfter .
map (el "p" . el "code") . lines $ s
blockToXml (RawBlock _ s) = return . spaceBeforeAfter .
map (el "p" . el "code") . lines $ s
blockToXml (Div _ bs) = cMapM blockToXml bs
blockToXml (BlockQuote bs) = liftM (list . el "cite") $ cMapM blockToXml bs
blockToXml (OrderedList a bss) = do
state <- get
let pmrk = parentListMarker state
let markers = map ((pmrk ++ " ") ++) $ orderedListMarkers a
let mkitem mrk bs = do
modify (\s -> s { parentListMarker = mrk })
itemtext <- cMapM blockToXml . paraToPlain $ bs
modify (\s -> s { parentListMarker = pmrk })
return . el "p" $ [ txt mrk, txt " " ] ++ itemtext
mapM (uncurry mkitem) (zip markers bss)
blockToXml (BulletList bss) = do
state <- get
let level = parentBulletLevel state
let pmrk = parentListMarker state
let prefix = replicate (length pmrk) ' '
let bullets = ["\x2022", "\x25e6", "*", "\x2043", "\x2023"]
let mrk = prefix ++ bullets !! (level `mod` (length bullets))
let mkitem bs = do
modify (\s -> s { parentBulletLevel = (level+1) })
itemtext <- cMapM blockToXml . paraToPlain $ bs
modify (\s -> s { parentBulletLevel = level })
return $ el "p" $ [ txt (mrk ++ " ") ] ++ itemtext
mapM mkitem bss
blockToXml (DefinitionList defs) =
cMapM mkdef defs
where
mkdef (term, bss) = do
def' <- cMapM (cMapM blockToXml . sep . paraToPlain . map indent) bss
t <- wrap "strong" term
return [ el "p" t, el "p" def' ]
sep blocks =
if all needsBreak blocks then
blocks ++ [Plain [LineBreak]]
else
blocks
needsBreak (Para _) = False
needsBreak (Plain ins) = LineBreak `notElem` ins
needsBreak _ = True
blockToXml (Header _ _ _) =
error "unexpected header in section text"
blockToXml HorizontalRule = return
[ el "empty-line" ()
, el "p" (txt (replicate 10 '—'))
, el "empty-line" () ]
blockToXml (Table caption aligns _ headers rows) = do
hd <- mkrow "th" headers aligns
bd <- mapM (\r -> mkrow "td" r aligns) rows
c <- return . el "emphasis" =<< cMapM toXml caption
return [el "table" (hd : bd), el "p" c]
where
mkrow :: String -> [TableCell] -> [Alignment] -> FBM Content
mkrow tag cells aligns' =
(el "tr") `liftM` (mapM (mkcell tag) (zip cells aligns'))
mkcell :: String -> (TableCell, Alignment) -> FBM Content
mkcell tag (cell, align) = do
cblocks <- cMapM blockToXml cell
return $ el tag ([align_attr align], cblocks)
align_attr a = Attr (QName "align" Nothing Nothing) (align_str a)
align_str AlignLeft = "left"
align_str AlignCenter = "center"
align_str AlignRight = "right"
align_str AlignDefault = "left"
blockToXml Null = return []
paraToPlain :: [Block] -> [Block]
paraToPlain [] = []
paraToPlain (Para inlines : rest) =
let p = (Plain (inlines ++ [LineBreak]))
in p : paraToPlain rest
paraToPlain (p:rest) = p : paraToPlain rest
indent :: Block -> Block
indent = indentBlock
where
spacer :: String
spacer = replicate 4 ' '
indentBlock (Plain ins) = Plain ((Str spacer):ins)
indentBlock (Para ins) = Para ((Str spacer):ins)
indentBlock (CodeBlock a s) =
let s' = unlines . map (spacer++) . lines $ s
in CodeBlock a s'
indentBlock (BlockQuote bs) = BlockQuote (map indent bs)
indentBlock (Header l attr' ins) = Header l attr' (indentLines ins)
indentBlock everythingElse = everythingElse
indentLines :: [Inline] -> [Inline]
indentLines ins = let lns = split isLineBreak ins :: [[Inline]]
in intercalate [LineBreak] $ map ((Str spacer):) lns
toXml :: Inline -> FBM [Content]
toXml (Str s) = return [txt s]
toXml (Span _ ils) = cMapM toXml ils
toXml (Emph ss) = list `liftM` wrap "emphasis" ss
toXml (Strong ss) = list `liftM` wrap "strong" ss
toXml (Strikeout ss) = list `liftM` wrap "strikethrough" ss
toXml (Superscript ss) = list `liftM` wrap "sup" ss
toXml (Subscript ss) = list `liftM` wrap "sub" ss
toXml (SmallCaps ss) = cMapM toXml $ capitalize ss
toXml (Quoted SingleQuote ss) = do
inner <- cMapM toXml ss
return $ [txt "‘"] ++ inner ++ [txt "’"]
toXml (Quoted DoubleQuote ss) = do
inner <- cMapM toXml ss
return $ [txt "“"] ++ inner ++ [txt "”"]
toXml (Cite _ ss) = cMapM toXml ss
toXml (Code _ s) = return [el "code" s]
toXml Space = return [txt " "]
toXml SoftBreak = return [txt " "]
toXml LineBreak = return [el "empty-line" ()]
toXml (Math _ formula) = insertMath InlineImage formula
toXml (RawInline _ _) = return []
toXml (Link _ text (url,ttl)) = do
fns <- footnotes `liftM` get
let n = 1 + length fns
let ln_id = linkID n
let ln_ref = list . el "sup" . txt $ "[" ++ show n ++ "]"
ln_text <- cMapM toXml text
let ln_desc =
let ttl' = dropWhile isSpace ttl
in if null ttl'
then list . el "p" $ el "code" url
else list . el "p" $ [ txt (ttl' ++ ": "), el "code" url ]
modify (\s -> s { footnotes = (n, ln_id, ln_desc) : fns })
return $ ln_text ++
[ el "a"
( [ attr ("l","href") ('#':ln_id)
, uattr "type" "note" ]
, ln_ref) ]
toXml img@(Image _ _ _) = insertImage InlineImage img
toXml (Note bs) = do
fns <- footnotes `liftM` get
let n = 1 + length fns
let fn_id = footnoteID n
fn_desc <- cMapM blockToXml bs
modify (\s -> s { footnotes = (n, fn_id, fn_desc) : fns })
let fn_ref = el "sup" . txt $ "[" ++ show n ++ "]"
return . list $ el "a" ( [ attr ("l","href") ('#':fn_id)
, uattr "type" "note" ]
, fn_ref )
insertMath :: ImageMode -> String -> FBM [Content]
insertMath immode formula = do
htmlMath <- return . writerHTMLMathMethod . writerOptions =<< get
case htmlMath of
WebTeX url -> do
let alt = [Code nullAttr formula]
let imgurl = url ++ urlEncode formula
let img = Image nullAttr alt (imgurl, "")
insertImage immode img
_ -> return [el "code" formula]
insertImage :: ImageMode -> Inline -> FBM [Content]
insertImage immode (Image _ alt (url,ttl)) = do
images <- imagesToFetch `liftM` get
let n = 1 + length images
let fname = "image" ++ show n
modify (\s -> s { imagesToFetch = (fname, url) : images })
let ttlattr = case (immode, null ttl) of
(NormalImage, False) -> [ uattr "title" ttl ]
_ -> []
return . list $
el "image" $
[ attr ("l","href") ('#':fname)
, attr ("l","type") (show immode)
, uattr "alt" (cMap plain alt) ]
++ ttlattr
insertImage _ _ = error "unexpected inline instead of image"
replaceImagesWithAlt :: [String] -> Content -> Content
replaceImagesWithAlt missingHrefs body =
let cur = XC.fromContent body
cur' = replaceAll cur
in XC.toTree . XC.root $ cur'
where
replaceAll :: XC.Cursor -> XC.Cursor
replaceAll c =
let n = XC.current c
c' = if isImage n && isMissing n
then XC.modifyContent replaceNode c
else c
in case XC.nextDF c' of
(Just cnext) -> replaceAll cnext
Nothing -> c'
isImage :: Content -> Bool
isImage (Elem e) = (elName e) == (uname "image")
isImage _ = False
isMissing (Elem img@(Element _ _ _ _)) =
let imgAttrs = elAttribs img
badAttrs = map (attr ("l","href")) missingHrefs
in any (`elem` imgAttrs) badAttrs
isMissing _ = False
replaceNode :: Content -> Content
replaceNode n@(Elem img@(Element _ _ _ _)) =
let attrs = elAttribs img
alt = getAttrVal attrs (uname "alt")
imtype = getAttrVal attrs (qname "l" "type")
in case (alt, imtype) of
(Just alt', Just imtype') ->
if imtype' == show NormalImage
then el "p" alt'
else txt alt'
(Just alt', Nothing) -> txt alt'
_ -> n
replaceNode n = n
getAttrVal :: [X.Attr] -> QName -> Maybe String
getAttrVal attrs name =
case filter ((name ==) . attrKey) attrs of
(a:_) -> Just (attrVal a)
_ -> Nothing
wrap :: String -> [Inline] -> FBM Content
wrap tagname inlines = el tagname `liftM` cMapM toXml inlines
list :: a -> [a]
list = (:[])
plain :: Inline -> String
plain (Str s) = s
plain (Emph ss) = concat (map plain ss)
plain (Span _ ss) = concat (map plain ss)
plain (Strong ss) = concat (map plain ss)
plain (Strikeout ss) = concat (map plain ss)
plain (Superscript ss) = concat (map plain ss)
plain (Subscript ss) = concat (map plain ss)
plain (SmallCaps ss) = concat (map plain ss)
plain (Quoted _ ss) = concat (map plain ss)
plain (Cite _ ss) = concat (map plain ss)
plain (Code _ s) = s
plain Space = " "
plain SoftBreak = " "
plain LineBreak = "\n"
plain (Math _ s) = s
plain (RawInline _ s) = s
plain (Link _ text (url,_)) = concat (map plain text ++ [" <", url, ">"])
plain (Image _ alt _) = concat (map plain alt)
plain (Note _) = ""
el :: (Node t)
=> String
-> t
-> Content
el name cs = Elem $ unode name cs
spaceBeforeAfter :: [Content] -> [Content]
spaceBeforeAfter cs =
let emptyline = el "empty-line" ()
in [emptyline] ++ cs ++ [emptyline]
txt :: String -> Content
txt s = Text $ CData CDataText s Nothing
uattr :: String -> String -> Text.XML.Light.Attr
uattr name val = Attr (uname name) val
attr :: (String, String) -> String -> Text.XML.Light.Attr
attr (ns, name) val = Attr (qname ns name) val
uname :: String -> QName
uname name = QName name Nothing Nothing
qname :: String -> String -> QName
qname ns name = QName name Nothing (Just ns)
cMap :: (a -> [b]) -> [a] -> [b]
cMap = concatMap
cMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b]
cMapM f xs = concat `liftM` mapM f xs