{-# LANGUAGE PatternGuards, OverloadedStrings, FlexibleInstances, ScopedTypeVariables, CPP #-} ----------------------------------------------------------------------------- -- | -- Module : Text.CSL.Pandoc -- Copyright : (c) Andrea Rossato -- License : GPL2 -- -- Maintainer : Andrea Rossato -- Stability : unstable -- Portability : unportable -- -- A pandoc filter for citeproc-hs -- ----------------------------------------------------------------------------- module Text.CSL.Pandoc ( processCites , processCites' ) where import Control.Applicative ((<|>)) import Control.Monad import Control.Monad.State import Data.Aeson import Data.ByteString.Lazy.UTF8 ( toString ) import Data.Char ( isDigit, isPunctuation, isSpace ) import Data.List ( (\\) ) import qualified Data.Map as M import Data.Monoid (mempty) import System.FilePath import System.Directory (doesFileExist) import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Parsec hiding (State, (<|>)) import Text.Pandoc import Text.Pandoc.Walk import Text.Pandoc.Builder (Inlines, cite) import Text.Pandoc.Shared (stringify) import Text.CSL.Reference hiding (processCites, Value) import Text.CSL.Input.Bibutils (readBiblioFile) import Text.CSL.Input.Json (readJsonInputString, readJsonAbbrevFile) import Text.CSL.Proc import Text.CSL.Output.Pandoc (renderPandoc, renderPandoc', isPunctuationInQuote) import Text.CSL.Parser import Text.CSL.Output.Pandoc ( headInline, tailFirstInlineStr, initInline, toCapital ) import Text.CSL.Style hiding (Cite(..), Citation(..), query) import qualified Text.CSL.Style as CSL -- | Process a 'Pandoc' document by adding citations formatted -- according to a CSL style. Add a bibliography (if one is called -- for) at the end of the document. processCites :: Style -> [Reference] -> Pandoc -> Pandoc processCites style refs (Pandoc m b1) = let Pandoc _ b2 = evalState (walkM setHashes $ Pandoc m b1) 1 (nts,grps) = if styleClass style == "note" then let cits = query getCite b2 ncits = map (query getCite) $ query getNote b2 needNt = cits \\ concat ncits in (,) needNt $ getNoteCitations needNt b2 else (,) [] $ query getCitation b2 nocite = query getCitation m result = citeproc procOpts style refs (setNearNote style $ map (map toCslCite) $ grps ++ nocite) cits_map = M.fromList $ zip grps (citations result) biblioList = map (renderPandoc' style) (bibliography result) Pandoc _ b3 = bottomUp (mvPunct style) . bottomUp (processCite style cits_map) $ Pandoc m b2 (bs, lastb) = case reverse b3 of (Header lev (id',classes,kvs) ys) : xs -> (reverse xs, [Header lev (id',classes',kvs) ys]) where classes' = "unnumbered" : [c | c <- classes, c /= "unnumbered"] _ -> (b3, []) pandocFinal = Pandoc m $ bottomUp (concatMap removeNocaseSpans) $ bs ++ if lookupMeta "suppress-bibliography" m == Just (MetaBool True) then [] else [Div ("",["references"],[]) (lastb ++ biblioList)] in if lookupMeta "suppress-note-generation" m == Just (MetaBool True) then pandocFinal else generateNotes nts pandocFinal removeNocaseSpans :: Inline -> [Inline] removeNocaseSpans (Span ("",["nocase"],[]) xs) = xs removeNocaseSpans x = [x] -- | Process a 'Pandoc' document by adding citations formatted -- according to a CSL style. The style filename is derived from -- the `csl` field of the metadata, and the references are taken -- from the `references` field or read from a file in the `bibliography` -- field. processCites' :: Pandoc -> IO Pandoc processCites' (Pandoc meta blocks) = do let convertRefs = readJsonInputString . toString . encode . metaValueToJSON inlineRefs = convertRefs $ maybe (MetaList []) id $ lookupMeta "references" meta bibRefs <- getBibRefs $ maybe (MetaList []) id $ lookupMeta "bibliography" meta let refs = inlineRefs ++ bibRefs cslfile = (lookupMeta "csl" meta <|> lookupMeta "citation-style" meta) >>= toPath csl <- case cslfile of Just f | not (null f) -> readCSLFile f _ -> error "no CSL file" let cslAbbrevFile = lookupMeta "citation-abbreviations" meta >>= toPath abbrevs <- maybe (return []) readJsonAbbrevFile cslAbbrevFile let csl' = csl{ styleAbbrevs = abbrevs } return $ processCites csl' refs $ Pandoc meta blocks toPath :: MetaValue -> Maybe String toPath (MetaString s) = Just s toPath (MetaInlines ils) = Just $ stringify ils toPath _ = Nothing getBibRefs :: MetaValue -> IO [Reference] getBibRefs (MetaList xs) = concat `fmap` mapM getBibRefs xs getBibRefs (MetaInlines xs) = getBibRefs (MetaString $ stringify xs) getBibRefs (MetaString s) = do path <- findFile ["."] s >>= maybe (error $ "Could not find " ++ s) return map unescapeRefId `fmap` readBiblioFile path getBibRefs _ = return [] -- unescape reference ids, which may contain XML entities, so -- that we can do lookups with regular string equality unescapeRefId :: Reference -> Reference unescapeRefId ref = ref{ refId = decodeEntities (refId ref) } decodeEntities :: String -> String decodeEntities [] = [] decodeEntities ('&':xs) = let (ys,zs) = break (==';') xs in case zs of ';':ws -> case lookupEntity ('&':ys ++ ";") of #if MIN_VERSION_tagsoup(0,13,0) Just s -> s ++ decodeEntities ws #else Just c -> [c] ++ decodeEntities ws #endif Nothing -> '&' : decodeEntities xs _ -> '&' : decodeEntities xs decodeEntities (x:xs) = x : decodeEntities xs -- | Substitute 'Cite' elements with formatted citations. processCite :: Style -> M.Map [Citation] [FormattedOutput] -> [Inline] -> [Inline] processCite s cs ((Cite t _):is) = case M.lookup t cs of Just (x:xs) | isTextualCitation t && not (null xs) -> renderPandoc s [x] ++ [Space, Cite t (renderPandoc s xs)] ++ processCite s cs is | otherwise -> Cite t (renderPandoc s (x:xs)) : processCite s cs is _ -> [Strong [Str "???"]] ++ processCite s cs is -- TODO raise error instead? processCite _ _ x = x isNote :: Inline -> Bool isNote (Note _) = True isNote (Cite _ [Note _]) = True isNote _ = False mvPunctInsideQuote :: Inline -> Inline -> [Inline] mvPunctInsideQuote (Quoted qt ils) (Str s) | s `elem` [".", ","] = [Quoted qt (init ils ++ (mvPunctInsideQuote (last ils) (Str s)))] mvPunctInsideQuote il il' = [il, il'] mvPunct :: Style -> [Inline] -> [Inline] mvPunct _ (Space : Space : xs) = Space : xs mvPunct _ (Space : x : ys) | isNote x, startWithPunct ys = Str (headInline ys) : x : tailFirstInlineStr ys mvPunct _ (Cite cs ils : ys) | length ils > 1 , isNote (last ils) , startWithPunct ys = Cite cs (init ils ++ [Str (headInline ys) | not (endWithPunct (init ils))] ++ [last ils]) : tailFirstInlineStr ys mvPunct sty (q@(Quoted _ _) : w@(Str _) : x : ys) | isNote x, isPunctuationInQuote sty = mvPunctInsideQuote q w ++ (x : ys) mvPunct _ (Space : x : ys) | isNote x = x : ys mvPunct _ (Space : x@(Cite _ (Superscript _ : _)) : ys) = x : ys mvPunct _ xs = xs endWithPunct :: [Inline] -> Bool endWithPunct [] = True endWithPunct xs@(_:_) = case reverse (stringify xs) of [] -> True -- covers .), .", etc.: (d:c:_) | isPunctuation d && isEndPunct c -> True (c:_) | isEndPunct c -> True | otherwise -> False where isEndPunct c = c `elem` ".,;:!?" startWithPunct :: [Inline] -> Bool startWithPunct = and . map (`elem` ".,;:!?") . headInline isTextualCitation :: [Citation] -> Bool isTextualCitation (c:_) = citationMode c == AuthorInText isTextualCitation _ = False -- | Retrieve all citations from a 'Pandoc' docuument. To be used with -- 'query'. getCitation :: Inline -> [[Citation]] getCitation i | Cite t _ <- i = [t] | otherwise = [] getCite :: Inline -> [Inline] getCite i | Cite _ _ <- i = [i] | otherwise = [] getNote :: Inline -> [Inline] getNote i | Note _ <- i = [i] | otherwise = [] getNoteCitations :: [Inline] -> [Block] -> [[Citation]] getNoteCitations needNote = let mvCite i = if i `elem` needNote then Note [Para [i]] else i setNote = bottomUp mvCite getCits = concat . flip (zipWith $ setCiteNoteNum) [1..] . map (query getCite) . query getNote . setNote in query getCitation . getCits setCiteNoteNum :: [Inline] -> Int -> [Inline] setCiteNoteNum ((Cite cs o):xs) n = Cite (setCitationNoteNum n cs) o : setCiteNoteNum xs n setCiteNoteNum _ _ = [] setCitationNoteNum :: Int -> [Citation] -> [Citation] setCitationNoteNum i = map $ \c -> c { citationNoteNum = i} setHashes :: Inline -> State Int Inline setHashes i | Cite t ils <- i = do t' <- mapM setHash t return $ Cite t' ils | otherwise = return i setHash :: Citation -> State Int Citation setHash c = do ident <- get put $ ident + 1 return c{ citationHash = ident } toCslCite :: Citation -> CSL.Cite toCslCite c = let (l,l', s) = locatorWords $ citationSuffix c (la,lo) = parseLocator (l ++ " " ++ l') s' = case (la,lo,s) of -- treat a bare locator as if it begins with space -- so @item1 [blah] is like [@item1, blah] ("","",(x:_)) | not (isPunct x) -> Space : s _ -> s isPunct (Str (x:_)) = isPunctuation x isPunct _ = False citMode = case citationMode c of AuthorInText -> (True, False) SuppressAuthor -> (False,True ) NormalCitation -> (False,False) in emptyCite { CSL.citeId = citationId c , CSL.citePrefix = PandocText $ citationPrefix c , CSL.citeSuffix = PandocText $ s' , CSL.citeLabel = la , CSL.citeLocator = lo , CSL.citeNoteNumber = show $ citationNoteNum c , CSL.authorInText = fst citMode , CSL.suppressAuthor = snd citMode , CSL.citeHash = citationHash c } locatorWords :: [Inline] -> (String, String, [Inline]) locatorWords inp = case parse pLocatorWords "suffix" $ splitStrWhen (\c -> isPunctuation c || isSpace c) inp of Right r -> r Left _ -> ("","",inp) pLocatorWords :: Parsec [Inline] st (String, String, [Inline]) pLocatorWords = do (la,lo) <- pLocator s <- getInput -- rest is suffix return (la, lo, s) pMatch :: (Inline -> Bool) -> Parsec [Inline] st Inline pMatch condition = try $ do t <- anyToken guard $ condition t return t pSpace :: Parsec [Inline] st Inline pSpace = pMatch (\t -> t == Space || t == Str "\160") pLocator :: Parsec [Inline] st (String, String) pLocator = try $ do optional $ pMatch (== Str ",") optional pSpace rawLoc <- many (notFollowedBy pSpace >> notFollowedBy (pWordWithDigits True) >> anyToken) la <- case stringify rawLoc of "" -> lookAhead (optional pSpace >> pDigit) >> return "page" s -> return s g <- pWordWithDigits True gs <- many (pWordWithDigits False) let lo = concat (g:gs) return (la, lo) pWordWithDigits :: Bool -> Parsec [Inline] st String pWordWithDigits isfirst = try $ do punct <- if isfirst then return "" else stringify `fmap` pLocatorPunct sp <- option "" (pSpace >> return " ") r <- many1 (notFollowedBy pSpace >> notFollowedBy pLocatorPunct >> anyToken) let s = stringify r guard $ any isDigit s || all (`elem` "IVXLCM") s return $ punct ++ sp ++ s pDigit :: Parsec [Inline] st () pDigit = do t <- anyToken case t of Str (d:_) | isDigit d -> return () _ -> mzero pLocatorPunct :: Parsec [Inline] st Inline pLocatorPunct = pMatch isLocatorPunct isLocatorPunct :: Inline -> Bool isLocatorPunct (Str ",") = True isLocatorPunct _ = False -- Utilities -- FIXME: isPunctuationInQuote :: Style -> Bool --isPunctuationInQuote = const False {- isPunctuationInQuote :: Style -> Bool isPunctuationInQuote = or . CSL.query punctIn' where punctIn' n | ("punctuation-in-quote","true") <- n = [True] | otherwise = [False] --} splitStrWhen :: (Char -> Bool) -> [Inline] -> [Inline] splitStrWhen _ [] = [] splitStrWhen p (Str xs : ys) = go xs ++ splitStrWhen p ys where go [] = [] go s = case break p s of ([],[]) -> [] (zs,[]) -> [Str zs] ([],(w:ws)) -> Str [w] : go ws (zs,(w:ws)) -> Str zs : Str [w] : go ws splitStrWhen p (x : ys) = x : splitStrWhen p ys findFile :: [FilePath] -> FilePath -> IO (Maybe FilePath) findFile [] _ = return Nothing findFile (p:ps) f = do exists <- doesFileExist (p f) if exists then return $ Just (p f) else findFile ps f generateNotes :: [Inline] -> Pandoc -> Pandoc generateNotes needNote = bottomUp (mvCiteInNote needNote) procInlines :: ([Inline] -> [Inline]) -> Block -> Block procInlines f b | Plain inls <- b = Plain $ f inls | Para inls <- b = Para $ f inls | Header i x inls <- b = Header i x $ f inls | otherwise = b mvCiteInNote :: [Inline] -> Block -> Block mvCiteInNote is = --error ("ciao ciao " ++ show is) procInlines mvCite where mvCite :: [Inline] -> [Inline] mvCite inls | x:i:xs <- inls, startWithPunct xs , x == Space, i `elem_` is = switch i xs ++ mvCite (tailFirstInlineStr xs) | x:i:xs <- inls , x == Space, i `elem_` is = mvInNote i : mvCite xs | i:xs <- inls, i `elem_` is , startWithPunct xs = switch i xs ++ mvCite (tailFirstInlineStr xs) | i:xs <- inls, Note _ <- i = checkNt i : mvCite xs | i:xs <- inls = i : mvCite xs | otherwise = [] --elem_ x xs = case x of Cite cs _ -> (Cite cs []) `elem` xs; _ -> False elem_ (Cite x _) = not . null . filter (\(Cite cs _) -> cs == x) elem_ _ = const False switch i xs = Str (headInline xs) : mvInNote i : [] mvInNote i | Cite t o <- i = Note [Para [Cite t $ sanitize o]] | otherwise = Note [Para [i ]] sanitize i | endWithPunct i = toCapital i | otherwise = toCapital (i ++ [Str "."]) checkPt i | Cite c o : xs <- i , endWithPunct o, startWithPunct xs , endWithPunct o = Cite c (initInline o) : checkPt xs | x:xs <- i = x : checkPt xs | otherwise = [] checkNt = bottomUp $ procInlines checkPt metaValueToJSON :: MetaValue -> Value metaValueToJSON (MetaMap m) = toJSON $ M.map metaValueToJSON m metaValueToJSON (MetaList xs) = toJSON $ map metaValueToJSON xs metaValueToJSON (MetaString t) = toJSON t metaValueToJSON (MetaBool b) = toJSON b metaValueToJSON (MetaInlines ils) = toJSON (stringify ils) metaValueToJSON (MetaBlocks bs) = toJSON bs