module Text.CSL.Pandoc (processCites, processCites') where
import Text.Pandoc
import Text.Pandoc.Walk
import Text.Pandoc.Builder (setMeta, deleteMeta)
import Text.Pandoc.Shared (stringify)
import Text.HTML.TagSoup.Entity (lookupEntity)
import qualified Data.ByteString.Lazy as L
import System.SetEnv (setEnv)
import System.Environment (getEnv)
import Control.Applicative ((<|>))
import Data.Aeson
import Data.Char ( isDigit, isPunctuation, toLower, isSpace )
import qualified Data.Map as M
import Text.CSL.Reference hiding (processCites, Value)
import Text.CSL.Input.Bibutils (readBiblioFile, convertRefs)
import Text.CSL.Style hiding (Cite(..), Citation(..))
import Text.CSL.Proc
import Text.CSL.Output.Pandoc (renderPandoc, renderPandoc')
import qualified Text.CSL.Style as CSL
import Text.CSL.Parser
import Text.CSL.Output.Pandoc ( headInline, tailInline, initInline,
toCapital )
import Text.CSL.Data (getDefaultCSL)
import Text.Parsec hiding (State, (<|>))
import Control.Monad
import qualified Control.Exception as E
import Control.Monad.State
import System.FilePath
import System.Directory (getAppUserDataDirectory)
import Text.CSL.Util (findFile, splitStrWhen, tr', parseRomanNumeral, trim)
import System.IO.Error (isDoesNotExistError)
import Data.Maybe (fromMaybe)
processCites :: Style -> [Reference] -> Pandoc -> Pandoc
processCites style refs (Pandoc m1 b1) =
let metanocites = lookupMeta "nocite" m1
nocites = mkNociteWildcards refs . query getCitation <$> metanocites
Pandoc m2 b2 = evalState (walkM setHashes $ Pandoc (deleteMeta "nocite" m1) b1) 1
grps = query getCitation (Pandoc m2 b2) ++ fromMaybe [] nocites
locMap = locatorMap style
result = citeproc procOpts{ linkCitations = isLinkCitations m2}
style refs (setNearNote style $
map (map (toCslCite locMap)) grps)
cits_map = tr' "cits_map" $ M.fromList $ zip grps (citations result)
biblioList = map (renderPandoc' style) $ zip (bibliography result) (citationIds result)
moveNotes = case lookupMeta "notes-after-punctuation" m1 of
Just (MetaBool False) -> False
_ -> True
Pandoc m3 bs = bottomUp (mvPunct moveNotes style) . deNote .
topDown (processCite style cits_map) $ Pandoc m2 b2
m = case metanocites of
Nothing -> m3
Just x -> setMeta "nocite" x m3
in Pandoc m $ bottomUp (concatMap removeNocaseSpans)
$ insertRefs m biblioList bs
insertRefs :: Meta -> [Block] -> [Block] -> [Block]
insertRefs _ [] bs = bs
insertRefs meta refs bs =
if isRefRemove meta
then bs
else case runState (walkM go bs) False of
(bs', True) -> bs'
(_, False) ->
case reverse bs of
(Header lev (id',classes,kvs) ys) : xs ->
reverse xs ++
[Header lev (id',addUnNumbered classes,kvs) ys,
Div ("refs",["references"],[]) refs]
_ -> bs ++ refHeader ++
[Div ("refs",["references"],[]) refs]
where go :: Block -> State Bool Block
go (Div attr@("refs",_,_) xs) = do
put True
return $ Div attr (xs ++ refs)
go x = return x
addUnNumbered cs = "unnumbered" : [c | c <- cs, c /= "unnumbered"]
refHeader = case refTitle meta of
Just ils ->
[Header 1 ("bibliography", ["unnumbered"], []) ils]
_ -> []
refTitle :: Meta -> Maybe [Inline]
refTitle meta =
case lookupMeta "reference-section-title" meta of
Just (MetaString s) -> Just [Str s]
Just (MetaInlines ils) -> Just ils
Just (MetaBlocks [Plain ils]) -> Just ils
Just (MetaBlocks [Para ils]) -> Just ils
_ -> Nothing
isRefRemove :: Meta -> Bool
isRefRemove meta =
case lookupMeta "suppress-bibliography" meta of
Just (MetaBool True) -> True
_ -> False
isLinkCitations :: Meta -> Bool
isLinkCitations meta =
case lookupMeta "link-citations" meta of
Just (MetaBool True) -> True
Just (MetaString s) -> map toLower s `elem` yesValues
Just (MetaInlines ils) -> map toLower (stringify ils) `elem` yesValues
_ -> False
where yesValues = ["true", "yes", "on"]
mkNociteWildcards :: [Reference] -> [[Citation]] -> [[Citation]]
mkNociteWildcards refs nocites =
map (\citgrp -> expandStar citgrp) nocites
where expandStar cs =
case [c | c <- cs
, citationId c == "*"] of
[] -> cs
_ -> allcites
allcites = map (\ref -> Citation{
citationId = unLiteral (refId ref),
citationPrefix = [],
citationSuffix = [],
citationMode = NormalCitation,
citationNoteNum = 0,
citationHash = 0 }) refs
removeNocaseSpans :: Inline -> [Inline]
removeNocaseSpans (Span ("",["nocase"],[]) xs) = xs
removeNocaseSpans x = [x]
processCites' :: Pandoc -> IO Pandoc
processCites' (Pandoc meta blocks) = do
mbcsldir <- E.catch (Just <$> getAppUserDataDirectory "csl") $ \e ->
if isDoesNotExistError e
then return Nothing
else E.throwIO e
mbpandocdir <- E.catch (Just <$> getAppUserDataDirectory "pandoc") $ \e ->
if isDoesNotExistError e
then return Nothing
else E.throwIO e
let inlineRefError s = error $ "Error parsing references: " ++ s
let inlineRefs = either inlineRefError id
$ convertRefs $ lookupMeta "references" meta
let cslfile = (lookupMeta "csl" meta <|> lookupMeta "citation-style" meta)
>>= toPath
let mbLocale = (lookupMeta "lang" meta `mplus` lookupMeta "locale" meta)
>>= toPath
let tryReadCSLFile Nothing _ = mzero
tryReadCSLFile (Just d) f = E.catch (readCSLFile mbLocale (d </> f))
(\(_ :: E.SomeException) -> mzero)
csl <- case cslfile of
Just f | not (null f) -> readCSLFile mbLocale f
_ -> tryReadCSLFile mbpandocdir "default.csl"
`mplus` tryReadCSLFile mbcsldir "chicago-author-date.csl"
`mplus` (getDefaultCSL >>=
localizeCSL mbLocale . parseCSL')
case styleLocale csl of
(l:_) -> do
setEnv "LC_ALL" (localeLang l)
setEnv "LANG" (localeLang l)
[] -> do
envlang <- getEnv "LANG"
if null envlang
then do
setEnv "LANG" "en-US.UTF-8"
setEnv "LC_ALL" "en-US.UTF-8"
else do
setEnv "LC_ALL" envlang
bibRefs <- getBibRefs $ maybe (MetaList []) id
$ lookupMeta "bibliography" meta
let refs = inlineRefs ++ bibRefs
let cslAbbrevFile = lookupMeta "citation-abbreviations" meta >>= toPath
let skipLeadingSpace = L.dropWhile (\s -> s == 32 || (s >= 9 && s <= 13))
abbrevs <- maybe (return (Abbreviations M.empty))
(\f -> findFile (maybe ["."] (\g -> [".", g]) mbcsldir) f >>=
maybe (error $ "Could not find " ++ f) return >>=
L.readFile >>=
either error return . eitherDecode . skipLeadingSpace)
cslAbbrevFile
let csl' = csl{ styleAbbrevs = abbrevs }
return $ processCites csl' refs $ Pandoc meta blocks
toPath :: MetaValue -> Maybe String
toPath (MetaString s) = Just s
toPath (MetaList xs) = case reverse xs of
[] -> Nothing
(x:_) -> toPath x
toPath (MetaInlines ils) = Just $ stringify ils
toPath _ = Nothing
getBibRefs :: MetaValue -> IO [Reference]
getBibRefs (MetaList xs) = concat `fmap` mapM getBibRefs xs
getBibRefs (MetaInlines xs) = getBibRefs (MetaString $ stringify xs)
getBibRefs (MetaString s) = do
path <- findFile ["."] s >>= maybe (error $ "Could not find " ++ s) return
map unescapeRefId `fmap` readBiblioFile path
getBibRefs _ = return []
unescapeRefId :: Reference -> Reference
unescapeRefId ref = ref{ refId = Literal $ decodeEntities (unLiteral $ refId ref) }
decodeEntities :: String -> String
decodeEntities [] = []
decodeEntities ('&':xs) =
let (ys,zs) = break (==';') xs
in case zs of
';':ws -> case lookupEntity ('&':ys ++ ";") of
#if MIN_VERSION_tagsoup(0,13,0)
Just s -> s ++ decodeEntities ws
#else
Just c -> [c] ++ decodeEntities ws
#endif
Nothing -> '&' : decodeEntities xs
_ -> '&' : decodeEntities xs
decodeEntities (x:xs) = x : decodeEntities xs
processCite :: Style -> M.Map [Citation] Formatted -> Inline -> Inline
processCite s cs (Cite t _) =
case M.lookup t cs of
Just (Formatted (x:xs)) -> Cite t (renderPandoc s (Formatted (x:xs)))
_ -> Strong [Str "???"]
processCite _ _ x = x
isNote :: Inline -> Bool
isNote (Note _) = True
isNote (Cite _ [Note _]) = True
isNote _ = False
mvPunctInsideQuote :: Inline -> Inline -> [Inline]
mvPunctInsideQuote (Quoted qt ils) (Str s) | s `elem` [".", ","] =
[Quoted qt (init ils ++ (mvPunctInsideQuote (last ils) (Str s)))]
mvPunctInsideQuote il il' = [il, il']
isSpacy :: Inline -> Bool
isSpacy Space = True
isSpacy SoftBreak = True
isSpacy _ = False
mvPunct :: Bool -> Style -> [Inline] -> [Inline]
mvPunct _ _ (x : Space : xs) | isSpacy x = x : xs
mvPunct moveNotes _ (s : x : ys) | isSpacy s, isNote x, startWithPunct ys =
if moveNotes
then Str (headInline ys) : x : tailInline ys
else x : ys
mvPunct moveNotes _ (Cite cs ils : ys) |
length ils > 1
, isNote (last ils)
, startWithPunct ys
, moveNotes
= Cite cs (init ils ++ [Str (headInline ys) | not (endWithPunct False (init ils))]
++ [last ils]) : tailInline ys
mvPunct moveNotes sty (q@(Quoted _ _) : w@(Str _) : x : ys)
| isNote x, isPunctuationInQuote sty, moveNotes =
mvPunctInsideQuote q w ++ (x : ys)
mvPunct _ _ (s : x : ys) | isSpacy s, isNote x = x : ys
mvPunct _ _ (s : x@(Cite _ (Superscript _ : _)) : ys) | isSpacy s = x : ys
mvPunct _ _ xs = xs
endWithPunct :: Bool -> [Inline] -> Bool
endWithPunct _ [] = True
endWithPunct onlyFinal xs@(_:_) =
case reverse (stringify xs) of
[] -> True
(d:c:_) | isPunctuation d && not onlyFinal
&& isEndPunct c -> True
(c:_) | isEndPunct c -> True
| otherwise -> False
where isEndPunct c = c `elem` (".,;:!?" :: String)
startWithPunct :: [Inline] -> Bool
startWithPunct = and . map (`elem` (".,;:!?" :: String)) . headInline
deNote :: Pandoc -> Pandoc
deNote = topDown go
where go (Cite (c:cs) [Note [Para xs]]) =
Cite (c:cs) [Note [Para $ toCapital xs]]
go (Note xs) = Note $ topDown go' xs
go x = x
go' (x : Cite cs [Note [Para xs]] : ys) | not (isSpacy x) =
x : Str "," : Space : comb (\zs -> [Cite cs zs]) xs ys
go' (x : Note [Para xs] : ys) | not (isSpacy x) =
x : Str "," : Space : comb id xs ys
go' (Cite cs [Note [Para xs]] : ys) = comb (\zs -> [Cite cs zs]) xs ys
go' (Note [Para xs] : ys) = comb id xs ys
go' xs = xs
comb :: ([Inline] -> [Inline]) -> [Inline] -> [Inline] -> [Inline]
comb f xs ys =
let xs' = if startWithPunct ys && endWithPunct True xs
then initInline $ removeLeadingPunct xs
else removeLeadingPunct xs
removeLeadingPunct (Str [c] : s : zs)
| isSpacy s && (c == ',' || c == '.' || c == ':') = zs
removeLeadingPunct zs = zs
in f xs' ++ ys
getCitation :: Inline -> [[Citation]]
getCitation i | Cite t _ <- i = [t]
| otherwise = []
setHashes :: Inline -> State Int Inline
setHashes i | Cite t ils <- i = do t' <- mapM setHash t
return $ Cite t' ils
| otherwise = return i
setHash :: Citation -> State Int Citation
setHash c = do
ident <- get
put $ ident + 1
return c{ citationHash = ident }
toCslCite :: LocatorMap -> Citation -> CSL.Cite
toCslCite locMap c
= let (la, lo, s) = locatorWords locMap $ citationSuffix c
s' = case (la,lo,s) of
("","",(x:_))
| not (isPunct x) -> Space : s
_ -> s
isPunct (Str (x:_)) = isPunctuation x
isPunct _ = False
in emptyCite { CSL.citeId = citationId c
, CSL.citePrefix = Formatted $ citationPrefix c
, CSL.citeSuffix = Formatted s'
, CSL.citeLabel = la
, CSL.citeLocator = lo
, CSL.citeNoteNumber = show $ citationNoteNum c
, CSL.authorInText = citationMode c == AuthorInText
, CSL.suppressAuthor = citationMode c == SuppressAuthor
, CSL.citeHash = citationHash c
}
locatorWords :: LocatorMap -> [Inline] -> (String, String, [Inline])
locatorWords locMap inp =
case parse (pLocatorWords locMap) "suffix" $
splitStrWhen (\c -> isLocatorPunct c || isSpace c) inp of
Right r -> r
Left _ -> ("","",inp)
pLocatorWords :: LocatorMap -> Parsec [Inline] st (String, String, [Inline])
pLocatorWords locMap = do
(la,lo) <- pLocator locMap
s <- getInput
return (la, lo, s)
pMatch :: (Inline -> Bool) -> Parsec [Inline] st Inline
pMatch condition = try $ do
t <- anyToken
guard $ condition t
return t
pSpace :: Parsec [Inline] st Inline
pSpace = pMatch (\t -> isSpacy t || t == Str "\160")
pLocator :: LocatorMap -> Parsec [Inline] st (String, String)
pLocator locMap = try $ do
optional $ pMatch (== Str ",")
optional pSpace
la <- try (do ts <- many1 (notFollowedBy (pWordWithDigits True) >> anyToken)
case M.lookup (trim (stringify ts)) locMap of
Just l -> return l
Nothing -> mzero)
<|> (lookAhead pDigit >> return "page")
g <- pWordWithDigits True
gs <- many (pWordWithDigits False)
let lo = concat (g:gs)
return (la, lo)
pRoman :: Parsec [Inline] st String
pRoman = try $ do
t <- anyToken
case t of
Str xs -> case parseRomanNumeral xs of
Nothing -> mzero
Just _ -> return xs
_ -> mzero
pWordWithDigits :: Bool -> Parsec [Inline] st String
pWordWithDigits isfirst = try $ do
punct <- if isfirst
then return ""
else stringify `fmap` pLocatorPunct
sp <- option "" (pSpace >> return " ")
s <- pRoman <|>
try (do ts <- many1 (notFollowedBy pSpace >>
notFollowedBy pLocatorPunct >>
anyToken)
let ts' = stringify ts
guard (any isDigit ts')
return ts')
return $ punct ++ sp ++ s
pDigit :: Parsec [Inline] st ()
pDigit = do
t <- anyToken
case t of
Str (d:_) | isDigit d -> return ()
_ -> mzero
pLocatorPunct :: Parsec [Inline] st Inline
pLocatorPunct = pMatch isLocatorPunct'
where isLocatorPunct' (Str [c]) = isLocatorPunct c
isLocatorPunct' _ = False
isLocatorPunct :: Char -> Bool
isLocatorPunct ':' = False
isLocatorPunct c = isPunctuation c
type LocatorMap = M.Map String String
locatorMap :: Style -> LocatorMap
locatorMap sty =
foldr (\term -> M.insert (termSingular term) (cslTerm term)
. M.insert (termPlural term) (cslTerm term))
M.empty
(concatMap localeTerms $ styleLocale sty)