{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternGuards #-}
#if MIN_VERSION_base(4,9,0)
{-# OPTIONS_GHC -fno-warn-redundant-constraints #-}
#endif
module Text.CSL.Proc.Disamb where
import Prelude
import Control.Arrow (second, (&&&), (>>>))
import Data.List (elemIndex, find, findIndex, groupBy,
mapAccumL, nub, nubBy, sortOn)
import Data.Text (Text)
import qualified Data.Text as T
import Data.Maybe
import Text.CSL.Eval
import Text.CSL.Reference
import Text.CSL.Style
import Text.CSL.Util (proc, query)
import Text.Pandoc.Shared (ordNub)
disambCitations :: Style -> [Reference] -> Citations -> [CitationGroup]
-> ([(Text, Text)], [CitationGroup])
disambCitations s bibs cs groups
= (,) yearSuffs citOutput
where
when_ b f = if b then f else []
filter_ f = concatMap (map fst . filter f . uncurry zip)
refs = processCites bibs cs
nameDupls = getDuplNameData groups
duplics = getDuplCiteData giveNameDisamb hasNamesOpt hasYSuffOpt groups
disOpts = getCitDisambOptions s
hasNamesOpt = "disambiguate-add-names" `elem` disOpts
hasGNameOpt = "disambiguate-add-givenname" `elem` disOpts
hasYSuffOpt = "disambiguate-add-year-suffix" `elem` disOpts
giveNameDisamb = case getOptionVal "givenname-disambiguation-rule"
(citOptions (citation s)) of
"by-cite" -> ByCite
"all-names" -> AllNames
"all-names-with-initials" -> AllNames
"primary-name" -> PrimaryName
"primary-name-with-initials" -> PrimaryName
_ -> ByCite
clean = if hasGNameOpt then id else proc rmHashAndGivenNames
withNames = flip map duplics $ same . clean .
map (if hasNamesOpt then disambData else return . disambYS)
needNames = filter_ (not . snd) $ zip duplics withNames
needYSuff = filter_ snd $ zip duplics withNames
newNames :: [CiteData]
newNames = when_ (hasNamesOpt || hasGNameOpt) $ disambAddNames giveNameDisamb $ needNames ++
if hasYSuffOpt && giveNameDisamb == NoGiven then [] else needYSuff
newGName :: [NameData]
newGName = when_ hasGNameOpt $ concatMap disambAddGivenNames nameDupls
reEval = let chk = if hasYSuffOpt then filter (T.null . citYear) else id
in chk needYSuff
reEvaluated = if or (query hasIfDis s) && not (null reEval)
then zipWith (reEvaluate s reEval) refs groups
else groups
withYearS = addNames $
if hasYSuffOpt
then map (mapCitationGroup
(setYearSuffCollision hasNamesOpt needYSuff))
reEvaluated
else rmYearSuff reEvaluated
yearSuffs = when_ hasYSuffOpt . generateYearSuffix bibs . concatMap getYearSuffixes $ withYearS
addNames = proc (updateContrib giveNameDisamb newNames newGName)
processed = if hasYSuffOpt
then proc (updateYearSuffixes yearSuffs) withYearS
else withYearS
citOutput = if disOpts /= [] then processed else reEvaluated
mapDisambData :: (Output -> Output) -> CiteData -> CiteData
mapDisambData f (CD k c ys d r s y) = CD k c ys (proc f d) r s y
mapCitationGroup :: ([Output] -> [Output]) -> CitationGroup -> CitationGroup
mapCitationGroup f (CG cs fm d os) = CG cs fm d (zip (map fst os) . f $ map snd os)
data GiveNameDisambiguation
= NoGiven
| ByCite
| AllNames
| PrimaryName
deriving (Show, Eq)
disambAddNames :: GiveNameDisambiguation -> [CiteData] -> [CiteData]
disambAddNames b needName = addLNames
where
clean = if b == NoGiven then proc rmHashAndGivenNames else id
disSolved = zip needName' . disambiguate . map disambData $ needName'
needName' = nub' needName []
addLNames = map (\(c,n) -> c { disambed = if null n then collision c else head n }) disSolved
nub' [] r = r
nub' (x:xs) r = case elemIndex (disambData $ clean x)
(map (disambData . clean) r) of
Nothing -> nub' xs (x:r)
Just i -> let y = r !! i
in nub' xs (y {sameAs = key x : sameAs y}
: filter (/= y) r)
disambAddGivenNames :: [NameData] -> [NameData]
disambAddGivenNames needName = addGName
where
disSolved = zip needName (disambiguate $ map nameDisambData needName)
addGName = map (\(c,n) -> c { nameDataSolved = if null n then nameCollision c else head n }) disSolved
updateContrib :: GiveNameDisambiguation -> [CiteData] -> [NameData] -> Output -> Output
updateContrib g c n o
| OContrib k r s d dd <- o = case filter (key &&& sameAs >>> uncurry (:) >>> elem k) c of
x:_ | clean (disambData x) == clean (d:dd) ->
OContrib k r (map processGNames $ disambed x) [] dd
_ | null c, AllNames <- g -> OContrib k r (map processGNames s) d dd
| otherwise -> o
| otherwise = o
where
clean = if g == NoGiven then proc rmHashAndGivenNames else id
processGNames = if g /= NoGiven then updateOName n else id
updateOName :: [NameData] -> Output -> Output
updateOName n o
| OName _ _ [] _ <- o = o
| OName k x _ f <- o = case elemIndex (ND k (clean x) [] []) n of
Just i -> OName emptyAgent (nameDataSolved $ n !! i) [] f
_ -> o
| otherwise = o
where
clean = proc rmGivenNames
reEvaluate :: Style
-> [CiteData]
-> [(Cite, Maybe Reference)]
-> CitationGroup
-> CitationGroup
reEvaluate Style{citation = ct, csMacros = ms , styleLocale = lo,
styleAbbrevs = as} l cr (CG a f d os)
= CG a f d . flip concatMap (zip cr os) $
\((c,mbr),out) ->
case mbr of
Just r | unLiteral (refId r) `elem` lkeys ->
return . second (flip Output emptyFormatting) $
(,) c $ evalLayout (citLayout ct) (EvalCite c) True lo ms (citOptions ct) as mbr
_ -> [out]
where lkeys = map key l
hasIfDis :: IfThen -> [Bool]
hasIfDis (IfThen Condition{disambiguation = (_:_)} _ _) = [True]
hasIfDis _ = [False]
getCitDisambOptions :: Style -> [Text]
getCitDisambOptions
= map fst . filter ((==) "true" . snd) .
filter (T.isPrefixOf "disambiguate" . fst) . citOptions . citation
getDuplCiteData :: GiveNameDisambiguation -> Bool -> Bool -> [CitationGroup] -> [[CiteData]]
getDuplCiteData giveNameDisamb b1 b2 g
= groupBy (\x y -> collide x == collide y) . sortOn collide
$ duplicates
where
whatToGet = if b1 then collision else disambYS
collide = proc (rmExtras giveNameDisamb) .
proc rmHashAndGivenNames .
whatToGet
citeData = nubBy (\a b -> collide a == collide b && key a == key b) $
concatMap (mapGroupOutput getCiteData) g
duplicates = [c | c <- citeData , d <- citeData , collides c d]
collides x y = x /= y && (collide x == collide y)
&& (not b2 || citYear x == citYear y)
rmExtras :: GiveNameDisambiguation -> [Output] -> [Output]
rmExtras g os
| Output x _ : xs <- os = case rmExtras g x of
[] -> rmExtras g xs
ys -> ys ++ rmExtras g xs
| OContrib _ _ (y:ys) _ _ : xs <- os
= if g == PrimaryName
then OContrib "" "" [y] [] [] : rmExtras g xs
else OContrib "" "" (y:ys) [] [] : rmExtras g xs
| OYear{} : xs <- os = rmExtras g xs
| OYearSuf{} : xs <- os = rmExtras g xs
| OLabel{} : xs <- os = rmExtras g xs
| ODel _ : xs <- os = rmExtras g xs
| OLoc _ _ : xs <- os = rmExtras g xs
| x : xs <- os = x : rmExtras g xs
| otherwise = []
getCiteData :: Output -> [CiteData]
getCiteData out
= (contribs &&& years >>> zipData) out
where
contribs x = case query contribsQ x of
[] -> [CD "" [out] [] [] [] [] ""]
xs -> xs
years o = case query getYears o of
[] -> [("","")]
r -> r
zipData = uncurry . zipWith $ \c y -> if key c /= ""
then c {citYear = snd y}
else c {key = fst y
,citYear = snd y}
contribsQ o
| OContrib k _ _ d dd <- o = [CD k [out] d (d:dd) [] [] ""]
| otherwise = []
getYears :: Output -> [(Text,Text)]
getYears o
| OYear x k _ <- o = [(k,x)]
| otherwise = []
getDuplNameData :: [CitationGroup] -> [[NameData]]
getDuplNameData g
= groupBy (\a b -> collide a == collide b) . sortOn collide $ duplicates
where
collide = nameCollision
nameData = nub $ concatMap (mapGroupOutput getName) g
duplicates = filter (flip elem (getDuplNames g) . collide) nameData
getDuplNames :: [CitationGroup] -> [[Output]]
getDuplNames = ordNub . catMaybes . snd . mapAccumL dupl [] . getData
where
getData = concatMap (mapGroupOutput getName)
dupl a c = if nameCollision c `elem` map nameCollision a
then (a,Just $ nameCollision c)
else (c:a,Nothing)
getName :: Output -> [NameData]
getName = query getName'
where
getName' o
| OName i n ns _ <- o = [ND i n (n:ns) []]
| otherwise = []
generateYearSuffix :: [Reference] -> [(Text, [Output])] -> [(Text,Text)]
generateYearSuffix refs
= concatMap (`zip` suffs) .
getFst . map (sort' . filter ((/=) 0 . snd) . map getP) .
getFst . filter (\grp -> length grp >= 2) . map nub .
groupBy (\a b -> snd a == snd b) .
sort' . filter (not . null . snd)
where
sort' :: (Ord a, Ord b) => [(a,b)] -> [(a,b)]
sort' = sortOn snd
getFst = map $ map fst
getP k = case findIndex ((==) k . unLiteral . refId) refs of
Just x -> (k, x + 1)
_ -> (k, 0)
suffs = letters ++ [x <> y | x <- letters, y <- letters ]
letters = map T.singleton ['a'..'z']
setYearSuffCollision :: Bool -> [CiteData] -> [Output] -> [Output]
setYearSuffCollision b cs = proc (setYS cs) .
map (\x -> if hasYearSuf x then x else addYearSuffix x)
where
setYS c o
| OYearSuf _ k _ f <- o = OYearSuf "" k (getCollision k c) f
| otherwise = o
collide = if b then disambed else disambYS
getCollision k c = case find ((==) k . key) c of
Just x -> case collide x of
[] -> [OStr (citYear x) emptyFormatting]
ys -> ys
_ -> []
updateYearSuffixes :: [(Text, Text)] -> Output -> Output
updateYearSuffixes yss o
| OYearSuf _ k c f <- o = case lookup k yss of
Just x -> OYearSuf x k c f
_ -> ONull
| otherwise = o
getYearSuffixes :: CitationGroup -> [(Text,[Output])]
getYearSuffixes (CG _ _ _ d) = map go d
where go (c,x) = (citeId c, relevant False [x])
relevant :: Bool -> [Output] -> [Output]
relevant c (Output xs _ : rest) = relevant c xs ++ relevant c rest
relevant c (OYear n _ _ : rest) = OStr n emptyFormatting : relevant c rest
relevant c (ODate xs : rest) = relevant c xs ++ relevant c rest
relevant False (OStr s _ : rest) = OStr s emptyFormatting : relevant False rest
relevant False (OSpace : rest) = OSpace : relevant False rest
relevant False (OPan ils : rest) = OPan ils : relevant False rest
relevant _ (OContrib _ _ v _ _ : rest ) = relevant False v ++ relevant True rest
relevant c (OName _ v _ _ : rest ) = relevant c v ++ relevant c rest
relevant c (_ : rest) = relevant c rest
relevant _ [] = []
rmYearSuff :: [CitationGroup] -> [CitationGroup]
rmYearSuff = proc rmYS
where
rmYS o
| OYearSuf{} <- o = ONull
| otherwise = o
disambiguate :: (Eq a) => [[a]] -> [[a]]
disambiguate [] = []
disambiguate l
= if hasMult l && not (allTheSame l) && hasDuplicates heads
then disambiguate (rest l)
else heads
where
heads = map (take 1) l
rest = map (\(b,x) -> if b then tail_ x else take 1 x) . zip (same heads)
hasMult = foldr (\x -> (||) (length x > 1)) False
tail_ [x] = [x]
tail_ x = if null x then x else tail x
same :: Eq a => [a] -> [Bool]
same l = map (`elem` dupl) l
where
dupl = catMaybes . snd . macc [] $ l
macc = mapAccumL $ \a x -> if x `elem` a
then (a,Just x)
else (x:a,Nothing)
hasDuplicates :: Eq a => [a] -> Bool
hasDuplicates = or . same
allTheSame :: Eq a => [a] -> Bool
allTheSame [] = True
allTheSame (x:xs) = all (== x) xs
addYearSuffix :: Output -> Output
addYearSuffix o
| OYear y k f <- o = Output [ OYear y k emptyFormatting
, OYearSuf "" k [] emptyFormatting] f
| ODate (x:xs) <- o = if any hasYear xs
then Output (x : [addYearSuffix $ ODate xs]) emptyFormatting
else addYearSuffix (Output (x:xs) emptyFormatting)
| Output (x:xs) f <- o = if any hasYearSuf (x : xs)
then Output (x : xs) f
else if hasYear x
then Output (addYearSuffix x : xs) f
else Output (x : [addYearSuffix $ Output xs emptyFormatting]) f
| otherwise = o
hasYear :: Output -> Bool
hasYear = not . null . query getYear
where getYear o
| OYear{} <- o = [o]
| otherwise = []
hasYearSuf :: Output -> Bool
hasYearSuf = not . null . query getYearSuf
where getYearSuf :: Output -> [Text]
getYearSuf o
| OYearSuf{} <- o = ["a"]
| otherwise = []
rmHashAndGivenNames :: Output -> Output
rmHashAndGivenNames (OName _ s _ f) = OName emptyAgent s [] f
rmHashAndGivenNames o = o
rmGivenNames :: Output -> Output
rmGivenNames (OName a s _ f) = OName a s [] f
rmGivenNames o = o
addGivenNames :: [Output] -> [Output]
addGivenNames
= addGN True
where
addGN _ [] = []
addGN b (o:os)
| OName i _ xs f <- o
, xs /= [] = if b then OName i (head xs) (tail xs) f : addGN False os else o:os
| otherwise = o : addGN b os
mapGroupOutput :: (Output -> [a]) -> CitationGroup -> [a]
mapGroupOutput f (CG _ _ _ os) = concatMap (f . snd) os