module Text.CSL.Proc.Disamb where
import Control.Arrow ( (&&&), (>>>), second )
import Data.Char ( chr )
import Data.List ( elemIndex, find, findIndex, sortBy, mapAccumL
, nub, nubBy, groupBy, isPrefixOf )
import Data.Maybe
import Data.Ord ( comparing )
import Text.CSL.Eval
import Text.CSL.Reference
import Text.CSL.Style
import Text.CSL.Util (query, proc)
disambCitations :: Style -> [Reference] -> Citations -> [CitationGroup]
-> ([(String, String)], [CitationGroup])
disambCitations s bibs cs groups
= (,) yearSuffs citOutput
where
when_ b f = if b then f else []
filter_ f = concatMap (map fst) . map (filter f) . map (uncurry zip)
refs = processCites bibs cs
nameDupls = getDuplNameData groups
duplics = getDuplCiteData hasNamesOpt hasYSuffOpt groups
isByCite = let gno = getOptionVal "givenname-disambiguation-rule" (citOptions $ citation s)
in gno == "by-cite" || gno == []
disOpts = getCitDisambOptions s
hasNamesOpt = "disambiguate-add-names" `elem` disOpts
hasGNameOpt = "disambiguate-add-givenname" `elem` disOpts
hasYSuffOpt = "disambiguate-add-year-suffix" `elem` disOpts
givenNames = if hasGNameOpt
then if isByCite then ByCite else AllNames
else NoGiven
clean = if hasGNameOpt then id else proc rmHashAndGivenNames
withNames = flip map duplics $ same . clean .
map (if hasNamesOpt then disambData else return . disambYS)
needNames = filter_ (not . snd) $ zip duplics withNames
needYSuff = filter_ snd $ zip duplics withNames
newNames :: [CiteData]
newNames = when_ (hasNamesOpt || hasGNameOpt) $ disambAddNames givenNames $ needNames ++
if hasYSuffOpt && givenNames == NoGiven then [] else needYSuff
newGName :: [NameData]
newGName = when_ hasGNameOpt $ concatMap disambAddGivenNames nameDupls
reEval = let chk = if hasYSuffOpt then filter ((==) [] . citYear) else id
in chk needYSuff
reEvaluated = if or (query hasIfDis s) && not (null reEval)
then map (uncurry $ reEvaluate s reEval) $ zip refs groups
else groups
withYearS = addNames $
if hasYSuffOpt
then map (mapCitationGroup $ setYearSuffCollision hasNamesOpt needYSuff) $ reEvaluated
else rmYearSuff $ reEvaluated
yearSuffs = when_ hasYSuffOpt . generateYearSuffix bibs . concatMap getYearSuffixes $ withYearS
addNames = proc (updateContrib givenNames newNames newGName)
processed = if hasYSuffOpt
then proc (updateYearSuffixes yearSuffs) $ withYearS
else withYearS
citOutput = if disOpts /= [] then processed else reEvaluated
mapDisambData :: (Output -> Output) -> CiteData -> CiteData
mapDisambData f (CD k c ys d r s y) = CD k c ys (proc f d) r s y
mapCitationGroup :: ([Output] -> [Output]) -> CitationGroup -> CitationGroup
mapCitationGroup f (CG cs fm d os) = CG cs fm d (zip (map fst os) . f $ map snd os)
data GiveNameDisambiguation
= NoGiven
| ByCite
| AllNames
deriving (Show, Eq)
disambAddNames :: GiveNameDisambiguation -> [CiteData] -> [CiteData]
disambAddNames b needName = addLNames
where
clean = if b == NoGiven then proc rmHashAndGivenNames else id
disSolved = zip needName' . disambiguate . map disambData $ needName'
needName' = nub' needName []
addLNames = map (\(c,n) -> c { disambed = if null n then collision c else head n }) disSolved
nub' [] r = r
nub' (x:xs) r = case elemIndex (disambData $ clean x) (map (disambData . clean) r) of
Nothing -> nub' xs (x:r)
Just i -> let y = r !! i
in nub' xs (y {sameAs = key x : sameAs y} : filter (/= y) r)
disambAddGivenNames :: [NameData] -> [NameData]
disambAddGivenNames needName = addGName
where
disSolved = zip needName (disambiguate $ map nameDisambData needName)
addGName = map (\(c,n) -> c { nameDataSolved = if null n then nameCollision c else head n }) disSolved
updateContrib :: GiveNameDisambiguation -> [CiteData] -> [NameData] -> Output -> Output
updateContrib g c n o
| OContrib k r s d dd <- o = case filter (key &&& sameAs >>> uncurry (:) >>> elem k) c of
x:_ | clean (disambData x) == clean (d:dd) ->
OContrib k r (map processGNames $ disambed x) [] dd
_ | null c, AllNames <- g -> OContrib k r (map processGNames s) d dd
| otherwise -> o
| otherwise = o
where
clean = if g == NoGiven then proc rmHashAndGivenNames else id
processGNames = if g /= NoGiven then updateOName n else id
updateOName :: [NameData] -> Output -> Output
updateOName n o
| OName _ _ [] _ <- o = o
| OName k x _ f <- o = case elemIndex (ND k (clean x) [] []) n of
Just i -> OName emptyAgent (nameDataSolved $ n !! i) [] f
_ -> o
| otherwise = o
where
clean = proc rmGivenNames
reEvaluate :: Style -> [CiteData] -> [(Cite, Reference)] -> CitationGroup -> CitationGroup
reEvaluate (Style {citation = ct, csMacros = ms , styleLocale = lo,
styleAbbrevs = as}) l cr (CG a f d os)
= CG a f d . flip concatMap (zip cr os) $
\((c,r),out) -> if unLiteral (refId r) `elem` map key l
then return . second (flip Output emptyFormatting) $
(,) c $ evalLayout (citLayout ct) (EvalCite c) True lo ms (citOptions ct) as r
else [out]
hasIfDis :: IfThen -> [Bool]
hasIfDis (IfThen (Condition {disambiguation = (_:_)}) _ _) = [True]
hasIfDis _ = [False]
getCitDisambOptions :: Style -> [String]
getCitDisambOptions
= map fst . filter ((==) "true" . snd) .
filter (isPrefixOf "disambiguate" . fst) . citOptions . citation
getDuplCiteData :: Bool -> Bool -> [CitationGroup] -> [[CiteData]]
getDuplCiteData b1 b2 g
= groupBy (\x y -> collide x == collide y) . sortBy (comparing collide)
$ duplicates
where
whatToGet = if b1 then collision else disambYS
collide = proc rmExtras . proc rmHashAndGivenNames . whatToGet
citeData = nubBy (\a b -> collide a == collide b && key a == key b) $
concatMap (mapGroupOutput $ getCiteData) g
duplicates = [c | c <- citeData , d <- citeData , collides c d]
collides x y = x /= y && (collide x == collide y)
&& (not b2 || citYear x == citYear y)
rmExtras :: [Output] -> [Output]
rmExtras os
| Output x _ : xs <- os = case rmExtras x of
[] -> rmExtras xs
ys -> ys ++ rmExtras xs
| OContrib _ _ x _ _ : xs <- os = OContrib [] [] x [] [] : rmExtras xs
| OYear y _ f : xs <- os = OYear y [] f : rmExtras xs
| ODel _ : xs <- os = rmExtras xs
| OLoc _ _ : xs <- os = rmExtras xs
| x : xs <- os = x : rmExtras xs
| otherwise = []
getCiteData :: Output -> [CiteData]
getCiteData out
= (contribs &&& years >>> zipData) out
where
contribs x = case query contribsQ x of
[] -> [CD [] [out] [] [] [] [] []]
xs -> xs
years o = case query getYears o of
[] -> [([],[])]
r -> r
zipData = uncurry . zipWith $ \c y -> if key c /= []
then c {citYear = snd y}
else c {key = fst y
,citYear = snd y}
contribsQ o
| OContrib k _ _ d dd <- o = [CD k [out] d (d:dd) [] [] []]
| otherwise = []
getYears :: Output -> [(String,String)]
getYears o
| OYear x k _ <- o = [(k,x)]
| otherwise = []
getDuplNameData :: [CitationGroup] -> [[NameData]]
getDuplNameData g
= groupBy (\a b -> collide a == collide b) . sortBy (comparing collide) $ duplicates
where
collide = nameCollision
nameData = nub $ concatMap (mapGroupOutput getName) g
duplicates = filter (flip elem (getDuplNames g) . collide) nameData
getDuplNames :: [CitationGroup] -> [[Output]]
getDuplNames xs
= nub . catMaybes . snd . mapAccumL dupl [] . getData $ xs
where
getData = concatMap (mapGroupOutput getName)
dupl a c = if nameCollision c `elem` map nameCollision a
then (a,Just $ nameCollision c)
else (c:a,Nothing)
getName :: Output -> [NameData]
getName = query getName'
where
getName' o
| OName i n ns _ <- o = [ND i n (n:ns) []]
| otherwise = []
generateYearSuffix :: [Reference] -> [(String, [Output])] -> [(String,String)]
generateYearSuffix refs
= concatMap (flip zip suffs) .
getFst . map sort' . map (filter ((/=) 0 . snd)) . map (map getP) .
getFst . filter (\grp -> length grp >= 2) . map nub . groupBy (\a b -> snd a == snd b) . sort' . filter ((/=) [] . snd)
where
sort' :: (Ord a, Ord b) => [(a,b)] -> [(a,b)]
sort' = sortBy (comparing snd)
getFst = map $ map fst
getP k = case findIndex ((==) k . unLiteral . refId) refs of
Just x -> (k, x + 1)
_ -> (k, 0)
suffs = l ++ [x ++ y | x <- l, y <- l ]
l = map (return . chr) [97..122]
setYearSuffCollision :: Bool -> [CiteData] -> [Output] -> [Output]
setYearSuffCollision b cs = proc (setYS cs) . (map $ \x -> if hasYearSuf x then x else addYearSuffix x)
where
setYS c o
| OYearSuf _ k _ f <- o = OYearSuf [] k (getCollision k c) f
| otherwise = o
collide = if b then disambed else disambYS
getCollision k c = case find ((==) k . key) c of
Just x -> if collide x == []
then [OStr (citYear x) emptyFormatting]
else collide x
_ -> []
updateYearSuffixes :: [(String, String)] -> Output -> Output
updateYearSuffixes yss o
| OYearSuf _ k c f <- o = case lookup k yss of
Just x -> OYearSuf x k c f
_ -> ONull
| otherwise = o
getYearSuffixes :: CitationGroup -> [(String,[Output])]
getYearSuffixes (CG _ _ _ d) = map go d
where go (c,x) = (citeId c, relevant False [x])
relevant :: Bool -> [Output] -> [Output]
relevant c (Output xs _ : rest) = relevant c xs ++ relevant c rest
relevant c (OYear n _ _ : rest) = OStr n emptyFormatting : relevant c rest
relevant False (OStr s _ : rest) = OStr s emptyFormatting : relevant False rest
relevant False (OSpace : rest) = OSpace : relevant False rest
relevant False (OPan ils : rest) = OPan ils : relevant False rest
relevant _ (OContrib _ _ v _ _ : rest ) = relevant False v ++ relevant True rest
relevant c (OName _ v _ _ : rest ) = relevant c v ++ relevant c rest
relevant c (_ : rest) = relevant c rest
relevant _ [] = []
rmYearSuff :: [CitationGroup] -> [CitationGroup]
rmYearSuff = proc rmYS
where
rmYS o
| OYearSuf _ _ _ _ <- o = ONull
| otherwise = o
disambiguate :: (Eq a) => [[a]] -> [[a]]
disambiguate [] = []
disambiguate l
= if hasMult l && not (allTheSame l) && hasDuplicates heads
then disambiguate (rest l)
else heads
where
heads = map (take 1) l
rest = map (\(b,x) -> if b then tail_ x else take 1 x) . zip (same heads)
hasMult [] = False
hasMult (x:xs) = length x > 1 || hasMult xs
tail_ [x] = [x]
tail_ x = if null x then x else tail x
same :: Eq a => [a] -> [Bool]
same l = map (`elem` dupl) l
where
dupl = catMaybes . snd . macc [] $ l
macc = mapAccumL $ \a x -> if x `elem` a
then (a,Just x)
else (x:a,Nothing)
hasDuplicates :: Eq a => [a] -> Bool
hasDuplicates = or . same
allTheSame :: Eq a => [a] -> Bool
allTheSame [] = True
allTheSame (x:xs) = all (== x) xs
addYearSuffix :: Output -> Output
addYearSuffix o
| OYear y k f <- o = Output [OYear y k emptyFormatting,OYearSuf [] k [] emptyFormatting] f
| ODate (x:xs) <- o = if or $ map hasYear xs
then Output (x : [addYearSuffix $ ODate xs]) emptyFormatting
else addYearSuffix (Output (x:xs) emptyFormatting)
| Output (x:xs) f <- o = if or $ map hasYearSuf (x : xs)
then Output (x : xs) f
else if hasYear x
then Output (addYearSuffix x : xs) f
else Output (x : [addYearSuffix $ Output xs emptyFormatting]) f
| otherwise = o
hasYear :: Output -> Bool
hasYear = not . null . query getYear
where getYear o
| OYear _ _ _ <- o = [o]
| otherwise = []
hasYearSuf :: Output -> Bool
hasYearSuf = not . null . query getYearSuf
where getYearSuf :: Output -> [String]
getYearSuf o
| OYearSuf _ _ _ _ <- o = ["a"]
| otherwise = []
rmHashAndGivenNames :: Output -> Output
rmHashAndGivenNames (OName _ s _ f) = OName emptyAgent s [] f
rmHashAndGivenNames o = o
rmGivenNames :: Output -> Output
rmGivenNames (OName a s _ f) = OName a s [] f
rmGivenNames o = o
addGivenNames :: [Output] -> [Output]
addGivenNames
= addGN True
where
addGN _ [] = []
addGN b (o:os)
| OName i _ xs f <- o
, xs /= [] = if b then OName i (head xs) (tail xs) f : addGN False os else o:os
| otherwise = o : addGN b os
mapGroupOutput :: (Output -> [a]) -> CitationGroup -> [a]
mapGroupOutput f (CG _ _ _ os) = concatMap f $ map snd os