-- ---------------------------------------------------------------------------- {- | Module : Holumbus.Query.Ranking Copyright : Copyright (C) 2007, 2008 Timo B. Huebel License : MIT Maintainer : Timo B. Huebel (tbh@holumbus.org) Stability : experimental Portability: portable Version : 0.3 The ranking mechanism for Holumbus. Customized ranking functions for both documents and suggested words can be provided by the user. Some predefined ranking functions are avaliable, too. -} -- ---------------------------------------------------------------------------- module Holumbus.Query.Ranking ( -- * Ranking types RankConfig (..) , DocRanking , WordRanking -- * Ranking , rank -- * Predefined document rankings , docRankByCount , docRankWeightedByCount -- * Predefined word rankings , wordRankByCount , wordRankWeightedByCount ) where import Prelude hiding (foldr) import Data.Function import Data.Foldable import qualified Data.List as L import qualified Data.Map as M import Holumbus.Query.Result import Holumbus.Index.Common -- ---------------------------------------------------------------------------- -- | The configuration of the ranking mechanism. data RankConfig a = RankConfig { docRanking :: DocRanking a -- ^ A function to determine the score of a document. , wordRanking :: WordRanking -- ^ A funciton to determine the score of a word. } -- | The signature of a function to determine the score of a document. type DocRanking a = DocId -> DocInfo a -> DocContextHits -> Score -- | The signature of a function to determine the score of a word. type WordRanking = Word -> WordInfo -> WordContextHits -> Score -- ---------------------------------------------------------------------------- -- | Rank the result with custom ranking functions. rank :: RankConfig a -> Result a -> Result a rank (RankConfig fd fw {-ld lw-}) r = Result scoredDocHits scoredWordHits where scoredDocHits = mapWithKeyDocIdMap (\k (di, dch) -> (setDocScore (fd k di dch) di, dch)) $ docHits r scoredWordHits = M.mapWithKey (\k (wi, wch) -> (setWordScore (fw k wi wch) wi, wch)) $ wordHits r -- | Rank documents by count. docRankByCount :: DocId -> DocInfo a -> DocContextHits -> Score docRankByCount _ _ h = fromIntegral $ M.fold (\h1 r1 -> M.fold (\h2 r2 -> sizePos h2 + r2) r1 h1) 0 h -- | Rank words by count. wordRankByCount :: Word -> WordInfo -> WordContextHits -> Score wordRankByCount _ _ h = fromIntegral $ M.fold (\h1 r1 -> foldDocIdMap ((+) . sizePos) r1 h1) 0 h -- | Rank documents by context-weighted count. The weights will be normalized to a maximum of 1.0. -- Contexts with no weight (or a weight of zero) will be ignored. docRankWeightedByCount :: [(Context, Score)] -> DocId -> DocInfo a -> DocContextHits -> Score docRankWeightedByCount ws _ _ h = M.foldrWithKey (calcWeightedScore ws) 0.0 h -- | Rank words by context-weighted count. The weights will be normalized to a maximum of 1.0. -- Contexts with no weight (or a weight of zero) will be ignored. wordRankWeightedByCount :: [(Context, Score)] -> Word -> WordInfo -> WordContextHits -> Score wordRankWeightedByCount ws _ _ h = M.foldrWithKey (calcWeightedScore ws) 0.0 h -- | Calculate the weighted score of occurrences of a word. calcWeightedScore :: (Foldable f) => [(Context, Score)] -> Context -> (f Positions) -> Score -> Score calcWeightedScore ws c h r = maybe r (\w -> r + ((w / mw) * count)) $ lookupWeight c ws where count = fromIntegral $ foldl' (flip $ (+) . sizePos) 0 h mw = snd $ L.maximumBy (compare `on` snd) ws -- | Find the weight of a context in a list of weights. If the context was not found or it's -- weight is equal to zero, 'Nothing' will be returned. lookupWeight :: Context -> [(Context, Score)] -> Maybe Score lookupWeight _ [] = Nothing lookupWeight c (x:xs) = if fst x == c then if snd x /= 0.0 then Just (snd x) else Nothing else lookupWeight c xs -- ----------------------------------------------------------------------------