module Data.SearchEngine.Query (
query,
ResultsFilter(..),
queryExplain,
BM25F.Explanation(..),
setRankParams,
relevanceScore,
indexDocToBM25Doc,
expandTransformedQueryTerm,
) where
import Data.SearchEngine.Types
import qualified Data.SearchEngine.SearchIndex as SI
import qualified Data.SearchEngine.DocIdSet as DocIdSet
import qualified Data.SearchEngine.DocTermIds as DocTermIds
import qualified Data.SearchEngine.DocFeatVals as DocFeatVals
import qualified Data.SearchEngine.BM25F as BM25F
import Data.Ix
import Data.List
import Data.Function
import Data.Maybe
query :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
SearchEngine doc key field feature ->
[Term] -> [key]
query se@SearchEngine{ searchIndex,
searchRankParams = SearchRankParameters{..} }
terms =
let
lookupTerms :: [Term]
lookupTerms = concatMap (expandTransformedQueryTerm se) terms
rawresults :: [Maybe (TermId, DocIdSet)]
rawresults = map (SI.lookupTerm searchIndex) lookupTerms
termids :: [TermId]
docidsets :: [DocIdSet]
(termids, docidsets) = unzip (catMaybes rawresults)
unrankedResults :: DocIdSet
unrankedResults = pruneRelevantResults
paramResultsetSoftLimit
paramResultsetHardLimit
docidsets
in rankResults se termids (DocIdSet.toList unrankedResults)
expandTransformedQueryTerm :: (Ix field, Bounded field) =>
SearchEngine doc key field feature ->
Term -> [Term]
expandTransformedQueryTerm SearchEngine{searchConfig} term =
nub [ transformForField field
| let transformForField = transformQueryTerm searchConfig term
, field <- range (minBound, maxBound) ]
rankResults :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
SearchEngine doc key field feature ->
[TermId] -> [DocId] -> [key]
rankResults se@SearchEngine{searchIndex} queryTerms docids =
map snd
$ sortBy (flip compare `on` fst)
[ (relevanceScore se queryTerms doctermids docfeatvals, dockey)
| docid <- docids
, let (dockey, doctermids, docfeatvals) = SI.lookupDocId searchIndex docid ]
relevanceScore :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
SearchEngine doc key field feature ->
[TermId] -> DocTermIds field -> DocFeatVals feature -> Float
relevanceScore SearchEngine{bm25Context} queryTerms doctermids docfeatvals =
BM25F.score bm25Context doc queryTerms
where
doc = indexDocToBM25Doc doctermids docfeatvals
indexDocToBM25Doc :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
DocTermIds field ->
DocFeatVals feature ->
BM25F.Doc TermId field feature
indexDocToBM25Doc doctermids docfeatvals =
BM25F.Doc {
BM25F.docFieldLength = DocTermIds.fieldLength doctermids,
BM25F.docFieldTermFrequency = DocTermIds.fieldTermCount doctermids,
BM25F.docFeatureValue = DocFeatVals.featureValue docfeatvals
}
pruneRelevantResults :: Int -> Int -> [DocIdSet] -> DocIdSet
pruneRelevantResults softLimit hardLimit =
go DocIdSet.empty . sortBy (compare `on` DocIdSet.size)
where
go !acc [] = acc
go !acc (d:ds)
| DocIdSet.null acc = go d ds
| size > hardLimit = acc
| size > softLimit = DocIdSet.union acc d
| otherwise = go (DocIdSet.union acc d) ds
where
size = DocIdSet.size acc + DocIdSet.size d
queryExplain :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
SearchEngine doc key field feature ->
[Term] -> [(BM25F.Explanation field feature Term, key)]
queryExplain se@SearchEngine{ searchIndex,
searchConfig = SearchConfig{transformQueryTerm},
searchRankParams = SearchRankParameters{..} }
terms =
let lookupTerms :: [Term]
lookupTerms = [ term'
| term <- terms
, let transformForField = transformQueryTerm term
, term' <- nub [ transformForField field
| field <- range (minBound, maxBound) ]
]
rawresults :: [Maybe (TermId, DocIdSet)]
rawresults = map (SI.lookupTerm searchIndex) lookupTerms
termids :: [TermId]
docidsets :: [DocIdSet]
(termids, docidsets) = unzip (catMaybes rawresults)
unrankedResults :: DocIdSet
unrankedResults = pruneRelevantResults
paramResultsetSoftLimit
paramResultsetHardLimit
docidsets
in rankExplainResults se termids (DocIdSet.toList unrankedResults)
rankExplainResults :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
SearchEngine doc key field feature ->
[TermId] ->
[DocId] ->
[(BM25F.Explanation field feature Term, key)]
rankExplainResults se@SearchEngine{searchIndex} queryTerms docids =
sortBy (flip compare `on` (BM25F.overallScore . fst))
[ (explainRelevanceScore se queryTerms doctermids docfeatvals, dockey)
| docid <- docids
, let (dockey, doctermids, docfeatvals) = SI.lookupDocId searchIndex docid ]
explainRelevanceScore :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
SearchEngine doc key field feature ->
[TermId] ->
DocTermIds field ->
DocFeatVals feature ->
BM25F.Explanation field feature Term
explainRelevanceScore SearchEngine{bm25Context, searchIndex}
queryTerms doctermids docfeatvals =
fmap (SI.getTerm searchIndex) (BM25F.explain bm25Context doc queryTerms)
where
doc = indexDocToBM25Doc doctermids docfeatvals
setRankParams :: SearchRankParameters field feature ->
SearchEngine doc key field feature ->
SearchEngine doc key field feature
setRankParams params@SearchRankParameters{..} se =
se {
searchRankParams = params,
bm25Context = (bm25Context se) {
BM25F.paramK1 = paramK1,
BM25F.paramB = paramB,
BM25F.fieldWeight = paramFieldWeights,
BM25F.featureWeight = paramFeatureWeights,
BM25F.featureFunction = paramFeatureFunctions
}
}
data ResultsFilter key = NoFilter
| FilterPredicate (key -> Bool)
| FilterBulkPredicate ([key] -> [Bool])