module Data.SearchEngine.Types (
SearchEngine(..),
SearchConfig(..),
SearchRankParameters(..),
BM25F.FeatureFunction(..),
initSearchEngine,
cacheBM25Context,
NoFeatures,
noFeatures,
SearchIndex, Term, TermId,
DocIdSet, DocId,
DocTermIds, DocFeatVals,
invariant,
) where
import Data.SearchEngine.SearchIndex (SearchIndex, Term, TermId)
import qualified Data.SearchEngine.SearchIndex as SI
import Data.SearchEngine.DocIdSet (DocIdSet, DocId)
import qualified Data.SearchEngine.DocIdSet as DocIdSet
import Data.SearchEngine.DocFeatVals (DocFeatVals)
import Data.SearchEngine.DocTermIds (DocTermIds)
import qualified Data.SearchEngine.BM25F as BM25F
import Data.Ix
import Data.Array.Unboxed
data SearchConfig doc key field feature = SearchConfig {
documentKey :: doc -> key,
extractDocumentTerms :: doc -> field -> [Term],
transformQueryTerm :: Term -> field -> Term,
documentFeatureValue :: doc -> feature -> Float
}
data SearchRankParameters field feature = SearchRankParameters {
paramK1 :: !Float,
paramB :: field -> Float,
paramFieldWeights :: field -> Float,
paramFeatureWeights :: feature -> Float,
paramFeatureFunctions :: feature -> BM25F.FeatureFunction,
paramResultsetSoftLimit :: !Int,
paramResultsetHardLimit :: !Int,
paramAutosuggestPrefilterLimit :: !Int,
paramAutosuggestPostfilterLimit :: !Int
}
data SearchEngine doc key field feature = SearchEngine {
searchIndex :: !(SearchIndex key field feature),
searchConfig :: !(SearchConfig doc key field feature),
searchRankParams :: !(SearchRankParameters field feature),
sumFieldLengths :: !(UArray field Int),
bm25Context :: BM25F.Context TermId field feature
}
invariant :: (Ord key, Ix field, Bounded field) =>
SearchEngine doc key field feature -> Bool
invariant SearchEngine{searchIndex} =
SI.invariant searchIndex
initSearchEngine :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
SearchConfig doc key field feature ->
SearchRankParameters field feature ->
SearchEngine doc key field feature
initSearchEngine config params =
cacheBM25Context
SearchEngine {
searchIndex = SI.emptySearchIndex,
searchConfig = config,
searchRankParams = params,
sumFieldLengths = listArray (minBound, maxBound) (repeat 0),
bm25Context = undefined
}
cacheBM25Context :: Ix field =>
SearchEngine doc key field feature ->
SearchEngine doc key field feature
cacheBM25Context
se@SearchEngine {
searchRankParams = SearchRankParameters{..},
searchIndex,
sumFieldLengths
}
= se { bm25Context = bm25Context' }
where
bm25Context' = BM25F.Context {
BM25F.numDocsTotal = SI.docCount searchIndex,
BM25F.avgFieldLength = \f -> fromIntegral (sumFieldLengths ! f)
/ fromIntegral (SI.docCount searchIndex),
BM25F.numDocsWithTerm = DocIdSet.size . SI.lookupTermId searchIndex,
BM25F.paramK1 = paramK1,
BM25F.paramB = paramB,
BM25F.fieldWeight = paramFieldWeights,
BM25F.featureWeight = paramFeatureWeights,
BM25F.featureFunction = paramFeatureFunctions
}
data NoFeatures = NoFeatures
deriving (Eq, Ord, Bounded, Show)
instance Ix NoFeatures where
range _ = []
inRange _ _ = False
index _ _ = 1
noFeatures :: NoFeatures -> a
noFeatures _ = error "noFeatures"