{-# LANGUAGE RecordWildCards #-}
module Data.FuzzySet
(
FuzzySet
, emptySet
, defaultSet
, fromList
, add
, addToSet
, addMany
, get
, getWithMinScore
, getOne
, getOneWithMinScore
, size
, isEmpty
, values
) where
import Data.Default (Default, def)
import Data.FuzzySet.Internal
import Data.FuzzySet.Types
import Data.FuzzySet.Util
import Data.HashMap.Strict (HashMap, elems, insert)
import Data.List (find)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import Data.Vector (snoc)
import qualified Data.FuzzySet.Util as Util
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text
import qualified Data.Vector as Vector
-- | Construct a 'FuzzySet' that holds no entries.
--
-- The three internal maps (@exactSet@, @matchDict@ and @items@) start out
-- empty; the remaining constructor arguments are taken from the caller in
-- the constructor's own order: two 'Int's followed by a 'Bool'.
--
-- NOTE(review): the two 'Int's are presumably the lower and upper gram
-- sizes (in that order) and the 'Bool' the @useLevenshtein@ flag — confirm
-- against the field order of 'FuzzySet' in "Data.FuzzySet.Types".
emptySet :: Int -> Int -> Bool -> FuzzySet
emptySet =
    FuzzySet HashMap.empty HashMap.empty HashMap.empty
-- | An empty 'FuzzySet' built with the library's default configuration,
-- i.e. @'emptySet' 2 3 True@.
--
-- NOTE(review): the local names below assume 'emptySet' takes the lower
-- gram size first and the upper gram size second — confirm against the
-- field order of 'FuzzySet'.
defaultSet :: FuzzySet
defaultSet =
    emptySet defaultLower defaultUpper True
  where
    defaultLower, defaultUpper :: Int
    defaultLower = 2
    defaultUpper = 3
-- | The 'Default' value of a 'FuzzySet' is 'defaultSet'.
instance Default FuzzySet where def = defaultSet
-- | Look up a string in the set, returning @(score, text)@ pairs.
--
-- The query is lowercased and first looked up as a key of the exact-match
-- map. On a hit, the stored text is returned as the single result with a
-- score of @1@.
--
-- Otherwise 'getMatches' is run once per gram size, going from the set's
-- upper gram size down to its lower one, and the first non-empty result
-- list is returned. If every gram size yields nothing, the result is @[]@.
getWithMinScore :: Double -> FuzzySet -> Text -> [(Double, Text)]
getWithMinScore minScore set value =
    maybe fuzzyMatches exactMatch (HashMap.lookup normalized (exactSet set))
  where
    -- Keys of the exact-match map are lowercased, so normalise the query.
    normalized = Text.toLower value

    -- An exact hit always gets the top score.
    exactMatch stored = [(1, stored)]

    -- Larger gram sizes are tried first.
    gramSizes = reverse [gramSizeLower set .. gramSizeUpper set]

    -- First gram size that produces any match wins; none means no result.
    fuzzyMatches =
        fromMaybe [] $
            find (not . null) $
                map (getMatches set normalized minScore) gramSizes
-- | Look up a string in the set using a fixed minimum score of @0.33@.
--
-- Equivalent to @'getWithMinScore' 0.33@.
get :: FuzzySet -> Text -> [(Double, Text)]
get set value = getWithMinScore 0.33 set value
-- | Like 'getWithMinScore', but keeps only the text of the first result.
--
-- Returns 'Nothing' when 'getWithMinScore' produces no matches, otherwise
-- 'Just' the text component of the head of the result list.
getOneWithMinScore :: Double -> FuzzySet -> Text -> Maybe Text
getOneWithMinScore minScore fuzzySet value =
    case getWithMinScore minScore fuzzySet value of
        firstMatch : _ -> Just (snd firstMatch)
        []             -> Nothing
-- | Like 'get', but returns only the text of the first result, if any.
--
-- Equivalent to @'getOneWithMinScore' 0.33@.
getOne :: FuzzySet -> Text -> Maybe Text
getOne fuzzySet value = getOneWithMinScore 0.33 fuzzySet value
-- | Add a string to the set and return the resulting set.
--
-- This is 'addToSet' with the \"was it added?\" flag discarded.
add :: FuzzySet -> Text -> FuzzySet
add fuzzySet value = fst (addToSet fuzzySet value)
-- | Try to add a string to the set.
--
-- Returns the (possibly updated) set together with a flag: 'True' when the
-- string was inserted, 'False' when an entry with the same lowercased key
-- was already present, in which case the set is returned unchanged.
--
-- On insertion the string is indexed once for every gram size between the
-- set's lower and upper gram size (inclusive), and finally recorded in the
-- exact-match map under its lowercased key.
addToSet :: FuzzySet -> Text -> ( FuzzySet, Bool )
addToSet set@FuzzySet{ gramSizeLower = lower, gramSizeUpper = upper, .. } value
    -- Membership must be tested against the *keys* of the exact-match map.
    -- 'elem' on a 'HashMap' folds over its values (the original-case
    -- strings), which would miss any existing entry that contains an
    -- uppercase character and let it be indexed a second time.
    | key `HashMap.member` exactSet =
        ( set, False )
    | otherwise =
        ( newSet |> updateExactSet value, True )
  where
    -- The set after indexing the new entry for every configured gram size.
    newSet = foldr addSize set (enumFromTo lower upper)

    -- Entries are keyed by their lowercased form.
    key = Text.toLower value

    -- Index the new entry for a single gram size: append an item to that
    -- size's item vector and register each of the entry's grams in the
    -- match dictionary.
    addSize :: Int -> FuzzySet -> FuzzySet
    addSize gramSize FuzzySet{..} =
        let
            item = FuzzySetItem (elems grams |> Util.norm) key
        in
        FuzzySet{ items = items |> insert gramSize (itemVector `snoc` item)
                , matchDict = grams |> HashMap.foldrWithKey updateDict matchDict
                , .. }
      where
        -- Prepend a 'GramInfo' for this gram to its dictionary entry,
        -- creating the entry if the gram has not been seen before.
        updateDict gram count =
            let
                -- The item is appended to the end of 'itemVector', so the
                -- vector's current length is the index it will occupy.
                info = GramInfo (Vector.length itemVector) count
            in
            HashMap.alter (\maybeInfos -> Just $ info : fromMaybe [] maybeInfos) gram

        -- Items already stored for this gram size (empty if none yet).
        itemVector =
            items
                |> HashMap.lookup gramSize
                |> fromMaybe Vector.empty

        -- Grams of the lowercased entry for this gram size, as produced by
        -- 'gramVector'.
        grams =
            gramVector key gramSize

    -- Record the original string in the exact-match map under its
    -- lowercased key.
    updateExactSet :: Text -> FuzzySet -> FuzzySet
    updateExactSet value FuzzySet{..} =
        FuzzySet{ exactSet = exactSet |> insert key value
                , .. }
-- | Add every string of a list to the set.
--
-- The list is consumed with a right fold, so elements are passed to 'add'
-- starting from the end of the list.
addMany :: FuzzySet -> [Text] -> FuzzySet
addMany set entries =
    foldr (\entry acc -> add acc entry) set entries
-- | Build a set from a list of strings, starting from 'defaultSet' and
-- inserting the strings with 'addMany'.
fromList :: [Text] -> FuzzySet
fromList entries = addMany defaultSet entries
-- | Number of entries in the set, measured as the size of its exact-match
-- map.
size :: FuzzySet -> Int
size set = HashMap.size (exactSet set)
-- | 'True' when the set's exact-match map has no entries.
isEmpty :: FuzzySet -> Bool
isEmpty set = HashMap.null (exactSet set)
-- | All strings stored in the set, i.e. the values of its exact-match map.
-- The order is whatever the underlying 'HashMap' yields.
values :: FuzzySet -> [Text]
values set = HashMap.elems (exactSet set)