-- | Parallel versions of 'filter' and 'simpleFilter'

module Text.Fuzzy.Parallel
(   filter, filter',
    simpleFilter, simpleFilter',
    match, defChunkSize, defMaxResults,
    Scored(..)
) where

import           Control.Parallel.Strategies (evalList, parList, rseq, using)
import           Data.Bits                   ((.|.))
import           Data.Maybe                  (fromMaybe, mapMaybe)
import qualified Data.Text                   as T
import qualified Data.Text.Array             as TA
import qualified Data.Text.Internal          as T
import           Prelude                     hiding (filter)

data Scored a = Scored {forall a. Scored a -> Int
score :: !Int, forall a. Scored a -> a
original:: !a}
  deriving ((forall a b. (a -> b) -> Scored a -> Scored b)
-> (forall a b. a -> Scored b -> Scored a) -> Functor Scored
forall a b. a -> Scored b -> Scored a
forall a b. (a -> b) -> Scored a -> Scored b
forall (f :: * -> *).
(forall a b. (a -> b) -> f a -> f b)
-> (forall a b. a -> f b -> f a) -> Functor f
$cfmap :: forall a b. (a -> b) -> Scored a -> Scored b
fmap :: forall a b. (a -> b) -> Scored a -> Scored b
$c<$ :: forall a b. a -> Scored b -> Scored a
<$ :: forall a b. a -> Scored b -> Scored a
Functor, Int -> Scored a -> ShowS
[Scored a] -> ShowS
Scored a -> String
(Int -> Scored a -> ShowS)
-> (Scored a -> String) -> ([Scored a] -> ShowS) -> Show (Scored a)
forall a. Show a => Int -> Scored a -> ShowS
forall a. Show a => [Scored a] -> ShowS
forall a. Show a => Scored a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: forall a. Show a => Int -> Scored a -> ShowS
showsPrec :: Int -> Scored a -> ShowS
$cshow :: forall a. Show a => Scored a -> String
show :: Scored a -> String
$cshowList :: forall a. Show a => [Scored a] -> ShowS
showList :: [Scored a] -> ShowS
Show)

-- | Returns the rendered output and the
-- matching score for a pattern and a text.
-- Two examples are given below:
--
-- >>> match "fnt" "infinite"
-- Just 3
--
-- >>> match "hsk" "Haskell"
-- Just 5
--
{-# INLINABLE match #-}
match :: T.Text    -- ^ Pattern in lowercase except for first character
      -> T.Text    -- ^ The text to search in.
      -> Maybe Int -- ^ The score
match :: Text -> Text -> Maybe Int
match (T.Text Array
pArr Int
pOff Int
pLen) (T.Text Array
sArr Int
sOff Int
sLen) = Int -> Int -> Int -> Int -> Maybe Int
forall {t}. Num t => t -> t -> Int -> Int -> Maybe t
go Int
0 Int
1 Int
pOff Int
sOff
  where
    pTotal :: Int
pTotal = Int
pOff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
pLen
    sDelta :: Int
sDelta = Int
sOff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
sLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
pTotal

    go :: t -> t -> Int -> Int -> Maybe t
go !t
totalScore !t
currScore !Int
currPOff !Int
currSOff
      -- If pattern has been matched in full
      | Int
currPOff Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
pTotal
      = t -> Maybe t
forall a. a -> Maybe a
Just t
totalScore
      -- If there is not enough left to match the rest of the pattern, equivalent to
      -- (sOff + sLen - currSOff) < (pOff + pLen - currPOff)
      | Int
currSOff Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
currPOff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
sDelta
      = Maybe t
forall a. Maybe a
Nothing
      -- This is slightly broken for non-ASCII:
      -- 1. If code units, consisting a single pattern code point, are found as parts
      --    of different code points, it counts as a match. Unless you use a ton of emojis
      --    as identifiers, such false positives should not be be a big deal,
      --    and anyways HLS does not currently support such use cases, because it uses
      --    code point and UTF-16 code unit positions interchangeably.
      -- 2. Case conversions is not applied to non-ASCII code points, because one has
      --    to call T.toLower (not T.map toLower), reallocating the string in full, which
      --    is too much of performance penalty for fuzzy search. Again, anyway HLS does not
      --    attempt to do justice to Unicode: proper Unicode text matching requires
      --    `unicode-transforms` and friends.
      -- Altogether we sacrifice correctness for the sake of performance, which
      -- is a right trade-off for fuzzy search.
      | Word8
pByte <- Array -> Int -> Word8
TA.unsafeIndex Array
pArr Int
currPOff
      , Word8
sByte <- Array -> Int -> Word8
TA.unsafeIndex Array
sArr Int
currSOff
      -- First byte (currPOff == pOff) should match exactly, otherwise - up to case.
      , Word8
pByte Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
sByte Bool -> Bool -> Bool
|| (Int
currPOff Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
pOff Bool -> Bool -> Bool
&& Word8
pByte Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8 -> Word8
forall {a}. (Ord a, Num a, Bits a) => a -> a
toLowerAscii Word8
sByte)
      = let curr :: t
curr = t
currScore t -> t -> t
forall a. Num a => a -> a -> a
* t
2 t -> t -> t
forall a. Num a => a -> a -> a
+ t
1 in
        t -> t -> Int -> Int -> Maybe t
go (t
totalScore t -> t -> t
forall a. Num a => a -> a -> a
+ t
curr) t
curr (Int
currPOff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int
currSOff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
      | Bool
otherwise
      = t -> t -> Int -> Int -> Maybe t
go t
totalScore t
0 Int
currPOff (Int
currSOff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)

    toLowerAscii :: a -> a
toLowerAscii a
w = if (a
w a -> a -> a
forall a. Num a => a -> a -> a
- a
65) a -> a -> Bool
forall a. Ord a => a -> a -> Bool
< a
26 then a
w a -> a -> a
forall a. Bits a => a -> a -> a
.|. a
0x20 else a
w

-- | Sensible default value for chunk size to use when calling simple filter.
defChunkSize :: Int
defChunkSize :: Int
defChunkSize = Int
1000

-- | Sensible default value for the number of max results to use when calling simple filter.
defMaxResults :: Int
defMaxResults :: Int
defMaxResults = Int
10

-- | Return all elements of the list that have a fuzzy
-- match against the pattern. Runs with default settings where
-- nothing is added around the matches, as case insensitive.
--
-- >>> simpleFilter 1000 10 "vm" ["vim", "emacs", "virtual machine"]
-- [Scored {score = 4, original = "vim"},Scored {score = 4, original = "virtual machine"}]
{-# INLINABLE simpleFilter #-}
simpleFilter :: Int      -- ^ Chunk size. 1000 works well.
             -> Int      -- ^ Max. number of results wanted
             -> T.Text   -- ^ Pattern to look for.
             -> [T.Text] -- ^ List of texts to check.
             -> [Scored T.Text] -- ^ The ones that match.
simpleFilter :: Int -> Int -> Text -> [Text] -> [Scored Text]
simpleFilter Int
chunk Int
maxRes Text
pattern [Text]
xs =
  Int -> Int -> Text -> [Text] -> (Text -> Text) -> [Scored Text]
forall t. Int -> Int -> Text -> [t] -> (t -> Text) -> [Scored t]
filter Int
chunk Int
maxRes Text
pattern [Text]
xs Text -> Text
forall a. a -> a
id


-- | The function to filter a list of values by fuzzy search on the text extracted from them,
-- using a custom matching function which determines how close words are.
filter' :: Int           -- ^ Chunk size. 1000 works well.
       -> Int           -- ^ Max. number of results wanted
       -> T.Text        -- ^ Pattern.
       -> [t]           -- ^ The list of values containing the text to search in.
       -> (t -> T.Text) -- ^ The function to extract the text from the container.
       -> (T.Text -> T.Text -> Maybe Int)
       -- ^ Custom scoring function to use for calculating how close words are
       -- When the function returns Nothing, this means the values are incomparable.
       -> [Scored t]    -- ^ The list of results, sorted, highest score first.
filter' :: forall t.
Int
-> Int
-> Text
-> [t]
-> (t -> Text)
-> (Text -> Text -> Maybe Int)
-> [Scored t]
filter' Int
chunkSize Int
maxRes Text
pattern [t]
ts t -> Text
extract Text -> Text -> Maybe Int
match' = Int -> Int -> [Scored t] -> [Scored t]
forall t. Int -> Int -> [Scored t] -> [Scored t]
partialSortByAscScore Int
maxRes Int
perfectScore ([[Scored t]] -> [Scored t]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[Scored t]]
vss)
  where
      -- Preserve case for the first character, make all others lowercase
      pattern' :: Text
pattern' = case Text -> Maybe (Char, Text)
T.uncons Text
pattern of
        Just (Char
c, Text
rest) -> Char -> Text -> Text
T.cons Char
c (Text -> Text
T.toLower Text
rest)
        Maybe (Char, Text)
_              -> Text
pattern
      vss :: [[Scored t]]
vss = ([t] -> [Scored t]) -> [[t]] -> [[Scored t]]
forall a b. (a -> b) -> [a] -> [b]
map ((t -> Maybe (Scored t)) -> [t] -> [Scored t]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe (\t
t -> (Int -> t -> Scored t) -> t -> Int -> Scored t
forall a b c. (a -> b -> c) -> b -> a -> c
flip Int -> t -> Scored t
forall a. Int -> a -> Scored a
Scored t
t (Int -> Scored t) -> Maybe Int -> Maybe (Scored t)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> Text -> Maybe Int
match' Text
pattern' (t -> Text
extract t
t))) (Int -> [t] -> [[t]]
forall a. Int -> [a] -> [[a]]
chunkList Int
chunkSize [t]
ts)
        [[Scored t]] -> Strategy [[Scored t]] -> [[Scored t]]
forall a. a -> Strategy a -> a
`using` Strategy [Scored t] -> Strategy [[Scored t]]
forall a. Strategy a -> Strategy [a]
parList (Strategy (Scored t) -> Strategy [Scored t]
forall a. Strategy a -> Strategy [a]
evalList Strategy (Scored t)
forall a. Strategy a
rseq)
      perfectScore :: Int
perfectScore = Int -> Maybe Int -> Int
forall a. a -> Maybe a -> a
fromMaybe (String -> Int
forall a. HasCallStack => String -> a
error (String -> Int) -> String -> Int
forall a b. (a -> b) -> a -> b
$ Text -> String
T.unpack Text
pattern) (Maybe Int -> Int) -> Maybe Int -> Int
forall a b. (a -> b) -> a -> b
$ Text -> Text -> Maybe Int
match' Text
pattern' Text
pattern'

-- | The function to filter a list of values by fuzzy search on the text extracted from them,
-- using a custom matching function which determines how close words are.
filter :: Int           -- ^ Chunk size. 1000 works well.
       -> Int           -- ^ Max. number of results wanted
       -> T.Text        -- ^ Pattern.
       -> [t]           -- ^ The list of values containing the text to search in.
       -> (t -> T.Text) -- ^ The function to extract the text from the container.
       -> [Scored t]    -- ^ The list of results, sorted, highest score first.
filter :: forall t. Int -> Int -> Text -> [t] -> (t -> Text) -> [Scored t]
filter Int
chunkSize Int
maxRes Text
pattern [t]
ts t -> Text
extract =
  Int
-> Int
-> Text
-> [t]
-> (t -> Text)
-> (Text -> Text -> Maybe Int)
-> [Scored t]
forall t.
Int
-> Int
-> Text
-> [t]
-> (t -> Text)
-> (Text -> Text -> Maybe Int)
-> [Scored t]
filter' Int
chunkSize Int
maxRes Text
pattern [t]
ts t -> Text
extract Text -> Text -> Maybe Int
match

-- | Return all elements of the list that have a fuzzy match against the pattern,
-- the closeness of the match is determined using the custom scoring match function that is passed.
-- Runs with default settings where nothing is added around the matches, as case insensitive.
{-# INLINABLE simpleFilter' #-}
simpleFilter' :: Int      -- ^ Chunk size. 1000 works well.
             -> Int      -- ^ Max. number of results wanted
             -> T.Text   -- ^ Pattern to look for.
             -> [T.Text] -- ^ List of texts to check.
             -> (T.Text -> T.Text -> Maybe Int)
             -- ^ Custom scoring function to use for calculating how close words are
             -> [Scored T.Text] -- ^ The ones that match.
simpleFilter' :: Int
-> Int
-> Text
-> [Text]
-> (Text -> Text -> Maybe Int)
-> [Scored Text]
simpleFilter' Int
chunk Int
maxRes Text
pattern [Text]
xs Text -> Text -> Maybe Int
match' =
  Int
-> Int
-> Text
-> [Text]
-> (Text -> Text)
-> (Text -> Text -> Maybe Int)
-> [Scored Text]
forall t.
Int
-> Int
-> Text
-> [t]
-> (t -> Text)
-> (Text -> Text -> Maybe Int)
-> [Scored t]
filter' Int
chunk Int
maxRes Text
pattern [Text]
xs Text -> Text
forall a. a -> a
id Text -> Text -> Maybe Int
match'
--------------------------------------------------------------------------------

chunkList :: Int -> [a] -> [[a]]
chunkList :: forall a. Int -> [a] -> [[a]]
chunkList Int
chunkSize = [a] -> [[a]]
forall {a}. [a] -> [[a]]
go
  where
    go :: [a] -> [[a]]
go [] = []
    go [a]
xs = [a]
ys [a] -> [[a]] -> [[a]]
forall a. a -> [a] -> [a]
: [a] -> [[a]]
go [a]
zs
      where
        ([a]
ys, [a]
zs) = Int -> [a] -> ([a], [a])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
chunkSize [a]
xs

-- | A stable partial sort ascending by score. O(N) best case, O(wanted*N) worst case
partialSortByAscScore
            :: Int  -- ^ Number of items needed
            -> Int  -- ^ Value of a perfect score
            -> [Scored t]
            -> [Scored t]
partialSortByAscScore :: forall t. Int -> Int -> [Scored t] -> [Scored t]
partialSortByAscScore Int
wantedCount Int
perfectScore [Scored t]
orig = [Scored t] -> SortState Any -> [Scored t] -> [Scored t]
forall {k} {a :: k}.
[Scored t] -> SortState a -> [Scored t] -> [Scored t]
loop [Scored t]
orig (Int -> Int -> Int -> SortState Any
forall {k} (a :: k). Int -> Int -> Int -> SortState a
SortState Int
forall a. Bounded a => a
minBound Int
perfectScore Int
0) [] where
  loop :: [Scored t] -> SortState a -> [Scored t] -> [Scored t]
loop [] st :: SortState a
st@SortState{Int
bestScoreSeen :: Int
scoreWanted :: Int
foundCount :: Int
bestScoreSeen :: forall {k} (a :: k). SortState a -> Int
scoreWanted :: forall {k} (a :: k). SortState a -> Int
foundCount :: forall {k} (a :: k). SortState a -> Int
..} [Scored t]
acc
    | Int
foundCount Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
wantedCount = [Scored t] -> [Scored t]
forall a. [a] -> [a]
reverse [Scored t]
acc
    | Bool
otherwise = if Int
bestScoreSeen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
scoreWanted
        then [Scored t] -> SortState a -> [Scored t] -> [Scored t]
loop [Scored t]
orig SortState a
st{scoreWanted = bestScoreSeen, bestScoreSeen = minBound} [Scored t]
acc
        else [Scored t] -> [Scored t]
forall a. [a] -> [a]
reverse [Scored t]
acc
  loop (Scored t
x : [Scored t]
xs) st :: SortState a
st@SortState{Int
bestScoreSeen :: forall {k} (a :: k). SortState a -> Int
scoreWanted :: forall {k} (a :: k). SortState a -> Int
foundCount :: forall {k} (a :: k). SortState a -> Int
bestScoreSeen :: Int
scoreWanted :: Int
foundCount :: Int
..} [Scored t]
acc
    | Int
foundCount Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
wantedCount = [Scored t] -> [Scored t]
forall a. [a] -> [a]
reverse [Scored t]
acc
    | Scored t -> Int
forall a. Scored a -> Int
score Scored t
x Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
scoreWanted
    = [Scored t] -> SortState a -> [Scored t] -> [Scored t]
loop [Scored t]
xs SortState a
st{foundCount = foundCount+1} (Scored t
xScored t -> [Scored t] -> [Scored t]
forall a. a -> [a] -> [a]
:[Scored t]
acc)
    | Scored t -> Int
forall a. Scored a -> Int
score Scored t
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
scoreWanted Bool -> Bool -> Bool
&& Scored t -> Int
forall a. Scored a -> Int
score Scored t
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
bestScoreSeen
    = [Scored t] -> SortState a -> [Scored t] -> [Scored t]
loop [Scored t]
xs SortState a
st{bestScoreSeen = score x} [Scored t]
acc
    | Bool
otherwise
    = [Scored t] -> SortState a -> [Scored t] -> [Scored t]
loop [Scored t]
xs SortState a
st [Scored t]
acc

data SortState a = SortState
  { forall {k} (a :: k). SortState a -> Int
bestScoreSeen :: !Int
  , forall {k} (a :: k). SortState a -> Int
scoreWanted   :: !Int
  , forall {k} (a :: k). SortState a -> Int
foundCount    :: !Int
  }
  deriving Int -> SortState a -> ShowS
[SortState a] -> ShowS
SortState a -> String
(Int -> SortState a -> ShowS)
-> (SortState a -> String)
-> ([SortState a] -> ShowS)
-> Show (SortState a)
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall k (a :: k). Int -> SortState a -> ShowS
forall k (a :: k). [SortState a] -> ShowS
forall k (a :: k). SortState a -> String
$cshowsPrec :: forall k (a :: k). Int -> SortState a -> ShowS
showsPrec :: Int -> SortState a -> ShowS
$cshow :: forall k (a :: k). SortState a -> String
show :: SortState a -> String
$cshowList :: forall k (a :: k). [SortState a] -> ShowS
showList :: [SortState a] -> ShowS
Show