-- | Parallel versions of 'filter' and 'simpleFilter'
module Text.Fuzzy.Parallel
(   filter,
    simpleFilter,
    -- reexports
    Fuzzy(..),
    match
) where

import           Control.Monad.ST            (runST)
import           Control.Parallel.Strategies (Eval, Strategy, evalTraversable,
                                              parTraversable, rseq, using)
import           Data.Function               (on)
import           Data.Monoid.Textual         (TextualMonoid)
import           Data.Ord                    (Down (Down))
import           Data.Vector                 (Vector, (!))
import qualified Data.Vector                 as V
-- need to use a stable sort
import           Data.Bifunctor              (second)
import qualified Data.Vector.Algorithms.Tim  as VA
import           Prelude                     hiding (filter)
import           Text.Fuzzy                  (Fuzzy (..), match)

-- | The function to filter a list of values by fuzzy search on the text extracted from them.
--
-- >>> length $ filter 1000 200 "ML" (concat $ replicate 10000 [("Standard ML", 1990),("OCaml",1996),("Scala",2003)]) "<" ">" fst False
-- 200
filter :: (TextualMonoid s)
       => Int      -- ^ Chunk size. 1000 works well.
       -> s        -- ^ Pattern.
       -> [t]      -- ^ The list of values containing the text to search in.
       -> s        -- ^ The text to add before each match.
       -> s        -- ^ The text to add after each match.
       -> (t -> s) -- ^ The function to extract the text from the container.
       -> Bool     -- ^ Case sensitivity.
       -> [Fuzzy t s] -- ^ The list of results, sorted, highest score first.
filter :: Int -> s -> [t] -> s -> s -> (t -> s) -> Bool -> [Fuzzy t s]
filter Int
chunkSize s
pattern [t]
ts s
pre s
post t -> s
extract Bool
caseSen = (forall s. ST s [Fuzzy t s]) -> [Fuzzy t s]
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s [Fuzzy t s]) -> [Fuzzy t s])
-> (forall s. ST s [Fuzzy t s]) -> [Fuzzy t s]
forall a b. (a -> b) -> a -> b
$ do
  let v :: Vector (Fuzzy t s)
v = ((Maybe (Fuzzy t s) -> Maybe (Fuzzy t s))
-> Vector (Maybe (Fuzzy t s)) -> Vector (Fuzzy t s)
forall a b. (a -> Maybe b) -> Vector a -> Vector b
V.mapMaybe Maybe (Fuzzy t s) -> Maybe (Fuzzy t s)
forall a. a -> a
id
             ((t -> Maybe (Fuzzy t s)) -> Vector t -> Vector (Maybe (Fuzzy t s))
forall a b. (a -> b) -> Vector a -> Vector b
V.map (\t
t -> s -> t -> s -> s -> (t -> s) -> Bool -> Maybe (Fuzzy t s)
forall s t.
TextualMonoid s =>
s -> t -> s -> s -> (t -> s) -> Bool -> Maybe (Fuzzy t s)
match s
pattern t
t s
pre s
post t -> s
extract Bool
caseSen) ([t] -> Vector t
forall a. [a] -> Vector a
V.fromList [t]
ts)
             Vector (Maybe (Fuzzy t s))
-> Strategy (Vector (Maybe (Fuzzy t s)))
-> Vector (Maybe (Fuzzy t s))
forall a. a -> Strategy a -> a
`using`
             Int
-> Strategy (Maybe (Fuzzy t s))
-> Strategy (Vector (Maybe (Fuzzy t s)))
forall a. Int -> Strategy a -> Vector a -> Eval (Vector a)
parVectorChunk Int
chunkSize (Strategy (Fuzzy t s) -> Strategy (Maybe (Fuzzy t s))
forall (t :: * -> *) a.
Traversable t =>
Strategy a -> Strategy (t a)
evalTraversable Strategy (Fuzzy t s)
forall s t. TextualMonoid s => Fuzzy t s -> Eval (Fuzzy t s)
forceScore)))
  MVector s (Fuzzy t s)
v' <- Vector (Fuzzy t s) -> ST s (MVector (PrimState (ST s)) (Fuzzy t s))
forall (m :: * -> *) a.
PrimMonad m =>
Vector a -> m (MVector (PrimState m) a)
V.unsafeThaw Vector (Fuzzy t s)
v
  Comparison (Fuzzy t s)
-> MVector (PrimState (ST s)) (Fuzzy t s) -> ST s ()
forall (m :: * -> *) (v :: * -> * -> *) e.
(PrimMonad m, MVector v e) =>
Comparison e -> v (PrimState m) e -> m ()
VA.sortBy (Down Int -> Down Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Down Int -> Down Int -> Ordering)
-> (Fuzzy t s -> Down Int) -> Comparison (Fuzzy t s)
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (Int -> Down Int
forall a. a -> Down a
Down (Int -> Down Int) -> (Fuzzy t s -> Int) -> Fuzzy t s -> Down Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Fuzzy t s -> Int
forall t s. TextualMonoid s => Fuzzy t s -> Int
score)) MVector s (Fuzzy t s)
MVector (PrimState (ST s)) (Fuzzy t s)
v'
  Vector (Fuzzy t s)
v'' <- MVector (PrimState (ST s)) (Fuzzy t s) -> ST s (Vector (Fuzzy t s))
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> m (Vector a)
V.unsafeFreeze MVector s (Fuzzy t s)
MVector (PrimState (ST s)) (Fuzzy t s)
v'
  return $ Vector (Fuzzy t s) -> [Fuzzy t s]
forall a. Vector a -> [a]
V.toList Vector (Fuzzy t s)
v''

-- | Return all elements of the list that have a fuzzy
-- match against the pattern. Runs with default settings where
-- nothing is added around the matches, as case insensitive.
--
-- >>> simpleFilter "vm" ["vim", "emacs", "virtual machine"]
-- ["vim","virtual machine"]
{-# INLINABLE simpleFilter #-}
simpleFilter :: (TextualMonoid s)
             => Int -- ^ Chunk size. 1000 works well.
             -> s   -- ^ Pattern to look for.
             -> [s] -- ^ List of texts to check.
             -> [s] -- ^ The ones that match.
simpleFilter :: Int -> s -> [s] -> [s]
simpleFilter Int
chunk s
pattern [s]
xs =
  (Fuzzy s s -> s) -> [Fuzzy s s] -> [s]
forall a b. (a -> b) -> [a] -> [b]
map Fuzzy s s -> s
forall t s. TextualMonoid s => Fuzzy t s -> t
original ([Fuzzy s s] -> [s]) -> [Fuzzy s s] -> [s]
forall a b. (a -> b) -> a -> b
$ Int -> s -> [s] -> s -> s -> (s -> s) -> Bool -> [Fuzzy s s]
forall s t.
TextualMonoid s =>
Int -> s -> [t] -> s -> s -> (t -> s) -> Bool -> [Fuzzy t s]
filter Int
chunk s
pattern [s]
xs s
forall a. Monoid a => a
mempty s
forall a. Monoid a => a
mempty s -> s
forall a. a -> a
id Bool
False

--------------------------------------------------------------------------------

-- | Evaluation that forces the 'score' field
forceScore :: TextualMonoid s => Fuzzy t s -> Eval(Fuzzy t s)
forceScore :: Fuzzy t s -> Eval (Fuzzy t s)
forceScore it :: Fuzzy t s
it@Fuzzy{Int
score :: Int
score :: forall t s. TextualMonoid s => Fuzzy t s -> Int
score} = do
  Int
score' <- Strategy Int
forall a. Strategy a
rseq Int
score
  return Fuzzy t s
it{score :: Int
score = Int
score'}

--------------------------------------------------------------------------------

-- | Divides a vector in chunks, applies the strategy in parallel to each chunk.
parVectorChunk :: Int -> Strategy a -> Vector a -> Eval (Vector a)
parVectorChunk :: Int -> Strategy a -> Vector a -> Eval (Vector a)
parVectorChunk Int
chunkSize Strategy a
st Vector a
v =
    [Vector a] -> Vector a
forall a. [Vector a] -> Vector a
V.concat ([Vector a] -> Vector a) -> Eval [Vector a] -> Eval (Vector a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Vector a -> Eval (Vector a)) -> Strategy [Vector a]
forall (t :: * -> *) a.
Traversable t =>
Strategy a -> Strategy (t a)
parTraversable (Strategy a -> Vector a -> Eval (Vector a)
forall (t :: * -> *) a.
Traversable t =>
Strategy a -> Strategy (t a)
evalTraversable Strategy a
st) (Int -> Vector a -> [Vector a]
forall a. Int -> Vector a -> [Vector a]
chunkVector Int
chunkSize Vector a
v)

-- >>> chunkVector 3 (V.fromList [0..10])
-- >>> chunkVector 3 (V.fromList [0..11])
-- >>> chunkVector 3 (V.fromList [0..12])
-- [[0,1,2],[3,4,5],[6,7,8],[9,10]]
-- [[0,1,2],[3,4,5],[6,7,8],[9,10,11]]
-- [[0,1,2],[3,4,5],[6,7,8],[9,10,11],[12]]
chunkVector :: Int -> Vector a -> [Vector a]
chunkVector :: Int -> Vector a -> [Vector a]
chunkVector Int
chunkSize Vector a
v = do
    let indices :: [(Int, Int)]
indices = Int -> (Int, Int) -> [(Int, Int)]
chunkIndices Int
chunkSize (Int
0,Int
l)
        l :: Int
l = Vector a -> Int
forall a. Vector a -> Int
V.length Vector a
v
    [Int -> [a] -> Vector a
forall a. Int -> [a] -> Vector a
V.fromListN (Int
hInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) [Vector a
v Vector a -> Int -> a
forall a. Vector a -> Int -> a
! Int
j | Int
j <- [Int
l .. Int
h]]
            | (Int
l,Int
h) <- [(Int, Int)]
indices]

-- >>> chunkIndices 3 (0,9)
-- >>> chunkIndices 3 (0,10)
-- >>> chunkIndices 3 (0,11)
-- [(0,2),(3,5),(6,8)]
-- [(0,2),(3,5),(6,8),(9,9)]
-- [(0,2),(3,5),(6,8),(9,10)]
chunkIndices :: Int -> (Int,Int) -> [(Int,Int)]
chunkIndices :: Int -> (Int, Int) -> [(Int, Int)]
chunkIndices Int
chunkSize (Int
from,Int
to) =
  ((Int, Int) -> (Int, Int)) -> [(Int, Int)] -> [(Int, Int)]
forall a b. (a -> b) -> [a] -> [b]
map ((Int -> Int) -> (Int, Int) -> (Int, Int)
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second Int -> Int
forall a. Enum a => a -> a
pred) ([(Int, Int)] -> [(Int, Int)]) -> [(Int, Int)] -> [(Int, Int)]
forall a b. (a -> b) -> a -> b
$
  [Int] -> [(Int, Int)]
forall a. [a] -> [(a, a)]
pairwise ([Int] -> [(Int, Int)]) -> [Int] -> [(Int, Int)]
forall a b. (a -> b) -> a -> b
$
  [Int
from, Int
fromInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
chunkSize .. Int
toInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1] [Int] -> [Int] -> [Int]
forall a. [a] -> [a] -> [a]
++ [Int
to]

pairwise :: [a] -> [(a,a)]
pairwise :: [a] -> [(a, a)]
pairwise []       = []
pairwise [a
_]      = []
pairwise (a
x:a
y:[a]
xs) = (a
x,a
y) (a, a) -> [(a, a)] -> [(a, a)]
forall a. a -> [a] -> [a]
: [a] -> [(a, a)]
forall a. [a] -> [(a, a)]
pairwise (a
ya -> [a] -> [a]
forall a. a -> [a] -> [a]
:[a]
xs)