{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ExistentialQuantification #-}
-- | This modules provides the function `searchSome` for searching the candidates provided by `Vector` `Text`. The information about the location of matches
-- is stored in a length-tagged unboxed vector `S.Vector`. Such vectors have an `Unbox` instances which allows us to store the collection of such mathces in an
-- unboxed `U.Vector`. This significantly reduces the memory usage and pressure on garbage collector. As a result the matchers used by this function are tagged
-- with the number @n@ of needles need to be matched and are provided by `MatcherSized`. An unsized interface is provided by `Matcher` which is existenially
-- quantified over the number of needles. Functions for constructing matching and matchers have both a sized and unsized version.

module Talash.Core ( -- * Types
                     MatcherSized (..) , Matcher (..) , MatchState (..) , MatchPart (..) , MatchFull (..) , SearchSettings (..) , Indices
                     -- * Matchers and matching
                     , makeMatcher
                     -- ** Fuzzy style
                     , fuzzyMatcherSized , fuzzyMatcher , fuzzyMatchSized , fuzzyMatch
                     -- ** Orderless Style
                     , orderlessMatcherSized , orderlessMatcher , orderlessMatchSized , orderlessMatch
                     -- * Search
                     , fuzzySettings , orderlessSettings  ,  searchSome , parts , partsOrderless , minify) where

import Control.Monad.ST (ST, runST)
import qualified Data.Text as T
import Data.Text.AhoCorasick.Automaton
import Data.Text.Utf16
import qualified Data.Vector as V
import qualified Data.Vector.Algorithms.Intro as V
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector.Unboxed.Mutable as M
import qualified Data.Vector.Unboxed.Sized as S
import GHC.TypeNats
import Intro
import Lens.Micro (_1 , _2 , (^.))

-- | The MatcherSized type consists of a state machine for matching a fixed number of needles. The number of matches needed is encoded in the Nat parameterzing
--   the type. Here the purpose is to improve the memory consumption by utlizing the `Unbox` instance for sized tagged unboxed vectors from
--   (vector-sized)[https://hackage.haskell.org/package/vector-sized] package. This significantly reduces the memory consumption. At least in the present
--   implementation there is no benefit for correctness and dealing with the length tag is occasionally annoying.
data MatcherSized (n :: Nat) a = MatcherSized {
                              MatcherSized n a -> CaseSensitivity
caseSensitivity :: CaseSensitivity ,
                              -- | An AhoCorasick state machine from the alfred-margaret package which does the actual string matching
                              MatcherSized n a -> AcMachine a
machina :: {-# UNPACK #-} !(AcMachine a) ,
                              -- | The sizes of the /basic/ needles in code unit indices. The Left Int case is for when the length of all the
                              -- needles is 1 with Int the number of needles.
                              MatcherSized n a -> Either Int (Vector n Int)
sizes :: !(Either Int (S.Vector n Int))}

-- | The existential version of MatcherSized
data Matcher a      = forall n. KnownNat n => Matcher (MatcherSized n a)

-- | The matching process essentially takes the form of a fold with possible early termination over the matches produced. See the runLower from the
--   alfred-margaret. Here MatchState is the return type of this fold and essentially it records the positions of the matches. Here like in alfred-margaret
--   position is the code unit index of the first code unit beyond the match. We can't use the CodeUnitIndex here because it doesn't have an unbox instance.
data MatchState (n :: Nat) a = MatchState {
                                 -- | This is used to record the present extent of the match. What extent means is different to different matching styles.
                                 MatchState n a -> Int
endLocation :: {-# UNPACK #-} !Int ,
                                 -- | The vector recording the position of the matches.
                                 MatchState n a -> Vector n Int
partialMatch :: {-# UNPACK #-} !(S.Vector n Int) ,
                                 -- | Any auxiliary information needed to describe the state of the match.
                                 MatchState n a -> a
aux :: !a} deriving Int -> MatchState n a -> ShowS
[MatchState n a] -> ShowS
MatchState n a -> String
(Int -> MatchState n a -> ShowS)
-> (MatchState n a -> String)
-> ([MatchState n a] -> ShowS)
-> Show (MatchState n a)
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall (n :: Nat) a. Show a => Int -> MatchState n a -> ShowS
forall (n :: Nat) a. Show a => [MatchState n a] -> ShowS
forall (n :: Nat) a. Show a => MatchState n a -> String
showList :: [MatchState n a] -> ShowS
$cshowList :: forall (n :: Nat) a. Show a => [MatchState n a] -> ShowS
show :: MatchState n a -> String
$cshow :: forall (n :: Nat) a. Show a => MatchState n a -> String
showsPrec :: Int -> MatchState n a -> ShowS
$cshowsPrec :: forall (n :: Nat) a. Show a => Int -> MatchState n a -> ShowS
Show

data MatchPart = MatchPart {MatchPart -> Int
matchBegin :: {-# UNPACK #-} !Int , MatchPart -> Int
matchEnd :: {-# UNPACK #-} !Int} deriving Int -> MatchPart -> ShowS
[MatchPart] -> ShowS
MatchPart -> String
(Int -> MatchPart -> ShowS)
-> (MatchPart -> String)
-> ([MatchPart] -> ShowS)
-> Show MatchPart
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [MatchPart] -> ShowS
$cshowList :: [MatchPart] -> ShowS
show :: MatchPart -> String
$cshow :: MatchPart -> String
showsPrec :: Int -> MatchPart -> ShowS
$cshowsPrec :: Int -> MatchPart -> ShowS
Show

-- | The full match consisting of a score for the match and vector consisting of the positions of the match. The score is intended as for bucketing and as a
--   result shouldn't be two large and must be non-negative . For the fuzzy style in this module @n@ contiguous matches contribute @n-1@ to the score. The
--   scores thus range from @0@ to @n-1@ where @n@ is the length of the string to be matched. For orderless style this score is always @0@.
data MatchFull (n :: Nat) = MatchFull {MatchFull n -> Int
scored :: {-# UNPACK #-} !Int , MatchFull n -> Vector n Int
indices :: {-# UNPACK #-} !(S.Vector n Int)} deriving Int -> MatchFull n -> ShowS
[MatchFull n] -> ShowS
MatchFull n -> String
(Int -> MatchFull n -> ShowS)
-> (MatchFull n -> String)
-> ([MatchFull n] -> ShowS)
-> Show (MatchFull n)
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall (n :: Nat). Int -> MatchFull n -> ShowS
forall (n :: Nat). [MatchFull n] -> ShowS
forall (n :: Nat). MatchFull n -> String
showList :: [MatchFull n] -> ShowS
$cshowList :: forall (n :: Nat). [MatchFull n] -> ShowS
show :: MatchFull n -> String
$cshow :: forall (n :: Nat). MatchFull n -> String
showsPrec :: Int -> MatchFull n -> ShowS
$cshowsPrec :: forall (n :: Nat). Int -> MatchFull n -> ShowS
Show

-- | The configuration for a search style with n needles and matcher of type a
data SearchSettings a (n :: Nat) = SearchSettings {
                                     -- | Given the matcher and the candidate text, find a match or return Nothing if there is none.
                                     SearchSettings a n -> a -> Text -> Maybe (MatchFull n)
match :: a -> Text -> Maybe (MatchFull n) ,
                                     -- | The maximum score for a given matcher. It determines the number of buckets.
                                     SearchSettings a n -> a -> Int
fullscore :: a -> Int ,
                                     -- | Maximum number of matches with full score to produce.
                                     SearchSettings a n -> Int
maxFullMatches :: Int ,
                                     -- | The ordering to sort the matches within a given bucket. It is run with two candidates and their corresponding matches.
                                     SearchSettings a n
-> Text -> Vector n Int -> Text -> Vector n Int -> Ordering
orderAs :: Text -> S.Vector n Int -> Text -> S.Vector n Int -> Ordering}

-- | Type synonym for the index of a candidate in the backing vector along with the positions of the matches for it.
type Indices (n :: Nat) = (Int , S.Vector n Int)

-- | Unsafe, use with care. eIndex i return 1 for Left and for Right the element at @i@-th position in the vector. The vector must have at least @i+1@ elements.
-- This uses unsafeIndex so no bound checks are performed.
{-# INLINE eIndex #-}
eIndex :: KnownNat n => Int -> Either a (S.Vector n Int) -> Int
eIndex :: Int -> Either a (Vector n Int) -> Int
eIndex Int
i = (a -> Int)
-> (Vector n Int -> Int) -> Either a (Vector n Int) -> Int
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (Int -> a -> Int
forall a b. a -> b -> a
const Int
1) (Vector n Int -> Int -> Int
forall (n :: Nat) a. Unbox a => Vector n a -> Int -> a
`S.unsafeIndex` Int
i)

{-# INLINEABLE updateMatch #-}
updateMatch :: KnownNat n => Int -> Either Int (S.Vector n Int) -> MatchState n a -> Int -> Int -> a -> MatchState n a
updateMatch :: Int
-> Either Int (Vector n Int)
-> MatchState n a
-> Int
-> Int
-> a
-> MatchState n a
updateMatch !Int
c Either Int (Vector n Int)
l (MatchState !Int
f !Vector n Int
m a
_) !Int
b !Int
e !a
a = Int -> Vector n Int -> a -> MatchState n a
forall (n :: Nat) a. Int -> Vector n Int -> a -> MatchState n a
MatchState Int
e ((Vector Int -> Vector Int) -> Vector n Int -> Vector n Int
forall a b (n :: Nat).
(Vector a -> Vector b) -> Vector n a -> Vector n b
S.withVectorUnsafe ((forall s. MVector s Int -> ST s ()) -> Vector Int -> Vector Int
forall a.
Unbox a =>
(forall s. MVector s a -> ST s ()) -> Vector a -> Vector a
U.modify forall s. MVector s Int -> ST s ()
forall (f :: * -> *).
PrimMonad f =>
MVector (PrimState f) Int -> f ()
doWrites) Vector n Int
m) a
a
  where
    doWrites :: MVector (PrimState f) Int -> f ()
doWrites MVector (PrimState f) Int
s = f Int -> f ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (f Int -> f ()) -> f Int -> f ()
forall a b. (a -> b) -> a -> b
$ (Int -> Int -> f Int) -> Int -> [Int] -> f Int
forall (t :: * -> *) (m :: * -> *) b a.
(Foldable t, Monad m) =>
(b -> a -> m b) -> b -> t a -> m b
foldlM (\Int
d Int
i -> MVector (PrimState f) Int -> Int -> Int -> f ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
M.unsafeWrite MVector (PrimState f) Int
s Int
i Int
d f () -> Int -> f Int
forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> Int
d Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int -> Either Int (Vector n Int) -> Int
forall (n :: Nat) a.
KnownNat n =>
Int -> Either a (Vector n Int) -> Int
eIndex Int
i Either Int (Vector n Int)
l) Int
c [Int
e , Int
eInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1 .. Int
b]

-- | The score for a fuzzy match.
{-# INLINE matchScore #-}
matchScore :: KnownNat n => Either Int (S.Vector n Int) -> S.Vector n Int -> Int
matchScore :: Either Int (Vector n Int) -> Vector n Int -> Int
matchScore Either Int (Vector n Int)
u Vector n Int
v
  | Vector n Int -> Int
forall (n :: Nat) a. KnownNat n => Vector n a -> Int
S.length Vector n Int
v Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0       = Int
0
  | Vector Int
v' <- Vector n Int -> Vector Int
forall (n :: Nat) a. Vector n a -> Vector a
S.fromSized Vector n Int
v   = (Int -> Int -> Int -> Int) -> Int -> Vector Int -> Int
forall b a. Unbox b => (a -> Int -> b -> a) -> a -> Vector b -> a
U.ifoldl' (\ !Int
s !Int
i !Int
cc -> if Int
cc Int -> Int -> Int
forall a. Num a => a -> a -> a
- Vector Int -> Int -> Int
forall a. Unbox a => Vector a -> Int -> a
U.unsafeIndex Vector Int
v' Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int -> Either Int (Vector n Int) -> Int
forall (n :: Nat) a.
KnownNat n =>
Int -> Either a (Vector n Int) -> Int
eIndex Int
i Either Int (Vector n Int)
u then Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1 else Int
s ) Int
0 (Vector Int -> Int)
-> (Vector Int -> Vector Int) -> Vector Int -> Int
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Vector Int -> Vector Int
forall a. Unbox a => Vector a -> Vector a
U.tail (Vector Int -> Int) -> Vector Int -> Int
forall a b. (a -> b) -> a -> b
$ Vector Int
v'

{-# INLINEABLE matchStepFuzzy #-}
matchStepFuzzy :: KnownNat n => Either Int (S.Vector n Int) -> MatchState n () -> Match MatchPart -> Next (MatchState n ())
matchStepFuzzy :: Either Int (Vector n Int)
-> MatchState n () -> Match MatchPart -> Next (MatchState n ())
matchStepFuzzy Either Int (Vector n Int)
l s :: MatchState n ()
s@(MatchState !Int
f !Vector n Int
m ()
_) (Match !CodeUnitIndex
i (MatchPart !Int
b !Int
e))
  | Int
e Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
b Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== (Int -> Int)
-> (Vector n Int -> Int) -> Either Int (Vector n Int) -> Int
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either Int -> Int
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id Vector n Int -> Int
forall (n :: Nat) a. KnownNat n => Vector n a -> Int
S.length Either Int (Vector n Int)
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1                             = MatchState n () -> Next (MatchState n ())
forall a. a -> Next a
Done (MatchState n () -> Next (MatchState n ()))
-> MatchState n () -> Next (MatchState n ())
forall a b. (a -> b) -> a -> b
$ Int
-> Either Int (Vector n Int)
-> MatchState n ()
-> Int
-> Int
-> ()
-> MatchState n ()
forall (n :: Nat) a.
KnownNat n =>
Int
-> Either Int (Vector n Int)
-> MatchState n a
-> Int
-> Int
-> a
-> MatchState n a
updateMatch (CodeUnitIndex -> Int
codeUnitIndex CodeUnitIndex
i) Either Int (Vector n Int)
l MatchState n ()
s Int
b Int
e ()
  | Int
f Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
b                                                    = MatchState n () -> Next (MatchState n ())
forall a. a -> Next a
Step (MatchState n () -> Next (MatchState n ()))
-> MatchState n () -> Next (MatchState n ())
forall a b. (a -> b) -> a -> b
$ Int
-> Either Int (Vector n Int)
-> MatchState n ()
-> Int
-> Int
-> ()
-> MatchState n ()
forall (n :: Nat) a.
KnownNat n =>
Int
-> Either Int (Vector n Int)
-> MatchState n a
-> Int
-> Int
-> a
-> MatchState n a
updateMatch (CodeUnitIndex -> Int
codeUnitIndex CodeUnitIndex
i) Either Int (Vector n Int)
l MatchState n ()
s Int
b Int
e ()
  | Int
f Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
b Bool -> Bool -> Bool
&& Int
e Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
f Bool -> Bool -> Bool
&& Bool
monotonic                                  = MatchState n () -> Next (MatchState n ())
forall a. a -> Next a
Step (MatchState n () -> Next (MatchState n ()))
-> MatchState n () -> Next (MatchState n ())
forall a b. (a -> b) -> a -> b
$ Int
-> Either Int (Vector n Int)
-> MatchState n ()
-> Int
-> Int
-> ()
-> MatchState n ()
forall (n :: Nat) a.
KnownNat n =>
Int
-> Either Int (Vector n Int)
-> MatchState n a
-> Int
-> Int
-> a
-> MatchState n a
updateMatch (CodeUnitIndex -> Int
codeUnitIndex CodeUnitIndex
i) Either Int (Vector n Int)
l MatchState n ()
s Int
b Int
e ()
  | Bool
otherwise                                                     = MatchState n () -> Next (MatchState n ())
forall a. a -> Next a
Step   MatchState n ()
s
  where
    monotonic :: Bool
monotonic = Vector n Int -> Int -> Int
forall (n :: Nat) a. Unbox a => Vector n a -> Int -> a
S.unsafeIndex Vector n Int
m Int
f Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= CodeUnitIndex -> Int
codeUnitIndex CodeUnitIndex
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- (Int -> Int)
-> (Vector n Int -> Int) -> Either Int (Vector n Int) -> Int
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (Int -> Int -> Int
forall a b. a -> b -> a
const (Int -> Int -> Int) -> Int -> Int -> Int
forall a b. (a -> b) -> a -> b
$ Int
eInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
f) (Vector Int -> Int
forall a. (Unbox a, Num a) => Vector a -> a
U.sum (Vector Int -> Int)
-> (Vector n Int -> Vector Int) -> Vector n Int -> Int
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> Int -> Vector Int -> Vector Int
forall a. Unbox a => Int -> Int -> Vector a -> Vector a
U.slice Int
f (Int
eInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
f) (Vector Int -> Vector Int)
-> (Vector n Int -> Vector Int) -> Vector n Int -> Vector Int
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Vector n Int -> Vector Int
forall (n :: Nat) a. Vector n a -> Vector a
S.fromSized) Either Int (Vector n Int)
l

{-# INLINEABLE matchStepOrderless #-}
matchStepOrderless :: KnownNat n => Either Int (S.Vector n Int) -> MatchState n (Int , Int) -> Match Int -> Next (MatchState n (Int , Int))
matchStepOrderless :: Either Int (Vector n Int)
-> MatchState n (Int, Int)
-> Match Int
-> Next (MatchState n (Int, Int))
matchStepOrderless !Either Int (Vector n Int)
lv s :: MatchState n (Int, Int)
s@(MatchState Int
r !Vector n Int
m (!Int
lm , !Int
li)) (Match (CodeUnitIndex !Int
c) !Int
i)
  | Vector n Int -> Int -> Int
forall (n :: Nat) a. Unbox a => Vector n a -> Int -> a
S.unsafeIndex Vector n Int
m Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 Bool -> Bool -> Bool
&& Int
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int -> Either Int (Vector n Int) -> Int
forall (n :: Nat) a.
KnownNat n =>
Int -> Either a (Vector n Int) -> Int
eIndex Int
i Either Int (Vector n Int)
lv Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
li       = MatchState n (Int, Int) -> Next (MatchState n (Int, Int))
forall a. a -> Next a
go   (MatchState n (Int, Int) -> Next (MatchState n (Int, Int)))
-> MatchState n (Int, Int) -> Next (MatchState n (Int, Int))
forall a b. (a -> b) -> a -> b
$ Int -> Vector n Int -> (Int, Int) -> MatchState n (Int, Int)
forall (n :: Nat) a. Int -> Vector n Int -> a -> MatchState n a
MatchState (Int
rInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) ((Vector Int -> Vector Int) -> Vector n Int -> Vector n Int
forall a b (n :: Nat).
(Vector a -> Vector b) -> Vector n a -> Vector n b
S.withVectorUnsafe ((forall s. MVector s Int -> ST s ()) -> Vector Int -> Vector Int
forall a.
Unbox a =>
(forall s. MVector s a -> ST s ()) -> Vector a -> Vector a
U.modify (\MVector s Int
mv -> MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
M.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
mv Int
i Int
c)) Vector n Int
m) (Int
i , Int
c)
  | Vector n Int -> Int -> Int
forall (n :: Nat) a. Unbox a => Vector n a -> Int -> a
S.unsafeIndex Vector n Int
m Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 Bool -> Bool -> Bool
&& Int -> Either Int (Vector n Int) -> Int
forall (n :: Nat) a.
KnownNat n =>
Int -> Either a (Vector n Int) -> Int
eIndex Int
lm Either Int (Vector n Int)
lv Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int -> Either Int (Vector n Int) -> Int
forall (n :: Nat) a.
KnownNat n =>
Int -> Either a (Vector n Int) -> Int
eIndex Int
i Either Int (Vector n Int)
lv  = MatchState n (Int, Int) -> Next (MatchState n (Int, Int))
forall a. a -> Next a
Step (MatchState n (Int, Int) -> Next (MatchState n (Int, Int)))
-> MatchState n (Int, Int) -> Next (MatchState n (Int, Int))
forall a b. (a -> b) -> a -> b
$ Int -> Vector n Int -> (Int, Int) -> MatchState n (Int, Int)
forall (n :: Nat) a. Int -> Vector n Int -> a -> MatchState n a
MatchState Int
r ((Vector Int -> Vector Int) -> Vector n Int -> Vector n Int
forall a b (n :: Nat).
(Vector a -> Vector b) -> Vector n a -> Vector n b
S.withVectorUnsafe ((forall s. MVector s Int -> ST s ()) -> Vector Int -> Vector Int
forall a.
Unbox a =>
(forall s. MVector s a -> ST s ()) -> Vector a -> Vector a
U.modify (\MVector s Int
mv -> MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
M.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
mv Int
i Int
c ST s () -> ST s () -> ST s ()
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
M.write MVector s Int
MVector (PrimState (ST s)) Int
mv Int
lm Int
0)) Vector n Int
m) (Int
i , Int
c)
  | Bool
otherwise                                             = MatchState n (Int, Int) -> Next (MatchState n (Int, Int))
forall a. a -> Next a
Step MatchState n (Int, Int)
s
  where
    go :: a -> Next a
go = if Int
r Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Vector n Int -> Int
forall (n :: Nat) a. KnownNat n => Vector n a -> Int
S.length Vector n Int
m Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1 then a -> Next a
forall a. a -> Next a
Done else a -> Next a
forall a. a -> Next a
Step

kConsecutive :: Int ->  Text -> [Text]
kConsecutive :: Int -> Text -> [Text]
kConsecutive Int
k Text
t = (Text -> Text) -> [Text] -> [Text]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (Int -> Text -> Text
T.take Int
k) ([Text] -> [Text]) -> (Text -> [Text]) -> Text -> [Text]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> [Text] -> [Text]
forall a. Int -> [a] -> [a]
take (Int
1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Text -> Int
T.length Text
t Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
k) ([Text] -> [Text]) -> (Text -> [Text]) -> Text -> [Text]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Text -> [Text]
T.tails (Text -> [Text]) -> Text -> [Text]
forall a b. (a -> b) -> a -> b
$ Text
t

{-# INLINE run #-}
run :: CaseSensitivity -> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
run :: CaseSensitivity
-> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
run CaseSensitivity
IgnoreCase     = a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
forall a v.
a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runLower
run CaseSensitivity
CaseSensitive  = a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
forall a v.
a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runText

-- | A general function to construct a Matcher. Returns Nothing if the string is empty or if the number of needles turns out to be non-positive
makeMatcher :: forall a. CaseSensitivity -> (Text -> Int) -- ^ The function to determine the number of needles from the query string.
                                                          -- The proxy argument is instantiated at the resulting value.
                    -> (forall n. KnownNat n => Proxy n -> CaseSensitivity -> Text -> MatcherSized n a) -- ^ The functions for constructing the matcher
                    -> Text -- ^ The query string
                    -> Maybe (Matcher a) -- ^ Nothing if the string is empty or if the number of needles turns out to be non-positive
makeMatcher :: CaseSensitivity
-> (Text -> Int)
-> (forall (n :: Nat).
    KnownNat n =>
    Proxy n -> CaseSensitivity -> Text -> MatcherSized n a)
-> Text
-> Maybe (Matcher a)
makeMatcher CaseSensitivity
c Text -> Int
lenf forall (n :: Nat).
KnownNat n =>
Proxy n -> CaseSensitivity -> Text -> MatcherSized n a
matf Text
t
  | Text -> Bool
T.null Text
t Bool -> Bool -> Bool
|| Text -> Int
lenf Text
t Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0                                    = Maybe (Matcher a)
forall a. Maybe a
Nothing
  | SomeNat Proxy n
p <- Natural -> SomeNat
someNatVal (Natural -> SomeNat) -> (Text -> Natural) -> Text -> SomeNat
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> Natural
forall a b. (Integral a, Num b) => a -> b
fromIntegralUnsafe (Int -> Natural) -> (Text -> Int) -> Text -> Natural
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Text -> Int
lenf (Text -> SomeNat) -> Text -> SomeNat
forall a b. (a -> b) -> a -> b
$ Text
t    = Matcher a -> Maybe (Matcher a)
forall a. a -> Maybe a
Just (Matcher a -> Maybe (Matcher a))
-> (Text -> Matcher a) -> Text -> Maybe (Matcher a)
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. MatcherSized n a -> Matcher a
forall a (n :: Nat). KnownNat n => MatcherSized n a -> Matcher a
Matcher (MatcherSized n a -> Matcher a)
-> (Text -> MatcherSized n a) -> Text -> Matcher a
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Proxy n -> CaseSensitivity -> Text -> MatcherSized n a
forall (n :: Nat).
KnownNat n =>
Proxy n -> CaseSensitivity -> Text -> MatcherSized n a
matf Proxy n
p CaseSensitivity
c (Text -> Maybe (Matcher a)) -> Text -> Maybe (Matcher a)
forall a b. (a -> b) -> a -> b
$ Text
t

{-# INLINE withSensitivity #-}
withSensitivity :: CaseSensitivity -> Text -> Text
withSensitivity :: CaseSensitivity -> Text -> Text
withSensitivity CaseSensitivity
IgnoreCase    = Text -> Text
lowerUtf16
withSensitivity CaseSensitivity
CaseSensitive = Text -> Text
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id

-- | Constructs the matcher for fuzzy matching. The needles are all possible contigous subtrings of the string being matched. The Nat @n@ must be instantiated at the
--  length @n@ of the query string. They are n choose 2 such substrings, so to the complexity of matching is \(O(m + n^2)\) where @m@ is the length of candidate string.
--  This is a rough (and probably wrong) estimate as the updating the matchstate for each found match is not a constant time operation. Not sure if Aho Corasick is
--  the optimal way for this kind of matching but in practice it seems fast enough.
fuzzyMatcherSized :: KnownNat n => p n -> CaseSensitivity -> Text -> MatcherSized n MatchPart
fuzzyMatcherSized :: p n -> CaseSensitivity -> Text -> MatcherSized n MatchPart
fuzzyMatcherSized p n
_ CaseSensitivity
c Text
t = MatcherSized :: forall (n :: Nat) a.
CaseSensitivity
-> AcMachine a -> Either Int (Vector n Int) -> MatcherSized n a
MatcherSized {caseSensitivity :: CaseSensitivity
caseSensitivity = CaseSensitivity
c , machina :: AcMachine MatchPart
machina = [([CodeUnit], MatchPart)] -> AcMachine MatchPart
forall v. [([CodeUnit], v)] -> AcMachine v
build ([([CodeUnit], MatchPart)] -> AcMachine MatchPart)
-> ([Int] -> [([CodeUnit], MatchPart)])
-> [Int]
-> AcMachine MatchPart
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Int -> [([CodeUnit], MatchPart)])
-> [Int] -> [([CodeUnit], MatchPart)]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Int -> [([CodeUnit], MatchPart)]
go ([Int] -> AcMachine MatchPart) -> [Int] -> AcMachine MatchPart
forall a b. (a -> b) -> a -> b
$ [Text -> Int
T.length Text
t , Text -> Int
T.length Text
t Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1 .. Int
1]
                             , sizes :: Either Int (Vector n Int)
sizes = if Vector n Int -> Int
forall a (n :: Nat). (Unbox a, Num a) => Vector n a -> a
S.sum Vector n Int
sz Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Vector n Int -> Int
forall (n :: Nat) a. KnownNat n => Vector n a -> Int
S.length Vector n Int
sz then Int -> Either Int (Vector n Int)
forall a b. a -> Either a b
Left (Vector n Int -> Int
forall (n :: Nat) a. KnownNat n => Vector n a -> Int
S.length Vector n Int
sz) else Vector n Int -> Either Int (Vector n Int)
forall a b. b -> Either a b
Right Vector n Int
sz }
  where
    sz :: Vector n Int
sz      = Vector n Int -> Maybe (Vector n Int) -> Vector n Int
forall a. a -> Maybe a -> a
fromMaybe (Int -> Vector n Int
forall (n :: Nat) a. (KnownNat n, Unbox a) => a -> Vector n a
S.replicate Int
1) (Maybe (Vector n Int) -> Vector n Int)
-> (Text -> Maybe (Vector n Int)) -> Text -> Vector n Int
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. [Int] -> Maybe (Vector n Int)
forall a (n :: Nat).
(Unbox a, KnownNat n) =>
[a] -> Maybe (Vector n a)
S.fromList ([Int] -> Maybe (Vector n Int))
-> (Text -> [Int]) -> Text -> Maybe (Vector n Int)
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Char -> Int) -> String -> [Int]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map ([CodeUnit] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ([CodeUnit] -> Int) -> (Char -> [CodeUnit]) -> Char -> Int
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Text -> [CodeUnit]
unpackUtf16 (Text -> [CodeUnit]) -> (Char -> Text) -> Char -> [CodeUnit]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. CaseSensitivity -> Text -> Text
withSensitivity CaseSensitivity
c (Text -> Text) -> (Char -> Text) -> Char -> Text
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Char -> Text
T.singleton)  (String -> [Int]) -> (Text -> String) -> Text -> [Int]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Text -> String
T.unpack  (Text -> Vector n Int) -> Text -> Vector n Int
forall a b. (a -> b) -> a -> b
$ Text
t
    go :: Int -> [([CodeUnit], MatchPart)]
go !Int
k   = (Text -> Int -> ([CodeUnit], MatchPart))
-> [Text] -> [Int] -> [([CodeUnit], MatchPart)]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\Text
t' Int
l -> (Text -> [CodeUnit]
unpackUtf16 (Text -> [CodeUnit]) -> (Text -> Text) -> Text -> [CodeUnit]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. CaseSensitivity -> Text -> Text
withSensitivity CaseSensitivity
c (Text -> [CodeUnit]) -> Text -> [CodeUnit]
forall a b. (a -> b) -> a -> b
$ Text
t' , Int -> Int -> MatchPart
MatchPart Int
l (Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
k Int -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1))) (Int -> Text -> [Text]
kConsecutive Int
k Text
t) [Int
0 ..]

-- | Unsized version of fuzzyMatcherSized
fuzzyMatcher :: CaseSensitivity -> Text -> Maybe (Matcher MatchPart)
fuzzyMatcher :: CaseSensitivity -> Text -> Maybe (Matcher MatchPart)
fuzzyMatcher CaseSensitivity
c  = CaseSensitivity
-> (Text -> Int)
-> (forall (n :: Nat).
    KnownNat n =>
    Proxy n -> CaseSensitivity -> Text -> MatcherSized n MatchPart)
-> Text
-> Maybe (Matcher MatchPart)
forall a.
CaseSensitivity
-> (Text -> Int)
-> (forall (n :: Nat).
    KnownNat n =>
    Proxy n -> CaseSensitivity -> Text -> MatcherSized n a)
-> Text
-> Maybe (Matcher a)
makeMatcher CaseSensitivity
c Text -> Int
T.length forall (n :: Nat).
KnownNat n =>
Proxy n -> CaseSensitivity -> Text -> MatcherSized n MatchPart
forall (n :: Nat) (p :: Nat -> *).
KnownNat n =>
p n -> CaseSensitivity -> Text -> MatcherSized n MatchPart
fuzzyMatcherSized

-- | Constructs the matcher for orderless matching, the needles are the words from the query string and the proxy argument should be instantiated at the
--  number of words.
orderlessMatcherSized :: KnownNat n => p n -> CaseSensitivity -> Text -> MatcherSized n Int
orderlessMatcherSized :: p n -> CaseSensitivity -> Text -> MatcherSized n Int
orderlessMatcherSized p n
_ CaseSensitivity
c Text
t = MatcherSized :: forall (n :: Nat) a.
CaseSensitivity
-> AcMachine a -> Either Int (Vector n Int) -> MatcherSized n a
MatcherSized {caseSensitivity :: CaseSensitivity
caseSensitivity = CaseSensitivity
c , machina :: AcMachine Int
machina = [([CodeUnit], Int)] -> AcMachine Int
forall v. [([CodeUnit], v)] -> AcMachine v
build ([([CodeUnit], Int)] -> AcMachine Int)
-> ([Int] -> [([CodeUnit], Int)]) -> [Int] -> AcMachine Int
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. [[CodeUnit]] -> [Int] -> [([CodeUnit], Int)]
forall a b. [a] -> [b] -> [(a, b)]
zip (Text -> [CodeUnit]
unpackUtf16 (Text -> [CodeUnit]) -> [Text] -> [[CodeUnit]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Text]
wrds) ([Int] -> AcMachine Int) -> [Int] -> AcMachine Int
forall a b. (a -> b) -> a -> b
$ [Int
0 ..]
                               , sizes :: Either Int (Vector n Int)
sizes = Vector n Int -> Either Int (Vector n Int)
forall a b. b -> Either a b
Right (Vector n Int -> Either Int (Vector n Int))
-> ([Text] -> Vector n Int) -> [Text] -> Either Int (Vector n Int)
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Vector n Int -> Maybe (Vector n Int) -> Vector n Int
forall a. a -> Maybe a -> a
fromMaybe (Int -> Vector n Int
forall (n :: Nat) a. (KnownNat n, Unbox a) => a -> Vector n a
S.replicate Int
1) (Maybe (Vector n Int) -> Vector n Int)
-> ([Text] -> Maybe (Vector n Int)) -> [Text] -> Vector n Int
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. [Int] -> Maybe (Vector n Int)
forall a (n :: Nat).
(Unbox a, KnownNat n) =>
[a] -> Maybe (Vector n a)
S.fromList ([Int] -> Maybe (Vector n Int))
-> ([Text] -> [Int]) -> [Text] -> Maybe (Vector n Int)
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Text -> Int) -> [Text] -> [Int]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (CodeUnitIndex -> Int
codeUnitIndex (CodeUnitIndex -> Int) -> (Text -> CodeUnitIndex) -> Text -> Int
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Text -> CodeUnitIndex
lengthUtf16) ([Text] -> Either Int (Vector n Int))
-> [Text] -> Either Int (Vector n Int)
forall a b. (a -> b) -> a -> b
$ [Text]
wrds }
  where
    wrds :: [Text]
wrds = CaseSensitivity -> Text -> Text
withSensitivity CaseSensitivity
c (Text -> Text) -> [Text] -> [Text]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> [Text]
T.words Text
t

-- | Unsized version of orderlessMatcherSized
orderlessMatcher :: CaseSensitivity -> Text -> Maybe (Matcher Int)
orderlessMatcher :: CaseSensitivity -> Text -> Maybe (Matcher Int)
orderlessMatcher CaseSensitivity
c = CaseSensitivity
-> (Text -> Int)
-> (forall (n :: Nat).
    KnownNat n =>
    Proxy n -> CaseSensitivity -> Text -> MatcherSized n Int)
-> Text
-> Maybe (Matcher Int)
forall a.
CaseSensitivity
-> (Text -> Int)
-> (forall (n :: Nat).
    KnownNat n =>
    Proxy n -> CaseSensitivity -> Text -> MatcherSized n a)
-> Text
-> Maybe (Matcher a)
makeMatcher CaseSensitivity
c ([Text] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ([Text] -> Int) -> (Text -> [Text]) -> Text -> Int
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Text -> [Text]
T.words) forall (n :: Nat).
KnownNat n =>
Proxy n -> CaseSensitivity -> Text -> MatcherSized n Int
forall (n :: Nat) (p :: Nat -> *).
KnownNat n =>
p n -> CaseSensitivity -> Text -> MatcherSized n Int
orderlessMatcherSized

{-# INLINEABLE fuzzyMatchSized#-}
fuzzyMatchSized :: KnownNat n => MatcherSized n MatchPart -> Text -> Maybe (MatchFull n)
fuzzyMatchSized :: MatcherSized n MatchPart -> Text -> Maybe (MatchFull n)
fuzzyMatchSized (MatcherSized CaseSensitivity
c AcMachine MatchPart
m Either Int (Vector n Int)
l) = MatchState n () -> Maybe (MatchFull n)
forall a. MatchState n a -> Maybe (MatchFull n)
full (MatchState n () -> Maybe (MatchFull n))
-> (Text -> MatchState n ()) -> Text -> Maybe (MatchFull n)
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. CaseSensitivity
-> MatchState n ()
-> (MatchState n () -> Match MatchPart -> Next (MatchState n ()))
-> AcMachine MatchPart
-> Text
-> MatchState n ()
forall a v.
CaseSensitivity
-> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
run CaseSensitivity
c (Int -> Vector n Int -> () -> MatchState n ()
forall (n :: Nat) a. Int -> Vector n Int -> a -> MatchState n a
MatchState (-Int
1) (Int -> Vector n Int
forall (n :: Nat) a. (KnownNat n, Unbox a) => a -> Vector n a
S.replicate Int
0) ()) (Either Int (Vector n Int)
-> MatchState n () -> Match MatchPart -> Next (MatchState n ())
forall (n :: Nat).
KnownNat n =>
Either Int (Vector n Int)
-> MatchState n () -> Match MatchPart -> Next (MatchState n ())
matchStepFuzzy Either Int (Vector n Int)
l) AcMachine MatchPart
m
  where
    full :: MatchState n a -> Maybe (MatchFull n)
full (MatchState !Int
e !Vector n Int
u a
_) = if Int
e Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Vector n Int -> Int
forall (n :: Nat) a. KnownNat n => Vector n a -> Int
S.length Vector n Int
u then MatchFull n -> Maybe (MatchFull n)
forall a. a -> Maybe a
Just (MatchFull n -> Maybe (MatchFull n))
-> MatchFull n -> Maybe (MatchFull n)
forall a b. (a -> b) -> a -> b
$ Int -> Vector n Int -> MatchFull n
forall (n :: Nat). Int -> Vector n Int -> MatchFull n
MatchFull (Either Int (Vector n Int) -> Vector n Int -> Int
forall (n :: Nat).
KnownNat n =>
Either Int (Vector n Int) -> Vector n Int -> Int
matchScore Either Int (Vector n Int)
l Vector n Int
u) Vector n Int
u else Maybe (MatchFull n)
forall a. Maybe a
Nothing

fuzzyMatch :: Matcher MatchPart -> Text -> Maybe [Text]
fuzzyMatch :: Matcher MatchPart -> Text -> Maybe [Text]
fuzzyMatch (Matcher MatcherSized n MatchPart
m) Text
t = Either Int (Vector Int) -> Text -> Vector Int -> [Text]
parts (Vector n Int -> Vector Int
forall (n :: Nat) a. Vector n a -> Vector a
S.fromSized (Vector n Int -> Vector Int)
-> Either Int (Vector n Int) -> Either Int (Vector Int)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MatcherSized n MatchPart -> Either Int (Vector n Int)
forall (n :: Nat) a. MatcherSized n a -> Either Int (Vector n Int)
sizes MatcherSized n MatchPart
m) Text
t (Vector Int -> [Text])
-> (MatchFull n -> Vector Int) -> MatchFull n -> [Text]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Vector n Int -> Vector Int
forall (n :: Nat) a. Vector n a -> Vector a
S.fromSized (Vector n Int -> Vector Int)
-> (MatchFull n -> Vector n Int) -> MatchFull n -> Vector Int
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. MatchFull n -> Vector n Int
forall (n :: Nat). MatchFull n -> Vector n Int
indices (MatchFull n -> [Text]) -> Maybe (MatchFull n) -> Maybe [Text]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MatcherSized n MatchPart -> Text -> Maybe (MatchFull n)
forall (n :: Nat).
KnownNat n =>
MatcherSized n MatchPart -> Text -> Maybe (MatchFull n)
fuzzyMatchSized MatcherSized n MatchPart
m Text
t

{-# INLINEABLE orderlessMatchSized#-}
orderlessMatchSized :: KnownNat n => MatcherSized n Int -> Text -> Maybe (MatchFull n)
orderlessMatchSized :: MatcherSized n Int -> Text -> Maybe (MatchFull n)
orderlessMatchSized (MatcherSized CaseSensitivity
c AcMachine Int
m Either Int (Vector n Int)
l) = MatchState n (Int, Int) -> Maybe (MatchFull n)
forall (n :: Nat) a. MatchState n a -> Maybe (MatchFull n)
full (MatchState n (Int, Int) -> Maybe (MatchFull n))
-> (Text -> MatchState n (Int, Int)) -> Text -> Maybe (MatchFull n)
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. CaseSensitivity
-> MatchState n (Int, Int)
-> (MatchState n (Int, Int)
    -> Match Int -> Next (MatchState n (Int, Int)))
-> AcMachine Int
-> Text
-> MatchState n (Int, Int)
forall a v.
CaseSensitivity
-> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
run CaseSensitivity
c (Int -> Vector n Int -> (Int, Int) -> MatchState n (Int, Int)
forall (n :: Nat) a. Int -> Vector n Int -> a -> MatchState n a
MatchState Int
0 (Int -> Vector n Int
forall (n :: Nat) a. (KnownNat n, Unbox a) => a -> Vector n a
S.replicate Int
0) (Int
0,Int
0)) (Either Int (Vector n Int)
-> MatchState n (Int, Int)
-> Match Int
-> Next (MatchState n (Int, Int))
forall (n :: Nat).
KnownNat n =>
Either Int (Vector n Int)
-> MatchState n (Int, Int)
-> Match Int
-> Next (MatchState n (Int, Int))
matchStepOrderless Either Int (Vector n Int)
l) AcMachine Int
m
  where
    ln :: Int
ln = (Int -> Int)
-> (Vector n Int -> Int) -> Either Int (Vector n Int) -> Int
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either Int -> Int
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id Vector n Int -> Int
forall (n :: Nat) a. KnownNat n => Vector n a -> Int
S.length Either Int (Vector n Int)
l
    full :: MatchState n a -> Maybe (MatchFull n)
full MatchState n a
u = if MatchState n a -> Int
forall (n :: Nat) a. MatchState n a -> Int
endLocation MatchState n a
u Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
ln then MatchFull n -> Maybe (MatchFull n)
forall a. a -> Maybe a
Just (MatchFull n -> Maybe (MatchFull n))
-> MatchFull n -> Maybe (MatchFull n)
forall a b. (a -> b) -> a -> b
$ Int -> Vector n Int -> MatchFull n
forall (n :: Nat). Int -> Vector n Int -> MatchFull n
MatchFull Int
0 (MatchState n a -> Vector n Int
forall (n :: Nat) a. MatchState n a -> Vector n Int
partialMatch MatchState n a
u) else Maybe (MatchFull n)
forall a. Maybe a
Nothing

orderlessMatch :: Matcher Int -> Text -> Maybe [Text]
orderlessMatch :: Matcher Int -> Text -> Maybe [Text]
orderlessMatch (Matcher MatcherSized n Int
m) Text
t = Either Int (Vector Int) -> Text -> Vector Int -> [Text]
partsOrderless (Vector n Int -> Vector Int
forall (n :: Nat) a. Vector n a -> Vector a
S.fromSized (Vector n Int -> Vector Int)
-> Either Int (Vector n Int) -> Either Int (Vector Int)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MatcherSized n Int -> Either Int (Vector n Int)
forall (n :: Nat) a. MatcherSized n a -> Either Int (Vector n Int)
sizes MatcherSized n Int
m) Text
t (Vector Int -> [Text])
-> (MatchFull n -> Vector Int) -> MatchFull n -> [Text]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Vector n Int -> Vector Int
forall (n :: Nat) a. Vector n a -> Vector a
S.fromSized (Vector n Int -> Vector Int)
-> (MatchFull n -> Vector n Int) -> MatchFull n -> Vector Int
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. MatchFull n -> Vector n Int
forall (n :: Nat). MatchFull n -> Vector n Int
indices (MatchFull n -> [Text]) -> Maybe (MatchFull n) -> Maybe [Text]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MatcherSized n Int -> Text -> Maybe (MatchFull n)
forall (n :: Nat).
KnownNat n =>
MatcherSized n Int -> Text -> Maybe (MatchFull n)
orderlessMatchSized MatcherSized n Int
m Text
t

-- | The parts of a string resulting from a match using the fuzzy matcher.
parts :: Either Int (U.Vector Int) -- ^ The information about the lengths of different needles.
  -> Text -- ^ The candidate string that has been matched
  -> U.Vector Int -- ^ The vector recording the positions of the needle in the matched string.
  -> [Text] -- ^ The candidate string split up according to  the match
parts :: Either Int (Vector Int) -> Text -> Vector Int -> [Text]
parts Either Int (Vector Int)
v Text
t = ([Text], CodeUnitIndex) -> [Text]
done (([Text], CodeUnitIndex) -> [Text])
-> (Vector Int -> ([Text], CodeUnitIndex)) -> Vector Int -> [Text]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (([Text], CodeUnitIndex)
 -> CodeUnitIndex -> ([Text], CodeUnitIndex))
-> ([Text], CodeUnitIndex)
-> [CodeUnitIndex]
-> ([Text], CodeUnitIndex)
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' ([Text], CodeUnitIndex) -> CodeUnitIndex -> ([Text], CodeUnitIndex)
cut ([] , Text -> CodeUnitIndex
lengthUtf16 Text
t) ([CodeUnitIndex] -> ([Text], CodeUnitIndex))
-> (Vector Int -> [CodeUnitIndex])
-> Vector Int
-> ([Text], CodeUnitIndex)
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Either Int (Vector Int) -> Vector Int -> [CodeUnitIndex]
minify Either Int (Vector Int)
v
  where
    done :: ([Text], CodeUnitIndex) -> [Text]
done ([Text]
ms , CodeUnitIndex
cp) = CodeUnitIndex -> CodeUnitIndex -> Text -> Text
unsafeSliceUtf16 CodeUnitIndex
0 CodeUnitIndex
cp Text
t Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: [Text]
ms
    cut :: ([Text], CodeUnitIndex) -> CodeUnitIndex -> ([Text], CodeUnitIndex)
cut  (![Text]
ms , !CodeUnitIndex
cp) !CodeUnitIndex
cc  = (CodeUnitIndex -> CodeUnitIndex -> Text -> Text
unsafeSliceUtf16 CodeUnitIndex
cc (CodeUnitIndex
cp CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
cc) Text
t Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: [Text]
ms , CodeUnitIndex
cc )

-- | The parts of a string resulting from a match using the orderless matcher. See parts for an explanation of arguments.
partsOrderless :: Either Int (U.Vector Int) -> Text -> U.Vector Int -> [Text]
partsOrderless :: Either Int (Vector Int) -> Text -> Vector Int -> [Text]
partsOrderless Either Int (Vector Int)
v Text
t Vector Int
u = Either Int (Vector Int) -> Text -> Vector Int -> [Text]
parts ((Vector Int -> Vector Int)
-> Either Int (Vector Int) -> Either Int (Vector Int)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (Vector Int -> Vector Int -> Vector Int
forall a. Unbox a => Vector a -> Vector Int -> Vector a
`U.backpermute` (Vector Int, Vector Int) -> Vector Int
forall a b. (a, b) -> a
fst (Vector Int, Vector Int)
up) Either Int (Vector Int)
v) Text
t ((Vector Int, Vector Int) -> Vector Int
forall a b. (a, b) -> b
snd (Vector Int, Vector Int)
up)
  where
    up :: (Vector Int, Vector Int)
up = Vector (Int, Int) -> (Vector Int, Vector Int)
forall a b.
(Unbox a, Unbox b) =>
Vector (a, b) -> (Vector a, Vector b)
U.unzip (Vector (Int, Int) -> (Vector Int, Vector Int))
-> (Vector Int -> Vector (Int, Int))
-> Vector Int
-> (Vector Int, Vector Int)
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (forall s. MVector s (Int, Int) -> ST s ())
-> Vector (Int, Int) -> Vector (Int, Int)
forall a.
Unbox a =>
(forall s. MVector s a -> ST s ()) -> Vector a -> Vector a
U.modify (Comparison (Int, Int)
-> MVector (PrimState (ST s)) (Int, Int) -> ST s ()
forall (m :: * -> *) (v :: * -> * -> *) e.
(PrimMonad m, MVector v e) =>
Comparison e -> v (PrimState m) e -> m ()
V.sortBy (((Int, Int) -> Int) -> Comparison (Int, Int)
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (Int, Int) -> Int
forall a b. (a, b) -> b
snd)) (Vector (Int, Int) -> Vector (Int, Int))
-> (Vector Int -> Vector (Int, Int))
-> Vector Int
-> Vector (Int, Int)
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Int -> Int -> (Int, Int)) -> Vector Int -> Vector (Int, Int)
forall a b.
(Unbox a, Unbox b) =>
(Int -> a -> b) -> Vector a -> Vector b
U.imap (,) (Vector Int -> (Vector Int, Vector Int))
-> Vector Int -> (Vector Int, Vector Int)
forall a b. (a -> b) -> a -> b
$ Vector Int
u

{-# INLINE eIndexU #-}
eIndexU :: Int -> Either a (U.Vector Int) -> Int
eIndexU :: Int -> Either a (Vector Int) -> Int
eIndexU Int
i = (a -> Int) -> (Vector Int -> Int) -> Either a (Vector Int) -> Int
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (Int -> a -> Int
forall a b. a -> b -> a
const Int
1) (Vector Int -> Int -> Int
forall a. Unbox a => Vector a -> Int -> a
`U.unsafeIndex` Int
i)

-- | Shorten a match by collapsing the contiguous sub-matches together.
minify :: Either Int (U.Vector Int) -> U.Vector Int -> [CodeUnitIndex]
minify :: Either Int (Vector Int) -> Vector Int -> [CodeUnitIndex]
minify Either Int (Vector Int)
v Vector Int
s
  | Vector Int -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector Int
s Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1       = (Int -> CodeUnitIndex) -> [Int] -> [CodeUnitIndex]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map Int -> CodeUnitIndex
CodeUnitIndex [Int
a , Int
a Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int -> Either Int (Vector Int) -> Int
forall a. Int -> Either a (Vector Int) -> Int
eIndexU Int
0 Either Int (Vector Int)
v]
  | Bool
otherwise             = (Int -> CodeUnitIndex) -> [Int] -> [CodeUnitIndex]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map Int -> CodeUnitIndex
CodeUnitIndex ([Int] -> [CodeUnitIndex])
-> (Vector Int -> [Int]) -> Vector Int -> [CodeUnitIndex]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Vector Int -> Int
forall a. Unbox a => Vector a -> a
U.last Vector Int
s Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:) ([Int] -> [Int]) -> (Vector Int -> [Int]) -> Vector Int -> [Int]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Int, [Int]) -> [Int]
forall a b. (a, b) -> b
snd ((Int, [Int]) -> [Int])
-> (Vector Int -> (Int, [Int])) -> Vector Int -> [Int]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ((Int, [Int]) -> Int -> Int -> (Int, [Int]))
-> (Int, [Int]) -> Vector Int -> (Int, [Int])
forall b a. Unbox b => (a -> Int -> b -> a) -> a -> Vector b -> a
U.ifoldl' (Int, [Int]) -> Int -> Int -> (Int, [Int])
go (Int
a , [Int
a Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int -> Either Int (Vector Int) -> Int
forall a. Int -> Either a (Vector Int) -> Int
eIndexU Int
0 Either Int (Vector Int)
v]) (Vector Int -> (Int, [Int]))
-> (Vector Int -> Vector Int) -> Vector Int -> (Int, [Int])
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Vector Int -> Vector Int
forall a. Unbox a => Vector a -> Vector a
U.tail (Vector Int -> [CodeUnitIndex]) -> Vector Int -> [CodeUnitIndex]
forall a b. (a -> b) -> a -> b
$ Vector Int
s
  where
    a :: Int
a = Vector Int -> Int
forall a. Unbox a => Vector a -> a
U.unsafeHead Vector Int
s
    go :: (Int, [Int]) -> Int -> Int -> (Int, [Int])
go (!Int
l , [Int]
s) !Int
i !Int
c = if Int
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int -> Either Int (Vector Int) -> Int
forall a. Int -> Either a (Vector Int) -> Int
eIndexU (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Either Int (Vector Int)
v then (Int
c , [Int]
s) else (Int
c , Int
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int -> Either Int (Vector Int) -> Int
forall a. Int -> Either a (Vector Int) -> Int
eIndexU (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Either Int (Vector Int)
v Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: Int
l Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: [Int]
s )
-- | The default ordering used in this module to sort matches within a given bucket. Prefers the matche for which the last part is closest to the end. To tie
-- break prefers the shorter matched string.
defOrdering :: Text -> S.Vector n Int -> Text -> S.Vector n Int -> Ordering
defOrdering :: Text -> Vector n Int -> Text -> Vector n Int -> Ordering
defOrdering Text
t1 Vector n Int
s1 Text
t2 Vector n Int
s2
  | Ordering
el Ordering -> Ordering -> Bool
forall a. Eq a => a -> a -> Bool
== Ordering
EQ   = Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Text -> Int
T.length Text
t1) (Text -> Int
T.length Text
t2)
  | Bool
otherwise  = Ordering
el
  where
    el :: Ordering
el = Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Text -> Int
T.length Text
t1 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Vector Int -> Int
forall a. (Unbox a, Ord a) => Vector a -> a
U.maximum (Vector n Int -> Vector Int
forall (n :: Nat) a. Vector n a -> Vector a
S.fromSized Vector n Int
s1)) (Text -> Int
T.length Text
t2 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Vector Int -> Int
forall a. (Unbox a, Ord a) => Vector a -> a
U.maximum (Vector n Int -> Vector Int
forall (n :: Nat) a. Vector n a -> Vector a
S.fromSized Vector n Int
s2))

-- | Search functions suitable for fuzzy matching. The candidate @c@ will match query @s@ if @c@ contains all the characters in @s@ in order. In general there
--   can be several ways of matching. This tries to find a match with minimum number of parts of. It does not find the minimum number of parts, if that requires
--   reducing the extent of the partial match during search. E.g. matching @"as"@ against @"talash"@ the split will be @["tal","as","h"]@ and not
--   @["t","a","la","s","h"]@. While matching @"talash best match testing hat"@ against @"tea"@ will not result in @["talash best match ","te","sting h","a","t"]@ since
--   @"te"@ occurs only after we have match all three letters and we can't know if we will find the @"a"@ without going through the string.
fuzzySettings :: KnownNat n => Int -> SearchSettings (MatcherSized n MatchPart) n
fuzzySettings :: Int -> SearchSettings (MatcherSized n MatchPart) n
fuzzySettings !Int
m = SearchSettings :: forall a (n :: Nat).
(a -> Text -> Maybe (MatchFull n))
-> (a -> Int)
-> Int
-> (Text -> Vector n Int -> Text -> Vector n Int -> Ordering)
-> SearchSettings a n
SearchSettings { match :: MatcherSized n MatchPart -> Text -> Maybe (MatchFull n)
match = MatcherSized n MatchPart -> Text -> Maybe (MatchFull n)
forall (n :: Nat).
KnownNat n =>
MatcherSized n MatchPart -> Text -> Maybe (MatchFull n)
fuzzyMatchSized , fullscore :: MatcherSized n MatchPart -> Int
fullscore = \MatcherSized n MatchPart
t -> (Int -> Int)
-> (Vector n Int -> Int) -> Either Int (Vector n Int) -> Int
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either Int -> Int
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id Vector n Int -> Int
forall (n :: Nat) a. KnownNat n => Vector n a -> Int
S.length (MatcherSized n MatchPart -> Either Int (Vector n Int)
forall (n :: Nat) a. MatcherSized n a -> Either Int (Vector n Int)
sizes MatcherSized n MatchPart
t) Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1 , maxFullMatches :: Int
maxFullMatches = Int
m , orderAs :: Text -> Vector n Int -> Text -> Vector n Int -> Ordering
orderAs = Text -> Vector n Int -> Text -> Vector n Int -> Ordering
forall (n :: Nat).
Text -> Vector n Int -> Text -> Vector n Int -> Ordering
defOrdering}

-- | Search functions that match the words in i.e. space separated substring in any order. @"talash best"@ will match @"be as"@ with the split
--   @["tal","as","h","be","st"]@ but @"talash best"@ will not match @"bet"@.
orderlessSettings :: KnownNat n => Int -> SearchSettings (MatcherSized n Int) n
orderlessSettings :: Int -> SearchSettings (MatcherSized n Int) n
orderlessSettings Int
n = SearchSettings :: forall a (n :: Nat).
(a -> Text -> Maybe (MatchFull n))
-> (a -> Int)
-> Int
-> (Text -> Vector n Int -> Text -> Vector n Int -> Ordering)
-> SearchSettings a n
SearchSettings {match :: MatcherSized n Int -> Text -> Maybe (MatchFull n)
match = MatcherSized n Int -> Text -> Maybe (MatchFull n)
forall (n :: Nat).
KnownNat n =>
MatcherSized n Int -> Text -> Maybe (MatchFull n)
orderlessMatchSized , fullscore :: MatcherSized n Int -> Int
fullscore = Int -> MatcherSized n Int -> Int
forall a b. a -> b -> a
const Int
0, maxFullMatches :: Int
maxFullMatches = Int
n , orderAs :: Text -> Vector n Int -> Text -> Vector n Int -> Ordering
orderAs = Text -> Vector n Int -> Text -> Vector n Int -> Ordering
forall (n :: Nat).
Text -> Vector n Int -> Text -> Vector n Int -> Ordering
defOrdering}

-- | Given a matcher, search for matches in a vector of text. This function only searches for matches among the strings at indices which are in 3rd argument.
searchSome :: forall a n. KnownNat n => SearchSettings a n -- ^ The configuration for finding matches
  -> a -- ^ The matcher
  -> V.Vector Text -- ^ The vector of candidates
  -> U.Vector Int -- ^ The subset of indices of candidates to search from
  -> (U.Vector Int , U.Vector (Indices n)) -- ^ The new set of filtered indices in the vector and the vector containing the indices for each match found.
searchSome :: SearchSettings a n
-> a
-> Vector Text
-> Vector Int
-> (Vector Int, Vector (Indices n))
searchSome SearchSettings a n
config !a
t Vector Text
v Vector Int
i = (forall s. ST s (Vector Int, Vector (Indices n)))
-> (Vector Int, Vector (Indices n))
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s (Vector Int, Vector (Indices n)))
 -> (Vector Int, Vector (Indices n)))
-> (forall s. ST s (Vector Int, Vector (Indices n)))
-> (Vector Int, Vector (Indices n))
forall a b. (a -> b) -> a -> b
$ (STVector s Int
 -> Vector (STVector s (Indices n))
 -> ST s (Vector Int, Vector (Indices n)))
-> (STVector s Int, Vector (STVector s (Indices n)))
-> ST s (Vector Int, Vector (Indices n))
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry (SearchSettings a n
-> a
-> Vector Text
-> Vector Int
-> STVector s Int
-> Vector (STVector s (Indices n))
-> ST s (Vector Int, Vector (Indices n))
forall s a (n :: Nat).
KnownNat n =>
SearchSettings a n
-> a
-> Vector Text
-> Vector Int
-> STVector s Int
-> Vector (STVector s (Indices n))
-> ST s (Vector Int, Vector (Indices n))
searchSomeST SearchSettings a n
config a
t Vector Text
v Vector Int
i)
                                   ((STVector s Int, Vector (STVector s (Indices n)))
 -> ST s (Vector Int, Vector (Indices n)))
-> ST s (STVector s Int, Vector (STVector s (Indices n)))
-> ST s (Vector Int, Vector (Indices n))
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< ((,) (STVector s Int
 -> Vector (STVector s (Indices n))
 -> (STVector s Int, Vector (STVector s (Indices n))))
-> ST s (STVector s Int)
-> ST
     s
     (Vector (STVector s (Indices n))
      -> (STVector s Int, Vector (STVector s (Indices n))))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> ST s (MVector (PrimState (ST s)) Int)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> m (MVector (PrimState m) a)
M.unsafeNew (Vector Int -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector Int
i)
                                            ST
  s
  (Vector (STVector s (Indices n))
   -> (STVector s Int, Vector (STVector s (Indices n))))
-> ST s (Vector (STVector s (Indices n)))
-> ST s (STVector s Int, Vector (STVector s (Indices n)))
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Int
-> ST s (STVector s (Indices n))
-> ST s (Vector (STVector s (Indices n)))
forall (m :: * -> *) a. Monad m => Int -> m a -> m (Vector a)
V.replicateM (SearchSettings a n -> a -> Int
forall a (n :: Nat). SearchSettings a n -> a -> Int
fullscore SearchSettings a n
config a
t Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int -> ST s (MVector (PrimState (ST s)) (Indices n))
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> m (MVector (PrimState m) a)
M.unsafeNew (Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (Vector Int -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector Int
i) (Int -> Int) -> Int -> Int
forall a b. (a -> b) -> a -> b
$ SearchSettings a n -> Int
forall a (n :: Nat). SearchSettings a n -> Int
maxFullMatches SearchSettings a n
config)))

-- This functions is somewhat ridiculous and probably over optimized. Its only purpose is to be used in searchSome. searchSome can more simply be written as
-- a mapMaybe followed by a sort but this doesn't allow for a early termination of matching once we have found enough matches. A streaming solution will probably
-- be simpler for generating the candidates but will still need to be read into a vector and sorted so for now this works and is fast enough.
searchSomeST :: forall s a n. KnownNat n => SearchSettings a n -- ^
  -> a -> V.Vector Text -> U.Vector Int -> M.STVector s Int -- ^ The mutable vector into which we write the new filtered indices
  -> V.Vector (M.STVector s (Indices n)) -- ^ The vector @mv@ containing the buckets i.e. mutable vectors one for each possible score gathering the matches
  ->  ST s (U.Vector Int , U.Vector (Indices n)) -- ^ The final vector of filtered indices and the vector of
searchSomeST :: SearchSettings a n
-> a
-> Vector Text
-> Vector Int
-> STVector s Int
-> Vector (STVector s (Indices n))
-> ST s (Vector Int, Vector (Indices n))
searchSomeST SearchSettings a n
config !a
t Vector Text
v Vector Int
i STVector s Int
mi Vector (STVector s (Indices n))
mv = (Int, Vector Int) -> ST s (Vector Int, Vector (Indices n))
end ((Int, Vector Int) -> ST s (Vector Int, Vector (Indices n)))
-> ST s (Int, Vector Int) -> ST s (Vector Int, Vector (Indices n))
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< ((Int, Vector Int) -> Int -> ST s (Int, Vector Int))
-> (Int, Vector Int) -> Vector Int -> ST s (Int, Vector Int)
forall (m :: * -> *) b a.
(Monad m, Unbox b) =>
(a -> b -> m a) -> a -> Vector b -> m a
U.foldM' (Int, Vector Int) -> Int -> ST s (Int, Vector Int)
go (Int
0, Int -> Int -> Vector Int
forall a. Unbox a => Int -> a -> Vector a
U.replicate (SearchSettings a n -> a -> Int
forall a (n :: Nat). SearchSettings a n -> a -> Int
fullscore SearchSettings a n
config a
t Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Int
0) Vector Int
i
  where
    -- If we don't have enough matches with the highest score, @cc@ goes through other score in the decreasing order filling up the bucket of highest order.
    -- Stopping when either the bucket is full or else we have run through all the scores. Before filling each bucket is sorted according the ordering supplied  
    -- by the configuration. Once the filling is done this bucket is frozen see `sortTake` and returned along with the frozen vector of filtered indcies see `end`.
    end :: (Int, Vector Int) -> ST s (Vector Int, Vector (Indices n))
end (Int
l , Vector Int
u)  = (,) (Vector Int
 -> Vector (Indices n) -> (Vector Int, Vector (Indices n)))
-> ST s (Vector Int)
-> ST s (Vector (Indices n) -> (Vector Int, Vector (Indices n)))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (MVector (PrimState (ST s)) Int -> ST s (Vector Int)
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
U.unsafeFreeze (MVector (PrimState (ST s)) Int -> ST s (Vector Int))
-> (STVector s Int -> MVector (PrimState (ST s)) Int)
-> STVector s Int
-> ST s (Vector Int)
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> STVector s Int -> STVector s Int
forall a s. Unbox a => Int -> MVector s a -> MVector s a
M.unsafeTake Int
l (STVector s Int -> ST s (Vector Int))
-> STVector s Int -> ST s (Vector Int)
forall a b. (a -> b) -> a -> b
$ STVector s Int
mi) ST s (Vector (Indices n) -> (Vector Int, Vector (Indices n)))
-> ST s (Vector (Indices n))
-> ST s (Vector Int, Vector (Indices n))
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Vector Int -> ST s (Vector (Indices n))
sortTake Vector Int
u
    sortTake :: Vector Int -> ST s (Vector (Indices n))
sortTake Vector Int
u = (\Int
l -> MVector (PrimState (ST s)) (Indices n) -> ST s (Vector (Indices n))
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
U.unsafeFreeze (MVector (PrimState (ST s)) (Indices n)
 -> ST s (Vector (Indices n)))
-> (STVector s (Indices n)
    -> MVector (PrimState (ST s)) (Indices n))
-> STVector s (Indices n)
-> ST s (Vector (Indices n))
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> STVector s (Indices n) -> STVector s (Indices n)
forall a s. Unbox a => Int -> MVector s a -> MVector s a
M.unsafeTake Int
l (STVector s (Indices n) -> ST s (Vector (Indices n)))
-> STVector s (Indices n) -> ST s (Vector (Indices n))
forall a b. (a -> b) -> a -> b
$ STVector s (Indices n)
msv) (Int -> ST s (Vector (Indices n)))
-> ST s Int -> ST s (Vector (Indices n))
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< (Int -> Int -> ST s ()
doSort (-Int
1) (Vector Int -> Int
forall a. Unbox a => Vector a -> a
U.unsafeLast Vector Int
u) ST s () -> ST s Int -> ST s Int
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Int -> Int -> Int -> ST s Int) -> Int -> Vector Int -> ST s Int
forall (m :: * -> *) b a.
(Monad m, Unbox b) =>
(a -> Int -> b -> m a) -> a -> Vector b -> m a
U.ifoldM' Int -> Int -> Int -> ST s Int
cc (Vector Int -> Int
forall a. Unbox a => Vector a -> a
U.unsafeLast Vector Int
u) (Vector Int -> Vector Int
forall a. Unbox a => Vector a -> Vector a
U.reverse (Vector Int -> Vector Int)
-> (Vector Int -> Vector Int) -> Vector Int -> Vector Int
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Vector Int -> Vector Int
forall a. Unbox a => Vector a -> Vector a
U.unsafeInit (Vector Int -> Vector Int) -> Vector Int -> Vector Int
forall a b. (a -> b) -> a -> b
$ Vector Int
u))
    msv :: STVector s (Indices n)
msv = Vector (STVector s (Indices n)) -> STVector s (Indices n)
forall a. Vector a -> a
V.unsafeLast Vector (STVector s (Indices n))
mv
    cc :: Int -> Int -> Int -> ST s Int
cc Int
r Int
ix Int
n
      | Int
r Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= SearchSettings a n -> Int
forall a (n :: Nat). SearchSettings a n -> Int
maxFullMatches SearchSettings a n
config     = Int -> ST s Int
forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
r
      | Int
r Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= SearchSettings a n -> Int
forall a (n :: Nat). SearchSettings a n -> Int
maxFullMatches SearchSettings a n
config = Int -> Int -> ST s ()
doSort Int
ix Int
n ST s () -> ST s () -> ST s ()
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Int -> Int -> Int -> ST s ()
doFill Int
r (SearchSettings a n -> a -> Int
forall a (n :: Nat). SearchSettings a n -> a -> Int
fullscore SearchSettings a n
config a
t Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
ix) Int
n ST s () -> Int -> ST s Int
forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> Int
r Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n
      | Bool
otherwise                      = Int -> Int -> ST s ()
doSort Int
ix Int
n ST s () -> ST s () -> ST s ()
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Int -> Int -> Int -> ST s ()
doFill Int
r (SearchSettings a n -> a -> Int
forall a (n :: Nat). SearchSettings a n -> a -> Int
fullscore SearchSettings a n
config a
t Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
ix) (SearchSettings a n -> Int
forall a (n :: Nat). SearchSettings a n -> Int
maxFullMatches SearchSettings a n
config Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
r) ST s () -> Int -> ST s Int
forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> SearchSettings a n -> Int
forall a (n :: Nat). SearchSettings a n -> Int
maxFullMatches SearchSettings a n
config
    doFill :: Int -> Int -> Int -> ST s ()
doFill Int
r Int
ix Int
n = (Int -> ST s ()) -> [Int] -> ST s ()
forall (t :: * -> *) (f :: * -> *) a b.
(Foldable t, Applicative f) =>
(a -> f b) -> t a -> f ()
traverse_ (\Int
k -> MVector (PrimState (ST s)) (Indices n)
-> Int -> Indices n -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
M.unsafeWrite STVector s (Indices n)
MVector (PrimState (ST s)) (Indices n)
msv (Int
rInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
k) (Indices n -> ST s ()) -> ST s (Indices n) -> ST s ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< MVector (PrimState (ST s)) (Indices n) -> Int -> ST s (Indices n)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
M.unsafeRead (Vector (STVector s (Indices n)) -> Int -> STVector s (Indices n)
forall a. Vector a -> Int -> a
V.unsafeIndex Vector (STVector s (Indices n))
mv Int
ix) Int
k) [Int
0 .. Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1]
    doSort :: Int -> Int -> ST s ()
doSort   Int
ix   = Comparison (Indices n)
-> MVector (PrimState (ST s)) (Indices n) -> Int -> Int -> ST s ()
forall (m :: * -> *) (v :: * -> * -> *) e.
(PrimMonad m, MVector v e) =>
Comparison e -> v (PrimState m) e -> Int -> Int -> m ()
V.sortByBounds Comparison (Indices n)
cmfn (Vector (STVector s (Indices n)) -> Int -> STVector s (Indices n)
forall a. Vector a -> Int -> a
V.unsafeIndex Vector (STVector s (Indices n))
mv (SearchSettings a n -> a -> Int
forall a (n :: Nat). SearchSettings a n -> a -> Int
fullscore SearchSettings a n
config a
t Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
ix)) Int
0
    cmfn :: Comparison (Indices n)
cmfn (!Int
i1,!Vector n Int
s1) (!Int
i2,!Vector n Int
s2) =  SearchSettings a n
-> Text -> Vector n Int -> Text -> Vector n Int -> Ordering
forall a (n :: Nat).
SearchSettings a n
-> Text -> Vector n Int -> Text -> Vector n Int -> Ordering
orderAs SearchSettings a n
config (Vector Text -> Int -> Text
forall a. Vector a -> Int -> a
V.unsafeIndex Vector Text
v Int
i1) Vector n Int
s1 (Vector Text -> Int -> Text
forall a. Vector a -> Int -> a
V.unsafeIndex Vector Text
v Int
i2) Vector n Int
s2
    bstMch :: Int -> Maybe (MatchFull n)
bstMch = SearchSettings a n -> a -> Text -> Maybe (MatchFull n)
forall a (n :: Nat).
SearchSettings a n -> a -> Text -> Maybe (MatchFull n)
match SearchSettings a n
config a
t (Text -> Maybe (MatchFull n))
-> (Int -> Text) -> Int -> Maybe (MatchFull n)
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Vector Text -> Int -> Text
forall a. Vector a -> Int -> a
V.unsafeIndex Vector Text
v
    bign :: Int -> Bool
bign   = (Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= SearchSettings a n -> Int
forall a (n :: Nat). SearchSettings a n -> Int
maxFullMatches SearchSettings a n
config)
    -- The idx is the number of total number of matches that have been written up to now. nf is the vector carrying the same information for each bucket.
    -- elm is current index to check for a match. So we check if v ! elem is a match, if it is we write the match into the appropriate bucket and elem into
    -- the vector of filtered indices  and increment idx and the appropriate index of nf. If we have already reached the needed number of matches of full score
    -- we instead just skip matching and write idx into the vector of filtered indices.
    go :: (Int, Vector Int) -> Int -> ST s (Int, Vector Int)
go (!Int
idx , !Vector Int
nf) !Int
elm
      | Int -> Bool
bign (Vector Int -> Int
forall a. Unbox a => Vector a -> a
U.last Vector Int
nf)                                                  = MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
M.unsafeWrite STVector s Int
MVector (PrimState (ST s)) Int
mi Int
idx Int
elm ST s () -> (Int, Vector Int) -> ST s (Int, Vector Int)
forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> (Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 , Vector Int
nf)
      | Just !MatchFull n
mch <- Int -> Maybe (MatchFull n)
bstMch Int
elm , Int -> Bool
bign (Vector Int -> Int -> Int
forall a. Unbox a => Vector a -> Int -> a
U.unsafeIndex Vector Int
nf (MatchFull n -> Int
forall (n :: Nat). MatchFull n -> Int
scored MatchFull n
mch))    = MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
M.unsafeWrite STVector s Int
MVector (PrimState (ST s)) Int
mi Int
idx Int
elm ST s () -> (Int, Vector Int) -> ST s (Int, Vector Int)
forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> (Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 , Vector Int
nf)
      | Just (MatchFull !Int
s !Vector n Int
ixs) <- Int -> Maybe (MatchFull n)
bstMch Int
elm                            = MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
M.unsafeWrite STVector s Int
MVector (PrimState (ST s)) Int
mi Int
idx Int
elm ST s () -> ST s () -> ST s ()
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Int -> Vector n Int -> ST s ()
wrt Int
s Vector n Int
ixs ST s () -> (Int, Vector Int) -> ST s (Int, Vector Int)
forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> Int -> (Int, Vector Int)
inc Int
s
      | Bool
otherwise                                                         = (Int, Vector Int) -> ST s (Int, Vector Int)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int
idx , Vector Int
nf)
      where
        wrt :: Int -> Vector n Int -> ST s ()
wrt Int
s Vector n Int
ixs = MVector (PrimState (ST s)) (Indices n)
-> Int -> Indices n -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
M.unsafeWrite (Vector (STVector s (Indices n)) -> Int -> STVector s (Indices n)
forall a. Vector a -> Int -> a
V.unsafeIndex Vector (STVector s (Indices n))
mv Int
s) (Vector Int -> Int -> Int
forall a. Unbox a => Vector a -> Int -> a
U.unsafeIndex Vector Int
nf Int
s) (Int
elm , Vector n Int
ixs)
        inc :: Int -> (Int, Vector Int)
inc Int
s = (Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 , (forall s. MVector s Int -> ST s ()) -> Vector Int -> Vector Int
forall a.
Unbox a =>
(forall s. MVector s a -> ST s ()) -> Vector a -> Vector a
U.modify (\MVector s Int
mu -> MVector (PrimState (ST s)) Int -> (Int -> Int) -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> (a -> a) -> Int -> m ()
M.unsafeModify MVector s Int
MVector (PrimState (ST s)) Int
mu (Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
s) Vector Int
nf)