{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ExistentialQuantification #-}
{-# OPTIONS_GHC -Wno-duplicate-exports #-}
-- | This modules provides the function `searchSome` for searching the candidates provided by `Vector` `Text`. The information about the location of matches
-- is stored in a length-tagged unboxed vector `S.Vector`. Such vectors have an `Unbox` instances which allows us to store the collection of such mathces in an
-- unboxed `U.Vector`. This significantly reduces the memory usage and pressure on garbage collector. As a result the matchers used by this function are tagged
-- with the number @n@ of needles need to be matched and are provided by `MatcherSized`. An unsized interface is provided by `Matcher` which is existenially
-- quantified over the number of needles. Functions for constructing matching and matchers have both a sized and unsized version.

module Talash.Core ( -- * Types
                     MatcherSized (..) , Matcher (..) , MatchState (..) , MatchPart (..) , MatchFull (..) , SearchSettings (..) , Indices
                     -- * Matchers and matching
                     , makeMatcher , emptyMatcher
                     -- ** Fuzzy style
                     , fuzzyMatcherSized , fuzzyMatcher , fuzzyMatchSized , fuzzyMatch , fuzzyMatchParts , fuzzyMatchPartsAs
                     -- ** Orderless Style
                     , orderlessMatcherSized , orderlessMatcher , orderlessMatchSized , orderlessMatch , orderlessMatchParts , orderlessMatchPartsAs
                     -- * Search
                     , fuzzySettings , orderlessSettings , parts , partsAs , partsOrderless , partsOrderlessAs , minify) where

import qualified Data.Text as T
import Data.Text.AhoCorasick.Automaton
import Data.Text.Utf8 hiding (indices)
import qualified Data.Vector.Algorithms.Intro as V
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector.Unboxed.Mutable as M
import qualified Data.Vector.Unboxed.Sized as S
import GHC.TypeNats
import Talash.Intro hiding (someNatVal)

-- | The MatcherSized type consists of a state machine for matching a fixed number of needles. The number of matches needed is encoded in the Nat parameterzing
--   the type. Here the purpose is to improve the memory consumption by utlizing the `Unbox` instance for sized tagged unboxed vectors from
--   (vector-sized)[https://hackage.haskell.org/package/vector-sized] package. This significantly reduces the memory consumption. At least in the present
--   implementation there is no benefit for correctness and dealing with the length tag is occasionally annoying.
data MatcherSized (n :: Nat) a = MatcherSized {
                              forall (n :: Nat) a. MatcherSized n a -> CaseSensitivity
caseSensitivity :: CaseSensitivity ,
                              -- | An AhoCorasick state machine from the alfred-margaret package which does the actual string matching
                              forall (n :: Nat) a. MatcherSized n a -> AcMachine a
machina :: {-# UNPACK #-} !(AcMachine a) ,
                              -- | The sizes of the /basic/ needles in code unit indices. The Left Int case is for when the length of all the
                              -- needles is 1 with Int the number of needles.
                              forall (n :: Nat) a. MatcherSized n a -> Either Int (Vector n Int)
sizes :: !(Either Int (S.Vector n Int))}

-- | The existential version of MatcherSized
data Matcher a      = forall n. KnownNat n => Matcher (MatcherSized n a)

-- | The matching process essentially takes the form of a fold with possible early termination over the matches produced. See the runLower from the
--   alfred-margaret. Here MatchState is the return type of this fold and essentially it records the positions of the matches. Here like in alfred-margaret
--   position is the code unit index of the first code unit beyond the match. We can't use the CodeUnitIndex here because it doesn't have an unbox instance.
data MatchState (n :: Nat) a = MatchState {
                                 -- | This is used to record the present extent of the match. What extent means is different to different matching styles.
                                 forall (n :: Nat) a. MatchState n a -> Int
endLocation :: {-# UNPACK #-} !Int ,
                                 -- | The vector recording the position of the matches.
                                 forall (n :: Nat) a. MatchState n a -> Vector n Int
partialMatch :: {-# UNPACK #-} !(S.Vector n Int) ,
                                 -- | Any auxiliary information needed to describe the state of the match.
                                 forall (n :: Nat) a. MatchState n a -> a
aux :: !a} deriving Int -> MatchState n a -> ShowS
forall (n :: Nat) a. Show a => Int -> MatchState n a -> ShowS
forall (n :: Nat) a. Show a => [MatchState n a] -> ShowS
forall (n :: Nat) a. Show a => MatchState n a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [MatchState n a] -> ShowS
$cshowList :: forall (n :: Nat) a. Show a => [MatchState n a] -> ShowS
show :: MatchState n a -> String
$cshow :: forall (n :: Nat) a. Show a => MatchState n a -> String
showsPrec :: Int -> MatchState n a -> ShowS
$cshowsPrec :: forall (n :: Nat) a. Show a => Int -> MatchState n a -> ShowS
Show

data MatchPart = MatchPart {MatchPart -> Int
matchBegin :: {-# UNPACK #-} !Int , MatchPart -> Int
matchEnd :: {-# UNPACK #-} !Int} deriving Int -> MatchPart -> ShowS
[MatchPart] -> ShowS
MatchPart -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [MatchPart] -> ShowS
$cshowList :: [MatchPart] -> ShowS
show :: MatchPart -> String
$cshow :: MatchPart -> String
showsPrec :: Int -> MatchPart -> ShowS
$cshowsPrec :: Int -> MatchPart -> ShowS
Show

-- | The full match consisting of a score for the match and vector consisting of the positions of the match. The score is intended as for bucketing and as a
--   result shouldn't be two large and must be non-negative . For the fuzzy style in this module @n@ contiguous matches contribute @n-1@ to the score. The
--   scores thus range from @0@ to @n-1@ where @n@ is the length of the string to be matched. For orderless style this score is always @0@.
data MatchFull (n :: Nat) = MatchFull {forall (n :: Nat). MatchFull n -> Int
scored :: {-# UNPACK #-} !Int , forall (n :: Nat). MatchFull n -> Vector n Int
indices :: {-# UNPACK #-} !(S.Vector n Int)} deriving Int -> MatchFull n -> ShowS
forall (n :: Nat). Int -> MatchFull n -> ShowS
forall (n :: Nat). [MatchFull n] -> ShowS
forall (n :: Nat). MatchFull n -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [MatchFull n] -> ShowS
$cshowList :: forall (n :: Nat). [MatchFull n] -> ShowS
show :: MatchFull n -> String
$cshow :: forall (n :: Nat). MatchFull n -> String
showsPrec :: Int -> MatchFull n -> ShowS
$cshowsPrec :: forall (n :: Nat). Int -> MatchFull n -> ShowS
Show

-- | The configuration for a search style with n needles and matcher of type a
data SearchSettings a (n :: Nat) = SearchSettings {
                                     -- | Given the matcher and the candidate text, find a match or return Nothing if there is none.
                                     forall a (n :: Nat).
SearchSettings a n -> a -> Text -> Maybe (MatchFull n)
match :: a -> Text -> Maybe (MatchFull n) ,
                                     -- | The maximum score for a given matcher. It determines the number of buckets.
                                     forall a (n :: Nat). SearchSettings a n -> a -> Int
fullscore :: a -> Int ,
                                     -- | Maximum number of matches with full score to produce.
                                     forall a (n :: Nat). SearchSettings a n -> Int
maxFullMatches :: Int ,
                                     -- | The ordering to sort the matches within a given bucket. It is run with two candidates and their corresponding matches.
                                     forall a (n :: Nat).
SearchSettings a n
-> Text -> Vector n Int -> Text -> Vector n Int -> Ordering
orderAs :: Text -> S.Vector n Int -> Text -> S.Vector n Int -> Ordering}

-- | Type synonym for the index of a candidate in the backing vector along with the positions of the matches for it.
type Indices (n :: Nat) = (Int , S.Vector n Int)

-- | Unsafe, use with care. eIndex i return 1 for Left and for Right the element at @i@-th position in the vector. The vector must have at least @i+1@ elements.
-- This uses unsafeIndex so no bound checks are performed.
{-# INLINE eIndex #-}
eIndex :: KnownNat n => Int -> Either a (S.Vector n Int) -> Int
eIndex :: forall (n :: Nat) a.
KnownNat n =>
Int -> Either a (Vector n Int) -> Int
eIndex Int
i = forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (forall a b. a -> b -> a
const Int
1) (forall (n :: Nat) a. Unbox a => Vector n a -> Int -> a
`S.unsafeIndex` Int
i)

{-# INLINEABLE updateMatch #-}
updateMatch :: KnownNat n => Int -> Either Int (S.Vector n Int) -> MatchState n a -> Int -> Int -> a -> MatchState n a
updateMatch :: forall (n :: Nat) a.
KnownNat n =>
Int
-> Either Int (Vector n Int)
-> MatchState n a
-> Int
-> Int
-> a
-> MatchState n a
updateMatch !Int
c Either Int (Vector n Int)
l (MatchState !Int
f !Vector n Int
m a
_) !Int
b !Int
e !a
a = forall (n :: Nat) a. Int -> Vector n Int -> a -> MatchState n a
MatchState Int
e (forall a b (n :: Nat).
(Vector a -> Vector b) -> Vector n a -> Vector n b
S.withVectorUnsafe (forall a.
Unbox a =>
(forall s. MVector s a -> ST s ()) -> Vector a -> Vector a
U.modify forall {f :: * -> *}.
PrimMonad f =>
MVector (PrimState f) Int -> f ()
doWrites) Vector n Int
m) a
a
  where
    doWrites :: MVector (PrimState f) Int -> f ()
doWrites MVector (PrimState f) Int
s = forall (f :: * -> *) a. Functor f => f a -> f ()
void forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) (m :: * -> *) b a.
(Foldable t, Monad m) =>
(b -> a -> m b) -> b -> t a -> m b
foldlM (\Int
d Int
i -> forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
M.unsafeWrite MVector (PrimState f) Int
s Int
i Int
d forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> Int
d forall a. Num a => a -> a -> a
- forall (n :: Nat) a.
KnownNat n =>
Int -> Either a (Vector n Int) -> Int
eIndex Int
i Either Int (Vector n Int)
l) Int
c [Int
e , Int
eforall a. Num a => a -> a -> a
-Int
1 .. Int
b]

-- | The score for a fuzzy match.
{-# INLINE matchScore #-}
matchScore :: KnownNat n => Either Int (S.Vector n Int) -> S.Vector n Int -> Int
matchScore :: forall (n :: Nat).
KnownNat n =>
Either Int (Vector n Int) -> Vector n Int -> Int
matchScore Either Int (Vector n Int)
u Vector n Int
v
  | forall (n :: Nat) a. KnownNat n => Vector n a -> Int
S.length Vector n Int
v forall a. Eq a => a -> a -> Bool
== Int
0       = Int
0
  | Vector Int
v' <- forall (n :: Nat) a. Vector n a -> Vector a
S.fromSized Vector n Int
v   = forall b a. Unbox b => (a -> Int -> b -> a) -> a -> Vector b -> a
U.ifoldl' (\ !Int
s !Int
i !Int
cc -> if Int
cc forall a. Num a => a -> a -> a
- forall a. Unbox a => Vector a -> Int -> a
U.unsafeIndex Vector Int
v' Int
i forall a. Eq a => a -> a -> Bool
== forall (n :: Nat) a.
KnownNat n =>
Int -> Either a (Vector n Int) -> Int
eIndex Int
i Either Int (Vector n Int)
u then Int
sforall a. Num a => a -> a -> a
+Int
1 else Int
s ) Int
0 forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Unbox a => Vector a -> Vector a
U.tail forall a b. (a -> b) -> a -> b
$ Vector Int
v'

{-# INLINEABLE matchStepFuzzy #-}
matchStepFuzzy :: KnownNat n => Either Int (S.Vector n Int) -> MatchState n () -> Match MatchPart -> Next (MatchState n ())
matchStepFuzzy :: forall (n :: Nat).
KnownNat n =>
Either Int (Vector n Int)
-> MatchState n () -> Match MatchPart -> Next (MatchState n ())
matchStepFuzzy Either Int (Vector n Int)
l s :: MatchState n ()
s@(MatchState !Int
f !Vector n Int
m ()
_) (Match (CodeUnitIndex !Int
i) (MatchPart !Int
b !Int
e))
  | Int
e forall a. Num a => a -> a -> a
- Int
b forall a. Eq a => a -> a -> Bool
== forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either forall a. a -> a
id forall (n :: Nat) a. KnownNat n => Vector n a -> Int
S.length Either Int (Vector n Int)
l forall a. Num a => a -> a -> a
- Int
1                                                                           = forall a. a -> Next a
Done forall a b. (a -> b) -> a -> b
$ forall (n :: Nat) a.
KnownNat n =>
Int
-> Either Int (Vector n Int)
-> MatchState n a
-> Int
-> Int
-> a
-> MatchState n a
updateMatch Int
i Either Int (Vector n Int)
l MatchState n ()
s Int
b Int
e ()
  | (Int
b forall a. Eq a => a -> a -> Bool
== Int
0 Bool -> Bool -> Bool
&& Int
f forall a. Eq a => a -> a -> Bool
== (-Int
1)) Bool -> Bool -> Bool
|| (Int
f forall a. Num a => a -> a -> a
+ Int
1 forall a. Eq a => a -> a -> Bool
== Int
b Bool -> Bool -> Bool
&& forall (n :: Nat) a. Unbox a => Vector n a -> Int -> a
S.unsafeIndex Vector n Int
m Int
f forall a. Num a => a -> a -> a
+ Int
e forall a. Ord a => a -> a -> Bool
< Int
i forall a. Num a => a -> a -> a
+ Int
b) Bool -> Bool -> Bool
|| (Int
f forall a. Ord a => a -> a -> Bool
>= Int
b Bool -> Bool -> Bool
&& Int
e forall a. Ord a => a -> a -> Bool
> Int
f Bool -> Bool -> Bool
&& Bool
monotonic)    = forall a. a -> Next a
Step forall a b. (a -> b) -> a -> b
$ forall (n :: Nat) a.
KnownNat n =>
Int
-> Either Int (Vector n Int)
-> MatchState n a
-> Int
-> Int
-> a
-> MatchState n a
updateMatch Int
i Either Int (Vector n Int)
l MatchState n ()
s Int
b Int
e ()
  | Bool
otherwise                                                                                                   = forall a. a -> Next a
Step   MatchState n ()
s
  where
    monotonic :: Bool
monotonic = forall (n :: Nat) a. Unbox a => Vector n a -> Int -> a
S.unsafeIndex Vector n Int
m Int
f forall a. Num a => a -> a -> a
+ forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (forall a b. a -> b -> a
const forall a b. (a -> b) -> a -> b
$ Int
eforall a. Num a => a -> a -> a
-Int
f) (forall a. (Unbox a, Num a) => Vector a -> a
U.sum forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Unbox a => Int -> Int -> Vector a -> Vector a
U.slice Int
f (Int
eforall a. Num a => a -> a -> a
-Int
f) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (n :: Nat) a. Vector n a -> Vector a
S.fromSized) Either Int (Vector n Int)
l forall a. Ord a => a -> a -> Bool
<= Int
i

{-# INLINEABLE matchStepOrderless #-}
matchStepOrderless :: KnownNat n => Either Int (S.Vector n Int) -> MatchState n (Int , Int) -> Match Int -> Next (MatchState n (Int , Int))
matchStepOrderless :: forall (n :: Nat).
KnownNat n =>
Either Int (Vector n Int)
-> MatchState n (Int, Int)
-> Match Int
-> Next (MatchState n (Int, Int))
matchStepOrderless !Either Int (Vector n Int)
lv s :: MatchState n (Int, Int)
s@(MatchState Int
r !Vector n Int
m (!Int
lm , !Int
li)) (Match (CodeUnitIndex !Int
c) !Int
i)
  | forall (n :: Nat) a. Unbox a => Vector n a -> Int -> a
S.unsafeIndex Vector n Int
m Int
i forall a. Eq a => a -> a -> Bool
== Int
0 Bool -> Bool -> Bool
&& Int
c forall a. Num a => a -> a -> a
- forall (n :: Nat) a.
KnownNat n =>
Int -> Either a (Vector n Int) -> Int
eIndex Int
i Either Int (Vector n Int)
lv forall a. Ord a => a -> a -> Bool
>= Int
li       = forall a. a -> Next a
go   forall a b. (a -> b) -> a -> b
$ forall (n :: Nat) a. Int -> Vector n Int -> a -> MatchState n a
MatchState (Int
rforall a. Num a => a -> a -> a
+Int
1) (forall a b (n :: Nat).
(Vector a -> Vector b) -> Vector n a -> Vector n b
S.withVectorUnsafe (forall a.
Unbox a =>
(forall s. MVector s a -> ST s ()) -> Vector a -> Vector a
U.modify (\MVector s Int
mv -> forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
M.unsafeWrite MVector s Int
mv Int
i Int
c)) Vector n Int
m) (Int
i , Int
c)
  | forall (n :: Nat) a. Unbox a => Vector n a -> Int -> a
S.unsafeIndex Vector n Int
m Int
i forall a. Eq a => a -> a -> Bool
== Int
0 Bool -> Bool -> Bool
&& forall (n :: Nat) a.
KnownNat n =>
Int -> Either a (Vector n Int) -> Int
eIndex Int
lm Either Int (Vector n Int)
lv forall a. Ord a => a -> a -> Bool
< forall (n :: Nat) a.
KnownNat n =>
Int -> Either a (Vector n Int) -> Int
eIndex Int
i Either Int (Vector n Int)
lv  = forall a. a -> Next a
Step forall a b. (a -> b) -> a -> b
$ forall (n :: Nat) a. Int -> Vector n Int -> a -> MatchState n a
MatchState Int
r (forall a b (n :: Nat).
(Vector a -> Vector b) -> Vector n a -> Vector n b
S.withVectorUnsafe (forall a.
Unbox a =>
(forall s. MVector s a -> ST s ()) -> Vector a -> Vector a
U.modify (\MVector s Int
mv -> forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
M.unsafeWrite MVector s Int
mv Int
i Int
c forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
M.write MVector s Int
mv Int
lm Int
0)) Vector n Int
m) (Int
i , Int
c)
  | Bool
otherwise                                             = forall a. a -> Next a
Step MatchState n (Int, Int)
s
  where
    go :: a -> Next a
go = if Int
r forall a. Eq a => a -> a -> Bool
== forall (n :: Nat) a. KnownNat n => Vector n a -> Int
S.length Vector n Int
m forall a. Num a => a -> a -> a
- Int
1 then forall a. a -> Next a
Done else forall a. a -> Next a
Step

kConsecutive :: Int ->  Text -> [Text]
kConsecutive :: Int -> Text -> [Text]
kConsecutive Int
k Text
t = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (Int -> Text -> Text
T.take Int
k) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Int -> [a] -> [a]
take (Int
1 forall a. Num a => a -> a -> a
+ Text -> Int
T.length Text
t forall a. Num a => a -> a -> a
- Int
k) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Text]
T.tails forall a b. (a -> b) -> a -> b
$ Text
t

-- | A general function to construct a Matcher. Returns Nothing if the string is empty or if the number of needles turns out to be non-positive
makeMatcher :: forall a. CaseSensitivity -> (Text -> Int) -- ^ The function to determine the number of needles from the query string.
                                                          -- The proxy argument is instantiated at the resulting value.
                    -> (forall n. KnownNat n => Proxy n -> CaseSensitivity -> Text -> MatcherSized n a) -- ^ The functions for constructing the matcher
                    -> Text -- ^ The query string
                    -> Matcher a -- ^ Nothing if the string is empty or if the number of needles turns out to be non-positive
makeMatcher :: forall a.
CaseSensitivity
-> (Text -> Int)
-> (forall (n :: Nat).
    KnownNat n =>
    Proxy n -> CaseSensitivity -> Text -> MatcherSized n a)
-> Text
-> Matcher a
makeMatcher CaseSensitivity
c Text -> Int
lenf forall (n :: Nat).
KnownNat n =>
Proxy n -> CaseSensitivity -> Text -> MatcherSized n a
matf Text
t
  | SomeNat Proxy n
p <- Nat -> SomeNat
someNatVal forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (Integral a, Num b) => a -> b
fromIntegralforall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Int
lenf forall a b. (a -> b) -> a -> b
$ Text
t    = forall a (n :: Nat). KnownNat n => MatcherSized n a -> Matcher a
Matcher forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (n :: Nat).
KnownNat n =>
Proxy n -> CaseSensitivity -> Text -> MatcherSized n a
matf Proxy n
p CaseSensitivity
c forall a b. (a -> b) -> a -> b
$ Text
t

{-# INLINE withSensitivity #-}
withSensitivity :: CaseSensitivity -> Text -> Text
withSensitivity :: CaseSensitivity -> Text -> Text
withSensitivity CaseSensitivity
IgnoreCase    = Text -> Text
lowerUtf8
withSensitivity CaseSensitivity
CaseSensitive = forall a. a -> a
id

-- | Constructs the matcher for fuzzy matching. The needles are all possible contigous subtrings of the string being matched. The Nat @n@ must be instantiated at the
--  length @n@ of the query string. They are n choose 2 such substrings, so to the complexity of matching is \(O(m + n^2)\) where @m@ is the length of candidate string.
--  This is a rough (and probably wrong) estimate as the updating the matchstate for each found match is not a constant time operation. Not sure if Aho Corasick is
--  the optimal way for this kind of matching but in practice it seems fast enough.
fuzzyMatcherSized :: KnownNat n => p n -> CaseSensitivity -> Text -> MatcherSized n MatchPart
fuzzyMatcherSized :: forall (n :: Nat) (p :: Nat -> *).
KnownNat n =>
p n -> CaseSensitivity -> Text -> MatcherSized n MatchPart
fuzzyMatcherSized p n
_ CaseSensitivity
c Text
t = MatcherSized {caseSensitivity :: CaseSensitivity
caseSensitivity = CaseSensitivity
c , machina :: AcMachine MatchPart
machina = forall v. [(Text, v)] -> AcMachine v
build forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Int -> [(Text, MatchPart)]
go forall a b. (a -> b) -> a -> b
$ [Text -> Int
T.length Text
t , Text -> Int
T.length Text
t forall a. Num a => a -> a -> a
- Int
1 .. Int
1]
                                       , sizes :: Either Int (Vector n Int)
sizes = if forall a (n :: Nat). (Unbox a, Num a) => Vector n a -> a
S.sum Vector n Int
sz forall a. Eq a => a -> a -> Bool
== forall (n :: Nat) a. KnownNat n => Vector n a -> Int
S.length Vector n Int
sz then forall a b. a -> Either a b
Left (forall (n :: Nat) a. KnownNat n => Vector n a -> Int
S.length Vector n Int
sz) else forall a b. b -> Either a b
Right Vector n Int
sz }
  where
    sz :: Vector n Int
sz      = forall a. a -> Maybe a -> a
fromMaybe (forall (n :: Nat) a. (KnownNat n, Unbox a) => a -> Vector n a
S.replicate Int
1) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a (n :: Nat).
(Unbox a, KnownNat n) =>
[a] -> Maybe (Vector n a)
S.fromList forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (forall (t :: * -> *) a. Foldable t => t a -> Int
length forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [CodeUnit]
unpackUtf8 forall b c a. (b -> c) -> (a -> b) -> a -> c
. CaseSensitivity -> Text -> Text
withSensitivity CaseSensitivity
c forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Text
T.singleton)  forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack  forall a b. (a -> b) -> a -> b
$ Text
t
    go :: Int -> [(Text, MatchPart)]
go !Int
k   = forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\Text
t' Int
l -> (CaseSensitivity -> Text -> Text
withSensitivity CaseSensitivity
c Text
t' , Int -> Int -> MatchPart
MatchPart Int
l (Int
l forall a. Num a => a -> a -> a
+ Int
k forall a. Num a => a -> a -> a
-Int
1))) (Int -> Text -> [Text]
kConsecutive Int
k Text
t) [Int
0 ..]

-- | Unsized version of fuzzyMatcherSized
fuzzyMatcher :: CaseSensitivity -> Text -> Matcher MatchPart
fuzzyMatcher :: CaseSensitivity -> Text -> Matcher MatchPart
fuzzyMatcher CaseSensitivity
c  = forall a.
CaseSensitivity
-> (Text -> Int)
-> (forall (n :: Nat).
    KnownNat n =>
    Proxy n -> CaseSensitivity -> Text -> MatcherSized n a)
-> Text
-> Matcher a
makeMatcher CaseSensitivity
c Text -> Int
T.length forall (n :: Nat) (p :: Nat -> *).
KnownNat n =>
p n -> CaseSensitivity -> Text -> MatcherSized n MatchPart
fuzzyMatcherSized

{-# INLINE emptyMatcher #-}
emptyMatcher :: MatcherSized 0 a
emptyMatcher :: forall a. MatcherSized 0 a
emptyMatcher = forall (n :: Nat) a.
CaseSensitivity
-> AcMachine a -> Either Int (Vector n Int) -> MatcherSized n a
MatcherSized CaseSensitivity
IgnoreCase (forall v. [(Text, v)] -> AcMachine v
build []) (forall a b. a -> Either a b
Left Int
0)

-- | Constructs the matcher for orderless matching, the needles are the words from the query string and the proxy argument should be instantiated at the
--  number of words.
orderlessMatcherSized :: KnownNat n => p n -> CaseSensitivity -> Text -> MatcherSized n Int
orderlessMatcherSized :: forall (n :: Nat) (p :: Nat -> *).
KnownNat n =>
p n -> CaseSensitivity -> Text -> MatcherSized n Int
orderlessMatcherSized p n
_ CaseSensitivity
c Text
t = MatcherSized {caseSensitivity :: CaseSensitivity
caseSensitivity = CaseSensitivity
c , machina :: AcMachine Int
machina = forall v. [(Text, v)] -> AcMachine v
build forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. [a] -> [b] -> [(a, b)]
zip [Text]
wrds forall a b. (a -> b) -> a -> b
$ [Int
0 ..]
                               , sizes :: Either Int (Vector n Int)
sizes = forall a b. b -> Either a b
Right forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. a -> Maybe a -> a
fromMaybe (forall (n :: Nat) a. (KnownNat n, Unbox a) => a -> Vector n a
S.replicate Int
1) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a (n :: Nat).
(Unbox a, KnownNat n) =>
[a] -> Maybe (Vector n a)
S.fromList forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (CodeUnitIndex -> Int
codeUnitIndex forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> CodeUnitIndex
lengthUtf8) forall a b. (a -> b) -> a -> b
$ [Text]
wrds }
  where
    wrds :: [Text]
wrds = CaseSensitivity -> Text -> Text
withSensitivity CaseSensitivity
c forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> [Text]
T.words Text
t

-- | Unsized version of orderlessMatcherSized
orderlessMatcher :: CaseSensitivity -> Text -> Matcher Int
orderlessMatcher :: CaseSensitivity -> Text -> Matcher Int
orderlessMatcher CaseSensitivity
c = forall a.
CaseSensitivity
-> (Text -> Int)
-> (forall (n :: Nat).
    KnownNat n =>
    Proxy n -> CaseSensitivity -> Text -> MatcherSized n a)
-> Text
-> Matcher a
makeMatcher CaseSensitivity
c (forall (t :: * -> *) a. Foldable t => t a -> Int
length forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Text]
T.words) forall (n :: Nat) (p :: Nat -> *).
KnownNat n =>
p n -> CaseSensitivity -> Text -> MatcherSized n Int
orderlessMatcherSized

{-# INLINEABLE fuzzyMatchSized#-}
fuzzyMatchSized :: KnownNat n => MatcherSized n MatchPart -> Text -> Maybe (MatchFull n)
fuzzyMatchSized :: forall (n :: Nat).
KnownNat n =>
MatcherSized n MatchPart -> Text -> Maybe (MatchFull n)
fuzzyMatchSized (MatcherSized CaseSensitivity
c AcMachine MatchPart
m Either Int (Vector n Int)
l) = forall {a}. MatchState n a -> Maybe (MatchFull n)
full forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a v.
CaseSensitivity
-> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runWithCase CaseSensitivity
c (forall (n :: Nat) a. Int -> Vector n Int -> a -> MatchState n a
MatchState (-Int
1) (forall (n :: Nat) a. (KnownNat n, Unbox a) => a -> Vector n a
S.replicate Int
0) ()) (forall (n :: Nat).
KnownNat n =>
Either Int (Vector n Int)
-> MatchState n () -> Match MatchPart -> Next (MatchState n ())
matchStepFuzzy Either Int (Vector n Int)
l) AcMachine MatchPart
m
  where
    full :: MatchState n a -> Maybe (MatchFull n)
full s :: MatchState n a
s@(MatchState !Int
e !Vector n Int
u a
_) = if Int
e forall a. Num a => a -> a -> a
+ Int
1 forall a. Eq a => a -> a -> Bool
== forall (n :: Nat) a. KnownNat n => Vector n a -> Int
S.length Vector n Int
u then forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall (n :: Nat). Int -> Vector n Int -> MatchFull n
MatchFull (forall (n :: Nat).
KnownNat n =>
Either Int (Vector n Int) -> Vector n Int -> Int
matchScore Either Int (Vector n Int)
l Vector n Int
u) Vector n Int
u else forall a. Maybe a
Nothing

fuzzyMatch :: Matcher MatchPart -> Text -> Maybe [Text]
fuzzyMatch :: Matcher MatchPart -> Text -> Maybe [Text]
fuzzyMatch (Matcher MatcherSized n MatchPart
m) Text
t = Either Int (Vector Int) -> Text -> Vector Int -> [Text]
parts (forall (n :: Nat) a. Vector n a -> Vector a
S.fromSized forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (n :: Nat) a. MatcherSized n a -> Either Int (Vector n Int)
sizes MatcherSized n MatchPart
m) Text
t forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (n :: Nat) a. Vector n a -> Vector a
S.fromSized forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (n :: Nat). MatchFull n -> Vector n Int
indices forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (n :: Nat).
KnownNat n =>
MatcherSized n MatchPart -> Text -> Maybe (MatchFull n)
fuzzyMatchSized MatcherSized n MatchPart
m Text
t

fuzzyMatchParts :: KnownNat n => MatcherSized n MatchPart -> Text -> S.Vector n Int -> [Text]
fuzzyMatchParts :: forall (n :: Nat).
KnownNat n =>
MatcherSized n MatchPart -> Text -> Vector n Int -> [Text]
fuzzyMatchParts MatcherSized n MatchPart
m Text
t = Either Int (Vector Int) -> Text -> Vector Int -> [Text]
parts (forall (n :: Nat) a. Vector n a -> Vector a
S.fromSized forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (n :: Nat) a. MatcherSized n a -> Either Int (Vector n Int)
sizes MatcherSized n MatchPart
m) Text
t forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (n :: Nat) a. Vector n a -> Vector a
S.fromSized

fuzzyMatchPartsAs :: KnownNat n => (Bool -> Text -> a) -> MatcherSized n MatchPart -> Text -> S.Vector n Int -> [a]
fuzzyMatchPartsAs :: forall (n :: Nat) a.
KnownNat n =>
(Bool -> Text -> a)
-> MatcherSized n MatchPart -> Text -> Vector n Int -> [a]
fuzzyMatchPartsAs Bool -> Text -> a
f MatcherSized n MatchPart
m Text
t = forall a.
(Bool -> Text -> a)
-> Either Int (Vector Int) -> Text -> Vector Int -> [a]
partsAs Bool -> Text -> a
f (forall (n :: Nat) a. Vector n a -> Vector a
S.fromSized forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (n :: Nat) a. MatcherSized n a -> Either Int (Vector n Int)
sizes MatcherSized n MatchPart
m) Text
t forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (n :: Nat) a. Vector n a -> Vector a
S.fromSized

{-# INLINEABLE orderlessMatchSized#-}
orderlessMatchSized :: KnownNat n => MatcherSized n Int -> Text -> Maybe (MatchFull n)
orderlessMatchSized :: forall (n :: Nat).
KnownNat n =>
MatcherSized n Int -> Text -> Maybe (MatchFull n)
orderlessMatchSized (MatcherSized CaseSensitivity
c AcMachine Int
m Either Int (Vector n Int)
l) = forall {n :: Nat} {a}. MatchState n a -> Maybe (MatchFull n)
full forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a v.
CaseSensitivity
-> a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
runWithCase CaseSensitivity
c (forall (n :: Nat) a. Int -> Vector n Int -> a -> MatchState n a
MatchState Int
0 (forall (n :: Nat) a. (KnownNat n, Unbox a) => a -> Vector n a
S.replicate Int
0) (Int
0,Int
0)) (forall (n :: Nat).
KnownNat n =>
Either Int (Vector n Int)
-> MatchState n (Int, Int)
-> Match Int
-> Next (MatchState n (Int, Int))
matchStepOrderless Either Int (Vector n Int)
l) AcMachine Int
m
  where
    ln :: Int
ln = forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either forall a. a -> a
id forall (n :: Nat) a. KnownNat n => Vector n a -> Int
S.length Either Int (Vector n Int)
l
    full :: MatchState n a -> Maybe (MatchFull n)
full MatchState n a
u = if forall (n :: Nat) a. MatchState n a -> Int
endLocation MatchState n a
u forall a. Eq a => a -> a -> Bool
== Int
ln then forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall (n :: Nat). Int -> Vector n Int -> MatchFull n
MatchFull Int
0 (forall (n :: Nat) a. MatchState n a -> Vector n Int
partialMatch MatchState n a
u) else forall a. Maybe a
Nothing

orderlessMatch :: Matcher Int -> Text -> Maybe [Text]
orderlessMatch :: Matcher Int -> Text -> Maybe [Text]
orderlessMatch (Matcher MatcherSized n Int
m) Text
t = Either Int (Vector Int) -> Text -> Vector Int -> [Text]
partsOrderless (forall (n :: Nat) a. Vector n a -> Vector a
S.fromSized forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (n :: Nat) a. MatcherSized n a -> Either Int (Vector n Int)
sizes MatcherSized n Int
m) Text
t forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (n :: Nat) a. Vector n a -> Vector a
S.fromSized forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (n :: Nat). MatchFull n -> Vector n Int
indices forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (n :: Nat).
KnownNat n =>
MatcherSized n Int -> Text -> Maybe (MatchFull n)
orderlessMatchSized MatcherSized n Int
m Text
t

orderlessMatchParts :: KnownNat n => MatcherSized n Int -> Text -> S.Vector n Int -> [Text]
orderlessMatchParts :: forall (n :: Nat).
KnownNat n =>
MatcherSized n Int -> Text -> Vector n Int -> [Text]
orderlessMatchParts MatcherSized n Int
m Text
t = Either Int (Vector Int) -> Text -> Vector Int -> [Text]
partsOrderless (forall (n :: Nat) a. Vector n a -> Vector a
S.fromSized forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (n :: Nat) a. MatcherSized n a -> Either Int (Vector n Int)
sizes MatcherSized n Int
m) Text
t forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (n :: Nat) a. Vector n a -> Vector a
S.fromSized

orderlessMatchPartsAs :: KnownNat n => (Bool -> Text -> a) -> MatcherSized n Int -> Text -> S.Vector n Int -> [a]
orderlessMatchPartsAs :: forall (n :: Nat) a.
KnownNat n =>
(Bool -> Text -> a)
-> MatcherSized n Int -> Text -> Vector n Int -> [a]
orderlessMatchPartsAs Bool -> Text -> a
f MatcherSized n Int
m Text
t = forall a.
(Bool -> Text -> a)
-> Either Int (Vector Int) -> Text -> Vector Int -> [a]
partsOrderlessAs Bool -> Text -> a
f (forall (n :: Nat) a. Vector n a -> Vector a
S.fromSized forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (n :: Nat) a. MatcherSized n a -> Either Int (Vector n Int)
sizes MatcherSized n Int
m) Text
t forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (n :: Nat) a. Vector n a -> Vector a
S.fromSized

-- | The parts of a string resulting from a match using the fuzzy matcher.
parts :: Either Int (U.Vector Int) -- ^ The information about the lengths of different needles.
  -> Text -- ^ The candidate string that has been matched
  -> U.Vector Int -- ^ The vector recording the positions of the needle in the matched string.
  -> [Text] -- ^ The candidate string split up according to  the match
parts :: Either Int (Vector Int) -> Text -> Vector Int -> [Text]
parts Either Int (Vector Int)
v Text
t Vector Int
u
  | forall a. Unbox a => Vector a -> Bool
U.null Vector Int
u  = [Text
t]
  |Bool
otherwise  =  ([Text], CodeUnitIndex) -> [Text]
done forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' ([Text], CodeUnitIndex) -> CodeUnitIndex -> ([Text], CodeUnitIndex)
cut ([] , Text -> CodeUnitIndex
lengthUtf8 Text
t) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Either Int (Vector Int) -> Vector Int -> [CodeUnitIndex]
minify Either Int (Vector Int)
v forall a b. (a -> b) -> a -> b
$ Vector Int
u
  where
    done :: ([Text], CodeUnitIndex) -> [Text]
done ([Text]
ms , CodeUnitIndex
cp) = CodeUnitIndex -> CodeUnitIndex -> Text -> Text
unsafeSliceUtf8 CodeUnitIndex
0 CodeUnitIndex
cp Text
t forall a. a -> [a] -> [a]
: [Text]
ms
    cut :: ([Text], CodeUnitIndex) -> CodeUnitIndex -> ([Text], CodeUnitIndex)
cut  (![Text]
ms , !CodeUnitIndex
cp) !CodeUnitIndex
cc  = (CodeUnitIndex -> CodeUnitIndex -> Text -> Text
unsafeSliceUtf8 CodeUnitIndex
cc (CodeUnitIndex
cp forall a. Num a => a -> a -> a
- CodeUnitIndex
cc) Text
t forall a. a -> [a] -> [a]
: [Text]
ms , CodeUnitIndex
cc )

partsAs :: (Bool -> Text -> a) -> Either Int (U.Vector Int) -> Text -> U.Vector Int -> [a]
partsAs :: forall a.
(Bool -> Text -> a)
-> Either Int (Vector Int) -> Text -> Vector Int -> [a]
partsAs Bool -> Text -> a
f = Either Int (Vector Int) -> Text -> Vector Int -> [a]
go
  where
    go :: Either Int (Vector Int) -> Text -> Vector Int -> [a]
go Either Int (Vector Int)
v Text
t Vector Int
u
      | forall a. Unbox a => Vector a -> Bool
U.null Vector Int
u  = [Bool -> Text -> a
f Bool
False Text
t]
      |Bool
otherwise  =  ([a], CodeUnitIndex, Bool) -> [a]
done forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' ([a], CodeUnitIndex, Bool)
-> CodeUnitIndex -> ([a], CodeUnitIndex, Bool)
cut ([] , Text -> CodeUnitIndex
lengthUtf8 Text
t , Bool
False) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Either Int (Vector Int) -> Vector Int -> [CodeUnitIndex]
minify Either Int (Vector Int)
v forall a b. (a -> b) -> a -> b
$ Vector Int
u
      where
        done :: ([a], CodeUnitIndex, Bool) -> [a]
done ([a]
ms , CodeUnitIndex
cp, Bool
b) = Bool -> Text -> a
f Bool
b (CodeUnitIndex -> CodeUnitIndex -> Text -> Text
unsafeSliceUtf8 CodeUnitIndex
0 CodeUnitIndex
cp Text
t) forall a. a -> [a] -> [a]
: [a]
ms
        cut :: ([a], CodeUnitIndex, Bool)
-> CodeUnitIndex -> ([a], CodeUnitIndex, Bool)
cut  (![a]
ms , !CodeUnitIndex
cp , !Bool
b) !CodeUnitIndex
cc  = (Bool -> Text -> a
f Bool
b (CodeUnitIndex -> CodeUnitIndex -> Text -> Text
unsafeSliceUtf8 CodeUnitIndex
cc (CodeUnitIndex
cp forall a. Num a => a -> a -> a
- CodeUnitIndex
cc) Text
t) forall a. a -> [a] -> [a]
: [a]
ms , CodeUnitIndex
cc , Bool -> Bool
not Bool
b)

-- | The parts of a string resulting from a match using the orderless matcher. See parts for an explanation of arguments.
partsOrderless :: Either Int (U.Vector Int) -> Text -> U.Vector Int -> [Text]
partsOrderless :: Either Int (Vector Int) -> Text -> Vector Int -> [Text]
partsOrderless Either Int (Vector Int)
v Text
t Vector Int
u = Either Int (Vector Int) -> Text -> Vector Int -> [Text]
parts (forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (forall a. Unbox a => Vector a -> Vector Int -> Vector a
`U.backpermute` forall a b. (a, b) -> a
fst (Vector Int, Vector Int)
up) Either Int (Vector Int)
v) Text
t (forall a b. (a, b) -> b
snd (Vector Int, Vector Int)
up)
  where
    up :: (Vector Int, Vector Int)
up = forall a b.
(Unbox a, Unbox b) =>
Vector (a, b) -> (Vector a, Vector b)
U.unzip forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a.
Unbox a =>
(forall s. MVector s a -> ST s ()) -> Vector a -> Vector a
U.modify (forall (m :: * -> *) (v :: * -> * -> *) e.
(PrimMonad m, MVector v e) =>
Comparison e -> v (PrimState m) e -> m ()
V.sortBy (forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing forall a b. (a, b) -> b
snd)) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b.
(Unbox a, Unbox b) =>
(Int -> a -> b) -> Vector a -> Vector b
U.imap (,) forall a b. (a -> b) -> a -> b
$ Vector Int
u

partsOrderlessAs :: (Bool -> Text -> a) -> Either Int (U.Vector Int) -> Text -> U.Vector Int -> [a]
partsOrderlessAs :: forall a.
(Bool -> Text -> a)
-> Either Int (Vector Int) -> Text -> Vector Int -> [a]
partsOrderlessAs Bool -> Text -> a
f Either Int (Vector Int)
v Text
t Vector Int
u = forall a.
(Bool -> Text -> a)
-> Either Int (Vector Int) -> Text -> Vector Int -> [a]
partsAs Bool -> Text -> a
f (forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (forall a. Unbox a => Vector a -> Vector Int -> Vector a
`U.backpermute` forall a b. (a, b) -> a
fst (Vector Int, Vector Int)
up) Either Int (Vector Int)
v) Text
t (forall a b. (a, b) -> b
snd (Vector Int, Vector Int)
up)
  where
    up :: (Vector Int, Vector Int)
up = forall a b.
(Unbox a, Unbox b) =>
Vector (a, b) -> (Vector a, Vector b)
U.unzip forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a.
Unbox a =>
(forall s. MVector s a -> ST s ()) -> Vector a -> Vector a
U.modify (forall (m :: * -> *) (v :: * -> * -> *) e.
(PrimMonad m, MVector v e) =>
Comparison e -> v (PrimState m) e -> m ()
V.sortBy (forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing forall a b. (a, b) -> b
snd)) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b.
(Unbox a, Unbox b) =>
(Int -> a -> b) -> Vector a -> Vector b
U.imap (,) forall a b. (a -> b) -> a -> b
$ Vector Int
u

{-# INLINE eIndexU #-}
eIndexU :: Int -> Either a (U.Vector Int) -> Int
eIndexU :: forall a. Int -> Either a (Vector Int) -> Int
eIndexU Int
i = forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (forall a b. a -> b -> a
const Int
1) (forall a. Unbox a => Vector a -> Int -> a
`U.unsafeIndex` Int
i)

-- | Shorten a match by collapsing the contiguous sub-matches together.
minify :: Either Int (U.Vector Int) -> U.Vector Int -> [CodeUnitIndex]
minify :: Either Int (Vector Int) -> Vector Int -> [CodeUnitIndex]
minify Either Int (Vector Int)
v Vector Int
s
  | forall a. Unbox a => Vector a -> Int
U.length Vector Int
s forall a. Eq a => a -> a -> Bool
== Int
1       = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map Int -> CodeUnitIndex
CodeUnitIndex [Int
a , Int
a forall a. Num a => a -> a -> a
- forall a. Int -> Either a (Vector Int) -> Int
eIndexU Int
0 Either Int (Vector Int)
v]
  | Bool
otherwise             = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map Int -> CodeUnitIndex
CodeUnitIndex forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. Unbox a => Vector a -> a
U.last Vector Int
s forall a. a -> [a] -> [a]
:) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> b
snd forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall b a. Unbox b => (a -> Int -> b -> a) -> a -> Vector b -> a
U.ifoldl' (Int, [Int]) -> Int -> Int -> (Int, [Int])
go (Int
a , [Int
a forall a. Num a => a -> a -> a
- forall a. Int -> Either a (Vector Int) -> Int
eIndexU Int
0 Either Int (Vector Int)
v]) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Unbox a => Vector a -> Vector a
U.tail forall a b. (a -> b) -> a -> b
$ Vector Int
s
  where
    a :: Int
a = forall a. Unbox a => Vector a -> a
U.unsafeHead Vector Int
s
    go :: (Int, [Int]) -> Int -> Int -> (Int, [Int])
go (!Int
l , [Int]
s) !Int
i !Int
c = if Int
c forall a. Num a => a -> a -> a
- Int
l forall a. Ord a => a -> a -> Bool
<= forall a. Int -> Either a (Vector Int) -> Int
eIndexU (Int
iforall a. Num a => a -> a -> a
+Int
1) Either Int (Vector Int)
v then (Int
c , [Int]
s) else (Int
c , Int
c forall a. Num a => a -> a -> a
- forall a. Int -> Either a (Vector Int) -> Int
eIndexU (Int
iforall a. Num a => a -> a -> a
+Int
1) Either Int (Vector Int)
v forall a. a -> [a] -> [a]
: Int
l forall a. a -> [a] -> [a]
: [Int]
s )

-- | The default ordering used in this module to sort matches within a given bucket. Prefers the matche for which the last part is closest to the end. To tie
-- break prefers the shorter matched string.
defOrdering :: Text -> S.Vector n Int -> Text -> S.Vector n Int -> Ordering
defOrdering :: forall (n :: Nat).
Text -> Vector n Int -> Text -> Vector n Int -> Ordering
defOrdering Text
t1 Vector n Int
s1 Text
t2 Vector n Int
s2
  | Ordering
el forall a. Eq a => a -> a -> Bool
== Ordering
EQ   = forall a. Ord a => a -> a -> Ordering
compare (Text -> Int
T.length Text
t1) (Text -> Int
T.length Text
t2)
  | Bool
otherwise  = Ordering
el
  where
    el :: Ordering
el = forall a. Ord a => a -> a -> Ordering
compare (Text -> Int
T.length Text
t1 forall a. Num a => a -> a -> a
- forall a. (Unbox a, Ord a) => Vector a -> a
U.maximum (forall (n :: Nat) a. Vector n a -> Vector a
S.fromSized Vector n Int
s1)) (Text -> Int
T.length Text
t2 forall a. Num a => a -> a -> a
- forall a. (Unbox a, Ord a) => Vector a -> a
U.maximum (forall (n :: Nat) a. Vector n a -> Vector a
S.fromSized Vector n Int
s2))

-- | Search functions suitable for fuzzy matching. The candidate @c@ will match query @s@ if @c@ contains all the characters in @s@ in order. In general there
--   can be several ways of matching. This tries to find a match with minimum number of parts of. It does not find the minimum number of parts, if that requires
--   reducing the extent of the partial match during search. E.g. matching @"as"@ against @"talash"@ the split will be @["tal","as","h"]@ and not
--   @["t","a","la","s","h"]@. While matching @"talash best match testing hat"@ against @"tea"@ will not result in @["talash best match ","te","sting h","a","t"]@ since
--   @"te"@ occurs only after we have match all three letters and we can't know if we will find the @"a"@ without going through the string.
fuzzySettings :: KnownNat n => Int -> SearchSettings (MatcherSized n MatchPart) n
fuzzySettings :: forall (n :: Nat).
KnownNat n =>
Int -> SearchSettings (MatcherSized n MatchPart) n
fuzzySettings !Int
m = SearchSettings { match :: MatcherSized n MatchPart -> Text -> Maybe (MatchFull n)
match = forall (n :: Nat).
KnownNat n =>
MatcherSized n MatchPart -> Text -> Maybe (MatchFull n)
fuzzyMatchSized , fullscore :: MatcherSized n MatchPart -> Int
fullscore = \MatcherSized n MatchPart
t -> forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either forall a. a -> a
id forall (n :: Nat) a. KnownNat n => Vector n a -> Int
S.length (forall (n :: Nat) a. MatcherSized n a -> Either Int (Vector n Int)
sizes MatcherSized n MatchPart
t) forall a. Num a => a -> a -> a
- Int
1 , maxFullMatches :: Int
maxFullMatches = Int
m , orderAs :: Text -> Vector n Int -> Text -> Vector n Int -> Ordering
orderAs = forall (n :: Nat).
Text -> Vector n Int -> Text -> Vector n Int -> Ordering
defOrdering}

-- | Search functions that match the words in i.e. space separated substring in any order. @"talash best"@ will match @"be as"@ with the split
--   @["tal","as","h","be","st"]@ but @"talash best"@ will not match @"bet"@.
orderlessSettings :: KnownNat n => Int -> SearchSettings (MatcherSized n Int) n
orderlessSettings :: forall (n :: Nat).
KnownNat n =>
Int -> SearchSettings (MatcherSized n Int) n
orderlessSettings Int
n = SearchSettings {match :: MatcherSized n Int -> Text -> Maybe (MatchFull n)
match = forall (n :: Nat).
KnownNat n =>
MatcherSized n Int -> Text -> Maybe (MatchFull n)
orderlessMatchSized , fullscore :: MatcherSized n Int -> Int
fullscore = forall a b. a -> b -> a
const Int
0, maxFullMatches :: Int
maxFullMatches = Int
n , orderAs :: Text -> Vector n Int -> Text -> Vector n Int -> Ordering
orderAs = forall (n :: Nat).
Text -> Vector n Int -> Text -> Vector n Int -> Ordering
defOrdering}