talash-0.3.0: Line oriented fast enough text search
Safe HaskellSafe-Inferred
LanguageHaskell2010

Talash.Chunked

Synopsis

Documentation

data Ocassion Source #

Instances

Instances details
Show Ocassion Source # 
Instance details

Defined in Talash.Chunked

Eq Ocassion Source # 
Instance details

Defined in Talash.Chunked

Ord Ocassion Source # 
Instance details

Defined in Talash.Chunked

newtype Chunks (n :: Nat) Source #

Constructors

Chunks 

Fields

Instances

Instances details
Show (Chunks n) Source # 
Instance details

Defined in Talash.Chunked

Methods

showsPrec :: Int -> Chunks n -> ShowS #

show :: Chunks n -> String #

showList :: [Chunks n] -> ShowS #

Eq (Chunks n) Source # 
Instance details

Defined in Talash.Chunked

Methods

(==) :: Chunks n -> Chunks n -> Bool #

(/=) :: Chunks n -> Chunks n -> Bool #

Ord (Chunks n) Source # 
Instance details

Defined in Talash.Chunked

Methods

compare :: Chunks n -> Chunks n -> Ordering #

(<) :: Chunks n -> Chunks n -> Bool #

(<=) :: Chunks n -> Chunks n -> Bool #

(>) :: Chunks n -> Chunks n -> Bool #

(>=) :: Chunks n -> Chunks n -> Bool #

max :: Chunks n -> Chunks n -> Chunks n #

min :: Chunks n -> Chunks n -> Chunks n #

data SearchFunctions a b Source #

Constructors

SearchFunctions 

Fields

display :: forall a b n. KnownNat n => SimpleGetter (SearchFunctions a b) ((Bool -> Text -> b) -> MatcherSized n a -> Text -> Vector n Int -> [b]) Source #

makeMatcher :: forall a b. Lens' (SearchFunctions a b) (Text -> Matcher a) Source #

match :: forall a b n. KnownNat n => SimpleGetter (SearchFunctions a b) (MatcherSized n a -> Text -> Maybe (MatchFull n)) Source #

data SearchEnv n a b Source #

The constant environment in which the search runs.

Constructors

SearchEnv 

Fields

allMatches :: forall n a b. Lens' (SearchEnv n a b) (IOVector (Vector n Bit)) Source #

candidates :: forall n a b. Lens' (SearchEnv n a b) (Chunks n) Source #

maxMatches :: forall n a b. Lens' (SearchEnv n a b) Int Source #

query :: forall n a b. Lens' (SearchEnv n a b) (MVar (Maybe Text)) Source #

searchFunctions :: forall n a b b. Lens (SearchEnv n a b) (SearchEnv n a b) (SearchFunctions a b) (SearchFunctions a b) Source #

send :: forall n a b n m. (KnownNat n, KnownNat m) => SimpleGetter (SearchEnv n a b) (Chunks n -> SearchReport -> MatcherSized m a -> MatchSetSized m -> IO ()) Source #

matchChunk :: forall n m a. (KnownNat n, KnownNat m) => (MatcherSized n a -> Text -> Maybe (MatchFull n)) -> MatcherSized n a -> Int -> Vector m Text -> Vector m Bit -> (Vector m Bit, MatchSetSized n) Source #

matchChunkM :: forall n m a s. (KnownNat n, KnownNat m) => (MatcherSized n a -> Text -> Maybe (MatchFull n)) -> MatcherSized n a -> Int -> Vector m Text -> Vector m Bit -> ST s (Vector m Bit, MatchSetSized n) Source #

resetMatches :: forall n m a b. KnownNat n => SearchEnv n a b -> SearchStateSized m a -> IO () Source #

searchEnv :: KnownNat n => SearchFunctions a b -> Int -> (forall n m. (KnownNat n, KnownNat m) => Chunks n -> SearchReport -> MatcherSized m a -> MatchSetSized m -> IO ()) -> Chunks n -> IO (SearchEnv n a b) Source #

searchLoop :: KnownNat n => SearchEnv n a b -> IO () Source #

makeChunks :: forall n. KnownNat n => Vector Text -> Chunks n Source #

setToVectorST :: (a -> b) -> Set a -> ST s (Vector b) Source #

sendQuery :: KnownNat n => SearchEnv n a b -> Text -> IO () Source #

readVectorHandleWith Source #

Arguments

:: (Text -> Text)

The function to transform the candidates.

-> (Vector Text -> Vector Text)

The function to apply to the constructed vector before compacting.

-> Handle

The handle to read from

-> IO (Vector Text) 

data CaseSensitivity #

Constructors

CaseSensitive 
IgnoreCase 

Instances

Instances details
FromJSON CaseSensitivity 
Instance details

Defined in Data.Text.CaseSensitivity

ToJSON CaseSensitivity 
Instance details

Defined in Data.Text.CaseSensitivity

Generic CaseSensitivity 
Instance details

Defined in Data.Text.CaseSensitivity

Associated Types

type Rep CaseSensitivity :: Type -> Type #

Show CaseSensitivity 
Instance details

Defined in Data.Text.CaseSensitivity

NFData CaseSensitivity 
Instance details

Defined in Data.Text.CaseSensitivity

Methods

rnf :: CaseSensitivity -> () #

Eq CaseSensitivity 
Instance details

Defined in Data.Text.CaseSensitivity

Hashable CaseSensitivity 
Instance details

Defined in Data.Text.CaseSensitivity

type Rep CaseSensitivity 
Instance details

Defined in Data.Text.CaseSensitivity

type Rep CaseSensitivity = D1 ('MetaData "CaseSensitivity" "Data.Text.CaseSensitivity" "alfred-margaret-2.0.0.0-5xj5mmrDprP1Dn3hNNnaoN" 'False) (C1 ('MetaCons "CaseSensitive" 'PrefixI 'False) (U1 :: Type -> Type) :+: C1 ('MetaCons "IgnoreCase" 'PrefixI 'False) (U1 :: Type -> Type))

type Indices (n :: Nat) = (Int, Vector n Int) Source #

Type synonym for the index of a candidate in the backing vector along with the positions of the matches for it.

data SearchSettings a (n :: Nat) Source #

The configuration for a search style with n needles and matcher of type a

Constructors

SearchSettings (a -> Text -> Maybe (MatchFull n)) (a -> Int) Int (Text -> Vector n Int -> Text -> Vector n Int -> Ordering) 

data MatchFull (n :: Nat) Source #

The full match consisting of a score for the match and vector consisting of the positions of the match. The score is intended as for bucketing and as a result shouldn't be two large and must be non-negative . For the fuzzy style in this module n contiguous matches contribute n-1 to the score. The scores thus range from 0 to n-1 where n is the length of the string to be matched. For orderless style this score is always 0.

Constructors

MatchFull 

Fields

Instances

Instances details
Show (MatchFull n) Source # 
Instance details

Defined in Talash.Core

data MatchPart Source #

Constructors

MatchPart 

Fields

Instances

Instances details
Show MatchPart Source # 
Instance details

Defined in Talash.Core

data MatchState (n :: Nat) a Source #

The matching process essentially takes the form of a fold with possible early termination over the matches produced. See the runLower from the alfred-margaret. Here MatchState is the return type of this fold and essentially it records the positions of the matches. Here like in alfred-margaret position is the code unit index of the first code unit beyond the match. We can't use the CodeUnitIndex here because it doesn't have an unbox instance.

Constructors

MatchState 

Fields

  • endLocation :: !Int

    This is used to record the present extent of the match. What extent means is different to different matching styles.

  • partialMatch :: !(Vector n Int)

    The vector recording the position of the matches.

  • aux :: !a

    Any auxiliary information needed to describe the state of the match.

Instances

Instances details
Show a => Show (MatchState n a) Source # 
Instance details

Defined in Talash.Core

Methods

showsPrec :: Int -> MatchState n a -> ShowS #

show :: MatchState n a -> String #

showList :: [MatchState n a] -> ShowS #

data Matcher a Source #

The existential version of MatcherSized

Constructors

forall n.KnownNat n => Matcher (MatcherSized n a) 

data MatcherSized (n :: Nat) a Source #

The MatcherSized type consists of a state machine for matching a fixed number of needles. The number of matches needed is encoded in the Nat parameterzing the type. Here the purpose is to improve the memory consumption by utlizing the Unbox instance for sized tagged unboxed vectors from (vector-sized)[https:/hackage.haskell.orgpackage/vector-sized] package. This significantly reduces the memory consumption. At least in the present implementation there is no benefit for correctness and dealing with the length tag is occasionally annoying.

Constructors

MatcherSized 

Fields

  • caseSensitivity :: CaseSensitivity
     
  • machina :: !(AcMachine a)

    An AhoCorasick state machine from the alfred-margaret package which does the actual string matching

  • sizes :: !(Either Int (Vector n Int))

    The sizes of the basic needles in code unit indices. The Left Int case is for when the length of all the needles is 1 with Int the number of needles.

fuzzyMatcherSized :: KnownNat n => p n -> CaseSensitivity -> Text -> MatcherSized n MatchPart Source #

Constructs the matcher for fuzzy matching. The needles are all possible contigous subtrings of the string being matched. The Nat n must be instantiated at the length n of the query string. They are n choose 2 such substrings, so to the complexity of matching is \(O(m + n^2)\) where m is the length of candidate string. This is a rough (and probably wrong) estimate as the updating the matchstate for each found match is not a constant time operation. Not sure if Aho Corasick is the optimal way for this kind of matching but in practice it seems fast enough.

fuzzyMatcher :: CaseSensitivity -> Text -> Matcher MatchPart Source #

Unsized version of fuzzyMatcherSized

orderlessMatcherSized :: KnownNat n => p n -> CaseSensitivity -> Text -> MatcherSized n Int Source #

Constructs the matcher for orderless matching, the needles are the words from the query string and the proxy argument should be instantiated at the number of words.

orderlessMatcher :: CaseSensitivity -> Text -> Matcher Int Source #

Unsized version of orderlessMatcherSized

orderlessMatchPartsAs :: KnownNat n => (Bool -> Text -> a) -> MatcherSized n Int -> Text -> Vector n Int -> [a] Source #

parts Source #

Arguments

:: Either Int (Vector Int)

The information about the lengths of different needles.

-> Text

The candidate string that has been matched

-> Vector Int

The vector recording the positions of the needle in the matched string.

-> [Text]

The candidate string split up according to the match

The parts of a string resulting from a match using the fuzzy matcher.

partsAs :: (Bool -> Text -> a) -> Either Int (Vector Int) -> Text -> Vector Int -> [a] Source #

partsOrderless :: Either Int (Vector Int) -> Text -> Vector Int -> [Text] Source #

The parts of a string resulting from a match using the orderless matcher. See parts for an explanation of arguments.

partsOrderlessAs :: (Bool -> Text -> a) -> Either Int (Vector Int) -> Text -> Vector Int -> [a] Source #

minify :: Either Int (Vector Int) -> Vector Int -> [CodeUnitIndex] Source #

Shorten a match by collapsing the contiguous sub-matches together.

fuzzySettings :: KnownNat n => Int -> SearchSettings (MatcherSized n MatchPart) n Source #

Search functions suitable for fuzzy matching. The candidate c will match query s if c contains all the characters in s in order. In general there can be several ways of matching. This tries to find a match with minimum number of parts of. It does not find the minimum number of parts, if that requires reducing the extent of the partial match during search. E.g. matching "as" against "talash" the split will be ["tal","as","h"] and not ["t","a","la","s","h"]. While matching "talash best match testing hat" against "tea" will not result in ["talash best match ","te","sting h","a","t"] since "te" occurs only after we have match all three letters and we can't know if we will find the "a" without going through the string.

orderlessSettings :: KnownNat n => Int -> SearchSettings (MatcherSized n Int) n Source #

Search functions that match the words in i.e. space separated substring in any order. "talash best" will match "be as" with the split ["tal","as","h","be","st"] but "talash best" will not match "bet".

data ChunkIndex Source #

Constructors

ChunkIndex 

Fields

Instances

Instances details
Show ChunkIndex Source # 
Instance details

Defined in Talash.ScoredMatch

Eq ChunkIndex Source # 
Instance details

Defined in Talash.ScoredMatch

Ord ChunkIndex Source # 
Instance details

Defined in Talash.ScoredMatch

Unbox ChunkIndex Source # 
Instance details

Defined in Talash.ScoredMatch

Vector Vector ChunkIndex Source # 
Instance details

Defined in Talash.ScoredMatch

MVector MVector ChunkIndex Source # 
Instance details

Defined in Talash.ScoredMatch

newtype Vector ChunkIndex Source # 
Instance details

Defined in Talash.ScoredMatch

newtype MVector s ChunkIndex Source # 
Instance details

Defined in Talash.ScoredMatch

data ScoredMatchSized (n :: Nat) Source #

Constructors

ScoredMatchSized 

Fields

Instances

Instances details
KnownNat n => Vector Vector (ScoredMatchSized n) Source # 
Instance details

Defined in Talash.ScoredMatch

KnownNat n => MVector MVector (ScoredMatchSized n) Source # 
Instance details

Defined in Talash.ScoredMatch

Show (ScoredMatchSized n) Source # 
Instance details

Defined in Talash.ScoredMatch

Eq (ScoredMatchSized n) Source # 
Instance details

Defined in Talash.ScoredMatch

Ord (ScoredMatchSized n) Source # 
Instance details

Defined in Talash.ScoredMatch

KnownNat n => Unbox (ScoredMatchSized n) Source # 
Instance details

Defined in Talash.ScoredMatch

newtype MVector s (ScoredMatchSized n) Source # 
Instance details

Defined in Talash.ScoredMatch

newtype Vector (ScoredMatchSized n) Source # 
Instance details

Defined in Talash.ScoredMatch