Copyright	Unison Computing 2021
License	MIT
Maintainer	runar.bjarnason@unison.cloud
Stability	experimental
Safe Haskell	None
Language	Haskell2010

Text.FuzzyFind

Description

A package that provides an API for fuzzy text search in Haskell, using a modified version of the Smith-Waterman algorithm. The search is intended to behave similarly to the excellent fzf tool by Junegunn Choi.

Synopsis

bestMatch :: String -> String -> Maybe Alignment
fuzzyFind :: [String] -> [String] -> [Alignment]
fuzzyFindOn :: (a -> String) -> [String] -> [a] -> [(Alignment, a)]
type Score = Int
data Alignment = Alignment {
- score :: !Score
- result :: !Result
}
defaultMatchScore :: Int
defaultMismatchScore :: Int
defaultBoundaryBonus :: Int
defaultCamelCaseBonus :: Int
defaultFirstCharBonusMultiplier :: Int
defaultGapPenalty :: Int
defaultConsecutiveBonus :: Int
segmentToString :: ResultSegment -> String
highlight :: Alignment -> String
highlight' :: Alignment -> Text
bestMatch' :: Int -> Int -> Int -> Int -> Int -> Int -> Int -> String -> String -> Maybe Alignment
data ResultSegment
- = Gap !String
- | Match !String
newtype Result = Result {
- segments :: Seq ResultSegment
}
mergeResults :: Result -> Result -> Result

Documentation

bestMatch Source #

Arguments

:: String	The query pattern.
-> String	The input string.
-> Maybe Alignment

bestMatch query string will return Nothing if query is not a subsequence of string. Otherwise, it will return the "best" way to line up the characters in query with the characters in string. Lower-case characters in the query are assumed to be case-insensitive, and upper-case characters are assumed to be case-sensitive.

For example:

> bestMatch "ff" "FuzzyFind"
Just (Alignment {score = 25, result = Result {[Match "F", Gap "uzzy", Match "F", Gap "ind"]}})

The score indicates how "good" the match is. Better matches have higher scores. There's no maximum score (except for the upper limit of the Int datatype), but the lowest score is 0.

A substring from the query will generate a Match, and any characters from the input that don't result in a Match will generate a Gap. Concatenating all the Match and Gap results should yield the original input string.

Note that the matched characters in the input always occur in the same order as they do in the query pattern.

The algorithm prefers (and will generate higher scores for) the following kinds of matches:

Contiguous characters from the query string. For example, bestMatch "pp" will find the last two ps in "pickled pepper".
Characters at the beginnings of words. For example, bestMatch "pp" will find the first two Ps in "Peter Piper".
Characters at CamelCase humps. For example, bestMatch "bm" "BatMan" will score higher than bestMatch "bm" "Batman".
The algorithm strongly prefers the first character of the query pattern to be at the beginning of a word or CamelHump. For example, bestMatch "mn" "Bat Man" will score higher than bestMatch "atn" "Batman".

All else being equal, matches that occur later in the input string are preferred.

fuzzyFind Source #

Arguments

:: [String]	The query patterns.
-> [String]	The input strings.
-> [Alignment]

Finds input strings that match all the given input patterns. For each input that matches, it returns one Alignment. The output is not sorted. ascending.

For example:

> import Data.Foldable
> traverse_ (putStrLn . ("\n" ++) . highlight) $ fuzzyFind ["dad", "mac", "dam"] ["red macadamia", "Madam Card"]

Madam Card
* *** ** *

red macadamia
  * *******

fuzzyFindOn :: (a -> String) -> [String] -> [a] -> [(Alignment, a)] Source #

A version of fuzzyFind that searches on the given text field of the data.

type Score = Int Source #

data Alignment Source #

An Alignment is a Score together with a Result. Better results have higher scores.

Constructors

Alignment
Fields score :: !Score result :: !Result

Instances

Instances details

Eq Alignment Source #
Instance details Defined in Text.FuzzyFind Methods (==) :: Alignment -> Alignment -> Bool # (/=) :: Alignment -> Alignment -> Bool #
Ord Alignment Source #
Instance details Defined in Text.FuzzyFind Methods compare :: Alignment -> Alignment -> Ordering # (<) :: Alignment -> Alignment -> Bool # (<=) :: Alignment -> Alignment -> Bool # (>) :: Alignment -> Alignment -> Bool # (>=) :: Alignment -> Alignment -> Bool # max :: Alignment -> Alignment -> Alignment # min :: Alignment -> Alignment -> Alignment #
Show Alignment Source #
Instance details Defined in Text.FuzzyFind Methods showsPrec :: Int -> Alignment -> ShowS # show :: Alignment -> String # showList :: [Alignment] -> ShowS #
Generic Alignment Source #
Instance details Defined in Text.FuzzyFind Associated Types type Rep Alignment :: Type -> Type # Methods from :: Alignment -> Rep Alignment x # to :: Rep Alignment x -> Alignment #
Semigroup Alignment Source #
Instance details Defined in Text.FuzzyFind Methods (<>) :: Alignment -> Alignment -> Alignment # sconcat :: NonEmpty Alignment -> Alignment # stimes :: Integral b => b -> Alignment -> Alignment #
Monoid Alignment Source #
Instance details Defined in Text.FuzzyFind Methods mempty :: Alignment # mappend :: Alignment -> Alignment -> Alignment # mconcat :: [Alignment] -> Alignment #
type Rep Alignment Source #
Instance details Defined in Text.FuzzyFind type Rep Alignment = D1 ('MetaData "Alignment" "Text.FuzzyFind" "fuzzyfind-2.2.0-EY8SAdTKTsH9UhyISHXH74" 'False) (C1 ('MetaCons "Alignment" 'PrefixI 'True) (S1 ('MetaSel ('Just "score") 'NoSourceUnpackedness 'SourceStrict 'DecidedStrict) (Rec0 Score) :*: S1 ('MetaSel ('Just "result") 'NoSourceUnpackedness 'SourceStrict 'DecidedStrict) (Rec0 Result)))

defaultMatchScore :: Int Source #

The base score given to a matching character

defaultMismatchScore :: Int Source #

The base score given to a mismatched character

defaultBoundaryBonus :: Int Source #

Bonus points given to characters matching at the beginning of words

defaultCamelCaseBonus :: Int Source #

Bonus points given to characters matching a hump of a CamelCase word. We subtract a point from the word boundary score, since a word boundary will incur a gap penalty.

defaultFirstCharBonusMultiplier :: Int Source #

Double any bonus points for matching the first pattern of the character. This way we strongly prefer starting the match at the beginning of a word.

defaultGapPenalty :: Int Source #

We prefer consecutive runs of matched characters in the pattern, so we impose a penalty for any gaps, which is added to the size of the gap.

defaultConsecutiveBonus :: Int Source #

We give a bonus to consecutive matching characters. A number about the same as the boundary bonus will prefer runs of consecutive characters vs finding acronyms.

segmentToString :: ResultSegment -> String Source #

highlight :: Alignment -> String Source #

Renders an Alignment as a pair of lines with "*" on the lower line indicating the location of pattern matches.

highlight' :: Alignment -> Text Source #

bestMatch' Source #

Arguments

:: Int	Base score for a matching character. See `defaultMatchScore`.
-> Int	Base score for a mismatched character. See `defaultMismatchScore`.
-> Int	Additional penalty for a gap. See `defaultGapPenalty`.
-> Int	Bonus score for a match at the beginning of a word. See `defaultBoundaryBonus`.
-> Int	Bonus score for a match on a CamelCase hump. See `defaultCamelCaseBonus`.
-> Int	Bonus multiplier for matching the first character of the pattern. See `defaultFirstCharBonusMultiplier`.
-> Int	Bonus score for each consecutive character matched. See `defaultFirstCharBonusMultiplier`.
-> String	The query pattern.
-> String	The input string.
-> Maybe Alignment

A highly configurable version of bestMatch.

data ResultSegment Source #

Constructors

Gap !String
Match !String

Instances

Instances details

Eq ResultSegment Source #
Instance details Defined in Text.FuzzyFind Methods (==) :: ResultSegment -> ResultSegment -> Bool # (/=) :: ResultSegment -> ResultSegment -> Bool #
Ord ResultSegment Source #
Instance details Defined in Text.FuzzyFind Methods compare :: ResultSegment -> ResultSegment -> Ordering # (<) :: ResultSegment -> ResultSegment -> Bool # (<=) :: ResultSegment -> ResultSegment -> Bool # (>) :: ResultSegment -> ResultSegment -> Bool # (>=) :: ResultSegment -> ResultSegment -> Bool # max :: ResultSegment -> ResultSegment -> ResultSegment # min :: ResultSegment -> ResultSegment -> ResultSegment #
Show ResultSegment Source #
Instance details Defined in Text.FuzzyFind Methods showsPrec :: Int -> ResultSegment -> ShowS # show :: ResultSegment -> String # showList :: [ResultSegment] -> ShowS #
Generic ResultSegment Source #
Instance details Defined in Text.FuzzyFind Associated Types type Rep ResultSegment :: Type -> Type # Methods from :: ResultSegment -> Rep ResultSegment x # to :: Rep ResultSegment x -> ResultSegment #
type Rep ResultSegment Source #
Instance details Defined in Text.FuzzyFind type Rep ResultSegment = D1 ('MetaData "ResultSegment" "Text.FuzzyFind" "fuzzyfind-2.2.0-EY8SAdTKTsH9UhyISHXH74" 'False) (C1 ('MetaCons "Gap" 'PrefixI 'False) (S1 ('MetaSel ('Nothing :: Maybe Symbol) 'NoSourceUnpackedness 'SourceStrict 'DecidedStrict) (Rec0 String)) :+: C1 ('MetaCons "Match" 'PrefixI 'False) (S1 ('MetaSel ('Nothing :: Maybe Symbol) 'NoSourceUnpackedness 'SourceStrict 'DecidedStrict) (Rec0 String)))

newtype Result Source #

Concatenating all the ResultSegments should yield the original input string.

Constructors

Result
Fields segments :: Seq ResultSegment

Instances

Instances details

Eq Result Source #
Instance details Defined in Text.FuzzyFind Methods (==) :: Result -> Result -> Bool # (/=) :: Result -> Result -> Bool #
Ord Result Source #
Instance details Defined in Text.FuzzyFind Methods compare :: Result -> Result -> Ordering # (<) :: Result -> Result -> Bool # (<=) :: Result -> Result -> Bool # (>) :: Result -> Result -> Bool # (>=) :: Result -> Result -> Bool # max :: Result -> Result -> Result # min :: Result -> Result -> Result #
Show Result Source #
Instance details Defined in Text.FuzzyFind Methods showsPrec :: Int -> Result -> ShowS # show :: Result -> String # showList :: [Result] -> ShowS #
Generic Result Source #
Instance details Defined in Text.FuzzyFind Associated Types type Rep Result :: Type -> Type # Methods from :: Result -> Rep Result x # to :: Rep Result x -> Result #
Semigroup Result Source #
Instance details Defined in Text.FuzzyFind Methods (<>) :: Result -> Result -> Result # sconcat :: NonEmpty Result -> Result # stimes :: Integral b => b -> Result -> Result #
Monoid Result Source #
Instance details Defined in Text.FuzzyFind Methods mempty :: Result # mappend :: Result -> Result -> Result # mconcat :: [Result] -> Result #
type Rep Result Source #
Instance details Defined in Text.FuzzyFind type Rep Result = D1 ('MetaData "Result" "Text.FuzzyFind" "fuzzyfind-2.2.0-EY8SAdTKTsH9UhyISHXH74" 'True) (C1 ('MetaCons "Result" 'PrefixI 'True) (S1 ('MetaSel ('Just "segments") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 (Seq ResultSegment))))

mergeResults :: Result -> Result -> Result Source #