{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}

{-|
Module      : FuzzyFind
Description : Provides fuzzy matching on text
Copyright   : Unison Computing, 2021
License     : MIT
Maintainer  : runar.bjarnason@unison.cloud
Stability   : experimental

A package that provides an API for fuzzy text search in Haskell, using a
modified version of the Smith-Waterman algorithm. The search is intended to
behave similarly to the excellent fzf tool by Junegunn Choi.
-}
module Text.FuzzyFind where

import Control.Monad (join)
import Data.Array
  ( Array,
    (!),
  )
import qualified Data.Array as Array
import Data.Char (isAlphaNum, isLower, isUpper, toLower)
import Data.Foldable (maximumBy, toList, foldl')
import Data.Function (on)
import Data.Sequence
  ( Seq (..),
    ViewL (..),
    ViewR (..),
    viewl,
    viewr,
    (<|)
  )
import Data.List (sortOn)
import qualified Data.Sequence as Seq
import GHC.Generics (Generic)

-- | Calling @bestMatch query string@ will return 'Nothing' if @query@ is not a
-- subsequence of @string@. Otherwise, it will return the "best" way to line up
-- the characters in @query@ with the characters in @string@. Lower-case
-- characters in the @query@ are assumed to be case-insensitive, and upper-case
-- characters are assumed to be case-sensitive.
--
-- For example:
--
-- @
-- > bestMatch "ff" \"FuzzyFind\"
-- Just (Alignment {score = 25, result = Result {[Match \"F\", Gap "uzzy", Match \"F\", Gap "ind"]}})
-- @
--
-- The score indicates how "good" the match is. Better matches have higher
-- scores. There's no maximum score (except for the upper limit of the 'Int'
-- datatype), but the lowest score is @0@.
--
-- A substring from the query will generate a 'Match', and any characters from
-- the -- input that don't result in a 'Match' will generate a 'Gap'.
-- Concatenating all the 'Match' and 'Gap' results should yield the original
-- input string.
--
-- Note that the matched characters in the input always occur in the same order
-- as they do in the query pattern.
--
-- The algorithm prefers (and will generate higher scores for) the following
-- kinds of matches:
--
--   1. Contiguous characters from the query string. For example, @bestMatch "pp"@
-- will find the last two ps in "pickled pepper".
--   2. Characters at the beginnings of words. For example, @bestMatch "pp"@
-- will find the first two Ps in "Peter Piper".
--   3. Characters at CamelCase humps. For example, @bestMatch "bm" "BatMan"@
--   4. The algorithm strongly prefers the first character of the query pattern
-- to be at the beginning of a word or CamelHump. For example,
-- @bestMatch "mn" "Bat Man"@ will score higher than @bestMatch "atn" "Batman"@.
--
-- All else being equal, matches that occur later in the input string are preferred.
bestMatch :: String -- ^ The query pattern.
          -> String -- ^ The input string.
          -> Maybe Alignment
bestMatch :: String -> String -> Maybe Alignment
bestMatch = Int
-> Int
-> (Int -> Int)
-> Int
-> Int
-> Int
-> Int
-> String
-> String
-> Maybe Alignment
bestMatch' Int
defaultMatchScore
                       Int
defaultMismatchScore
                       Int -> Int
defaultGapPenalty
                       Int
defaultBoundaryBonus
                       Int
defaultCamelCaseBonus
                       Int
defaultFirstCharBonusMultiplier
                       Int
defaultConsecutiveBonus

-- | Finds input strings that match all the given input patterns. For each input
-- that matches, it returns one 'Alignment'. The output is sorted by 'score',
-- ascending.
--
-- For example:
--
-- @
-- > import Data.Foldable
-- > traverse_ (putStrLn . ("\\n" ++) . highlight) $ fuzzyFind ["dad", "mac", "dam"] ["red macadamia", "Madam Card"]
--
-- Madam Card
-- * *** ** *
--
-- red macadamia
--   * *******
-- @
fuzzyFind :: [String] -- ^ The query patterns.
          -> [String] -- ^ The input strings.
          -> [Alignment]
fuzzyFind :: [String] -> [String] -> [Alignment]
fuzzyFind query :: [String]
query strings :: [String]
strings =
  (Alignment -> Int) -> [Alignment] -> [Alignment]
forall b a. Ord b => (a -> b) -> [a] -> [a]
sortOn Alignment -> Int
score
    ([Alignment] -> [Alignment]) -> [Alignment] -> [Alignment]
forall a b. (a -> b) -> a -> b
$   [String]
strings
    [String] -> (String -> [Alignment]) -> [Alignment]
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= (\s :: String
s -> Maybe Alignment -> [Alignment]
forall (t :: * -> *) a. Foldable t => t a -> [a]
toList
          (Maybe Alignment -> [Alignment]) -> Maybe Alignment -> [Alignment]
forall a b. (a -> b) -> a -> b
$ (Maybe Alignment -> String -> Maybe Alignment)
-> Maybe Alignment -> [String] -> Maybe Alignment
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\a :: Maybe Alignment
a q :: String
q -> Alignment -> Alignment -> Alignment
forall a. Semigroup a => a -> a -> a
(<>) (Alignment -> Alignment -> Alignment)
-> Maybe Alignment -> Maybe (Alignment -> Alignment)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe Alignment
a Maybe (Alignment -> Alignment)
-> Maybe Alignment -> Maybe Alignment
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> String -> String -> Maybe Alignment
bestMatch String
q String
s) (Alignment -> Maybe Alignment
forall a. a -> Maybe a
Just Alignment
forall a. Monoid a => a
mempty) [String]
query
        )

instance Semigroup Alignment where
  Alignment n :: Int
n r :: Result
r <> :: Alignment -> Alignment -> Alignment
<> Alignment m :: Int
m s :: Result
s = Int -> Result -> Alignment
Alignment (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
m) (Result -> Result -> Result
mergeResults Result
r Result
s)

instance Monoid Alignment where
  mempty :: Alignment
mempty = Int -> Result -> Alignment
Alignment 0 Result
forall a. Monoid a => a
mempty

type Score = Int

-- | An 'Alignment' is a 'Score' together with a 'Result'. Better results have
-- higher scores.
data Alignment
  = Alignment { Alignment -> Int
score :: Score, Alignment -> Result
result :: Result }
  deriving (Alignment -> Alignment -> Bool
(Alignment -> Alignment -> Bool)
-> (Alignment -> Alignment -> Bool) -> Eq Alignment
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Alignment -> Alignment -> Bool
$c/= :: Alignment -> Alignment -> Bool
== :: Alignment -> Alignment -> Bool
$c== :: Alignment -> Alignment -> Bool
Eq, Eq Alignment
Eq Alignment =>
(Alignment -> Alignment -> Ordering)
-> (Alignment -> Alignment -> Bool)
-> (Alignment -> Alignment -> Bool)
-> (Alignment -> Alignment -> Bool)
-> (Alignment -> Alignment -> Bool)
-> (Alignment -> Alignment -> Alignment)
-> (Alignment -> Alignment -> Alignment)
-> Ord Alignment
Alignment -> Alignment -> Bool
Alignment -> Alignment -> Ordering
Alignment -> Alignment -> Alignment
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Alignment -> Alignment -> Alignment
$cmin :: Alignment -> Alignment -> Alignment
max :: Alignment -> Alignment -> Alignment
$cmax :: Alignment -> Alignment -> Alignment
>= :: Alignment -> Alignment -> Bool
$c>= :: Alignment -> Alignment -> Bool
> :: Alignment -> Alignment -> Bool
$c> :: Alignment -> Alignment -> Bool
<= :: Alignment -> Alignment -> Bool
$c<= :: Alignment -> Alignment -> Bool
< :: Alignment -> Alignment -> Bool
$c< :: Alignment -> Alignment -> Bool
compare :: Alignment -> Alignment -> Ordering
$ccompare :: Alignment -> Alignment -> Ordering
$cp1Ord :: Eq Alignment
Ord, Int -> Alignment -> ShowS
[Alignment] -> ShowS
Alignment -> String
(Int -> Alignment -> ShowS)
-> (Alignment -> String)
-> ([Alignment] -> ShowS)
-> Show Alignment
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Alignment] -> ShowS
$cshowList :: [Alignment] -> ShowS
show :: Alignment -> String
$cshow :: Alignment -> String
showsPrec :: Int -> Alignment -> ShowS
$cshowsPrec :: Int -> Alignment -> ShowS
Show, (forall x. Alignment -> Rep Alignment x)
-> (forall x. Rep Alignment x -> Alignment) -> Generic Alignment
forall x. Rep Alignment x -> Alignment
forall x. Alignment -> Rep Alignment x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Alignment x -> Alignment
$cfrom :: forall x. Alignment -> Rep Alignment x
Generic)

-- | The base score given to a matching character
defaultMatchScore :: Int
defaultMatchScore :: Int
defaultMatchScore = 16

-- | The base score given to a mismatched character
defaultMismatchScore :: Int
defaultMismatchScore :: Int
defaultMismatchScore = 0

-- | Bonus points given to characters matching at the beginning of words
defaultBoundaryBonus :: Int
defaultBoundaryBonus :: Int
defaultBoundaryBonus = Int
defaultMatchScore Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` 2

-- | Bonus points given to characters matching a hump of a CamelCase word.
-- We subtract a point from the word boundary score, since a word boundary will
-- incur a gap penalty.
defaultCamelCaseBonus :: Int
defaultCamelCaseBonus :: Int
defaultCamelCaseBonus = Int
defaultBoundaryBonus Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1

-- | Double any bonus points for matching the first pattern of the character.
-- This way we strongly prefer starting the match at the beginning of a word.
defaultFirstCharBonusMultiplier :: Int
defaultFirstCharBonusMultiplier :: Int
defaultFirstCharBonusMultiplier = 2

-- | We prefer consecutive runs of matched characters in the pattern, so we
-- impose a penalty for any gaps, proportional to the size of the gap.
defaultGapPenalty :: Int -> Int
defaultGapPenalty :: Int -> Int
defaultGapPenalty 1 = 3
defaultGapPenalty n :: Int
n = Int -> Int -> Int
forall a. Ord a => a -> a -> a
max 0 (3 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n)

-- | We give a bonus to consecutive matching characters.
-- A number about the same as the `boundaryBonus` will strongly prefer
-- runs of consecutive characters vs finding acronyms.
defaultConsecutiveBonus :: Int
defaultConsecutiveBonus :: Int
defaultConsecutiveBonus = Int -> Int
defaultGapPenalty 8

-- | Renders an 'Alignment' as a pair of lines with "*" on the lower line
-- indicating the location of pattern matches.
highlight :: Alignment -> String
highlight :: Alignment -> String
highlight (Alignment s :: Int
s (Result segments :: Seq ResultSegment
segments)) =
  (ResultSegment -> String) -> Seq ResultSegment -> String
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap ResultSegment -> String
prettySegment Seq ResultSegment
segments String -> ShowS
forall a. Semigroup a => a -> a -> a
<> "\n" String -> ShowS
forall a. Semigroup a => a -> a -> a
<> (ResultSegment -> String) -> Seq ResultSegment -> String
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap ResultSegment -> String
showGaps Seq ResultSegment
segments
 where
  prettySegment :: ResultSegment -> String
prettySegment (Gap   xs :: Seq Char
xs) = Seq Char -> String
forall (t :: * -> *) a. Foldable t => t a -> [a]
toList Seq Char
xs
  prettySegment (Match xs :: Seq Char
xs) = Seq Char -> String
forall (t :: * -> *) a. Foldable t => t a -> [a]
toList Seq Char
xs
  showGaps :: ResultSegment -> String
showGaps (Gap   xs :: Seq Char
xs) = Int -> Char -> String
forall a. Int -> a -> [a]
replicate (Seq Char -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length Seq Char
xs) ' '
  showGaps (Match xs :: Seq Char
xs) = Int -> Char -> String
forall a. Int -> a -> [a]
replicate (Seq Char -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length Seq Char
xs) '*'

-- | A highly configurable version of 'bestMatch'.
bestMatch'
  :: Int -- ^ Base score for a matching character. See 'defaultMatchScore'.
  -> Int -- ^ Base score for a mismatched character. See 'defaultMismatchScore'.
  -> (Int -> Int) -- ^ Penalty for a gap of the given length. See 'defaultGapPenalty'.
  -> Int -- ^ Bonus score for a match at the beginning of a word. See 'defaultBoundaryBonus'.
  -> Int -- ^ Bonus score for a match on a CamelCase hump. See 'defaultCamelCaseBonus'.
  -> Int -- ^ Bonus multiplier for matching the first character of the pattern.
         --   See 'defaultFirstCharBonusMultiplier'.
  -> Int -- ^ Bonus score for each consecutive character matched. See 'defaultFirstCharBonusMultiplier'.
  -> String -- ^ The query pattern.
  -> String -- ^ The input string.
  -> Maybe Alignment
bestMatch' :: Int
-> Int
-> (Int -> Int)
-> Int
-> Int
-> Int
-> Int
-> String
-> String
-> Maybe Alignment
bestMatch' matchScore :: Int
matchScore mismatchScore :: Int
mismatchScore gapPenalty :: Int -> Int
gapPenalty boundaryBonus :: Int
boundaryBonus camelCaseBonus :: Int
camelCaseBonus firstCharBonusMultiplier :: Int
firstCharBonusMultiplier consecutiveBonus :: Int
consecutiveBonus query :: String
query str :: String
str
  = Int -> Result -> Alignment
Alignment (Int -> Int -> Int
totalScore Int
m Int
nx) (Result -> Alignment) -> (Result -> Result) -> Result -> Alignment
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Result -> Result
reverseResult (Result -> Alignment) -> Maybe Result -> Maybe Alignment
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe Result
traceback
 where
  totalScore :: Int -> Int -> Int
totalScore i :: Int
i j :: Int
j = if Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
m then 0 else Array (Int, Int) Int
hs Array (Int, Int) Int -> (Int, Int) -> Int
forall i e. Ix i => Array i e -> i -> e
! (Int
i, Int
j) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Array (Int, Int) Int
bonuses Array (Int, Int) Int -> (Int, Int) -> Int
forall i e. Ix i => Array i e -> i -> e
! (Int
i, Int
j)
  table :: String
table = [String] -> String
unlines
    [ [String] -> String
unwords
      ([String] -> String) -> [String] -> String
forall a b. (a -> b) -> a -> b
$ (if Int
y Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 0 then Char -> String
forall a. Show a => a -> String
show (Char -> String) -> Char -> String
forall a b. (a -> b) -> a -> b
$ Array Int Char
b' Array Int Char -> Int -> Char
forall i e. Ix i => Array i e -> i -> e
! Int
y else "   ")
      String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [ Int -> String
forall a. Show a => a -> String
show (Int -> Int -> Int
totalScore Int
x Int
y) | Int
x <- [0 .. Int
Item [Int]
m] ]
    | Int
y <- [0 .. Int
Item [Int]
n]
    ]
  similarity :: Char -> Char -> Int
similarity a :: Char
a b :: Char
b =
    if Char
a Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
b Bool -> Bool -> Bool
|| Char
a Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char -> Char
toLower Char
b then Int
matchScore else Int
mismatchScore
  traceback :: Maybe Result
traceback = (String -> Result
gaps (Int -> ShowS
forall a. Int -> [a] -> [a]
drop Int
nx String
str) Result -> Result -> Result
forall a. Semigroup a => a -> a -> a
<>) (Result -> Result) -> Maybe Result -> Maybe Result
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Int, Int) -> Maybe Result
go (Int
m, Int
nx)
  go :: (Int, Int) -> Maybe Result
go (0, j :: Int
j) = Result -> Maybe Result
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Result -> Maybe Result) -> Result -> Maybe Result
forall a b. (a -> b) -> a -> b
$ String -> Result
gaps (Int -> ShowS
forall a. Int -> [a] -> [a]
take Int
j String
str)
  go (i :: Int
i, 0) = Maybe Result
forall a. Maybe a
Nothing
  go (i :: Int
i, j :: Int
j) = if Char -> Char -> Int
similarity (Array Int Char
a' Array Int Char -> Int -> Char
forall i e. Ix i => Array i e -> i -> e
! Int
i) (Array Int Char
b' Array Int Char -> Int -> Char
forall i e. Ix i => Array i e -> i -> e
! Int
j) Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 0
    then (Char -> Result
match (Array Int Char
b' Array Int Char -> Int -> Char
forall i e. Ix i => Array i e -> i -> e
! Int
j) Result -> Result -> Result
forall a. Semigroup a => a -> a -> a
<>) (Result -> Result) -> Maybe Result -> Maybe Result
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Int, Int) -> Maybe Result
go (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1, Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1)
    else (Char -> Result
gap (Array Int Char
b' Array Int Char -> Int -> Char
forall i e. Ix i => Array i e -> i -> e
! Int
j) Result -> Result -> Result
forall a. Semigroup a => a -> a -> a
<>) (Result -> Result) -> Maybe Result -> Maybe Result
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Int, Int) -> Maybe Result
go (Int
i, Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1)
  nx :: Int
nx = Int -> Int -> Int
localMax Int
m Int
n
  localMax :: Int -> Int -> Int
localMax m :: Int
m n :: Int
n = (Int -> Int -> Ordering) -> [Int] -> Int
forall (t :: * -> *) a.
Foldable t =>
(a -> a -> Ordering) -> t a -> a
maximumBy
    (\b :: Int
b d :: Int
d -> Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Int -> Int -> Int
totalScore Int
m Int
b) (Int -> Int -> Int
totalScore Int
m Int
d))
    [ Int
j | Int
j <- [1 .. Int
Item [Int]
n] ]
  m :: Int
m       = String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
query
  n :: Int
n       = String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
str
  a' :: Array Int Char
a'      = (Int, Int) -> String -> Array Int Char
forall i e. Ix i => (i, i) -> [e] -> Array i e
Array.listArray (1, Int
m) String
query
  b' :: Array Int Char
b'      = (Int, Int) -> String -> Array Int Char
forall i e. Ix i => (i, i) -> [e] -> Array i e
Array.listArray (1, Int
n) String
str
  hs :: Array (Int, Int) Int
hs      = ((Int, Int), (Int, Int)) -> [Int] -> Array (Int, Int) Int
forall i e. Ix i => (i, i) -> [e] -> Array i e
Array.listArray ((Int, Int), (Int, Int))
bounds [ Int -> Int -> Int
h Int
i Int
j | (i :: Int
i, j :: Int
j) <- ((Int, Int), (Int, Int)) -> [(Int, Int)]
forall a. Ix a => (a, a) -> [a]
Array.range ((Int, Int), (Int, Int))
bounds ]
  bonuses :: Array (Int, Int) Int
bonuses = ((Int, Int), (Int, Int)) -> [Int] -> Array (Int, Int) Int
forall i e. Ix i => (i, i) -> [e] -> Array i e
Array.listArray ((Int, Int), (Int, Int))
bounds [ Int -> Int -> Int
bonus Int
i Int
j | (i :: Int
i, j :: Int
j) <- ((Int, Int), (Int, Int)) -> [(Int, Int)]
forall a. Ix a => (a, a) -> [a]
Array.range ((Int, Int), (Int, Int))
bounds ]
  bounds :: ((Int, Int), (Int, Int))
bounds  = ((0, 0), (Int
m, Int
n))
  bonus :: Int -> Int -> Int
bonus 0 j :: Int
j = 0
  bonus i :: Int
i 0 = 0
  bonus i :: Int
i j :: Int
j = if Char -> Char -> Int
similarity (Array Int Char
a' Array Int Char -> Int -> Char
forall i e. Ix i => Array i e -> i -> e
! Int
i) (Array Int Char
b' Array Int Char -> Int -> Char
forall i e. Ix i => Array i e -> i -> e
! Int
j) Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 0
    then Int
multiplier Int -> Int -> Int
forall a. Num a => a -> a -> a
* (Int
boundary Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
camel Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
consecutive)
    else 0
   where
    boundary :: Int
boundary =
      if Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< 2 Bool -> Bool -> Bool
|| Char -> Bool
isAlphaNum (Array Int Char
b' Array Int Char -> Int -> Char
forall i e. Ix i => Array i e -> i -> e
! Int
j) Bool -> Bool -> Bool
&& Bool -> Bool
not (Char -> Bool
isAlphaNum (Array Int Char
b' Array Int Char -> Int -> Char
forall i e. Ix i => Array i e -> i -> e
! (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1)))
        then Int
boundaryBonus
        else 0
    camel :: Int
camel = if Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 1 Bool -> Bool -> Bool
&& Char -> Bool
isLower (Array Int Char
b' Array Int Char -> Int -> Char
forall i e. Ix i => Array i e -> i -> e
! (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1)) Bool -> Bool -> Bool
&& Char -> Bool
isUpper (Array Int Char
b' Array Int Char -> Int -> Char
forall i e. Ix i => Array i e -> i -> e
! Int
j)
      then Int
camelCaseBonus
      else 0
    multiplier :: Int
multiplier = if Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 1 then Int
firstCharBonusMultiplier else 1
    consecutive :: Int
consecutive =
      let
        similar :: Bool
similar = Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 0 Bool -> Bool -> Bool
&& Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 0 Bool -> Bool -> Bool
&& Char -> Char -> Int
similarity (Array Int Char
a' Array Int Char -> Int -> Char
forall i e. Ix i => Array i e -> i -> e
! Int
i) (Array Int Char
b' Array Int Char -> Int -> Char
forall i e. Ix i => Array i e -> i -> e
! Int
j) Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 0
        afterMatch :: Bool
afterMatch =
          Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 1 Bool -> Bool -> Bool
&& Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 1 Bool -> Bool -> Bool
&& Char -> Char -> Int
similarity (Array Int Char
a' Array Int Char -> Int -> Char
forall i e. Ix i => Array i e -> i -> e
! (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1)) (Array Int Char
b' Array Int Char -> Int -> Char
forall i e. Ix i => Array i e -> i -> e
! (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1)) Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 0
        beforeMatch :: Bool
beforeMatch =
          Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
m Bool -> Bool -> Bool
&& Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
n Bool -> Bool -> Bool
&& Char -> Char -> Int
similarity (Array Int Char
a' Array Int Char -> Int -> Char
forall i e. Ix i => Array i e -> i -> e
! (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1)) (Array Int Char
b' Array Int Char -> Int -> Char
forall i e. Ix i => Array i e -> i -> e
! (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1)) Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 0
      in
        if Bool
similar Bool -> Bool -> Bool
&& (Bool
afterMatch Bool -> Bool -> Bool
|| Bool
beforeMatch) then Int
consecutiveBonus else 0
  h :: Int -> Int -> Int
h 0 _ = 0
  h _ 0 = 0
  h i :: Int
i j :: Int
j = Int
scoreMatch Int -> Int -> Int
forall a. Ord a => a -> a -> a
`max` Int
scoreGap Int -> Int -> Int
forall a. Ord a => a -> a -> a
`max` 0
   where
    scoreMatch :: Int
scoreMatch =
      Array (Int, Int) Int
hs Array (Int, Int) Int -> (Int, Int) -> Int
forall i e. Ix i => Array i e -> i -> e
! (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1, Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Char -> Char -> Int
similarity (Array Int Char
a' Array Int Char -> Int -> Char
forall i e. Ix i => Array i e -> i -> e
! Int
i) (Array Int Char
b' Array Int Char -> Int -> Char
forall i e. Ix i => Array i e -> i -> e
! Int
j) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Array (Int, Int) Int
bonuses Array (Int, Int) Int -> (Int, Int) -> Int
forall i e. Ix i => Array i e -> i -> e
! (Int
i, Int
j)
    scoreGap :: Int
scoreGap = [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
maximum [ Array (Int, Int) Int
hs Array (Int, Int) Int -> (Int, Int) -> Int
forall i e. Ix i => Array i e -> i -> e
! (Int
i, Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
l) Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int -> Int
gapPenalty Int
l | Int
l <- [1 .. Int
Item [Int]
j] ]

data ResultSegment = Gap (Seq Char) | Match (Seq Char)
  deriving (ResultSegment -> ResultSegment -> Bool
(ResultSegment -> ResultSegment -> Bool)
-> (ResultSegment -> ResultSegment -> Bool) -> Eq ResultSegment
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: ResultSegment -> ResultSegment -> Bool
$c/= :: ResultSegment -> ResultSegment -> Bool
== :: ResultSegment -> ResultSegment -> Bool
$c== :: ResultSegment -> ResultSegment -> Bool
Eq, Eq ResultSegment
Eq ResultSegment =>
(ResultSegment -> ResultSegment -> Ordering)
-> (ResultSegment -> ResultSegment -> Bool)
-> (ResultSegment -> ResultSegment -> Bool)
-> (ResultSegment -> ResultSegment -> Bool)
-> (ResultSegment -> ResultSegment -> Bool)
-> (ResultSegment -> ResultSegment -> ResultSegment)
-> (ResultSegment -> ResultSegment -> ResultSegment)
-> Ord ResultSegment
ResultSegment -> ResultSegment -> Bool
ResultSegment -> ResultSegment -> Ordering
ResultSegment -> ResultSegment -> ResultSegment
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: ResultSegment -> ResultSegment -> ResultSegment
$cmin :: ResultSegment -> ResultSegment -> ResultSegment
max :: ResultSegment -> ResultSegment -> ResultSegment
$cmax :: ResultSegment -> ResultSegment -> ResultSegment
>= :: ResultSegment -> ResultSegment -> Bool
$c>= :: ResultSegment -> ResultSegment -> Bool
> :: ResultSegment -> ResultSegment -> Bool
$c> :: ResultSegment -> ResultSegment -> Bool
<= :: ResultSegment -> ResultSegment -> Bool
$c<= :: ResultSegment -> ResultSegment -> Bool
< :: ResultSegment -> ResultSegment -> Bool
$c< :: ResultSegment -> ResultSegment -> Bool
compare :: ResultSegment -> ResultSegment -> Ordering
$ccompare :: ResultSegment -> ResultSegment -> Ordering
$cp1Ord :: Eq ResultSegment
Ord, Int -> ResultSegment -> ShowS
[ResultSegment] -> ShowS
ResultSegment -> String
(Int -> ResultSegment -> ShowS)
-> (ResultSegment -> String)
-> ([ResultSegment] -> ShowS)
-> Show ResultSegment
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [ResultSegment] -> ShowS
$cshowList :: [ResultSegment] -> ShowS
show :: ResultSegment -> String
$cshow :: ResultSegment -> String
showsPrec :: Int -> ResultSegment -> ShowS
$cshowsPrec :: Int -> ResultSegment -> ShowS
Show, (forall x. ResultSegment -> Rep ResultSegment x)
-> (forall x. Rep ResultSegment x -> ResultSegment)
-> Generic ResultSegment
forall x. Rep ResultSegment x -> ResultSegment
forall x. ResultSegment -> Rep ResultSegment x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep ResultSegment x -> ResultSegment
$cfrom :: forall x. ResultSegment -> Rep ResultSegment x
Generic)

-- | Concatenating all the 'ResultSegment's should yield the original input string.
newtype Result = Result { Result -> Seq ResultSegment
segments :: Seq ResultSegment }
  deriving (Result -> Result -> Bool
(Result -> Result -> Bool)
-> (Result -> Result -> Bool) -> Eq Result
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Result -> Result -> Bool
$c/= :: Result -> Result -> Bool
== :: Result -> Result -> Bool
$c== :: Result -> Result -> Bool
Eq, Eq Result
Eq Result =>
(Result -> Result -> Ordering)
-> (Result -> Result -> Bool)
-> (Result -> Result -> Bool)
-> (Result -> Result -> Bool)
-> (Result -> Result -> Bool)
-> (Result -> Result -> Result)
-> (Result -> Result -> Result)
-> Ord Result
Result -> Result -> Bool
Result -> Result -> Ordering
Result -> Result -> Result
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Result -> Result -> Result
$cmin :: Result -> Result -> Result
max :: Result -> Result -> Result
$cmax :: Result -> Result -> Result
>= :: Result -> Result -> Bool
$c>= :: Result -> Result -> Bool
> :: Result -> Result -> Bool
$c> :: Result -> Result -> Bool
<= :: Result -> Result -> Bool
$c<= :: Result -> Result -> Bool
< :: Result -> Result -> Bool
$c< :: Result -> Result -> Bool
compare :: Result -> Result -> Ordering
$ccompare :: Result -> Result -> Ordering
$cp1Ord :: Eq Result
Ord, Int -> Result -> ShowS
[Result] -> ShowS
Result -> String
(Int -> Result -> ShowS)
-> (Result -> String) -> ([Result] -> ShowS) -> Show Result
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Result] -> ShowS
$cshowList :: [Result] -> ShowS
show :: Result -> String
$cshow :: Result -> String
showsPrec :: Int -> Result -> ShowS
$cshowsPrec :: Int -> Result -> ShowS
Show, (forall x. Result -> Rep Result x)
-> (forall x. Rep Result x -> Result) -> Generic Result
forall x. Rep Result x -> Result
forall x. Result -> Rep Result x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Result x -> Result
$cfrom :: forall x. Result -> Rep Result x
Generic)

match :: Char -> Result
match :: Char -> Result
match a :: Char
a = Seq ResultSegment -> Result
Result [Seq Char -> ResultSegment
Match [Char
Item (Seq Char)
a]]

gap :: Char -> Result
gap :: Char -> Result
gap a :: Char
a = Seq ResultSegment -> Result
Result [Seq Char -> ResultSegment
Gap [Char
Item (Seq Char)
a]]

gaps :: String -> Result
gaps :: String -> Result
gaps s :: String
s = Seq ResultSegment -> Result
Result [Seq Char -> ResultSegment
Gap (Seq Char -> ResultSegment)
-> (String -> Seq Char) -> String -> ResultSegment
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Seq Char
forall a. [a] -> Seq a
Seq.fromList (String -> Item (Seq ResultSegment))
-> String -> Item (Seq ResultSegment)
forall a b. (a -> b) -> a -> b
$ ShowS
forall a. [a] -> [a]
reverse String
s]

reverseResult :: Result -> Result
reverseResult :: Result -> Result
reverseResult (Result xs :: Seq ResultSegment
xs) = Seq ResultSegment -> Result
Result (Seq ResultSegment -> Result)
-> (Seq ResultSegment -> Seq ResultSegment)
-> Seq ResultSegment
-> Result
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Seq ResultSegment -> Seq ResultSegment
forall a. Seq a -> Seq a
Seq.reverse (Seq ResultSegment -> Result) -> Seq ResultSegment -> Result
forall a b. (a -> b) -> a -> b
$ ResultSegment -> ResultSegment
reverseSegment (ResultSegment -> ResultSegment)
-> Seq ResultSegment -> Seq ResultSegment
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Seq ResultSegment
xs

reverseSegment :: ResultSegment -> ResultSegment
reverseSegment :: ResultSegment -> ResultSegment
reverseSegment (Gap xs :: Seq Char
xs) = Seq Char -> ResultSegment
Gap (Seq Char -> Seq Char
forall a. Seq a -> Seq a
Seq.reverse Seq Char
xs)
reverseSegment (Match xs :: Seq Char
xs) = Seq Char -> ResultSegment
Match (Seq Char -> Seq Char
forall a. Seq a -> Seq a
Seq.reverse Seq Char
xs)

instance Monoid Result where
  mempty :: Result
mempty = Seq ResultSegment -> Result
Result []

instance Semigroup Result where
  Result Empty <> :: Result -> Result -> Result
<> as :: Result
as = Result
as
  as :: Result
as <> Result Empty = Result
as
  Result (Seq ResultSegment -> ViewR ResultSegment
forall a. Seq a -> ViewR a
viewr -> h :: Seq ResultSegment
h :> Gap []) <> as :: Result
as = Seq ResultSegment -> Result
Result Seq ResultSegment
h Result -> Result -> Result
forall a. Semigroup a => a -> a -> a
<> Result
as
  as :: Result
as <> Result (Seq ResultSegment -> ViewL ResultSegment
forall a. Seq a -> ViewL a
viewl -> Gap [] :< t :: Seq ResultSegment
t) = Result
as Result -> Result -> Result
forall a. Semigroup a => a -> a -> a
<> Seq ResultSegment -> Result
Result Seq ResultSegment
t
  Result (Seq ResultSegment -> ViewR ResultSegment
forall a. Seq a -> ViewR a
viewr -> h :: Seq ResultSegment
h :> Match []) <> as :: Result
as = Seq ResultSegment -> Result
Result Seq ResultSegment
h Result -> Result -> Result
forall a. Semigroup a => a -> a -> a
<> Result
as
  as :: Result
as <> Result (Seq ResultSegment -> ViewL ResultSegment
forall a. Seq a -> ViewL a
viewl -> Match [] :< t :: Seq ResultSegment
t) = Result
as Result -> Result -> Result
forall a. Semigroup a => a -> a -> a
<> Seq ResultSegment -> Result
Result Seq ResultSegment
t
  Result (Seq ResultSegment -> ViewR ResultSegment
forall a. Seq a -> ViewR a
viewr -> i :: Seq ResultSegment
i :> Gap l :: Seq Char
l) <> Result (Seq ResultSegment -> ViewL ResultSegment
forall a. Seq a -> ViewL a
viewl -> Gap h :: Seq Char
h :< t :: Seq ResultSegment
t) =
    Seq ResultSegment -> Result
Result (Seq ResultSegment
i Seq ResultSegment -> Seq ResultSegment -> Seq ResultSegment
forall a. Semigroup a => a -> a -> a
<> [Seq Char -> ResultSegment
Gap (Seq Char
l Seq Char -> Seq Char -> Seq Char
forall a. Semigroup a => a -> a -> a
<> Seq Char
h)] Seq ResultSegment -> Seq ResultSegment -> Seq ResultSegment
forall a. Semigroup a => a -> a -> a
<> Seq ResultSegment
t)
  Result (Seq ResultSegment -> ViewR ResultSegment
forall a. Seq a -> ViewR a
viewr -> i :: Seq ResultSegment
i :> Match l :: Seq Char
l) <> Result (Seq ResultSegment -> ViewL ResultSegment
forall a. Seq a -> ViewL a
viewl -> Match h :: Seq Char
h :< t :: Seq ResultSegment
t) =
    Seq ResultSegment -> Result
Result (Seq ResultSegment
i Seq ResultSegment -> Seq ResultSegment -> Seq ResultSegment
forall a. Semigroup a => a -> a -> a
<> [Seq Char -> ResultSegment
Match (Seq Char
l Seq Char -> Seq Char -> Seq Char
forall a. Semigroup a => a -> a -> a
<> Seq Char
h)] Seq ResultSegment -> Seq ResultSegment -> Seq ResultSegment
forall a. Semigroup a => a -> a -> a
<> Seq ResultSegment
t)
  Result a :: Seq ResultSegment
a <> Result b :: Seq ResultSegment
b = Seq ResultSegment -> Result
Result (Seq ResultSegment
a Seq ResultSegment -> Seq ResultSegment -> Seq ResultSegment
forall a. Semigroup a => a -> a -> a
<> Seq ResultSegment
b)

mergeResults :: Result -> Result -> Result
mergeResults :: Result -> Result -> Result
mergeResults as :: Result
as bs :: Result
bs = Result -> Result -> Result
merge Result
as Result
bs
 where
  drop' :: Int -> Result -> Result
  drop' :: Int -> Result -> Result
drop' n :: Int
n m :: Result
m | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< 1 = Result
m
  drop' n :: Int
n (Result (Seq ResultSegment -> ViewL ResultSegment
forall a. Seq a -> ViewL a
viewl -> Gap g :: Seq Char
g :< t :: Seq ResultSegment
t)) =
    Seq ResultSegment -> Result
Result [Seq Char -> ResultSegment
Gap (Int -> Seq Char -> Seq Char
forall a. Int -> Seq a -> Seq a
Seq.drop Int
n Seq Char
g)] Result -> Result -> Result
forall a. Semigroup a => a -> a -> a
<> Int -> Result -> Result
drop' (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Seq Char -> Int
forall a. Seq a -> Int
Seq.length Seq Char
g) (Seq ResultSegment -> Result
Result Seq ResultSegment
t)
  drop' n :: Int
n (Result (Seq ResultSegment -> ViewL ResultSegment
forall a. Seq a -> ViewL a
viewl -> Match g :: Seq Char
g :< t :: Seq ResultSegment
t)) =
    Seq ResultSegment -> Result
Result [Seq Char -> ResultSegment
Match (Int -> Seq Char -> Seq Char
forall a. Int -> Seq a -> Seq a
Seq.drop Int
n Seq Char
g)] Result -> Result -> Result
forall a. Semigroup a => a -> a -> a
<> Int -> Result -> Result
drop' (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Seq Char -> Int
forall a. Seq a -> Int
Seq.length Seq Char
g) (Seq ResultSegment -> Result
Result Seq ResultSegment
t)
  merge :: Result -> Result -> Result
  merge :: Result -> Result -> Result
merge (Result Seq.Empty) ys :: Result
ys                 = Result
ys
  merge xs :: Result
xs                 (Result Seq.Empty) = Result
xs
  merge (Result xs :: Seq ResultSegment
xs)        (Result ys :: Seq ResultSegment
ys       ) = case (Seq ResultSegment -> ViewL ResultSegment
forall a. Seq a -> ViewL a
viewl Seq ResultSegment
xs, Seq ResultSegment -> ViewL ResultSegment
forall a. Seq a -> ViewL a
viewl Seq ResultSegment
ys) of
    (Gap g :: Seq Char
g :< t :: Seq ResultSegment
t, Gap g' :: Seq Char
g' :< t' :: Seq ResultSegment
t')
      | Seq Char -> Int
forall a. Seq a -> Int
Seq.length Seq Char
g Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Seq Char -> Int
forall a. Seq a -> Int
Seq.length Seq Char
g' -> Seq ResultSegment -> Result
Result [Seq Char -> ResultSegment
Gap Seq Char
g]
      Result -> Result -> Result
forall a. Semigroup a => a -> a -> a
<> Result -> Result -> Result
merge (Seq ResultSegment -> Result
Result Seq ResultSegment
t) (Int -> Result -> Result
drop' (Seq Char -> Int
forall a. Seq a -> Int
Seq.length Seq Char
g) (Seq ResultSegment -> Result
Result Seq ResultSegment
ys))
      | Bool
otherwise -> Seq ResultSegment -> Result
Result [Seq Char -> ResultSegment
Gap Seq Char
g']
      Result -> Result -> Result
forall a. Semigroup a => a -> a -> a
<> Result -> Result -> Result
merge (Int -> Result -> Result
drop' (Seq Char -> Int
forall a. Seq a -> Int
Seq.length Seq Char
g') (Seq ResultSegment -> Result
Result Seq ResultSegment
xs)) (Seq ResultSegment -> Result
Result Seq ResultSegment
t')
    (Match m :: Seq Char
m :< t :: Seq ResultSegment
t, Match m' :: Seq Char
m' :< t' :: Seq ResultSegment
t')
      | Seq Char -> Int
forall a. Seq a -> Int
Seq.length Seq Char
m Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Seq Char -> Int
forall a. Seq a -> Int
Seq.length Seq Char
m' -> Seq ResultSegment -> Result
Result [Seq Char -> ResultSegment
Match Seq Char
m]
      Result -> Result -> Result
forall a. Semigroup a => a -> a -> a
<> Result -> Result -> Result
merge (Seq ResultSegment -> Result
Result Seq ResultSegment
t) (Int -> Result -> Result
drop' (Seq Char -> Int
forall a. Seq a -> Int
Seq.length Seq Char
m) (Seq ResultSegment -> Result
Result Seq ResultSegment
ys))
      | Bool
otherwise -> Seq ResultSegment -> Result
Result [Seq Char -> ResultSegment
Match Seq Char
m']
      Result -> Result -> Result
forall a. Semigroup a => a -> a -> a
<> Result -> Result -> Result
merge (Int -> Result -> Result
drop' (Seq Char -> Int
forall a. Seq a -> Int
Seq.length Seq Char
m') (Seq ResultSegment -> Result
Result Seq ResultSegment
xs)) (Seq ResultSegment -> Result
Result Seq ResultSegment
t')
    (Gap g :: Seq Char
g :< t :: Seq ResultSegment
t, Match m' :: Seq Char
m' :< t' :: Seq ResultSegment
t') ->
      Seq ResultSegment -> Result
Result [Seq Char -> ResultSegment
Match Seq Char
m'] Result -> Result -> Result
forall a. Semigroup a => a -> a -> a
<> Result -> Result -> Result
merge (Int -> Result -> Result
drop' (Seq Char -> Int
forall a. Seq a -> Int
Seq.length Seq Char
m') (Seq ResultSegment -> Result
Result Seq ResultSegment
xs)) (Seq ResultSegment -> Result
Result Seq ResultSegment
t')
    (Match m :: Seq Char
m :< t :: Seq ResultSegment
t, Gap g' :: Seq Char
g' :< t' :: Seq ResultSegment
t') ->
      Seq ResultSegment -> Result
Result [Seq Char -> ResultSegment
Match Seq Char
m] Result -> Result -> Result
forall a. Semigroup a => a -> a -> a
<> Result -> Result -> Result
merge (Seq ResultSegment -> Result
Result Seq ResultSegment
t) (Int -> Result -> Result
drop' (Seq Char -> Int
forall a. Seq a -> Int
Seq.length Seq Char
m) (Seq ResultSegment -> Result
Result Seq ResultSegment
ys))