{-# LANGUAGE OverloadedLists #-}

{- |
  This module provides a 'showDistance' function showing the differences between 2 pieces of text
   using the Levenshtein distance. That distance is defined as the minimum number of edits: insertions, deletions, substitutions
   to go from one text to another.

   Several options are available to customize this processing:
     - split size: texts are broken into new lines first. Then if the texts are too large there are split into smaller pieces
       in order to compute their difference. This is done in order to reduce the size of the edit matrix which is used to compute all the edit costs
       the default is 200

     - separators: opening and closing pieces of text (brackets by default) used to highlight a difference

     - shorten size: there is the possibly to display mostly the differences with a bit of context around if the input text is too large.
       The text gets elided around separators if it gets greater than the shorten size (the default is 20)

     - shorten text: the text to use when eliding characters in the original text (the default is "...")

     - display edit operations: edit operations, insert/delete/substitute/keep can be annotated if necessary

Here are some examples:

@
import Data.Text.Edits

-- "between the e and the n the letter i was added"
showDistance "kitten" "kittein" === ("kitte[]n", "kitte[i]n")

-- "at the end of the text 3 letters have been deleted"
showDistance "kitten" "kit" === ("kit[ten]", "kit[]")

-- "between the t and the n 2 letters have been modified"
showDistance "kitten" "kitsin" === ("kit[te]n", "kit[si]n" )
@

-}
module Data.Text.Edits
  ( SplitSize (..),
    ShortenOptions (..),
    Separators (..),
    DisplayOptions (..),
    EditOperation (..),
    showDistance,
    defaultDisplayOptions,
    defaultDisplayEditOperations,
    taggedDisplayEditOperation,
    defaultSplitSize,
    parensSeparators,
    bracketsSeparators,
    makeCharSeparators,
  )
where

import Data.Text qualified as T
import Data.Text.Costs
import Data.Text.Difference
import Data.Text.EditMatrix
import Data.Text.EditOperation
import Data.Text.Shorten
import Data.Vector qualified as V
import Protolude

-- | Size to use when splitting a large piece of text
newtype SplitSize = SplitSize Int deriving (SplitSize -> SplitSize -> Bool
(SplitSize -> SplitSize -> Bool)
-> (SplitSize -> SplitSize -> Bool) -> Eq SplitSize
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: SplitSize -> SplitSize -> Bool
$c/= :: SplitSize -> SplitSize -> Bool
== :: SplitSize -> SplitSize -> Bool
$c== :: SplitSize -> SplitSize -> Bool
Eq, Int -> SplitSize -> ShowS
[SplitSize] -> ShowS
SplitSize -> [Char]
(Int -> SplitSize -> ShowS)
-> (SplitSize -> [Char])
-> ([SplitSize] -> ShowS)
-> Show SplitSize
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [SplitSize] -> ShowS
$cshowList :: [SplitSize] -> ShowS
show :: SplitSize -> [Char]
$cshow :: SplitSize -> [Char]
showsPrec :: Int -> SplitSize -> ShowS
$cshowsPrec :: Int -> SplitSize -> ShowS
Show)

-- | Default split size
defaultSplitSize :: SplitSize
defaultSplitSize :: SplitSize
defaultSplitSize = Int -> SplitSize
SplitSize Int
200

-- | Show the distance between 2 pieces of text
showDistance :: Text -> Text -> (Text, Text)
showDistance :: Text -> Text -> (Text, Text)
showDistance = SplitSize -> DisplayOptions -> Text -> Text -> (Text, Text)
showDistanceWith SplitSize
defaultSplitSize DisplayOptions
defaultDisplayOptions

-- | Show the distance between 2 pieces of text and specify splitting / display options
showDistanceWith :: SplitSize -> DisplayOptions -> Text -> Text -> (Text, Text)
showDistanceWith :: SplitSize -> DisplayOptions -> Text -> Text -> (Text, Text)
showDistanceWith SplitSize
splitSize DisplayOptions
displayOptions Text
ts1 Text
ts2 =
  SplitSize
-> Text
-> Text
-> (Text, Text)
-> ((Text, Text) -> (Text, Text) -> (Text, Text))
-> (Text, Text)
forall a.
SplitSize -> Text -> Text -> a -> (a -> (Text, Text) -> a) -> a
foldSplitTexts SplitSize
splitSize Text
ts1 Text
ts2 ([], []) (((Text, Text) -> (Text, Text) -> (Text, Text)) -> (Text, Text))
-> ((Text, Text) -> (Text, Text) -> (Text, Text)) -> (Text, Text)
forall a b. (a -> b) -> a -> b
$ \(Text, Text)
ts (Text
line1, Text
line2) -> do
    let matrix :: Matrix Cost
matrix = Costs Char -> [Char] -> [Char] -> Matrix Cost
forall a. Costs a -> [a] -> [a] -> Matrix Cost
createEditMatrix Costs Char
textLevenshteinCosts (Text -> [Char]
forall a b. ConvertText a b => a -> b
toS Text
line1) (Text -> [Char]
forall a b. ConvertText a b => a -> b
toS Text
line2)
    let operations :: [EditOperation Char]
operations = Vector (EditOperation Char) -> [EditOperation Char]
forall (t :: * -> *) a. Foldable t => t a -> [a]
toList (Vector (EditOperation Char) -> [EditOperation Char])
-> Vector (EditOperation Char) -> [EditOperation Char]
forall a b. (a -> b) -> a -> b
$ Vector Char
-> Vector Char -> Matrix Cost -> Vector (EditOperation Char)
forall a.
Vector a -> Vector a -> Matrix Cost -> Vector (EditOperation a)
makeEditOperations ([Char] -> Vector Char
forall a. [a] -> Vector a
V.fromList ([Char] -> Vector Char) -> (Text -> [Char]) -> Text -> Vector Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Char]
forall a b. ConvertText a b => a -> b
toS (Text -> Vector Char) -> Text -> Vector Char
forall a b. (a -> b) -> a -> b
$ Text
line1) ([Char] -> Vector Char
forall a. [a] -> Vector a
V.fromList ([Char] -> Vector Char) -> (Text -> [Char]) -> Text -> Vector Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Char]
forall a b. ConvertText a b => a -> b
toS (Text -> Vector Char) -> Text -> Vector Char
forall a b. (a -> b) -> a -> b
$ Text
line2) Matrix Cost
matrix
    (Text, Text)
ts (Text, Text) -> (Text, Text) -> (Text, Text)
forall a. Semigroup a => a -> a -> a
<> (DisplayOptions -> [EditOperation Char] -> Text
displayDiffs DisplayOptions
displayOptions [EditOperation Char]
operations, DisplayOptions -> [EditOperation Char] -> Text
displayDiffs DisplayOptions
displayOptions ([EditOperation Char] -> Text) -> [EditOperation Char] -> Text
forall a b. (a -> b) -> a -> b
$ EditOperation Char -> EditOperation Char
forall a. EditOperation a -> EditOperation a
inverse (EditOperation Char -> EditOperation Char)
-> [EditOperation Char] -> [EditOperation Char]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [EditOperation Char]
operations)

-- | Split texts and apply the difference on each part
foldSplitTexts :: SplitSize -> Text -> Text -> a -> (a -> (Text, Text) -> a) -> a
foldSplitTexts :: forall a.
SplitSize -> Text -> Text -> a -> (a -> (Text, Text) -> a) -> a
foldSplitTexts SplitSize
splitSize Text
t1 Text
t2 a
initial a -> (Text, Text) -> a
f = do
  let ([Text]
s1, [Text]
s2) = (SplitSize -> Text -> [Text]
split SplitSize
splitSize Text
t1, SplitSize -> Text -> [Text]
split SplitSize
splitSize Text
t2)
  (a -> (Text, Text) -> a) -> a -> [(Text, Text)] -> a
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' a -> (Text, Text) -> a
f a
initial ([Text] -> [Text] -> [(Text, Text)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Text]
s1 [Text]
s2)

-- | Split a text on newlines then split each line on a maximum split size
--   We then perform the edit distance algorithm on smaller sizes of text in order to control memory and CPU
split :: SplitSize -> Text -> [Text]
split :: SplitSize -> Text -> [Text]
split SplitSize
splitSize = (Text -> [Text]) -> [Text] -> [Text]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (SplitSize -> Text -> [Text]
splitToSize SplitSize
splitSize) ([Text] -> [Text]) -> (Text -> [Text]) -> Text -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text -> [Text]
T.splitOn Text
"\n"

-- | Split a text on a maximum split size
splitToSize :: SplitSize -> Text -> [Text]
splitToSize :: SplitSize -> Text -> [Text]
splitToSize ss :: SplitSize
ss@(SplitSize Int
n) Text
t =
  if Text -> Int
T.length Text
t Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
n
    then [Item [Text]
Text
t]
    else Int -> Text -> Text
T.take Int
n Text
t Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: SplitSize -> Text -> [Text]
splitToSize SplitSize
ss (Int -> Text -> Text
T.drop Int
n Text
t)