-----------------------------------------------------------------------------
-- |
-- Module      :  Data.Algorithm.Diff
-- Copyright   :  (c) Sterling Clover 2008-2011, Kevin Charter 2011
-- License     :  BSD 3 Clause
-- Maintainer  :  s.clover@gmail.com
-- Stability   :  experimental
-- Portability :  portable
--
-- This is an implementation of the diff algorithm as described in
-- \"An \( O(ND) \) Difference Algorithm and Its Variations (1986)\"
-- <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.4.6927>.
-- For inputs of size \( O(N) \) with the number of differences \( D \)
-- it has \( O(ND) \) time and \( O(D^2) \) space complexity.
-----------------------------------------------------------------------------

module Data.Algorithm.Diff
    ( Diff, PolyDiff(..)
    -- * Comparing lists for differences
    , getDiff
    , getDiffBy

    -- * Finding chunks of differences
    , getGroupedDiff
    , getGroupedDiffBy
    ) where

import Prelude hiding (pi)
import Data.Array (listArray, (!))
import Data.Bifunctor

-- | A single non-diagonal edit step recorded on a search path: 'F' is a step
-- that advances through the first list only, 'S' a step that advances through
-- the second list only.
data DI = F | S
    deriving (Show, Eq)

-- | A value is either from the 'First' list, the 'Second' or from 'Both'.
-- 'Both' contains both the left and right values, in case you are using a form
-- of equality that doesn't check all data (for example, if you are using a
-- newtype to only perform equality on one side of a tuple).
data PolyDiff a b = First a | Second b | Both a b
    deriving (Show, Eq)

-- | Maps over the second type parameter only: 'Second' values and the
-- right-hand component of 'Both' are transformed, while 'First' values are
-- passed through unchanged.
instance Functor (PolyDiff a) where
  fmap _ (First a) = First a
  fmap g (Second b) = Second (g b)
  fmap g (Both a b) = Both a (g b)

-- | Maps the first function over values that came from the first list and
-- the second function over values that came from the second list; a 'Both'
-- has each of its components transformed by the corresponding function.
instance Bifunctor PolyDiff where
  bimap f _ (First a) = First (f a)
  bimap _ g (Second b) = Second (g b)
  bimap f g (Both a b) = Both (f a) (g b)

-- | This is 'PolyDiff' specialized so both sides are the same type, which is
-- the shape of result produced when two lists of the same element type are
-- compared.
type Diff a = PolyDiff a a

-- | A candidate endpoint of the search: a position in both input lists
-- together with the non-diagonal steps that were taken to get there.
data DL = DL
    { poi  :: !Int  -- ^ Index reached in the first list.
    , poj  :: !Int  -- ^ Index reached in the second list.
    , path :: [DI]  -- ^ Steps taken so far, most recent step first.
    } deriving (Show, Eq)

-- | Ordering used to choose between two candidate endpoints.
--
-- Only '<=' is defined; every other 'Ord' method comes from the class
-- defaults. Endpoints are ordered by 'poi'; when the 'poi' values are equal
-- the comparison is on 'poj' and is reversed (the one with the /larger/ 'poj'
-- counts as the smaller endpoint).
--
-- NOTE: the relation is intentionally not reflexive. For two endpoints with
-- identical 'poi' and 'poj', @x <= y@ is 'False', so the default 'max'
-- (which returns its second argument only when @x <= y@ holds) keeps the
-- first argument on such ties. Making this reflexive would change which path
-- is selected.
instance Ord DL where
    x <= y
        | poi x == poi y = poj x > poj y
        | otherwise      = poi x <= poi y

-- | Build the predicate that decides whether a diagonal step is possible at
-- a pair of indices: both indices must be below the supplied lengths and the
-- elements found there must satisfy the supplied equality.
--
-- Arguments, in order: the equality test, the first list, the second list,
-- the length of the first list, the length of the second list, and finally
-- the two indices being queried.
--
-- The definition takes five arguments and then returns a lambda on purpose:
-- the two lookup arrays are bound in the @where@ clause of that outer
-- equation, presumably so that a partial application (as made in 'lcs') can
-- reuse the same arrays for every later index query instead of rebuilding
-- them per call.
canDiag :: (a -> b -> Bool) -> [a] -> [b] -> Int -> Int -> Int -> Int -> Bool
canDiag eq as bs lena lenb = \i j ->
    -- '&&' short-circuits, so the arrays are only indexed once both bounds
    -- checks have succeeded.
    i < lena && j < lenb && (arAs ! i) `eq` (arBs ! j)
  where
    arAs = listArray (0, lena - 1) as
    arBs = listArray (0, lenb - 1) bs

-- | Advance a list of candidate endpoints by one edit step.
--
-- Every endpoint is expanded into two successors (one step through the first
-- list, one step through the second list), each extended by 'addsnake'. The
-- first successor is kept as is; the remaining ones are then combined
-- pairwise, keeping the 'max' of each pair according to the 'Ord' instance
-- of 'DL'.
--
-- An empty input yields an empty output. 'lcs' always calls this with a
-- non-empty list.
dstep :: (Int -> Int -> Bool) -> [DL] -> [DL]
dstep cd dls =
    case nextDLs dls of
        []       -> []
        hd : rst -> hd : pairMaxes rst
  where
    -- Expand each endpoint into its two successors, in order: the 'F' step
    -- first, then the 'S' step.
    nextDLs [] = []
    nextDLs (dl : rest) = dl' : dl'' : nextDLs rest
      where
        dl'  = addsnake cd $ dl { poi = poi dl + 1, path = F : pdl }
        dl'' = addsnake cd $ dl { poj = poj dl + 1, path = S : pdl }
        pdl  = path dl

    -- Collapse neighbouring elements two at a time, keeping the larger of
    -- each pair; a trailing unpaired element is kept unchanged.
    pairMaxes [] = []
    pairMaxes [x] = [x]
    pairMaxes (x : y : rest) = max x y : pairMaxes rest

-- | Follow a run of diagonal steps: as long as the supplied predicate holds
-- at the endpoint's current position, advance both indices by one and try
-- again. The endpoint is returned unchanged as soon as the predicate fails.
--
-- The 'path' is left untouched, because diagonal steps are not recorded in it.
addsnake :: (Int -> Int -> Bool) -> DL -> DL
addsnake cd dl
    | cd i j    = addsnake cd $ dl { poi = i + 1, poj = j + 1 }
    | otherwise = dl
  where
    i = poi dl
    j = poj dl

-- | Search for an edit path between the two lists under the given equality.
--
-- Starting from position @(0, 0)@ (extended by 'addsnake'), the set of
-- candidate endpoints is repeatedly advanced with 'dstep'. The result is the
-- 'path' of the first endpoint, in generation order, whose position equals
-- the lengths of both lists.
--
-- The returned steps are in reverse order (most recent step first) and
-- contain only 'F' and 'S' steps; matched elements are not recorded.
lcs :: (a -> b -> Bool) -> [a] -> [b] -> [DI]
lcs eq as bs = search . concat . iterate (dstep cd) $ [addsnake cd start]
  where
    start = DL { poi = 0, poj = 0, path = [] }
    cd    = canDiag eq as bs lena lenb
    lena  = length as
    lenb  = length bs

    -- Scan the stream of endpoints for the first one that has consumed both
    -- lists completely.
    search (dl : dls)
        | poi dl == lena && poj dl == lenb = path dl
        | otherwise                        = search dls
    -- 'iterate' never ends and 'dstep' maps non-empty lists to non-empty
    -- lists, so the stream being searched cannot run out.
    search [] = error "Data.Algorithm.Diff.lcs: impossible, endpoint stream ended"

-- | Takes two lists and returns a list of differences between them. This is
-- 'getDiffBy' with '==' used as predicate.
--
-- For example, @getDiff \"abc\" \"abd\"@ evaluates to
-- @[Both \'a\' \'a\', Both \'b\' \'b\', First \'c\', Second \'d\']@.
getDiff :: (Eq a) => [a] -> [a] -> [Diff a]
getDiff = getDiffBy (==)

-- | Takes two lists and returns a list of differences between them, grouped
-- into chunks. This is 'getGroupedDiffBy' with '==' used as predicate.
getGroupedDiff :: (Eq a) => [a] -> [a] -> [Diff [a]]
getGroupedDiff = getGroupedDiffBy (==)

-- | A form of 'getDiff' with no 'Eq' constraint. Instead, an equality predicate
-- is taken as the first argument.
--
-- The step list computed by 'lcs' is reversed into forward order and then
-- replayed against the two inputs to produce the result.
getDiffBy :: (a -> b -> Bool) -> [a] -> [b] -> [PolyDiff a b]
getDiffBy eq a b = markup a b . reverse $ lcs eq a b
  where
    -- Matching heads are emitted as 'Both' without consuming a step, since
    -- the step list holds only 'F' and 'S' entries.
    markup (x : xs) (y : ys) ds
        | eq x y = Both x y : markup xs ys ds
    -- Otherwise the next recorded step decides which list is advanced.
    markup (x : xs) ys (F : ds) = First x : markup xs ys ds
    markup xs (y : ys) (S : ds) = Second y : markup xs ys ds
    -- Anything else (in particular, no steps left to replay) ends the output.
    markup _ _ _ = []

-- | A form of 'getGroupedDiff' with no 'Eq' constraint. Instead, an equality
-- predicate is taken as the first argument.
--
-- The element-wise result of 'getDiffBy' is chunked so that each maximal run
-- of consecutive 'First', 'Second' or 'Both' entries becomes a single entry
-- carrying the run's elements in order.
getGroupedDiffBy :: (a -> b -> Bool) -> [a] -> [b] -> [PolyDiff [a] [b]]
getGroupedDiffBy eq a b = go $ getDiffBy eq a b
  where
    -- Start a group with the head entry, then let the matching collector
    -- gather the remainder of the run and hand back what follows it.
    go (First x : xs) =
        let (fs, rest) = goFirsts xs
         in First (x : fs) : go rest
    go (Second x : xs) =
        let (fs, rest) = goSeconds xs
         in Second (x : fs) : go rest
    go (Both x y : xs) =
        let (fs, rest) = goBoth xs
            (fxs, fys) = unzip fs
         in Both (x : fxs) (y : fys) : go rest
    go [] = []

    -- Collect the leading run of 'First' entries.
    goFirsts (First x : xs) =
        let (fs, rest) = goFirsts xs
         in (x : fs, rest)
    goFirsts xs = ([], xs)

    -- Collect the leading run of 'Second' entries.
    goSeconds (Second x : xs) =
        let (fs, rest) = goSeconds xs
         in (x : fs, rest)
    goSeconds xs = ([], xs)

    -- Collect the leading run of 'Both' entries as pairs.
    goBoth (Both x y : xs) =
        let (fs, rest) = goBoth xs
         in ((x, y) : fs, rest)
    goBoth xs = ([], xs)