{-# LANGUAGE DeriveFunctor #-}
{-# OPTIONS_GHC -fno-warn-incomplete-uni-patterns #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  Data.Algorithm.Diff
-- Copyright   :  (c) Sterling Clover 2008-2011, Kevin Charter 2011
-- License     :  BSD 3 Clause
-- Maintainer  :  s.clover@gmail.com
-- Stability   :  experimental
-- Portability :  portable
--
-- This is an implementation of the O(ND) diff algorithm as described in
-- \"An O(ND) Difference Algorithm and Its Variations (1986)\"
-- <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.4.6927>. It is O(mn) in space.
-- The algorithm is the same one used by standared Unix diff.
-----------------------------------------------------------------------------

module Data.Algorithm.Diff
    ( Diff(..)
    -- * Comparing lists for differences
    , getDiff
    , getDiffBy

    -- * Finding chunks of differences
    , getGroupedDiff
    , getGroupedDiffBy
    ) where

import Prelude hiding (pi)

import Data.Array

data DI = F | S | B deriving (Int -> DI -> ShowS
[DI] -> ShowS
DI -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [DI] -> ShowS
$cshowList :: [DI] -> ShowS
show :: DI -> String
$cshow :: DI -> String
showsPrec :: Int -> DI -> ShowS
$cshowsPrec :: Int -> DI -> ShowS
Show, DI -> DI -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: DI -> DI -> Bool
$c/= :: DI -> DI -> Bool
== :: DI -> DI -> Bool
$c== :: DI -> DI -> Bool
Eq)

-- | A value is either from the 'First' list, the 'Second' or from 'Both'.
-- 'Both' contains both the left and right values, in case you are using a form
-- of equality that doesn't check all data (for example, if you are using a
-- newtype to only perform equality on side of a tuple).
data Diff a = First a | Second a | Both a a deriving (Int -> Diff a -> ShowS
forall a. Show a => Int -> Diff a -> ShowS
forall a. Show a => [Diff a] -> ShowS
forall a. Show a => Diff a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Diff a] -> ShowS
$cshowList :: forall a. Show a => [Diff a] -> ShowS
show :: Diff a -> String
$cshow :: forall a. Show a => Diff a -> String
showsPrec :: Int -> Diff a -> ShowS
$cshowsPrec :: forall a. Show a => Int -> Diff a -> ShowS
Show, Diff a -> Diff a -> Bool
forall a. Eq a => Diff a -> Diff a -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Diff a -> Diff a -> Bool
$c/= :: forall a. Eq a => Diff a -> Diff a -> Bool
== :: Diff a -> Diff a -> Bool
$c== :: forall a. Eq a => Diff a -> Diff a -> Bool
Eq, forall a b. a -> Diff b -> Diff a
forall a b. (a -> b) -> Diff a -> Diff b
forall (f :: * -> *).
(forall a b. (a -> b) -> f a -> f b)
-> (forall a b. a -> f b -> f a) -> Functor f
<$ :: forall a b. a -> Diff b -> Diff a
$c<$ :: forall a b. a -> Diff b -> Diff a
fmap :: forall a b. (a -> b) -> Diff a -> Diff b
$cfmap :: forall a b. (a -> b) -> Diff a -> Diff b
Functor)

data DL = DL {DL -> Int
poi :: !Int, DL -> Int
poj :: !Int, DL -> [DI]
path::[DI]} deriving (Int -> DL -> ShowS
[DL] -> ShowS
DL -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [DL] -> ShowS
$cshowList :: [DL] -> ShowS
show :: DL -> String
$cshow :: DL -> String
showsPrec :: Int -> DL -> ShowS
$cshowsPrec :: Int -> DL -> ShowS
Show, DL -> DL -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: DL -> DL -> Bool
$c/= :: DL -> DL -> Bool
== :: DL -> DL -> Bool
$c== :: DL -> DL -> Bool
Eq)

instance Ord DL
        where DL
x <= :: DL -> DL -> Bool
<= DL
y = if DL -> Int
poi DL
x forall a. Eq a => a -> a -> Bool
== DL -> Int
poi DL
y
                then  DL -> Int
poj DL
x forall a. Ord a => a -> a -> Bool
> DL -> Int
poj DL
y
                else DL -> Int
poi DL
x forall a. Ord a => a -> a -> Bool
<= DL -> Int
poi DL
y

canDiag :: (a -> a -> Bool) -> [a] -> [a] -> Int -> Int -> Int -> Int -> Bool
canDiag :: forall a.
(a -> a -> Bool) -> [a] -> [a] -> Int -> Int -> Int -> Int -> Bool
canDiag a -> a -> Bool
eq [a]
as [a]
bs Int
lena Int
lenb = \ Int
i Int
j ->
   if Int
i forall a. Ord a => a -> a -> Bool
< Int
lena Bool -> Bool -> Bool
&& Int
j forall a. Ord a => a -> a -> Bool
< Int
lenb then (Array Int a
arAs forall i e. Ix i => Array i e -> i -> e
! Int
i) a -> a -> Bool
`eq` (Array Int a
arBs forall i e. Ix i => Array i e -> i -> e
! Int
j) else Bool
False
    where arAs :: Array Int a
arAs = forall i e. Ix i => (i, i) -> [e] -> Array i e
listArray (Int
0,Int
lena forall a. Num a => a -> a -> a
- Int
1) [a]
as
          arBs :: Array Int a
arBs = forall i e. Ix i => (i, i) -> [e] -> Array i e
listArray (Int
0,Int
lenb forall a. Num a => a -> a -> a
- Int
1) [a]
bs

dstep :: (Int -> Int -> Bool) -> [DL] -> [DL]
dstep :: (Int -> Int -> Bool) -> [DL] -> [DL]
dstep Int -> Int -> Bool
cd [DL]
dls = DL
hdforall a. a -> [a] -> [a]
:forall {a}. Ord a => [a] -> [a]
pairMaxes [DL]
rst
  where (DL
hd:[DL]
rst) = [DL] -> [DL]
nextDLs [DL]
dls
        nextDLs :: [DL] -> [DL]
nextDLs [] = []
        nextDLs (DL
dl:[DL]
rest) = DL
dl'forall a. a -> [a] -> [a]
:DL
dl''forall a. a -> [a] -> [a]
:[DL] -> [DL]
nextDLs [DL]
rest
          where dl' :: DL
dl'  = (Int -> Int -> Bool) -> DL -> DL
addsnake Int -> Int -> Bool
cd forall a b. (a -> b) -> a -> b
$ DL
dl {poi :: Int
poi=DL -> Int
poi DL
dl forall a. Num a => a -> a -> a
+ Int
1, path :: [DI]
path=(DI
F forall a. a -> [a] -> [a]
: [DI]
pdl)}
                dl'' :: DL
dl'' = (Int -> Int -> Bool) -> DL -> DL
addsnake Int -> Int -> Bool
cd forall a b. (a -> b) -> a -> b
$ DL
dl {poj :: Int
poj=DL -> Int
poj DL
dl forall a. Num a => a -> a -> a
+ Int
1, path :: [DI]
path=(DI
S forall a. a -> [a] -> [a]
: [DI]
pdl)}
                pdl :: [DI]
pdl = DL -> [DI]
path DL
dl
        pairMaxes :: [a] -> [a]
pairMaxes [] = []
        pairMaxes [a
x] = [a
x]
        pairMaxes (a
x:a
y:[a]
rest) = forall a. Ord a => a -> a -> a
max a
x a
yforall a. a -> [a] -> [a]
:[a] -> [a]
pairMaxes [a]
rest

addsnake :: (Int -> Int -> Bool) -> DL -> DL
addsnake :: (Int -> Int -> Bool) -> DL -> DL
addsnake Int -> Int -> Bool
cd DL
dl
    | Int -> Int -> Bool
cd Int
pi Int
pj = (Int -> Int -> Bool) -> DL -> DL
addsnake Int -> Int -> Bool
cd forall a b. (a -> b) -> a -> b
$
                 DL
dl {poi :: Int
poi = Int
pi forall a. Num a => a -> a -> a
+ Int
1, poj :: Int
poj = Int
pj forall a. Num a => a -> a -> a
+ Int
1, path :: [DI]
path=(DI
B forall a. a -> [a] -> [a]
: DL -> [DI]
path DL
dl)}
    | Bool
otherwise   = DL
dl
    where pi :: Int
pi = DL -> Int
poi DL
dl; pj :: Int
pj = DL -> Int
poj DL
dl

lcs :: (a -> a -> Bool) -> [a] -> [a] -> [DI]
lcs :: forall a. (a -> a -> Bool) -> [a] -> [a] -> [DI]
lcs a -> a -> Bool
eq [a]
as [a]
bs = DL -> [DI]
path forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> a
head forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
dropWhile (\DL
dl -> DL -> Int
poi DL
dl forall a. Eq a => a -> a -> Bool
/= Int
lena Bool -> Bool -> Bool
|| DL -> Int
poj DL
dl forall a. Eq a => a -> a -> Bool
/= Int
lenb) forall b c a. (b -> c) -> (a -> b) -> a -> c
.
            forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> a) -> a -> [a]
iterate ((Int -> Int -> Bool) -> [DL] -> [DL]
dstep Int -> Int -> Bool
cd) forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. a -> [a] -> [a]
:[]) forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int -> Int -> Bool) -> DL -> DL
addsnake Int -> Int -> Bool
cd forall a b. (a -> b) -> a -> b
$
            DL {poi :: Int
poi=Int
0,poj :: Int
poj=Int
0,path :: [DI]
path=[]}
            where cd :: Int -> Int -> Bool
cd = forall a.
(a -> a -> Bool) -> [a] -> [a] -> Int -> Int -> Int -> Int -> Bool
canDiag a -> a -> Bool
eq [a]
as [a]
bs Int
lena Int
lenb
                  lena :: Int
lena = forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
as; lenb :: Int
lenb = forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
bs

-- | Takes two lists and returns a list of differences between them. This is
-- 'getDiffBy' with '==' used as predicate.
getDiff :: (Eq t) => [t] -> [t] -> [Diff t]
getDiff :: forall t. Eq t => [t] -> [t] -> [Diff t]
getDiff = forall t. (t -> t -> Bool) -> [t] -> [t] -> [Diff t]
getDiffBy forall a. Eq a => a -> a -> Bool
(==)

-- | Takes two lists and returns a list of differences between them, grouped
-- into chunks. This is 'getGroupedDiffBy' with '==' used as predicate.
getGroupedDiff :: (Eq t) => [t] -> [t] -> [Diff [t]]
getGroupedDiff :: forall t. Eq t => [t] -> [t] -> [Diff [t]]
getGroupedDiff = forall t. (t -> t -> Bool) -> [t] -> [t] -> [Diff [t]]
getGroupedDiffBy forall a. Eq a => a -> a -> Bool
(==)

-- | A form of 'getDiff' with no 'Eq' constraint. Instead, an equality predicate
-- is taken as the first argument.
getDiffBy :: (t -> t -> Bool) -> [t] -> [t] -> [Diff t]
getDiffBy :: forall t. (t -> t -> Bool) -> [t] -> [t] -> [Diff t]
getDiffBy t -> t -> Bool
eq [t]
a [t]
b = forall {a}. [a] -> [a] -> [DI] -> [Diff a]
markup [t]
a [t]
b forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> [a]
reverse forall a b. (a -> b) -> a -> b
$ forall a. (a -> a -> Bool) -> [a] -> [a] -> [DI]
lcs t -> t -> Bool
eq [t]
a [t]
b
    where markup :: [a] -> [a] -> [DI] -> [Diff a]
markup (a
x:[a]
xs)   [a]
ys   (DI
F:[DI]
ds) = forall a. a -> Diff a
First a
x  forall a. a -> [a] -> [a]
: [a] -> [a] -> [DI] -> [Diff a]
markup [a]
xs [a]
ys [DI]
ds
          markup   [a]
xs   (a
y:[a]
ys) (DI
S:[DI]
ds) = forall a. a -> Diff a
Second a
y forall a. a -> [a] -> [a]
: [a] -> [a] -> [DI] -> [Diff a]
markup [a]
xs [a]
ys [DI]
ds
          markup (a
x:[a]
xs) (a
y:[a]
ys) (DI
B:[DI]
ds) = forall a. a -> a -> Diff a
Both a
x a
y forall a. a -> [a] -> [a]
: [a] -> [a] -> [DI] -> [Diff a]
markup [a]
xs [a]
ys [DI]
ds
          markup [a]
_ [a]
_ [DI]
_ = []

getGroupedDiffBy :: (t -> t -> Bool) -> [t] -> [t] -> [Diff [t]]
getGroupedDiffBy :: forall t. (t -> t -> Bool) -> [t] -> [t] -> [Diff [t]]
getGroupedDiffBy t -> t -> Bool
eq [t]
a [t]
b = forall {a}. [Diff a] -> [Diff [a]]
go forall a b. (a -> b) -> a -> b
$ forall t. (t -> t -> Bool) -> [t] -> [t] -> [Diff t]
getDiffBy t -> t -> Bool
eq [t]
a [t]
b
    where go :: [Diff a] -> [Diff [a]]
go (First a
x  : [Diff a]
xs) = let ([a]
fs, [Diff a]
rest) = forall {a}. [Diff a] -> ([a], [Diff a])
goFirsts  [Diff a]
xs in forall a. a -> Diff a
First  (a
xforall a. a -> [a] -> [a]
:[a]
fs)     forall a. a -> [a] -> [a]
: [Diff a] -> [Diff [a]]
go [Diff a]
rest
          go (Second a
x : [Diff a]
xs) = let ([a]
fs, [Diff a]
rest) = forall {a}. [Diff a] -> ([a], [Diff a])
goSeconds [Diff a]
xs in forall a. a -> Diff a
Second (a
xforall a. a -> [a] -> [a]
:[a]
fs)     forall a. a -> [a] -> [a]
: [Diff a] -> [Diff [a]]
go [Diff a]
rest
          go (Both a
x a
y : [Diff a]
xs) = let ([(a, a)]
fs, [Diff a]
rest) = forall {b}. [Diff b] -> ([(b, b)], [Diff b])
goBoth    [Diff a]
xs
                                   ([a]
fxs, [a]
fys) = forall a b. [(a, b)] -> ([a], [b])
unzip [(a, a)]
fs
                               in forall a. a -> a -> Diff a
Both (a
xforall a. a -> [a] -> [a]
:[a]
fxs) (a
yforall a. a -> [a] -> [a]
:[a]
fys) forall a. a -> [a] -> [a]
: [Diff a] -> [Diff [a]]
go [Diff a]
rest
          go [] = []

          goFirsts :: [Diff a] -> ([a], [Diff a])
goFirsts  (First a
x  : [Diff a]
xs) = let ([a]
fs, [Diff a]
rest) = [Diff a] -> ([a], [Diff a])
goFirsts  [Diff a]
xs in (a
xforall a. a -> [a] -> [a]
:[a]
fs, [Diff a]
rest)
          goFirsts  [Diff a]
xs = ([],[Diff a]
xs)

          goSeconds :: [Diff a] -> ([a], [Diff a])
goSeconds (Second a
x : [Diff a]
xs) = let ([a]
fs, [Diff a]
rest) = [Diff a] -> ([a], [Diff a])
goSeconds [Diff a]
xs in (a
xforall a. a -> [a] -> [a]
:[a]
fs, [Diff a]
rest)
          goSeconds [Diff a]
xs = ([],[Diff a]
xs)

          goBoth :: [Diff b] -> ([(b, b)], [Diff b])
goBoth    (Both b
x b
y : [Diff b]
xs) = let ([(b, b)]
fs, [Diff b]
rest) = [Diff b] -> ([(b, b)], [Diff b])
goBoth [Diff b]
xs    in ((b
x,b
y)forall a. a -> [a] -> [a]
:[(b, b)]
fs, [Diff b]
rest)
          goBoth    [Diff b]
xs = ([],[Diff b]
xs)