-----------------------------------------------------------------------------
-- |
-- Module      :  Data.List.Ordered
-- Copyright   :  (c) 2009-2010 Leon P Smith
-- License     :  BSD3
--
-- Maintainer  :  leon@melding-monads.com
-- Stability   :  experimental
-- Portability :  portable
--
-- This module implements bag and set operations on ordered lists.
-- Except for variations of the  'sort' and 'isSorted' functions,
-- every function assumes that any list arguments are sorted lists.
-- Assuming this precondition is met,  every resulting list is also
-- sorted.
--
-- Note that these functions handle multisets, and are left-biased.
-- Thus, even assuming the arguments are sorted,  'isect' does not always
-- return the same results as Data.List.intersection,  due to multiplicity.
--
-----------------------------------------------------------------------------

module  Data.List.Ordered
     (
        -- * Predicates
        member, memberBy, has, hasBy
     ,  subset, subsetBy
     ,  isSorted, isSortedBy

        -- * Insertion Functions
     ,  insertBag, insertBagBy
     ,  insertSet, insertSetBy

        -- * Set-like operations
     ,  isect, isectBy
     ,  union, unionBy
     ,  minus, minusBy
     ,  xunion, xunionBy
     ,  merge, mergeBy
     ,  mergeAll  , mergeAllBy
     ,  unionAll  , unionAllBy

        -- * Lists to Ordered Lists
     ,  nub, nubBy
     ,  sort, sortBy
     ,  sortOn, sortOn'
     ,  nubSort, nubSortBy
     ,  nubSortOn, nubSortOn'

     )  where

import Data.List(sort,sortBy)

-- |  The 'isSorted' predicate returns 'True' if the elements of a list occur in non-descending order,  equivalent to @'isSortedBy' ('<=')@.
isSorted :: Ord a => [a] -> Bool
isSorted = isSortedBy (<=)

-- |  The 'isSortedBy' function returns 'True' iff the predicate returns true
-- for all adjacent pairs of elements in the list.
isSortedBy :: (a -> a -> Bool) -> [a] -> Bool
isSortedBy lte = loop
  where
    loop []       = True
    loop [_]      = True
    loop (x:y:zs) = (x `lte` y) && loop (y:zs)

-- |  The 'member' function returns 'True' if the element appears in the
-- ordered list.
member :: Ord a => a -> [a] -> Bool
member = memberBy compare

-- |  The 'memberBy' function is the non-overloaded version of 'member'.
memberBy :: (a -> a -> Ordering) -> a -> [a] -> Bool
memberBy cmp x = loop
  where
    loop []     = False
    loop (y:ys) = case cmp x y of
                    LT -> False
                    EQ -> True
                    GT -> loop ys

-- |  The 'has' function returns 'True' if the element appears in the list;
-- it is equivalent to 'member' except the order of the arguments is reversed,
-- making it a function from an ordered list to its characteristic function.
has :: Ord a => [a] -> a -> Bool
has xs y = memberBy compare y xs

-- |  The 'hasBy' function is the non-overloaded version of 'has'.
hasBy :: (a -> a -> Ordering) -> [a] -> a -> Bool
hasBy cmp xs y = memberBy cmp y xs

-- |  The 'insertBag' function inserts an element into a list.  If the element
-- is already there,  then another copy of the element is inserted.
insertBag :: Ord a => a -> [a] -> [a]
insertBag = insertBagBy compare

-- |  The 'insertBagBy' function is the non-overloaded version of 'insertBag'.
insertBagBy :: (a -> a -> Ordering) -> a -> [a] -> [a]
insertBagBy cmp = loop
  where
    loop x [] = [x]
    loop x (y:ys)
      = case cmp x y of
         GT -> y:loop x ys
         _  -> x:y:ys

-- |  The 'insertSet' function inserts an element into an ordered list.
-- If the element is already there,  then the element replaces the existing
-- element.
insertSet :: Ord a => a -> [a] -> [a]
insertSet = insertSetBy compare

-- |  The 'insertSetBy' function is the non-overloaded version of 'insertSet'.
insertSetBy :: (a -> a -> Ordering) -> a -> [a] -> [a]
insertSetBy cmp = loop
  where
    loop x [] = [x]
    loop x (y:ys) = case cmp x y of
            LT -> x:y:ys
            EQ -> x:ys
            GT -> y:loop x ys

{-
-- This function is moderately interesting,  as it encompasses all the
-- "Venn diagram" functions on two sets. (though not merge;  which isn't
-- a set function)

-- However, it doesn't seem that useful,  considering that of the 8 possible
-- functions,  there are only 4 interesting variations:  isect, union, minus,
-- and xunion.  Due to interactions with GHC's optimizer,  coded separately,
-- these have a smaller combined object code size than the object code size
-- for genSectBy.  (Or,  turn off certain optimizations and lose speed.)

-- Each individual object code can be recovered from genSectBy via GHC's
-- inliner and constant propagation;  but this doesn't save much in terms
-- of source code size and reduces portability.

-- Note that the Static Argument Transformation is necessary for this to work
-- correctly;  inlining genSectBy allows for cmp and p to be inlined as well,
-- or at least eliminate some indirect jumps.  All of the *By functions in
-- this module follow this idiom for this reason.

genSectBy :: (a -> a -> Ordering)
          -> (Bool -> Bool -> Bool)
          -> [a] -> [a] -> [a]
genSectBy cmp p = loop
  where
    loop [] ys | p False True = ys
               | otherwise    = []
    loop xs [] | p True False = xs
               | otherwise    = []
    loop (x:xs) (y:ys)
      = case cmp x y of
          LT | p True False -> x : loop xs (y:ys)
             | otherwise    ->     loop xs (y:ys)
          EQ | p True True  -> x : loop xs ys
             | otherwise    ->     loop xs ys
          GT | p False True -> y : loop (x:xs) ys
             | otherwise    ->     loop (x:xs) ys

-- Here's another variation that was suggested to me.  It is more general
-- than genSectBy, as it can implement a merge; but it cannot implement
-- a left-biased merge

foldrMergeBy :: (a -> b -> Ordering)
             -> (a -> c -> c) -> (b -> c -> c) -> (a -> b -> c -> c) -> c
             -> [a] -> [b] -> c
foldrMergeBy cmp addA addB unify z = loop
  where
    loop xs [] = foldr addA z xs
    loop [] ys = foldr addB z ys
    loop (x:xs) (y:ys)
      = case cmp x y of
          LT -> x `addA` loop  xs (y:ys)
          EQ -> unify x y (loop xs ys)
          GT -> y `addB` loop (x:xs) ys
-}

-- |  The 'isect' function computes the intersection of two ordered lists.
-- An element occurs in the output as many times as the minimum number of
-- occurrences in either input.  If either input is a set,  then the output
-- is a set.
--
-- > isect [ 1,2, 3,4 ] [ 3,4, 5,6 ]   == [ 3,4 ]
-- > isect [ 1, 2,2,2 ] [ 1,1,1, 2,2 ] == [ 1, 2,2 ]
isect :: Ord a => [a] -> [a] -> [a]
isect = isectBy compare

-- |  The 'isectBy' function is the non-overloaded version of 'isect'.
isectBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a]
isectBy cmp = loop
  where
     loop [] _ys  = []
     loop _xs []  = []
     loop (x:xs) (y:ys)
       = case cmp x y of
          LT ->     loop xs (y:ys)
          EQ -> x : loop xs ys
          GT ->     loop (x:xs) ys

-- |  The 'union' function computes the union of two ordered lists.
-- An element occurs in the output as many times as the maximum number
-- of occurrences in either input.  If both inputs are sets,  then the
-- output is a set.
--
-- > union [ 1,2, 3,4 ] [ 3,4, 5,6 ]   == [ 1,2, 3,4, 5,6 ]
-- > union [ 1, 2,2,2 ] [ 1,1,1, 2,2 ] == [ 1,1,1, 2,2,2 ]
union :: Ord a => [a] -> [a] -> [a]
union = unionBy compare

-- |  The 'unionBy' function is the non-overloaded version of 'union'.
unionBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a]
unionBy cmp = loop
  where
     loop [] ys = ys
     loop xs [] = xs
     loop (x:xs) (y:ys)
       = case cmp x y of
          LT -> x : loop xs (y:ys)
          EQ -> x : loop xs ys
          GT -> y : loop (x:xs) ys

-- |  The 'minus' function computes the difference of two ordered lists.
-- An element occurs in the output as many times as it occurs in
-- the first input, minus the number of occurrences in the second input.
-- If the first input is a set,  then the output is a set.
--
-- > minus [ 1,2, 3,4 ] [ 3,4, 5,6 ]   == [ 1,2 ]
-- > minus [ 1, 2,2,2 ] [ 1,1,1, 2,2 ] == [ 2 ]
minus :: Ord a => [a] -> [a] -> [a]
minus = minusBy compare

-- |  The 'minusBy' function is the non-overloaded version of 'minus'.
minusBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a]
minusBy cmp = loop
  where
     loop [] _ys = []
     loop xs [] = xs
     loop (x:xs) (y:ys)
       = case cmp x y of
          LT -> x : loop xs (y:ys)
          EQ ->     loop xs ys
          GT ->     loop (x:xs) ys

-- |  The 'xunion' function computes the exclusive union of two ordered lists.
-- An element occurs in the output as many times as the absolute difference
-- between the number of occurrences in the inputs.  If both inputs
-- are sets,  then the output is a set.
--
-- > xunion [ 1,2, 3,4 ] [ 3,4, 5,6 ]   == [ 1,2, 5,6 ]
-- > xunion [ 1, 2,2,2 ] [ 1,1,1, 2,2 ] == [ 1,1, 2 ]
xunion :: Ord a => [a] -> [a] -> [a]
xunion = xunionBy compare

-- |  The 'xunionBy' function is the non-overloaded version of 'xunion'.
xunionBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a]
xunionBy cmp = loop
  where
     loop [] ys = ys
     loop xs [] = xs
     loop (x:xs) (y:ys)
       = case cmp x y of
          LT -> x : loop xs (y:ys)
          EQ ->     loop xs ys
          GT -> y : loop (x:xs) ys

-- |  The 'merge' function combines all elements of two ordered lists.
-- An element occurs in the output as many times as the sum of the
-- occurrences in the lists.
--
-- > merge [ 1,2, 3,4 ] [ 3,4, 5,6 ]   == [ 1,2,  3,3,4,4,  5,6 ]
-- > merge [ 1, 2,2,2 ] [ 1,1,1, 2,2 ] == [ 1,1,1,1,  2,2,2,2,2 ]
merge :: Ord a => [a] -> [a] -> [a]
merge = mergeBy compare

-- |  The 'mergeBy' function is the non-overloaded version of 'merge'.
mergeBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a]
mergeBy cmp = loop
  where
    loop [] ys  = ys
    loop xs []  = xs
    loop (x:xs) (y:ys)
      = case cmp x y of
         GT -> y : loop (x:xs) ys
         _  -> x : loop xs (y:ys)

-- |  The 'subset' function returns true if the first list is a sub-list
-- of the second.
subset :: Ord a => [a] -> [a] -> Bool
subset = subsetBy compare

-- |  The 'subsetBy' function is the non-overloaded version of 'subset'.
subsetBy :: (a -> a -> Ordering) -> [a] -> [a] -> Bool
subsetBy cmp = loop
  where
    loop [] _ys = True
    loop _xs [] = False
    loop (x:xs) (y:ys)
      = case cmp x y of
         LT -> False
         EQ -> loop xs ys
         GT -> loop (x:xs) ys

{-
-- This is Ian Lynagh's mergesort implementation,  which appeared as
-- Data.List.sort, with the static argument transformation applied.
-- It's not clear whether this modification is truly worthwhile or not.

sort :: Ord a => [a] -> [a]
sort = sortBy compare

sortBy :: (a -> a -> Ordering) -> [a] -> [a]
sortBy cmp = foldTree (mergeBy cmp) [] . map (\x -> [x])
-}

-- |  The 'sortOn' function provides the decorate-sort-undecorate idiom,
-- also known as the \"Schwartzian transform\".
sortOn :: Ord b => (a -> b) -> [a] -> [a]
sortOn f  = map snd . sortOn' fst .  map (\x -> (f x, x))

-- |  This variant of 'sortOn' recomputes the sorting key every comparison.
-- This can be better for functions that are cheap to compute.
-- This is definitely better for projections,  as the decorate-sort-undecorate
-- saves nothing and adds two traversals of the list and extra memory
-- allocation.
sortOn' :: Ord b => (a -> b) -> [a] -> [a]
sortOn' f = sortBy (\x y -> compare (f x) (f y))

-- |  The 'nubSort' function is equivalent to @'nub' '.' 'sort'@,  except
-- somewhat more efficient as duplicates are removed as it sorts.  It is
-- essentially Data.List.sort,  with 'merge' replaced by 'union'.
nubSort :: Ord a => [a] -> [a]
nubSort = nubSortBy compare

-- |  The 'nubSortBy' function is the non-overloaded version of 'nubSort'.
nubSortBy :: (a -> a -> Ordering) -> [a] -> [a]
nubSortBy cmp = foldTree' (unionBy cmp) [] . runs
  where
    -- 'runs' partitions the input into sublists that are monotonic,
    -- contiguous,  and non-overlapping.   Descending runs are reversed
    -- and adjacent duplicates are eliminated,  so every run returned is
    -- strictly ascending.

    runs (a:b:xs)
      = case cmp a b of
          LT -> asc b (a:) xs
          EQ -> runs (a:xs)
          GT -> desc b [a] xs
    runs xs = [xs]

    desc a as []  = [a:as]
    desc a as (b:bs)
      = case cmp a b of
          LT -> (a:as) : runs (b:bs)
          EQ -> desc a as bs
          GT -> desc b (a:as) bs

    asc a as [] = [as [a]]
    asc a as (b:bs)
      = case cmp a b of
         LT -> asc b (\ys -> as (a:ys)) bs
         EQ -> asc a as bs
         GT -> as [a] : runs (b:bs)

-- |  The 'nubSortOn' function provides decorate-sort-undecorate for 'nubSort'.
nubSortOn :: Ord b => (a -> b) -> [a] -> [a]
nubSortOn f = map snd . nubSortOn' fst . map (\x -> (f x, x))

-- |  This variant of 'nubSortOn' recomputes the sorting key for each comparison.
nubSortOn' :: Ord b => (a -> b) -> [a] -> [a]
nubSortOn' f = nubSortBy (\x y -> compare (f x) (f y))

-- | On ordered lists,  'nub' is equivalent to 'Data.List.nub', except that
-- it runs in linear time instead of quadratic.   On unordered lists it also
-- removes elements that are smaller than any preceding element.
--
-- > nub [1,1,1,2,2] == [1,2]
-- > nub [2,0,1,3,3] == [2,3]
-- > nub = nubBy (<)

nub :: Ord a => [a] -> [a]
nub = nubBy (<)

-- | The 'nubBy' function is the greedy algorithm that returns a
-- sublist of its input such that:
--
-- > isSortedBy pred (nubBy pred xs) == True
--
-- This is true for all lists,  not just ordered lists,  and all binary
-- predicates,  not just total orders.   On infinite lists,  this statement
-- is true in a certain mathematical sense,  but not a computational one.
nubBy :: (a -> a -> Bool) -> [a] -> [a]
nubBy p []     = []
nubBy p (x:xs) = x : loop x xs
  where
    loop _ [] = []
    loop x (y:ys)
       | p x y     = y : loop y ys
       | otherwise = loop x ys

-- | The function @'foldTree'' plus zero@ computes the sum of a list
-- using a balanced tree of operations.  'foldTree'' necessarily diverges
-- on infinite lists, hence it is a stricter variant of 'foldTree'.
-- 'foldTree'' is used in the implementation of 'sort' and 'nubSort'.

foldTree' :: (a -> a -> a) -> a -> [a] -> a
foldTree' plus zero xs
  = case xs of
      []    -> zero
      (_:_) -> loop xs
  where
    loop [x] = x
    loop xs  = loop (pairs xs)

    pairs (x:y:zs) = plus x y : pairs zs
    pairs zs       = zs

-- | The function @'foldTree' plus zero@ computes the sum of a list using
-- a sequence of balanced trees of operations.   Given an appropriate @plus@
-- operator,  this function can be productive on an infinite list, hence it
-- is lazier than 'foldTree''.   'foldTree' is used in the implementation of
-- 'mergeAll' and 'unionAll'.

foldTree :: (a -> a -> a) -> a -> [a] -> a
foldTree plus zero xs
  = case xs of
      []    -> zero
      (_:_) -> loop xs
  where
    loop [x]    = x
    loop (x:xs) = x `plus` loop (pairs xs)

    pairs (x:y:zs) = plus x y : pairs zs
    pairs zs       = zs

-- | The 'mergeAll' function merges a (potentially) infinite number of
-- ordered lists, under the assumption that the heads of the inner lists
-- are sorted.  An element is duplicated in the result as many times as
-- the total number of occurrences in all inner lists.
--
-- The 'mergeAll' function is closely related to @'foldr' 'merge' []@.
-- The former does not assume that the outer list is finite, whereas
-- the latter makes no assumption about the heads of the inner lists.
-- When both sets of assumptions are met,  these two functions are
-- equivalent.
--
-- This implementation of 'mergeAll'  uses a tree of comparisons, and is
-- based on input from Dave Bayer, Heinrich Apfelmus, Omar Antolin Camarena,
-- and Will Ness.
mergeAll :: Ord a => [[a]] -> [a]
mergeAll = mergeAllBy compare

-- | The 'mergeAllBy' function is the non-overloaded variant of the 'mergeAll' function.
mergeAllBy :: (a -> a -> Ordering) -> [[a]] -> [a]
mergeAllBy cmp = foldTree merge' []
  where
    merge' []     ys = ys
    merge' (x:xs) ys = x : mergeBy cmp xs ys

-- | The 'unionAll' computes the union of a (potentially) infinite number
-- of lists,  under the assumption that the heads of the inner lists
-- are sorted.  The result will duplicate an element as many times as
-- the maximum number of occurrences in any single list.  Thus, the result
-- is a set if and only if every inner list is a set.
--
-- Analogous to 'mergeAll',  'unionAll' is closely related to
-- @'foldr' 'union' []@;  The outer does not assume that the outer list
-- is finite,  whereas the right fold does not assume anything about the
-- heads of the inner lists. When both sets of assumptions are met,  the
-- functions are equivalent.
--
-- This implementation is also based on implicit heaps,  providing
-- a tree of comparisons.
unionAll :: Ord a => [[a]] -> [a]
unionAll = unionAllBy compare

-- | The 'unionAllBy' function is the non-overloaded variant of the 'unionAll' function.
unionAllBy :: (a -> a -> Ordering) -> [[a]] -> [a]
unionAllBy cmp = foldTree union' []
  where
    msg = "Data.List.Ordered.unionAllBy:  the heads of the lists are not sorted"

    union' []     ys = ys
    union' (x:xs) ys = x : case ys of
                             []     -> xs
                             (y:yt) -> case cmp x y of
                                         LT -> unionBy cmp xs ys
                                         EQ -> unionBy cmp xs yt
                                         GT -> error msg