--------------------------------------------
-- | This module defines the notion of filters and filter combinators
--   for processing XML documents.
--
--   These XML transformation combinators are described in the paper
--   ``Haskell and XML: Generic Combinators or Type-Based Translation?''
--   Malcolm Wallace and Colin Runciman, Proceedings ICFP'99.
--------------------------------------------
module Text.XML.HaXml.Combinators
  (-- * The content filter type.
    CFilter

   -- * Simple filters.
   -- ** Selection filters.
   -- $selection
  , keep, none, children, childrenBy, position

   -- ** Predicate filters.
   -- $pred
  , elm, txt, tag, attr, attrval, tagWith

   -- ** Search filters.
  , find, iffind, ifTxt

   -- * Filter combinators
   -- ** Basic combinators.
  , o, union, cat, andThen
  , (|>|), with, without
  , (/>), (</), et
  , path
   -- ** Recursive search.
   -- $recursive
  , deep, deepest, multi
   -- ** Interior editing.
  , when, guards, chip, inplace, recursivelyInPlace, foldXml
   -- ** Constructive filters.
   -- $constructive
  , mkElem, mkElemAttr, literal, cdata, replaceTag, replaceAttrs, addAttribute

   -- * C-like conditionals.
   -- $cond
  , ThenElse(..), (?>)

   -- * Filters with labelled results.
  , LabelFilter
   -- ** Using and combining labelled filters.
  , oo, x
   -- ** Some label-generating filters.
  , numbered, interspersed, tagged, attributed, textlabelled, extracted

  ) where


import Text.XML.HaXml.Types
import Text.XML.HaXml.Namespaces
import Data.Maybe (fromMaybe)

infixl 6 `with`, `without`
infixr 5 `o`, `oo`, `union`, `andThen`		-- , `orelse`
infixl 5 />, </, |>|
infixr 4 `when`, `guards`
infixr 3 ?>, :>



-- THE CONTENT FILTER TYPE

-- | All document transformations are /content filters/.
--   A filter takes a single XML 'Content' value and returns a sequence
--   of 'Content' values, possibly empty.
type CFilter i  = Content i -> [Content i]



-- BASIC SELECTION FILTERS
-- $selection
-- In the algebra of combinators, @none@ is the zero, and @keep@ the identity.
-- (They have a more general type than just CFilter.)
keep :: a->[a]
keep = \x->[x]
none :: a->[b]
none = \x->[]

-- | Throw away current node, keep just the (unprocessed) children.
children :: CFilter i
children (CElem (Elem _ _ cs) _) = cs
children _ = []

-- | Select the @n@'th positional result of a filter.
position :: Int -> CFilter i -> CFilter i
position n f = (\cs-> [cs!!n]) . f



-- BASIC PREDICATE FILTERS
-- $pred
-- These filters either keep or throw away some content based on
-- a simple test.  For instance, @elm@ keeps only a tagged element,
-- @txt@ keeps only non-element text, @tag@ keeps only an element
-- with the named tag, @attr@ keeps only an element with the named
-- attribute, @attrval@ keeps only an element with the given
-- attribute value, @tagWith@ keeps only an element whose tag name
-- satisfies the given predicate.

elm, txt   :: CFilter i
tag        :: String -> CFilter i
attr       :: String -> CFilter i
attrval    :: Attribute -> CFilter i
tagWith    :: (String->Bool) -> CFilter i

elm x@(CElem _ _) = [x]
elm _             = []

txt x@(CString _ _ _) = [x]
txt x@(CRef _ _)      = [x]
txt _                 = []

tag t x@(CElem (Elem n _ _) _) | t==printableName n  = [x]
tag _ _  = []

tagWith p x@(CElem (Elem n _ _) _) | p (printableName n)  = [x]
tagWith _ _  = []

attr n x@(CElem (Elem _ as _) _) | n `elem` (map (printableName.fst) as)  = [x]
attr _ _  = []

attrval av x@(CElem (Elem _ as _) _) | av `elem` as  = [x]
attrval _  _  = []



-- SEARCH FILTERS

-- | For a mandatory attribute field, @find key cont@ looks up the value of
--   the attribute name @key@, and applies the continuation @cont@ to
--   the value.
find :: String -> (String->CFilter i) -> CFilter i
find key cont c@(CElem (Elem _ as _) _) = cont (show (lookfor (N key) as)) c
  where lookfor x = fromMaybe (error ("missing attribute: "++key)) . lookup x
-- 'lookfor' has the more general type :: (Eq a,Show a) => a -> [(a,b)] -> b

-- | When an attribute field may be absent, use @iffind key yes no@ to lookup
--   its value.  If the attribute is absent, it acts as the @no@ filter,
--   otherwise it applies the @yes@ filter.
iffind :: String -> (String->CFilter i) -> CFilter i -> CFilter i
iffind  key  yes no c@(CElem (Elem _ as _) _) =
  case (lookup (N key) as) of
    Nothing               -> no c
    (Just v@(AttValue _)) -> yes (show v) c
iffind _key _yes no other = no other

-- | @ifTxt yes no@ processes any textual content with the @yes@ filter,
--   but otherwise is the same as the @no@ filter.
ifTxt :: (String->CFilter i) -> CFilter i -> CFilter i
ifTxt  yes _no c@(CString _ s _) = yes s c
ifTxt _yes  no c                 = no c



-- C-LIKE CONDITIONALS
--
-- $cond
-- These definitions provide C-like conditionals, lifted to the filter level.
--
-- The @(cond ? yes : no)@ style in C becomes @(cond ?> yes :> no)@ in Haskell.

-- | Conjoin the two branches of a conditional.
data ThenElse a = a :> a

-- | Select between the two branches of a joined conditional.
(?>) :: (a->[b]) -> ThenElse (a->[b]) -> (a->[b])
p ?> (f :> g) = \c-> if (not.null.p) c then f c else g c



-- FILTER COMBINATORS


-- | Sequential (/Irish/,/backwards/) composition
o :: CFilter i -> CFilter i -> CFilter i
f `o` g = concatMap f . g

-- | Binary parallel composition.  Each filter uses a copy of the input,
-- rather than one filter using the result of the other.
--   (Has a more general type than just CFilter.)
union :: (a->[b]) -> (a->[b]) -> (a->[b])
union = lift (++)		-- in Haskell 98:   union = lift List.union
  where
    lift :: (a->b->d) -> (c->a) -> (c->b) -> c -> d
    lift f g h = \x-> f (g x) (h x)

-- | Glue a list of filters together.  (A list version of union;
--   also has a more general type than just CFilter.)
cat :: [a->[b]] -> (a->[b])
--   Specification: cat fs = \e-> concat [ f e | f <- fs ]
--   more efficient implementation below:
cat [] = const []
cat fs = foldr1 union fs

-- | A special form of filter composition where the second filter
--   works over the same data as the first, but also uses the
--   first's result.
andThen :: (a->c) -> (c->a->b) -> (a->b)
andThen f g = \x-> g (f x) x			-- lift g f id

-- | Process children using specified filters.
childrenBy :: CFilter i -> CFilter i 
childrenBy f = f `o` children

-- | Directional choice:
--   in @f |>| g@ give g-productions only if no f-productions
(|>|) :: (a->[b]) -> (a->[b]) -> (a->[b])
f |>| g = \x-> let fx = f x in if null fx then g x else fx
--      f |>| g  =  f ?> f :> g

-- | Pruning: in @f `with` g@,
--   keep only those f-productions which have at least one g-production
with :: CFilter i -> CFilter i -> CFilter i
f `with` g = filter (not.null.g) . f

-- | Pruning: in @f `without` g@,
--   keep only those f-productions which have no g-productions
without :: CFilter i -> CFilter i -> CFilter i
f `without` g = filter (null.g) . f

-- | Pronounced /slash/, @f \/> g@ means g inside f
(/>) :: CFilter i -> CFilter i -> CFilter i
f /> g = g `o` children `o` f

-- | Pronounced /outside/, @f \<\/ g@ means f containing g
(</) :: CFilter i -> CFilter i -> CFilter i
f </ g = f `with` (g `o` children)

-- | Join an element-matching filter with a text-only filter
et :: (String->CFilter i) -> CFilter i -> CFilter i
et f g = (f `oo` tagged elm)
            |>|
         (g `o` txt)

-- | Express a list of filters like an XPath query, e.g.
--   @path [children, tag \"name1\", attr \"attr1\", children, tag \"name2\"]@
--   is like the XPath query @\/name1[\@attr1]\/name2@.
path :: [CFilter i] -> CFilter i
path fs = foldr (flip (o)) keep fs


-- RECURSIVE SEARCH
-- $recursive
-- Recursive search has three variants: @deep@ does a breadth-first
-- search of the tree, @deepest@ does a depth-first search, @multi@ returns
-- content at all tree-levels, even those strictly contained within results
-- that have already been returned.
deep, deepest, multi :: CFilter i -> CFilter i
deep f     = f |>| (deep f `o` children)
deepest f  = (deepest f `o` children) |>| f
multi f    = f `union` (multi f `o` children)

-- | Interior editing:
--   @f `when` g@ applies @f@ only when the predicate @g@ succeeds,
--   otherwise the content is unchanged.
when   :: CFilter i -> CFilter i -> CFilter i
-- | Interior editing:
--   @g `guards` f@ applies @f@ only when the predicate @g@ succeeds,
--   otherwise the content is discarded.
guards :: CFilter i -> CFilter i -> CFilter i
f `when` g       = g ?> f :> keep
g `guards` f     = g ?> f :> none	-- = f `o` (keep `with` g)

-- | Process CHildren In Place.  The filter is applied to any children
--   of an element content, and the element rebuilt around the results.
chip :: CFilter i -> CFilter i
chip  f (CElem (Elem n as cs) i) = [ CElem (Elem n as (concatMap f cs)) i ]
chip _f c = [c]
-- chip f = inplace (f `o` children)

-- | Process an element In Place.  The filter is applied to the element
--   itself, and then the original element rebuilt around the results.
inplace :: CFilter i -> CFilter i
inplace  f c@(CElem (Elem name as _) i) = [ CElem (Elem name as (f c)) i ]
inplace _f c = [c]

-- | Recursively process an element in place.  That is, the filter is
--   applied to the element itself, then recursively to the results of the
--   filter, all the way to the bottom, then the original element rebuilt
--   around the final results.
recursivelyInPlace :: CFilter i -> CFilter i
recursivelyInPlace f = inplace (recursivelyInPlace f `o` f)


-- | Recursive application of filters: a fold-like operator.  Defined
--   as @f `o` chip (foldXml f)@.
foldXml :: CFilter i -> CFilter i
foldXml f = f `o` chip (foldXml f)




-- CONSTRUCTIVE CONTENT FILTERS
--
-- $constructive
-- The constructive filters are primitive filters for building new elements,
-- or editing existing elements.

-- | Build an element with the given tag name - its content is the results
--   of the given list of filters.
mkElem :: String -> [CFilter i] -> CFilter i
mkElem h cfs = \t-> [ CElem (Elem (N h) [] (cat cfs t)) undefined ]

-- | Build an element with the given name, attributes, and content.
mkElemAttr :: String -> [(String,CFilter i)] -> [CFilter i] -> CFilter i
mkElemAttr h as cfs = \t-> [ CElem (Elem (N h) (map (attr t) as) (cat cfs t))
                                   undefined ]
  where attr t (n,vf) =
            let v = concat [ s | (CString _ s _) <- (deep txt `o` vf) t ]
            in  (N n, AttValue [Left v])

-- | Build some textual content.
literal :: String -> CFilter i
literal s = const [CString False s undefined]

-- | Build some CDATA content.
cdata :: String -> CFilter i
cdata s = const [CString True s undefined]

-- | Rename an element tag (leaving attributes in place).
replaceTag :: String -> CFilter i
replaceTag n (CElem (Elem _ as cs) i) = [CElem (Elem (N n) as cs) i]
replaceTag _ _ = []

-- | Replace the attributes of an element (leaving tag the same).
replaceAttrs :: [(String,String)] -> CFilter i
replaceAttrs as (CElem (Elem n _ cs) i) = [CElem (Elem n as' cs) i]
    where as' = map (\(n,v)-> (N n, AttValue [Left v])) as
replaceAttrs _  _ = []

-- | Add the desired attribute name and value to the topmost element,
--   without changing the element in any other way.
addAttribute :: String -> String -> CFilter a
addAttribute name val (CElem (Elem n   as   cs) i) =
                      [CElem (Elem n (a:as) cs) i]
  where a = (N name, AttValue [Left val])
addAttribute _ _ _ = []



-- LABELLING
-- $labelling
-- LabelFilters are a way of annotating the results of a filter operation
-- with some arbitrary values drawn from the tree values.  Typically, the
-- annotations are then consumed by a label-processing filter (of
-- type @a -> CFilter@).  This is useful way of passing information between
-- sections of the tree as you process it.  An example may help to explain.
--
-- Let's say we want to add an attribute to every node of the tree,
-- containing a textual representation of its path from the root,
-- e.g. "/foo/bar/quux".  Where there are multiple identically-tagged elements
-- under the same parent node of the original tree, we expect them to have
-- a distinguishing attribute called "name".
--
-- Step one.  Given the path prefix to this node, how do we add the "xpath"
-- attribute?
--
-- > annotateOne :: String -> CFilter a
-- > annotateOne prefix =
-- >    (f `oo` ((tagged `x` attributed "name") (attr "name")))
-- >    |>|
-- >    (g `oo` (tagged keep))
-- >  where
-- >    f (tag,att) = addAttribute "xpath" (prefix++"/"++tag++"["++att++"]")
-- >    g  tag      = addAttribute "xpath" (prefix++"/"++tag)@
--
-- First, the @attr "name"@ filter distinguishes whether this node contains
-- the attribute, hence choosing whether the left or right branch of the
-- @|>|@ is taken.  If the attribute is /not/ present, then the LabelFilter
-- @tagged keep@ selects the current node, and annotates it with the
-- tagname of the element.  The @oo@ applies the label-consuming function @g@
-- to the result, and this injects the "xpath" attribute by suffixing
-- the tagname to the known path prefix.
--
-- If the "name" attribute /is/ present, then there are /two/ labelling filters
-- applied to the current node, annotating it with the pair of its tag
-- and the value of the attribute "name".  The label-consuming function @f@ is
-- applied to the pair with @oo@, to inject the "xpath" attribute with a more
-- complex representation of its path.
--
-- Step two.  Recursively apply the annotation throughout the tree.
--
-- > labelAllPaths :: CFilter a
-- > labelAllPaths = allPaths `o` initialise
-- >   where
-- >     initialise = annotateOne "/"
-- > 
-- >     allPaths :: CFilter a
-- >     allPaths = inplace ( allPaths
-- >                          `o`
-- >                          (\prefix-> annotateOne prefix `o` children)
-- >                          `oo`
-- >                          (attributed "xpath" keep)
-- >                        )
--
-- In order to apply @annotateOne@ to any node, we need to know the path
-- prefix thus far into the tree.  So, we read the "xpath" attribute from
-- the current node (assumed to have already been processed) as a
-- LabelFilter, then consume the label by passing it to @annotateOne@ on
-- the children of the current node.  Using @inplace@ rebuilds the processed
-- children into the current node, after recursively dealing with their
-- children.



-- | A LabelFilter is like a CFilter except that it pairs up a polymorphic
--   value (label) with each of its results.
type LabelFilter i a = Content i -> [(a,Content i)]

-- | Compose a label-processing filter with a label-generating filter.
oo :: (a->CFilter i) -> LabelFilter i a -> CFilter i
f `oo` g = concatMap (uncurry f) . g

{-
-- | Process the information labels (very nearly monadic bind).
oo :: (b -> CFilter b c) -> CFilter a b -> CFilter a c
f `oo` g = concatMap info . g
    where info c@(CElem _ i)     = f i c
          info c@(CString _ _ i) = f i c
          info c@(CRef _ i)      = f i c
          info c                 = [c]
-}

-- | Combine labels.  Think of this as a pair-wise zip on labels.
--   e.g. @(numbered `x` tagged)@
x :: (CFilter i->LabelFilter i a) -> (CFilter i->LabelFilter i b) ->
       (CFilter i->LabelFilter i (a,b))
f `x` g = \cf c-> let gs = map fst (g cf c)
                      fs = map fst (f cf c)
                  in zip (zip fs gs) (cf c)


-- Some basic label-generating filters.

-- | Number the results from 1 upwards.
numbered :: CFilter i -> LabelFilter i Int
numbered f = zip [1..] . f

-- | In @interspersed a f b@, label each result of @f@ with the string @a@,
--   except for the last one which is labelled with the string @b@.
interspersed :: String -> CFilter i -> String -> LabelFilter i String
interspersed a f b =
  (\xs-> zip (replicate (len xs) a ++ [b]) xs) . f
  where
  len [] = 0
  len xs = length xs - 1

-- | Label each element in the result with its tag name.  Non-element
--   results get an empty string label.
tagged :: CFilter i -> LabelFilter i String
tagged f = extracted name f
  where name (CElem (Elem n _ _) _) = printableName n
        name _                      = ""

-- | Label each element in the result with the value of the named attribute.
--   Elements without the attribute, and non-element results, get an
--   empty string label.
attributed :: String -> CFilter i -> LabelFilter i String
attributed key f = extracted att f
  where att (CElem (Elem _ as _) _) =
            case (lookup (N key) as) of
              Nothing  -> ""
              (Just v@(AttValue _)) -> show v
        att _ = ""

-- | Label each textual part of the result with its text.  Element
--   results get an empty string label.
textlabelled :: CFilter i -> LabelFilter i (Maybe String)
textlabelled f = extracted text f
  where text (CString _ s _) = Just s
        text _               = Nothing

-- | Label each content with some information extracted from itself.
extracted :: (Content i->a) -> CFilter i -> LabelFilter i a
extracted proj f = concatMap (\c->[(proj c, c)]) . f
                                                                                


{-
-- MISC

-- | I haven't yet remembered \/ worked out what this does.
combine :: (Read a,Show a) => ([a]->a) -> LabelFilter String -> CFilter
combine f lf = \c-> [ CString False (show (f [ read l | (l,_) <- lf c ])) ]
-}


{- OLD STUFF - OBSOLETE
-- Keep an element by its numbered position (starting at 1).
position :: Int -> [Content] -> [Content]
position n | n>0  = (:[]) . (!!(n-1))
           | otherwise = const []

-- Chop and remove the root portions of trees to depth n.
layer :: Int -> [Content] -> [Content]
layer n = apply n (concatMap lay)
  where lay (CElem (Elem _ _ cs)) = cs
        lay _ = []
        apply 0 f xs = xs
        apply n f xs = apply (n-1) f (f xs)

combine :: (Read a, Show a) => ([a]->a) -> [Content] -> [Content]
combine f = \cs-> [ CString False (show (f [ read s | CString _ s <- cs ])) ]
-}