-- ------------------------------------------------------------

{- |
   Module     : Text.XML.HXT.DTDValidation.XmlRE
   Copyright  : Copyright (C) 2008 Uwe Schmidt
   License    : MIT

   Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
   Stability  : experimental
   Portability: portable

   A module for regular expression matching, adapted for XML DTDs.

   This module is based on the module RE.

-}

-- ------------------------------------------------------------

module Text.XML.HXT.DTDValidation.XmlRE
    ( RE

    , checkRE
    , matches
    , printRE

    , re_unit
    , re_zero
    , re_sym
    , re_rep
    , re_plus
    , re_opt
    , re_seq
    , re_alt
    , re_dot
    )
where

-- import           Debug.Trace                         (trace)

import           Data.List                           (foldl')

import           Text.XML.HXT.DTDValidation.RE       hiding (matches)

import           Text.XML.HXT.Arrow.Edit             (removeComment,
                                                      removeWhiteSpace)
import qualified Text.XML.HXT.DOM.XmlNode            as XN
import           Text.XML.HXT.DTDValidation.TypeDefs

-- |
-- Derives a regular expression with respect to a list of elements.
--
--    * 1.parameter re :  regular expression
--
--    - 2.parameter list :  list of elements to which the regular expression is applied
--
--    - returns : the derived regular expression

matches :: RE String -> XmlTrees -> RE String
matches re list
    = foldl' delta re (removeUnimportantStuff $$ list)
      where
      removeUnimportantStuff :: XmlArrow
      removeUnimportantStuff = processBottomUp (removeWhiteSpace >>> removeComment)

      -- trace of growth of REs
      -- delta' re el = delta (trace (("RE : " ++) . (++ "\n" ) . show $ re) re) el


-- |
-- Derives a regular expression with respect to one element.
--
-- L(delta e x) = x \ L(e)
--
--    * 1.parameter re :  regular expression to be derived
--
--    - 2.parameter el :  the element on which the regular expression is applied
--
--    - returns : the derived regular expression

delta :: RE String -> XmlTree -> RE String
delta re el
    | not (allowed el) = re
    | otherwise        = case re of
        RE_ZERO m                -> re_zero m
        RE_UNIT                  -> re_zero (elemName el ++" unexpected.")
        RE_SYM sym
            | sym == k_pcdata    -> if ((XN.isText el) || (XN.isCdata el))
                                    then re_unit
                                    else re_zero ("Character data expected, but "++ elemName el ++" found.")
            | expectedNode el sym -> re_unit
            | otherwise           -> re_zero ("Element "++ show sym ++" expected, but "++ elemName el ++" found.")
        RE_REP e                  -> re_seq (delta e el) (re_rep e)
        RE_PLUS e                 -> re_seq (delta e el) (re_rep e)
        RE_OPT e                  -> delta e el
        RE_SEQ e f
            | nullable e          -> re_alt (re_seq (delta e el) f) (delta f el)
            | otherwise           -> re_seq (delta e el) f
        RE_ALT e f                -> re_alt (delta e el) (delta f el)
        RE_DOT                    -> re_unit

    where
    expectedNode        :: XmlTree -> String -> Bool
    expectedNode n sym
        | XN.isElem n   = nameOfElem n == sym
        | otherwise     = False

    elemName            :: XmlTree -> String
    elemName n
        | XN.isElem n   = "element "++ show (nameOfElem n)
        | otherwise     = "character data"

    allowed     :: XmlTree -> Bool
    allowed n   = XN.isElem n || XN.isText n || XN.isCdata n

-- ------------------------------------------------------------