-- ------------------------------------------------------------

{- |
   Module     : Text.XML.HXT.Arrow.XmlRegex
   Copyright  : Copyright (C) 2008 Uwe Schmidt
   License    : MIT

   Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
   Stability  : experimental
   Portability: portable

   Regular Expression Matcher working on lists of XmlTrees

   It's intended to import this module with an explicit
   import declaration for not spoiling the namespace
   with these somewhat special arrows

-}

-- ------------------------------------------------------------

module Text.XML.HXT.Arrow.XmlRegex
    ( XmlRegex
    , mkZero
    , mkUnit
    , mkPrim
    , mkPrim'
    , mkPrimA
    , mkDot
    , mkStar
    , mkAlt
    , mkAlts
    , mkSeq
    , mkSeqs
    , mkRep
    , mkRng
    , mkOpt
    , mkPerm
    , mkPerms
    , mkMerge
    , nullable
    , delta
    , matchXmlRegex
    , splitXmlRegex
    , scanXmlRegex
    , matchRegexA
    , splitRegexA
    , scanRegexA
    )
where

import           Control.Arrow.ListArrows

import           Data.Maybe

import           Text.XML.HXT.DOM.Interface
import           Text.XML.HXT.DOM.ShowXml   (xshow)

-- ------------------------------------------------------------
-- the exported regex arrows

-- | check whether a sequence of XmlTrees match an Xml regular expression
--
-- The arrow for 'matchXmlRegex'.
--
-- The expession is build up from simple arrows acting as predicate ('mkPrimA') for
-- an XmlTree and of the usual cobinators for sequence ('mkSeq'), repetition
-- ('mkStar', mkRep', 'mkRng') and choice ('mkAlt', 'mkOpt')

matchRegexA             :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree XmlTrees
matchRegexA :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree XmlTrees
matchRegexA XmlRegex
re LA XmlTree XmlTree
ts       = LA XmlTree XmlTree
ts LA XmlTree XmlTree
-> (XmlTrees -> [XmlTrees]) -> LA XmlTree XmlTrees
forall (a :: * -> * -> *) b c d.
ArrowList a =>
a b c -> ([c] -> [d]) -> a b d
>>. (\ XmlTrees
s -> [XmlTrees] -> (String -> [XmlTrees]) -> Maybe String -> [XmlTrees]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [XmlTrees
s] ([XmlTrees] -> String -> [XmlTrees]
forall a b. a -> b -> a
const []) (Maybe String -> [XmlTrees])
-> (XmlTrees -> Maybe String) -> XmlTrees -> [XmlTrees]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. XmlRegex -> XmlTrees -> Maybe String
matchXmlRegex XmlRegex
re (XmlTrees -> [XmlTrees]) -> XmlTrees -> [XmlTrees]
forall a b. (a -> b) -> a -> b
$ XmlTrees
s)

-- | split the sequence of trees computed by the filter a into
--
-- The arrow for 'splitXmlRegex'.
--
-- a first part matching the regex and a rest,
-- if a prefix of the input sequence does not match the regex, the arrow fails
-- else the pair containing the result lists is returned

splitRegexA             :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree (XmlTrees, XmlTrees)
splitRegexA :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree (XmlTrees, XmlTrees)
splitRegexA XmlRegex
re LA XmlTree XmlTree
ts       = LA XmlTree XmlTree
ts LA XmlTree XmlTree
-> (XmlTrees -> [(XmlTrees, XmlTrees)])
-> LA XmlTree (XmlTrees, XmlTrees)
forall (a :: * -> * -> *) b c d.
ArrowList a =>
a b c -> ([c] -> [d]) -> a b d
>>. (Maybe (XmlTrees, XmlTrees) -> [(XmlTrees, XmlTrees)]
forall a. Maybe a -> [a]
maybeToList (Maybe (XmlTrees, XmlTrees) -> [(XmlTrees, XmlTrees)])
-> (XmlTrees -> Maybe (XmlTrees, XmlTrees))
-> XmlTrees
-> [(XmlTrees, XmlTrees)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. XmlRegex -> XmlTrees -> Maybe (XmlTrees, XmlTrees)
splitXmlRegex XmlRegex
re)

-- | scan the input sequence with a regex and give the result as a list of lists of trees back
-- the regex must at least match one input tree, so the empty sequence should not match the regex
--
-- The arrow for 'scanXmlRegex'.

scanRegexA              :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree XmlTrees
scanRegexA :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree XmlTrees
scanRegexA XmlRegex
re LA XmlTree XmlTree
ts        = LA XmlTree XmlTree
ts LA XmlTree XmlTree
-> (XmlTrees -> [XmlTrees]) -> LA XmlTree XmlTrees
forall (a :: * -> * -> *) b c d.
ArrowList a =>
a b c -> ([c] -> [d]) -> a b d
>>. ([XmlTrees] -> Maybe [XmlTrees] -> [XmlTrees]
forall a. a -> Maybe a -> a
fromMaybe [] (Maybe [XmlTrees] -> [XmlTrees])
-> (XmlTrees -> Maybe [XmlTrees]) -> XmlTrees -> [XmlTrees]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. XmlRegex -> XmlTrees -> Maybe [XmlTrees]
scanXmlRegex XmlRegex
re)

-- ------------------------------------------------------------

data XmlRegex   = Zero String
                | Unit
                | Sym  (XmlTree -> Bool) String    -- optional external repr. of predicate
                | Dot
                | Star  XmlRegex
                | Alt   XmlRegex XmlRegex
                | Seq   XmlRegex XmlRegex
                | Rep   Int      XmlRegex          -- 1 or more repetitions
                | Rng   Int Int  XmlRegex          -- n..m repetitions
                | Perm  XmlRegex XmlRegex
                | Merge XmlRegex XmlRegex

-- ------------------------------------------------------------

{- just for documentation

class Inv a where
    inv         :: a -> Bool

instance Inv XmlRegex where
    inv (Zero _)        = True
    inv Unit            = True
    inv (Sym p _)       = p holds for some XmlTrees
    inv Dot             = True
    inv (Star e)        = inv e
    inv (Alt e1 e2)     = inv e1 &&
                          inv e2
    inv (Seq e1 e2)     = inv e1 &&
                          inv e2
    inv (Rep i e)       = i > 0 && inv e
    inv (Rng i j e)     = (i < j || (i == j && i > 1)) &&
                          inv e
    inv (Perm e1 e2)    = inv e1 &&
                          inv e2
-}
-- ------------------------------------------------------------
--
-- smart constructors

mkZero          :: String -> XmlRegex
mkZero :: String -> XmlRegex
mkZero          = String -> XmlRegex
Zero

mkUnit          :: XmlRegex
mkUnit :: XmlRegex
mkUnit          = XmlRegex
Unit

mkPrim          :: (XmlTree -> Bool) -> XmlRegex
mkPrim :: (XmlTree -> Bool) -> XmlRegex
mkPrim XmlTree -> Bool
p        = (XmlTree -> Bool) -> String -> XmlRegex
Sym XmlTree -> Bool
p String
""

mkPrim'         :: (XmlTree -> Bool) -> String -> XmlRegex
mkPrim' :: (XmlTree -> Bool) -> String -> XmlRegex
mkPrim'         = (XmlTree -> Bool) -> String -> XmlRegex
Sym

mkPrimA         :: LA XmlTree XmlTree -> XmlRegex
mkPrimA :: LA XmlTree XmlTree -> XmlRegex
mkPrimA LA XmlTree XmlTree
a       = (XmlTree -> Bool) -> XmlRegex
mkPrim (Bool -> Bool
not (Bool -> Bool) -> (XmlTree -> Bool) -> XmlTree -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. XmlTrees -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (XmlTrees -> Bool) -> (XmlTree -> XmlTrees) -> XmlTree -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. LA XmlTree XmlTree -> XmlTree -> XmlTrees
forall a b. LA a b -> a -> [b]
runLA LA XmlTree XmlTree
a)

mkDot           :: XmlRegex
mkDot :: XmlRegex
mkDot           = XmlRegex
Dot

mkStar                  :: XmlRegex -> XmlRegex
mkStar :: XmlRegex -> XmlRegex
mkStar (Zero String
_)         = XmlRegex
mkUnit                -- {}* == ()
mkStar e :: XmlRegex
e@XmlRegex
Unit           = XmlRegex
e                     -- ()* == ()
mkStar e :: XmlRegex
e@(Star XmlRegex
_e1)     = XmlRegex
e                     -- (r*)* == r*
mkStar (Rep Int
1 XmlRegex
e1)       = XmlRegex -> XmlRegex
mkStar XmlRegex
e1             -- (r+)* == r*
mkStar e :: XmlRegex
e@(Alt XmlRegex
_ XmlRegex
_)      = XmlRegex -> XmlRegex
Star (XmlRegex -> XmlRegex
rmStar XmlRegex
e)       -- (a*|b)* == (a|b)*
mkStar XmlRegex
e                = XmlRegex -> XmlRegex
Star XmlRegex
e

rmStar  :: XmlRegex -> XmlRegex
rmStar :: XmlRegex -> XmlRegex
rmStar (Alt XmlRegex
e1 XmlRegex
e2)      = XmlRegex -> XmlRegex -> XmlRegex
mkAlt (XmlRegex -> XmlRegex
rmStar XmlRegex
e1) (XmlRegex -> XmlRegex
rmStar XmlRegex
e2)
rmStar (Star XmlRegex
e1)        = XmlRegex -> XmlRegex
rmStar XmlRegex
e1
rmStar (Rep Int
1 XmlRegex
e1)       = XmlRegex -> XmlRegex
rmStar XmlRegex
e1
rmStar XmlRegex
e1               = XmlRegex
e1

mkAlt                                   :: XmlRegex -> XmlRegex -> XmlRegex
mkAlt :: XmlRegex -> XmlRegex -> XmlRegex
mkAlt XmlRegex
e1            (Zero String
_)            = XmlRegex
e1                            -- e1 u {} = e1
mkAlt (Zero String
_)      XmlRegex
e2                  = XmlRegex
e2                            -- {} u e2 = e2
mkAlt e1 :: XmlRegex
e1@(Star XmlRegex
Dot) XmlRegex
_e2                 = XmlRegex
e1                            -- A* u e1 = A*
mkAlt XmlRegex
_e1           e2 :: XmlRegex
e2@(Star XmlRegex
Dot)       = XmlRegex
e2                            -- e1 u A* = A*
mkAlt (Sym XmlTree -> Bool
p1 String
e1)   (Sym XmlTree -> Bool
p2 String
e2)         = (XmlTree -> Bool) -> String -> XmlRegex
mkPrim' (\ XmlTree
x -> XmlTree -> Bool
p1 XmlTree
x Bool -> Bool -> Bool
|| XmlTree -> Bool
p2 XmlTree
x)  (String -> String -> String
e String
e1 String
e2) -- melting of predicates
                                          where
                                            e :: String -> String -> String
e String
"" String
x2 = String
x2
                                            e String
x1 String
"" = String
x1
                                            e String
x1 String
x2 = String
x1 String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"|" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
x2
mkAlt XmlRegex
e1            e2 :: XmlRegex
e2@(Sym XmlTree -> Bool
_ String
_)        = XmlRegex -> XmlRegex -> XmlRegex
mkAlt XmlRegex
e2 XmlRegex
e1                   -- symmetry: predicates always first
mkAlt e1 :: XmlRegex
e1@(Sym XmlTree -> Bool
_ String
_)  (Alt e2 :: XmlRegex
e2@(Sym XmlTree -> Bool
_ String
_) XmlRegex
e3)
                                        = XmlRegex -> XmlRegex -> XmlRegex
mkAlt (XmlRegex -> XmlRegex -> XmlRegex
mkAlt XmlRegex
e1 XmlRegex
e2) XmlRegex
e3        -- prepare melting of predicates
mkAlt (Alt XmlRegex
e1 XmlRegex
e2)   XmlRegex
e3                  = XmlRegex -> XmlRegex -> XmlRegex
mkAlt XmlRegex
e1 (XmlRegex -> XmlRegex -> XmlRegex
mkAlt XmlRegex
e2 XmlRegex
e3)        -- associativity
mkAlt XmlRegex
e1 XmlRegex
e2                             = XmlRegex -> XmlRegex -> XmlRegex
Alt XmlRegex
e1 XmlRegex
e2

mkAlts                          :: [XmlRegex] -> XmlRegex
mkAlts :: [XmlRegex] -> XmlRegex
mkAlts                          = (XmlRegex -> XmlRegex -> XmlRegex)
-> XmlRegex -> [XmlRegex] -> XmlRegex
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr XmlRegex -> XmlRegex -> XmlRegex
mkAlt (String -> XmlRegex
mkZero String
"")

mkSeq                           :: XmlRegex -> XmlRegex -> XmlRegex
mkSeq :: XmlRegex -> XmlRegex -> XmlRegex
mkSeq e1 :: XmlRegex
e1@(Zero String
_) XmlRegex
_e2           = XmlRegex
e1
mkSeq XmlRegex
_e1         e2 :: XmlRegex
e2@(Zero String
_)   = XmlRegex
e2
mkSeq XmlRegex
Unit        XmlRegex
e2            = XmlRegex
e2
mkSeq XmlRegex
e1          XmlRegex
Unit          = XmlRegex
e1
mkSeq (Seq XmlRegex
e1 XmlRegex
e2) XmlRegex
e3            = XmlRegex -> XmlRegex -> XmlRegex
mkSeq XmlRegex
e1 (XmlRegex -> XmlRegex -> XmlRegex
mkSeq XmlRegex
e2 XmlRegex
e3)
mkSeq XmlRegex
e1 XmlRegex
e2                     = XmlRegex -> XmlRegex -> XmlRegex
Seq XmlRegex
e1 XmlRegex
e2

mkSeqs                          :: [XmlRegex] -> XmlRegex
mkSeqs :: [XmlRegex] -> XmlRegex
mkSeqs                          = (XmlRegex -> XmlRegex -> XmlRegex)
-> XmlRegex -> [XmlRegex] -> XmlRegex
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr XmlRegex -> XmlRegex -> XmlRegex
mkSeq XmlRegex
mkUnit

mkRep           :: Int -> XmlRegex -> XmlRegex
mkRep :: Int -> XmlRegex -> XmlRegex
mkRep Int
0 XmlRegex
e                       = XmlRegex -> XmlRegex
mkStar XmlRegex
e
mkRep Int
_ e :: XmlRegex
e@(Zero String
_)              = XmlRegex
e
mkRep Int
_ e :: XmlRegex
e@XmlRegex
Unit                  = XmlRegex
e
mkRep Int
i XmlRegex
e                       = Int -> XmlRegex -> XmlRegex
Rep Int
i XmlRegex
e

mkRng   :: Int -> Int -> XmlRegex -> XmlRegex
mkRng :: Int -> Int -> XmlRegex -> XmlRegex
mkRng Int
0  Int
0  XmlRegex
_e                  = XmlRegex
mkUnit
mkRng Int
1  Int
1  XmlRegex
e                   = XmlRegex
e
mkRng Int
lb Int
ub XmlRegex
_e
    | Int
lb Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
ub                   = String -> XmlRegex
Zero (String -> XmlRegex) -> String -> XmlRegex
forall a b. (a -> b) -> a -> b
$
                                  String
"illegal range " String -> String -> String
forall a. [a] -> [a] -> [a]
++
                                  Int -> String
forall a. Show a => a -> String
show Int
lb String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
".." String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
ub
mkRng Int
_l Int
_u e :: XmlRegex
e@(Zero String
_)          = XmlRegex
e
mkRng Int
_l Int
_u e :: XmlRegex
e@XmlRegex
Unit              = XmlRegex
e
mkRng Int
lb Int
ub XmlRegex
e                   = Int -> Int -> XmlRegex -> XmlRegex
Rng Int
lb Int
ub XmlRegex
e

mkOpt   :: XmlRegex -> XmlRegex
mkOpt :: XmlRegex -> XmlRegex
mkOpt   = Int -> Int -> XmlRegex -> XmlRegex
mkRng Int
0 Int
1

mkPerm                           :: XmlRegex -> XmlRegex -> XmlRegex
mkPerm :: XmlRegex -> XmlRegex -> XmlRegex
mkPerm e1 :: XmlRegex
e1@(Zero String
_) XmlRegex
_             = XmlRegex
e1
mkPerm XmlRegex
_           e2 :: XmlRegex
e2@(Zero String
_)   = XmlRegex
e2
mkPerm XmlRegex
Unit        XmlRegex
e2            = XmlRegex
e2
mkPerm XmlRegex
e1          XmlRegex
Unit          = XmlRegex
e1
mkPerm XmlRegex
e1          XmlRegex
e2            = XmlRegex -> XmlRegex -> XmlRegex
Perm XmlRegex
e1 XmlRegex
e2

mkPerms                          :: [XmlRegex] -> XmlRegex
mkPerms :: [XmlRegex] -> XmlRegex
mkPerms                          = (XmlRegex -> XmlRegex -> XmlRegex)
-> XmlRegex -> [XmlRegex] -> XmlRegex
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr XmlRegex -> XmlRegex -> XmlRegex
mkPerm XmlRegex
mkUnit

mkMerge                          :: XmlRegex -> XmlRegex -> XmlRegex
mkMerge :: XmlRegex -> XmlRegex -> XmlRegex
mkMerge e1 :: XmlRegex
e1@(Zero String
_) XmlRegex
_            = XmlRegex
e1
mkMerge XmlRegex
_           e2 :: XmlRegex
e2@(Zero String
_)  = XmlRegex
e2
mkMerge XmlRegex
Unit        XmlRegex
e2           = XmlRegex
e2
mkMerge XmlRegex
e1          XmlRegex
Unit         = XmlRegex
e1
mkMerge XmlRegex
e1          XmlRegex
e2           = XmlRegex -> XmlRegex -> XmlRegex
Merge XmlRegex
e1 XmlRegex
e2

-- ------------------------------------------------------------

instance Show XmlRegex where
    show :: XmlRegex -> String
show (Zero String
s)       = String
"{err:" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
s String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"}"
    show XmlRegex
Unit           = String
"()"
    show (Sym XmlTree -> Bool
_p String
"")    = String
"<pred>"
    show (Sym XmlTree -> Bool
_p String
r )    = String
r
    show XmlRegex
Dot            = String
"."
    show (Star XmlRegex
e)       = String
"(" String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
")*"
    show (Alt XmlRegex
e1 XmlRegex
e2)    = String
"(" String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e1 String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"|" String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e2 String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
")"
    show (Seq XmlRegex
e1 XmlRegex
e2)    = XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e1 String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e2
    show (Rep Int
1 XmlRegex
e)      = String
"(" String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
")+"
    show (Rep Int
i XmlRegex
e)      = String
"(" String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"){" String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
i String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
",}"
    show (Rng Int
0 Int
1 XmlRegex
e)    = String
"(" String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
")?"
    show (Rng Int
i Int
j XmlRegex
e)    = String
"(" String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"){" String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
i String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"," String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
j String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"}"
    show (Perm XmlRegex
e1 XmlRegex
e2)   = String
"(" String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e1 String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e2 String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"|" String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e2 String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e1 String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
")"
    show (Merge XmlRegex
e1 XmlRegex
e2)  = String
"(" String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e1 String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"&" String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e2 String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
")"

-- ------------------------------------------------------------

unexpected              :: XmlTree -> String -> String
unexpected :: XmlTree -> String -> String
unexpected XmlTree
t String
e          = String -> String
emsg String
e String -> String -> String
forall a. [a] -> [a] -> [a]
++ (Int -> String -> String
cut Int
80 (String -> String) -> (XmlTrees -> String) -> XmlTrees -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. XmlTrees -> String
xshow) [XmlTree
t]
    where
      emsg :: String -> String
emsg String
""           = String
"unexpected: "
      emsg String
s            = String
"expected: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
s String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
", but got: "
      cut :: Int -> String -> String
cut Int
n String
s
          | String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
rest   = String
s'
          | Bool
otherwise   = String
s' String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"..."
          where
            (String
s', String
rest)  = Int -> String -> (String, String)
forall a. Int -> [a] -> ([a], [a])
splitAt Int
n String
s

-- ------------------------------------------------------------

nullable        :: XmlRegex -> Bool
nullable :: XmlRegex -> Bool
nullable (Zero String
_)       = Bool
False
nullable XmlRegex
Unit           = Bool
True
nullable (Sym XmlTree -> Bool
_p String
_)     = Bool
False         -- assumption: p holds for at least one tree
nullable XmlRegex
Dot            = Bool
False
nullable (Star XmlRegex
_)       = Bool
True
nullable (Alt XmlRegex
e1 XmlRegex
e2)    = XmlRegex -> Bool
nullable XmlRegex
e1 Bool -> Bool -> Bool
||
                          XmlRegex -> Bool
nullable XmlRegex
e2
nullable (Seq XmlRegex
e1 XmlRegex
e2)    = XmlRegex -> Bool
nullable XmlRegex
e1 Bool -> Bool -> Bool
&&
                          XmlRegex -> Bool
nullable XmlRegex
e2
nullable (Rep Int
_i XmlRegex
e)     = XmlRegex -> Bool
nullable XmlRegex
e
nullable (Rng Int
i Int
_ XmlRegex
e)    = Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 Bool -> Bool -> Bool
||
                          XmlRegex -> Bool
nullable XmlRegex
e
nullable (Perm XmlRegex
e1 XmlRegex
e2)   = XmlRegex -> Bool
nullable XmlRegex
e1 Bool -> Bool -> Bool
&&
                          XmlRegex -> Bool
nullable XmlRegex
e2
nullable (Merge XmlRegex
e1 XmlRegex
e2)  = XmlRegex -> Bool
nullable XmlRegex
e1 Bool -> Bool -> Bool
&&
                          XmlRegex -> Bool
nullable XmlRegex
e2

-- ------------------------------------------------------------

delta   :: XmlRegex -> XmlTree -> XmlRegex
delta :: XmlRegex -> XmlTree -> XmlRegex
delta e :: XmlRegex
e@(Zero String
_)   XmlTree
_    = XmlRegex
e
delta XmlRegex
Unit         XmlTree
c    = String -> XmlRegex
mkZero (String -> XmlRegex) -> String -> XmlRegex
forall a b. (a -> b) -> a -> b
$ XmlTree -> String -> String
unexpected XmlTree
c String
""
delta (Sym XmlTree -> Bool
p String
e)    XmlTree
c
    | XmlTree -> Bool
p XmlTree
c               = XmlRegex
mkUnit
    | Bool
otherwise         = String -> XmlRegex
mkZero (String -> XmlRegex) -> String -> XmlRegex
forall a b. (a -> b) -> a -> b
$ XmlTree -> String -> String
unexpected XmlTree
c String
e
delta XmlRegex
Dot          XmlTree
_    = XmlRegex
mkUnit
delta e :: XmlRegex
e@(Star XmlRegex
e1)  XmlTree
c    = XmlRegex -> XmlRegex -> XmlRegex
mkSeq (XmlRegex -> XmlTree -> XmlRegex
delta XmlRegex
e1 XmlTree
c) XmlRegex
e
delta (Alt XmlRegex
e1 XmlRegex
e2)  XmlTree
c    = XmlRegex -> XmlRegex -> XmlRegex
mkAlt (XmlRegex -> XmlTree -> XmlRegex
delta XmlRegex
e1 XmlTree
c) (XmlRegex -> XmlTree -> XmlRegex
delta XmlRegex
e2 XmlTree
c)
delta (Seq XmlRegex
e1 XmlRegex
e2)  XmlTree
c
    | XmlRegex -> Bool
nullable XmlRegex
e1       = XmlRegex -> XmlRegex -> XmlRegex
mkAlt (XmlRegex -> XmlRegex -> XmlRegex
mkSeq (XmlRegex -> XmlTree -> XmlRegex
delta XmlRegex
e1 XmlTree
c) XmlRegex
e2) (XmlRegex -> XmlTree -> XmlRegex
delta XmlRegex
e2 XmlTree
c)
    | Bool
otherwise         = XmlRegex -> XmlRegex -> XmlRegex
mkSeq (XmlRegex -> XmlTree -> XmlRegex
delta XmlRegex
e1 XmlTree
c) XmlRegex
e2
delta (Rep Int
i XmlRegex
e)    XmlTree
c    = XmlRegex -> XmlRegex -> XmlRegex
mkSeq (XmlRegex -> XmlTree -> XmlRegex
delta XmlRegex
e XmlTree
c) (Int -> XmlRegex -> XmlRegex
mkRep (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) XmlRegex
e)
delta (Rng Int
i Int
j XmlRegex
e)  XmlTree
c    = XmlRegex -> XmlRegex -> XmlRegex
mkSeq (XmlRegex -> XmlTree -> XmlRegex
delta XmlRegex
e XmlTree
c) (Int -> Int -> XmlRegex -> XmlRegex
mkRng ((Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) Int -> Int -> Int
forall a. Ord a => a -> a -> a
`max` Int
0) (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) XmlRegex
e)
delta (Perm XmlRegex
e1 XmlRegex
e2) XmlTree
c    = case XmlRegex
e1' of
                            (Zero String
_) -> XmlRegex -> XmlRegex -> XmlRegex
mkPerm XmlRegex
e1 (XmlRegex -> XmlTree -> XmlRegex
delta XmlRegex
e2 XmlTree
c)
                            XmlRegex
_        -> XmlRegex -> XmlRegex -> XmlRegex
mkPerm XmlRegex
e1' XmlRegex
e2
                          where
                          e1' :: XmlRegex
e1' = XmlRegex -> XmlTree -> XmlRegex
delta XmlRegex
e1 XmlTree
c
delta (Merge XmlRegex
e1 XmlRegex
e2) XmlTree
c   = XmlRegex -> XmlRegex -> XmlRegex
mkAlt (XmlRegex -> XmlRegex -> XmlRegex
mkMerge (XmlRegex -> XmlTree -> XmlRegex
delta XmlRegex
e1 XmlTree
c) XmlRegex
e2)
                                (XmlRegex -> XmlRegex -> XmlRegex
mkMerge XmlRegex
e1 (XmlRegex -> XmlTree -> XmlRegex
delta XmlRegex
e2 XmlTree
c))

-- ------------------------------------------------------------

delta'          :: XmlRegex -> XmlTrees -> XmlRegex
delta' :: XmlRegex -> XmlTrees -> XmlRegex
delta'          = (XmlRegex -> XmlTree -> XmlRegex)
-> XmlRegex -> XmlTrees -> XmlRegex
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl XmlRegex -> XmlTree -> XmlRegex
delta

-- | match a sequence of XML trees with a regular expression over trees
--
-- If the input matches, the result is Nothing, else Just an error message is returned

matchXmlRegex           :: XmlRegex -> XmlTrees -> Maybe String
matchXmlRegex :: XmlRegex -> XmlTrees -> Maybe String
matchXmlRegex XmlRegex
e
    = XmlRegex -> Maybe String
res (XmlRegex -> Maybe String)
-> (XmlTrees -> XmlRegex) -> XmlTrees -> Maybe String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. XmlRegex -> XmlTrees -> XmlRegex
delta' XmlRegex
e
    where
    res :: XmlRegex -> Maybe String
res (Zero String
er)       = String -> Maybe String
forall a. a -> Maybe a
Just String
er
    res XmlRegex
re
        | XmlRegex -> Bool
nullable XmlRegex
re   = Maybe String
forall a. Maybe a
Nothing       -- o.k.
        | Bool
otherwise     = String -> Maybe String
forall a. a -> Maybe a
Just (String -> Maybe String) -> String -> Maybe String
forall a b. (a -> b) -> a -> b
$ String
"input does not match " String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e

-- ------------------------------------------------------------

-- | split a sequence of XML trees into a pair of a a matching prefix and a rest
--
-- If there is no matching prefix, Nothing is returned

splitXmlRegex           :: XmlRegex -> XmlTrees -> Maybe (XmlTrees, XmlTrees)
splitXmlRegex :: XmlRegex -> XmlTrees -> Maybe (XmlTrees, XmlTrees)
splitXmlRegex XmlRegex
re        = XmlRegex -> XmlTrees -> XmlTrees -> Maybe (XmlTrees, XmlTrees)
splitXmlRegex' XmlRegex
re []

splitXmlRegex'          :: XmlRegex -> XmlTrees -> XmlTrees -> Maybe (XmlTrees, XmlTrees)
splitXmlRegex' :: XmlRegex -> XmlTrees -> XmlTrees -> Maybe (XmlTrees, XmlTrees)
splitXmlRegex' XmlRegex
re XmlTrees
res []
    | XmlRegex -> Bool
nullable XmlRegex
re       = (XmlTrees, XmlTrees) -> Maybe (XmlTrees, XmlTrees)
forall a. a -> Maybe a
Just (XmlTrees -> XmlTrees
forall a. [a] -> [a]
reverse XmlTrees
res, [])
    | Bool
otherwise         = Maybe (XmlTrees, XmlTrees)
forall a. Maybe a
Nothing

splitXmlRegex' (Zero String
_) XmlTrees
_ XmlTrees
_
                        = Maybe (XmlTrees, XmlTrees)
forall a. Maybe a
Nothing

splitXmlRegex' XmlRegex
re XmlTrees
res xs :: XmlTrees
xs@(XmlTree
x:XmlTrees
xs')
    | Maybe (XmlTrees, XmlTrees) -> Bool
forall a. Maybe a -> Bool
isJust Maybe (XmlTrees, XmlTrees)
res'       = Maybe (XmlTrees, XmlTrees)
res'
    | XmlRegex -> Bool
nullable XmlRegex
re       = (XmlTrees, XmlTrees) -> Maybe (XmlTrees, XmlTrees)
forall a. a -> Maybe a
Just (XmlTrees -> XmlTrees
forall a. [a] -> [a]
reverse XmlTrees
res, XmlTrees
xs)
    | Bool
otherwise         = Maybe (XmlTrees, XmlTrees)
forall a. Maybe a
Nothing
    where
    re' :: XmlRegex
re'  = XmlRegex -> XmlTree -> XmlRegex
delta XmlRegex
re XmlTree
x
    res' :: Maybe (XmlTrees, XmlTrees)
res' = XmlRegex -> XmlTrees -> XmlTrees -> Maybe (XmlTrees, XmlTrees)
splitXmlRegex' XmlRegex
re' (XmlTree
xXmlTree -> XmlTrees -> XmlTrees
forall a. a -> [a] -> [a]
:XmlTrees
res) XmlTrees
xs'

-- ------------------------------------------------------------

-- | scan a sequence of XML trees and split it into parts matching the given regex
--
-- If the parts cannot be split because of a missing match, or because of the
-- empty sequence as match, Nothing is returned

scanXmlRegex                            :: XmlRegex -> XmlTrees -> Maybe [XmlTrees]
scanXmlRegex :: XmlRegex -> XmlTrees -> Maybe [XmlTrees]
scanXmlRegex XmlRegex
re XmlTrees
ts                      = XmlRegex -> Maybe (XmlTrees, XmlTrees) -> Maybe [XmlTrees]
scanXmlRegex' XmlRegex
re (XmlRegex -> XmlTrees -> Maybe (XmlTrees, XmlTrees)
splitXmlRegex XmlRegex
re XmlTrees
ts)

scanXmlRegex'                           :: XmlRegex -> Maybe (XmlTrees, XmlTrees) -> Maybe [XmlTrees]
scanXmlRegex' :: XmlRegex -> Maybe (XmlTrees, XmlTrees) -> Maybe [XmlTrees]
scanXmlRegex' XmlRegex
_  Maybe (XmlTrees, XmlTrees)
Nothing                = Maybe [XmlTrees]
forall a. Maybe a
Nothing
scanXmlRegex' XmlRegex
_  (Just (XmlTrees
rs, []))        = [XmlTrees] -> Maybe [XmlTrees]
forall a. a -> Maybe a
Just [XmlTrees
rs]
scanXmlRegex' XmlRegex
_  (Just ([], XmlTrees
_))         = Maybe [XmlTrees]
forall a. Maybe a
Nothing       -- re is nullable (the empty word matches), nothing split off
                                                        -- would give infinite list of empty lists
scanXmlRegex' XmlRegex
re (Just (XmlTrees
rs, XmlTrees
rest))
    | Maybe [XmlTrees] -> Bool
forall a. Maybe a -> Bool
isNothing Maybe [XmlTrees]
res                     = Maybe [XmlTrees]
forall a. Maybe a
Nothing
    | Bool
otherwise                         = [XmlTrees] -> Maybe [XmlTrees]
forall a. a -> Maybe a
Just (XmlTrees
rs XmlTrees -> [XmlTrees] -> [XmlTrees]
forall a. a -> [a] -> [a]
: Maybe [XmlTrees] -> [XmlTrees]
forall a. HasCallStack => Maybe a -> a
fromJust Maybe [XmlTrees]
res)
    where
    res :: Maybe [XmlTrees]
res = XmlRegex -> Maybe (XmlTrees, XmlTrees) -> Maybe [XmlTrees]
scanXmlRegex' XmlRegex
re (XmlRegex -> XmlTrees -> Maybe (XmlTrees, XmlTrees)
splitXmlRegex XmlRegex
re XmlTrees
rest)

-- ------------------------------------------------------------