module Text.XML.HXT.Arrow.XmlRegex
( XmlRegex
, mkZero
, mkUnit
, mkPrim
, mkPrimA
, mkDot
, mkStar
, mkAlt
, mkAlts
, mkSeq
, mkSeqs
, mkRep
, mkRng
, mkOpt
, nullable
, delta
, matchXmlRegex
, splitXmlRegex
, scanXmlRegex
, matchRegexA
, splitRegexA
, scanRegexA
)
where
import Control.Arrow.ListArrows
import Data.Maybe
import Text.XML.HXT.DOM.Interface
matchRegexA :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree XmlTrees
matchRegexA re ts = ts >>. (\ s -> maybe [s] (const []) . matchXmlRegex re $ s)
splitRegexA :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree (XmlTrees, XmlTrees)
splitRegexA re ts = ts >>. (maybeToList . splitXmlRegex re)
scanRegexA :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree XmlTrees
scanRegexA re ts = ts >>. (fromMaybe [] . scanXmlRegex re)
data XmlRegex = Zero String
| Unit
| Sym (XmlTree -> Bool)
| Dot
| Star XmlRegex
| Alt XmlRegex XmlRegex
| Seq XmlRegex XmlRegex
| Rep Int XmlRegex
| Rng Int Int XmlRegex
mkZero :: String -> XmlRegex
mkZero = Zero
mkUnit :: XmlRegex
mkUnit = Unit
mkPrim :: (XmlTree -> Bool) -> XmlRegex
mkPrim = Sym
mkPrimA :: LA XmlTree XmlTree -> XmlRegex
mkPrimA a = mkPrim (not . null . runLA a)
mkDot :: XmlRegex
mkDot = Dot
mkStar :: XmlRegex -> XmlRegex
mkStar (Zero _) = mkUnit
mkStar e@Unit = e
mkStar e@(Star _e1) = e
mkStar (Rep 1 e1) = mkStar e1
mkStar e@(Alt _ _) = Star (rmStar e)
mkStar e = Star e
rmStar :: XmlRegex -> XmlRegex
rmStar (Alt e1 e2) = mkAlt (rmStar e1) (rmStar e2)
rmStar (Star e1) = rmStar e1
rmStar (Rep 1 e1) = rmStar e1
rmStar e1 = e1
mkAlt :: XmlRegex -> XmlRegex -> XmlRegex
mkAlt e1 (Zero _) = e1
mkAlt (Zero _) e2 = e2
mkAlt e1@(Star Dot) _e2 = e1
mkAlt _e1 e2@(Star Dot) = e2
mkAlt (Sym p1) (Sym p2) = mkPrim $ \ x -> p1 x || p2 x
mkAlt e1 e2@(Sym _) = mkAlt e2 e1
mkAlt e1@(Sym _) (Alt e2@(Sym _) e3) = mkAlt (mkAlt e1 e2) e3
mkAlt (Alt e1 e2) e3 = mkAlt e1 (mkAlt e2 e3)
mkAlt e1 e2 = Alt e1 e2
mkAlts :: [XmlRegex] -> XmlRegex
mkAlts = foldr mkAlt (mkZero "")
mkSeq :: XmlRegex -> XmlRegex -> XmlRegex
mkSeq e1@(Zero _) _e2 = e1
mkSeq _e1 e2@(Zero _) = e2
mkSeq Unit e2 = e2
mkSeq e1 Unit = e1
mkSeq (Seq e1 e2) e3 = mkSeq e1 (mkSeq e2 e3)
mkSeq e1 e2 = Seq e1 e2
mkSeqs :: [XmlRegex] -> XmlRegex
mkSeqs = foldr mkSeq mkUnit
mkRep :: Int -> XmlRegex -> XmlRegex
mkRep 0 e = mkStar e
mkRep _ e@(Zero _) = e
mkRep _ e@Unit = e
mkRep i e = Rep i e
mkRng :: Int -> Int -> XmlRegex -> XmlRegex
mkRng 0 0 _e = mkUnit
mkRng 1 1 e = e
mkRng lb ub _e
| lb > ub = Zero $
"illegal range " ++
show lb ++ ".." ++ show ub
mkRng _l _u e@(Zero _) = e
mkRng _l _u e@Unit = e
mkRng lb ub e = Rng lb ub e
mkOpt :: XmlRegex -> XmlRegex
mkOpt = mkRng 0 1
instance Show XmlRegex where
show (Zero s) = "{err:" ++ s ++ "}"
show Unit = "()"
show (Sym _p) = "{single tree pred}"
show Dot = "."
show (Star e) = "(" ++ show e ++ ")*"
show (Alt e1 e2) = "(" ++ show e1 ++ "|" ++ show e2 ++ ")"
show (Seq e1 e2) = show e1 ++ show e2
show (Rep 1 e) = "(" ++ show e ++ ")+"
show (Rep i e) = "(" ++ show e ++ "){" ++ show i ++ ",}"
show (Rng 0 1 e) = "(" ++ show e ++ ")?"
show (Rng i j e) = "(" ++ show e ++ "){" ++ show i ++ "," ++ show j ++ "}"
nullable :: XmlRegex -> Bool
nullable (Zero _) = False
nullable Unit = True
nullable (Sym _p) = False
nullable Dot = False
nullable (Star _) = True
nullable (Alt e1 e2) = nullable e1 ||
nullable e2
nullable (Seq e1 e2) = nullable e1 &&
nullable e2
nullable (Rep _i e) = nullable e
nullable (Rng i _ e) = i == 0 ||
nullable e
delta :: XmlRegex -> XmlTree -> XmlRegex
delta e@(Zero _) _ = e
delta Unit c = mkZero $
"unexpected char " ++ show c
delta (Sym p) c
| p c = mkUnit
| otherwise = mkZero $
"unexpected tree " ++ show c
delta Dot _ = mkUnit
delta e@(Star e1) c = mkSeq (delta e1 c) e
delta (Alt e1 e2) c = mkAlt (delta e1 c) (delta e2 c)
delta (Seq e1 e2) c
| nullable e1 = mkAlt (mkSeq (delta e1 c) e2) (delta e2 c)
| otherwise = mkSeq (delta e1 c) e2
delta (Rep i e) c = mkSeq (delta e c) (mkRep (i1) e)
delta (Rng i j e) c = mkSeq (delta e c) (mkRng ((i1) `max` 0) (j1) e)
delta' :: XmlRegex -> XmlTrees -> XmlRegex
delta' = foldl delta
matchXmlRegex :: XmlRegex -> XmlTrees -> Maybe String
matchXmlRegex e
= res . delta' e
where
res (Zero er) = Just er
res re
| nullable re = Nothing
| otherwise = Just $ "input does not match " ++ show e
splitXmlRegex :: XmlRegex -> XmlTrees -> Maybe (XmlTrees, XmlTrees)
splitXmlRegex re = splitXmlRegex' re []
splitXmlRegex' :: XmlRegex -> XmlTrees -> XmlTrees -> Maybe (XmlTrees, XmlTrees)
splitXmlRegex' re res []
| nullable re = Just (reverse res, [])
| otherwise = Nothing
splitXmlRegex' (Zero _) _ _
= Nothing
splitXmlRegex' re res xs@(x:xs')
| isJust res' = res'
| nullable re = Just (reverse res, xs)
| otherwise = Nothing
where
re' = delta re x
res' = splitXmlRegex' re' (x:res) xs'
scanXmlRegex :: XmlRegex -> XmlTrees -> Maybe [XmlTrees]
scanXmlRegex re ts = scanXmlRegex' re (splitXmlRegex re ts)
scanXmlRegex' :: XmlRegex -> Maybe (XmlTrees, XmlTrees) -> Maybe [XmlTrees]
scanXmlRegex' _ Nothing = Nothing
scanXmlRegex' _ (Just (rs, [])) = Just [rs]
scanXmlRegex' _ (Just ([], _)) = Nothing
scanXmlRegex' re (Just (rs, rest))
| isNothing res = Nothing
| otherwise = Just (rs : fromJust res)
where
res = scanXmlRegex' re (splitXmlRegex re rest)