module Text.XML.HaXml.Xtract.Parse (parseXtract,xtract) where
import Text.ParserCombinators.Poly hiding (bracket)
import Text.XML.HaXml.Xtract.Lex
import Text.XML.HaXml.Xtract.Combinators as D
import Text.XML.HaXml.Combinators as C
import Text.XML.HaXml.Types (Content)
import Data.List(isPrefixOf)
import Text.XML.HaXml.Escape (xmlUnEscapeContent,stdXmlEscaper)
unescape :: [Content i] -> [Content i]
unescape = xmlUnEscapeContent stdXmlEscaper
xtract :: (String->String) -> String -> CFilter i
xtract f query
| interiorRef lexedQ = dfilter (parseXtract lexedQ)
| otherwise = cfilter (parseXtract lexedQ)
where
lexedQ = lexXtract f query
interiorRef (Right (_,Symbol s): Right (_,Symbol "//"): _)
| s `elem` predicateIntro = True
interiorRef (Right (_,Symbol s): Right (_,Symbol "/"): _)
| s `elem` predicateIntro = True
interiorRef (_ : rest) = interiorRef rest
interiorRef [] = False
predicateIntro = [ "[", "("
, "&", "|", "~"
, "=", "!=", "<", "<=", ">", ">="
, ".=.",".!=.",".<.",".<=.",".>.",".>=." ]
parseXtract :: [Token] -> DFilter i
parseXtract = either error id . parseXtract'
parseXtract' :: [Token] -> Either String (DFilter i)
parseXtract' = fst . runParser (aquery liftLocal)
type XParser a = Parser (Either String (Posn,TokenT)) a
string :: XParser String
string = P (\inp -> case inp of
(Left err: _) -> Failure inp err
(Right (_,TokString n):ts) -> Success ts n
ts -> Failure ts "expected a string" )
number :: XParser Integer
number = P (\inp -> case inp of
(Left err: _) -> Failure inp err
(Right (_,TokNum n):ts) -> Success ts n
ts -> Failure ts "expected a number" )
symbol :: String -> XParser ()
symbol s = P (\inp -> case inp of
(Left err: _) -> Failure inp err
(Right (_, Symbol n):ts) | n==s -> Success ts ()
ts -> Failure ts ("expected symbol "++s) )
quote :: XParser ()
quote = oneOf [ symbol "'", symbol "\"" ]
pam :: [a->b] -> a -> [b]
pam fs x = [ f x | f <- fs ]
bracket :: XParser a -> XParser a
bracket p =
do symbol "("
x <- p
symbol ")"
return x
aquery :: ((CFilter i->CFilter i) -> (DFilter i->DFilter i))
-> XParser (DFilter i)
aquery lift = oneOf
[ do symbol "//"
tquery [lift C.multi]
, do symbol "/"
tquery [lift id]
, do symbol "./"
tquery [(local C.keep D./>)]
, do tquery [(local C.keep D./>)]
]
tquery :: [DFilter i->DFilter i] -> XParser (DFilter i)
tquery [] = tquery [id]
tquery (qf:cxt) = oneOf
[ do q <- bracket (tquery (qf:qf:cxt))
xquery cxt q
, do q <- xtag
xquery cxt (qf ((unescape .).q))
, do symbol "-"
return (qf (local C.txt))
]
xtag :: XParser (DFilter i)
xtag = oneOf
[ do s <- string
symbol "*"
return (local (C.tagWith (s `isPrefixOf`)))
, do s <- string
return (local (C.tag s))
, do symbol "*"
s <- string
return (local (C.tagWith (((reverse s) `isPrefixOf`) . reverse)))
, do symbol "*"
return (local C.elm)
]
xquery :: [DFilter i->DFilter i] -> DFilter i -> XParser (DFilter i)
xquery cxt q1 = oneOf
[ do symbol "/"
( do symbol "@"
attr <- string
return (D.iffind attr (\s->local (C.literal s)) D.none `D.o` q1)
`onFail`
tquery ((q1 D./>):cxt) )
, do symbol "//"
tquery ((\q2-> (liftLocal C.multi) q2
`D.o` local C.children `D.o` q1):cxt)
, do symbol "+"
q2 <- tquery cxt
return (D.cat [q1,q2])
, do symbol "["
is <- iindex
symbol "]"
xquery cxt (\xml-> concat . pam is . q1 xml)
, do symbol "["
p <- tpredicate
symbol "]"
xquery cxt (q1 `D.with` p)
, return q1
]
tpredicate :: XParser (DFilter i)
tpredicate =
do p <- vpredicate
f <- upredicate
return (f p)
upredicate :: XParser (DFilter i->DFilter i)
upredicate = oneOf
[ do symbol "&"
p2 <- tpredicate
return (`D.o` p2)
, do symbol "|"
p2 <- tpredicate
return (D.|>| p2)
, return id
]
vpredicate :: XParser (DFilter i)
vpredicate = oneOf
[ do bracket tpredicate
, do symbol "~"
p <- tpredicate
return (local C.keep `D.without` p)
, do tattribute
]
tattribute :: XParser (DFilter i)
tattribute = oneOf
[ do q <- aquery liftGlobal
uattribute q
, do symbol "@"
s <- string
vattribute (local C.keep, local (C.attr s), D.iffind s)
]
uattribute :: DFilter i -> XParser (DFilter i)
uattribute q = oneOf
[ do symbol "/"
symbol "@"
s <- string
vattribute (q, local (C.attr s), D.iffind s)
, do vattribute (q, local C.keep, D.ifTxt)
]
vattribute :: (DFilter i, DFilter i, (String->DFilter i)->DFilter i->DFilter i)
-> XParser (DFilter i)
vattribute (q,a,iffn) = oneOf
[ do cmp <- op
quote
s2 <- string
quote
return ((iffn (\s1->if cmp s1 s2 then D.keep else D.none) D.none)
`D.o` q)
, do cmp <- op
(q2,iffn2) <- wattribute
return ((iffn (\s1-> iffn2 (\s2-> if cmp s1 s2 then D.keep else D.none)
D.none)
D.none) `D.o` q)
, do cmp <- nop
n <- number
return ((iffn (\s->if cmp (read s) n then D.keep else D.none) D.none)
`D.o` q)
, do cmp <- nop
(q2,iffn2) <- wattribute
return ((iffn (\s1-> iffn2 (\s2-> if cmp (read s1) (read s2) then D.keep
else D.none)
D.none)
D.none) `D.o` q)
, do return ((a `D.o` q))
]
wattribute :: XParser (DFilter i, (String->DFilter i)->DFilter i->DFilter i)
wattribute = oneOf
[ do symbol "@"
s <- string
return (D.keep, D.iffind s)
, do q <- aquery liftGlobal
symbol "/"
symbol "@"
s <- string
return (q, D.iffind s)
, do q <- aquery liftGlobal
return (q, D.ifTxt)
]
iindex :: XParser [[a]->[a]]
iindex =
do i <- simpleindex
is <- idxcomma
return (i:is)
simpleindex :: XParser ([a]->[a])
simpleindex = oneOf
[ do n <- number
r <- rrange n
return r
, do symbol "$"
return (C.keep . last)
]
rrange, numberdollar :: Integer -> XParser ([a]->[a])
rrange n1 = oneOf
[ do symbol "-"
numberdollar n1
, return (take 1 . drop (fromInteger n1))
]
numberdollar n1 = oneOf
[ do n2 <- number
return (take (fromInteger (1+n2-n1)) . drop (fromInteger n1))
, do symbol "$"
return (drop (fromInteger n1))
]
idxcomma :: XParser [[a]->[a]]
idxcomma = oneOf
[ do symbol ","
r <- simpleindex
rs <- idxcomma
return (r:rs)
, return []
]
op :: XParser (String->String->Bool)
op = oneOf
[ do symbol "="; return (==)
, do symbol "!="; return (/=)
, do symbol "<"; return (<)
, do symbol "<="; return (<=)
, do symbol ">"; return (>)
, do symbol ">="; return (>=)
]
nop :: XParser (Integer->Integer->Bool)
nop = oneOf
[ do symbol ".=."; return (==)
, do symbol ".!=."; return (/=)
, do symbol ".<."; return (<)
, do symbol ".<=."; return (<=)
, do symbol ".>."; return (>)
, do symbol ".>=."; return (>=)
]