module Data.Conduit.Parser.XML
(
tag
, tagName
, tagPredicate
, tagNoAttr
, tagIgnoreAttrs
, anyTag
, AttributeMap
, AttrParser()
, attr
, textAttr
, anyAttr
, ignoreAttrs
, content
, textContent
, Reexport.parseBytes
, Reexport.parseBytesPos
, parseText
, Reexport.parseTextPos
, Reexport.detectUtf
, Reexport.parseFile
, Reexport.parseLBS
, Reexport.ParseSettings()
, Reexport.DecodeEntities
, Reexport.psDecodeEntities
, Reexport.psRetainNamespaces
, Reexport.decodeXmlEntities
, Reexport.decodeHtmlEntities
, Reexport.XmlException(..)
) where
import Control.Applicative
import Control.Exception
import Control.Monad
import Control.Monad.Catch
import Data.Char
import Data.Conduit
import Data.Conduit.Parser
import Data.Conduit.Parser.XML.Internal
import Data.Map as Map hiding (map, null)
import Data.Text as Text (Text, all, unpack)
import Data.XML.Types
import Text.Parser.Combinators
import qualified Text.XML.Stream.Parse as Reexport
tag :: MonadCatch m
=> (Name -> Maybe a)
-> (a -> AttrParser b)
-> (b -> ConduitParser Event m c)
-> ConduitParser Event m c
tag checkName attrParser f = do
skipMany ignored
(name, attributes) <- beginElement
a <- maybe (unexpected $ "Invalid element name: " ++ show name) return $ checkName name
b <- either (unexpected . show) return $ runAttrParser' (attrParser a) attributes
result <- f b
skipMany ignored
endName <- endElement
when (endName /= name) . unexpected $ "Invalid closing tag: expected </" ++ unpack (nameLocalName name) ++ ">, got </" ++ unpack (nameLocalName endName) ++ ">"
return result
where ignored = beginDocument <|> endDocument <|> void beginDoctype <|> void endDoctype <|> void instruction <|> void comment <|> spaceContent
spaceContent :: (MonadCatch m) => ConduitParser Event m ()
spaceContent = do
t <- contentText
unless (Text.all isSpace t) . unexpected $ "Unexpected textual content: " ++ unpack t
runAttrParser' parser attributes = case runAttrParser parser attributes of
Left e -> Left e
Right (a, x) -> if null a then Right x else Left . toException $ Reexport.UnparsedAttributes (Map.toList a)
tagPredicate :: MonadCatch m => (Name -> Bool) -> AttrParser a -> (a -> ConduitParser Event m b) -> ConduitParser Event m b
tagPredicate p attrParser = tag (guard . p) (const attrParser)
tagName :: MonadCatch m => Name -> AttrParser a -> (a -> ConduitParser Event m b) -> ConduitParser Event m b
tagName name = tagPredicate (== name)
tagNoAttr :: MonadCatch m => Name -> ConduitParser Event m a -> ConduitParser Event m a
tagNoAttr name f = tagName name (return ()) $ const f
tagIgnoreAttrs :: MonadCatch m => Name -> ConduitParser Event m a -> ConduitParser Event m a
tagIgnoreAttrs name f = tagName name ignoreAttrs $ const f
anyTag :: MonadCatch m => (Name -> [(Name, [Content])] -> ConduitParser Event m a) -> ConduitParser Event m a
anyTag handler = tag Just (\name -> (,) name <$> many anyAttr) (uncurry handler)
textContent :: MonadCatch m => ConduitParser Event m Text
textContent = do
skipMany ignored
mconcat <$> sepEndBy1 text ignored
where ignored = beginDocument <|> endDocument <|> void beginDoctype <|> endDoctype <|> void instruction <|> void comment
content :: MonadCatch m => (Text -> Maybe a) -> ConduitParser Event m a
content parse = maybe (unexpected "Invalid content.") return . parse =<< textContent
newtype AttrParser a = AttrParser { runAttrParser :: AttributeMap -> Either SomeException (AttributeMap, a) }
instance Monad AttrParser where
return a = AttrParser $ \attributes -> Right (attributes, a)
(AttrParser p) >>= f = AttrParser $ p >=> (\(attributes', a) -> runAttrParser (f a) attributes')
instance Functor AttrParser where
fmap = liftM
instance Applicative AttrParser where
pure = return
(<*>) = ap
instance Alternative AttrParser where
empty = AttrParser $ const $ Left $ toException $ Reexport.XmlException "AttrParser.empty" Nothing
AttrParser f <|> AttrParser g = AttrParser $ \x -> either (const $ g x) Right (f x)
instance MonadThrow AttrParser where
throwM = AttrParser . const . throwM
textAttr :: Name -> AttrParser Text
textAttr name = AttrParser $ \attrs -> maybe raiseError (returnValue attrs) (Map.lookup name attrs)
where raiseError = Left . toException $ Reexport.XmlException ("Missing attribute: " ++ show name) Nothing
returnValue attrs contents = Right (Map.delete name attrs, contentsToText contents)
attr :: Name -> (Text -> Maybe a) -> AttrParser a
attr name fvalue = do
value <- textAttr name
maybe (throwM $ Reexport.XmlException ("Invalid attribute: " ++ show name) Nothing) return (fvalue value)
anyAttr :: AttrParser (Name, [Content])
anyAttr = AttrParser $ \attrs -> case keys attrs of
k:_ -> Right (Map.delete k attrs, (k, findWithDefault mempty k attrs))
_ -> Left . toException $ Reexport.XmlException "Expecting one more attribute." Nothing
ignoreAttrs :: AttrParser ()
ignoreAttrs = AttrParser . const $ Right (mempty, ())
contentsToText :: [Content] -> Text
contentsToText =
mconcat . map toText
where
toText (ContentText t) = t
toText (ContentEntity e) = mconcat ["&", e, ";"]
parseText :: (MonadThrow m) => Reexport.ParseSettings -> Conduit Text m Event
parseText = Reexport.parseText'