Safe Haskell	None
Language	Haskell2010

Data.Conduit.Parser.XML

Contents

XML parsers
Re-exports

Description

High-level primitives to parse a stream of XML Events.

Synopsis

XML parsers

Tags

tag Source #

Arguments

:: MonadCatch m
=> (Name -> Maybe a)	Tag name parser.
-> (a -> AttrParser b)	Attributes parser. It should consume all available attributes.
-> (b -> ConduitParser Event m c)	Children parser. It should consume all elements between the opening and closing tags.
-> ConduitParser Event m c

Parse an XML tag, depending on its name and attributes. This is the most generic tag parser.

Comments, instructions and whitespace are ignored.

tagName :: MonadCatch m => Name -> AttrParser a -> (a -> ConduitParser Event m b) -> ConduitParser Event m b Source #

Like tag, but match a single tag name.

tagPredicate :: MonadCatch m => (Name -> Bool) -> AttrParser a -> (a -> ConduitParser Event m b) -> ConduitParser Event m b Source #

Like tag, but use a predicate to select tag names.

tagNoAttr :: MonadCatch m => Name -> ConduitParser Event m a -> ConduitParser Event m a Source #

Like tagName, but expect no attributes at all.

tagIgnoreAttrs :: MonadCatch m => Name -> ConduitParser Event m a -> ConduitParser Event m a Source #

Like tagName, but ignore all attributes.

anyTag :: MonadCatch m => (Name -> [(Name, [Content])] -> ConduitParser Event m a) -> ConduitParser Event m a Source #

Parse an XML tag, whatever its name and attributes.

Comments, instructions and whitespace are ignored.

Attributes

type AttributeMap = Map Name [Content] Source #

data AttrParser a Source #

Instances

Monad AttrParser Source #
Methods (>>=) :: AttrParser a -> (a -> AttrParser b) -> AttrParser b # (>>) :: AttrParser a -> AttrParser b -> AttrParser b # return :: a -> AttrParser a # fail :: String -> AttrParser a #
Functor AttrParser Source #
Methods fmap :: (a -> b) -> AttrParser a -> AttrParser b # (<$) :: a -> AttrParser b -> AttrParser a #
Applicative AttrParser Source #
Methods pure :: a -> AttrParser a # (<>) :: AttrParser (a -> b) -> AttrParser a -> AttrParser b # (>) :: AttrParser a -> AttrParser b -> AttrParser b # (<*) :: AttrParser a -> AttrParser b -> AttrParser a #
Alternative AttrParser Source #	Attribute parsers can be combined with (`<\|>`), `some`, `many`, `optional`, `choice`, etc.
Methods empty :: AttrParser a # (<\|>) :: AttrParser a -> AttrParser a -> AttrParser a # some :: AttrParser a -> AttrParser [a] # many :: AttrParser a -> AttrParser [a] #
MonadThrow AttrParser Source #
Methods throwM :: Exception e => e -> AttrParser a #

attr :: Name -> (Text -> Maybe a) -> AttrParser a Source #

Parse a single attribute using a specific name and a custom parsing function for its value.

textAttr :: Name -> AttrParser Text Source #

Parse a single textual attribute.

anyAttr :: AttrParser (Name, [Content]) Source #

Parse a single attribute, whatever its name or value.

ignoreAttrs :: AttrParser () Source #

Consume all remaining unparsed attributes.

Content

content :: MonadCatch m => (Text -> Maybe a) -> ConduitParser Event m a Source #

Parse a tag content using a custom parsing function.

textContent :: MonadCatch m => ConduitParser Event m Text Source #

Parse a tag content as Text.

This parser fails if the tag is empty. To get mempty instead of failing, use textContent <|> mempty.

Re-exports

Event producers

parseBytes :: MonadThrow m => ParseSettings -> Conduit ByteString m Event #

Parses a byte stream into Events. This function is implemented fully in Haskell using attoparsec-text for parsing. The produced error messages do not give line/column information, so you may prefer to stick with the parser provided by libxml-enumerator. However, this has the advantage of not relying on any C libraries.

This relies on detectUtf to determine character encoding, and parseText' to do the actual parsing.

parseBytesPos :: MonadThrow m => ParseSettings -> Conduit ByteString m EventPos #

parseText :: MonadThrow m => ParseSettings -> Conduit Text m Event Source #

Alias for parseText'

parseTextPos :: MonadThrow m => ParseSettings -> Conduit Text m EventPos #

Same as parseText', but includes the position of each event.

Since 1.2.4

detectUtf :: MonadThrow m => Conduit ByteString m Text #

Automatically determine which UTF variant is being used. This function first checks for BOMs, removing them as necessary, and then check for the equivalent of <?xml for each of UTF-8, UTF-16LEBE, and UTF-32LEBE. It defaults to assuming UTF-8.

parseFile :: MonadResource m => ParseSettings -> FilePath -> Producer m Event #

A helper function which reads a file from disk using enumFile, detects character encoding using detectUtf, parses the XML using parseBytes, and then hands off control to your supplied parser.

parseLBS :: MonadThrow m => ParseSettings -> ByteString -> Producer m Event #

Parse an event stream from a lazy ByteString.

Parser settings

data ParseSettings :: * #

Instances

Default ParseSettings
Methods def :: ParseSettings #

type DecodeEntities = Text -> Content #

psDecodeEntities :: ParseSettings -> DecodeEntities #

psRetainNamespaces :: ParseSettings -> Bool #

Whether the original xmlns attributes should be retained in the parsed values. For more information on motivation, see:

https://github.com/snoyberg/xml/issues/38

Default: False

Since 1.2.1

Entity decoding

decodeXmlEntities :: DecodeEntities #

Default implementation of DecodeEntities, which leaves the entity as-is. Numeric character references and the five standard entities (lt, gt, amp, quot, pos) are handled internally by the parser.

decodeHtmlEntities :: DecodeEntities #

HTML4-compliant entity decoder. Handles the additional 248 entities defined by HTML 4 and XHTML 1.

Note that HTML 5 introduces a drastically larger number of entities, and this code does not recognize most of them.

Exceptions

data XmlException :: * #

Constructors

XmlException
Fields xmlErrorMessage :: String xmlBadInput :: Maybe Event
InvalidEndElement Name (Maybe Event)
InvalidEntity String (Maybe Event)
MissingAttribute String
UnparsedAttributes [(Name, [Content])]

Instances

Show XmlException
Methods showsPrec :: Int -> XmlException -> ShowS # show :: XmlException -> String # showList :: [XmlException] -> ShowS #
Exception XmlException
Methods toException :: XmlException -> SomeException # fromException :: SomeException -> Maybe XmlException # displayException :: XmlException -> String #