Safe Haskell | None |
---|---|
Language | Haskell2010 |
High-level primitives to parse a stream of XML Event
s.
- tag :: MonadCatch m => (Name -> Maybe a) -> (a -> AttrParser b) -> (b -> ConduitParser Event m c) -> ConduitParser Event m c
- tagName :: MonadCatch m => Name -> AttrParser a -> (a -> ConduitParser Event m b) -> ConduitParser Event m b
- tagPredicate :: MonadCatch m => (Name -> Bool) -> AttrParser a -> (a -> ConduitParser Event m b) -> ConduitParser Event m b
- tagNoAttr :: MonadCatch m => Name -> ConduitParser Event m a -> ConduitParser Event m a
- tagIgnoreAttrs :: MonadCatch m => Name -> ConduitParser Event m a -> ConduitParser Event m a
- anyTag :: MonadCatch m => (Name -> [(Name, [Content])] -> ConduitParser Event m a) -> ConduitParser Event m a
- type AttributeMap = Map Name [Content]
- data AttrParser a
- attr :: Name -> (Text -> Maybe a) -> AttrParser a
- textAttr :: Name -> AttrParser Text
- anyAttr :: AttrParser (Name, [Content])
- ignoreAttrs :: AttrParser ()
- content :: MonadCatch m => (Text -> Maybe a) -> ConduitParser Event m a
- textContent :: MonadCatch m => ConduitParser Event m Text
- parseBytes :: MonadThrow m => ParseSettings -> Conduit ByteString m Event
- parseBytesPos :: MonadThrow m => ParseSettings -> Conduit ByteString m EventPos
- parseText :: MonadThrow m => ParseSettings -> Conduit Text m Event
- parseTextPos :: MonadThrow m => ParseSettings -> Conduit Text m EventPos
- detectUtf :: MonadThrow m => Conduit ByteString m Text
- parseFile :: MonadResource m => ParseSettings -> FilePath -> Producer m Event
- parseLBS :: MonadThrow m => ParseSettings -> ByteString -> Producer m Event
- data ParseSettings :: *
- type DecodeEntities = Text -> Content
- psDecodeEntities :: ParseSettings -> DecodeEntities
- psRetainNamespaces :: ParseSettings -> Bool
- decodeXmlEntities :: DecodeEntities
- decodeHtmlEntities :: DecodeEntities
- data XmlException :: *
- = XmlException { }
- | InvalidEndElement Name (Maybe Event)
- | InvalidEntity String (Maybe Event)
- | MissingAttribute String
- | UnparsedAttributes [(Name, [Content])]
XML parsers
Tags
:: MonadCatch m | |
=> (Name -> Maybe a) | Tag name parser. |
-> (a -> AttrParser b) | Attributes parser. It should consume all available attributes. |
-> (b -> ConduitParser Event m c) | Children parser. It should consume all elements between the opening and closing tags. |
-> ConduitParser Event m c |
Parse an XML tag, depending on its name and attributes. This is the most generic tag parser.
Comments, instructions and whitespace are ignored.
tagName :: MonadCatch m => Name -> AttrParser a -> (a -> ConduitParser Event m b) -> ConduitParser Event m b Source #
Like tag
, but match a single tag name.
tagPredicate :: MonadCatch m => (Name -> Bool) -> AttrParser a -> (a -> ConduitParser Event m b) -> ConduitParser Event m b Source #
Like tag
, but use a predicate to select tag names.
tagNoAttr :: MonadCatch m => Name -> ConduitParser Event m a -> ConduitParser Event m a Source #
Like tagName
, but expect no attributes at all.
tagIgnoreAttrs :: MonadCatch m => Name -> ConduitParser Event m a -> ConduitParser Event m a Source #
Like tagName
, but ignore all attributes.
anyTag :: MonadCatch m => (Name -> [(Name, [Content])] -> ConduitParser Event m a) -> ConduitParser Event m a Source #
Parse an XML tag, whatever its name and attributes.
Comments, instructions and whitespace are ignored.
Attributes
data AttrParser a Source #
Monad AttrParser Source # | |
Functor AttrParser Source # | |
Applicative AttrParser Source # | |
Alternative AttrParser Source # | Attribute parsers can be combined with ( |
MonadThrow AttrParser Source # | |
attr :: Name -> (Text -> Maybe a) -> AttrParser a Source #
Parse a single attribute using a specific name and a custom parsing function for its value.
anyAttr :: AttrParser (Name, [Content]) Source #
Parse a single attribute, whatever its name or value.
ignoreAttrs :: AttrParser () Source #
Consume all remaining unparsed attributes.
Content
content :: MonadCatch m => (Text -> Maybe a) -> ConduitParser Event m a Source #
Parse a tag content using a custom parsing function.
textContent :: MonadCatch m => ConduitParser Event m Text Source #
Re-exports
Event producers
parseBytes :: MonadThrow m => ParseSettings -> Conduit ByteString m Event #
Parses a byte stream into Event
s. This function is implemented fully in
Haskell using attoparsec-text for parsing. The produced error messages do
not give line/column information, so you may prefer to stick with the parser
provided by libxml-enumerator. However, this has the advantage of not
relying on any C libraries.
This relies on detectUtf
to determine character encoding, and parseText'
to do the actual parsing.
parseBytesPos :: MonadThrow m => ParseSettings -> Conduit ByteString m EventPos #
parseText :: MonadThrow m => ParseSettings -> Conduit Text m Event Source #
Alias for parseText'
parseTextPos :: MonadThrow m => ParseSettings -> Conduit Text m EventPos #
Same as parseText'
, but includes the position of each event.
Since 1.2.4
detectUtf :: MonadThrow m => Conduit ByteString m Text #
Automatically determine which UTF variant is being used. This function first checks for BOMs, removing them as necessary, and then check for the equivalent of <?xml for each of UTF-8, UTF-16LEBE, and UTF-32LEBE. It defaults to assuming UTF-8.
parseFile :: MonadResource m => ParseSettings -> FilePath -> Producer m Event #
A helper function which reads a file from disk using enumFile
, detects
character encoding using detectUtf
, parses the XML using parseBytes
, and
then hands off control to your supplied parser.
parseLBS :: MonadThrow m => ParseSettings -> ByteString -> Producer m Event #
Parse an event stream from a lazy ByteString
.
Parser settings
data ParseSettings :: * #
type DecodeEntities = Text -> Content #
psRetainNamespaces :: ParseSettings -> Bool #
Whether the original xmlns attributes should be retained in the parsed values. For more information on motivation, see:
https://github.com/snoyberg/xml/issues/38
Default: False
Since 1.2.1
Entity decoding
decodeXmlEntities :: DecodeEntities #
Default implementation of DecodeEntities
, which leaves the
entity as-is. Numeric character references and the five standard
entities (lt, gt, amp, quot, pos) are handled internally by the
parser.
decodeHtmlEntities :: DecodeEntities #
HTML4-compliant entity decoder. Handles the additional 248 entities defined by HTML 4 and XHTML 1.
Note that HTML 5 introduces a drastically larger number of entities, and this code does not recognize most of them.