| Safe Haskell | None |
|---|---|
| Language | Haskell2010 |
Data.XML.Parser.High
Description
High-level XML parsers, built on top of Data.XML.Parser.Mid:
- entity references are expanded
- CDATAs are abstracted away
- comments are ignored
- whitespace between tokens is ignored
- duplicate attributes are ignored
All documentation examples assume the following setup:
:set -XOverloadedStrings import Data.Attoparsec.ByteString
Synopsis
- module Data.XML.Parser.High.AttrParser
- module Data.XML.Parser.High.NameParser
- data Prolog = Prolog {}
- data Token
- data TokenParser m a
- data ContentParser m a
- noContent :: Applicative m => ContentParser m ()
- withContent :: TokenParser m a -> ContentParser m a
- anyContent :: CharParsing m => Monad m => ContentParser m ()
- runTokenParser :: TokenParser m a -> m a
- prolog :: CharParsing m => Monad m => TokenParser m Prolog
- instruction :: CharParsing m => Monad m => TokenParser m Instruction
- textContent :: CharParsing m => Monad m => EntityDecoder -> TokenParser m Text
- textContent' :: CharParsing m => Monad m => TokenParser m Text
- tag :: CharParsing m => Monad m => EntityDecoder -> NameParser a -> (a -> AttrParser b) -> (b -> ContentParser m c) -> TokenParser m c
- tag' :: CharParsing m => Monad m => NameParser a -> AttrParser b -> ContentParser m c -> TokenParser m c
- anyTag :: CharParsing m => Monad m => TokenParser m ()
- anyToken :: CharParsing m => Monad m => EntityDecoder -> TokenParser m Token
- anyToken' :: CharParsing m => Monad m => TokenParser m Token
Documentation
XML document prolog.
Constructors
| Prolog | |
Fields | |
Constructors
| TokenProlog Prolog | |
| TokenInstruction Instruction | |
| TokenTag QName (Map QName Text) [Token] | |
| TokenTextContent Text |
data TokenParser m a Source #
A parser that consumes whole Tokens.
Instances
data ContentParser m a Source #
How to parse tag content.
Instances
| Functor m => Functor (ContentParser m) Source # | |
Defined in Data.XML.Parser.High Methods fmap :: (a -> b) -> ContentParser m a -> ContentParser m b # (<$) :: a -> ContentParser m b -> ContentParser m a # | |
noContent :: Applicative m => ContentParser m () Source #
Assert that content is empty.
>>>parseOnly (runTokenParser $ tag' anyName anyAttr noContent) "<tag key='value'>body</tag>"Left ...>>>parseOnly (runTokenParser $ tag' anyName anyAttr noContent) "<tag key='value'></tag>"Right ()>>>parseOnly (runTokenParser $ tag' anyName anyAttr noContent) "<tag key='value'/>"Right ()
withContent :: TokenParser m a -> ContentParser m a Source #
Assert that content is not empty, and parse it using given token parser.
>>>parseOnly (runTokenParser $ tag' anyName anyAttr (withContent textContent')) "<tag key='value'>body</tag>"Right "body">>>parseOnly (runTokenParser $ tag' anyName anyAttr (withContent textContent')) "<tag key='value'></tag>"Left ...>>>parseOnly (runTokenParser $ tag' anyName anyAttr (withContent textContent')) "<tag key='value'/>"Left ...>>>parseOnly (runTokenParser $ tag' anyName anyAttr (withContent $ pure ())) "<tag key='value'/>"Left ...
anyContent :: CharParsing m => Monad m => ContentParser m () Source #
Accept any content, including empty content.
>>>parseOnly (runTokenParser $ tag' anyName anyAttr anyContent) "<tag key='value'>body</tag>"Right ()>>>parseOnly (runTokenParser $ tag' anyName anyAttr anyContent) "<tag key='value'></tag>"Right ()>>>parseOnly (runTokenParser $ tag' anyName anyAttr anyContent) "<tag key='value'/>"Right ()
runTokenParser :: TokenParser m a -> m a Source #
prolog :: CharParsing m => Monad m => TokenParser m Prolog Source #
https://www.w3.org/TR/REC-xml/#NT-prolog
>>>parseOnly (runTokenParser prolog) "<?xml version='1.0'?><!DOCTYPE greeting>"Right (Prolog {prologXmlDeclaration = Just (XMLDeclaration "1.0" ...), prologInstructions = [], prologDoctype = Just (Doctype "greeting" ...)})>>>parseOnly (runTokenParser prolog) "<?xml version='1.0'?> <!-- comments and whitespace are ignored --><!DOCTYPE greeting>"Right (Prolog {prologXmlDeclaration = Just (XMLDeclaration "1.0" ...), prologInstructions = [], prologDoctype = Just (Doctype "greeting" ...)})
instruction :: CharParsing m => Monad m => TokenParser m Instruction Source #
Parse a processing instruction.
https://www.w3.org/TR/REC-xml/#dt-pi
>>>parseOnly (runTokenParser instruction) "<?php echo 'Hello World!'; ?>"Right (Instruction "php" "echo 'Hello World!'; ")>>>parseOnly (runTokenParser instruction) "<!-- comments and whitespace are ignored --> <?php echo 'Hello World!'; ?>"Right (Instruction "php" "echo 'Hello World!'; ")
textContent :: CharParsing m => Monad m => EntityDecoder -> TokenParser m Text Source #
Parse textual content of a tag, including CDATA.
textContent' :: CharParsing m => Monad m => TokenParser m Text Source #
Same as textContent (decodePredefinedEntities <> decodeHtmlEntities), provided for convenience.
>>>parseOnly (runTokenParser $ tag' anyName anyAttr (withContent textContent')) "<tag>body<!-- Ignored comment --><![CDATA[<innertag>innerbody</innertag>]]></tag>"Right "body<innertag>innerbody</innertag>"
Arguments
| :: CharParsing m | |
| => Monad m | |
| => EntityDecoder | How to expand entity references |
| -> NameParser a | How to parse tag name |
| -> (a -> AttrParser b) | How to parse tag attributes |
| -> (b -> ContentParser m c) | How to parse tag content |
| -> TokenParser m c |
Generic tag parser.
Arguments
| :: CharParsing m | |
| => Monad m | |
| => NameParser a | How to parse tag name |
| -> AttrParser b | How to parse tag attributes |
| -> ContentParser m c | How to parse tag content |
| -> TokenParser m c |
Simplified version of tag:
- no state forwarding between name, attributes and content parsers
- uses
decodePredefinedEntities <> decodeHtmlEntitiesto expand entity references
anyTag :: CharParsing m => Monad m => TokenParser m () Source #
Parse a tag with any name, any attributes and any content.
>>>parseOnly (runTokenParser anyTag) "<tag key='value'>body</tag>"Right ()>>>parseOnly (runTokenParser anyTag) "<tag key='value'/>"Right ()>>>parseOnly (runTokenParser anyTag) "<!-- ignored comment --><tag key='value'/>"Right ()>>>parseOnly (runTokenParser anyTag) "<tag key='value'>body<!-- ignored comment --></tag>"Right ()
anyToken :: CharParsing m => Monad m => EntityDecoder -> TokenParser m Token Source #
Parse any Token.
anyToken' :: CharParsing m => Monad m => TokenParser m Token Source #
Same as anyToken (decodePredefinedEntities <> decodeHtmlEntities), provided for convenience.