hextream-0.1.0.1: Streaming-friendly XML parsers

Safe HaskellNone
LanguageHaskell2010

Data.XML.Parser.Mid

Description

Mid-level XML parsers, built on top of Data.XML.Parser.Low:

  • some formatting details (e.g. quoting, whitespace) are abstracted away; therefore, parsers are not reversible
  • entities delimited by an opening and closing sequence are recognized, except for tags, which require more complex, recursive logic
  • token parsers do not overlap; therefore, an XML document can be tokenized in a stateless way

All documentation examples assume the following setup:

:set -XOverloadedStrings
import Data.Attoparsec.ByteString
Synopsis

Documentation

data TokenParser m a Source #

A parser that consumes whole Tokens.

Instances
Monad m => Monad (TokenParser m) Source # 
Instance details

Defined in Data.XML.Parser.Mid

Methods

(>>=) :: TokenParser m a -> (a -> TokenParser m b) -> TokenParser m b #

(>>) :: TokenParser m a -> TokenParser m b -> TokenParser m b #

return :: a -> TokenParser m a #

fail :: String -> TokenParser m a #

Functor m => Functor (TokenParser m) Source # 
Instance details

Defined in Data.XML.Parser.Mid

Methods

fmap :: (a -> b) -> TokenParser m a -> TokenParser m b #

(<$) :: a -> TokenParser m b -> TokenParser m a #

(Parsing m, Monad m) => MonadFail (TokenParser m) Source # 
Instance details

Defined in Data.XML.Parser.Mid

Methods

fail :: String -> TokenParser m a #

Applicative m => Applicative (TokenParser m) Source # 
Instance details

Defined in Data.XML.Parser.Mid

Methods

pure :: a -> TokenParser m a #

(<*>) :: TokenParser m (a -> b) -> TokenParser m a -> TokenParser m b #

liftA2 :: (a -> b -> c) -> TokenParser m a -> TokenParser m b -> TokenParser m c #

(*>) :: TokenParser m a -> TokenParser m b -> TokenParser m b #

(<*) :: TokenParser m a -> TokenParser m b -> TokenParser m a #

Alternative m => Alternative (TokenParser m) Source # 
Instance details

Defined in Data.XML.Parser.Mid

Methods

empty :: TokenParser m a #

(<|>) :: TokenParser m a -> TokenParser m a -> TokenParser m a #

some :: TokenParser m a -> TokenParser m [a] #

many :: TokenParser m a -> TokenParser m [a] #

tokenInstruction :: CharParsing m => Monad m => TokenParser m Instruction Source #

https://www.w3.org/TR/REC-xml/#dt-pi

>>> parseOnly (runTokenParser tokenInstruction) "<?xml-stylesheet type='text/xsl' href='style.xsl'?>"
Right (Instruction "xml-stylesheet" "type='text/xsl' href='style.xsl'")

tokenComment :: CharParsing m => Monad m => TokenParser m Text Source #

https://www.w3.org/TR/REC-xml/#NT-Comment

>>> parseOnly (runTokenParser tokenComment) "<!-- declarations for <head> & <body> -->"
Right " declarations for <head> & <body> "
>>> parseOnly (runTokenParser tokenComment) "<!-- B+, B, or B--->"
Right " B+, B, or B-"

tokenCdata :: CharParsing m => Monad m => TokenParser m Text Source #

https://www.w3.org/TR/REC-xml/#dt-cdsection

>>> parseOnly (runTokenParser tokenCdata) "<![CDATA[<greeting>Hello, world!</greeting>]]>"
Right "<greeting>Hello, world!</greeting>"

tokenDoctype :: CharParsing m => Monad m => TokenParser m Doctype Source #

https://www.w3.org/TR/REC-xml/#NT-doctypedecl

>>> parseOnly (runTokenParser tokenDoctype) "<!DOCTYPE greeting SYSTEM 'hello.dtd'>"
Right (Doctype "greeting" (Just (SystemID "hello.dtd")) [])
>>> parseOnly (runTokenParser tokenDoctype) "<!DOCTYPE foo [ <!ENTITY x '&lt;'> ]>"
Right (Doctype "foo" Nothing [GeneralEntityDeclaration "x" [ContentReference (EntityRef "lt")]])

tokenXmlDeclaration :: CharParsing m => Monad m => TokenParser m XMLDeclaration Source #

https://www.w3.org/TR/REC-xml/#NT-XMLDecl

>>> parseOnly (runTokenParser tokenXmlDeclaration) "<?xml version='1.0' encoding='UTF-8' standalone='yes'?>"
Right (XMLDeclaration "1.0" (Just "UTF-8") (Just True))

tokenStartTag :: CharParsing m => Monad m => TokenParser m StartTag Source #

https://www.w3.org/TR/REC-xml/#NT-STag

>>> parseOnly (runTokenParser tokenStartTag) "<termdef id='dt-dog' term='dog'>"
Right (StartTag (QName {namePrefix = "", nameLocal = "termdef"}) [Attribute (QName {namePrefix = "", nameLocal = "id"}) [ContentText "dt-dog"],Attribute (QName {namePrefix = "", nameLocal = "term"}) [ContentText "dog"]])
>>> parse (runTokenParser tokenStartTag) "<updated>2003-12-13T18:30:02Z</updated>"
Done "2003-12-13T18:30:02Z</updated>" (StartTag (QName {namePrefix = "", nameLocal = "updated"}) [])

tokenEndTag :: CharParsing m => Monad m => TokenParser m QName Source #

https://www.w3.org/TR/REC-xml/#NT-ETag

>>> parseOnly (runTokenParser tokenEndTag) "</termdef>"
Right (QName {namePrefix = "", nameLocal = "termdef"})

tokenEmptyElementTag :: CharParsing m => Monad m => TokenParser m EmptyElementTag Source #

https://www.w3.org/TR/REC-xml/#NT-EmptyElemTag

>>> parseOnly (runTokenParser tokenEmptyElementTag) "<IMG align='left' src='http://www.w3.org/Icons/WWW/w3c_home' />"
Right (EmptyElementTag (QName {namePrefix = "", nameLocal = "IMG"}) [Attribute (QName {namePrefix = "", nameLocal = "align"}) [ContentText "left"],Attribute (QName {namePrefix = "", nameLocal = "src"}) [ContentText "http://www.w3.org/Icons/WWW/w3c_home"]])

tokenData :: CharParsing m => Monad m => TokenParser m [Content] Source #

https://www.w3.org/TR/REC-xml/#NT-CharData

>>> parseOnly (runTokenParser tokenData) "Rock &amp; roll"
Right [ContentText "Rock ",ContentReference (EntityRef "amp"),ContentText " roll"]