Safe Haskell | None |
---|---|
Language | Haskell98 |
DOM-based parsing and rendering.
This module requires that all entities be resolved at parsing. If you need to interact with unresolved entities, please use Text.XML.Unresolved. This is the recommended module for most use cases.
While many of the datatypes in this module are simply re-exported from Data.XML.Types, Document, Node and Element are all redefined here to disallow the possibility of unresolved entities. Conversion functions are provided to switch between the two sets of datatypes.
For simpler, bidirectional traversal of the DOM tree, see the Text.XML.Cursor module.
- data Document = Document {}
- data Prologue :: * = Prologue {}
- data Instruction :: * = Instruction {}
- data Miscellaneous :: *
- data Node
- data Element = Element {
- elementName :: Name
- elementAttributes :: Map Name Text
- elementNodes :: [Node]
- data Name :: * = Name {}
- data Doctype :: * = Doctype {}
- data ExternalID :: *
- readFile :: ParseSettings -> FilePath -> IO Document
- parseLBS :: ParseSettings -> ByteString -> Either SomeException Document
- parseLBS_ :: ParseSettings -> ByteString -> Document
- sinkDoc :: MonadThrow m => ParseSettings -> Consumer ByteString m Document
- parseText :: ParseSettings -> Text -> Either SomeException Document
- parseText_ :: ParseSettings -> Text -> Document
- sinkTextDoc :: MonadThrow m => ParseSettings -> Consumer Text m Document
- fromEvents :: MonadThrow m => Consumer EventPos m Document
- data UnresolvedEntityException = UnresolvedEntityException (Set Text)
- data XMLException = InvalidXMLFile FilePath SomeException
- writeFile :: RenderSettings -> FilePath -> Document -> IO ()
- renderLBS :: RenderSettings -> Document -> ByteString
- renderText :: RenderSettings -> Document -> Text
- renderBytes :: (MonadBase base m, PrimMonad base) => RenderSettings -> Document -> ConduitM a ByteString m ()
- def :: Default a => a
- data ParseSettings
- psDecodeEntities :: ParseSettings -> DecodeEntities
- psRetainNamespaces :: ParseSettings -> Bool
- decodeXmlEntities :: DecodeEntities
- decodeHtmlEntities :: DecodeEntities
- data RenderSettings
- rsPretty :: RenderSettings -> Bool
- rsNamespaces :: RenderSettings -> [(Text, Text)]
- rsAttrOrder :: RenderSettings -> Name -> Map Name Text -> [(Name, Text)]
- rsUseCDATA :: RenderSettings -> Content -> Bool
- orderAttrs :: [(Name, [Name])] -> Name -> Map Name Text -> [(Name, Text)]
- toXMLDocument :: Document -> Document
- fromXMLDocument :: Document -> Either (Set Text) Document
- toXMLNode :: Node -> Node
- fromXMLNode :: Node -> Either (Set Text) Node
- toXMLElement :: Element -> Element
- fromXMLElement :: Element -> Either (Set Text) Element
Data types
data Instruction :: *
Eq Instruction | |
Data Instruction | |
Ord Instruction | |
Show Instruction | |
Generic Instruction | |
NFData Instruction | |
type Rep Instruction = D1 D1Instruction (C1 C1_0Instruction ((:*:) (S1 S1_0_0Instruction (Rec0 Text)) (S1 S1_0_1Instruction (Rec0 Text)))) |
data Miscellaneous :: *
Eq Miscellaneous | |
Data Miscellaneous | |
Ord Miscellaneous | |
Show Miscellaneous | |
Generic Miscellaneous | |
NFData Miscellaneous | |
type Rep Miscellaneous = D1 D1Miscellaneous ((:+:) (C1 C1_0Miscellaneous (S1 NoSelector (Rec0 Instruction))) (C1 C1_1Miscellaneous (S1 NoSelector (Rec0 Text)))) |
Element | |
|
data Name :: *
A fully qualified name.
Prefixes are not semantically important; they are included only to simplify pass-through parsing. When comparing names with Eq or Ord methods, prefixes are ignored.
The IsString instance supports Clark notation; see http://www.jclark.com/xml/xmlns.htm and http://infohost.nmt.edu/tcc/help/pubs/pylxml/etree-QName.html. Use the OverloadedStrings language extension for very simple Name construction:
myname :: Name
myname = "{http://example.com/ns/my-namespace}my-name"
Name | |
|
data Doctype :: *
Note: due to the incredible complexity of DTDs, this type only supports external subsets. I've tried adding internal subset types, but they quickly gain more code than the rest of this module put together.
It is possible that some future version of this library might support internal subsets, but I am no longer actively working on adding them.
data ExternalID :: *
Eq ExternalID | |
Data ExternalID | |
Ord ExternalID | |
Show ExternalID | |
Generic ExternalID | |
NFData ExternalID | |
type Rep ExternalID = D1 D1ExternalID ((:+:) (C1 C1_0ExternalID (S1 NoSelector (Rec0 Text))) (C1 C1_1ExternalID ((:*:) (S1 NoSelector (Rec0 Text)) (S1 NoSelector (Rec0 Text))))) |
Parsing
Files
Bytes
parseLBS_ :: ParseSettings -> ByteString -> Document Source
sinkDoc :: MonadThrow m => ParseSettings -> Consumer ByteString m Document Source
Text
parseText :: ParseSettings -> Text -> Either SomeException Document Source
parseText_ :: ParseSettings -> Text -> Document Source
sinkTextDoc :: MonadThrow m => ParseSettings -> Consumer Text m Document Source
Other
fromEvents :: MonadThrow m => Consumer EventPos m Document Source
data XMLException Source
Rendering
renderLBS :: RenderSettings -> Document -> ByteString Source
renderText :: RenderSettings -> Document -> Text Source
renderBytes :: (MonadBase base m, PrimMonad base) => RenderSettings -> Document -> ConduitM a ByteString m () Source
Settings
Parsing
psRetainNamespaces :: ParseSettings -> Bool Source
Whether the original xmlns attributes should be retained in the parsed values. For more information on motivation, see:
https://github.com/snoyberg/xml/issues/38
Default: False
Since 1.2.1
Entity decoding
decodeXmlEntities :: DecodeEntities Source
Default implementation of DecodeEntities: handles numeric entities and the five standard character entities (lt, gt, amp, quot, apos).
decodeHtmlEntities :: DecodeEntities Source
HTML4-compliant entity decoder. Handles numerics, the five standard character entities, and the additional 248 entities defined by HTML 4 and XHTML 1.
Note that HTML 5 introduces a drastically larger number of entities, and this code does not recognize most of them.
Rendering
rsPretty :: RenderSettings -> Bool Source
rsNamespaces :: RenderSettings -> [(Text, Text)] Source
Defines some top level namespace definitions to be used, in the form of (prefix, namespace). This has absolutely no impact on the meaning of your documents, but can increase readability by moving commonly used namespace declarations to the top level.
rsAttrOrder :: RenderSettings -> Name -> Map Name Text -> [(Name, Text)] Source
Specify how to turn the unordered attributes used by the Text.XML module into an ordered list.
rsUseCDATA :: RenderSettings -> Content -> Bool Source
Determines whether the renderer should use a CDATA node for a given piece of text content.
Default: False
Since: 1.3.3
orderAttrs :: [(Name, [Name])] -> Name -> Map Name Text -> [(Name, Text)] Source
Convenience function to create an ordering function suitable for use as the value of rsAttrOrder. The ordering function is created from an explicit ordering of the attributes, specified as a list of tuples, as follows: in each tuple, the first component is the Name of an element, and the second component is a list of attribute names. When the given element is rendered, the attributes listed, when present, appear first in the given order, followed by any other attributes in arbitrary order. If an element does not appear in the list, all of its attributes are rendered in arbitrary order.
Conversion
toXMLDocument :: Document -> Document Source
toXMLElement :: Element -> Element Source