Copyright | Dennis Gosnell 2017 |
---|---|
License | BSD3 |
Maintainer | Dennis Gosnell (cdep.illabout@gmail.com) |
Stability | experimental |
Portability | unknown |
Safe Haskell | None |
Language | Haskell2010 |
This module provides indexed Cursor
s. It has a very similar API to
Text.XML.Cursor.
The big difference is in the Cursor
type. Cursor
wraps
around a Node
, while this module's Cursor
type wraps around an
IndexedNode
.
An IndexedNode
is a data type that contains both a Node
and a NodeIndex
.
The NodeIndex
gives a way to figure out how two IndexedNode
s compare to
each other in the Document
. It gives the ability to figure out which
IndexedNode
comes earlier in the Document
. This gives the ability to sort
lists of IndexedNode
s, based on their location in the Document
. See
NodeIndex
for more information.
- type IndexedCursor = Cursor IndexedNode
- type IndexedAxis = Axis IndexedNode
- newtype NodeIndex = NodeIndex {
- unNodeIndex :: Seq Int
- class HasNodeIndex a where
- rootIndex :: NodeIndex
- data IndexedNode = IndexedNode {}
- indexedCursorNodeIndex :: IndexedCursor -> NodeIndex
- nodeToRootIndexedNode :: Node -> IndexedNode
- toChildIndex :: NodeIndex -> Int -> NodeIndex
- nodeToIndexedNode :: NodeIndex -> Node -> IndexedNode
- childNodeToIndexedNode :: NodeIndex -> Int -> Node -> IndexedNode
- childNodesToIndexedNodes :: NodeIndex -> [Node] -> [IndexedNode]
- fromDocument :: Document -> IndexedCursor
- fromNode :: Node -> IndexedCursor
- toCursor :: (node -> [node]) -> node -> Cursor node
- node :: Cursor node -> node
- child :: Cursor node -> [Cursor node]
- parent :: Axis node
- precedingSibling :: Axis node
- followingSibling :: Axis node
- ancestor :: Axis node
- descendant :: Axis node
- orSelf :: Axis node -> Axis node
- preceding :: Axis node
- following :: Axis node
- (&|) :: (Cursor node -> [a]) -> (a -> b) -> Cursor node -> [b]
- (&/) :: Axis node -> (Cursor node -> [a]) -> Cursor node -> [a]
- (&//) :: Axis node -> (Cursor node -> [a]) -> Cursor node -> [a]
- (&.//) :: Axis node -> (Cursor node -> [a]) -> Cursor node -> [a]
- ($|) :: Cursor node -> (Cursor node -> a) -> a
- ($/) :: Cursor node -> (Cursor node -> [a]) -> [a]
- ($//) :: Cursor node -> (Cursor node -> [a]) -> [a]
- ($.//) :: Cursor node -> (Cursor node -> [a]) -> [a]
- (>=>) :: Monad m => (a -> m b) -> (b -> m c) -> a -> m c
- check :: Boolean b => (Cursor a -> b) -> Axis a
- checkIndexedNode :: Boolean b => (IndexedNode -> b) -> IndexedAxis
- checkElement :: Boolean b => (Element -> b) -> IndexedAxis
- checkName :: Boolean b => (Name -> b) -> IndexedAxis
- element :: Name -> IndexedAxis
- content :: IndexedCursor -> [Text]
- attribute :: Name -> IndexedCursor -> [Text]
- attributeMay :: Name -> IndexedCursor -> Maybe Text
- laxAttribute :: Text -> IndexedCursor -> [Text]
- hasAttribute :: Name -> IndexedAxis
- attributeIs :: Name -> Text -> IndexedAxis
- descendantElementsNamed :: Name -> IndexedAxis
- ancestorElementsNamed :: Name -> IndexedAxis
- descendantElementsNamedWithAttr :: Name -> Name -> Text -> IndexedAxis
- descendantContent :: IndexedCursor -> [Text]
- attrValForElemCursor :: Name -> IndexedCursor -> Maybe Text
- indexedCursorFromByteString_ :: ByteString -> IndexedCursor
- indexedCursorFromByteString :: ByteString -> Either SomeException IndexedCursor
- indexedCursorFromText_ :: Text -> IndexedCursor
- indexedCursorFromText :: Text -> Either SomeException IndexedCursor
- indexedCursorFromByteStringWithOpts_ :: ParseSettings -> ByteString -> IndexedCursor
- indexedCursorFromByteStringWithOpts :: ParseSettings -> ByteString -> Either SomeException IndexedCursor
- indexedCursorFromTextWithOpts_ :: ParseSettings -> Text -> IndexedCursor
- indexedCursorFromTextWithOpts :: ParseSettings -> Text -> Either SomeException IndexedCursor
- pattern IndexedNodeContent :: Text -> IndexedNode
- pattern IndexedNodeElement :: Element -> IndexedNode
Cursor
type IndexedCursor = Cursor IndexedNode Source #
This is similar to Cursor
except for IndexedNode
.
type IndexedAxis = Axis IndexedNode Source #
This is similar to Axis
except for IndexedNode
.
NodeIndex
and IndexedNode
Index for a Node
in a Document
.
The root element has a value of '[]'. Every child element is given an
Int
index as the first element of the list, and the grandchild elements
are given an Int
index as the second element of the list, and so on. If
there are multiple root elements, then '[]' acts as a "virtual" root
element that contains all actual root elements.
>>>
let cursor = indexedCursorFromText_ "<foo><bar/></foo>"
>>>
unNodeIndex $ indexedCursorNodeIndex cursor
fromList []
This function will be used in the following examples.
>>>
:{
let getNodeIndex :: [IndexedCursor] -> Seq Int getNodeIndex = unNodeIndex . indexedCursorNodeIndex . head :}
The index of the first child of the root be [0]
>>>
let cursor = indexedCursorFromText_ "<foo><bar/><baz/></foo>"
>>>
getNodeIndex $ child cursor
fromList [0]
The index of the second child of the root would be [1]
.
>>>
let cursor = indexedCursorFromText_ "<foo><bar/><baz/></foo>"
>>>
getNodeIndex $ cursor $| child >=> followingSibling
fromList [1]
The index of the third child of the root would be [2]
.
>>>
let cursor = indexedCursorFromText_ "<foo><bar/><baz/><zap/></foo>"
>>>
getNodeIndex $ cursor $| child >=> followingSibling >=> followingSibling
fromList [2]
The index of the first child of the first child of the root would be
[0, 0]
.
>>>
let cursor = indexedCursorFromText_ "<foo><bar><hello/></bar></foo>"
>>>
getNodeIndex $ cursor $| child >=> child
fromList [0,0]
The index of the second child of the first child of the root would be
[0, 1]
(since the [Int]
is stored reversed).
>>>
let cursor = indexedCursorFromText_ "<foo><bar><hello/><bye/></bar></foo>"
>>>
getNodeIndex $ cursor $| child >=> child >=> followingSibling
fromList [0,1]
The index of the third child of the fourth child of the root would be
[3, 2]
.
>>>
let doc = "<foo><zero/><one/><two/><three><sub0/><sub1/><sub2/></three></foo>"
>>>
let cursor = indexedCursorFromText_ doc
>>>
:{
let xpath = child >=> -- focusing on <zero/> followingSibling >=> -- focusing on <one/> followingSibling >=> -- focusing on <two/> followingSibling >=> -- focusing on <three/> child >=> -- focusing on the <sub0/> element followingSibling >=> -- focusing on the <sub1/> element followingSibling -- focusing on the <sub2/> eleemnt in getNodeIndex $ xpath cursor :} fromList [3,2]
class HasNodeIndex a where Source #
data IndexedNode Source #
IndexedNode
just wraps together a Node
and a NodeIndex
for that
Node
.
indexedCursorNodeIndex :: IndexedCursor -> NodeIndex Source #
Get the NodeIndex
from the IndexedNode
pointed to by an
IndexedCursor
.
nodeToRootIndexedNode :: Node -> IndexedNode Source #
Convert a Node
to a root IndexedNode
.
nodeToIndexedNode :: NodeIndex -> Node -> IndexedNode Source #
Given a NodeIndex
, create an IndexedNode
for a Node
.
childNodeToIndexedNode :: NodeIndex -> Int -> Node -> IndexedNode Source #
In
, create
an childNodeToIndexedNode
parentIndex childIndexInt childNodeIndexedNode
out of childNode
, creating its NodeIndex
using
toChildIndex
.
childNodesToIndexedNodes :: NodeIndex -> [Node] -> [IndexedNode] Source #
In
convert a list of
childNodesToIndexedNodes
parentIndex childNodesNode
childNodes
to a list of IndexNode
s using the NodeIndex
parentIndex
.
Converting
fromDocument :: Document -> IndexedCursor Source #
fromNode :: Node -> IndexedCursor Source #
Convert a Node
to a root IndexedCursor
.
Generic functions re-exported from Text.XML.Cursor.Generic
child :: Cursor node -> [Cursor node] #
The child axis. XPath: the child axis contains the children of the context node.
The parent axis. As described in XPath: the parent axis contains the parent of the context node, if there is one.
Every node but the root element of the document has a parent. Parent nodes
will always be NodeElement
s.
precedingSibling :: Axis node #
The preceding-sibling axis. XPath: the preceding-sibling axis contains all the preceding siblings of the context node [...].
followingSibling :: Axis node #
The following-sibling axis. XPath: the following-sibling axis contains all the following siblings of the context node [...].
The ancestor axis. XPath: the ancestor axis contains the ancestors of the context node; the ancestors of the context node consist of the parent of context node and the parent's parent and so on; thus, the ancestor axis will always include the root node, unless the context node is the root node.
descendant :: Axis node #
The descendant axis. XPath: the descendant axis contains the descendants of the context node; a descendant is a child or a child of a child and so on; thus the descendant axis never contains attribute or namespace nodes.
orSelf :: Axis node -> Axis node #
Modify an axis by adding the context node itself as the first element of the result list.
The preceding axis. XPath: the preceding axis contains all nodes in the same document as the context node that are before the context node in document order, excluding any ancestors and excluding attribute nodes and namespace nodes.
The following axis. XPath: the following axis contains all nodes in the same document as the context node that are after the context node in document order, excluding any descendants and excluding attribute nodes and namespace nodes.
Generic operators re-exported from Text.XML.Cursor.Generic
(&|) :: (Cursor node -> [a]) -> (a -> b) -> Cursor node -> [b] infixr 1 #
Apply a function to the result of an axis.
(&/) :: Axis node -> (Cursor node -> [a]) -> Cursor node -> [a] infixr 1 #
Combine two axes so that the second works on the children of the results of the first.
(&//) :: Axis node -> (Cursor node -> [a]) -> Cursor node -> [a] infixr 1 #
Combine two axes so that the second works on the descendants of the results of the first.
(&.//) :: Axis node -> (Cursor node -> [a]) -> Cursor node -> [a] infixr 1 #
Combine two axes so that the second works on both the result nodes, and their descendants.
($/) :: Cursor node -> (Cursor node -> [a]) -> [a] infixr 1 #
Apply an axis to the children of a 'Cursor node'.
($//) :: Cursor node -> (Cursor node -> [a]) -> [a] infixr 1 #
Apply an axis to the descendants of a 'Cursor node'.
($.//) :: Cursor node -> (Cursor node -> [a]) -> [a] infixr 1 #
Apply an axis to a 'Cursor node' as well as its descendants.
(>=>) :: Monad m => (a -> m b) -> (b -> m c) -> a -> m c infixr 1 #
Left-to-right Kleisli composition of monads.
"check" functions for IndexedCursor
checkIndexedNode :: Boolean b => (IndexedNode -> b) -> IndexedAxis Source #
Filter nodes that don't pass a check.
checkElement :: Boolean b => (Element -> b) -> IndexedAxis Source #
Filter elements that don't pass a check, and remove all non-elements.
checkName :: Boolean b => (Name -> b) -> IndexedAxis Source #
Filter elements that don't pass a name check, and remove all non-elements.
XPath-style functions for IndexedCursor
element :: Name -> IndexedAxis Source #
Select only those elements with a matching tag name.
XPath: /A node test that is a QName is true if and only if the type of the node (see [5 Data Model]) is the principal node type and has an expanded-name equal to the expanded-name specified by the QName./
content :: IndexedCursor -> [Text] Source #
Select only text nodes, and directly give the Content
values.
XPath: The node test text() is true for any text node.
Note that this is not strictly an Axis
, but will work with most combinators.
>>>
let cursor = indexedCursorFromText_ "<foo>hello<bar/>bye</foo>"
>>>
cursor $| child >=> content
["hello","bye"]>>>
cursor $| child >=> child >=> content
[]
attribute :: Name -> IndexedCursor -> [Text] Source #
Select attributes on the current element (or nothing if it is not an element).
XPath: /the attribute axis contains the attributes of the context node; the axis will be empty unless the context node is an element/
Note that this is not strictly an Axis
, but will work with most combinators.
The return list of the generalised axis contains as elements lists of Content
elements, each full list representing an attribute value.
>>>
let cursor = indexedCursorFromText_ "<foo hello='cat' bar='3'>hello world</foo>"
>>>
cursor $| attribute "hello"
["cat"]>>>
cursor $| attribute "doesntexist"
[]>>>
cursor $| child >=> attribute "attroftext"
[]
attributeMay :: Name -> IndexedCursor -> Maybe Text Source #
laxAttribute :: Text -> IndexedCursor -> [Text] Source #
Select attributes on the current element (or nothing if it is not an element). Namespace and case are ignored.
XPath: /the attribute axis contains the attributes of the context node; the axis will be empty unless the context node is an element/
Note that this is not strictly an Axis
, but will work with most combinators.
The return list of the generalised axis contains as elements lists of Content
elements, each full list representing an attribute value.
>>>
let cursor = indexedCursorFromText_ "<foo HellO='cat'/>"
>>>
cursor $| laxAttribute "HellO"
["cat"]>>>
cursor $| laxAttribute "Hello"
["cat"]>>>
cursor $| laxAttribute "hello"
["cat"]>>>
cursor $| laxAttribute "bye"
[]
hasAttribute :: Name -> IndexedAxis Source #
Select only those element nodes with the given attribute.
attributeIs :: Name -> Text -> IndexedAxis Source #
Select only those element nodes containing the given attribute key/value pair.
descendantElementsNamed :: Name -> IndexedAxis Source #
For a given Name
, find all descendant
Element
s with that Name
.
descendantElementsNamedWithAttr :: Name -> Name -> Text -> IndexedAxis Source #
In
, find all
descendantElementsNamedWithAttr
elemName attrKey attrValdescendant
Element
s with elemName
that have an attribute called
attrKey
with a value of attrVal
.
descendantContent :: IndexedCursor -> [Text] Source #
Find all content
in all descendant
s.
>>>
let cursor = indexedCursorFromText_ "<foo>hello<bar>lala</bar>bye</foo>"
>>>
descendantContent cursor
["hello","lala","bye"]
>>>
let cursor = indexedCursorFromText_ "<foo/>"
>>>
descendantContent cursor
[]
attrValForElemCursor :: Name -> IndexedCursor -> Maybe Text Source #
Find attribute
with Name
on the element IndexedCursor
is pointing to.
>>>
let cursor = indexedCursorFromText_ "<foo hello='3'/>"
>>>
attrValForElemCursor "hello" cursor
Just "3">>>
attrValForElemCursor "bye" cursor
Nothing
Parse directly into IndexedCursor
indexedCursorFromByteString_ :: ByteString -> IndexedCursor Source #
This reads a Document
from a ByteString
with parseLBS_
, and then
converts that Document
to an IndexedCursor
.
indexedCursorFromByteString :: ByteString -> Either SomeException IndexedCursor Source #
Similar to indexedCursorFromByteString_
but uses parseLBS
instead of
parseLBS_
.
indexedCursorFromText_ :: Text -> IndexedCursor Source #
Similar to indexedCursorFromByteString_
but uses parseText_
instead of
parseLBS_
.
indexedCursorFromText :: Text -> Either SomeException IndexedCursor Source #
Similar to indexedCursorFromByteString_
but uses parseText
instead of
parseLBS_
.
indexedCursorFromByteStringWithOpts_ :: ParseSettings -> ByteString -> IndexedCursor Source #
Similar to indexedCursorFromByteString_
but also takes ParseSettings
.
indexedCursorFromByteStringWithOpts :: ParseSettings -> ByteString -> Either SomeException IndexedCursor Source #
Similar to indexedCursorFromByteString
but also takes ParseSettings
.
indexedCursorFromTextWithOpts_ :: ParseSettings -> Text -> IndexedCursor Source #
Similar to indexedCursorFromText_
but also takes ParseSettings
.
indexedCursorFromTextWithOpts :: ParseSettings -> Text -> Either SomeException IndexedCursor Source #
Similar to indexedCursorFromText
but also takes ParseSettings
.
Patterns
pattern IndexedNodeContent :: Text -> IndexedNode Source #
pattern IndexedNodeElement :: Element -> IndexedNode Source #