taggy-lens-0.1.2: Lenses for the taggy html/xml parser

Safe HaskellNone

Text.Taggy.Lens

Synopsis

Documentation

data Node

A Node is either an Element or some raw text.

Instances

Eq Node 
Show Node 
Plated Node

Plated instances are available for Element and Node, such that we can retrieve all of their transitive descendants.

>>> let markup' = "<html><foo>foo</foo>bar<baz></baz>qux</html>" :: Lazy.Text
>>> markup' ^.. html . to universe . traverse . content
["foo","bar","qux"]
AsMarkup Node

A Node is convertible to Markup

HasContent Node 
HasElements Node 
HasElement Node 

data Element

An Element here refers to a tag name, the attributes specified withing that tag, and all the children nodes of that element. An Element is basically anything but "raw" content.

Constructors

Element 

Fields

eltName :: !Text

name of the element. e.g a for a

eltAttrs :: !(HashMap AttrName AttrValue)

a (hash)map from attribute names to attribute values

eltChildren :: [Node]

children Nodes

name :: Lens' Element TextSource

A lens into the name of a given DOM element.

attrs :: Lens' Element (HashMap Text Text)Source

A lens into the attributes of a given DOM element.

>>> let markup = "<html xmlns=\"http://www.w3.org/1999/xhtml\"><head></head><body></body></html>" :: Lazy.Text
>>> markup ^? html . element . attrs
Just (fromList [("xmlns","http://www.w3.org/1999/xhtml")])
>>> markup ^? html . element . attrs . at "xmlns" & join
Just "http://www.w3.org/1999/xhtml"
>>> markup ^? html . element . attrs . at "style" & join
Nothing
>>> markup & html . element . attrs . at "xmlns" ?~ "http://www.w3.org/TR/html4/"
"<html xmlns=\"http://www.w3.org/TR/html4/\"><head></head><body></body></html>"

children :: Lens' Element [Node]Source

A lens into the child nodes, elements, or contents of a given DOM element.

>>> let markup = "<html><title>Your title goes here.</title><body>Your content goes here.</body></html>" :: Lazy.Text
>>> markup ^? html . element . children . ix 0
Just (NodeElement (Element {eltName = "title", eltAttrs = fromList [], eltChildren = [NodeContent "Your title goes here."]}))
>>> markup & html . element . children . ix 0 . element . children .~ [NodeContent "Lenses!"]
"<html><title>Lenses!</title><body>Your content goes here.</body></html>"

htmlWith :: Bool -> Prism' Text NodeSource

HTML document parsing and rendering.

>>> let markup = "<html><head><title>My Page</title></head><body><blink>Hello, world!</blink></body></html>" :: Lazy.Text
>>> markup ^? htmlWith False
Just (NodeElement (Element {eltName = "html", eltAttrs = fromList [], eltChildren = [NodeElement (Element {eltName = "head", eltAttrs = fromList [], eltChildren = [NodeElement (Element {eltName = "title", eltAttrs = fromList [], eltChildren = [NodeContent "My Page"]})]}),NodeElement (Element {eltName = "body", eltAttrs = fromList [], eltChildren = [NodeElement (Element {eltName = "blink", eltAttrs = fromList [], eltChildren = [NodeContent "Hello, world!"]})]})]}))
>>> (markup ^? htmlWith False) ^. _Just . re (htmlWith False) == markup
True

The provided boolean specifies whether named entities should be translated to unicode. For a less general version of this prism, with translation by default, see 'html.'

>>> (True, False) & both %~ \convert -> "<span>&hearts;</span>" ^? htmlWith convert . element . contents
(Just "\9829",Just "&hearts;")

The parser produces a single node; if markup describes more than one element at the top-level, all but the first are discarded.

>>> (markup <> markup) ^? htmlWith False == markup ^? htmlWith False
True

html :: Prism' Text NodeSource

Like htmlWith, but converts named entities by default.

>>> let markup = "<html><head><title>My Page</title></head><body><blink>Hello, world!</blink></body></html>" :: Lazy.Text
>>> markup ^? htmlWith True == markup ^? html
True

class HasElement a whereSource

Construct a node from an element, or attempt to extract an element from a node.

>>> let markup = "<html><head><title>My Page</title></head><body><blink>Hello, world!</blink></body></html>" :: Lazy.Text
>>> markup ^? html . element
Just (Element {eltName = "html", eltAttrs = fromList [], eltChildren = [NodeElement (Element {eltName = "head", eltAttrs = fromList [], eltChildren = [NodeElement (Element {eltName = "title", eltAttrs = fromList [], eltChildren = [NodeContent "My Page"]})]}),NodeElement (Element {eltName = "body", eltAttrs = fromList [], eltChildren = [NodeElement (Element {eltName = "blink", eltAttrs = fromList [], eltChildren = [NodeContent "Hello, world!"]})]})]})
>>> markup ^? html . element. re element == markup ^? html
True

content :: Prism' Node TextSource

Construct a node from text, or attempt to extract text from a node.

>>> let markup = "<foo>bar</foo>" :: Lazy.Text
>>> markup ^? html . element . children . traverse . content
Just "bar"
>>> markup & html . element . children . traverse . content .~ "baz"
"<foo>baz</foo>"

attr :: Text -> Lens' Element (Maybe Text)Source

Given an attribute name, a lens into its value for a given element.

>>> let markup = "<html><foo class=\"a\"></foo><bar class=\"b\"></bar></html>" :: Lazy.Text
>>> markup ^.. htmlWith False . elements . attr "class" . _Just
["a","b"]

attributed :: Fold (HashMap Text Text) a -> Traversal' Element ElementSource

A traversal into attributes matching a provided property.

>>> let markup = "<html><foo class=\"a\"></foo><bar class=\"a\"></bar></html>" :: Lazy.Text
>>> markup ^.. htmlWith False . elements . attributed (ix "class" . only "a") . name
["foo","bar"]

named :: Fold Text a -> Traversal' Element ElementSource

A traversal into elements with a name matching a provided property.

>>> let markup = "<html><foo>bar</foo><baz>qux</baz><quux>corge</quux></html>" :: Lazy.Text
>>> markup ^.. htmlWith False . elements . named (to T.length . only 3) . name
["foo","baz"]

class HasElements a whereSource

A traversal into the immediate children of an element that are also elements, directly or via a Node.

>>> let markup = "<html><foo></foo><bar></bar><baz></baz></html>" :: Lazy.Text
>>> markup ^.. html . element . elements . name
["foo","bar","baz"]
>>> markup ^.. html . elements . element . name
["foo","bar","baz"]

class HasContent a whereSource

A traversal into the immediate children of an element that are text content, directly or via a Node.

>>> let markup = "<html><foo></foo>bar<baz></baz>qux</html>" :: Lazy.Text
>>> markup ^.. html . element . contents
["bar","qux"]
>>> markup ^.. html . contents
["bar","qux"]

allNamed :: HasElement a => Fold Text b -> Fold a ElementSource

A fold into all elements (current and descendants) who's name satisfy a provided property.

>>> let markup' = "<html><foo class=\"woah\">bar<qux><foo>baz</foo></qux></foo></html>" :: Lazy.Text
>>> markup' ^.. html . allNamed (only "foo") . contents
["bar","baz"]
>>> markup' ^.. html . allNamed (only "foo") . attributed (ix "class" . only "woah") . contents
["bar"]

allAttributed :: HasElement a => Fold (HashMap Text Text) b -> Fold a ElementSource

A fold into all elements (current and descendants) who's attributes satisfy a provided property.

>>> let markup' = "<html><foo class=\"woah\">bar<qux class=\"woah\"></qux></foo><quux class=\"woah\"></quux></html>" :: Lazy.Text
>>> markup' ^.. html . allAttributed (folded . only "woah") . name
["foo","qux","quux"]
>>> markup' ^.. html . allAttributed (folded . only "woah") . named (only "foo") . name
["foo"]