{-# LANGUAGE LambdaCase #-}
module Xmlbf.XmlHtml
( fromXmlHtmlNode
, fromRawXml
, fromRawHtml
) where
import qualified Data.ByteString as B
import qualified Data.HashMap.Strict as HM
import qualified Text.XmlHtml as XmlHtml
import qualified Xmlbf
fromXmlHtmlNode
:: XmlHtml.Node
-> Either String Xmlbf.Node
fromXmlHtmlNode :: Node -> Either String Node
fromXmlHtmlNode = \case
XmlHtml.Comment Text
_ -> forall a b. a -> Either a b
Left String
"Comments not supported"
XmlHtml.TextNode Text
t -> Text -> Either String Node
Xmlbf.text' Text
t
XmlHtml.Element Text
t [(Text, Text)]
as [Node]
cs -> do
[Node]
cs' <- forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
traverse Node -> Either String Node
fromXmlHtmlNode [Node]
cs
Text -> HashMap Text Text -> [Node] -> Either String Node
Xmlbf.element' Text
t (forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
HM.fromList [(Text, Text)]
as) [Node]
cs'
fromRawXml
:: B.ByteString
-> Either String [Xmlbf.Node]
fromRawXml :: ByteString -> Either String [Node]
fromRawXml = \ByteString
bs -> case String -> ByteString -> Either String Document
XmlHtml.parseXML String
"xmlbf-xmlhtml-input.xml" ByteString
bs of
Left String
e -> forall a b. a -> Either a b
Left (String
"Malformed XML: " forall a. [a] -> [a] -> [a]
++ String
e)
Right Document
d -> forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
traverse Node -> Either String Node
fromXmlHtmlNode (Document -> [Node]
XmlHtml.docContent Document
d)
fromRawHtml
:: B.ByteString
-> Either String [Xmlbf.Node]
fromRawHtml :: ByteString -> Either String [Node]
fromRawHtml = \ByteString
bs -> case String -> ByteString -> Either String Document
XmlHtml.parseHTML String
"xmlbf-xmlhtml-input.html" ByteString
bs of
Left String
e -> forall a b. a -> Either a b
Left (String
"Malformed HTML: " forall a. [a] -> [a] -> [a]
++ String
e)
Right Document
d -> forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
traverse Node -> Either String Node
fromXmlHtmlNode (Document -> [Node]
XmlHtml.docContent Document
d)