{-# LANGUAGE LambdaCase #-}
module Xmlbf.XmlHtml
( element
, element'
, nodesXml
, nodesHtml
) where
import Control.Monad ((>=>))
import qualified Data.ByteString as B
import qualified Data.HashMap.Strict as HM
import Data.Maybe (catMaybes)
import qualified Text.XmlHtml as XmlHtml
import qualified Xmlbf
--------------------------------------------------------------------------------
-- XmlHtml support
-- | Convert a 'XmlHtml.Node' from "Text.XmlHtml" into an 'Node' from "Xmlbf",
-- if possible.
element
:: XmlHtml.Node -- ^ A 'XmlHtml.Node' from "Text.XmlHtml".
-> Either String Xmlbf.Node -- ^ A 'Xmlbf.Node' from "Xmlbf".
element = element' >=> \case
Just x -> Right x
Nothing -> Left "Comments not supported"
-- | Like 'element', but returns 'Nothing' in case the given node is
-- a 'XmlHtml.Comment'. Children 'XmlHtml.Comment's are discarded from the
-- result.
element'
:: XmlHtml.Node -- ^ A 'XmlHtml.Node' from "Text.XmlHtml".
-> Either String (Maybe Xmlbf.Node)
element' = \case
XmlHtml.Comment _ -> Right Nothing
XmlHtml.TextNode t -> Right (Just (Xmlbf.Text t))
XmlHtml.Element t as cs -> do
cs' <- catMaybes <$> traverse element' cs
Just <$> Xmlbf.element t (HM.fromList as) cs'
-- | Parses a given UTF8-encoded raw XML fragment into @a@, using the @xeno@
-- Haskell library, so all of @xeno@'s parsing quirks apply.
--
-- You can provide the output of this function as input to "Xmlbf"'s
-- 'Xmlbf.runParser'.
--
-- The given XML can contain more zero or more text or element nodes.
--
-- Comments are discarded from the resulting nodes and their children.
--
-- Surrounding whitespace is not stripped.
nodesXml
:: B.ByteString -- ^ Raw XML fragment.
-> Either String [Xmlbf.Node] -- ^ 'Xmlbf.Node's from "Xmlbf"
nodesXml = \bs -> case XmlHtml.parseXML "xmlbf-xmlhtml-input.xml" bs of
Left e -> Left ("Malformed XML: " ++ show e)
Right d -> catMaybes <$> traverse element' (XmlHtml.docContent d)
-- | Like 'nodesXml', but parses using @xmlhtml@'s quirks HTML mode.
nodesHtml
:: B.ByteString -- ^ Raw HTML fragment.
-> Either String [Xmlbf.Node] -- ^ 'Xmlbf.Node's from "Xmlbf"
nodesHtml = \bs -> case XmlHtml.parseHTML "xmlbf-xmlhtml-input.html" bs of
Left e -> Left ("Malformed HTML: " ++ show e)
Right d -> catMaybes <$> traverse element' (XmlHtml.docContent d)