{-# LANGUAGE LambdaCase #-} module Xmlbf.XmlHtml ( element , element' , nodesXml , nodesHtml ) where import Control.Monad ((>=>)) import qualified Data.ByteString as B import qualified Data.HashMap.Strict as HM import Data.Maybe (catMaybes) import qualified Text.XmlHtml as XmlHtml import qualified Xmlbf -------------------------------------------------------------------------------- -- XmlHtml support -- | Convert a 'XmlHtml.Node' from "Text.XmlHtml" into an 'Node' from "Xmlbf", -- if possible. element :: XmlHtml.Node -- ^ A 'XmlHtml.Node' from "Text.XmlHtml". -> Either String Xmlbf.Node -- ^ A 'Xmlbf.Node' from "Xmlbf". element = element' >=> \case Just x -> Right x Nothing -> Left "Comments not supported" -- | Like 'element', but returns 'Nothing' in case the given node is -- a 'XmlHtml.Comment'. Children 'XmlHtml.Comment's are discarded from the -- result. element' :: XmlHtml.Node -- ^ A 'XmlHtml.Node' from "Text.XmlHtml". -> Either String (Maybe Xmlbf.Node) element' = \case XmlHtml.Comment _ -> Right Nothing XmlHtml.TextNode t -> Right (Just (Xmlbf.Text t)) XmlHtml.Element t as cs -> do cs' <- catMaybes <$> traverse element' cs Just <$> Xmlbf.element t (HM.fromList as) cs' -- | Parses a given UTF8-encoded raw XML fragment into @a@, using the @xeno@ -- Haskell library, so all of @xeno@'s parsing quirks apply. -- -- You can provide the output of this function as input to "Xmlbf"'s -- 'Xmlbf.runParser'. -- -- The given XML can contain more zero or more text or element nodes. -- -- Comments are discarded from the resulting nodes and their children. -- -- Surrounding whitespace is not stripped. nodesXml :: B.ByteString -- ^ Raw XML fragment. -> Either String [Xmlbf.Node] -- ^ 'Xmlbf.Node's from "Xmlbf" nodesXml = \bs -> case XmlHtml.parseXML "xmlbf-xmlhtml-input.xml" bs of Left e -> Left ("Malformed XML: " ++ show e) Right d -> catMaybes <$> traverse element' (XmlHtml.docContent d) -- | Like 'nodesXml', but parses using @xmlhtml@'s quirks HTML mode. nodesHtml :: B.ByteString -- ^ Raw HTML fragment. -> Either String [Xmlbf.Node] -- ^ 'Xmlbf.Node's from "Xmlbf" nodesHtml = \bs -> case XmlHtml.parseHTML "xmlbf-xmlhtml-input.html" bs of Left e -> Left ("Malformed HTML: " ++ show e) Right d -> catMaybes <$> traverse element' (XmlHtml.docContent d)