{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
module Xmlbf.Xeno
( fromXenoNode
, fromRawXml
) where
import qualified Data.Bifunctor as Bif
import qualified Data.ByteString as B
import qualified Data.HashMap.Strict as HM
import Data.Monoid ((<>))
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as TB
import qualified Data.Text.Encoding as T
import Data.Traversable (for)
import qualified HTMLEntities.Decoder
import qualified Xeno.DOM as Xeno
import qualified Xmlbf
fromXenoNode
:: Xeno.Node
-> Either String Xmlbf.Node
fromXenoNode :: Node -> Either String Node
fromXenoNode Node
x = do
Text
n <- ByteString -> Either String Text
decodeUtf8 (Node -> ByteString
Xeno.name Node
x)
[(Text, Text)]
as <- forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
t a -> (a -> f b) -> f (t b)
for (Node -> [(ByteString, ByteString)]
Xeno.attributes Node
x) forall a b. (a -> b) -> a -> b
$ \(ByteString
k,ByteString
v) -> do
(,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ByteString -> Either String Text
decodeUtf8 ByteString
k forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ByteString -> Either String Text
unescapeXmlUtf8 ByteString
v
[Node]
cs <- forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
t a -> (a -> f b) -> f (t b)
for (Node -> [Content]
Xeno.contents Node
x) forall a b. (a -> b) -> a -> b
$ \case
Xeno.Element Node
n1 -> Node -> Either String Node
fromXenoNode Node
n1
Xeno.Text ByteString
bs -> Text -> Either String Node
Xmlbf.text' forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< ByteString -> Either String Text
unescapeXmlUtf8Lazy ByteString
bs
Xeno.CData ByteString
bs -> Text -> Either String Node
Xmlbf.text' forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< ByteString -> Either String Text
decodeUtf8Lazy ByteString
bs
Text -> HashMap Text Text -> [Node] -> Either String Node
Xmlbf.element' Text
n (forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
HM.fromList [(Text, Text)]
as) [Node]
cs
fromRawXml
:: B.ByteString
-> Either String [Xmlbf.Node]
fromRawXml :: ByteString -> Either String [Node]
fromRawXml = \ByteString
bs -> case ByteString -> Either XenoException Node
Xeno.parse (ByteString
"<x>" forall a. Semigroup a => a -> a -> a
<> ByteString -> ByteString
dropBomUtf8 ByteString
bs forall a. Semigroup a => a -> a -> a
<> ByteString
"</x>") of
Left XenoException
e -> forall a b. a -> Either a b
Left (String
"Malformed XML: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show XenoException
e)
Right Node
n -> Node -> Either String Node
fromXenoNode Node
n forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
Xmlbf.Element Text
"x" HashMap Text Text
_ [Node]
cs -> forall a b. b -> Either a b
Right [Node]
cs
Node
_ -> forall a b. a -> Either a b
Left String
"Unknown result from fromXenoNode. Please report this as a bug."
decodeUtf8 :: B.ByteString -> Either String T.Text
{-# INLINE decodeUtf8 #-}
decodeUtf8 :: ByteString -> Either String Text
decodeUtf8 ByteString
bs = forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
Bif.first forall a. Show a => a -> String
show (ByteString -> Either UnicodeException Text
T.decodeUtf8' ByteString
bs)
decodeUtf8Lazy :: B.ByteString -> Either String TL.Text
{-# INLINE decodeUtf8Lazy #-}
decodeUtf8Lazy :: ByteString -> Either String Text
decodeUtf8Lazy ByteString
bs = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Text -> Text
TL.fromStrict (ByteString -> Either String Text
decodeUtf8 ByteString
bs)
unescapeXmlUtf8 :: B.ByteString -> Either String T.Text
{-# INLINE unescapeXmlUtf8 #-}
unescapeXmlUtf8 :: ByteString -> Either String Text
unescapeXmlUtf8 ByteString
bs = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Text -> Text
TL.toStrict (ByteString -> Either String Text
unescapeXmlUtf8Lazy ByteString
bs)
unescapeXmlUtf8Lazy :: B.ByteString -> Either String TL.Text
{-# INLINE unescapeXmlUtf8Lazy #-}
unescapeXmlUtf8Lazy :: ByteString -> Either String Text
unescapeXmlUtf8Lazy ByteString
bs = do
Text
t <- ByteString -> Either String Text
decodeUtf8 ByteString
bs
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Builder -> Text
TB.toLazyText (Text -> Builder
HTMLEntities.Decoder.htmlEncodedText Text
t))
dropBomUtf8 :: B.ByteString -> B.ByteString
{-# INLINE dropBomUtf8 #-}
dropBomUtf8 :: ByteString -> ByteString
dropBomUtf8 ByteString
bs | ByteString -> ByteString -> Bool
B.isPrefixOf ByteString
"\xEF\xBB\xBF" ByteString
bs = Int -> ByteString -> ByteString
B.drop Int
3 ByteString
bs
| Bool
otherwise = ByteString
bs