{-# LANGUAGE CPP #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
module Text.RDF.RDF4H.XmlParser.Xeno
( fromXenoNode
, fromRawXml
) where
import qualified Data.Bifunctor as Bif
import qualified Data.ByteString as B
import qualified Data.HashMap.Strict as HM
#if !MIN_VERSION_base(4,13,0)
import Data.Monoid ((<>))
#else
#endif
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as TB
import qualified Data.Text.Encoding as T
import Data.Traversable (for)
import qualified HTMLEntities.Decoder
import qualified Xeno.DOM as Xeno
import qualified Text.RDF.RDF4H.XmlParser.Xmlbf as Xmlbf
fromXenoNode
:: Xeno.Node
-> Either String Xmlbf.Node
fromXenoNode :: Node -> Either String Node
fromXenoNode Node
x = do
Text
n <- ByteString -> Either String Text
decodeUtf8 (Node -> ByteString
Xeno.name Node
x)
[(Text, Text)]
as <- [(ByteString, ByteString)]
-> ((ByteString, ByteString) -> Either String (Text, Text))
-> Either String [(Text, Text)]
forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
t a -> (a -> f b) -> f (t b)
for (Node -> [(ByteString, ByteString)]
Xeno.attributes Node
x) (((ByteString, ByteString) -> Either String (Text, Text))
-> Either String [(Text, Text)])
-> ((ByteString, ByteString) -> Either String (Text, Text))
-> Either String [(Text, Text)]
forall a b. (a -> b) -> a -> b
$ \(ByteString
k,ByteString
v) -> do
(,) (Text -> Text -> (Text, Text))
-> Either String Text -> Either String (Text -> (Text, Text))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ByteString -> Either String Text
decodeUtf8 ByteString
k Either String (Text -> (Text, Text))
-> Either String Text -> Either String (Text, Text)
forall a b.
Either String (a -> b) -> Either String a -> Either String b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ByteString -> Either String Text
unescapeXmlUtf8 ByteString
v
[Node]
cs <- [Content]
-> (Content -> Either String Node) -> Either String [Node]
forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
t a -> (a -> f b) -> f (t b)
for (Node -> [Content]
Xeno.contents Node
x) ((Content -> Either String Node) -> Either String [Node])
-> (Content -> Either String Node) -> Either String [Node]
forall a b. (a -> b) -> a -> b
$ \case
Xeno.Element Node
n1 -> Node -> Either String Node
fromXenoNode Node
n1
Xeno.Text ByteString
bs -> Text -> Either String Node
Xmlbf.text' (Text -> Either String Node)
-> Either String Text -> Either String Node
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< ByteString -> Either String Text
unescapeXmlUtf8Lazy ByteString
bs
Xeno.CData ByteString
bs -> Text -> Either String Node
Xmlbf.text' (Text -> Either String Node)
-> Either String Text -> Either String Node
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< ByteString -> Either String Text
decodeUtf8Lazy ByteString
bs
Text -> HashMap Text Text -> [Node] -> Either String Node
Xmlbf.element' Text
n ([(Text, Text)] -> HashMap Text Text
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
HM.fromList [(Text, Text)]
as) [Node]
cs
fromRawXml
:: B.ByteString
-> Either String [Xmlbf.Node]
fromRawXml :: ByteString -> Either String [Node]
fromRawXml = \ByteString
bs -> case ByteString -> Either XenoException Node
Xeno.parse (ByteString
"<x>" ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString -> ByteString
dropBomUtf8 ByteString
bs ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"</x>") of
Left XenoException
e -> String -> Either String [Node]
forall a b. a -> Either a b
Left (String
"Malformed XML: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ XenoException -> String
forall a. Show a => a -> String
show XenoException
e)
Right Node
n -> Node -> Either String Node
fromXenoNode Node
n Either String Node
-> (Node -> Either String [Node]) -> Either String [Node]
forall a b.
Either String a -> (a -> Either String b) -> Either String b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
Xmlbf.Element Text
"x" HashMap Text Text
_ [Node]
cs -> [Node] -> Either String [Node]
forall a b. b -> Either a b
Right [Node]
cs
Node
_ -> String -> Either String [Node]
forall a b. a -> Either a b
Left String
"Unknown result from fromXenoNode. Please report this as a bug."
decodeUtf8 :: B.ByteString -> Either String T.Text
{-# INLINE decodeUtf8 #-}
decodeUtf8 :: ByteString -> Either String Text
decodeUtf8 ByteString
bs = (UnicodeException -> String)
-> Either UnicodeException Text -> Either String Text
forall a b c. (a -> b) -> Either a c -> Either b c
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
Bif.first UnicodeException -> String
forall a. Show a => a -> String
show (ByteString -> Either UnicodeException Text
T.decodeUtf8' ByteString
bs)
decodeUtf8Lazy :: B.ByteString -> Either String TL.Text
{-# INLINE decodeUtf8Lazy #-}
decodeUtf8Lazy :: ByteString -> Either String Text
decodeUtf8Lazy ByteString
bs = (Text -> Text) -> Either String Text -> Either String Text
forall a b. (a -> b) -> Either String a -> Either String b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Text -> Text
TL.fromStrict (ByteString -> Either String Text
decodeUtf8 ByteString
bs)
unescapeXmlUtf8 :: B.ByteString -> Either String T.Text
{-# INLINE unescapeXmlUtf8 #-}
unescapeXmlUtf8 :: ByteString -> Either String Text
unescapeXmlUtf8 ByteString
bs = (Text -> Text) -> Either String Text -> Either String Text
forall a b. (a -> b) -> Either String a -> Either String b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Text -> Text
TL.toStrict (ByteString -> Either String Text
unescapeXmlUtf8Lazy ByteString
bs)
unescapeXmlUtf8Lazy :: B.ByteString -> Either String TL.Text
{-# INLINE unescapeXmlUtf8Lazy #-}
unescapeXmlUtf8Lazy :: ByteString -> Either String Text
unescapeXmlUtf8Lazy ByteString
bs = do
Text
t <- ByteString -> Either String Text
decodeUtf8 ByteString
bs
Text -> Either String Text
forall a. a -> Either String a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Builder -> Text
TB.toLazyText (Text -> Builder
HTMLEntities.Decoder.htmlEncodedText Text
t))
dropBomUtf8 :: B.ByteString -> B.ByteString
{-# INLINE dropBomUtf8 #-}
dropBomUtf8 :: ByteString -> ByteString
dropBomUtf8 ByteString
bs | ByteString -> ByteString -> Bool
B.isPrefixOf ByteString
"\xEF\xBB\xBF" ByteString
bs = Int -> ByteString -> ByteString
B.drop Int
3 ByteString
bs
| Bool
otherwise = ByteString
bs