{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}

module Xmlbf.Xeno
 ( fromXenoNode
 , fromRawXml
 ) where

import qualified Data.Bifunctor as Bif
import qualified Data.ByteString as B
import qualified Data.HashMap.Strict as HM
import Data.Monoid ((<>))
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as TB
import qualified Data.Text.Encoding as T
import Data.Traversable (for)
import qualified HTMLEntities.Decoder
import qualified Xeno.DOM as Xeno
import qualified Xmlbf

--------------------------------------------------------------------------------
-- Xeno support

-- | Convert a 'Xeno.Node' from "Xeno.DOM" into an 'Element' from "Xmlbf".
fromXenoNode
  :: Xeno.Node -- ^ A 'Xeno.Node' from "Xeno.DOM".
  -> Either String Xmlbf.Node -- ^ A 'Xmlbf.Node' from "Xmlbf".
fromXenoNode :: Node -> Either String Node
fromXenoNode Node
x = do
  Text
n <- ByteString -> Either String Text
decodeUtf8 (Node -> ByteString
Xeno.name Node
x)
  [(Text, Text)]
as <- forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
t a -> (a -> f b) -> f (t b)
for (Node -> [(ByteString, ByteString)]
Xeno.attributes Node
x) forall a b. (a -> b) -> a -> b
$ \(ByteString
k,ByteString
v) -> do
     (,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ByteString -> Either String Text
decodeUtf8 ByteString
k forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ByteString -> Either String Text
unescapeXmlUtf8 ByteString
v
  [Node]
cs <- forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
t a -> (a -> f b) -> f (t b)
for (Node -> [Content]
Xeno.contents Node
x) forall a b. (a -> b) -> a -> b
$ \case
     Xeno.Element Node
n1 -> Node -> Either String Node
fromXenoNode Node
n1
     Xeno.Text ByteString
bs -> Text -> Either String Node
Xmlbf.text' forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< ByteString -> Either String Text
unescapeXmlUtf8Lazy ByteString
bs
     Xeno.CData ByteString
bs -> Text -> Either String Node
Xmlbf.text' forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< ByteString -> Either String Text
decodeUtf8Lazy ByteString
bs
  Text -> HashMap Text Text -> [Node] -> Either String Node
Xmlbf.element' Text
n (forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
HM.fromList [(Text, Text)]
as) [Node]
cs

-- | Parses a given UTF8-encoded raw XML fragment into @a@, using the @xeno@
-- Haskell library, so all of @xeno@'s parsing quirks apply.
--
-- You can provide the output of this function as input to "Xmlbf"'s
-- 'Xmlbf.parse'.
--
-- The given XML can contain more zero or more text or element nodes.
--
-- Surrounding whitespace is not stripped.
fromRawXml
  :: B.ByteString                 -- ^ Raw XML fragment.
  -> Either String [Xmlbf.Node]   -- ^ 'Xmlbf.Node's from "Xmlbf"
fromRawXml :: ByteString -> Either String [Node]
fromRawXml = \ByteString
bs -> case ByteString -> Either XenoException Node
Xeno.parse (ByteString
"<x>" forall a. Semigroup a => a -> a -> a
<> ByteString -> ByteString
dropBomUtf8 ByteString
bs forall a. Semigroup a => a -> a -> a
<> ByteString
"</x>") of
  Left XenoException
e -> forall a b. a -> Either a b
Left (String
"Malformed XML: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show XenoException
e)
  Right Node
n -> Node -> Either String Node
fromXenoNode Node
n forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
    Xmlbf.Element Text
"x" HashMap Text Text
_ [Node]
cs -> forall a b. b -> Either a b
Right [Node]
cs
    Node
_ -> forall a b. a -> Either a b
Left String
"Unknown result from fromXenoNode. Please report this as a bug."

--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
-- Miscellaneous

decodeUtf8 :: B.ByteString -> Either String T.Text
{-# INLINE decodeUtf8 #-}
decodeUtf8 :: ByteString -> Either String Text
decodeUtf8 ByteString
bs = forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
Bif.first forall a. Show a => a -> String
show (ByteString -> Either UnicodeException Text
T.decodeUtf8' ByteString
bs)

decodeUtf8Lazy :: B.ByteString -> Either String TL.Text
{-# INLINE decodeUtf8Lazy #-}
decodeUtf8Lazy :: ByteString -> Either String Text
decodeUtf8Lazy ByteString
bs = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Text -> Text
TL.fromStrict (ByteString -> Either String Text
decodeUtf8 ByteString
bs)

unescapeXmlUtf8 :: B.ByteString -> Either String T.Text
{-# INLINE unescapeXmlUtf8 #-}
unescapeXmlUtf8 :: ByteString -> Either String Text
unescapeXmlUtf8 ByteString
bs = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Text -> Text
TL.toStrict (ByteString -> Either String Text
unescapeXmlUtf8Lazy ByteString
bs)

unescapeXmlUtf8Lazy :: B.ByteString -> Either String TL.Text
{-# INLINE unescapeXmlUtf8Lazy #-}
unescapeXmlUtf8Lazy :: ByteString -> Either String Text
unescapeXmlUtf8Lazy ByteString
bs = do
   Text
t <- ByteString -> Either String Text
decodeUtf8 ByteString
bs
   forall (f :: * -> *) a. Applicative f => a -> f a
pure (Builder -> Text
TB.toLazyText (Text -> Builder
HTMLEntities.Decoder.htmlEncodedText Text
t))

dropBomUtf8 :: B.ByteString -> B.ByteString
{-# INLINE dropBomUtf8 #-}
dropBomUtf8 :: ByteString -> ByteString
dropBomUtf8 ByteString
bs | ByteString -> ByteString -> Bool
B.isPrefixOf ByteString
"\xEF\xBB\xBF" ByteString
bs = Int -> ByteString -> ByteString
B.drop Int
3 ByteString
bs
               | Bool
otherwise = ByteString
bs