-- | Efficient decoding and encoding of HTML entities in text.
module Text.HTMLEntity
( -- * Usage
decode
, decode'
, encode
-- ** Partial decoding/encoding
-- $partial
, decodePartial
, encodePartial
) where
import Data.Attoparsec.Text
import Data.Text (Text)
import Prelude.Compat
import Text.HTMLEntity.Parser
-- $setup
-- >>> :set -XOverloadedStrings
-- >>> import qualified Data.Text.IO as T
-- *** Decoding/encoding
-- | Decode HTML entities contained in the given text.
--
-- Runs 'decodeParser' with 'parseOnly'. Returns @Right decodedText@ on
-- success and @Left decodeError@ on failure. The parser will do its best
-- to explain the problem in @decodeError@.
--
-- Named entities are replaced by the characters they stand for:
--
-- >>> mapM_ T.putStrLn $ decode "H&eacute;llo w&circledast;rld!"
-- Héllo w⊛rld!
--
-- Unknown entity names are reported as errors:
--
-- >>> decode "&NonExistentEntity;"
-- Left "entity: Failed reading: Unknown entity name NonExistentEntity"
--
-- So are numeric references that do not fit in a 'Char':
--
-- >>> decode "&#100000000;"
-- Left "entity: Failed reading: 100000000 is out of Char range"
--
-- >>> decode "&#xffffffff;"
-- Left "entity: Failed reading: 4294967295 is out of Char range"
decode :: Text -> Either String Text
decode = parseOnly decodeParser
{-# INLINE decode #-}
-- | Like 'decode', except that if a decode error occurs, the original
-- input is returned unmodified. Use if you're certain that your input is
-- well-formed.
--
-- >>> T.putStrLn $ decode' "W&doteq;ll-formed inpu&vDash;"
-- W≐ll-formed inpu⊨
--
-- >>> T.putStrLn $ decode' "Utter nns&CurlyE;nse"
-- Utter nns&CurlyE;nse
decode' :: Text -> Text
decode' input =
  case decode input of
    Right decoded -> decoded
    -- Decoding failed: discard the error and hand back the caller's text
    -- exactly as it was passed in.
    Left _ -> input
{-# INLINE decode' #-}
-- | Encodes the input for use as text in an HTML document.
--
-- 'encode' will use named entities where possible, except for most symbols
-- in the ASCII block, where it was deemed this would result in
-- unnecessarily bloated output.
--
-- >>> T.putStrLn $ encode "Héllo wörld!"
-- H&eacute;llo w&ouml;rld!
--
-- >>> T.putStrLn $ encode "x ≂̸ y"
-- x &NotEqualTilde; y
--
-- >>> T.putStrLn $ encode "\2534\6188"
-- &#x9e6;&#x182c;
encode :: Text -> Text
encode = unwrap . parseOnly encodeParser
  where
    unwrap (Right encoded) = encoded
    -- The message treats a failure of 'encodeParser' as an internal bug
    -- rather than a problem with the caller's input.
    unwrap (Left _) = error "html-entity internal encoding error"
{-# INLINE encode #-}
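{- $partial
These functions are provided for convenience if you're using attoparsec
in a streaming style. Both return attoparsec 'Result' values, which you
consume exactly as you would the result of any other incremental parse
(for example, by supplying further input with 'feed').
-}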
-- | Partial 'decode': runs 'decodeParser' with 'parse' instead of
-- 'parseOnly', so the outcome is reported as a 'Result' rather than as an
-- 'Either'.
decodePartial :: Text -> Result Text
decodePartial = parse decodeParser
{-# INLINE decodePartial #-}
-- | Partial 'encode': runs 'encodeParser' with 'parse' and returns the
-- 'Result' as it is. Unlike 'encode', a failed parse is not turned into a
-- call to 'error'; it is left in the 'Result' for the caller to inspect.
encodePartial :: Text -> Result Text
encodePartial = parse encodeParser
{-# INLINE encodePartial #-}