-- | Efficient decoding and encoding of HTML entities in text.
module Text.HTMLEntity
    ( -- * Usage
      decode
    , decode'
    , encode
    -- ** Partial decoding/encoding
    -- $partial
    , decodePartial
    , encodePartial
    ) where

import Data.Attoparsec.Text
import Data.Text (Text)
import Prelude.Compat
import Text.HTMLEntity.Parser

-- $setup
-- >>> :set -XOverloadedStrings
-- >>> import qualified Data.Text.IO as T

-- *** Decoding/encoding
-- | Decode the HTML entities contained in the given text. On failure the
-- result is @Left message@, where @message@ is the parser's best attempt
-- at describing what went wrong.
--
-- >>> mapM_ T.putStrLn $ decode "H&eacute;llo w&circledast;rld!"
-- Héllo w⊛rld!
--
-- >>> decode "&NonExistentEntity;"
-- Left "entity: Failed reading: Unknown entity name NonExistentEntity"
--
-- >>> decode "&#100000000;"
-- Left "entity: Failed reading: 100000000 is out of Char range"
--
-- >>> decode "&#xFFFFFFFF;"
-- Left "entity: Failed reading: 4294967295 is out of Char range"
decode :: Text -> Either String Text
decode input = parseOnly decodeParser input
{-# INLINE decode #-}

-- | Lenient variant of 'decode': when decoding fails, the original
-- input is handed back unmodified instead of an error. Use it if you're
-- certain that your input is well-formed.
--
-- >>> T.putStrLn $ decode' "W&esdot;ll-formed inpu&vDash;"
-- W≐ll-formed inpu⊨
--
-- >>> T.putStrLn $ decode' "Utter n&#100000000;ns&CurlyE;nse"
-- Utter n&#100000000;ns&CurlyE;nse
decode' :: Text -> Text
decode' input =
    case decode input of
        Left _        -> input    -- decoding failed: fall back to the input
        Right decoded -> decoded
{-# INLINE decode' #-}

-- | Encodes the input for use as text in an HTML document.
--
-- 'encode' will use named entities where possible, except for most symbols
-- in the ASCII block, where it was deemed this would result in
-- unnecessarily bloated output.
--
-- The encoder is not expected to fail. Should the underlying parser report
-- a failure anyway, that is treated as an internal error and 'error' is
-- called with the parser's message appended, so the cause can be diagnosed.
--
-- >>> T.putStrLn $ encode "Héllo wörld!"
-- H&eacute;llo w&ouml;rld!
--
-- >>> T.putStrLn $ encode "x ≂̸ y"
-- x &NotEqualTilde; y
--
-- >>> T.putStrLn $ encode "\2534\6188"
-- &#2534;&#6188;
encode :: Text -> Text
encode input =
    case parseOnly encodeParser input of
        Right encoded -> encoded
        -- Keep the parser's own explanation rather than discarding it.
        Left reason   ->
            error ("html-entity internal encoding error: " ++ reason)
{-# INLINE encode #-}

{- $partial
These functions are provided for convenience if you're using attoparsec
in a streaming style, and all return 'Result' values. Use them as you
would normally.
-}
-- | Incremental counterpart of 'decode': runs the decoding parser with
-- attoparsec's 'parse' and yields a 'Result' instead of an 'Either'.
decodePartial :: Text -> Result Text
decodePartial input = parse decodeParser input
{-# INLINE decodePartial #-}

-- | Incremental counterpart of 'encode': runs the encoding parser with
-- attoparsec's 'parse' and yields a 'Result' instead of a plain 'Text'.
encodePartial :: Text -> Result Text
encodePartial input = parse encodeParser input
{-# INLINE encodePartial #-}