module Text.HTML.WraXML.String where
import qualified Text.XML.WraXML.String as XmlString
import qualified Text.HTML.Basic.Character as HtmlChar
import qualified Data.Char as Char
import qualified Text.XML.HXT.DOM.Unicode as Unicode
import qualified Control.Monad.Exception.Synchronous as Exc
import Text.XML.WraXML.Utility (compose, )
{- |
An HTML string: a list of 'Atom's.
-}
type T = [Atom]
{- |
A single element of an HTML string.
This is just an alias for the character type of "Text.HTML.Basic.Character".
-}
type Atom = HtmlChar.T
{- |
Translate a plain string literally,
converting each character with 'HtmlChar.fromUnicode'.
This only works if the string does not contain special characters.
-}
fromString :: String -> T
fromString str = [HtmlChar.fromUnicode c | c <- str]
{- |
Default conversion routine:
every character of the input is converted
with 'HtmlChar.asciiFromUnicode'.
-}
fromUnicodeString :: String -> T
fromUnicodeString str = map HtmlChar.asciiFromUnicode str
{- |
Like 'fromUnicodeString', but every character is converted
with 'HtmlChar.asciiFromUnicodeInternetExploder' instead.
-}
fromUnicodeStringInternetExploder :: String -> T
fromUnicodeStringInternetExploder str =
   [HtmlChar.asciiFromUnicodeInternetExploder c | c <- str]
{- |
Convert every atom with 'HtmlChar.toUnicodeOrFormat',
combine the per-atom results with 'compose'
and apply the combination to the empty string.
-}
toUnicodeStringOrFormat :: T -> String
toUnicodeStringOrFormat str =
   compose (map HtmlChar.toUnicodeOrFormat str) ""
{- |
Convert every atom to a Unicode character with 'HtmlChar.toUnicode'.
An exceptional conversion result is resolved by passing it to 'error',
so this function is partial for atoms that cannot be converted.
-}
toUnicodeString :: T -> String
toUnicodeString str =
   [Exc.resolve error (HtmlChar.toUnicode atom) | atom <- str]
{- |
Decode runs of plain characters with the given decoder
and decode entities by HXT's XML entity table.
Errors from either conversion are not raised
but embedded in the result at the position where they occur.
-}
toUnicodeStringDecodingEmbedError ::
   Unicode.DecodingFctEmbedErrors -> T -> XmlString.EmbeddedExceptions
toUnicodeStringDecodingEmbedError f str =
   concat (HtmlChar.switchUnicodeRuns decodeRun decodeCharRef decodeEntityRef str)
   where
      -- plain characters: run the decoder, turn each Either into an exceptional value
      decodeRun run = map Exc.fromEither (f run)
      -- numeric character reference: a single, possibly exceptional, character
      decodeCharRef ref = [HtmlChar.toUnicode (HtmlChar.fromCharRef ref)]
      -- named entity reference: a single, possibly exceptional, character
      decodeEntityRef ref = [HtmlChar.toUnicode (HtmlChar.fromEntityRef ref)]
{- |
Convert all characters to lower case using 'HtmlChar.toLower'.
This uses ISO latin encoding and may fail for exotic characters.
-}
toLower :: T -> T
toLower str = [HtmlChar.toLower atom | atom <- str]
{- |
Convert all characters to upper case using 'HtmlChar.toUpper'.
-}
toUpper :: T -> T
toUpper str = [HtmlChar.toUpper atom | atom <- str]
{- |
Canonical form used for case-insensitive comparison:
convert with 'toUnicodeString' and lower-case every character
with 'Char.toLower'.
-}
toCanonicalUnicodeString :: T -> String
toCanonicalUnicodeString str =
   [Char.toLower c | c <- toUnicodeString str]
{- |
Check whether two strings are equal
after bringing both into the form of 'toCanonicalUnicodeString'.
-}
equalIgnoreCase :: T -> T -> Bool
equalIgnoreCase x y = canonical x == canonical y
   where
      canonical = toCanonicalUnicodeString
{- |
Check whether the first string occurs in the list,
comparing the 'toCanonicalUnicodeString' forms of all strings.
-}
elemIgnoreCase :: T -> [T] -> Bool
elemIgnoreCase x ys = needle `elem` candidates
   where
      needle = toCanonicalUnicodeString x
      candidates = [toCanonicalUnicodeString y | y <- ys]