{-|
All kinds of representations of a character in XML combined in one type.
Note that an entity can in principle represent a large text,
thus an \"XML character\" might actually be a text.
However the standard entities consist of one character.
In contrast to our representation,
HaXml uses Unicode substrings instead of Unicode characters,
which is certainly more efficient for common XML texts
that contain mainly Unicode text and only few references.
However our representation is unique,
whereas HaXmls may represent a text as @"abc","def"@ or @"abcdef"@.
-}
module Text.XML.Basic.Character (
   T(..), toUnicode, toUnicodeGen,
   toUnicodeOrFormat, toUnicodeOrFormatGen,
   fromUnicode, fromCharRef, fromEntityRef,
   maybeUnicode, maybeCharRef, maybeEntityRef,
   isUnicode, isCharRef, isEntityRef, isRef,
   unicode, refC, refE,
   asciiFromUnicode, asciiFromUnicodeGen, minimalRefFromUnicode,
   reduceRef, reduceRefGen,
   validCharRef, switchUnicodeRuns,
   ) where

import qualified Text.XML.Basic.Format as Fmt
import qualified Text.XML.Basic.Entity as Ent
import qualified Data.Map as Map
import qualified Data.Char as Char
import Data.Maybe.HT (toMaybe, )
import Data.Tuple.HT (mapFst, )
import Control.Monad (mplus, )

import qualified Control.Monad.Exception.Synchronous as Exc


data T =
     Unicode Char
   | CharRef Int
   | EntityRef String
      deriving (T -> T -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: T -> T -> Bool
$c/= :: T -> T -> Bool
== :: T -> T -> Bool
$c== :: T -> T -> Bool
Eq)

{- |
If a reference cannot be resolved
then an @Exception@ constructor with an error message is returned.
-}
toUnicode :: T -> Exc.Exceptional String Char
toUnicode :: T -> Exceptional String Char
toUnicode =
   Map String Char -> T -> Exceptional String Char
toUnicodeGen Map String Char
Ent.mapNameToChar

toUnicodeGen :: Map.Map String Char -> T -> Exc.Exceptional String Char
toUnicodeGen :: Map String Char -> T -> Exceptional String Char
toUnicodeGen Map String Char
_ (Unicode Char
c) = forall e a. a -> Exceptional e a
Exc.Success Char
c
toUnicodeGen Map String Char
_ (CharRef Int
c) =
   if Int -> Bool
validCharRef Int
c
     then forall e a. a -> Exceptional e a
Exc.Success forall a b. (a -> b) -> a -> b
$ Int -> Char
Char.chr Int
c
     else forall e a. e -> Exceptional e a
Exc.Exception forall a b. (a -> b) -> a -> b
$ String
"Character number out of bound: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show Int
c
toUnicodeGen Map String Char
dict (EntityRef String
name) =
   forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall e a. e -> Exceptional e a
Exc.Exception forall a b. (a -> b) -> a -> b
$ String
"Unknown entity &" forall a. [a] -> [a] -> [a]
++ String
name forall a. [a] -> [a] -> [a]
++ String
";") forall e a. a -> Exceptional e a
Exc.Success forall a b. (a -> b) -> a -> b
$
   forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup String
name Map String Char
dict


{- |
If a reference cannot be resolved
then a reference string is returned.
-}
toUnicodeOrFormat :: T -> ShowS
toUnicodeOrFormat :: T -> ShowS
toUnicodeOrFormat =
   Map String Char -> T -> ShowS
toUnicodeOrFormatGen Map String Char
Ent.mapNameToChar

toUnicodeOrFormatGen :: Map.Map String Char -> T -> ShowS
toUnicodeOrFormatGen :: Map String Char -> T -> ShowS
toUnicodeOrFormatGen Map String Char
dict =
   forall object. C object => object -> ShowS
Fmt.run forall b c a. (b -> c) -> (a -> b) -> a -> c
. Map String Char -> T -> T
reduceRefGen Map String Char
dict


fromUnicode :: Char -> T
fromUnicode :: Char -> T
fromUnicode = Char -> T
Unicode

fromCharRef :: Int -> T
fromCharRef :: Int -> T
fromCharRef = Int -> T
CharRef

fromEntityRef :: String -> T
fromEntityRef :: String -> T
fromEntityRef = String -> T
EntityRef


maybeUnicode :: T -> Maybe Char
maybeUnicode :: T -> Maybe Char
maybeUnicode (Unicode Char
c) = forall a. a -> Maybe a
Just Char
c
maybeUnicode T
_           = forall a. Maybe a
Nothing

maybeCharRef :: T -> Maybe Int
maybeCharRef :: T -> Maybe Int
maybeCharRef (CharRef Int
n) = forall a. a -> Maybe a
Just Int
n
maybeCharRef T
_           = forall a. Maybe a
Nothing

maybeEntityRef :: T -> Maybe String
maybeEntityRef :: T -> Maybe String
maybeEntityRef (EntityRef String
s) = forall a. a -> Maybe a
Just String
s
maybeEntityRef T
_             = forall a. Maybe a
Nothing


isUnicode :: T -> Bool
isUnicode :: T -> Bool
isUnicode (Unicode Char
_) = Bool
True
isUnicode T
_           = Bool
False

isCharRef :: T -> Bool
isCharRef :: T -> Bool
isCharRef (CharRef Int
_) = Bool
True
isCharRef T
_           = Bool
False

isEntityRef :: T -> Bool
isEntityRef :: T -> Bool
isEntityRef (EntityRef String
_) = Bool
True
isEntityRef T
_             = Bool
False

isRef :: T -> Bool
isRef :: T -> Bool
isRef T
x = T -> Bool
isCharRef T
x Bool -> Bool -> Bool
&& T -> Bool
isEntityRef T
x



{-|
Convert unicode character to XML Char,
where Unicode constructor is only used for ASCII characters.
This is achieved by the following decision:
If there is a entity reference, use this.
If it is ASCII, represent it as Char.
Otherwise use a character reference.
-}
asciiFromUnicode :: Char -> T
asciiFromUnicode :: Char -> T
asciiFromUnicode =
   Map Char String -> Char -> T
asciiFromUnicodeGen Map Char String
Ent.mapCharToName

asciiFromUnicodeGen :: Map.Map Char String -> Char -> T
asciiFromUnicodeGen :: Map Char String -> Char -> T
asciiFromUnicodeGen Map Char String
dict Char
c =
   forall b a. b -> (a -> b) -> Maybe a -> b
maybe
      (if Char -> Bool
Char.isAscii Char
c
         then Char -> T
fromUnicode Char
c
         else Int -> T
fromCharRef (Char -> Int
Char.ord Char
c))
      String -> T
fromEntityRef forall a b. (a -> b) -> a -> b
$
   forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Char
c Map Char String
dict


{- |
Generate XML character from Unicode character
with minimal use of references.
The only references used are the XML entity references
@'@, @"@, @&@, @<@, @>@.
-}
minimalRefFromUnicode :: Char -> T
minimalRefFromUnicode :: Char -> T
minimalRefFromUnicode Char
c =
   forall b a. b -> (a -> b) -> Maybe a -> b
maybe
      (Char -> T
fromUnicode Char
c)
      String -> T
fromEntityRef forall a b. (a -> b) -> a -> b
$
   forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Char
c Map Char String
Ent.mapCharToName


-- * shortcuts for making the output of the Show instance valid

unicode :: Char -> T
unicode :: Char -> T
unicode = Char -> T
Unicode

refC :: Int -> T
refC :: Int -> T
refC = Int -> T
fromCharRef

refE :: String -> T
refE :: String -> T
refE = String -> T
fromEntityRef


switchUnicodeRuns ::
   (String -> a) -> (Int -> a) -> (String -> a) ->
   [T] -> [a]
switchUnicodeRuns :: forall a.
(String -> a) -> (Int -> a) -> (String -> a) -> [T] -> [a]
switchUnicodeRuns String -> a
uni Int -> a
charRef String -> a
entRef =
   let prepend :: T -> [Either String b] -> (Either String a, [Either String b])
prepend (Unicode Char
c) [Either String b]
rest =
          forall a c b. (a -> c) -> (a, b) -> (c, b)
mapFst (forall a b. a -> Either a b
Left forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char
cforall a. a -> [a] -> [a]
:)) forall a b. (a -> b) -> a -> b
$
          case [Either String b]
rest of
             (Left String
s : [Either String b]
ss) -> (String
s, [Either String b]
ss)
             [Either String b]
_ -> ([], [Either String b]
rest)
       prepend (CharRef   Int
n) [Either String b]
rest = (forall a b. b -> Either a b
Right (Int -> a
charRef Int
n), [Either String b]
rest)
       prepend (EntityRef String
n) [Either String b]
rest = (forall a b. b -> Either a b
Right (String -> a
entRef  String
n), [Either String b]
rest)
   in  forall a b. (a -> b) -> [a] -> [b]
map (forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> a
uni forall a. a -> a
id) forall b c a. (b -> c) -> (a -> b) -> a -> c
.
       forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\T
c -> forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry (:) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall {b}.
T -> [Either String b] -> (Either String a, [Either String b])
prepend T
c) []


instance Show T where
   showsPrec :: Int -> T -> ShowS
showsPrec Int
prec T
a =
      Bool -> ShowS -> ShowS
showParen (Int
prec forall a. Ord a => a -> a -> Bool
>= Int
10) forall a b. (a -> b) -> a -> b
$
      case T
a of
         Unicode   Char
c -> String -> ShowS
showString String
"unicode " forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => a -> ShowS
shows Char
c
         CharRef   Int
n -> String -> ShowS
showString String
"refC " forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => a -> ShowS
shows Int
n
         EntityRef String
n -> String -> ShowS
showString String
"refE " forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => a -> ShowS
shows String
n
   showList :: [T] -> ShowS
showList =
      Bool -> ShowS -> ShowS
showParen Bool
True forall b c a. (b -> c) -> (a -> b) -> a -> c
.
      forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr forall b c a. (b -> c) -> (a -> b) -> a -> c
(.) (String -> ShowS
showString String
"[]") forall b c a. (b -> c) -> (a -> b) -> a -> c
.
      forall a.
(String -> a) -> (Int -> a) -> (String -> a) -> [T] -> [a]
switchUnicodeRuns
         (\String
str -> String -> ShowS
showString String
"map unicode " forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => a -> ShowS
shows String
str forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ShowS
showString String
" ++ ")
         (\Int
n -> String -> ShowS
showString String
"refC " forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => a -> ShowS
shows Int
n forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ShowS
showString  String
" : ")
         (\String
n -> String -> ShowS
showString String
"refE " forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => a -> ShowS
shows String
n forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ShowS
showString  String
" : ")


instance Fmt.C T where
   run :: T -> ShowS
run (Unicode Char
c) = Char -> ShowS
showChar Char
c
   run (CharRef Int
n) = ShowS
Fmt.amp forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
Fmt.sharp forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => a -> ShowS
shows Int
n forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
Fmt.semicolon
   run (EntityRef String
n) = ShowS
Fmt.amp forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ShowS
showString String
n forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
Fmt.semicolon



{- |
Reduce the use of references.
Represent as much as possible characters as Unicode characters,
that is, using the Unicode constructor.
-}
reduceRef :: T -> T
reduceRef :: T -> T
reduceRef = Map String Char -> T -> T
reduceRefGen Map String Char
Ent.mapNameToChar

{- | try to convert a References to equivalent Unicode characters -}
reduceRefGen :: Map.Map String Char -> T -> T
reduceRefGen :: Map String Char -> T -> T
reduceRefGen Map String Char
dict T
x =
   forall b a. b -> (a -> b) -> Maybe a -> b
maybe T
x Char -> T
Unicode forall a b. (a -> b) -> a -> b
$
   forall (m :: * -> *) a. MonadPlus m => m a -> m a -> m a
mplus
      (forall a b c. (a -> b -> c) -> b -> a -> c
flip forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Map String Char
dict forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< T -> Maybe String
maybeEntityRef T
x)
      (do Int
n <- T -> Maybe Int
maybeCharRef T
x
          forall a. Bool -> a -> Maybe a
toMaybe (Int -> Bool
validCharRef Int
n) (Int -> Char
Char.chr Int
n))

validCharRef :: Int -> Bool
validCharRef :: Int -> Bool
validCharRef Int
n =
   Int
0 forall a. Ord a => a -> a -> Bool
<= Int
n Bool -> Bool -> Bool
&& Int
n forall a. Ord a => a -> a -> Bool
<= Char -> Int
Char.ord forall a. Bounded a => a
maxBound