module Language.PureScript.PSString
( PSString
, toUTF16CodeUnits
, decodeString
, decodeStringEither
, decodeStringWithReplacement
, prettyPrintString
, prettyPrintStringJS
, mkString
) where
import Prelude
import GHC.Generics (Generic)
import Codec.Serialise (Serialise)
import Control.DeepSeq (NFData)
import Control.Exception (try, evaluate)
import Control.Applicative ((<|>))
import Data.Char qualified as Char
import Data.Bits (shiftR)
import Data.Either (fromRight)
import Data.List (unfoldr)
import Data.Scientific (toBoundedInteger)
import Data.String (IsString(..))
import Data.ByteString (ByteString)
import Data.ByteString qualified as BS
import Data.Text (Text)
import Data.Text qualified as T
import Data.Text.Encoding (decodeUtf16BE)
import Data.Text.Encoding.Error (UnicodeException)
import Data.Vector qualified as V
import Data.Word (Word16, Word8)
import Numeric (showHex)
import System.IO.Unsafe (unsafePerformIO)
import Data.Aeson qualified as A
import Data.Aeson.Types qualified as A
newtype PSString = PSString { PSString -> [Word16]
toUTF16CodeUnits :: [Word16] }
deriving (PSString -> PSString -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: PSString -> PSString -> Bool
$c/= :: PSString -> PSString -> Bool
== :: PSString -> PSString -> Bool
$c== :: PSString -> PSString -> Bool
Eq, Eq PSString
PSString -> PSString -> Bool
PSString -> PSString -> Ordering
PSString -> PSString -> PSString
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: PSString -> PSString -> PSString
$cmin :: PSString -> PSString -> PSString
max :: PSString -> PSString -> PSString
$cmax :: PSString -> PSString -> PSString
>= :: PSString -> PSString -> Bool
$c>= :: PSString -> PSString -> Bool
> :: PSString -> PSString -> Bool
$c> :: PSString -> PSString -> Bool
<= :: PSString -> PSString -> Bool
$c<= :: PSString -> PSString -> Bool
< :: PSString -> PSString -> Bool
$c< :: PSString -> PSString -> Bool
compare :: PSString -> PSString -> Ordering
$ccompare :: PSString -> PSString -> Ordering
Ord, NonEmpty PSString -> PSString
PSString -> PSString -> PSString
forall b. Integral b => b -> PSString -> PSString
forall a.
(a -> a -> a)
-> (NonEmpty a -> a)
-> (forall b. Integral b => b -> a -> a)
-> Semigroup a
stimes :: forall b. Integral b => b -> PSString -> PSString
$cstimes :: forall b. Integral b => b -> PSString -> PSString
sconcat :: NonEmpty PSString -> PSString
$csconcat :: NonEmpty PSString -> PSString
<> :: PSString -> PSString -> PSString
$c<> :: PSString -> PSString -> PSString
Semigroup, Semigroup PSString
PSString
[PSString] -> PSString
PSString -> PSString -> PSString
forall a.
Semigroup a -> a -> (a -> a -> a) -> ([a] -> a) -> Monoid a
mconcat :: [PSString] -> PSString
$cmconcat :: [PSString] -> PSString
mappend :: PSString -> PSString -> PSString
$cmappend :: PSString -> PSString -> PSString
mempty :: PSString
$cmempty :: PSString
Monoid, forall x. Rep PSString x -> PSString
forall x. PSString -> Rep PSString x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep PSString x -> PSString
$cfrom :: forall x. PSString -> Rep PSString x
Generic)
instance NFData PSString
instance Serialise PSString
instance Show PSString where
show :: PSString -> String
show = forall a. Show a => a -> String
show forall b c a. (b -> c) -> (a -> b) -> a -> c
. PSString -> String
codePoints
codePoints :: PSString -> String
codePoints :: PSString -> String
codePoints = forall a b. (a -> b) -> [a] -> [b]
map (forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (Int -> Char
Char.chr forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (Integral a, Num b) => a -> b
fromIntegral) forall a. a -> a
id) forall b c a. (b -> c) -> (a -> b) -> a -> c
. PSString -> [Either Word16 Char]
decodeStringEither
decodeStringWithReplacement :: PSString -> String
decodeStringWithReplacement :: PSString -> String
decodeStringWithReplacement = forall a b. (a -> b) -> [a] -> [b]
map (forall b a. b -> Either a b -> b
fromRight Char
'\xFFFD') forall b c a. (b -> c) -> (a -> b) -> a -> c
. PSString -> [Either Word16 Char]
decodeStringEither
decodeStringEither :: PSString -> [Either Word16 Char]
decodeStringEither :: PSString -> [Either Word16 Char]
decodeStringEither = forall b a. (b -> Maybe (a, b)) -> b -> [a]
unfoldr [Word16] -> Maybe (Either Word16 Char, [Word16])
decode forall b c a. (b -> c) -> (a -> b) -> a -> c
. PSString -> [Word16]
toUTF16CodeUnits
where
decode :: [Word16] -> Maybe (Either Word16 Char, [Word16])
decode :: [Word16] -> Maybe (Either Word16 Char, [Word16])
decode (Word16
h:Word16
l:[Word16]
rest) | Word16 -> Bool
isLead Word16
h Bool -> Bool -> Bool
&& Word16 -> Bool
isTrail Word16
l = forall a. a -> Maybe a
Just (forall a b. b -> Either a b
Right (Word16 -> Word16 -> Char
unsurrogate Word16
h Word16
l), [Word16]
rest)
decode (Word16
c:[Word16]
rest) | Word16 -> Bool
isSurrogate Word16
c = forall a. a -> Maybe a
Just (forall a b. a -> Either a b
Left Word16
c, [Word16]
rest)
decode (Word16
c:[Word16]
rest) = forall a. a -> Maybe a
Just (forall a b. b -> Either a b
Right (Word16 -> Char
toChar Word16
c), [Word16]
rest)
decode [] = forall a. Maybe a
Nothing
unsurrogate :: Word16 -> Word16 -> Char
unsurrogate :: Word16 -> Word16 -> Char
unsurrogate Word16
h Word16
l = forall a. Enum a => Int -> a
toEnum ((Word16 -> Int
toInt Word16
h forall a. Num a => a -> a -> a
- Int
0xD800) forall a. Num a => a -> a -> a
* Int
0x400 forall a. Num a => a -> a -> a
+ (Word16 -> Int
toInt Word16
l forall a. Num a => a -> a -> a
- Int
0xDC00) forall a. Num a => a -> a -> a
+ Int
0x10000)
decodeString :: PSString -> Maybe Text
decodeString :: PSString -> Maybe Text
decodeString = forall {a} {a}. Either a a -> Maybe a
hush forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Either UnicodeException Text
decodeEither forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Word8] -> ByteString
BS.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Word16 -> [Word8]
unpair forall b c a. (b -> c) -> (a -> b) -> a -> c
. PSString -> [Word16]
toUTF16CodeUnits
where
unpair :: Word16 -> [Word8]
unpair Word16
w = [Word16 -> Word8
highByte Word16
w, Word16 -> Word8
lowByte Word16
w]
lowByte :: Word16 -> Word8
lowByte :: Word16 -> Word8
lowByte = forall a b. (Integral a, Num b) => a -> b
fromIntegral
highByte :: Word16 -> Word8
highByte :: Word16 -> Word8
highByte = forall a b. (Integral a, Num b) => a -> b
fromIntegral forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. Bits a => a -> Int -> a
`shiftR` Int
8)
decodeEither :: ByteString -> Either UnicodeException Text
decodeEither :: ByteString -> Either UnicodeException Text
decodeEither = forall a. IO a -> a
unsafePerformIO forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall e a. Exception e => IO a -> IO (Either e a)
try forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. a -> IO a
evaluate forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Text
decodeUtf16BE
hush :: Either a a -> Maybe a
hush = forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (forall a b. a -> b -> a
const forall a. Maybe a
Nothing) forall a. a -> Maybe a
Just
instance IsString PSString where
fromString :: String -> PSString
fromString String
a = [Word16] -> PSString
PSString forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Char -> [Word16]
encodeUTF16 String
a
where
surrogates :: Char -> (Word16, Word16)
surrogates :: Char -> (Word16, Word16)
surrogates Char
c = (Int -> Word16
toWord (Int
h forall a. Num a => a -> a -> a
+ Int
0xD800), Int -> Word16
toWord (Int
l forall a. Num a => a -> a -> a
+ Int
0xDC00))
where (Int
h, Int
l) = forall a. Integral a => a -> a -> (a, a)
divMod (forall a. Enum a => a -> Int
fromEnum Char
c forall a. Num a => a -> a -> a
- Int
0x10000) Int
0x400
encodeUTF16 :: Char -> [Word16]
encodeUTF16 :: Char -> [Word16]
encodeUTF16 Char
c | forall a. Enum a => a -> Int
fromEnum Char
c forall a. Ord a => a -> a -> Bool
> Int
0xFFFF = [Word16
high, Word16
low]
where (Word16
high, Word16
low) = Char -> (Word16, Word16)
surrogates Char
c
encodeUTF16 Char
c = [Int -> Word16
toWord forall a b. (a -> b) -> a -> b
$ forall a. Enum a => a -> Int
fromEnum Char
c]
instance A.ToJSON PSString where
toJSON :: PSString -> Value
toJSON PSString
str =
case PSString -> Maybe Text
decodeString PSString
str of
Just Text
t -> forall a. ToJSON a => a -> Value
A.toJSON Text
t
Maybe Text
Nothing -> forall a. ToJSON a => a -> Value
A.toJSON (PSString -> [Word16]
toUTF16CodeUnits PSString
str)
instance A.FromJSON PSString where
parseJSON :: Value -> Parser PSString
parseJSON Value
a = Parser PSString
jsonString forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser PSString
arrayOfCodeUnits
where
jsonString :: Parser PSString
jsonString = forall a. IsString a => String -> a
fromString forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall a. FromJSON a => Value -> Parser a
A.parseJSON Value
a
arrayOfCodeUnits :: Parser PSString
arrayOfCodeUnits = [Word16] -> PSString
PSString forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Value -> Parser [Word16]
parseArrayOfCodeUnits Value
a
parseArrayOfCodeUnits :: A.Value -> A.Parser [Word16]
parseArrayOfCodeUnits :: Value -> Parser [Word16]
parseArrayOfCodeUnits = forall a. String -> (Array -> Parser a) -> Value -> Parser a
A.withArray String
"array of UTF-16 code units" (forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
traverse Value -> Parser Word16
parseCodeUnit forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Vector a -> [a]
V.toList)
parseCodeUnit :: A.Value -> A.Parser Word16
parseCodeUnit :: Value -> Parser Word16
parseCodeUnit Value
b = forall a. String -> (Scientific -> Parser a) -> Value -> Parser a
A.withScientific String
"two-byte non-negative integer" (forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall a. String -> Value -> Parser a
A.typeMismatch String
"" Value
b) forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall i. (Integral i, Bounded i) => Scientific -> Maybe i
toBoundedInteger) Value
b
prettyPrintString :: PSString -> Text
prettyPrintString :: PSString -> Text
prettyPrintString PSString
s = Text
"\"" forall a. Semigroup a => a -> a -> a
<> forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap Either Word16 Char -> Text
encodeChar (PSString -> [Either Word16 Char]
decodeStringEither PSString
s) forall a. Semigroup a => a -> a -> a
<> Text
"\""
where
encodeChar :: Either Word16 Char -> Text
encodeChar :: Either Word16 Char -> Text
encodeChar (Left Word16
c) = Text
"\\x" forall a. Semigroup a => a -> a -> a
<> forall a. Enum a => Int -> a -> Text
showHex' Int
6 Word16
c
encodeChar (Right Char
c)
| Char
c forall a. Eq a => a -> a -> Bool
== Char
'\t' = Text
"\\t"
| Char
c forall a. Eq a => a -> a -> Bool
== Char
'\r' = Text
"\\r"
| Char
c forall a. Eq a => a -> a -> Bool
== Char
'\n' = Text
"\\n"
| Char
c forall a. Eq a => a -> a -> Bool
== Char
'"' = Text
"\\\""
| Char
c forall a. Eq a => a -> a -> Bool
== Char
'\'' = Text
"\\\'"
| Char
c forall a. Eq a => a -> a -> Bool
== Char
'\\' = Text
"\\\\"
| Char -> Bool
shouldPrint Char
c = Char -> Text
T.singleton Char
c
| Bool
otherwise = Text
"\\x" forall a. Semigroup a => a -> a -> a
<> forall a. Enum a => Int -> a -> Text
showHex' Int
6 (Char -> Int
Char.ord Char
c)
shouldPrint :: Char -> Bool
shouldPrint :: Char -> Bool
shouldPrint Char
' ' = Bool
True
shouldPrint Char
c =
Char -> GeneralCategory
Char.generalCategory Char
c forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem`
[ GeneralCategory
Char.UppercaseLetter
, GeneralCategory
Char.LowercaseLetter
, GeneralCategory
Char.TitlecaseLetter
, GeneralCategory
Char.OtherLetter
, GeneralCategory
Char.DecimalNumber
, GeneralCategory
Char.LetterNumber
, GeneralCategory
Char.OtherNumber
, GeneralCategory
Char.ConnectorPunctuation
, GeneralCategory
Char.DashPunctuation
, GeneralCategory
Char.OpenPunctuation
, GeneralCategory
Char.ClosePunctuation
, GeneralCategory
Char.InitialQuote
, GeneralCategory
Char.FinalQuote
, GeneralCategory
Char.OtherPunctuation
, GeneralCategory
Char.MathSymbol
, GeneralCategory
Char.CurrencySymbol
, GeneralCategory
Char.ModifierSymbol
, GeneralCategory
Char.OtherSymbol
]
prettyPrintStringJS :: PSString -> Text
prettyPrintStringJS :: PSString -> Text
prettyPrintStringJS PSString
s = Text
"\"" forall a. Semigroup a => a -> a -> a
<> forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap Word16 -> Text
encodeChar (PSString -> [Word16]
toUTF16CodeUnits PSString
s) forall a. Semigroup a => a -> a -> a
<> Text
"\""
where
encodeChar :: Word16 -> Text
encodeChar :: Word16 -> Text
encodeChar Word16
c | Word16
c forall a. Ord a => a -> a -> Bool
> Word16
0xFF = Text
"\\u" forall a. Semigroup a => a -> a -> a
<> forall a. Enum a => Int -> a -> Text
showHex' Int
4 Word16
c
encodeChar Word16
c | Word16
c forall a. Ord a => a -> a -> Bool
> Word16
0x7E Bool -> Bool -> Bool
|| Word16
c forall a. Ord a => a -> a -> Bool
< Word16
0x20 = Text
"\\x" forall a. Semigroup a => a -> a -> a
<> forall a. Enum a => Int -> a -> Text
showHex' Int
2 Word16
c
encodeChar Word16
c | Word16 -> Char
toChar Word16
c forall a. Eq a => a -> a -> Bool
== Char
'\b' = Text
"\\b"
encodeChar Word16
c | Word16 -> Char
toChar Word16
c forall a. Eq a => a -> a -> Bool
== Char
'\t' = Text
"\\t"
encodeChar Word16
c | Word16 -> Char
toChar Word16
c forall a. Eq a => a -> a -> Bool
== Char
'\n' = Text
"\\n"
encodeChar Word16
c | Word16 -> Char
toChar Word16
c forall a. Eq a => a -> a -> Bool
== Char
'\v' = Text
"\\v"
encodeChar Word16
c | Word16 -> Char
toChar Word16
c forall a. Eq a => a -> a -> Bool
== Char
'\f' = Text
"\\f"
encodeChar Word16
c | Word16 -> Char
toChar Word16
c forall a. Eq a => a -> a -> Bool
== Char
'\r' = Text
"\\r"
encodeChar Word16
c | Word16 -> Char
toChar Word16
c forall a. Eq a => a -> a -> Bool
== Char
'"' = Text
"\\\""
encodeChar Word16
c | Word16 -> Char
toChar Word16
c forall a. Eq a => a -> a -> Bool
== Char
'\\' = Text
"\\\\"
encodeChar Word16
c = Char -> Text
T.singleton forall a b. (a -> b) -> a -> b
$ Word16 -> Char
toChar Word16
c
showHex' :: Enum a => Int -> a -> Text
showHex' :: forall a. Enum a => Int -> a -> Text
showHex' Int
width a
c =
let hs :: String
hs = forall a. (Integral a, Show a) => a -> ShowS
showHex (forall a. Enum a => a -> Int
fromEnum a
c) String
"" in
String -> Text
T.pack (forall a. Int -> a -> [a]
replicate (Int
width forall a. Num a => a -> a -> a
- forall (t :: * -> *) a. Foldable t => t a -> Int
length String
hs) Char
'0' forall a. Semigroup a => a -> a -> a
<> String
hs)
isLead :: Word16 -> Bool
isLead :: Word16 -> Bool
isLead Word16
h = Word16
h forall a. Ord a => a -> a -> Bool
>= Word16
0xD800 Bool -> Bool -> Bool
&& Word16
h forall a. Ord a => a -> a -> Bool
<= Word16
0xDBFF
isTrail :: Word16 -> Bool
isTrail :: Word16 -> Bool
isTrail Word16
l = Word16
l forall a. Ord a => a -> a -> Bool
>= Word16
0xDC00 Bool -> Bool -> Bool
&& Word16
l forall a. Ord a => a -> a -> Bool
<= Word16
0xDFFF
isSurrogate :: Word16 -> Bool
isSurrogate :: Word16 -> Bool
isSurrogate Word16
c = Word16 -> Bool
isLead Word16
c Bool -> Bool -> Bool
|| Word16 -> Bool
isTrail Word16
c
toChar :: Word16 -> Char
toChar :: Word16 -> Char
toChar = forall a. Enum a => Int -> a
toEnum forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (Integral a, Num b) => a -> b
fromIntegral
toWord :: Int -> Word16
toWord :: Int -> Word16
toWord = forall a b. (Integral a, Num b) => a -> b
fromIntegral
toInt :: Word16 -> Int
toInt :: Word16 -> Int
toInt = forall a b. (Integral a, Num b) => a -> b
fromIntegral
mkString :: Text -> PSString
mkString :: Text -> PSString
mkString = forall a. IsString a => String -> a
fromString forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack