{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE CPP #-}
module Binrep.Type.Text
( Encoding(..)
, AsText
, Encode, encode
, Decode(..)
, encodeToRep
, decodeViaTextICU
) where
import Binrep.Type.Common ( Endianness(..) )
import Binrep.Type.ByteString ( Rep )
import Refined
import Refined.Unsafe
import Data.ByteString qualified as B
import Data.Text qualified as Text
import Data.Text ( Text )
import Data.Char qualified as Char
import Data.Text.Encoding qualified as Text
import Data.Either.Combinators qualified as Either
import GHC.Generics ( Generic )
import Data.Data ( Data )
import Data.Typeable ( Typeable, typeRep )
import System.IO.Unsafe qualified
import Control.Exception qualified
import Data.Text.Encoding.Error qualified
import Data.Text.ICU.Convert qualified as ICU
type Bytes = B.ByteString
data Encoding
= UTF8
| UTF16 Endianness
| UTF32 Endianness
| ASCII
| SJIS
deriving stock ((forall x. Encoding -> Rep Encoding x)
-> (forall x. Rep Encoding x -> Encoding) -> Generic Encoding
forall x. Rep Encoding x -> Encoding
forall x. Encoding -> Rep Encoding x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Encoding x -> Encoding
$cfrom :: forall x. Encoding -> Rep Encoding x
Generic, Typeable Encoding
Typeable Encoding
-> (forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> Encoding -> c Encoding)
-> (forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c Encoding)
-> (Encoding -> Constr)
-> (Encoding -> DataType)
-> (forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c Encoding))
-> (forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c Encoding))
-> ((forall b. Data b => b -> b) -> Encoding -> Encoding)
-> (forall r r'.
(r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> Encoding -> r)
-> (forall r r'.
(r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> Encoding -> r)
-> (forall u. (forall d. Data d => d -> u) -> Encoding -> [u])
-> (forall u. Int -> (forall d. Data d => d -> u) -> Encoding -> u)
-> (forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d) -> Encoding -> m Encoding)
-> (forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> Encoding -> m Encoding)
-> (forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> Encoding -> m Encoding)
-> Data Encoding
Encoding -> DataType
Encoding -> Constr
(forall b. Data b => b -> b) -> Encoding -> Encoding
forall a.
Typeable a
-> (forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> a -> c a)
-> (forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c a)
-> (a -> Constr)
-> (a -> DataType)
-> (forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c a))
-> (forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c a))
-> ((forall b. Data b => b -> b) -> a -> a)
-> (forall r r'.
(r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> a -> r)
-> (forall r r'.
(r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> a -> r)
-> (forall u. (forall d. Data d => d -> u) -> a -> [u])
-> (forall u. Int -> (forall d. Data d => d -> u) -> a -> u)
-> (forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d) -> a -> m a)
-> (forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> a -> m a)
-> (forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> a -> m a)
-> Data a
forall u. Int -> (forall d. Data d => d -> u) -> Encoding -> u
forall u. (forall d. Data d => d -> u) -> Encoding -> [u]
forall r r'.
(r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> Encoding -> r
forall r r'.
(r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> Encoding -> r
forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d) -> Encoding -> m Encoding
forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> Encoding -> m Encoding
forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c Encoding
forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> Encoding -> c Encoding
forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c Encoding)
forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c Encoding)
gmapMo :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> Encoding -> m Encoding
$cgmapMo :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> Encoding -> m Encoding
gmapMp :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> Encoding -> m Encoding
$cgmapMp :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> Encoding -> m Encoding
gmapM :: forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d) -> Encoding -> m Encoding
$cgmapM :: forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d) -> Encoding -> m Encoding
gmapQi :: forall u. Int -> (forall d. Data d => d -> u) -> Encoding -> u
$cgmapQi :: forall u. Int -> (forall d. Data d => d -> u) -> Encoding -> u
gmapQ :: forall u. (forall d. Data d => d -> u) -> Encoding -> [u]
$cgmapQ :: forall u. (forall d. Data d => d -> u) -> Encoding -> [u]
gmapQr :: forall r r'.
(r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> Encoding -> r
$cgmapQr :: forall r r'.
(r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> Encoding -> r
gmapQl :: forall r r'.
(r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> Encoding -> r
$cgmapQl :: forall r r'.
(r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> Encoding -> r
gmapT :: (forall b. Data b => b -> b) -> Encoding -> Encoding
$cgmapT :: (forall b. Data b => b -> b) -> Encoding -> Encoding
dataCast2 :: forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c Encoding)
$cdataCast2 :: forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c Encoding)
dataCast1 :: forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c Encoding)
$cdataCast1 :: forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c Encoding)
dataTypeOf :: Encoding -> DataType
$cdataTypeOf :: Encoding -> DataType
toConstr :: Encoding -> Constr
$ctoConstr :: Encoding -> Constr
gunfold :: forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c Encoding
$cgunfold :: forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c Encoding
gfoldl :: forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> Encoding -> c Encoding
$cgfoldl :: forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> Encoding -> c Encoding
Data, Int -> Encoding -> ShowS
[Encoding] -> ShowS
Encoding -> String
(Int -> Encoding -> ShowS)
-> (Encoding -> String) -> ([Encoding] -> ShowS) -> Show Encoding
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Encoding] -> ShowS
$cshowList :: [Encoding] -> ShowS
show :: Encoding -> String
$cshow :: Encoding -> String
showsPrec :: Int -> Encoding -> ShowS
$cshowsPrec :: Int -> Encoding -> ShowS
Show, Encoding -> Encoding -> Bool
(Encoding -> Encoding -> Bool)
-> (Encoding -> Encoding -> Bool) -> Eq Encoding
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Encoding -> Encoding -> Bool
$c/= :: Encoding -> Encoding -> Bool
== :: Encoding -> Encoding -> Bool
$c== :: Encoding -> Encoding -> Bool
Eq)
type AsText (enc :: Encoding) = Refined enc Text
class Encode (enc :: Encoding) where
encode' :: Text -> Bytes
instance Encode 'UTF8 where encode' :: Text -> Bytes
encode' = Text -> Bytes
Text.encodeUtf8
instance Encode 'ASCII where encode' :: Text -> Bytes
encode' = forall (enc :: Encoding). Encode enc => Text -> Bytes
encode' @'UTF8
instance Encode ('UTF16 'BE) where encode' :: Text -> Bytes
encode' = Text -> Bytes
Text.encodeUtf16BE
instance Encode ('UTF16 'LE) where encode' :: Text -> Bytes
encode' = Text -> Bytes
Text.encodeUtf16LE
instance Encode ('UTF32 'BE) where encode' :: Text -> Bytes
encode' = Text -> Bytes
Text.encodeUtf32BE
instance Encode ('UTF32 'LE) where encode' :: Text -> Bytes
encode' = Text -> Bytes
Text.encodeUtf32LE
instance Encode 'SJIS where encode' :: Text -> Bytes
encode' = String -> Text -> Bytes
encodeViaTextICU' String
"Shift-JIS"
encode :: forall enc. Encode enc => AsText enc -> Bytes
encode :: forall (enc :: Encoding). Encode enc => AsText enc -> Bytes
encode = forall (enc :: Encoding). Encode enc => Text -> Bytes
encode' @enc (Text -> Bytes) -> (AsText enc -> Text) -> AsText enc -> Bytes
forall b c a. (b -> c) -> (a -> b) -> a -> c
. AsText enc -> Text
forall {k} (p :: k) x. Refined p x -> x
unrefine
instance Predicate 'UTF8 Text where validate :: Proxy 'UTF8 -> Text -> Maybe RefineException
validate Proxy 'UTF8
_ Text
_ = Maybe RefineException
success
instance Typeable e => Predicate ('UTF16 e) Text where validate :: Proxy ('UTF16 e) -> Text -> Maybe RefineException
validate Proxy ('UTF16 e)
_ Text
_ = Maybe RefineException
success
instance Typeable e => Predicate ('UTF32 e) Text where validate :: Proxy ('UTF32 e) -> Text -> Maybe RefineException
validate Proxy ('UTF32 e)
_ Text
_ = Maybe RefineException
success
instance Predicate 'ASCII Text where
validate :: Proxy 'ASCII -> Text -> Maybe RefineException
validate Proxy 'ASCII
p Text
t = if (Char -> Bool) -> Text -> Bool
Text.all Char -> Bool
Char.isAscii Text
t
then Maybe RefineException
success
else TypeRep -> Text -> Maybe RefineException
throwRefineOtherException (Proxy 'ASCII -> TypeRep
forall {k} (proxy :: k -> *) (a :: k).
Typeable a =>
proxy a -> TypeRep
typeRep Proxy 'ASCII
p) Text
"not valid 7-bit ASCII"
instance Predicate 'SJIS Text where validate :: Proxy 'SJIS -> Text -> Maybe RefineException
validate Proxy 'SJIS
_ Text
_ = Maybe RefineException
success
class Decode (enc :: Encoding) where
decode :: Bytes -> Either String (AsText enc)
instance Decode 'UTF8 where decode :: Bytes -> Either String (AsText 'UTF8)
decode = (UnicodeException -> String)
-> (Bytes -> Either UnicodeException Text)
-> Bytes
-> Either String (AsText 'UTF8)
forall (enc :: Encoding) e.
(e -> String)
-> (Bytes -> Either e Text) -> Bytes -> Either String (AsText enc)
decodeText UnicodeException -> String
forall a. Show a => a -> String
show Bytes -> Either UnicodeException Text
Text.decodeUtf8'
instance Decode ('UTF16 'BE) where decode :: Bytes -> Either String (AsText ('UTF16 'BE))
decode = (UnicodeException -> String)
-> (Bytes -> Either UnicodeException Text)
-> Bytes
-> Either String (AsText ('UTF16 'BE))
forall (enc :: Encoding) e.
(e -> String)
-> (Bytes -> Either e Text) -> Bytes -> Either String (AsText enc)
decodeText UnicodeException -> String
forall a. Show a => a -> String
show ((Bytes -> Either UnicodeException Text)
-> Bytes -> Either String (AsText ('UTF16 'BE)))
-> (Bytes -> Either UnicodeException Text)
-> Bytes
-> Either String (AsText ('UTF16 'BE))
forall a b. (a -> b) -> a -> b
$ (Bytes -> Text) -> Bytes -> Either UnicodeException Text
wrapUnsafeDecoder Bytes -> Text
Text.decodeUtf16BE
instance Decode ('UTF16 'LE) where decode :: Bytes -> Either String (AsText ('UTF16 'LE))
decode = (UnicodeException -> String)
-> (Bytes -> Either UnicodeException Text)
-> Bytes
-> Either String (AsText ('UTF16 'LE))
forall (enc :: Encoding) e.
(e -> String)
-> (Bytes -> Either e Text) -> Bytes -> Either String (AsText enc)
decodeText UnicodeException -> String
forall a. Show a => a -> String
show ((Bytes -> Either UnicodeException Text)
-> Bytes -> Either String (AsText ('UTF16 'LE)))
-> (Bytes -> Either UnicodeException Text)
-> Bytes
-> Either String (AsText ('UTF16 'LE))
forall a b. (a -> b) -> a -> b
$ (Bytes -> Text) -> Bytes -> Either UnicodeException Text
wrapUnsafeDecoder Bytes -> Text
Text.decodeUtf16LE
instance Decode ('UTF32 'BE) where decode :: Bytes -> Either String (AsText ('UTF32 'BE))
decode = (UnicodeException -> String)
-> (Bytes -> Either UnicodeException Text)
-> Bytes
-> Either String (AsText ('UTF32 'BE))
forall (enc :: Encoding) e.
(e -> String)
-> (Bytes -> Either e Text) -> Bytes -> Either String (AsText enc)
decodeText UnicodeException -> String
forall a. Show a => a -> String
show ((Bytes -> Either UnicodeException Text)
-> Bytes -> Either String (AsText ('UTF32 'BE)))
-> (Bytes -> Either UnicodeException Text)
-> Bytes
-> Either String (AsText ('UTF32 'BE))
forall a b. (a -> b) -> a -> b
$ (Bytes -> Text) -> Bytes -> Either UnicodeException Text
wrapUnsafeDecoder Bytes -> Text
Text.decodeUtf32BE
instance Decode ('UTF32 'LE) where decode :: Bytes -> Either String (AsText ('UTF32 'LE))
decode = (UnicodeException -> String)
-> (Bytes -> Either UnicodeException Text)
-> Bytes
-> Either String (AsText ('UTF32 'LE))
forall (enc :: Encoding) e.
(e -> String)
-> (Bytes -> Either e Text) -> Bytes -> Either String (AsText enc)
decodeText UnicodeException -> String
forall a. Show a => a -> String
show ((Bytes -> Either UnicodeException Text)
-> Bytes -> Either String (AsText ('UTF32 'LE)))
-> (Bytes -> Either UnicodeException Text)
-> Bytes
-> Either String (AsText ('UTF32 'LE))
forall a b. (a -> b) -> a -> b
$ (Bytes -> Text) -> Bytes -> Either UnicodeException Text
wrapUnsafeDecoder Bytes -> Text
Text.decodeUtf32LE
#if MIN_VERSION_text(2,0,0)
instance Decode 'ASCII where decode = decodeText $ wrapUnsafeDecoder Text.decodeASCII
#endif
instance Decode 'SJIS where decode :: Bytes -> Either String (AsText 'SJIS)
decode = ShowS
-> (Bytes -> Either String Text)
-> Bytes
-> Either String (AsText 'SJIS)
forall (enc :: Encoding) e.
(e -> String)
-> (Bytes -> Either e Text) -> Bytes -> Either String (AsText enc)
decodeText ShowS
forall a. a -> a
id ((Bytes -> Either String Text)
-> Bytes -> Either String (AsText 'SJIS))
-> (Bytes -> Either String Text)
-> Bytes
-> Either String (AsText 'SJIS)
forall a b. (a -> b) -> a -> b
$ String -> Bytes -> Either String Text
decodeViaTextICU' String
"Shift-JIS"
encodeToRep
:: forall (rep :: Rep) enc
. (Encode enc, Predicate rep Bytes)
=> AsText enc
-> Either RefineException (Refined rep Bytes)
encodeToRep :: forall (rep :: Rep) (enc :: Encoding).
(Encode enc, Predicate rep Bytes) =>
AsText enc -> Either RefineException (Refined rep Bytes)
encodeToRep = Bytes -> Either RefineException (Refined rep Bytes)
forall {k} (p :: k) x.
Predicate p x =>
x -> Either RefineException (Refined p x)
refine (Bytes -> Either RefineException (Refined rep Bytes))
-> (AsText enc -> Bytes)
-> AsText enc
-> Either RefineException (Refined rep Bytes)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. AsText enc -> Bytes
forall (enc :: Encoding). Encode enc => AsText enc -> Bytes
encode
decodeText
:: forall enc e
. (e -> String) -> (Bytes -> Either e Text) -> Bytes
-> Either String (AsText enc)
decodeText :: forall (enc :: Encoding) e.
(e -> String)
-> (Bytes -> Either e Text) -> Bytes -> Either String (AsText enc)
decodeText e -> String
g Bytes -> Either e Text
f = (e -> String)
-> (Text -> AsText enc)
-> Either e Text
-> Either String (AsText enc)
forall a c b d. (a -> c) -> (b -> d) -> Either a b -> Either c d
Either.mapBoth e -> String
g Text -> AsText enc
forall {k} x (p :: k). x -> Refined p x
reallyUnsafeRefine (Either e Text -> Either String (AsText enc))
-> (Bytes -> Either e Text) -> Bytes -> Either String (AsText enc)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Bytes -> Either e Text
f
wrapUnsafeDecoder
:: (Bytes -> Text)
-> Bytes -> Either Data.Text.Encoding.Error.UnicodeException Text
wrapUnsafeDecoder :: (Bytes -> Text) -> Bytes -> Either UnicodeException Text
wrapUnsafeDecoder Bytes -> Text
f =
IO (Either UnicodeException Text) -> Either UnicodeException Text
forall a. IO a -> a
System.IO.Unsafe.unsafeDupablePerformIO
(IO (Either UnicodeException Text) -> Either UnicodeException Text)
-> (Bytes -> IO (Either UnicodeException Text))
-> Bytes
-> Either UnicodeException Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. IO Text -> IO (Either UnicodeException Text)
forall e a. Exception e => IO a -> IO (Either e a)
Control.Exception.try
(IO Text -> IO (Either UnicodeException Text))
-> (Bytes -> IO Text) -> Bytes -> IO (Either UnicodeException Text)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> IO Text
forall a. a -> IO a
Control.Exception.evaluate
(Text -> IO Text) -> (Bytes -> Text) -> Bytes -> IO Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Bytes -> Text
f
encodeViaTextICU :: String -> Text -> IO B.ByteString
encodeViaTextICU :: String -> Text -> IO Bytes
encodeViaTextICU String
charset Text
t = do
Converter
conv <- String -> Maybe Bool -> IO Converter
ICU.open String
charset Maybe Bool
forall a. Maybe a
Nothing
Bytes -> IO Bytes
forall (m :: * -> *) a. Monad m => a -> m a
return (Bytes -> IO Bytes) -> Bytes -> IO Bytes
forall a b. (a -> b) -> a -> b
$ Converter -> Text -> Bytes
ICU.fromUnicode Converter
conv Text
t
encodeViaTextICU' :: String -> Text -> B.ByteString
encodeViaTextICU' :: String -> Text -> Bytes
encodeViaTextICU' String
charset Text
t =
IO Bytes -> Bytes
forall a. IO a -> a
System.IO.Unsafe.unsafeDupablePerformIO (IO Bytes -> Bytes) -> IO Bytes -> Bytes
forall a b. (a -> b) -> a -> b
$ String -> Text -> IO Bytes
encodeViaTextICU String
charset Text
t
decodeViaTextICU :: String -> B.ByteString -> IO (Either String Text)
decodeViaTextICU :: String -> Bytes -> IO (Either String Text)
decodeViaTextICU String
charset Bytes
t = do
Converter
conv <- String -> Maybe Bool -> IO Converter
ICU.open String
charset Maybe Bool
forall a. Maybe a
Nothing
Either String Text -> IO (Either String Text)
forall (m :: * -> *) a. Monad m => a -> m a
return (Either String Text -> IO (Either String Text))
-> Either String Text -> IO (Either String Text)
forall a b. (a -> b) -> a -> b
$ Text -> Either String Text
forall a b. b -> Either a b
Right (Text -> Either String Text) -> Text -> Either String Text
forall a b. (a -> b) -> a -> b
$ Converter -> Bytes -> Text
ICU.toUnicode Converter
conv Bytes
t
decodeViaTextICU' :: String -> B.ByteString -> Either String Text
decodeViaTextICU' :: String -> Bytes -> Either String Text
decodeViaTextICU' String
charset Bytes
t = do
IO (Either String Text) -> Either String Text
forall a. IO a -> a
System.IO.Unsafe.unsafeDupablePerformIO (IO (Either String Text) -> Either String Text)
-> IO (Either String Text) -> Either String Text
forall a b. (a -> b) -> a -> b
$ String -> Bytes -> IO (Either String Text)
decodeViaTextICU String
charset Bytes
t