{-# LANGUAGE CPP                   #-}
{-# LANGUAGE ExplicitForAll        #-}
{-# LANGUAGE FlexibleInstances     #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE Trustworthy           #-}
{-# OPTIONS_GHC -Wno-orphans  #-}

-- | This module implements type class which allow to have conversion to and
-- from 'Text', 'String' and 'ByteString' types (including both strict and lazy
-- versions). Usually you need to export 'Text' modules qualified and use
-- 'T.pack' \/ 'T.unpack' functions to convert to\/from 'Text'. Now you can
-- just use 'toText' \/ 'toString' functions.

module Universum.String.Conversion
       ( -- * Convenient type aliases
         LText
       , LByteString

         -- * Conversion type classes
       , ConvertUtf8 (..)
       , ToString (..)
       , ToLText (..)
       , ToText (..)

         -- * Show and read functions
       , readEither
       , readMaybe
       , show
       ) where

import Data.Bifunctor (first)
import Data.Either (Either)
import Data.Function (id, (.))
import Data.Maybe (Maybe)
import Data.String (String)
import qualified Data.Text.Internal as T

#if !MIN_VERSION_text(2,0,2)
import qualified Data.Text.Internal.Fusion.Common as TF
#endif

import Universum.Functor ((<$>))
import Universum.String.Reexport (ByteString, IsString, Read, Text, fromString)

import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as LB
import qualified Data.ByteString.Lazy.UTF8 as LBU
import qualified Data.ByteString.UTF8 as BU
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.Encoding.Error as T
import qualified Data.Text.Lazy as LT
import qualified Data.Text.Lazy.Encoding as LT
import qualified Text.Read (readEither, readMaybe)

import qualified GHC.Show as Show (Show (show))

-- $setup
-- >>> :set -XTypeApplications -XOverloadedStrings
-- >>> import Universum.Base (Int)
-- >>> import Universum.Function (($))
-- >>> import Universum.Print (putStrLn)

-- | Type synonym for 'Data.Text.Lazy.Text'.
type LText = LT.Text

-- | Type synonym for 'Data.ByteString.Lazy.ByteString'.
type LByteString = LB.ByteString


-- | Type class for conversion to utf8 representation of text.
class ConvertUtf8 a b where
    -- | Encode as utf8 string (usually 'B.ByteString').
    --
    -- >>> encodeUtf8 @Text @ByteString "патак"
    -- "\208\191\208\176\209\130\208\176\208\186"
    encodeUtf8 :: a -> b

    -- | Decode from utf8 string.
    --
    -- >>> decodeUtf8 @Text @ByteString "\208\191\208\176\209\130\208\176\208\186"
    -- "\1087\1072\1090\1072\1082"
    -- >>> putStrLn $ decodeUtf8 @Text @ByteString "\208\191\208\176\209\130\208\176\208\186"
    -- патак
    decodeUtf8 :: b -> a

    {- | Decode as utf8 string but returning execption if byte sequence is malformed.

#if MIN_VERSION_text(1,2,3)
    >>> decodeUtf8 @Text @ByteString "\208\208\176\209\130\208\176\208\186"
    "\65533\1072\1090\1072\1082"
#else
    >>> decodeUtf8 @Text @ByteString "\208\208\176\209\130\208\176\208\186"
    "\65533\65533\1090\1072\1082"
#endif
    >>> decodeUtf8Strict @Text @ByteString "\208\208\176\209\130\208\176\208\186"
    Left Cannot decode byte '\xd0': Data.Text.Internal.Encoding.decodeUtf8: Invalid UTF-8 stream
    -}
    decodeUtf8Strict :: b -> Either T.UnicodeException a

instance ConvertUtf8 String B.ByteString where
    encodeUtf8 = BU.fromString
    decodeUtf8 = BU.toString
    decodeUtf8Strict = (T.unpack <$>) . decodeUtf8Strict

instance ConvertUtf8 T.Text B.ByteString where
    encodeUtf8 = T.encodeUtf8
    decodeUtf8 = T.decodeUtf8With T.lenientDecode
    decodeUtf8Strict = T.decodeUtf8'

instance ConvertUtf8 LT.Text B.ByteString where
    encodeUtf8 = LB.toStrict . encodeUtf8
    decodeUtf8 = LT.decodeUtf8With T.lenientDecode . LB.fromStrict
    decodeUtf8Strict = decodeUtf8Strict . LB.fromStrict

instance ConvertUtf8 String LB.ByteString where
    encodeUtf8 = LBU.fromString
    decodeUtf8 = LBU.toString
    decodeUtf8Strict = (T.unpack <$>) . decodeUtf8Strict

instance ConvertUtf8 T.Text LB.ByteString where
    encodeUtf8 = LB.fromStrict . T.encodeUtf8
    decodeUtf8 = T.decodeUtf8With T.lenientDecode . LB.toStrict
    decodeUtf8Strict = T.decodeUtf8' . LB.toStrict

instance ConvertUtf8 LT.Text LB.ByteString where
    encodeUtf8 = LT.encodeUtf8
    decodeUtf8 = LT.decodeUtf8With T.lenientDecode
    decodeUtf8Strict = LT.decodeUtf8'

-- | Type class for converting other strings to 'T.Text'.
class ToText a where
    toText :: a -> T.Text

instance ToText String where
    toText = T.pack

instance ToText T.Text where
    toText = id

instance ToText LT.Text where
    toText = LT.toStrict

-- | Type class for converting other strings to 'LT.Text'.
class ToLText a where
    toLText :: a -> LT.Text

instance ToLText String where
    toLText = LT.pack

instance ToLText T.Text where
    toLText = LT.fromStrict

instance ToLText LT.Text where
    toLText = id

-- | Type class for converting other strings to 'String'.
class ToString a where
    toString :: a -> String

instance ToString String where
    toString = id

instance ToString T.Text where
    toString = T.unpack

instance ToString LT.Text where
    toString = LT.unpack

{- [Note toString-toText-rewritting]

Note ON MAINTENANCE of rewrite rules below.

Whenever you want to allow a newer version of @text@ package, check:
* The @text conversions@ benchmark, that the numbers with and without the rules
are still the expected ones (see the comments there for what to expect).
* You may optionally check whether any changes to `pack` and `unpack` functions
and their `INLINE`/`NOINLINE` annotations took place.
The current rewrite rules match with what happens in `text` package
at the c826a6cb3d29242ce3632b490670ae7997689eaa commit, and next time
you can just check `git diff <that commit>..HEAD`.

If these points hold, it should be safe to raise the upper bound on `text` version.

If not, first do the necessary changes (preserving the backward compatibility),
update the commit id above if you checked the code of the `text` package.
Then bump the version constraint.
-}

{-

@toString . toText@ pattern may occur quite often after inlining because
we tend to use 'Text' rather than 'String' in function signatures, but
there are still some libraries which use 'String's and thus make us perform
conversions back and forth.

Note that, as documentation for 'T.pack' function mentions, @toString . toText@
is not strictly equal to the identity function. Thus, replacing @toString . toText@
with @id@ will result in losing transformation of surrogate code points.
But in the most cases this is what the user wants.
-}

{-# RULES "pack/unpack" [~0]
    forall s. T.unpack (T.pack s) = s
#-}

#if !MIN_VERSION_text(2,0,2)
{-

We can do even better than above if take rules defined in 'Data.Text' into
account.

Quoting investigation of @int-index:

If we look at @unpack@ and @pack@ they are defined as

@
unpack = S.unstreamList . stream
{-# INLINE [1] unpack #-}

pack = unstream . S.map safe . S.streamList
{-# INLINE [1] pack #-}
@

After they get inlined, the rule seems to be

@
(S.unstreamList . stream) ((unstream . S.map safe . S.streamList) a)
@

If we also inline function composition, we get

@
S.unstreamList (stream (unstream (S.map safe (S.streamList a))))
@

`stream` and `unstream` surely cancel out via this rule:

@
{-# RULES "STREAM stream/unstream fusion" forall s. stream (unstream s) = s #-}
@

So we are left with

@
S.unstreamList (S.map safe (S.streamList a))
@

Now, what's this 'safe' function? Turns out it's defined as

@
safe :: Char -> Char
safe c
    | ord c .&. 0x1ff800 /= 0xd800 = c
    | otherwise                    = '\xfffd'
@

Aha, so it's mapping some codepoints to @'\xfffd'@!
There's a comment on top of it to explain this:

```
-- Unicode 'Data.Char.Surrogate' code points are not included in the set of Unicode
-- scalar values, but are unfortunately admitted as valid 'Char'
-- values by Haskell.  They cannot be represented in a 'Text'.  This
-- function remaps those code points to the Unicode replacement
-- character (U+FFFD, \'&#xfffd;\'), and leaves other code points
-- unchanged.
```

This logic is lost with the mentioned rewrite rule.
Not a huge loss, but it does mean that this rewrite rule isn't meaning preserving.


We hope that in most cases it's fine.
And if it's not, one can mark his function using either @pack@ or @unpack@
with @NOINLINE@ pragma to prevent the rule from firing.

So, eventually, we add the following rule:
-}
{-# RULES "pack/unpack internal" [1]
    forall s. TF.unstreamList (TF.map T.safe (TF.streamList s)) = s
#-}
#endif

{- In case if GHC didn't manage to inline and rewrite everything in
the remaining phases (@Data.Text.pack@ is inlined at 1-st phase),
we still have "pack/unpack" rule. Hopefully, one of them will fire.
-}

{- The opposite rule is safe to have because 'T.safe' /is/ the identity
function for strings made up from valid characters, and 'Text' is guaranteed
to have only valid ones.
However, for this case there is no @unstream (stream s) = id@ rule,
so we don't delve deep into internals. As long as @stream@ and @unstream@
only perform conversion between text and stream of characters, they should
be safe to collapse.
-}
{-# RULES "unpack/pack" [~0]
    forall s. T.pack (T.unpack s) = s
#-}

-- | Polymorhpic version of 'Text.Read.readEither'.
--
-- >>> readEither @Text @Int "123"
-- Right 123
-- >>> readEither @Text @Int "aa"
-- Left "Prelude.read: no parse"
--
-- @since 1.8.0
readEither :: (ToString a, Read b) => a -> Either Text b
readEither = first toText . Text.Read.readEither . toString

-- | Polymorhpic version of 'Text.Read.readMaybe'.
--
-- >>> readMaybe @Int @Text "123"
-- Just 123
-- >>> readMaybe @Int @Text "aa"
-- Nothing
readMaybe :: forall b a. (ToString a, Read b) => a -> Maybe b
readMaybe = Text.Read.readMaybe . toString

-- | Generalized version of 'Prelude.show'.
show :: forall b a . (Show.Show a, IsString b) => a -> b
show x = fromString (Show.show x)
{-# SPECIALIZE show :: Show.Show  a => a -> Text  #-}
{-# SPECIALIZE show :: Show.Show  a => a -> LText  #-}
{-# SPECIALIZE show :: Show.Show  a => a -> ByteString  #-}
{-# SPECIALIZE show :: Show.Show  a => a -> LByteString  #-}
{-# SPECIALIZE show :: Show.Show  a => a -> String  #-}