{-# LANGUAGE DeriveFunctor, GeneralizedNewtypeDeriving, Strict #-}
{-|
Module      : Text.OpenCC
Description : Higher-level bindings to OpenCC
License     : MIT
Stability   : Experimental
Portability : POSIX

This module exposes higher-level binding to OpenCC than
"Text.OpenCC.Raw". OpenCC resources are managed automatically, and
large string objects are passed without being copied.

There are three sets of higher-level bindings.

1. IO handles. You work in 'IO' monad and work with 'OpenCC'
handles directly.
2. One-shot. 'convert1' directly converts a 'String'.
3. Monadic. You work in 'OpenCCM' monad where you can use 'convert'.

__Caveat__: the one-shot interface is unsafe, but as long as OpenCC is
not shared or you don't care about errors, you will be fine.

'defaultTradToSimp' and 'defaultSimpToTrad' are suggested by OpenCC.
-}
module Text.OpenCC
  ( convert1
  , OpenCC, open, lastError, convertIO
  , OpenCCM, withOpenCC, unsafeWithOpenCC, convert
  , defaultSimpToTrad, defaultTradToSimp
  ) where

import qualified Data.Text as T
import           Data.Text.Encoding
import qualified Data.Text.IO as T
import qualified Data.ByteString as BS
import           Data.ByteString.Internal ( fromForeignPtr, c_strlen )
import           Foreign.C.String ( CString, withCString, withCStringLen )
import           Foreign.Ptr ( Ptr, FunPtr, ptrToIntPtr )
import           Foreign.ForeignPtr ( ForeignPtr, newForeignPtr, newForeignPtr_, withForeignPtr, castForeignPtr )
import           System.IO.Unsafe ( unsafePerformIO )
import           Control.Monad.Reader
import           Control.Monad.Trans.Maybe
import           Data.Bits ( complement )
import           Text.OpenCC.Raw

-- |OpenCC handle plus the finalizer. The OpenCC instance will be
-- finalized when the object is garbage collected.
type OpenCC = ForeignPtr ()

-- |Filename of default Simplified to Traditional configuration
defaultSimpToTrad :: String
defaultSimpToTrad :: String
defaultSimpToTrad = String
"s2t.json"

-- |Filename of default Traditional to Simplified configuration
defaultTradToSimp :: String
defaultTradToSimp :: String
defaultTradToSimp = String
"t2s.json"

-- |Open a new OpenCC session with specified configuration. 'Nothing'
-- is returned if error happens, and the error message can be
-- retrieved from 'lastError'.
open :: String -> MaybeT IO OpenCC
open :: String -> MaybeT IO OpenCC
open String
cfg = do
  RawOpenCC
raw <- IO RawOpenCC -> MaybeT IO RawOpenCC
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (IO RawOpenCC -> MaybeT IO RawOpenCC)
-> IO RawOpenCC -> MaybeT IO RawOpenCC
forall a b. (a -> b) -> a -> b
$ String -> (CString -> IO RawOpenCC) -> IO RawOpenCC
forall a. String -> (CString -> IO a) -> IO a
withCString String
cfg CString -> IO RawOpenCC
_openccOpen
  Bool -> MaybeT IO ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (RawOpenCC -> IntPtr
forall a. Ptr a -> IntPtr
ptrToIntPtr RawOpenCC
raw IntPtr -> IntPtr -> Bool
forall a. Eq a => a -> a -> Bool
/= IntPtr -> IntPtr
forall a. Bits a => a -> a
complement IntPtr
0)
  IO OpenCC -> MaybeT IO OpenCC
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (IO OpenCC -> MaybeT IO OpenCC) -> IO OpenCC -> MaybeT IO OpenCC
forall a b. (a -> b) -> a -> b
$ FinalizerPtr () -> RawOpenCC -> IO OpenCC
forall a. FinalizerPtr a -> Ptr a -> IO (ForeignPtr a)
newForeignPtr FinalizerPtr ()
_openccClosePtr RawOpenCC
raw

-- |Use an OpenCC handle to do the conversion. The result is a UTF-8
-- encoded text.
convertIO :: OpenCC -> T.Text -> IO T.Text
convertIO :: OpenCC -> Text -> IO Text
convertIO OpenCC
handle Text
str = OpenCC -> (RawOpenCC -> IO Text) -> IO Text
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr OpenCC
handle ((RawOpenCC -> IO Text) -> IO Text)
-> (RawOpenCC -> IO Text) -> IO Text
forall a b. (a -> b) -> a -> b
$ \RawOpenCC
ptr -> ByteString -> (CStringLen -> IO Text) -> IO Text
forall a. ByteString -> (CStringLen -> IO a) -> IO a
BS.useAsCStringLen (Text -> ByteString
encodeUtf8 Text
str) ((CStringLen -> IO Text) -> IO Text)
-> (CStringLen -> IO Text) -> IO Text
forall a b. (a -> b) -> a -> b
$ \(CString
cstr,Int
len) ->
  RawOpenCC -> CString -> CSize -> IO CString
_openccConvertUtf8 RawOpenCC
ptr CString
cstr (Int -> CSize
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len) IO CString -> (CString -> IO Text) -> IO Text
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= CString -> IO Text
_wrapText

-- |Return the last error message. This function is NOT thread-safe.
lastError :: IO T.Text
lastError :: IO Text
lastError = do
  CString
err <- IO CString
_openccError
  ForeignPtr CChar
ptr <- CString -> IO (ForeignPtr CChar)
forall a. Ptr a -> IO (ForeignPtr a)
newForeignPtr_ CString
err
  CSize
len <- CString -> IO CSize
c_strlen CString
err
  Text -> IO Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> IO Text) -> Text -> IO Text
forall a b. (a -> b) -> a -> b
$ ByteString -> Text
decodeUtf8 (ByteString -> Text) -> ByteString -> Text
forall a b. (a -> b) -> a -> b
$ ForeignPtr Word8 -> Int -> Int -> ByteString
fromForeignPtr (ForeignPtr CChar -> ForeignPtr Word8
forall a b. ForeignPtr a -> ForeignPtr b
castForeignPtr ForeignPtr CChar
ptr) Int
0 (CSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CSize
len)

-- |Do a one-shot conversion. Note that this might affect the outcome
-- of 'lastError', and thus unsafe (despite the pureness suggested by
-- the signature).
--
-- > convert1 defaultSimpToTrad "头发发财"
convert1 :: String -> T.Text -> Maybe T.Text
convert1 :: String -> Text -> Maybe Text
convert1 String
cfg Text
str = (IO (Maybe Text) -> Maybe Text
forall a. IO a -> a
unsafePerformIO (IO (Maybe Text) -> Maybe Text)
-> (MaybeT IO Text -> IO (Maybe Text))
-> MaybeT IO Text
-> Maybe Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. MaybeT IO Text -> IO (Maybe Text)
forall (m :: * -> *) a. MaybeT m a -> m (Maybe a)
runMaybeT) (MaybeT IO Text -> Maybe Text) -> MaybeT IO Text -> Maybe Text
forall a b. (a -> b) -> a -> b
$ do
  OpenCC
handle <- String -> MaybeT IO OpenCC
open String
cfg
  IO Text -> MaybeT IO Text
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (IO Text -> MaybeT IO Text) -> IO Text -> MaybeT IO Text
forall a b. (a -> b) -> a -> b
$ OpenCC -> Text -> IO Text
convertIO OpenCC
handle Text
str

-- |The OpenCC environment. In this environment, any conversion
-- happens within a single 'OpenCC' instance created by 'withOpenCC'.
newtype OpenCCM a = OpenCCM (ReaderT OpenCC IO a)
  deriving ((forall a b. (a -> b) -> OpenCCM a -> OpenCCM b)
-> (forall a b. a -> OpenCCM b -> OpenCCM a) -> Functor OpenCCM
forall a b. a -> OpenCCM b -> OpenCCM a
forall a b. (a -> b) -> OpenCCM a -> OpenCCM b
forall (f :: * -> *).
(forall a b. (a -> b) -> f a -> f b)
-> (forall a b. a -> f b -> f a) -> Functor f
<$ :: forall a b. a -> OpenCCM b -> OpenCCM a
$c<$ :: forall a b. a -> OpenCCM b -> OpenCCM a
fmap :: forall a b. (a -> b) -> OpenCCM a -> OpenCCM b
$cfmap :: forall a b. (a -> b) -> OpenCCM a -> OpenCCM b
Functor, Functor OpenCCM
Functor OpenCCM
-> (forall a. a -> OpenCCM a)
-> (forall a b. OpenCCM (a -> b) -> OpenCCM a -> OpenCCM b)
-> (forall a b c.
    (a -> b -> c) -> OpenCCM a -> OpenCCM b -> OpenCCM c)
-> (forall a b. OpenCCM a -> OpenCCM b -> OpenCCM b)
-> (forall a b. OpenCCM a -> OpenCCM b -> OpenCCM a)
-> Applicative OpenCCM
forall a. a -> OpenCCM a
forall a b. OpenCCM a -> OpenCCM b -> OpenCCM a
forall a b. OpenCCM a -> OpenCCM b -> OpenCCM b
forall a b. OpenCCM (a -> b) -> OpenCCM a -> OpenCCM b
forall a b c. (a -> b -> c) -> OpenCCM a -> OpenCCM b -> OpenCCM c
forall (f :: * -> *).
Functor f
-> (forall a. a -> f a)
-> (forall a b. f (a -> b) -> f a -> f b)
-> (forall a b c. (a -> b -> c) -> f a -> f b -> f c)
-> (forall a b. f a -> f b -> f b)
-> (forall a b. f a -> f b -> f a)
-> Applicative f
<* :: forall a b. OpenCCM a -> OpenCCM b -> OpenCCM a
$c<* :: forall a b. OpenCCM a -> OpenCCM b -> OpenCCM a
*> :: forall a b. OpenCCM a -> OpenCCM b -> OpenCCM b
$c*> :: forall a b. OpenCCM a -> OpenCCM b -> OpenCCM b
liftA2 :: forall a b c. (a -> b -> c) -> OpenCCM a -> OpenCCM b -> OpenCCM c
$cliftA2 :: forall a b c. (a -> b -> c) -> OpenCCM a -> OpenCCM b -> OpenCCM c
<*> :: forall a b. OpenCCM (a -> b) -> OpenCCM a -> OpenCCM b
$c<*> :: forall a b. OpenCCM (a -> b) -> OpenCCM a -> OpenCCM b
pure :: forall a. a -> OpenCCM a
$cpure :: forall a. a -> OpenCCM a
Applicative, Applicative OpenCCM
Applicative OpenCCM
-> (forall a b. OpenCCM a -> (a -> OpenCCM b) -> OpenCCM b)
-> (forall a b. OpenCCM a -> OpenCCM b -> OpenCCM b)
-> (forall a. a -> OpenCCM a)
-> Monad OpenCCM
forall a. a -> OpenCCM a
forall a b. OpenCCM a -> OpenCCM b -> OpenCCM b
forall a b. OpenCCM a -> (a -> OpenCCM b) -> OpenCCM b
forall (m :: * -> *).
Applicative m
-> (forall a b. m a -> (a -> m b) -> m b)
-> (forall a b. m a -> m b -> m b)
-> (forall a. a -> m a)
-> Monad m
return :: forall a. a -> OpenCCM a
$creturn :: forall a. a -> OpenCCM a
>> :: forall a b. OpenCCM a -> OpenCCM b -> OpenCCM b
$c>> :: forall a b. OpenCCM a -> OpenCCM b -> OpenCCM b
>>= :: forall a b. OpenCCM a -> (a -> OpenCCM b) -> OpenCCM b
$c>>= :: forall a b. OpenCCM a -> (a -> OpenCCM b) -> OpenCCM b
Monad, Monad OpenCCM
Monad OpenCCM -> (forall a. IO a -> OpenCCM a) -> MonadIO OpenCCM
forall a. IO a -> OpenCCM a
forall (m :: * -> *).
Monad m -> (forall a. IO a -> m a) -> MonadIO m
liftIO :: forall a. IO a -> OpenCCM a
$cliftIO :: forall a. IO a -> OpenCCM a
MonadIO)

-- |Convert a string in the current environment.
convert :: T.Text -> OpenCCM T.Text
convert :: Text -> OpenCCM Text
convert Text
str = ReaderT OpenCC IO Text -> OpenCCM Text
forall a. ReaderT OpenCC IO a -> OpenCCM a
OpenCCM (ReaderT OpenCC IO Text -> OpenCCM Text)
-> ReaderT OpenCC IO Text -> OpenCCM Text
forall a b. (a -> b) -> a -> b
$ do
  OpenCC
handle <- ReaderT OpenCC IO OpenCC
forall r (m :: * -> *). MonadReader r m => m r
ask
  IO Text -> ReaderT OpenCC IO Text
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (IO Text -> ReaderT OpenCC IO Text)
-> IO Text -> ReaderT OpenCC IO Text
forall a b. (a -> b) -> a -> b
$ OpenCC -> Text -> IO Text
convertIO OpenCC
handle Text
str

-- |Open an OpenCC environment (which is 'MonadIO'), where 'convert'
-- is available.
withOpenCC :: String -> OpenCCM a -> MaybeT IO a
withOpenCC :: forall a. String -> OpenCCM a -> MaybeT IO a
withOpenCC String
cfg (OpenCCM ReaderT OpenCC IO a
inner) = String -> MaybeT IO OpenCC
open String
cfg MaybeT IO OpenCC -> (OpenCC -> MaybeT IO a) -> MaybeT IO a
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= IO a -> MaybeT IO a
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (IO a -> MaybeT IO a) -> (OpenCC -> IO a) -> OpenCC -> MaybeT IO a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ReaderT OpenCC IO a -> OpenCC -> IO a
forall r (m :: * -> *) a. ReaderT r m a -> r -> m a
runReaderT ReaderT OpenCC IO a
inner

-- |Same as 'withOpenCC' but the result is not 'IO'. This is unsafe,
-- and the same safety conditions as 'convert1' apply here.
unsafeWithOpenCC :: String -> OpenCCM a -> Maybe a
unsafeWithOpenCC :: forall a. String -> OpenCCM a -> Maybe a
unsafeWithOpenCC String
cfg = IO (Maybe a) -> Maybe a
forall a. IO a -> a
unsafePerformIO (IO (Maybe a) -> Maybe a)
-> (OpenCCM a -> IO (Maybe a)) -> OpenCCM a -> Maybe a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. MaybeT IO a -> IO (Maybe a)
forall (m :: * -> *) a. MaybeT m a -> m (Maybe a)
runMaybeT (MaybeT IO a -> IO (Maybe a))
-> (OpenCCM a -> MaybeT IO a) -> OpenCCM a -> IO (Maybe a)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> OpenCCM a -> MaybeT IO a
forall a. String -> OpenCCM a -> MaybeT IO a
withOpenCC String
cfg

-- |Wrap a 'CString' in 'BS.ByteString' with no specified finalizer.
_wrapBS :: FunPtr (CString -> IO ()) -> CString -> IO BS.ByteString
_wrapBS :: FunPtr (CString -> IO ()) -> CString -> IO ByteString
_wrapBS FunPtr (CString -> IO ())
finalizer CString
cstr = do
  CSize
len <- CString -> IO CSize
c_strlen CString
cstr
  ForeignPtr CChar
ptr <- FunPtr (CString -> IO ()) -> CString -> IO (ForeignPtr CChar)
forall a. FinalizerPtr a -> Ptr a -> IO (ForeignPtr a)
newForeignPtr FunPtr (CString -> IO ())
finalizer CString
cstr
  ByteString -> IO ByteString
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> IO ByteString) -> ByteString -> IO ByteString
forall a b. (a -> b) -> a -> b
$ ForeignPtr Word8 -> Int -> Int -> ByteString
fromForeignPtr (ForeignPtr CChar -> ForeignPtr Word8
forall a b. ForeignPtr a -> ForeignPtr b
castForeignPtr ForeignPtr CChar
ptr) Int
0 (CSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CSize
len)

-- |Wrap a 'CString' in 'BS.ByteString' with
-- '_openccConvertUtf8FreePtr' being the finalizer. Useful for
-- wrapping strings from OpenCC.
_wrapBS' :: CString -> IO BS.ByteString
_wrapBS' :: CString -> IO ByteString
_wrapBS' = FunPtr (CString -> IO ()) -> CString -> IO ByteString
_wrapBS FunPtr (CString -> IO ())
_openccConvertUtf8FreePtr

-- |Decode the UTF-8 bytestrings into a 'Text'.
--
-- OpenCC always returns valid UTF-8 strings if your input is
-- well-formed.
_wrapText :: CString -> IO T.Text
_wrapText :: CString -> IO Text
_wrapText CString
cstr = CString -> IO ByteString
_wrapBS' CString
cstr IO ByteString -> (ByteString -> IO Text) -> IO Text
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \ByteString
bs -> Text -> IO Text
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> Text
decodeUtf8 ByteString
bs)