{-# LANGUAGE ForeignFunctionInterface, CPP #-}
module Codec.Text.Detect (detectEncodingName, detectEncoding) where
import Control.Exception
import qualified Data.ByteString.Internal as SI
import qualified Data.ByteString.Lazy as L
import Data.Traversable (traverse)
import Control.Applicative
import Foreign.C.Types
import Foreign.C.String
import Foreign.Ptr
import Foreign.ForeignPtr
import System.IO
import System.IO.Unsafe
#if !MIN_VERSION_base(4,3,0)
mask :: ((IO a -> IO a) -> IO b) -> IO b
mask io = blocked >>= \b -> if b then io id else block $ io unblock
#endif
type Csd_t = Ptr ()
foreign import ccall unsafe "csd_open" c_csd_open :: IO Csd_t
foreign import ccall unsafe "csd_consider" c_csd_consider :: Csd_t -> CString -> CInt -> IO CInt
foreign import ccall unsafe "csd_close" c_csd_close :: Csd_t -> IO CString
{-# NOINLINE detectEncodingName #-}
detectEncodingName :: L.ByteString -> Maybe String
detectEncodingName :: ByteString -> Maybe String
detectEncodingName ByteString
b = IO (Maybe String) -> Maybe String
forall a. IO a -> a
unsafePerformIO (IO (Maybe String) -> Maybe String)
-> IO (Maybe String) -> Maybe String
forall a b. (a -> b) -> a -> b
$ do
((forall a. IO a -> IO a) -> IO (Maybe String))
-> IO (Maybe String)
forall b. ((forall a. IO a -> IO a) -> IO b) -> IO b
mask (((forall a. IO a -> IO a) -> IO (Maybe String))
-> IO (Maybe String))
-> ((forall a. IO a -> IO a) -> IO (Maybe String))
-> IO (Maybe String)
forall a b. (a -> b) -> a -> b
$ \forall a. IO a -> IO a
restore -> do
Csd_t
csd <- IO Csd_t
c_csd_open
IO () -> IO ()
forall a. IO a -> IO a
restore ((\ByteString -> IO () -> IO ()
f -> (ByteString -> IO () -> IO ()) -> IO () -> [ByteString] -> IO ()
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ByteString -> IO () -> IO ()
f (() -> IO ()
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ()) (ByteString -> [ByteString]
L.toChunks ByteString
b)) ((ByteString -> IO () -> IO ()) -> IO ())
-> (ByteString -> IO () -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \ByteString
chunk IO ()
feed_more -> do
let (ForeignPtr Word8
fptr, Int
ptr_offset, Int
chunk_length) = ByteString -> (ForeignPtr Word8, Int, Int)
SI.toForeignPtr ByteString
chunk
CInt
res <- ForeignPtr Word8 -> (Ptr Word8 -> IO CInt) -> IO CInt
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr Word8
fptr ((Ptr Word8 -> IO CInt) -> IO CInt)
-> (Ptr Word8 -> IO CInt) -> IO CInt
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
ptr -> Csd_t -> CString -> CInt -> IO CInt
c_csd_consider Csd_t
csd (Ptr Word8
ptr Ptr Word8 -> Int -> CString
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
ptr_offset) (Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
chunk_length)
case CInt
res CInt -> CInt -> Ordering
forall a. Ord a => a -> a -> Ordering
`compare` CInt
0 of
Ordering
LT -> () -> IO ()
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
Ordering
EQ -> IO ()
feed_more
Ordering
GT -> () -> IO ()
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
) IO () -> IO CString -> IO ()
forall a b. IO a -> IO b -> IO a
`onException` Csd_t -> IO CString
c_csd_close Csd_t
csd
CString
c_encoding_ptr <- Csd_t -> IO CString
c_csd_close Csd_t
csd
if CString
c_encoding_ptr CString -> CString -> Bool
forall a. Eq a => a -> a -> Bool
== CString
forall a. Ptr a
nullPtr
then Maybe String -> IO (Maybe String)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe String
forall a. Maybe a
Nothing
else String -> Maybe String
forall a. a -> Maybe a
Just (String -> Maybe String)
-> (String -> String) -> String -> Maybe String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
normalise (String -> Maybe String) -> IO String -> IO (Maybe String)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> CString -> IO String
peekCString CString
c_encoding_ptr
where
normalise :: String -> String
normalise String
"GB18030" = String
"gb18030"
normalise String
x = String
x
detectEncoding :: L.ByteString -> IO (Maybe TextEncoding)
detectEncoding :: ByteString -> IO (Maybe TextEncoding)
detectEncoding = (String -> IO TextEncoding)
-> Maybe String -> IO (Maybe TextEncoding)
forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
forall (f :: * -> *) a b.
Applicative f =>
(a -> f b) -> Maybe a -> f (Maybe b)
traverse String -> IO TextEncoding
mkTextEncoding (Maybe String -> IO (Maybe TextEncoding))
-> (ByteString -> Maybe String)
-> ByteString
-> IO (Maybe TextEncoding)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Maybe String
detectEncodingName