{-# LINE 1 "Data/Text/ICU/CharsetDetection.hsc" #-}
{-# LANGUAGE ForeignFunctionInterface #-}
module Data.Text.ICU.CharsetDetection
(
setText
, detect
, mkCharsetDetector
, withCharsetDetector
, wrapUCharsetMatch
, CharsetMatch
, CharsetDetector
, getConfidence
, getName
, getLanguage
) where
import Foreign.Ptr (Ptr)
import Foreign.C.String (CString)
import Foreign.C.Types (CChar)
import qualified Data.ByteString as BS
import Data.ByteString (ByteString)
import qualified Data.Text.Encoding as TE
import Data.Text (Text)
import Data.Text.ICU.Error.Internal (UErrorCode, handleError)
import Data.Text.ICU.CharsetDetection.Internal (UCharsetMatch, UCharsetDetector,
CharsetDetector, CharsetMatch,
mkCharsetDetector,
withCharsetDetector,
withCharsetMatch,
wrapUCharsetMatch)
setText :: ByteString -> CharsetDetector -> IO ()
setText bs ucsd = withCharsetDetector ucsd go
where
go u = if BS.length bs < 512
then BS.useAsCStringLen bs (\(text,size) -> handleError $ ucsdet_setText u text size)
else BS.useAsCStringLen (BS.take 512 bs) (\(text,size) -> handleError $ ucsdet_setText u text size)
detect :: ByteString -> IO CharsetMatch
detect bs = do
ucsd <- mkCharsetDetector
setText bs ucsd
wrapUCharsetMatch ucsd $ withCharsetDetector ucsd (handleError . ucsdet_detect)
getConfidence :: CharsetMatch -> IO Int
getConfidence ucm = withCharsetMatch ucm $ handleError . ucsdet_getConfidence
getName :: CharsetMatch -> IO Text
getName ucsm = do
bs <- withCharsetMatch ucsm (handleError . ucsdet_getName) >>= BS.packCString
return $ TE.decodeUtf8 bs
getLanguage :: CharsetMatch -> IO Text
getLanguage ucsm = do
bs <- withCharsetMatch ucsm (handleError . ucsdet_getLanguage) >>= BS.packCString
return $ TE.decodeUtf8 bs
foreign import ccall unsafe "hs_text_icu.h __hs_ucsdet_setText" ucsdet_setText
:: Ptr UCharsetDetector -> Ptr CChar -> Int -> Ptr UErrorCode -> IO ()
foreign import ccall unsafe "hs_text_icu.h __hs_ucsdet_detect" ucsdet_detect
:: Ptr UCharsetDetector -> Ptr UErrorCode -> IO (Ptr UCharsetMatch)
foreign import ccall unsafe "hs_text_icu.h __hs_ucsdet_getConfidence" ucsdet_getConfidence
:: Ptr UCharsetMatch -> Ptr UErrorCode -> IO Int
foreign import ccall unsafe "hs_text_icu.h __hs_ucsdet_getName" ucsdet_getName
:: Ptr UCharsetMatch -> Ptr UErrorCode -> IO CString
foreign import ccall unsafe "hs_text_icu.h __hs_ucsdet_getLanguage" ucsdet_getLanguage
:: Ptr UCharsetMatch -> Ptr UErrorCode -> IO CString