{-# LINE 1 "Data/Text/ICU/Break.hsc" #-}
{-# LANGUAGE BangPatterns, ForeignFunctionInterface, RecordWildCards #-}
module Data.Text.ICU.Break
(
BreakIterator
, Line(..)
, Data.Text.ICU.Break.Word(..)
, breakCharacter
, breakLine
, breakSentence
, breakWord
, clone
, setText
, current
, first
, last
, next
, previous
, preceding
, following
, isBoundary
, getStatus
, getStatuses
, available
) where
import Control.DeepSeq (NFData(..))
import Control.Monad (forM)
import Data.IORef (newIORef, writeIORef)
import Data.Int (Int32)
import Data.Text (Text)
import Data.Text.Foreign (I16, useAsPtr)
import Data.Text.ICU.Break.Types (BreakIterator(..), UBreakIterator)
import Data.Text.ICU.Error.Internal (UErrorCode, handleError)
import Data.Text.ICU.Internal (LocaleName(..), UBool, UChar, asBool, withLocaleName)
import Foreign.C.String (CString, peekCString)
import Foreign.C.Types (CInt(..))
import Foreign.ForeignPtr (newForeignPtr, withForeignPtr)
import Foreign.Marshal.Array (allocaArray, peekArray)
import Foreign.Marshal.Utils (with)
import Foreign.Ptr (FunPtr, Ptr, nullPtr)
import Prelude hiding (last)
import System.IO.Unsafe (unsafePerformIO)
data Line = Soft
| Hard
deriving (Eq, Show, Enum)
instance NFData Line where
rnf !_ = ()
data Word = Uncategorized
| Number
| Letter
| Kana
| Ideograph
deriving (Eq, Show, Enum)
instance NFData Data.Text.ICU.Break.Word where
rnf !_ = ()
breakCharacter :: LocaleName -> Text -> IO (BreakIterator ())
breakCharacter = open (0) (const ())
{-# LINE 111 "Data/Text/ICU/Break.hsc" #-}
breakLine :: LocaleName -> Text -> IO (BreakIterator Line)
breakLine = open (2) asLine
{-# LINE 119 "Data/Text/ICU/Break.hsc" #-}
where
asLine i
| i < (100) = Soft
{-# LINE 122 "Data/Text/ICU/Break.hsc" #-}
| i < (200) = Hard
{-# LINE 123 "Data/Text/ICU/Break.hsc" #-}
| otherwise = error $ "unknown line break status " ++ show i
breakSentence :: LocaleName -> Text -> IO (BreakIterator ())
breakSentence = open (3) (const ())
{-# LINE 132 "Data/Text/ICU/Break.hsc" #-}
breakWord :: LocaleName -> Text -> IO (BreakIterator Data.Text.ICU.Break.Word)
breakWord = open (1) asWord
{-# LINE 143 "Data/Text/ICU/Break.hsc" #-}
where
asWord i
| i < (100) = Uncategorized
{-# LINE 146 "Data/Text/ICU/Break.hsc" #-}
| i < (200) = Number
{-# LINE 147 "Data/Text/ICU/Break.hsc" #-}
| i < (300) = Letter
{-# LINE 148 "Data/Text/ICU/Break.hsc" #-}
| i < (400) = Kana
{-# LINE 149 "Data/Text/ICU/Break.hsc" #-}
| i < (500) = Ideograph
{-# LINE 150 "Data/Text/ICU/Break.hsc" #-}
| otherwise = error $ "unknown word break status " ++ show i
open :: UBreakIteratorType -> (Int32 -> a) -> LocaleName -> Text
-> IO (BreakIterator a)
open brk f loc t = withLocaleName loc $ \locale ->
useAsPtr t $ \ptr len -> do
bi <- handleError $ ubrk_open brk locale ptr (fromIntegral len)
r <- newIORef t
BR r f `fmap` newForeignPtr ubrk_close bi
setText :: BreakIterator a -> Text -> IO ()
setText BR{..} t =
useAsPtr t $ \ptr len -> do
withForeignPtr brIter $ \p -> handleError $
ubrk_setText p ptr (fromIntegral len)
writeIORef brText t
clone :: BreakIterator a -> IO (BreakIterator a)
clone BR{..} = do
bi <- withForeignPtr brIter $ \p ->
with 1 $ handleError . ubrk_safeClone p nullPtr
BR brText brStatus `fmap` newForeignPtr ubrk_close bi
asIndex :: (Ptr UBreakIterator -> IO Int32) -> BreakIterator a -> IO (Maybe I16)
asIndex act BR{..} = do
i <- withForeignPtr brIter act
return $! if i == (-1)
{-# LINE 182 "Data/Text/ICU/Break.hsc" #-}
then Nothing
else Just $! fromIntegral i
first :: BreakIterator a -> IO I16
first BR{..} = fromIntegral `fmap` withForeignPtr brIter ubrk_first
last :: BreakIterator a -> IO I16
last BR{..} = fromIntegral `fmap` withForeignPtr brIter ubrk_last
next :: BreakIterator a -> IO (Maybe I16)
next = asIndex ubrk_next
previous :: BreakIterator a -> IO (Maybe I16)
previous = asIndex ubrk_previous
preceding :: BreakIterator a -> Int -> IO (Maybe I16)
preceding bi i = asIndex (flip ubrk_preceding (fromIntegral i)) bi
following :: BreakIterator a -> Int -> IO (Maybe I16)
following bi i = asIndex (flip ubrk_following (fromIntegral i)) bi
current :: BreakIterator a -> IO (Maybe I16)
current = asIndex ubrk_current
getStatus :: BreakIterator a -> IO a
getStatus BR{..} = brStatus `fmap` withForeignPtr brIter ubrk_getRuleStatus
getStatuses :: BreakIterator a -> IO [a]
getStatuses BR{..} =
withForeignPtr brIter $ \brk -> do
n <- handleError $ ubrk_getRuleStatusVec brk nullPtr 0
allocaArray (fromIntegral n) $ \ptr -> do
_ <- handleError $ ubrk_getRuleStatusVec brk ptr n
map brStatus `fmap` peekArray (fromIntegral n) ptr
isBoundary :: BreakIterator a -> Int -> IO Bool
isBoundary BR{..} i = asBool `fmap` withForeignPtr brIter (flip ubrk_isBoundary (fromIntegral i))
available :: [LocaleName]
available = unsafePerformIO $ do
n <- ubrk_countAvailable
forM [0..n-1] $ \i -> ubrk_getAvailable i >>= fmap Locale . peekCString
{-# NOINLINE available #-}
type UBreakIteratorType = CInt
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_open" ubrk_open
:: UBreakIteratorType -> CString -> Ptr UChar -> Int32 -> Ptr UErrorCode
-> IO (Ptr UBreakIterator)
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_setText" ubrk_setText
:: Ptr UBreakIterator -> Ptr UChar -> Int32 -> Ptr UErrorCode
-> IO ()
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_safeClone" ubrk_safeClone
:: Ptr UBreakIterator -> Ptr a -> Ptr Int32 -> Ptr UErrorCode
-> IO (Ptr UBreakIterator)
foreign import ccall unsafe "hs_text_icu.h &__hs_ubrk_close" ubrk_close
:: FunPtr (Ptr UBreakIterator -> IO ())
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_current" ubrk_current
:: Ptr UBreakIterator -> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_first" ubrk_first
:: Ptr UBreakIterator -> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_last" ubrk_last
:: Ptr UBreakIterator -> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_next" ubrk_next
:: Ptr UBreakIterator -> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_previous" ubrk_previous
:: Ptr UBreakIterator -> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_preceding" ubrk_preceding
:: Ptr UBreakIterator -> Int32 -> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_following" ubrk_following
:: Ptr UBreakIterator -> Int32 -> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_getRuleStatus" ubrk_getRuleStatus
:: Ptr UBreakIterator -> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_getRuleStatusVec" ubrk_getRuleStatusVec
:: Ptr UBreakIterator -> Ptr Int32 -> Int32 -> Ptr UErrorCode -> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_isBoundary" ubrk_isBoundary
:: Ptr UBreakIterator -> Int32 -> IO UBool
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_countAvailable" ubrk_countAvailable
:: IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_getAvailable" ubrk_getAvailable
:: Int32 -> IO CString