{-# LINE 1 "Data/Text/ICU/Break.hsc" #-}
{-# LANGUAGE BangPatterns, ForeignFunctionInterface, RecordWildCards #-}
module Data.Text.ICU.Break
(
BreakIterator
, Line(..)
, Data.Text.ICU.Break.Word(..)
, breakCharacter
, breakLine
, breakSentence
, breakWord
, clone
, setText
, current
, first
, last
, next
, previous
, preceding
, following
, isBoundary
, getStatus
, getStatuses
, available
) where
import Control.DeepSeq (NFData(..))
import Control.Monad (forM)
import Data.IORef (newIORef, writeIORef)
import Data.Int (Int32)
import Data.Text (Text)
import Data.Text.ICU.Break.Types (BreakIterator(..), UBreakIterator)
import Data.Text.ICU.Error.Internal (UErrorCode, handleError)
import Data.Text.ICU.Internal (LocaleName(..), UBool, UChar, asBool, withLocaleName, TextI, UText, asUTextPtr, withUTextPtr, newICUPtr)
import Foreign.C.String (CString, peekCString)
import Foreign.C.Types (CInt(..))
import Foreign.ForeignPtr (withForeignPtr)
import Foreign.Marshal.Array (allocaArray, peekArray)
import Foreign.Marshal.Utils (with)
import Foreign.Ptr (FunPtr, Ptr, nullPtr)
import Prelude hiding (last)
import System.IO.Unsafe (unsafePerformIO)
data Line = Soft
| Hard
deriving (Eq, Show, Enum)
instance NFData Line where
rnf !_ = ()
data Word = Uncategorized
| Number
| Letter
| Kana
| Ideograph
deriving (Eq, Show, Enum)
instance NFData Data.Text.ICU.Break.Word where
rnf !_ = ()
breakCharacter :: LocaleName -> Text -> IO (BreakIterator ())
breakCharacter = open (0) (const ())
{-# LINE 110 "Data/Text/ICU/Break.hsc" #-}
breakLine :: LocaleName -> Text -> IO (BreakIterator Line)
breakLine = open (2) asLine
{-# LINE 118 "Data/Text/ICU/Break.hsc" #-}
where
asLine i
| i < (100) = Soft
{-# LINE 121 "Data/Text/ICU/Break.hsc" #-}
| i < (200) = Hard
{-# LINE 122 "Data/Text/ICU/Break.hsc" #-}
| otherwise = error $ "unknown line break status " ++ show i
breakSentence :: LocaleName -> Text -> IO (BreakIterator ())
breakSentence = open (3) (const ())
{-# LINE 131 "Data/Text/ICU/Break.hsc" #-}
breakWord :: LocaleName -> Text -> IO (BreakIterator Data.Text.ICU.Break.Word)
breakWord = open (1) asWord
{-# LINE 142 "Data/Text/ICU/Break.hsc" #-}
where
asWord i
| i < (100) = Uncategorized
{-# LINE 145 "Data/Text/ICU/Break.hsc" #-}
| i < (200) = Number
{-# LINE 146 "Data/Text/ICU/Break.hsc" #-}
| i < (300) = Letter
{-# LINE 147 "Data/Text/ICU/Break.hsc" #-}
| i < (400) = Kana
{-# LINE 148 "Data/Text/ICU/Break.hsc" #-}
| i < (500) = Ideograph
{-# LINE 149 "Data/Text/ICU/Break.hsc" #-}
| otherwise = error $ "unknown word break status " ++ show i
open :: UBreakIteratorType -> (Int32 -> a) -> LocaleName -> Text
-> IO (BreakIterator a)
open brk f loc t = withLocaleName loc $ \locale -> do
r <- newIORef undefined
b <- newICUPtr (BR r f) ubrk_close $
handleError $ ubrk_open brk locale nullPtr 0
setText b t
return b
setText :: BreakIterator a -> Text -> IO ()
setText BR{..} t = do
fp <- asUTextPtr t
withUTextPtr fp $ \ ptr -> do
withForeignPtr brIter $ \p -> handleError $ ubrk_setUText p ptr
writeIORef brText fp
clone :: BreakIterator a -> IO (BreakIterator a)
clone BR{..} = newICUPtr (BR brText brStatus) ubrk_close $
withForeignPtr brIter $ \p ->
with 1 $ handleError . ubrk_safeClone p nullPtr
asIndex :: (Ptr UBreakIterator -> IO Int32) -> BreakIterator a -> IO (Maybe TextI)
asIndex act BR{..} = do
i <- withForeignPtr brIter act
return $! if i == (-1)
{-# LINE 181 "Data/Text/ICU/Break.hsc" #-}
then Nothing
else Just $! fromIntegral i
first :: BreakIterator a -> IO TextI
first BR{..} = fromIntegral `fmap` withForeignPtr brIter ubrk_first
last :: BreakIterator a -> IO TextI
last BR{..} = fromIntegral `fmap` withForeignPtr brIter ubrk_last
next :: BreakIterator a -> IO (Maybe TextI)
next = asIndex ubrk_next
previous :: BreakIterator a -> IO (Maybe TextI)
previous = asIndex ubrk_previous
preceding :: BreakIterator a -> Int -> IO (Maybe TextI)
preceding bi i = asIndex (flip ubrk_preceding (fromIntegral i)) bi
following :: BreakIterator a -> Int -> IO (Maybe TextI)
following bi i = asIndex (flip ubrk_following (fromIntegral i)) bi
current :: BreakIterator a -> IO (Maybe TextI)
current = asIndex ubrk_current
getStatus :: BreakIterator a -> IO a
getStatus BR{..} = brStatus `fmap` withForeignPtr brIter ubrk_getRuleStatus
getStatuses :: BreakIterator a -> IO [a]
getStatuses BR{..} =
withForeignPtr brIter $ \brk -> do
n <- handleError $ ubrk_getRuleStatusVec brk nullPtr 0
allocaArray (fromIntegral n) $ \ptr -> do
_ <- handleError $ ubrk_getRuleStatusVec brk ptr n
map brStatus `fmap` peekArray (fromIntegral n) ptr
isBoundary :: BreakIterator a -> Int -> IO Bool
isBoundary BR{..} i = asBool `fmap` withForeignPtr brIter (flip ubrk_isBoundary (fromIntegral i))
available :: [LocaleName]
available = unsafePerformIO $ do
n <- ubrk_countAvailable
forM [0..n-1] $ \i -> ubrk_getAvailable i >>= fmap Locale . peekCString
{-# NOINLINE available #-}
type UBreakIteratorType = CInt
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_open" ubrk_open
:: UBreakIteratorType -> CString -> Ptr UChar -> Int32 -> Ptr UErrorCode
-> IO (Ptr UBreakIterator)
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_setUText" ubrk_setUText
:: Ptr UBreakIterator -> Ptr UText -> Ptr UErrorCode
-> IO ()
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_safeClone" ubrk_safeClone
:: Ptr UBreakIterator -> Ptr a -> Ptr Int32 -> Ptr UErrorCode
-> IO (Ptr UBreakIterator)
foreign import ccall unsafe "hs_text_icu.h &__hs_ubrk_close" ubrk_close
:: FunPtr (Ptr UBreakIterator -> IO ())
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_current" ubrk_current
:: Ptr UBreakIterator -> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_first" ubrk_first
:: Ptr UBreakIterator -> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_last" ubrk_last
:: Ptr UBreakIterator -> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_next" ubrk_next
:: Ptr UBreakIterator -> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_previous" ubrk_previous
:: Ptr UBreakIterator -> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_preceding" ubrk_preceding
:: Ptr UBreakIterator -> Int32 -> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_following" ubrk_following
:: Ptr UBreakIterator -> Int32 -> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_getRuleStatus" ubrk_getRuleStatus
:: Ptr UBreakIterator -> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_getRuleStatusVec" ubrk_getRuleStatusVec
:: Ptr UBreakIterator -> Ptr Int32 -> Int32 -> Ptr UErrorCode -> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_isBoundary" ubrk_isBoundary
:: Ptr UBreakIterator -> Int32 -> IO UBool
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_countAvailable" ubrk_countAvailable
:: IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_getAvailable" ubrk_getAvailable
:: Int32 -> IO CString