{-# LANGUAGE ForeignFunctionInterface #-}
-- |
-- Module      : Data.Text.ICU.Text
-- Copyright   : (c) 2010 Bryan O'Sullivan
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : GHC
--
-- Functions for manipulating Unicode text, implemented as bindings to
-- the International Components for Unicode (ICU) libraries.
module Data.Text.ICU.Text
    (
    -- * Case conversion
    -- $case
      toCaseFold
    , toLower
    , toUpper
    ) where

import Data.Int (Int32)
import Data.Text (Text)
import Data.Text.Foreign (fromPtr, useAsPtr)
import Data.Text.ICU.Error.Internal (UErrorCode, handleOverflowError)
import Data.Text.ICU.Internal (LocaleName, UChar, withLocaleName)
import Data.Word (Word32)
import Foreign.C.String (CString)
import Foreign.Ptr (Ptr, castPtr)
import System.IO.Unsafe (unsafePerformIO)

-- $case
--
-- In some languages, case conversion is a locale- and
-- context-dependent operation. The case conversion functions in this
-- module are locale and context sensitive.

-- | Case-fold the characters in a string.
--
-- Case folding is locale independent and not context sensitive, but
-- there is an option for treating the letter I specially for Turkic
-- languages.  The result may be longer or shorter than the original.
toCaseFold :: Bool -- ^ Whether to include or exclude mappings for
                   -- dotted and dotless I and i that are marked with
                   -- 'I' in @CaseFolding.txt@.
           -> Text -> Text
toCaseFold :: Bool -> Text -> Text
toCaseFold Bool
excludeI Text
s = IO Text -> Text
forall a. IO a -> a
unsafePerformIO (IO Text -> Text)
-> ((Ptr Word16 -> I16 -> IO Text) -> IO Text)
-> (Ptr Word16 -> I16 -> IO Text)
-> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
  Text -> (Ptr Word16 -> I16 -> IO Text) -> IO Text
forall a. Text -> (Ptr Word16 -> I16 -> IO a) -> IO a
useAsPtr Text
s ((Ptr Word16 -> I16 -> IO Text) -> Text)
-> (Ptr Word16 -> I16 -> IO Text) -> Text
forall a b. (a -> b) -> a -> b
$ \Ptr Word16
sptr I16
slen -> do
    let opts :: Word32
opts = Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word32) -> (Bool -> Int) -> Bool -> Word32
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Bool -> Int
forall a. Enum a => a -> Int
fromEnum (Bool -> Word32) -> Bool -> Word32
forall a b. (a -> b) -> a -> b
$ Bool
excludeI
    Int
-> (Ptr Word16 -> Int32 -> Ptr UErrorCode -> IO Int32)
-> (Ptr Word16 -> Int -> IO Text)
-> IO Text
forall a b.
Storable a =>
Int
-> (Ptr a -> Int32 -> Ptr UErrorCode -> IO Int32)
-> (Ptr a -> Int -> IO b)
-> IO b
handleOverflowError (I16 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral I16
slen)
        (\Ptr Word16
dptr Int32
dlen -> Ptr Word16
-> Int32
-> Ptr Word16
-> Int32
-> Word32
-> Ptr UErrorCode
-> IO Int32
u_strFoldCase Ptr Word16
dptr Int32
dlen Ptr Word16
sptr (I16 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral I16
slen) Word32
opts)
        (\Ptr Word16
dptr Int
dlen -> Ptr Word16 -> I16 -> IO Text
fromPtr (Ptr Word16 -> Ptr Word16
forall a b. Ptr a -> Ptr b
castPtr Ptr Word16
dptr) (Int -> I16
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
dlen))

type CaseMapper = Ptr UChar -> Int32 -> Ptr UChar -> Int32 -> CString
                -> Ptr UErrorCode -> IO Int32

caseMap :: CaseMapper -> LocaleName -> Text -> Text
caseMap :: CaseMapper -> LocaleName -> Text -> Text
caseMap CaseMapper
mapFn LocaleName
loc Text
s = IO Text -> Text
forall a. IO a -> a
unsafePerformIO (IO Text -> Text)
-> ((CString -> IO Text) -> IO Text)
-> (CString -> IO Text)
-> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
  LocaleName -> (CString -> IO Text) -> IO Text
forall a. LocaleName -> (CString -> IO a) -> IO a
withLocaleName LocaleName
loc ((CString -> IO Text) -> Text) -> (CString -> IO Text) -> Text
forall a b. (a -> b) -> a -> b
$ \CString
locale ->
    Text -> (Ptr Word16 -> I16 -> IO Text) -> IO Text
forall a. Text -> (Ptr Word16 -> I16 -> IO a) -> IO a
useAsPtr Text
s ((Ptr Word16 -> I16 -> IO Text) -> IO Text)
-> (Ptr Word16 -> I16 -> IO Text) -> IO Text
forall a b. (a -> b) -> a -> b
$ \Ptr Word16
sptr I16
slen ->
      Int
-> (Ptr Word16 -> Int32 -> Ptr UErrorCode -> IO Int32)
-> (Ptr Word16 -> Int -> IO Text)
-> IO Text
forall a b.
Storable a =>
Int
-> (Ptr a -> Int32 -> Ptr UErrorCode -> IO Int32)
-> (Ptr a -> Int -> IO b)
-> IO b
handleOverflowError (I16 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral I16
slen)
      (\Ptr Word16
dptr Int32
dlen -> CaseMapper
mapFn Ptr Word16
dptr Int32
dlen Ptr Word16
sptr (I16 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral I16
slen) CString
locale)
      (\Ptr Word16
dptr Int
dlen -> Ptr Word16 -> I16 -> IO Text
fromPtr (Ptr Word16 -> Ptr Word16
forall a b. Ptr a -> Ptr b
castPtr Ptr Word16
dptr) (Int -> I16
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
dlen))

-- | Lowercase the characters in a string.
--
-- Casing is locale dependent and context sensitive.  The result may
-- be longer or shorter than the original.
toLower :: LocaleName -> Text -> Text
toLower :: LocaleName -> Text -> Text
toLower = CaseMapper -> LocaleName -> Text -> Text
caseMap CaseMapper
u_strToLower

-- | Uppercase the characters in a string.
--
-- Casing is locale dependent and context sensitive.  The result may
-- be longer or shorter than the original.
toUpper :: LocaleName -> Text -> Text
toUpper :: LocaleName -> Text -> Text
toUpper = CaseMapper -> LocaleName -> Text -> Text
caseMap CaseMapper
u_strToUpper

foreign import ccall unsafe "hs_text_icu.h __hs_u_strFoldCase" u_strFoldCase
    :: Ptr UChar -> Int32 -> Ptr UChar -> Int32 -> Word32 -> Ptr UErrorCode
    -> IO Int32

foreign import ccall unsafe "hs_text_icu.h __hs_u_strToLower" u_strToLower
    :: CaseMapper

foreign import ccall unsafe "hs_text_icu.h __hs_u_strToUpper" u_strToUpper
    :: CaseMapper