{-# LANGUAGE ForeignFunctionInterface #-}
-- |
-- Module      : Data.Text.ICU.Text
-- Copyright   : (c) 2010 Bryan O'Sullivan
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : GHC
--
-- Functions for manipulating Unicode text, implemented as bindings to
-- the International Components for Unicode (ICU) libraries.
module Data.Text.ICU.Text
    (
    -- * Case conversion
    -- $case
      toCaseFold
    , toLower
    , toUpper
    ) where

import Data.Int (Int32)
import Data.Text (Text)
import Data.Text.ICU.Error.Internal (UErrorCode, handleOverflowError)
import Data.Text.ICU.Internal (LocaleName, UChar, withLocaleName, useAsUCharPtr, fromUCharPtr)
import Data.Word (Word32)
import Foreign.C.String (CString)
import Foreign.Ptr (Ptr)
import System.IO.Unsafe (unsafePerformIO)

-- $case
--
-- In some languages, case conversion is a locale- and
-- context-dependent operation. The case conversion functions in this
-- module are locale and context sensitive.

-- | Case-fold the characters in a string.
--
-- Case folding is locale independent and not context sensitive, but
-- there is an option for treating the letter I specially for Turkic
-- languages.  The result may be longer or shorter than the original.
toCaseFold :: Bool -- ^ Whether to include or exclude mappings for
                   -- dotted and dotless I and i that are marked with
                   -- 'I' in @CaseFolding.txt@.
           -> Text -> Text
toCaseFold :: Bool -> Text -> Text
toCaseFold Bool
excludeI Text
s = forall a. IO a -> a
unsafePerformIO forall b c a. (b -> c) -> (a -> b) -> a -> c
.
  forall a. Text -> (Ptr UChar -> I16 -> IO a) -> IO a
useAsUCharPtr Text
s forall a b. (a -> b) -> a -> b
$ \Ptr UChar
sptr I16
slen -> do
    let opts :: Word32
opts = forall a b. (Integral a, Num b) => a -> b
fromIntegral forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Enum a => a -> Int
fromEnum forall a b. (a -> b) -> a -> b
$ Bool
excludeI
    forall a b.
Storable a =>
Int
-> (Ptr a -> Int32 -> Ptr UErrorCode -> IO Int32)
-> (Ptr a -> Int -> IO b)
-> IO b
handleOverflowError (forall a b. (Integral a, Num b) => a -> b
fromIntegral I16
slen)
        (\Ptr UChar
dptr Int32
dlen -> Ptr UChar
-> Int32
-> Ptr UChar
-> Int32
-> Word32
-> Ptr UErrorCode
-> IO Int32
u_strFoldCase Ptr UChar
dptr Int32
dlen Ptr UChar
sptr (forall a b. (Integral a, Num b) => a -> b
fromIntegral I16
slen) Word32
opts)
        (\Ptr UChar
dptr Int
dlen -> Ptr UChar -> I16 -> IO Text
fromUCharPtr Ptr UChar
dptr (forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
dlen))

type CaseMapper = Ptr UChar -> Int32 -> Ptr UChar -> Int32 -> CString
                -> Ptr UErrorCode -> IO Int32

caseMap :: CaseMapper -> LocaleName -> Text -> Text
caseMap :: CaseMapper -> LocaleName -> Text -> Text
caseMap CaseMapper
mapFn LocaleName
loc Text
s = forall a. IO a -> a
unsafePerformIO forall b c a. (b -> c) -> (a -> b) -> a -> c
.
  forall a. LocaleName -> (CString -> IO a) -> IO a
withLocaleName LocaleName
loc forall a b. (a -> b) -> a -> b
$ \CString
locale ->
    forall a. Text -> (Ptr UChar -> I16 -> IO a) -> IO a
useAsUCharPtr Text
s forall a b. (a -> b) -> a -> b
$ \Ptr UChar
sptr I16
slen ->
      forall a b.
Storable a =>
Int
-> (Ptr a -> Int32 -> Ptr UErrorCode -> IO Int32)
-> (Ptr a -> Int -> IO b)
-> IO b
handleOverflowError (forall a b. (Integral a, Num b) => a -> b
fromIntegral I16
slen)
      (\Ptr UChar
dptr Int32
dlen -> CaseMapper
mapFn Ptr UChar
dptr Int32
dlen Ptr UChar
sptr (forall a b. (Integral a, Num b) => a -> b
fromIntegral I16
slen) CString
locale)
      (\Ptr UChar
dptr Int
dlen -> Ptr UChar -> I16 -> IO Text
fromUCharPtr Ptr UChar
dptr (forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
dlen))

-- | Lowercase the characters in a string.
--
-- Casing is locale dependent and context sensitive.  The result may
-- be longer or shorter than the original.
toLower :: LocaleName -> Text -> Text
toLower :: LocaleName -> Text -> Text
toLower = CaseMapper -> LocaleName -> Text -> Text
caseMap CaseMapper
u_strToLower

-- | Uppercase the characters in a string.
--
-- Casing is locale dependent and context sensitive.  The result may
-- be longer or shorter than the original.
toUpper :: LocaleName -> Text -> Text
toUpper :: LocaleName -> Text -> Text
toUpper = CaseMapper -> LocaleName -> Text -> Text
caseMap CaseMapper
u_strToUpper

foreign import ccall unsafe "hs_text_icu.h __hs_u_strFoldCase" u_strFoldCase
    :: Ptr UChar -> Int32 -> Ptr UChar -> Int32 -> Word32 -> Ptr UErrorCode
    -> IO Int32

foreign import ccall unsafe "hs_text_icu.h __hs_u_strToLower" u_strToLower
    :: CaseMapper

foreign import ccall unsafe "hs_text_icu.h __hs_u_strToUpper" u_strToUpper
    :: CaseMapper