--  GIMP Toolkit (GTK) UTF aware string marshalling
--
--  Author : Axel Simon
--
--  Created: 22 June 2001
--
--  Copyright (c) 1999..2002 Axel Simon
--
--  This library is free software; you can redistribute it and/or
--  modify it under the terms of the GNU Lesser General Public
--  License as published by the Free Software Foundation; either
--  version 2.1 of the License, or (at your option) any later version.
--
--  This library is distributed in the hope that it will be useful,
--  but WITHOUT ANY WARRANTY; without even the implied warranty of
--  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
--  Lesser General Public License for more details.
--
-- |
-- Maintainer  : gtk2hs-users@lists.sourceforge.net
-- Stability   : provisional
-- Portability : portable (depends on GHC)
--
-- This module adds CString-like functions that handle UTF8 strings.
--

module System.Glib.UTFString (
  withUTFString,
  withUTFStringLen,
  newUTFString,
  newUTFStringLen,
  peekUTFString,
  peekUTFStringLen,
  maybePeekUTFString,
  readUTFString,
  readCString,
  withUTFStrings,
  withUTFStringArray,
  withUTFStringArray0,
  peekUTFStringArray,
  peekUTFStringArray0,
  readUTFStringArray0,
  UTFCorrection,
  genUTFOfs,
  ofsToUTF,
  ofsFromUTF
  ) where

import Control.Monad	(liftM)
import Data.Char (ord, chr)
import Data.Maybe (maybe)

import System.Glib.FFI

-- | Like 'withCString' but using the UTF-8 encoding.
--
withUTFString :: String -> (CString -> IO a) -> IO a
withUTFString hsStr = withCAString (toUTF hsStr)

-- | Like 'withCStringLen' but using the UTF-8 encoding.
--
withUTFStringLen :: String -> (CStringLen -> IO a) -> IO a
withUTFStringLen hsStr = withCAStringLen (toUTF hsStr)

-- | Like 'newCString' but using the UTF-8 encoding.
--
newUTFString :: String -> IO CString
newUTFString = newCAString . toUTF

-- | Like  Define newUTFStringLen to emit UTF-8.
--
newUTFStringLen :: String -> IO CStringLen
newUTFStringLen = newCAStringLen . toUTF

-- | Like 'peekCString' but using the UTF-8 encoding.
--
peekUTFString :: CString -> IO String
peekUTFString strPtr = liftM fromUTF $ peekCAString strPtr

-- | Like 'maybePeek' 'peekCString' but using the UTF-8 encoding to retrieve
-- UTF-8 from a 'CString' which may be the 'nullPtr'.
--
maybePeekUTFString :: CString -> IO (Maybe String)
maybePeekUTFString strPtr = liftM (maybe Nothing (Just . fromUTF)) $ maybePeek peekCAString strPtr

-- | Like 'peekCStringLen' but using the UTF-8 encoding.
--
peekUTFStringLen :: CStringLen -> IO String
peekUTFStringLen strPtr = liftM fromUTF $ peekCAStringLen strPtr

-- | Like like 'peekUTFString' but then frees the string using g_free
--
readUTFString :: CString -> IO String
readUTFString strPtr = do
  str <- peekUTFString strPtr
  g_free strPtr
  return str

-- | Like 'peekCString' but then frees the string using @g_free@.
--
readCString :: CString -> IO String
readCString strPtr = do
  str <- peekCAString strPtr
  g_free strPtr
  return str

foreign import ccall unsafe "g_free"
  g_free :: Ptr a -> IO ()

-- | Temporarily allocate a list of UTF-8 'CString's.
--
withUTFStrings :: [String] -> ([CString] -> IO a) -> IO a
withUTFStrings hsStrs = withUTFStrings' hsStrs []
  where withUTFStrings' :: [String] -> [CString] -> ([CString] -> IO a) -> IO a
        withUTFStrings' []     cs body = body (reverse cs)
        withUTFStrings' (s:ss) cs body = withUTFString s $ \c ->
                                         withUTFStrings' ss (c:cs) body

-- | Temporarily allocate an array of UTF-8 encoded 'CString's.
--
withUTFStringArray :: [String] -> (Ptr CString -> IO a) -> IO a
withUTFStringArray hsStr body =
  withUTFStrings hsStr $ \cStrs -> do
  withArray cStrs body

-- | Temporarily allocate a null-terminated array of UTF-8 encoded 'CString's.
--
withUTFStringArray0 :: [String] -> (Ptr CString -> IO a) -> IO a
withUTFStringArray0 hsStr body =
  withUTFStrings hsStr $ \cStrs -> do
  withArray0 nullPtr cStrs body

-- | Convert an array (of the given length) of UTF-8 encoded 'CString's to a
--   list of Haskell 'String's.
--
peekUTFStringArray :: Int -> Ptr CString -> IO [String]
peekUTFStringArray len cStrArr = do
  cStrs <- peekArray len cStrArr
  mapM peekUTFString cStrs

-- | Convert a null-terminated array of UTF-8 encoded 'CString's to a list of
--   Haskell 'String's.
--
peekUTFStringArray0 :: Ptr CString -> IO [String]
peekUTFStringArray0 cStrArr = do
  cStrs <- peekArray0 nullPtr cStrArr
  mapM peekUTFString cStrs

-- | Like 'peekUTFStringArray0' but then free the string array including all
-- strings.
--
-- To be used when functions indicate that their return value should be freed
-- with @g_strfreev@.
--
readUTFStringArray0 :: Ptr CString -> IO [String]
readUTFStringArray0 cStrArr | cStrArr == nullPtr = return []
                            | otherwise = do
  cStrs <- peekArray0 nullPtr cStrArr
  strings <- mapM peekUTFString cStrs
  g_strfreev cStrArr
  return strings

foreign import ccall unsafe "g_strfreev"
  g_strfreev :: Ptr a -> IO ()

-- | Encode a Haskell Unicode String as UTF-8
--
-- You should think of this as it it had type @String -> [Word8]@
--
toUTF :: String -> String
toUTF [] = []
toUTF (x:xs) | ord x<=0x007F = x:toUTF xs
	     | ord x<=0x07FF = chr (0xC0 .|. ((ord x `shift` (-6)) .&. 0x1F)):
			       chr (0x80 .|. (ord x .&. 0x3F)):
			       toUTF xs
	     | otherwise     = chr (0xE0 .|. ((ord x `shift` (-12)) .&. 0x0F)):
			       chr (0x80 .|. ((ord x `shift` (-6)) .&. 0x3F)):
			       chr (0x80 .|. (ord x .&. 0x3F)):
			       toUTF xs

-- | Decode a UTF-8 string into a Haskell Unicode String.
--
-- You should think of this as it it had type @[Word8] -> String@
--
fromUTF :: String -> String
fromUTF [] = []
fromUTF (all@(x:xs)) | ord x<=0x7F = x:fromUTF xs
		     | ord x<=0xBF = err
		     | ord x<=0xDF = twoBytes all
		     | ord x<=0xEF = threeBytes all
		     | otherwise   = err
  where
    twoBytes (x1:x2:xs) = chr (((ord x1 .&. 0x1F) `shift` 6) .|.
			       (ord x2 .&. 0x3F)):fromUTF xs
    twoBytes _ = error "fromUTF: illegal two byte sequence"

    threeBytes (x1:x2:x3:xs) = chr (((ord x1 .&. 0x0F) `shift` 12) .|.
				    ((ord x2 .&. 0x3F) `shift` 6) .|.
				    (ord x3 .&. 0x3F)):fromUTF xs
    threeBytes _ = error "fromUTF: illegal three byte sequence" 
    
    err = error "fromUTF: illegal UTF-8 character"

-- | Offset correction for String to UTF8 mapping.
--
newtype UTFCorrection = UTFCorrection [Int] deriving Show

-- | Create a list of offset corrections.
--
genUTFOfs :: String -> UTFCorrection
genUTFOfs str = UTFCorrection (gUO 0 str)
  where
  gUO n [] = []
  gUO n (x:xs) | ord x<=0x007F = gUO (n+1) xs
	       | ord x<=0x07FF = n:gUO (n+1) xs
	       | otherwise     = n:n:gUO (n+1) xs

ofsToUTF :: Int -> UTFCorrection -> Int
ofsToUTF n (UTFCorrection oc) = oTU oc
  where
  oTU [] = n
  oTU (x:xs) | n<=x = n
	     | otherwise = 1+oTU xs

ofsFromUTF :: Int -> UTFCorrection -> Int
ofsFromUTF n (UTFCorrection oc) = oFU n oc
  where
  oFU n [] = n
  oFU n (x:xs) | n<=x = n
	       | otherwise = oFU (n-1) xs