module CPython.Types.Unicode
(
Unicode
, Encoding
, ErrorHandling (..)
, unicodeType
, toUnicode
, fromUnicode
, length
, fromEncodedObject
, fromObject
, encode
, decode
, append
, split
, splitLines
, translate
, join
, MatchDirection (..)
, tailMatch
, FindDirection (..)
, find
, count
, replace
, format
, contains
) where
import Prelude hiding (length)
import Control.Exception (ErrorCall (..), throwIO)
import qualified Data.Text as T
import Data.Char (chr, ord)
import CPython.Internal
import CPython.Types.Bytes (Bytes)
-- | Handle to a unicode string object owned by the Python interpreter.
-- The constructor wraps the 'ForeignPtr' through which the object is reached.
newtype Unicode = Unicode (ForeignPtr Unicode)
-- | Lets a 'Unicode' be passed wherever a generic 'Object' is accepted:
-- 'toObject' unwraps the foreign pointer into a 'SomeObject', and
-- 'fromForeignPtr' re-wraps a raw foreign pointer.
instance Object Unicode where
  toObject (Unicode ptr) = SomeObject ptr
  fromForeignPtr = Unicode
-- | Every 'Unicode' value has the concrete type 'unicodeType'; the
-- argument is never inspected.
instance Concrete Unicode where
  concreteType = const unicodeType
-- | Name of a text codec, such as the @\"utf-8\"@ used by 'fromObject'.
type Encoding = T.Text
-- | Error-handling mode handed to the interpreter's codec functions.
-- 'withErrors' turns each constructor into the matching C string.
data ErrorHandling
  = Strict  -- ^ sent as @\"strict\"@
  | Replace -- ^ sent as @\"replace\"@
  | Ignore  -- ^ sent as @\"ignore\"@
  deriving (Show, Eq)
-- | Run an action with a temporary C string holding the name of the
-- given error-handling mode.
withErrors :: ErrorHandling -> (CString -> IO a) -> IO a
withErrors errors = withCString (modeName errors)
  where
    -- Spelling of each mode as passed to the C API.
    modeName Strict = "strict"
    modeName Replace = "replace"
    modeName Ignore = "ignore"
-- | The Python type object for unicode strings, read from the static
-- pointer exported by the C wrapper via 'peekStaticObject'.
unicodeType :: Type
unicodeType = unsafePerformIO (peekStaticObject unicodeType'_)
-- | Build a Python unicode object from a Haskell 'T.Text'.
--
-- The text is expanded to one 'CUInt' per character, copied into a
-- temporary C array, and handed (pointer cast, length narrowed with
-- 'fromIntegral') to @hscpython_PyUnicode_FromUnicode@. The resulting
-- pointer is wrapped with 'stealObject'.
--
-- NOTE(review): this presumes the interpreter's character unit has the
-- same width as 'CUInt' -- confirm against the build configuration.
toUnicode :: T.Text -> IO Unicode
toUnicode str = withArrayLen codePoints build >>= stealObject
  where
    codePoints :: [CUInt]
    codePoints = map (fromIntegral . ord) (T.unpack str)

    build count buf =
      hscpython_PyUnicode_FromUnicode (castPtr buf) (fromIntegral count)
-- | Copy the contents of a Python unicode object into a Haskell 'T.Text'.
--
-- Fetches the object's character buffer and its size, reads that many
-- elements from the buffer, and converts each element to a 'Char'.
--
-- NOTE(review): the buffer pointer is read without a null check, and the
-- element width is whatever the FFI declaration of
-- @hscpython_PyUnicode_AsUnicode@ says -- confirm both are safe.
fromUnicode :: Unicode -> IO T.Text
fromUnicode obj = withObject obj $ \objPtr -> do
  chars <- hscpython_PyUnicode_AsUnicode objPtr
  n <- hscpython_PyUnicode_GetSize objPtr
  units <- peekArray (fromIntegral n) chars
  return (T.pack (map (chr . fromIntegral) units))
-- | Size of a unicode object as reported by
-- @hscpython_PyUnicode_GetSize@; the raw result is validated and
-- converted by 'checkIntReturn'.
length :: Unicode -> IO Integer
length str = withObject str $ \strPtr ->
  length'_ strPtr >>= checkIntReturn
-- | Decode an arbitrary object into a 'Unicode' by calling
-- @hscpython_PyUnicode_FromEncodedObject@ with the given encoding name
-- and error-handling mode. The result pointer is wrapped with
-- 'stealObject'.
fromEncodedObject :: Object obj => obj -> Encoding -> ErrorHandling -> IO Unicode
fromEncodedObject obj enc errors =
  withObject obj $ \objPtr ->
  withText enc $ \encPtr ->
  withErrors errors $ \errorsPtr ->
  fromEncodedObject'_ objPtr encPtr errorsPtr >>= stealObject
-- | Convert an object to 'Unicode' by delegating to 'fromEncodedObject'
-- with the @utf-8@ encoding and 'Strict' error handling.
fromObject :: Object obj => obj -> IO Unicode
fromObject obj = fromEncodedObject obj utf8 Strict
  where
    utf8 = T.pack "utf-8"
-- | Encode a unicode object into 'Bytes' by calling
-- @hscpython_PyUnicode_AsEncodedString@ with the given encoding name and
-- error-handling mode. The result pointer is wrapped with 'stealObject'.
encode :: Unicode -> Encoding -> ErrorHandling -> IO Bytes
encode str enc errors =
  withObject str $ \strPtr ->
  withText enc $ \encPtr ->
  withErrors errors $ \errorsPtr ->
  encode'_ strPtr encPtr errorsPtr >>= stealObject
-- | Decode a 'Bytes' object into 'Unicode'.
--
-- The byte buffer and its length are first obtained through
-- @PyBytes_AsStringAndSize@ (its status code is checked with
-- 'checkStatusCode'), then passed to @hscpython_PyUnicode_Decode@
-- together with the encoding name and error-handling mode. The result
-- pointer is wrapped with 'stealObject'.
decode :: Bytes -> Encoding -> ErrorHandling -> IO Unicode
decode bytes enc errors =
  withObject bytes $ \bytesPtr ->
  withText enc $ \encPtr ->
  withErrors errors $ \errorsPtr ->
  alloca $ \dataOut ->
  alloca $ \sizeOut -> do
    status <- pyBytesAsStringAndSize bytesPtr dataOut sizeOut
    checkStatusCode status
    rawData <- peek dataOut
    rawSize <- peek sizeOut
    decoded <- hscpython_PyUnicode_Decode rawData rawSize encPtr errorsPtr
    stealObject decoded
-- | Concatenate two unicode objects via @hscpython_PyUnicode_Concat@,
-- yielding a new 'Unicode' wrapped with 'stealObject'.
append :: Unicode -> Unicode -> IO Unicode
append left right =
  withObject left $ \leftPtr ->
  withObject right $ \rightPtr ->
  append'_ leftPtr rightPtr >>= stealObject
-- | Split a string into a 'List' of substrings via
-- @hscpython_PyUnicode_Split@.
--
-- * The separator is optional; 'Nothing' is passed to C through
--   'maybeWith', i.e. as a null pointer.
-- * The split limit is optional; 'Nothing' is passed as @-1@, which the
--   CPython API treats as \"no limit\". (The previous default of @1@
--   silently capped the result at a single split.)
--
-- The result pointer is wrapped with 'stealObject'.
split
  :: Unicode
  -> Maybe Unicode
  -> Maybe Integer
  -> IO List
split s sep maxsplit =
  withObject s $ \sPtr ->
  maybeWith withObject sep $ \sepPtr ->
  hscpython_PyUnicode_Split sPtr sepPtr limit >>= stealObject
  where
    -- A negative limit asks the C API for an unbounded number of splits.
    limit = maybe (-1) fromInteger maxsplit
-- | Break a string into a 'List' of lines via
-- @hscpython_PyUnicode_Splitlines@. The 'Bool' is converted with
-- 'fromBool' and forwarded as the C function's second argument; the
-- result pointer is wrapped with 'stealObject'.
splitLines :: Unicode -> Bool -> IO List
splitLines str keepEnds = withObject str $ \strPtr ->
  splitLines'_ strPtr (fromBool keepEnds) >>= stealObject
-- | Apply a translation table object to a string via
-- @hscpython_PyUnicode_Translate@, using the given error-handling mode.
-- The result pointer is wrapped with 'stealObject'.
translate :: Object table => Unicode -> table -> ErrorHandling -> IO Unicode
translate str table errors =
  withObject str $ \strPtr ->
  withObject table $ \tablePtr ->
  withErrors errors $ \errorsPtr ->
  translate'_ strPtr tablePtr errorsPtr >>= stealObject
-- | Join the items of a sequence with a separator string via
-- @hscpython_PyUnicode_Join@. The separator is the first argument, the
-- sequence the second; the result pointer is wrapped with 'stealObject'.
join :: Sequence seq => Unicode -> seq -> IO Unicode
join sep items =
  withObject sep $ \sepPtr ->
  withObject items $ \itemsPtr ->
  join'_ sepPtr itemsPtr >>= stealObject
-- | Selects which kind of match 'tailMatch' performs.
data MatchDirection
  = Prefix
  | Suffix
  deriving (Show, Eq)
-- | Test whether @substr@ matches @str@ within the @start@ .. @end@
-- range, at the end selected by the 'MatchDirection', via
-- @hscpython_PyUnicode_Tailmatch@.
--
-- The CPython API encodes the direction as @-1@ for a prefix match and
-- @1@ for a suffix match. Previously both constructors were sent as @1@,
-- so 'Prefix' behaved exactly like 'Suffix'.
--
-- The C result is interpreted by 'checkBoolReturn'.
tailMatch
  :: Unicode
  -> Unicode
  -> Integer
  -> Integer
  -> MatchDirection
  -> IO Bool
tailMatch str substr start end dir =
  withObject str $ \strPtr ->
  withObject substr $ \substrPtr ->
  hscpython_PyUnicode_Tailmatch
    strPtr substrPtr (fromInteger start) (fromInteger end) direction
    >>= checkBoolReturn
  where
    -- Direction codes expected by PyUnicode_Tailmatch.
    direction = case dir of
      Prefix -> -1
      Suffix -> 1
-- | Selects the search direction used by 'find'.
data FindDirection
  = Forwards
  | Backwards
  deriving (Show, Eq)
-- | Locate @substr@ inside @str@ within the @start@ .. @end@ range via
-- @hscpython_PyUnicode_Find@.
--
-- The CPython API encodes the direction as @1@ (forward) or @-1@
-- (backward), and returns the match index, @-1@ when there is no match,
-- or @-2@ when an error occurred and a Python exception is set.
--
-- Returns @Just index@ on a match and 'Nothing' when the substring is
-- absent. A @-2@ result is reported through 'exceptionIf'; any other
-- negative result raises an 'ErrorCall'.
--
-- Previously the sentinels were compared without their sign, so matches
-- at index 1 and 2 were misreported and 'Backwards' searched forwards.
find
  :: Unicode
  -> Unicode
  -> Integer
  -> Integer
  -> FindDirection
  -> IO (Maybe Integer)
find str substr start end dir =
  withObject str $ \strPtr ->
  withObject substr $ \substrPtr -> do
    let direction = case dir of
          Forwards -> 1
          Backwards -> -1
    cRes <-
      hscpython_PyUnicode_Find
        strPtr substrPtr (fromInteger start) (fromInteger end) direction
    -- -2 signals that the interpreter has an exception pending.
    exceptionIf $ cRes == (-2)
    case cRes of
      -1 -> return Nothing
      x | x >= 0 -> return . Just . toInteger $ x
      x -> throwIO . ErrorCall $ "Invalid return code: " ++ show x
-- | Count occurrences of @substr@ in @str@ within the @start@ .. @end@
-- range via @hscpython_PyUnicode_Count@. The raw result is validated and
-- converted by 'checkIntReturn'.
count
  :: Unicode
  -> Unicode
  -> Integer
  -> Integer
  -> IO Integer
count str substr start end =
  withObject str $ \strPtr ->
  withObject substr $ \substrPtr ->
  hscpython_PyUnicode_Count
    strPtr substrPtr (fromInteger start) (fromInteger end)
    >>= checkIntReturn
-- | Replace occurrences of @substr@ in @str@ with @replstr@ via
-- @hscpython_PyUnicode_Replace@, returning the new string.
--
-- The replacement limit is optional; 'Nothing' is passed as @-1@, which
-- the CPython API treats as \"replace all occurrences\". (The previous
-- default of @1@ replaced only the first occurrence.)
--
-- The result pointer is wrapped with 'stealObject'.
replace
  :: Unicode
  -> Unicode
  -> Unicode
  -> Maybe Integer
  -> IO Unicode
replace str substr replstr maxcount =
  withObject str $ \strPtr ->
  withObject substr $ \substrPtr ->
  withObject replstr $ \replstrPtr ->
  hscpython_PyUnicode_Replace strPtr substrPtr replstrPtr limit
    >>= stealObject
  where
    -- -1 asks the C API to replace every occurrence.
    limit = maybe (-1) fromInteger maxcount
-- | Produce a new string from a format string and a 'Tuple' of arguments
-- via @hscpython_PyUnicode_Format@. The result pointer is wrapped with
-- 'stealObject'.
format :: Unicode -> Tuple -> IO Unicode
format template args =
  withObject template $ \templatePtr ->
  withObject args $ \argsPtr ->
  format'_ templatePtr argsPtr >>= stealObject
-- | Test whether @element@ is contained in the string via
-- @hscpython_PyUnicode_Contains@. The C result is interpreted by
-- 'checkBoolReturn'.
contains :: Object element => Unicode -> element -> IO Bool
contains str element =
  withObject str $ \strPtr ->
  withObject element $ \elementPtr ->
  contains'_ strPtr elementPtr >>= checkBoolReturn
-- Raw FFI bindings to the C wrappers declared in
-- CPython/Types/Unicode.chs.h. Object pointers are untyped ('Ptr ()');
-- the typed wrappers above are responsible for marshalling.

-- Static type object; imported as a pure value, hence 'unsafe'.
foreign import ccall unsafe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_Type"
  unicodeType'_ :: Ptr ()

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_FromUnicode"
  hscpython_PyUnicode_FromUnicode :: Ptr CLong -> CInt -> IO (Ptr ())

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_AsUnicode"
  hscpython_PyUnicode_AsUnicode :: Ptr () -> IO (Ptr CLong)

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_GetSize"
  hscpython_PyUnicode_GetSize :: Ptr () -> IO CInt

-- Same C entry point as above, bound a second time for 'length'.
foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_GetSize"
  length'_ :: Ptr () -> IO CInt

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_FromEncodedObject"
  fromEncodedObject'_ :: Ptr () -> Ptr CChar -> Ptr CChar -> IO (Ptr ())

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_AsEncodedString"
  encode'_ :: Ptr () -> Ptr CChar -> Ptr CChar -> IO (Ptr ())

foreign import ccall safe "CPython/Types/Unicode.chs.h PyBytes_AsStringAndSize"
  pyBytesAsStringAndSize :: Ptr () -> Ptr (Ptr CChar) -> Ptr CInt -> IO CInt

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_Decode"
  hscpython_PyUnicode_Decode :: Ptr CChar -> CInt -> Ptr CChar -> Ptr CChar -> IO (Ptr ())

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_Concat"
  append'_ :: Ptr () -> Ptr () -> IO (Ptr ())

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_Split"
  hscpython_PyUnicode_Split :: Ptr () -> Ptr () -> CInt -> IO (Ptr ())

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_Splitlines"
  splitLines'_ :: Ptr () -> CInt -> IO (Ptr ())

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_Translate"
  translate'_ :: Ptr () -> Ptr () -> Ptr CChar -> IO (Ptr ())

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_Join"
  join'_ :: Ptr () -> Ptr () -> IO (Ptr ())

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_Tailmatch"
  hscpython_PyUnicode_Tailmatch :: Ptr () -> Ptr () -> CInt -> CInt -> CInt -> IO CInt

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_Find"
  hscpython_PyUnicode_Find :: Ptr () -> Ptr () -> CInt -> CInt -> CInt -> IO CInt

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_Count"
  hscpython_PyUnicode_Count :: Ptr () -> Ptr () -> CInt -> CInt -> IO CInt

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_Replace"
  hscpython_PyUnicode_Replace :: Ptr () -> Ptr () -> Ptr () -> CInt -> IO (Ptr ())

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_Format"
  format'_ :: Ptr () -> Ptr () -> IO (Ptr ())

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_Contains"
  contains'_ :: Ptr () -> Ptr () -> IO CInt