module Data.Text.Short
(
ShortText
, Data.Text.Short.null
, Data.Text.Short.length
, Data.Text.Short.isAscii
, Data.Text.Short.fromString
, toString
, fromText
, toText
, fromShortByteString
, toShortByteString
, fromByteString
, toByteString
, toBuilder
) where
import Control.DeepSeq (NFData)
import qualified Data.ByteString as BS
import Data.ByteString.Short (ShortByteString)
import qualified Data.ByteString.Short as BSS
import qualified Data.ByteString.Short.Internal as BSSI
import Data.Char
import Data.Hashable (Hashable)
import Data.Semigroup
import qualified Data.String as S
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import Foreign.C
import GHC.Exts (ByteArray#)
import qualified GHC.Foreign as GHC
import GHC.IO.Encoding
import System.IO.Unsafe
import Data.Binary
import qualified Data.ByteString.Builder as BB
newtype ShortText = ShortText ShortByteString
deriving (Eq,Ord,Monoid,Semigroup,Hashable,NFData)
instance Show ShortText where
showsPrec p (ShortText b) = showsPrec p (decodeStringShort' utf8 b)
show (ShortText b) = show (decodeStringShort' utf8 b)
instance Read ShortText where
readsPrec p = map (\(x,s) -> (ShortText $ encodeStringShort utf8 x,s)) . readsPrec p
instance S.IsString ShortText where
fromString = fromString
#if MIN_VERSION_binary(0,8,1)
instance Binary ShortText where
put = put . toShortByteString
get = do
sbs <- get
case fromShortByteString sbs of
Nothing -> fail "Binary.get(ShortText): Invalid UTF-8 stream"
Just st -> return st
#else
instance Binary ShortText where
put = put . toByteString
get = do
bs <- get
case fromByteString bs of
Nothing -> fail "Binary.get(ShortText): Invalid UTF-8 stream"
Just st -> return st
#endif
null :: ShortText -> Bool
null = BSS.null . toShortByteString
length :: ShortText -> Int
length st = fromIntegral $ unsafePerformIO (c_text_short_length (toByteArray# st) (toCSize st))
foreign import ccall unsafe "hs_text_short_length" c_text_short_length :: ByteArray# -> CSize -> IO CSize
isAscii :: ShortText -> Bool
isAscii st = (== sz) $ unsafePerformIO (c_text_short_is_ascii (toByteArray# st) sz)
where
sz = toCSize st
foreign import ccall unsafe "hs_text_short_is_ascii" c_text_short_is_ascii :: ByteArray# -> CSize -> IO CSize
toCSize :: ShortText -> CSize
toCSize = fromIntegral . BSS.length . toShortByteString
toByteArray# :: ShortText -> ByteArray#
toByteArray# (ShortText (BSSI.SBS ba#)) = ba#
toShortByteString :: ShortText -> ShortByteString
toShortByteString (ShortText b) = b
toByteString :: ShortText -> BS.ByteString
toByteString = BSS.fromShort . toShortByteString
toBuilder :: ShortText -> BB.Builder
toBuilder = BB.shortByteString . toShortByteString
toString :: ShortText -> String
toString = decodeStringShort' utf8 . toShortByteString
toText :: ShortText -> T.Text
toText = T.decodeUtf8 . toByteString
fromString :: String -> ShortText
fromString = ShortText . encodeStringShort utf8 . map r
where
r c | 0xd800 <= x && x < 0xe000 = '\xFFFD'
| otherwise = c
where
x = ord c
fromText :: T.Text -> ShortText
fromText = fromByteString' . T.encodeUtf8
fromShortByteString :: ShortByteString -> Maybe ShortText
fromShortByteString sbs
| isValidUtf8 st = Just st
| otherwise = Nothing
where
st = ShortText sbs
fromByteString :: BS.ByteString -> Maybe ShortText
fromByteString = fromShortByteString . BSS.toShort
fromByteString' :: BS.ByteString -> ShortText
fromByteString' = ShortText . BSS.toShort
encodeString :: TextEncoding -> String -> BS.ByteString
encodeString te str = unsafePerformIO $ GHC.withCStringLen te str BS.packCStringLen
decodeString' :: TextEncoding -> BS.ByteString -> String
decodeString' te bs = unsafePerformIO $ BS.useAsCStringLen bs (GHC.peekCStringLen te)
decodeStringShort' :: TextEncoding -> ShortByteString -> String
decodeStringShort' te = decodeString' te . BSS.fromShort
encodeStringShort :: TextEncoding -> String -> BSS.ShortByteString
encodeStringShort te = BSS.toShort . encodeString te
isValidUtf8 :: ShortText -> Bool
isValidUtf8 st = (==0) $ unsafePerformIO (c_text_short_is_valid_utf8 (toByteArray# st) (toCSize st))
foreign import ccall unsafe "hs_text_short_is_valid_utf8" c_text_short_is_valid_utf8 :: ByteArray# -> CSize -> IO CInt