{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE BinaryLiterals #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE TypeApplications #-}

{- | This module treats 'Bytes' data as holding ASCII text. Providing bytes
outside the ASCII range (@U+0000@ -- @U+007F@) may cause a failure or
unspecified results, but such bytes will never be inspected.

For functions that can operate on ASCII-compatible encodings, see
'Data.Bytes.Text.AsciiExt'.
-}
module Data.Bytes.Text.Ascii
  ( fromString
  , decodeDecWord
  , equalsCStringCaseInsensitive
  , toShortText
  , toShortTextU
#if MIN_VERSION_text(2,0,0)
  , toText
#endif
  ) where

import Data.Bits ((.&.))
import Data.ByteString.Short.Internal (ShortByteString (SBS))
import Data.Bytes.Text.Latin1 (decodeDecWord)
import Data.Bytes.Types (Bytes (Bytes))
import Data.Char (ord)
import Data.Primitive (ByteArray)
import Data.Text (Text)
import Data.Text.Short (ShortText)
import Data.Word (Word8)
import Foreign.C.String (CString)
import Foreign.Ptr (Ptr, castPtr, plusPtr)

import qualified Data.Bytes.Pure as Bytes
import qualified Data.Primitive as PM
import qualified Data.Primitive.Ptr as PM
import qualified Data.Text.Array as A
import qualified Data.Text.Internal as I
import qualified Data.Text.Short.Unsafe as TS
import qualified GHC.Exts as Exts

{- | Convert a 'String' consisting of only characters in the ASCII block
to a byte sequence. Any character with a codepoint above @U+007F@ is
replaced by @U+0000@.
-}
fromString :: String -> Bytes
fromString :: String -> Bytes
fromString =
  ByteArray -> Bytes
Bytes.fromByteArray
    (ByteArray -> Bytes) -> (String -> ByteArray) -> String -> Bytes
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Word8] -> ByteArray
[Item ByteArray] -> ByteArray
forall l. IsList l => [Item l] -> l
Exts.fromList
    ([Word8] -> ByteArray)
-> (String -> [Word8]) -> String -> ByteArray
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Word8) -> String -> [Word8]
forall a b. (a -> b) -> [a] -> [b]
map (\Char
c -> let i :: Int
i = Char -> Int
ord Char
c in if Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
128 then forall a b. (Integral a, Num b) => a -> b
fromIntegral @Int @Word8 Int
i else Word8
0)

-- TODO presumably also fromText and fromShortText

toShortText :: Bytes -> Maybe ShortText
{-# INLINE toShortText #-}
toShortText :: Bytes -> Maybe ShortText
toShortText !Bytes
b = case (Word8 -> Bool -> Bool) -> Bool -> Bytes -> Bool
forall a. (Word8 -> a -> a) -> a -> Bytes -> a
Bytes.foldr (\Word8
w Bool
acc -> Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
128 Bool -> Bool -> Bool
&& Bool
acc) Bool
True Bytes
b of
  Bool
True -> ShortText -> Maybe ShortText
forall a. a -> Maybe a
Just (ShortByteString -> ShortText
TS.fromShortByteStringUnsafe (Bytes -> ShortByteString
Bytes.toShortByteString Bytes
b))
  Bool
False -> Maybe ShortText
forall a. Maybe a
Nothing

toShortTextU :: ByteArray -> Maybe ShortText
{-# INLINE toShortTextU #-}
toShortTextU :: ByteArray -> Maybe ShortText
toShortTextU !ByteArray
b = case (Word8 -> Bool -> Bool) -> Bool -> Bytes -> Bool
forall a. (Word8 -> a -> a) -> a -> Bytes -> a
Bytes.foldr (\Word8
w Bool
acc -> Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
128 Bool -> Bool -> Bool
&& Bool
acc) Bool
True (ByteArray -> Bytes
Bytes.fromByteArray ByteArray
b) of
  Bool
True -> ShortText -> Maybe ShortText
forall a. a -> Maybe a
Just (ShortByteString -> ShortText
TS.fromShortByteStringUnsafe (case ByteArray
b of PM.ByteArray ByteArray#
x -> ByteArray# -> ShortByteString
SBS ByteArray#
x))
  Bool
False -> Maybe ShortText
forall a. Maybe a
Nothing

#if MIN_VERSION_text(2,0,0)
-- | Interpret byte sequence as ASCII codepoints.
-- Only available when building with @text-2.0@ and newer.
-- Returns 'Nothing' if any of the bytes are outside of the
-- range @0x00-0x7F@
toText :: Bytes -> Maybe Text
{-# inline toText #-}
toText :: Bytes -> Maybe Text
toText !b :: Bytes
b@(Bytes (PM.ByteArray ByteArray#
arr) Int
off Int
len) = case (Word8 -> Bool -> Bool) -> Bool -> Bytes -> Bool
forall a. (Word8 -> a -> a) -> a -> Bytes -> a
Bytes.foldr (\Word8
w Bool
acc -> Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
128 Bool -> Bool -> Bool
&& Bool
acc) Bool
True Bytes
b of
  Bool
True -> Text -> Maybe Text
forall a. a -> Maybe a
Just (Array -> Int -> Int -> Text
I.Text (ByteArray# -> Array
A.ByteArray ByteArray#
arr) Int
off Int
len)
  Bool
False -> Maybe Text
forall a. Maybe a
Nothing
#endif

{- | Is the byte sequence equal to the @NUL@-terminated C String?
The C string must be a constant.
-}
equalsCStringCaseInsensitive :: CString -> Bytes -> Bool
{-# INLINE equalsCStringCaseInsensitive #-}
equalsCStringCaseInsensitive :: CString -> Bytes -> Bool
equalsCStringCaseInsensitive !CString
ptr0 (Bytes ByteArray
arr Int
off0 Int
len0) = Ptr Word8 -> Int -> Int -> Bool
forall {t}. (Eq t, Num t) => Ptr Word8 -> Int -> t -> Bool
go (CString -> Ptr Word8
forall a b. Ptr a -> Ptr b
castPtr CString
ptr0 :: Ptr Word8) Int
off0 Int
len0
 where
  go :: Ptr Word8 -> Int -> t -> Bool
go !Ptr Word8
ptr !Int
off !t
len = case t
len of
    t
0 -> Ptr Word8 -> Int -> Word8
forall a. Prim a => Ptr a -> Int -> a
PM.indexOffPtr Ptr Word8
ptr Int
0 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== (Word8
0 :: Word8)
    t
_ -> case Ptr Word8 -> Int -> Word8
forall a. Prim a => Ptr a -> Int -> a
PM.indexOffPtr Ptr Word8
ptr Int
0 of
      Word8
0 -> Bool
False
      Word8
c ->
        (Word8
c Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0b1101_1111) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== (ByteArray -> Int -> Word8
forall a. Prim a => ByteArray -> Int -> a
PM.indexByteArray ByteArray
arr Int
off Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0b1101_1111)
          Bool -> Bool -> Bool
&& Ptr Word8 -> Int -> t -> Bool
go (Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
ptr Int
1) (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (t
len t -> t -> t
forall a. Num a => a -> a -> a
- t
1)