{-# LANGUAGE MagicHash #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE UnliftedFFITypes #-}
#if defined(PURE_HASKELL)
{-# LANGUAGE BangPatterns #-}
#endif

{-# OPTIONS_HADDOCK not-home #-}

-- | Implements 'isAscii', using efficient C routines by default.
--
-- Similarly implements asciiPrefixLength, used internally in Data.Text.Encoding.
module Data.Text.Internal.IsAscii where

#if defined(PURE_HASKELL)
import Prelude hiding (all)
import qualified Data.Char as Char
import qualified Data.ByteString as BS
import Data.Text.Unsafe (iter, Iter(..))
#else
import Data.Text.Internal.ByteStringCompat (withBS)
import Data.Text.Internal.Unsafe (unsafeWithForeignPtr)
import Data.Text.Unsafe (unsafeDupablePerformIO)
import Data.Word (Word8)
import Foreign.C.Types
import Foreign.Ptr (Ptr, plusPtr)
import GHC.Base (ByteArray#)
import Prelude (Bool(..), Int, (==), ($), IO, (<$>))
import qualified Data.Text.Array as A
#endif
import Data.ByteString (ByteString)
import Data.Text.Internal (Text(..))
import qualified Prelude as P

-- | \O(n)\ Test whether 'Text' contains only ASCII code-points (i.e. only
--   U+0000 through U+007F).
--
-- This is a more efficient version of @'all' 'Data.Char.isAscii'@.
--
-- >>> isAscii ""
-- True
--
-- >>> isAscii "abc\NUL"
-- True
--
-- >>> isAscii "abcd€"
-- False
--
-- prop> isAscii t == all (< '\x80') t
--
-- @since 2.0.2
isAscii :: Text -> Bool
#if defined(PURE_HASKELL)
isAscii = all Char.isAscii

-- | (Re)implemented to avoid circular dependency on Data.Text.
all :: (Char -> Bool) -> Text -> Bool
all p t@(Text _ _ len) = go 0
  where
    go i | i >= len = True
         | otherwise =
             let !(Iter c j) = iter t i
             in p c && go (i+j)
#else
cSizeToInt :: CSize -> Int
cSizeToInt :: CSize -> Int
cSizeToInt = CSize -> Int
forall a b. (Integral a, Num b) => a -> b
P.fromIntegral
{-# INLINE cSizeToInt #-}

intToCSize :: Int -> CSize
intToCSize :: Int -> CSize
intToCSize = Int -> CSize
forall a b. (Integral a, Num b) => a -> b
P.fromIntegral

isAscii :: Text -> Bool
isAscii (Text (A.ByteArray ByteArray#
arr) Int
off Int
len) =
    CSize -> Int
cSizeToInt (ByteArray# -> CSize -> CSize -> CSize
c_is_ascii_offset ByteArray#
arr (Int -> CSize
intToCSize Int
off) (Int -> CSize
intToCSize Int
len)) Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
len
#endif
{-# INLINE isAscii #-}

-- | Length of the longest ASCII prefix.
asciiPrefixLength :: ByteString -> Int
#if defined(PURE_HASKELL)
asciiPrefixLength = BS.length P.. BS.takeWhile (P.< 0x80)
#else
asciiPrefixLength :: ByteString -> Int
asciiPrefixLength ByteString
bs = IO Int -> Int
forall a. IO a -> a
unsafeDupablePerformIO (IO Int -> Int) -> IO Int -> Int
forall a b. (a -> b) -> a -> b
$ ByteString -> (ForeignPtr Word8 -> Int -> IO Int) -> IO Int
forall r. ByteString -> (ForeignPtr Word8 -> Int -> r) -> r
withBS ByteString
bs ((ForeignPtr Word8 -> Int -> IO Int) -> IO Int)
-> (ForeignPtr Word8 -> Int -> IO Int) -> IO Int
forall a b. (a -> b) -> a -> b
$ \ ForeignPtr Word8
fp Int
len ->
  ForeignPtr Word8 -> (Ptr Word8 -> IO Int) -> IO Int
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word8
fp ((Ptr Word8 -> IO Int) -> IO Int)
-> (Ptr Word8 -> IO Int) -> IO Int
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
src -> do
    CSize -> Int
forall a b. (Integral a, Num b) => a -> b
P.fromIntegral (CSize -> Int) -> IO CSize -> IO Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Ptr Word8 -> Ptr Word8 -> IO CSize
c_is_ascii Ptr Word8
src (Ptr Word8
src Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
len)
#endif
{-# INLINE asciiPrefixLength #-}

#if !defined(PURE_HASKELL)
foreign import ccall unsafe "_hs_text_is_ascii_offset" c_is_ascii_offset
    :: ByteArray# -> CSize -> CSize -> CSize

foreign import ccall unsafe "_hs_text_is_ascii" c_is_ascii
    :: Ptr Word8 -> Ptr Word8 -> IO CSize
#endif