module FlatParse.Common.Assorted
  (
  -- * Compatibility
    shortInteger

  -- * 'Char' predicates
  , isDigit, isLatinLetter, isGreekLetter

  -- * Other
  , packBytes, splitBytes

  -- * UTF-8 conversions
  , charToBytes, strToBytes
  , strToUtf8, utf8ToStr

  -- * Shortcuts
  , derefChar8#

  -- * Boxed integer coercions
  -- $boxed-integer-coercion
  , word16ToInt16
  , word32ToInt32
  , word64ToInt64

  -- * Helpers
  , withPosInt#, withIntUnwrap#

  -- * Bit manipulation
  , zbytel, zbytel'intermediate, zbytel'toIdx
  , zbyter, zbyter'intermediate, zbyter'toIdx
  ) where

import Data.Bits
import Data.Char ( ord )
import Data.Foldable (foldl')
import GHC.Exts

import qualified Data.ByteString as B

import Data.Word
import Data.Int

#if MIN_VERSION_base(4,15,0)
import GHC.Num.Integer (Integer(..))
#else
import GHC.Integer.GMP.Internals (Integer(..))
#endif

import qualified Data.ByteString.UTF8 as UTF8


-- Compatibility
--------------------------------------------------------------------------------

shortInteger :: Int# -> Integer
#if MIN_VERSION_base(4,15,0)
shortInteger :: Int# -> Integer
shortInteger = Int# -> Integer
IS
#else
shortInteger = S#
#endif
{-# inline shortInteger #-}


-- Char predicates
--------------------------------------------------------------------------------

-- | @isDigit c = \'0\' <= c && c <= \'9\'@
isDigit :: Char -> Bool
isDigit :: Char -> Bool
isDigit Char
c = Char
'0' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'9'
{-# inline isDigit #-}

-- | @isLatinLetter c = (\'A\' <= c && c <= \'Z\') || (\'a\' <= c && c <= \'z\')@
isLatinLetter :: Char -> Bool
isLatinLetter :: Char -> Bool
isLatinLetter Char
c = (Char
'A' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'Z') Bool -> Bool -> Bool
|| (Char
'a' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'z')
{-# inline isLatinLetter #-}

-- | @isGreekLetter c = (\'Α\' <= c && c <= \'Ω\') || (\'α\' <= c && c <= \'ω\')@
isGreekLetter :: Char -> Bool
isGreekLetter :: Char -> Bool
isGreekLetter Char
c = (Char
'Α' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'Ω') Bool -> Bool -> Bool
|| (Char
'α' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'ω')
{-# inline isGreekLetter #-}

-- UTF conversions
--------------------------------------------------------------------------------

packBytes :: [Word] -> Word
packBytes :: [Word] -> Word
packBytes = (Word, Int) -> Word
forall a b. (a, b) -> a
fst ((Word, Int) -> Word) -> ([Word] -> (Word, Int)) -> [Word] -> Word
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Word, Int) -> Word -> (Word, Int))
-> (Word, Int) -> [Word] -> (Word, Int)
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (Word, Int) -> Word -> (Word, Int)
forall {a} {a}.
(Bits a, Integral a, Num a) =>
(a, Int) -> a -> (a, Int)
go (Word
0, Int
0) where
  go :: (a, Int) -> a -> (a, Int)
go (a
acc, Int
shift) a
w | Int
shift Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
64 = [Char] -> (a, Int)
forall a. HasCallStack => [Char] -> a
error [Char]
"packBytes: too many bytes"
  go (a
acc, Int
shift) a
w = (a -> Int -> a
forall a. Bits a => a -> Int -> a
unsafeShiftL (a -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
w) Int
shift a -> a -> a
forall a. Bits a => a -> a -> a
.|. a
acc, Int
shiftInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
8)

-- TODO chunks into 8-bytes for 64-bit performance
splitBytes :: [Word] -> ([Word], [Word])
splitBytes :: [Word] -> ([Word], [Word])
splitBytes [Word]
ws = case Int -> Int -> (Int, Int)
forall a. Integral a => a -> a -> (a, a)
quotRem ([Word] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Word]
ws) Int
8 of
  (Int
0, Int
_) -> ([Word]
ws, [])
  (Int
_, Int
r) -> ([Word]
as, [Word] -> [Word]
chunk8s [Word]
bs) where
              ([Word]
as, [Word]
bs) = Int -> [Word] -> ([Word], [Word])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
r [Word]
ws
              chunk8s :: [Word] -> [Word]
chunk8s [] = []
              chunk8s [Word]
ws = let ([Word]
as, [Word]
bs) = Int -> [Word] -> ([Word], [Word])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
8 [Word]
ws in
                           [Word] -> Word
packBytes [Word]
as Word -> [Word] -> [Word]
forall a. a -> [a] -> [a]
: [Word] -> [Word]
chunk8s [Word]
bs

-- | Shortcut for 'indexCharOffAddr# addr# 0#'.
derefChar8# :: Addr# -> Char#
derefChar8# :: Addr# -> Char#
derefChar8# Addr#
addr# = Addr# -> Int# -> Char#
indexCharOffAddr# Addr#
addr# Int#
0#
{-# inline derefChar8# #-}

--------------------------------------------------------------------------------

{- $boxed-integer-coercion

These functions should be no-ops. They correspond to the similarly-named GHC 9.4
primops which work on unboxed integers.
-}

-- | Coerce a 'Word16' to 'Int16'.
word16ToInt16 :: Word16 -> Int16
word16ToInt16 :: Word16 -> Int16
word16ToInt16 = Word16 -> Int16
forall a b. (Integral a, Num b) => a -> b
fromIntegral
{-# inline word16ToInt16 #-}

-- | Coerce a 'Word32' to 'Int32'.
word32ToInt32 :: Word32 -> Int32
word32ToInt32 :: Word32 -> Int32
word32ToInt32 = Word32 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral
{-# inline word32ToInt32 #-}

-- | Coerce a 'Word64' to 'Int64'.
word64ToInt64 :: Word64 -> Int64
word64ToInt64 :: Word64 -> Int64
word64ToInt64 = Word64 -> Int64
forall a b. (Integral a, Num b) => a -> b
fromIntegral
{-# inline word64ToInt64 #-}

--------------------------------------------------------------------------------

-- | Assert for the given 'Int#' that @n >= 0@.
--
-- Throws a runtime error if given a negative integer.
withPosInt# :: Int# -> r -> r
withPosInt# :: forall r. Int# -> r -> r
withPosInt# Int#
n# r
r = case Int#
n# Int# -> Int# -> Int#
>=# Int#
0# of
  Int#
1# -> r
r
  Int#
_  -> [Char] -> r
forall a. HasCallStack => [Char] -> a
error [Char]
"FlatParse.Basic.Base.withPosInt#: negative integer"
{-# inline withPosInt# #-}

-- | Unwrap the 'Int#' from an 'Int' and apply it to the given function.
withIntUnwrap# :: (Int# -> r) -> Int -> r
withIntUnwrap# :: forall r. (Int# -> r) -> Int -> r
withIntUnwrap# Int# -> r
f (I# Int#
i#) = Int# -> r
f Int#
i#
{-# inline withIntUnwrap# #-}

--------------------------------------------------------------------------------

charToBytes :: Char -> [Word]
charToBytes :: Char -> [Word]
charToBytes Char
c'
    | Int
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x7f     = [Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
c]
    | Int
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x7ff    = [Word
0xc0 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. Word
y, Word
0x80 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. Word
z]
    | Int
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0xffff   = [Word
0xe0 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. Word
x, Word
0x80 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. Word
y, Word
0x80 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. Word
z]
    | Int
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x10ffff = [Word
0xf0 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. Word
w, Word
0x80 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. Word
x, Word
0x80 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. Word
y, Word
0x80 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. Word
z]
    | Bool
otherwise = [Char] -> [Word]
forall a. HasCallStack => [Char] -> a
error [Char]
"Not a valid Unicode code point"
  where
    c :: Int
c = Char -> Int
ord Char
c'
    z :: Word
z = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
c                 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3f)
    y :: Word
y = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Int -> Int
forall a. Bits a => a -> Int -> a
unsafeShiftR Int
c Int
6  Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3f)
    x :: Word
x = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Int -> Int
forall a. Bits a => a -> Int -> a
unsafeShiftR Int
c Int
12 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3f)
    w :: Word
w = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Int -> Int
forall a. Bits a => a -> Int -> a
unsafeShiftR Int
c Int
18 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x7)

strToBytes :: String -> [Word]
strToBytes :: [Char] -> [Word]
strToBytes = (Char -> [Word]) -> [Char] -> [Word]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Char -> [Word]
charToBytes
{-# inline strToBytes #-}

-- | Convert an UTF8-encoded `String` to a `B.ByteString`.
strToUtf8 :: String -> B.ByteString
strToUtf8 :: [Char] -> ByteString
strToUtf8 = [Char] -> ByteString
UTF8.fromString
{-# inline strToUtf8 #-}

-- | Convert a `B.ByteString` to an UTF8-encoded `String`.
utf8ToStr :: B.ByteString -> String
utf8ToStr :: ByteString -> [Char]
utf8ToStr = ByteString -> [Char]
UTF8.toString
{-# inline utf8ToStr #-}

--------------------------------------------------------------------------------

-- | Index of leftmost null byte, or (number of bytes in type) if not present.
--
-- Adapted from Hacker's Delight 6-1. Useful in big-endian environments.
zbytel :: (FiniteBits a, Num a) => a -> Int
zbytel :: forall a. (FiniteBits a, Num a) => a -> Int
zbytel = a -> Int
forall a. (FiniteBits a, Num a) => a -> Int
zbytel'toIdx (a -> Int) -> (a -> a) -> a -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> a
forall a. (FiniteBits a, Num a) => a -> a
zbytel'intermediate
{-# inline zbytel #-}

-- | bit mangling, returns 0 for inputs without a null byte
--
-- Separating allows us to skip some index calculation if there was no null byte.
zbytel'intermediate :: (FiniteBits a, Num a) => a -> a
zbytel'intermediate :: forall a. (FiniteBits a, Num a) => a -> a
zbytel'intermediate a
a =
    let a' :: a
a' = (a
a a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
mask) a -> a -> a
forall a. Num a => a -> a -> a
+ a
mask
    in  a -> a
forall a. Bits a => a -> a
complement (a
a' a -> a -> a
forall a. Bits a => a -> a -> a
.|. a
a a -> a -> a
forall a. Bits a => a -> a -> a
.|. a
mask)
  where
    mask :: a
mask = a
0x7F7F7F7F7F7F7F7F
{-# inline zbytel'intermediate #-}

-- | bit mangling, turns intermediate value into an index
--
-- Separating allows us to skip some index calculation if there was no null byte.
zbytel'toIdx :: (FiniteBits a, Num a) => a -> Int
zbytel'toIdx :: forall a. (FiniteBits a, Num a) => a -> Int
zbytel'toIdx a
a = a -> Int
forall b. FiniteBits b => b -> Int
countLeadingZeros a
a Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
3
{-# inline zbytel'toIdx #-}

-- | Index of rightmost null byte, or (number of bytes in type) if not present
--
-- Adapted from Hacker's Delight 6-1. Useful in little-endian environments.
zbyter :: (FiniteBits a, Num a) => a -> Int
zbyter :: forall a. (FiniteBits a, Num a) => a -> Int
zbyter = a -> Int
forall a. (FiniteBits a, Num a) => a -> Int
zbyter'toIdx (a -> Int) -> (a -> a) -> a -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> a
forall a. (FiniteBits a, Num a) => a -> a
zbyter'intermediate
{-# inline zbyter #-}

-- | bit mangling, returns 0 for inputs without a null byte
--
-- Separating allows us to skip some index calculation if there was no null byte.
zbyter'intermediate :: (FiniteBits a, Num a) => a -> a
zbyter'intermediate :: forall a. (FiniteBits a, Num a) => a -> a
zbyter'intermediate a
a = (a
a a -> a -> a
forall a. Num a => a -> a -> a
- a
0x0101010101010101) a -> a -> a
forall a. Bits a => a -> a -> a
.&. (a -> a
forall a. Bits a => a -> a
complement a
a) a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
0x8080808080808080
{-# inline zbyter'intermediate #-}

-- | bit mangling, turns intermediate value into an index
--
-- Separating allows us to skip some index calculation if there was no null byte.
zbyter'toIdx :: (FiniteBits a, Num a) => a -> Int
zbyter'toIdx :: forall a. (FiniteBits a, Num a) => a -> Int
zbyter'toIdx a
a = a -> Int
forall b. FiniteBits b => b -> Int
countTrailingZeros a
a Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
3
{-# inline zbyter'toIdx #-}