module FlatParse.Common.Assorted
  (
  -- * Compatibility
    shortInteger

  -- * 'Char' predicates
  , isDigit, isLatinLetter, isGreekLetter

  -- * Other
  , packBytes, splitBytes

  -- * UTF-8 conversions
  , charToBytes, strToBytes
  , strToUtf8, utf8ToStr

  -- * Shortcuts
  , derefChar8#

  -- * Boxed integer coercions
  -- $boxed-integer-coercion
  , word16ToInt16
  , word32ToInt32
  , word64ToInt64

  -- * Helpers
  , withPosInt#, withIntUnwrap#

  -- * Bit manipulation
  , zbytel, zbytel'intermediate, zbytel'toIdx
  , zbyter, zbyter'intermediate, zbyter'toIdx
  ) where

import Data.Bits
import Data.Char ( ord )
import Data.Foldable (foldl')
import GHC.Exts

import qualified Data.ByteString as B

import Data.Word
import Data.Int

#if MIN_VERSION_base(4,15,0)
import GHC.Num.Integer (Integer(..))
#else
import GHC.Integer.GMP.Internals (Integer(..))
#endif

import qualified Data.ByteString.UTF8 as UTF8


-- Compatibility
--------------------------------------------------------------------------------

shortInteger :: Int# -> Integer
#if MIN_VERSION_base(4,15,0)
shortInteger = IS
#else
shortInteger = S#
#endif
{-# inline shortInteger #-}


-- Char predicates
--------------------------------------------------------------------------------

-- | @isDigit c = \'0\' <= c && c <= \'9\'@
isDigit :: Char -> Bool
isDigit c = '0' <= c && c <= '9'
{-# inline isDigit #-}

-- | @isLatinLetter c = (\'A\' <= c && c <= \'Z\') || (\'a\' <= c && c <= \'z\')@
isLatinLetter :: Char -> Bool
isLatinLetter c = ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')
{-# inline isLatinLetter #-}

-- | @isGreekLetter c = (\'Α\' <= c && c <= \'Ω\') || (\'α\' <= c && c <= \'ω\')@
isGreekLetter :: Char -> Bool
isGreekLetter c = ('Α' <= c && c <= 'Ω') || ('α' <= c && c <= 'ω')
{-# inline isGreekLetter #-}

-- UTF conversions
--------------------------------------------------------------------------------

packBytes :: [Word] -> Word
packBytes = fst . foldl' go (0, 0) where
  go (acc, shift) w | shift == 64 = error "packBytes: too many bytes"
  go (acc, shift) w = (unsafeShiftL (fromIntegral w) shift .|. acc, shift+8)

-- TODO chunks into 8-bytes for 64-bit performance
splitBytes :: [Word] -> ([Word], [Word])
splitBytes ws = case quotRem (length ws) 8 of
  (0, _) -> (ws, [])
  (_, r) -> (as, chunk8s bs) where
              (as, bs) = splitAt r ws
              chunk8s [] = []
              chunk8s ws = let (as, bs) = splitAt 8 ws in
                           packBytes as : chunk8s bs

-- | Shortcut for 'indexCharOffAddr# addr# 0#'.
derefChar8# :: Addr# -> Char#
derefChar8# addr# = indexCharOffAddr# addr# 0#
{-# inline derefChar8# #-}

--------------------------------------------------------------------------------

{- $boxed-integer-coercion

These functions should be no-ops. They correspond to the similarly-named GHC 9.4
primops which work on unboxed integers.
-}

-- | Coerce a 'Word16' to 'Int16'.
word16ToInt16 :: Word16 -> Int16
word16ToInt16 = fromIntegral
{-# inline word16ToInt16 #-}

-- | Coerce a 'Word32' to 'Int32'.
word32ToInt32 :: Word32 -> Int32
word32ToInt32 = fromIntegral
{-# inline word32ToInt32 #-}

-- | Coerce a 'Word64' to 'Int64'.
word64ToInt64 :: Word64 -> Int64
word64ToInt64 = fromIntegral
{-# inline word64ToInt64 #-}

--------------------------------------------------------------------------------

-- | Assert for the given 'Int#' that @n >= 0@.
--
-- Throws a runtime error if given a negative integer.
withPosInt# :: Int# -> r -> r
withPosInt# n# r = case n# >=# 0# of
  1# -> r
  _  -> error "FlatParse.Basic.Base.withPosInt#: negative integer"
{-# inline withPosInt# #-}

-- | Unwrap the 'Int#' from an 'Int' and apply it to the given function.
withIntUnwrap# :: (Int# -> r) -> Int -> r
withIntUnwrap# f (I# i#) = f i#
{-# inline withIntUnwrap# #-}

--------------------------------------------------------------------------------

charToBytes :: Char -> [Word]
charToBytes c'
    | c <= 0x7f     = [fromIntegral c]
    | c <= 0x7ff    = [0xc0 .|. y, 0x80 .|. z]
    | c <= 0xffff   = [0xe0 .|. x, 0x80 .|. y, 0x80 .|. z]
    | c <= 0x10ffff = [0xf0 .|. w, 0x80 .|. x, 0x80 .|. y, 0x80 .|. z]
    | otherwise = error "Not a valid Unicode code point"
  where
    c = ord c'
    z = fromIntegral (c                 .&. 0x3f)
    y = fromIntegral (unsafeShiftR c 6  .&. 0x3f)
    x = fromIntegral (unsafeShiftR c 12 .&. 0x3f)
    w = fromIntegral (unsafeShiftR c 18 .&. 0x7)

strToBytes :: String -> [Word]
strToBytes = concatMap charToBytes
{-# inline strToBytes #-}

-- | Convert an UTF8-encoded `String` to a `B.ByteString`.
strToUtf8 :: String -> B.ByteString
strToUtf8 = UTF8.fromString
{-# inline strToUtf8 #-}

-- | Convert a `B.ByteString` to an UTF8-encoded `String`.
utf8ToStr :: B.ByteString -> String
utf8ToStr = UTF8.toString
{-# inline utf8ToStr #-}

--------------------------------------------------------------------------------

-- | Index of leftmost null byte, or (number of bytes in type) if not present.
--
-- Adapted from Hacker's Delight 6-1. Useful in big-endian environments.
zbytel :: (FiniteBits a, Num a) => a -> Int
zbytel = zbytel'toIdx . zbytel'intermediate
{-# inline zbytel #-}

-- | bit mangling, returns 0 for inputs without a null byte
--
-- Separating allows us to skip some index calculation if there was no null byte.
zbytel'intermediate :: (FiniteBits a, Num a) => a -> a
zbytel'intermediate a =
    let a' = (a .&. mask) + mask
    in  complement (a' .|. a .|. mask)
  where
    mask = 0x7F7F7F7F7F7F7F7F
{-# inline zbytel'intermediate #-}

-- | bit mangling, turns intermediate value into an index
--
-- Separating allows us to skip some index calculation if there was no null byte.
zbytel'toIdx :: (FiniteBits a, Num a) => a -> Int
zbytel'toIdx a = countLeadingZeros a `unsafeShiftR` 3
{-# inline zbytel'toIdx #-}

-- | Index of rightmost null byte, or (number of bytes in type) if not present
--
-- Adapted from Hacker's Delight 6-1. Useful in little-endian environments.
zbyter :: (FiniteBits a, Num a) => a -> Int
zbyter = zbyter'toIdx . zbyter'intermediate
{-# inline zbyter #-}

-- | bit mangling, returns 0 for inputs without a null byte
--
-- Separating allows us to skip some index calculation if there was no null byte.
zbyter'intermediate :: (FiniteBits a, Num a) => a -> a
zbyter'intermediate a = (a - 0x0101010101010101) .&. (complement a) .&. 0x8080808080808080
{-# inline zbyter'intermediate #-}

-- | bit mangling, turns intermediate value into an index
--
-- Separating allows us to skip some index calculation if there was no null byte.
zbyter'toIdx :: (FiniteBits a, Num a) => a -> Int
zbyter'toIdx a = countTrailingZeros a `unsafeShiftR` 3
{-# inline zbyter'toIdx #-}