{-
(c) The University of Glasgow 2006
(c) The University of Glasgow, 1997-2006


Buffers for scanning string input stored in external arrays.
-}

{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnboxedTuples #-}

{-# OPTIONS_GHC -O2 #-}
-- We always optimise this, otherwise performance of a non-optimised
-- compiler is severely affected

module GHC.Data.StringBuffer
       (
        StringBuffer(..),
        -- non-abstract for vs\/HaskellService

         -- * Creation\/destruction
        hGetStringBuffer,
        hGetStringBufferBlock,
        hPutStringBuffer,
        appendStringBuffers,
        stringToStringBuffer,
        stringBufferFromByteString,

        -- * Inspection
        nextChar,
        currentChar,
        prevChar,
        atEnd,
        fingerprintStringBuffer,

        -- * Moving and comparison
        stepOn,
        offsetBytes,
        byteDiff,
        atLine,

        -- * Conversion
        lexemeToString,
        lexemeToFastString,
        decodePrevNChars,

         -- * Parsing integers
        parseUnsignedInteger,

        -- * Checking for bi-directional format characters
        containsBidirectionalFormatChar,
        bidirectionalFormatChars
        ) where

import GHC.Prelude

import GHC.Data.FastString
import GHC.Utils.Encoding
import GHC.Utils.IO.Unsafe
import GHC.Utils.Panic.Plain
import GHC.Utils.Exception      ( bracket_ )
import GHC.Fingerprint

import Data.Maybe
import System.IO
import System.IO.Unsafe         ( unsafePerformIO )
import GHC.IO.Encoding.UTF8     ( mkUTF8 )
import GHC.IO.Encoding.Failure  ( CodingFailureMode(IgnoreCodingFailure) )

import qualified Data.ByteString.Internal as BS
import qualified Data.ByteString as BS
import Data.ByteString ( ByteString )

import GHC.Exts

import Foreign
#if MIN_VERSION_base(4,15,0)
import GHC.ForeignPtr (unsafeWithForeignPtr)
#else
unsafeWithForeignPtr :: ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr = withForeignPtr
#endif

-- -----------------------------------------------------------------------------
-- The StringBuffer type

-- |A StringBuffer is an internal pointer to a sized chunk of bytes.
-- The bytes are intended to be *immutable*.  There are pure
-- operations to read the contents of a StringBuffer.
--
-- A StringBuffer may have a finalizer, depending on how it was
-- obtained.
--
data StringBuffer
 = StringBuffer {
     StringBuffer -> ForeignPtr Word8
buf :: {-# UNPACK #-} !(ForeignPtr Word8),
     StringBuffer -> Int
len :: {-# UNPACK #-} !Int,        -- length
     StringBuffer -> Int
cur :: {-# UNPACK #-} !Int         -- current pos
  }
  -- The buffer is assumed to be UTF-8 encoded, and furthermore
  -- we add three @\'\\0\'@ bytes to the end as sentinels so that the
  -- decoder doesn't have to check for overflow at every single byte
  -- of a multibyte sequence.

instance Show StringBuffer where
        showsPrec :: Int -> StringBuffer -> ShowS
showsPrec Int
_ StringBuffer
s = String -> ShowS
showString String
"<stringbuffer("
                      forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => a -> ShowS
shows (StringBuffer -> Int
len StringBuffer
s) forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ShowS
showString String
"," forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => a -> ShowS
shows (StringBuffer -> Int
cur StringBuffer
s)
                      forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ShowS
showString String
")>"

-- -----------------------------------------------------------------------------
-- Creation / Destruction

-- | Read a file into a 'StringBuffer'.  The resulting buffer is automatically
-- managed by the garbage collector.
hGetStringBuffer :: FilePath -> IO StringBuffer
hGetStringBuffer :: String -> IO StringBuffer
hGetStringBuffer String
fname = do
   Handle
h <- String -> IOMode -> IO Handle
openBinaryFile String
fname IOMode
ReadMode
   Integer
size_i <- Handle -> IO Integer
hFileSize Handle
h
   Integer
offset_i <- Handle -> Integer -> Integer -> IO Integer
skipBOM Handle
h Integer
size_i Integer
0  -- offset is 0 initially
   let size :: Int
size = forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$ Integer
size_i forall a. Num a => a -> a -> a
- Integer
offset_i
   ForeignPtr Word8
buf <- forall a. Storable a => Int -> IO (ForeignPtr a)
mallocForeignPtrArray (Int
sizeforall a. Num a => a -> a -> a
+Int
3)
   forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word8
buf forall a b. (a -> b) -> a -> b
$ \Ptr Word8
ptr -> do
     Int
r <- if Int
size forall a. Eq a => a -> a -> Bool
== Int
0 then forall (m :: * -> *) a. Monad m => a -> m a
return Int
0 else forall a. Handle -> Ptr a -> Int -> IO Int
hGetBuf Handle
h Ptr Word8
ptr Int
size
     Handle -> IO ()
hClose Handle
h
     if (Int
r forall a. Eq a => a -> a -> Bool
/= Int
size)
        then forall a. IOError -> IO a
ioError (String -> IOError
userError String
"short read of file")
        else ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer
newUTF8StringBuffer ForeignPtr Word8
buf Ptr Word8
ptr Int
size

hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer
hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer
hGetStringBufferBlock Handle
handle Int
wanted
    = do Integer
size_i <- Handle -> IO Integer
hFileSize Handle
handle
         Integer
offset_i <- Handle -> IO Integer
hTell Handle
handle forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Handle -> Integer -> Integer -> IO Integer
skipBOM Handle
handle Integer
size_i
         let size :: Int
size = forall a. Ord a => a -> a -> a
min Int
wanted (forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$ Integer
size_iforall a. Num a => a -> a -> a
-Integer
offset_i)
         ForeignPtr Word8
buf <- forall a. Storable a => Int -> IO (ForeignPtr a)
mallocForeignPtrArray (Int
sizeforall a. Num a => a -> a -> a
+Int
3)
         forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word8
buf forall a b. (a -> b) -> a -> b
$ \Ptr Word8
ptr ->
             do Int
r <- if Int
size forall a. Eq a => a -> a -> Bool
== Int
0 then forall (m :: * -> *) a. Monad m => a -> m a
return Int
0 else forall a. Handle -> Ptr a -> Int -> IO Int
hGetBuf Handle
handle Ptr Word8
ptr Int
size
                if Int
r forall a. Eq a => a -> a -> Bool
/= Int
size
                   then forall a. IOError -> IO a
ioError (String -> IOError
userError forall a b. (a -> b) -> a -> b
$ String
"short read of file: "forall a. [a] -> [a] -> [a]
++forall a. Show a => a -> String
show(Int
r,Int
size,Integer
size_i,Handle
handle))
                   else ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer
newUTF8StringBuffer ForeignPtr Word8
buf Ptr Word8
ptr Int
size

hPutStringBuffer :: Handle -> StringBuffer -> IO ()
hPutStringBuffer :: Handle -> StringBuffer -> IO ()
hPutStringBuffer Handle
hdl (StringBuffer ForeignPtr Word8
buf Int
len Int
cur)
    = forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr (forall a b. ForeignPtr a -> Int -> ForeignPtr b
plusForeignPtr ForeignPtr Word8
buf Int
cur) forall a b. (a -> b) -> a -> b
$ \Ptr Any
ptr ->
          forall a. Handle -> Ptr a -> Int -> IO ()
hPutBuf Handle
hdl Ptr Any
ptr Int
len

-- | Skip the byte-order mark if there is one (see #1744 and #6016),
-- and return the new position of the handle in bytes.
--
-- This is better than treating #FEFF as whitespace,
-- because that would mess up layout.  We don't have a concept
-- of zero-width whitespace in Haskell: all whitespace codepoints
-- have a width of one column.
skipBOM :: Handle -> Integer -> Integer -> IO Integer
skipBOM :: Handle -> Integer -> Integer -> IO Integer
skipBOM Handle
h Integer
size Integer
offset =
  -- Only skip BOM at the beginning of a file.
  if Integer
size forall a. Ord a => a -> a -> Bool
> Integer
0 Bool -> Bool -> Bool
&& Integer
offset forall a. Eq a => a -> a -> Bool
== Integer
0
    then do
      -- Validate assumption that handle is in binary mode.
      forall (m :: * -> *). (HasCallStack, Monad m) => m Bool -> m ()
assertM (Handle -> IO (Maybe TextEncoding)
hGetEncoding Handle
h forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Maybe a -> Bool
isNothing)
      -- Temporarily select utf8 encoding with error ignoring,
      -- to make `hLookAhead` and `hGetChar` return full Unicode characters.
      forall a b c. IO a -> IO b -> IO c -> IO c
bracket_ (Handle -> TextEncoding -> IO ()
hSetEncoding Handle
h TextEncoding
safeEncoding) (Handle -> Bool -> IO ()
hSetBinaryMode Handle
h Bool
True) forall a b. (a -> b) -> a -> b
$ do
        Char
c <- Handle -> IO Char
hLookAhead Handle
h
        if Char
c forall a. Eq a => a -> a -> Bool
== Char
'\xfeff'
          then Handle -> IO Char
hGetChar Handle
h forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Handle -> IO Integer
hTell Handle
h
          else forall (m :: * -> *) a. Monad m => a -> m a
return Integer
offset
    else forall (m :: * -> *) a. Monad m => a -> m a
return Integer
offset
  where
    safeEncoding :: TextEncoding
safeEncoding = CodingFailureMode -> TextEncoding
mkUTF8 CodingFailureMode
IgnoreCodingFailure

newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer
newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer
newUTF8StringBuffer ForeignPtr Word8
buf Ptr Word8
ptr Int
size = do
  forall a. Storable a => Ptr a -> [a] -> IO ()
pokeArray (Ptr Word8
ptr forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
size :: Ptr Word8) [Word8
0,Word8
0,Word8
0]
  -- sentinels for UTF-8 decoding
  forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ ForeignPtr Word8 -> Int -> Int -> StringBuffer
StringBuffer ForeignPtr Word8
buf Int
size Int
0

appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer
appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer
appendStringBuffers StringBuffer
sb1 StringBuffer
sb2
    = do ForeignPtr Word8
newBuf <- forall a. Storable a => Int -> IO (ForeignPtr a)
mallocForeignPtrArray (Int
sizeforall a. Num a => a -> a -> a
+Int
3)
         forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word8
newBuf forall a b. (a -> b) -> a -> b
$ \Ptr Word8
ptr ->
          forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr (StringBuffer -> ForeignPtr Word8
buf StringBuffer
sb1) forall a b. (a -> b) -> a -> b
$ \Ptr Word8
sb1Ptr ->
           forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr (StringBuffer -> ForeignPtr Word8
buf StringBuffer
sb2) forall a b. (a -> b) -> a -> b
$ \Ptr Word8
sb2Ptr ->
             do forall a. Storable a => Ptr a -> Ptr a -> Int -> IO ()
copyArray Ptr Word8
ptr (Ptr Word8
sb1Ptr forall a. Storable a => Ptr a -> Int -> Ptr a
`advancePtr` StringBuffer -> Int
cur StringBuffer
sb1) Int
sb1_len
                forall a. Storable a => Ptr a -> Ptr a -> Int -> IO ()
copyArray (Ptr Word8
ptr forall a. Storable a => Ptr a -> Int -> Ptr a
`advancePtr` Int
sb1_len) (Ptr Word8
sb2Ptr forall a. Storable a => Ptr a -> Int -> Ptr a
`advancePtr` StringBuffer -> Int
cur StringBuffer
sb2) Int
sb2_len
                forall a. Storable a => Ptr a -> [a] -> IO ()
pokeArray (Ptr Word8
ptr forall a. Storable a => Ptr a -> Int -> Ptr a
`advancePtr` Int
size) [Word8
0,Word8
0,Word8
0]
                forall (m :: * -> *) a. Monad m => a -> m a
return (ForeignPtr Word8 -> Int -> Int -> StringBuffer
StringBuffer ForeignPtr Word8
newBuf Int
size Int
0)
    where sb1_len :: Int
sb1_len = StringBuffer -> Int
calcLen StringBuffer
sb1
          sb2_len :: Int
sb2_len = StringBuffer -> Int
calcLen StringBuffer
sb2
          calcLen :: StringBuffer -> Int
calcLen StringBuffer
sb = StringBuffer -> Int
len StringBuffer
sb forall a. Num a => a -> a -> a
- StringBuffer -> Int
cur StringBuffer
sb
          size :: Int
size =  Int
sb1_len forall a. Num a => a -> a -> a
+ Int
sb2_len

-- | Encode a 'String' into a 'StringBuffer' as UTF-8.  The resulting buffer
-- is automatically managed by the garbage collector.
stringToStringBuffer :: String -> StringBuffer
stringToStringBuffer :: String -> StringBuffer
stringToStringBuffer String
str =
 forall a. IO a -> a
unsafePerformIO forall a b. (a -> b) -> a -> b
$ do
  let size :: Int
size = String -> Int
utf8EncodedLength String
str
  ForeignPtr Word8
buf <- forall a. Storable a => Int -> IO (ForeignPtr a)
mallocForeignPtrArray (Int
sizeforall a. Num a => a -> a -> a
+Int
3)
  forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word8
buf forall a b. (a -> b) -> a -> b
$ \Ptr Word8
ptr -> do
    Ptr Word8 -> String -> IO ()
utf8EncodePtr Ptr Word8
ptr String
str
    forall a. Storable a => Ptr a -> [a] -> IO ()
pokeArray (Ptr Word8
ptr forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
size :: Ptr Word8) [Word8
0,Word8
0,Word8
0]
    -- sentinels for UTF-8 decoding
  forall (m :: * -> *) a. Monad m => a -> m a
return (ForeignPtr Word8 -> Int -> Int -> StringBuffer
StringBuffer ForeignPtr Word8
buf Int
size Int
0)

-- | Convert a UTF-8 encoded 'ByteString' into a 'StringBuffer. This really
-- relies on the internals of both 'ByteString' and 'StringBuffer'.
--
-- /O(n)/ (but optimized into a @memcpy@ by @bytestring@ under the hood)
stringBufferFromByteString :: ByteString -> StringBuffer
stringBufferFromByteString :: ByteString -> StringBuffer
stringBufferFromByteString ByteString
bs =
  let BS.PS ForeignPtr Word8
fp Int
off Int
len = ByteString -> ByteString -> ByteString
BS.append ByteString
bs ([Word8] -> ByteString
BS.pack [Word8
0,Word8
0,Word8
0])
  in StringBuffer { buf :: ForeignPtr Word8
buf = ForeignPtr Word8
fp, len :: Int
len = Int
len forall a. Num a => a -> a -> a
- Int
3, cur :: Int
cur = Int
off }

-- -----------------------------------------------------------------------------
-- Grab a character

-- | Return the first UTF-8 character of a nonempty 'StringBuffer' and as well
-- the remaining portion (analogous to 'Data.List.uncons').  __Warning:__ The
-- behavior is undefined if the 'StringBuffer' is empty.  The result shares
-- the same buffer as the original.  Similar to 'utf8DecodeChar', if the
-- character cannot be decoded as UTF-8, @\'\\0\'@ is returned.
{-# INLINE nextChar #-}
nextChar :: StringBuffer -> (Char,StringBuffer)
nextChar :: StringBuffer -> (Char, StringBuffer)
nextChar (StringBuffer ForeignPtr Word8
buf Int
len (I# Int#
cur#)) =
  -- Getting our fingers dirty a little here, but this is performance-critical
  forall a. IO a -> a
inlinePerformIO forall a b. (a -> b) -> a -> b
$
    forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word8
buf forall a b. (a -> b) -> a -> b
$ \(Ptr Addr#
a#) ->
        case Addr# -> Int# -> (# Char#, Int# #)
utf8DecodeCharAddr# (Addr#
a# Addr# -> Int# -> Addr#
`plusAddr#` Int#
cur#) Int#
0# of
          (# Char#
c#, Int#
nBytes# #) ->
             let cur' :: Int
cur' = Int# -> Int
I# (Int#
cur# Int# -> Int# -> Int#
+# Int#
nBytes#) in
             forall (m :: * -> *) a. Monad m => a -> m a
return (Char# -> Char
C# Char#
c#, ForeignPtr Word8 -> Int -> Int -> StringBuffer
StringBuffer ForeignPtr Word8
buf Int
len Int
cur')


bidirectionalFormatChars :: [(Char,String)]
bidirectionalFormatChars :: [(Char, String)]
bidirectionalFormatChars =
  [ (Char
'\x202a' , String
"U+202A LEFT-TO-RIGHT EMBEDDING (LRE)")
  , (Char
'\x202b' , String
"U+202B RIGHT-TO-LEFT EMBEDDING (RLE)")
  , (Char
'\x202c' , String
"U+202C POP DIRECTIONAL FORMATTING (PDF)")
  , (Char
'\x202d' , String
"U+202D LEFT-TO-RIGHT OVERRIDE (LRO)")
  , (Char
'\x202e' , String
"U+202E RIGHT-TO-LEFT OVERRIDE (RLO)")
  , (Char
'\x2066' , String
"U+2066 LEFT-TO-RIGHT ISOLATE (LRI)")
  , (Char
'\x2067' , String
"U+2067 RIGHT-TO-LEFT ISOLATE (RLI)")
  , (Char
'\x2068' , String
"U+2068 FIRST STRONG ISOLATE (FSI)")
  , (Char
'\x2069' , String
"U+2069 POP DIRECTIONAL ISOLATE (PDI)")
  ]

{-| Returns true if the buffer contains Unicode bi-directional formatting
characters.

https://www.unicode.org/reports/tr9/#Bidirectional_Character_Types

Bidirectional format characters are one of
'\x202a' : "U+202A LEFT-TO-RIGHT EMBEDDING (LRE)"
'\x202b' : "U+202B RIGHT-TO-LEFT EMBEDDING (RLE)"
'\x202c' : "U+202C POP DIRECTIONAL FORMATTING (PDF)"
'\x202d' : "U+202D LEFT-TO-RIGHT OVERRIDE (LRO)"
'\x202e' : "U+202E RIGHT-TO-LEFT OVERRIDE (RLO)"
'\x2066' : "U+2066 LEFT-TO-RIGHT ISOLATE (LRI)"
'\x2067' : "U+2067 RIGHT-TO-LEFT ISOLATE (RLI)"
'\x2068' : "U+2068 FIRST STRONG ISOLATE (FSI)"
'\x2069' : "U+2069 POP DIRECTIONAL ISOLATE (PDI)"

This list is encoded in 'bidirectionalFormatChars'

-}
{-# INLINE containsBidirectionalFormatChar #-}
containsBidirectionalFormatChar :: StringBuffer -> Bool
containsBidirectionalFormatChar :: StringBuffer -> Bool
containsBidirectionalFormatChar (StringBuffer ForeignPtr Word8
buf (I# Int#
len#) (I# Int#
cur#))
  = forall a. IO a -> a
inlinePerformIO forall a b. (a -> b) -> a -> b
$ forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word8
buf forall a b. (a -> b) -> a -> b
$ \(Ptr Addr#
a#) -> do
  let go :: Int# -> Bool
      go :: Int# -> Bool
go Int#
i | Int# -> Bool
isTrue# (Int#
i Int# -> Int# -> Int#
>=# Int#
len#) = Bool
False
           | Bool
otherwise = case Addr# -> Int# -> (# Char#, Int# #)
utf8DecodeCharAddr# Addr#
a# Int#
i of
                (# Char#
'\x202a'#  , Int#
_ #) -> Bool
True
                (# Char#
'\x202b'#  , Int#
_ #) -> Bool
True
                (# Char#
'\x202c'#  , Int#
_ #) -> Bool
True
                (# Char#
'\x202d'#  , Int#
_ #) -> Bool
True
                (# Char#
'\x202e'#  , Int#
_ #) -> Bool
True
                (# Char#
'\x2066'#  , Int#
_ #) -> Bool
True
                (# Char#
'\x2067'#  , Int#
_ #) -> Bool
True
                (# Char#
'\x2068'#  , Int#
_ #) -> Bool
True
                (# Char#
'\x2069'#  , Int#
_ #) -> Bool
True
                (# Char#
_, Int#
bytes #) -> Int# -> Bool
go (Int#
i Int# -> Int# -> Int#
+# Int#
bytes)
  forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$! Int# -> Bool
go Int#
cur#

-- | Return the first UTF-8 character of a nonempty 'StringBuffer' (analogous
-- to 'Data.List.head').  __Warning:__ The behavior is undefined if the
-- 'StringBuffer' is empty.  Similar to 'utf8DecodeChar', if the character
-- cannot be decoded as UTF-8, @\'\\0\'@ is returned.
currentChar :: StringBuffer -> Char
currentChar :: StringBuffer -> Char
currentChar = forall a b. (a, b) -> a
fst forall b c a. (b -> c) -> (a -> b) -> a -> c
. StringBuffer -> (Char, StringBuffer)
nextChar

prevChar :: StringBuffer -> Char -> Char
prevChar :: StringBuffer -> Char -> Char
prevChar (StringBuffer ForeignPtr Word8
_   Int
_   Int
0)   Char
deflt = Char
deflt
prevChar (StringBuffer ForeignPtr Word8
buf Int
_   Int
cur) Char
_     =
  forall a. IO a -> a
inlinePerformIO forall a b. (a -> b) -> a -> b
$
    forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word8
buf forall a b. (a -> b) -> a -> b
$ \Ptr Word8
p -> do
      Ptr Word8
p' <- Ptr Word8 -> IO (Ptr Word8)
utf8PrevChar (Ptr Word8
p forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
cur)
      forall (m :: * -> *) a. Monad m => a -> m a
return (forall a b. (a, b) -> a
fst (Ptr Word8 -> (Char, Int)
utf8DecodeCharPtr Ptr Word8
p'))

-- -----------------------------------------------------------------------------
-- Moving

-- | Return a 'StringBuffer' with the first UTF-8 character removed (analogous
-- to 'Data.List.tail').  __Warning:__ The behavior is undefined if the
-- 'StringBuffer' is empty.  The result shares the same buffer as the
-- original.
stepOn :: StringBuffer -> StringBuffer
stepOn :: StringBuffer -> StringBuffer
stepOn StringBuffer
s = forall a b. (a, b) -> b
snd (StringBuffer -> (Char, StringBuffer)
nextChar StringBuffer
s)

-- | Return a 'StringBuffer' with the first @n@ bytes removed.  __Warning:__
-- If there aren't enough characters, the returned 'StringBuffer' will be
-- invalid and any use of it may lead to undefined behavior.  The result
-- shares the same buffer as the original.
offsetBytes :: Int                      -- ^ @n@, the number of bytes
            -> StringBuffer
            -> StringBuffer
offsetBytes :: Int -> StringBuffer -> StringBuffer
offsetBytes Int
i StringBuffer
s = StringBuffer
s { cur :: Int
cur = StringBuffer -> Int
cur StringBuffer
s forall a. Num a => a -> a -> a
+ Int
i }

-- | Compute the difference in offset between two 'StringBuffer's that share
-- the same buffer.  __Warning:__ The behavior is undefined if the
-- 'StringBuffer's use separate buffers.
byteDiff :: StringBuffer -> StringBuffer -> Int
byteDiff :: StringBuffer -> StringBuffer -> Int
byteDiff StringBuffer
s1 StringBuffer
s2 = StringBuffer -> Int
cur StringBuffer
s2 forall a. Num a => a -> a -> a
- StringBuffer -> Int
cur StringBuffer
s1

-- | Check whether a 'StringBuffer' is empty (analogous to 'Data.List.null').
atEnd :: StringBuffer -> Bool
atEnd :: StringBuffer -> Bool
atEnd (StringBuffer ForeignPtr Word8
_ Int
l Int
c) = Int
l forall a. Eq a => a -> a -> Bool
== Int
c

-- | Computes a hash of the contents of a 'StringBuffer'.
fingerprintStringBuffer :: StringBuffer -> Fingerprint
fingerprintStringBuffer :: StringBuffer -> Fingerprint
fingerprintStringBuffer (StringBuffer ForeignPtr Word8
buf Int
len Int
cur) =
  forall a. IO a -> a
unsafePerformIO forall a b. (a -> b) -> a -> b
$
    forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr Word8
buf forall a b. (a -> b) -> a -> b
$ \Ptr Word8
ptr ->
      Ptr Word8 -> Int -> IO Fingerprint
fingerprintData (Ptr Word8
ptr forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
cur) Int
len

-- | Computes a 'StringBuffer' which points to the first character of the
-- wanted line. Lines begin at 1.
atLine :: Int -> StringBuffer -> Maybe StringBuffer
atLine :: Int -> StringBuffer -> Maybe StringBuffer
atLine Int
line sb :: StringBuffer
sb@(StringBuffer ForeignPtr Word8
buf Int
len Int
_) =
  forall a. IO a -> a
inlinePerformIO forall a b. (a -> b) -> a -> b
$
    forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word8
buf forall a b. (a -> b) -> a -> b
$ \Ptr Word8
p -> do
      Ptr Word8
p' <- Int -> Int -> Ptr Word8 -> IO (Ptr Word8)
skipToLine Int
line Int
len Ptr Word8
p
      if Ptr Word8
p' forall a. Eq a => a -> a -> Bool
== forall a. Ptr a
nullPtr
        then forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing
        else
          let
            delta :: Int
delta = Ptr Word8
p' forall a b. Ptr a -> Ptr b -> Int
`minusPtr` Ptr Word8
p
          in forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall a. a -> Maybe a
Just (StringBuffer
sb { cur :: Int
cur = Int
delta
                               , len :: Int
len = Int
len forall a. Num a => a -> a -> a
- Int
delta
                               })

skipToLine :: Int -> Int -> Ptr Word8 -> IO (Ptr Word8)
skipToLine :: Int -> Int -> Ptr Word8 -> IO (Ptr Word8)
skipToLine !Int
line !Int
len !Ptr Word8
op0 = Int -> Ptr Word8 -> IO (Ptr Word8)
go Int
1 Ptr Word8
op0
  where
    !opend :: Ptr Word8
opend = Ptr Word8
op0 forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
len

    go :: Int -> Ptr Word8 -> IO (Ptr Word8)
go !Int
i_line !Ptr Word8
op
      | Ptr Word8
op forall a. Ord a => a -> a -> Bool
>= Ptr Word8
opend    = forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Ptr a
nullPtr
      | Int
i_line forall a. Eq a => a -> a -> Bool
== Int
line = forall (f :: * -> *) a. Applicative f => a -> f a
pure Ptr Word8
op
      | Bool
otherwise      = do
          Word8
w <- forall a. Storable a => Ptr a -> IO a
peek Ptr Word8
op :: IO Word8
          case Word8
w of
            Word8
10 -> Int -> Ptr Word8 -> IO (Ptr Word8)
go (Int
i_line forall a. Num a => a -> a -> a
+ Int
1) (forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
op Int
1)
            Word8
13 -> do
              -- this is safe because a 'StringBuffer' is
              -- guaranteed to have 3 bytes sentinel values.
              Word8
w' <- forall a. Storable a => Ptr a -> IO a
peek (forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
op Int
1) :: IO Word8
              case Word8
w' of
                Word8
10 -> Int -> Ptr Word8 -> IO (Ptr Word8)
go (Int
i_line forall a. Num a => a -> a -> a
+ Int
1) (forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
op Int
2)
                Word8
_  -> Int -> Ptr Word8 -> IO (Ptr Word8)
go (Int
i_line forall a. Num a => a -> a -> a
+ Int
1) (forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
op Int
1)
            Word8
_  -> Int -> Ptr Word8 -> IO (Ptr Word8)
go Int
i_line (forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
op Int
1)

-- -----------------------------------------------------------------------------
-- Conversion

-- | Decode the first @n@ bytes of a 'StringBuffer' as UTF-8 into a 'String'.
-- Similar to 'utf8DecodeChar', if the character cannot be decoded as UTF-8,
-- they will be replaced with @\'\\0\'@.
lexemeToString :: StringBuffer
               -> Int                   -- ^ @n@, the number of bytes
               -> String
lexemeToString :: StringBuffer -> Int -> String
lexemeToString StringBuffer
_ Int
0 = String
""
lexemeToString (StringBuffer ForeignPtr Word8
buf Int
_ Int
cur) Int
bytes =
  ForeignPtr Word8 -> Int -> Int -> String
utf8DecodeForeignPtr ForeignPtr Word8
buf Int
cur Int
bytes

lexemeToFastString :: StringBuffer
                   -> Int               -- ^ @n@, the number of bytes
                   -> FastString
lexemeToFastString :: StringBuffer -> Int -> FastString
lexemeToFastString StringBuffer
_ Int
0 = FastString
nilFS
lexemeToFastString (StringBuffer ForeignPtr Word8
buf Int
_ Int
cur) Int
len =
   forall a. IO a -> a
inlinePerformIO forall a b. (a -> b) -> a -> b
$
     forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word8
buf forall a b. (a -> b) -> a -> b
$ \Ptr Word8
ptr ->
       forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Ptr Word8 -> Int -> FastString
mkFastStringBytes (Ptr Word8
ptr forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
cur) Int
len

-- | Return the previous @n@ characters (or fewer if we are less than @n@
-- characters into the buffer.
decodePrevNChars :: Int -> StringBuffer -> String
decodePrevNChars :: Int -> StringBuffer -> String
decodePrevNChars Int
n (StringBuffer ForeignPtr Word8
buf Int
_ Int
cur) =
    forall a. IO a -> a
inlinePerformIO forall a b. (a -> b) -> a -> b
$ forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word8
buf forall a b. (a -> b) -> a -> b
$ \Ptr Word8
p0 ->
      Ptr Word8 -> Int -> String -> Ptr Word8 -> IO String
go Ptr Word8
p0 Int
n String
"" (Ptr Word8
p0 forall a b. Ptr a -> Int -> Ptr b
`plusPtr` (Int
cur forall a. Num a => a -> a -> a
- Int
1))
  where
    go :: Ptr Word8 -> Int -> String -> Ptr Word8 -> IO String
    go :: Ptr Word8 -> Int -> String -> Ptr Word8 -> IO String
go Ptr Word8
buf0 Int
n String
acc Ptr Word8
p | Int
n forall a. Eq a => a -> a -> Bool
== Int
0 Bool -> Bool -> Bool
|| Ptr Word8
buf0 forall a. Ord a => a -> a -> Bool
>= Ptr Word8
p = forall (m :: * -> *) a. Monad m => a -> m a
return String
acc
    go Ptr Word8
buf0 Int
n String
acc Ptr Word8
p = do
        Ptr Word8
p' <- Ptr Word8 -> IO (Ptr Word8)
utf8PrevChar Ptr Word8
p
        let (Char
c,Int
_) = Ptr Word8 -> (Char, Int)
utf8DecodeCharPtr Ptr Word8
p'
        Ptr Word8 -> Int -> String -> Ptr Word8 -> IO String
go Ptr Word8
buf0 (Int
n forall a. Num a => a -> a -> a
- Int
1) (Char
cforall a. a -> [a] -> [a]
:String
acc) Ptr Word8
p'

-- -----------------------------------------------------------------------------
-- Parsing integer strings in various bases
parseUnsignedInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
parseUnsignedInteger :: StringBuffer -> Int -> Integer -> (Char -> Int) -> Integer
parseUnsignedInteger (StringBuffer ForeignPtr Word8
buf Int
_ Int
cur) Int
len Integer
radix Char -> Int
char_to_int
  = forall a. IO a -> a
inlinePerformIO forall a b. (a -> b) -> a -> b
$ forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr Word8
buf forall a b. (a -> b) -> a -> b
$ \Ptr Word8
ptr -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! let
    go :: Int -> Integer -> Integer
go Int
i Integer
x | Int
i forall a. Eq a => a -> a -> Bool
== Int
len  = Integer
x
           | Bool
otherwise = case forall a b. (a, b) -> a
fst (Ptr Word8 -> (Char, Int)
utf8DecodeCharPtr (Ptr Word8
ptr forall a b. Ptr a -> Int -> Ptr b
`plusPtr` (Int
cur forall a. Num a => a -> a -> a
+ Int
i))) of
               Char
'_'  -> Int -> Integer -> Integer
go (Int
i forall a. Num a => a -> a -> a
+ Int
1) Integer
x    -- skip "_" (#14473)
               Char
char -> Int -> Integer -> Integer
go (Int
i forall a. Num a => a -> a -> a
+ Int
1) (Integer
x forall a. Num a => a -> a -> a
* Integer
radix forall a. Num a => a -> a -> a
+ forall a. Integral a => a -> Integer
toInteger (Char -> Int
char_to_int Char
char))
  in Int -> Integer -> Integer
go Int
0 Integer
0