{-# LANGUAGE BangPatterns, CPP, MagicHash, UnboxedTuples #-}
{-# OPTIONS_GHC -O2 #-}
module StringBuffer
(
StringBuffer(..),
hGetStringBuffer,
hGetStringBufferBlock,
hPutStringBuffer,
appendStringBuffers,
stringToStringBuffer,
nextChar,
currentChar,
prevChar,
atEnd,
stepOn,
offsetBytes,
byteDiff,
atLine,
lexemeToString,
lexemeToFastString,
decodePrevNChars,
parseUnsignedInteger,
) where
#include "HsVersions.h"
import GhcPrelude
import Encoding
import FastString
import FastFunctions
import PlainPanic
import Util
import Data.Maybe
import Control.Exception
import System.IO
import System.IO.Unsafe ( unsafePerformIO )
import GHC.IO.Encoding.UTF8 ( mkUTF8 )
import GHC.IO.Encoding.Failure ( CodingFailureMode(IgnoreCodingFailure) )
import GHC.Exts
import Foreign
data StringBuffer
= StringBuffer {
buf :: {-# UNPACK #-} !(ForeignPtr Word8),
len :: {-# UNPACK #-} !Int,
cur :: {-# UNPACK #-} !Int
}
instance Show StringBuffer where
showsPrec _ s = showString "<stringbuffer("
. shows (len s) . showString "," . shows (cur s)
. showString ")>"
hGetStringBuffer :: FilePath -> IO StringBuffer
hGetStringBuffer fname = do
h <- openBinaryFile fname ReadMode
size_i <- hFileSize h
offset_i <- skipBOM h size_i 0
let size = fromIntegral $ size_i - offset_i
buf <- mallocForeignPtrArray (size+3)
withForeignPtr buf $ \ptr -> do
r <- if size == 0 then return 0 else hGetBuf h ptr size
hClose h
if (r /= size)
then ioError (userError "short read of file")
else newUTF8StringBuffer buf ptr size
hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer
hGetStringBufferBlock handle wanted
= do size_i <- hFileSize handle
offset_i <- hTell handle >>= skipBOM handle size_i
let size = min wanted (fromIntegral $ size_i-offset_i)
buf <- mallocForeignPtrArray (size+3)
withForeignPtr buf $ \ptr ->
do r <- if size == 0 then return 0 else hGetBuf handle ptr size
if r /= size
then ioError (userError $ "short read of file: "++show(r,size,size_i,handle))
else newUTF8StringBuffer buf ptr size
hPutStringBuffer :: Handle -> StringBuffer -> IO ()
hPutStringBuffer hdl (StringBuffer buf len cur)
= do withForeignPtr (plusForeignPtr buf cur) $ \ptr ->
hPutBuf hdl ptr len
skipBOM :: Handle -> Integer -> Integer -> IO Integer
skipBOM h size offset =
if size > 0 && offset == 0
then do
ASSERTM( hGetEncoding h >>= return . isNothing )
bracket_ (hSetEncoding h safeEncoding) (hSetBinaryMode h True) $ do
c <- hLookAhead h
if c == '\xfeff'
then hGetChar h >> hTell h
else return offset
else return offset
where
safeEncoding = mkUTF8 IgnoreCodingFailure
newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer
newUTF8StringBuffer buf ptr size = do
pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
return $ StringBuffer buf size 0
appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer
appendStringBuffers sb1 sb2
= do newBuf <- mallocForeignPtrArray (size+3)
withForeignPtr newBuf $ \ptr ->
withForeignPtr (buf sb1) $ \sb1Ptr ->
withForeignPtr (buf sb2) $ \sb2Ptr ->
do copyArray ptr (sb1Ptr `advancePtr` cur sb1) sb1_len
copyArray (ptr `advancePtr` sb1_len) (sb2Ptr `advancePtr` cur sb2) sb2_len
pokeArray (ptr `advancePtr` size) [0,0,0]
return (StringBuffer newBuf size 0)
where sb1_len = calcLen sb1
sb2_len = calcLen sb2
calcLen sb = len sb - cur sb
size = sb1_len + sb2_len
stringToStringBuffer :: String -> StringBuffer
stringToStringBuffer str =
unsafePerformIO $ do
let size = utf8EncodedLength str
buf <- mallocForeignPtrArray (size+3)
withForeignPtr buf $ \ptr -> do
utf8EncodeString ptr str
pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
return (StringBuffer buf size 0)
{-# INLINE nextChar #-}
nextChar :: StringBuffer -> (Char,StringBuffer)
nextChar (StringBuffer buf len (I# cur#)) =
inlinePerformIO $ do
withForeignPtr buf $ \(Ptr a#) -> do
case utf8DecodeChar# (a# `plusAddr#` cur#) of
(# c#, nBytes# #) ->
let cur' = I# (cur# +# nBytes#) in
return (C# c#, StringBuffer buf len cur')
currentChar :: StringBuffer -> Char
currentChar = fst . nextChar
prevChar :: StringBuffer -> Char -> Char
prevChar (StringBuffer _ _ 0) deflt = deflt
prevChar (StringBuffer buf _ cur) _ =
inlinePerformIO $ do
withForeignPtr buf $ \p -> do
p' <- utf8PrevChar (p `plusPtr` cur)
return (fst (utf8DecodeChar p'))
stepOn :: StringBuffer -> StringBuffer
stepOn s = snd (nextChar s)
offsetBytes :: Int
-> StringBuffer
-> StringBuffer
offsetBytes i s = s { cur = cur s + i }
byteDiff :: StringBuffer -> StringBuffer -> Int
byteDiff s1 s2 = cur s2 - cur s1
atEnd :: StringBuffer -> Bool
atEnd (StringBuffer _ l c) = l == c
atLine :: Int -> StringBuffer -> Maybe StringBuffer
atLine line sb@(StringBuffer buf len _) =
inlinePerformIO $
withForeignPtr buf $ \p -> do
p' <- skipToLine line len p
if p' == nullPtr
then return Nothing
else
let
delta = p' `minusPtr` p
in return $ Just (sb { cur = delta
, len = len - delta
})
skipToLine :: Int -> Int -> Ptr Word8 -> IO (Ptr Word8)
skipToLine !line !len !op0 = go 1 op0
where
!opend = op0 `plusPtr` len
go !i_line !op
| op >= opend = pure nullPtr
| i_line == line = pure op
| otherwise = do
w <- peek op :: IO Word8
case w of
10 -> go (i_line + 1) (plusPtr op 1)
13 -> do
w' <- peek (plusPtr op 1) :: IO Word8
case w' of
10 -> go (i_line + 1) (plusPtr op 2)
_ -> go (i_line + 1) (plusPtr op 1)
_ -> go i_line (plusPtr op 1)
lexemeToString :: StringBuffer
-> Int
-> String
lexemeToString _ 0 = ""
lexemeToString (StringBuffer buf _ cur) bytes =
utf8DecodeStringLazy buf cur bytes
lexemeToFastString :: StringBuffer
-> Int
-> FastString
lexemeToFastString _ 0 = nilFS
lexemeToFastString (StringBuffer buf _ cur) len =
inlinePerformIO $
withForeignPtr buf $ \ptr ->
return $! mkFastStringBytes (ptr `plusPtr` cur) len
decodePrevNChars :: Int -> StringBuffer -> String
decodePrevNChars n (StringBuffer buf _ cur) =
inlinePerformIO $ withForeignPtr buf $ \p0 ->
go p0 n "" (p0 `plusPtr` (cur - 1))
where
go :: Ptr Word8 -> Int -> String -> Ptr Word8 -> IO String
go buf0 n acc p | n == 0 || buf0 >= p = return acc
go buf0 n acc p = do
p' <- utf8PrevChar p
let (c,_) = utf8DecodeChar p'
go buf0 (n - 1) (c:acc) p'
parseUnsignedInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
parseUnsignedInteger (StringBuffer buf _ cur) len radix char_to_int
= inlinePerformIO $ withForeignPtr buf $ \ptr -> return $! let
go i x | i == len = x
| otherwise = case fst (utf8DecodeChar (ptr `plusPtr` (cur + i))) of
'_' -> go (i + 1) x
char -> go (i + 1) (x * radix + toInteger (char_to_int char))
in go 0 0