{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE ForeignFunctionInterface #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
module Data.JsonStream.CLexer (
tokenParser
, unescapeText
) where
#if !MIN_VERSION_bytestring(0,10,6)
import Control.Applicative ((<$>))
#endif
import Control.Monad (when)
import qualified Data.Aeson as AE
import qualified Data.ByteString as BSW
import qualified Data.ByteString.Char8 as BS
import Data.ByteString.Unsafe (unsafeUseAsCString)
import Data.Scientific (Scientific, scientific)
import Data.Text.Encoding (decodeUtf8')
import Data.Text.Internal.Unsafe (inlinePerformIO)
import Foreign
import Foreign.C.Types
import System.IO.Unsafe (unsafeDupablePerformIO)
import Data.JsonStream.CLexType
import Data.JsonStream.TokenParser (Element (..), TokenResult (..))
import Data.JsonStream.Unescape
numberDigitLimit :: Int
numberDigitLimit = 200000
newtype ResultPtr = ResultPtr { unresPtr :: ForeignPtr () }
data Header = Header {
hdrCurrentState :: !CInt
, hdrStateData :: !CInt
, hdrStateSata2 :: !CInt
, hdrPosition :: !CInt
, hdrLength :: !CInt
, hdrResultNum :: !CInt
, hdrResultLimit :: !CInt
} deriving (Show)
defHeader :: Header
defHeader = Header 0 0 0 0 0 0 0
instance Storable Header where
sizeOf _ = 7 * sizeOf (undefined :: CInt)
alignment _ = sizeOf (undefined :: CInt)
peek ptr = do
state <- peekByteOff ptr 0
sdata1 <- peekByteOff ptr (sizeOf state)
sdata2 <- peekByteOff ptr (2 * sizeOf state)
position <- peekByteOff ptr (3 * sizeOf state)
slength <- peekByteOff ptr (4 * sizeOf state)
sresultnum <- peekByteOff ptr (5 * sizeOf state)
sresultlimit <- peekByteOff ptr (6 * sizeOf state)
return $ Header state sdata1 sdata2 position slength sresultnum sresultlimit
poke ptr (Header {..}) = do
pokeByteOff ptr 0 hdrCurrentState
pokeByteOff ptr (1 * sizeOf hdrCurrentState) hdrStateData
pokeByteOff ptr (2 * sizeOf hdrCurrentState) hdrStateSata2
pokeByteOff ptr (3 * sizeOf hdrCurrentState) hdrPosition
pokeByteOff ptr (4 * sizeOf hdrCurrentState) hdrLength
pokeByteOff ptr (5 * sizeOf hdrCurrentState) hdrResultNum
pokeByteOff ptr (6 * sizeOf hdrCurrentState) hdrResultLimit
resultRecSize :: Int
resultRecSize = 4 * sizeOf (undefined :: CInt) + sizeOf (undefined :: CLong)
peekResultField :: Int -> Int -> ResultPtr -> Int
peekResultField n fieldno fptr = inlinePerformIO $
withForeignPtr (unresPtr fptr) $ \ptr ->
fromIntegral <$> (peekByteOff ptr (resultRecSize * n + fieldno * isize) :: IO CInt)
where
isize = sizeOf (undefined :: CInt)
peekResultAddData :: Int -> ResultPtr -> CLong
peekResultAddData n fptr = inlinePerformIO $
withForeignPtr (unresPtr fptr) $ \ptr ->
fromIntegral <$> (peekByteOff ptr (resultRecSize * n + 4 * isize) :: IO CLong)
where
isize = sizeOf (undefined :: CInt)
peekResultType :: Int -> ResultPtr -> LexResultType
peekResultType n fptr = inlinePerformIO $
withForeignPtr (unresPtr fptr) $ \ptr ->
LexResultType <$> peekByteOff ptr (resultRecSize * n)
foreign import ccall unsafe "lex_json" lexJson :: Ptr CChar -> Ptr Header -> Ptr () -> IO CInt
callLex :: BS.ByteString -> Header -> (CInt, Header, Int, ResultPtr)
callLex bs hdr = unsafeDupablePerformIO $
alloca $ \hdrptr -> do
poke hdrptr (hdr{hdrResultNum=0, hdrLength=fromIntegral $ BS.length bs})
bsptr <- unsafeUseAsCString bs return
resptr <- mallocForeignPtrBytes (fromIntegral (hdrResultLimit hdr) * resultRecSize)
res <- withForeignPtr resptr $ \resptr' ->
lexJson bsptr hdrptr resptr'
hdrres <- peek hdrptr
let !rescount = fromIntegral (hdrResultNum hdrres)
return (res, hdrres, rescount, ResultPtr resptr)
{-# INLINE substr #-}
substr :: Int -> Int -> BS.ByteString -> BS.ByteString
substr start len = BS.take len . BS.drop start
data TempData = TempData {
tmpBuffer :: BS.ByteString
, tmpHeader :: Header
, tmpError :: Bool
, tmpNumbers :: [BS.ByteString]
}
parseNumber :: BS.ByteString -> Maybe Scientific
parseNumber tnumber = do
let
(csign, r1) = parseSign tnumber :: (Int, BS.ByteString)
((num, numdigits), r2) = parseDecimal r1 :: ((Integer, Int), BS.ByteString)
((frac, frdigits), r3) = parseFract r2 :: ((Integer, Int), BS.ByteString)
(texp, rest) = parseE r3
when (numdigits == 0 || not (BS.null rest)) Nothing
let dpart = fromIntegral csign * (num * (10 ^ frdigits) + fromIntegral frac) :: Integer
e = texp - frdigits
return $ scientific dpart e
where
parseFract txt
| BS.null txt = ((0, 0), txt)
| BS.head txt == '.' = parseDecimal (BS.tail txt)
| otherwise = ((0,0), txt)
parseE txt
| BS.null txt = (0, txt)
| firstc == 'e' || firstc == 'E' =
let (sign, rest) = parseSign (BS.tail txt)
((dnum, _), trest) = parseDecimal rest :: ((Int, Int), BS.ByteString)
in (dnum * sign, trest)
| otherwise = (0, txt)
where
firstc = BS.head txt
parseSign txt
| BS.null txt = (1, txt)
| BS.head txt == '+' = (1, BS.tail txt)
| BS.head txt == '-' = (-1, BS.tail txt)
| otherwise = (1, txt)
parseDecimal txt
| BS.null txt = ((0, 0), txt)
| otherwise = parseNum txt (0,0)
parseNum txt (!start, !digits)
| BS.null txt = ((start, digits), txt)
| dchr >= 48 && dchr <= 57 = parseNum (BS.tail txt) (start * 10 + fromIntegral (dchr - 48), digits + 1)
| otherwise = ((start, digits), txt)
where
dchr = BSW.head txt
parseResults :: TempData -> (CInt, Header, Int, ResultPtr) -> TokenResult
parseResults (TempData {tmpNumbers=tmpNumbers, tmpBuffer=bs}) (err, hdr, rescount, resptr) = parse 0
where
newtemp = TempData bs hdr (err /= 0)
parse n
| n >= rescount = getNextResult (newtemp tmpNumbers)
| otherwise =
let resType = peekResultType n resptr
resStartPos = peekResultField n 1 resptr
resLength = peekResultField n 2 resptr
resAddData = peekResultAddData n resptr
next = parse (n + 1)
context = BS.drop (resStartPos + resLength) bs
textSection = substr resStartPos resLength bs
in case () of
_| resType == resNumberPartial ->
if | resAddData == 0 -> getNextResult (newtemp [textSection])
| sum (map BS.length tmpNumbers) > numberDigitLimit -> TokFailed
| otherwise -> getNextResult (newtemp (textSection:tmpNumbers))
| resType == resTrue -> PartialResult (JValue (AE.Bool True)) next
| resType == resFalse -> PartialResult (JValue (AE.Bool False)) next
| resType == resNull -> PartialResult (JValue AE.Null) next
| resType == resOpenBrace -> PartialResult ObjectBegin next
| resType == resOpenBracket -> PartialResult ArrayBegin next
| resType == resCloseBrace -> PartialResult (ObjectEnd context) next
| resType == resCloseBracket -> PartialResult (ArrayEnd context) next
| resType == resNumberSmall ->
if | resLength == 0 -> PartialResult (JInteger resAddData) next
| otherwise -> PartialResult
(JValue (AE.Number $ scientific (fromIntegral resAddData) ((-1) * resLength)))
next
| resType == resNumber ->
if | resAddData == 0 ->
case parseNumber textSection of
Just num -> PartialResult (JValue (AE.Number num)) next
Nothing -> TokFailed
| otherwise ->
case parseNumber (BS.concat $ reverse (textSection:tmpNumbers)) of
Just num -> PartialResult (JValue (AE.Number num)) next
Nothing -> TokFailed
| resType == resString ->
if | resAddData == -1 ->
case decodeUtf8' textSection of
Right ctext -> PartialResult (JValue (AE.String ctext)) next
Left _ -> TokFailed
| resAddData == 0 ->
case unescapeText textSection of
Right ctext -> PartialResult (JValue (AE.String ctext)) next
_ -> TokFailed
| otherwise -> PartialResult (StringContent textSection)
(PartialResult StringEnd next)
| resType == resStringPartial ->
PartialResult (StringContent textSection) next
| otherwise -> error "Unsupported"
estResultLimit :: BS.ByteString -> CInt
estResultLimit dta = fromIntegral $ 20 + BS.length dta `quot` 5
getNextResult :: TempData -> TokenResult
getNextResult tmp@(TempData {..})
| tmpError = TokFailed
| hdrPosition tmpHeader < hdrLength tmpHeader = parseResults tmp (callLex tmpBuffer tmpHeader)
| otherwise = TokMoreData newdata
where
newdata dta = parseResults newtmp (callLex dta newhdr{hdrResultLimit=estResultLimit dta})
where
newtmp = tmp{tmpBuffer=dta}
newhdr = tmpHeader{hdrPosition=0, hdrLength=fromIntegral $ BS.length dta}
tokenParser :: BS.ByteString -> TokenResult
tokenParser dta = getNextResult (TempData dta newhdr False [])
where
newhdr = defHeader{hdrLength=fromIntegral (BS.length dta), hdrResultLimit=estResultLimit dta}