{-# OPTIONS_GHC -funbox-strict-fields #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} module InternalParserJSON( JsonToken(..) ,Separator(..) ,ConType(..) ,parseTokenPrimValue ,DelimSort(..) ) where import Data.ByteString.Builder (Builder, byteString, toLazyByteString, charUtf8, word8) import Control.Applicative (empty,(<|>)) import Data.Attoparsec.ByteString.Char8 (char,Parser,string,peekChar',double) import Data.Bits ((.|.), shiftL) import Data.ByteString (ByteString) import Data.Char (chr, isDigit) import Data.Text (Text) import Data.Text.Encoding (decodeUtf8') import qualified Data.Attoparsec.ByteString as A import qualified Data.Attoparsec.Zepto as Z import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Unsafe as B -- these imports are only needed pre GHC 7.10 / pre #if !MIN_VERSION_base(4,8,0) import Data.Monoid import Control.Applicative #endif ---- import TokenJSON #define BACKSLASH 92 #define CLOSE_CURLY 125 #define CLOSE_SQUARE 93 #define COMMA 44 #define DOUBLE_QUOTE 34 #define OPEN_CURLY 123 #define OPEN_SQUARE 91 #define C_0 48 #define C_9 57 #define C_A 65 #define C_F 70 #define C_a 97 #define C_f 102 #define C_n 110 #define C_t 116 -- | 'parseTokenValue' will parse any "token" value, namely true,false,String,Number, or Null parseTokenPrimValue :: Parser JsonToken parseTokenPrimValue = do nextChar <- peekChar' case nextChar of '[' -> ( string "[]" >> return EmptyArray ) <|> (do _<- char '[' ; return $! OpenDelim Array) ']' -> do _<- char ']'; return $! CloseDelim Array '{' -> (do _<- string "{}" ; return EmptyObject) <|> (do _ <-char '{' ; return $! OpenDelim Object) '}' -> do _<- char '}' ; return $! CloseDelim Object ':' -> do _<- char ':' ; return $! TokenSeparator Colon ',' -> do _ <- char ',' ; return $! TokenSeparator Comma '"' -> fmap (TokenText $!) jstring -- Do we really need $! here? 't' -> do _ <- string "true" ; return $! TokenBool True 'f' -> do _ <- string "false" ; return $! TokenBool False 'n' -> do _<- string "null" ; return TokenNull c | c == '+' || c == '-' || isDigit c -> fmap TokenNumber double --- REPLACE DOUBLE WITH SCIENTIFIC Parse (and eager decimal for 0.0 corner case) _ -> empty -- | Parse a top-level JSON value. This must be either an object or -- an array, per RFC 4627. -- -- The conversion of a parsed value to a Haskell value is deferred -- until the Haskell value is needed. This may improve performance if -- only a subset of the results of conversions are needed, but at a -- cost in thunk allocation. -- | Parse a quoted JSON string. jstring :: Parser Text jstring = A.word8 DOUBLE_QUOTE *> jstring_ -- | Parse a string without a leading quote. jstring_ :: Parser Text jstring_ = {-# SCC "jstring_" #-} do s <- A.scan False $ \s c -> if s then Just False else if c == DOUBLE_QUOTE then Nothing else Just (c == BACKSLASH) _ <- A.word8 DOUBLE_QUOTE s1 <- if BACKSLASH `B.elem` s then case Z.parse unescape s of Right r -> return r Left err -> fail err else return s case decodeUtf8' s1 of Right r -> return r Left err -> fail $ show err {-# INLINE jstring_ #-} unescape :: Z.Parser ByteString unescape = toByteString <$> go mempty where go acc = do h <- Z.takeWhile (/=BACKSLASH) let rest = do start <- Z.take 2 let !slash = B.unsafeHead start !t = B.unsafeIndex start 1 escape = case B.findIndex (==t) "\"\\/ntbrfu" of Just i -> i _ -> 255 if slash /= BACKSLASH || escape == 255 then fail "invalid JSON escape sequence" else do let cont m = go (acc `mappend` byteString h `mappend` m) {-# INLINE cont #-} if t /= 117 -- 'u' then cont (word8 (B.unsafeIndex mapping escape)) else do a <- hexQuad if a < 0xd800 || a > 0xdfff then cont (charUtf8 (chr a)) else do b <- Z.string "\\u" *> hexQuad if a <= 0xdbff && b >= 0xdc00 && b <= 0xdfff then let !c = ((a - 0xd800) `shiftL` 10) + (b - 0xdc00) + 0x10000 in cont (charUtf8 (chr c)) else fail "invalid UTF-16 surrogates" done <- Z.atEnd if done then return (acc `mappend` byteString h) else rest mapping = "\"\\/\n\t\b\r\f" hexQuad :: Z.Parser Int hexQuad = do s <- Z.take 4 let hex n | w >= C_0 && w <= C_9 = w - C_0 | w >= C_a && w <= C_f = w - 87 | w >= C_A && w <= C_F = w - 55 | otherwise = 255 where w = fromIntegral $ B.unsafeIndex s n a = hex 0; b = hex 1; c = hex 2; d = hex 3 if (a .|. b .|. c .|. d) /= 255 then return $! d .|. (c `shiftL` 4) .|. (b `shiftL` 8) .|. (a `shiftL` 12) else fail "invalid hex escape" toByteString :: Builder -> ByteString toByteString = L.toStrict . toLazyByteString {-# INLINE toByteString #-}