module Data.JsonStream.Parser (
Parser
, ParseOutput(..)
, runParser
, runParser'
, parseByteString
, parseLazyByteString
, value
, string
, bytestring
, safeString
, number
, integer
, real
, bool
, jNull
, (.:)
, (.:?)
, (.!=)
, (.!)
, objectWithKey
, objectItems
, objectValues
, arrayOf
, arrayWithIndexOf
, indexedArrayOf
, nullable
, defaultValue
, filterI
, takeI
, toList
) where
import Control.Applicative
import qualified Data.Aeson as AE
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BL
import qualified Data.HashMap.Strict as HMap
import Data.Maybe (fromMaybe)
import Data.Scientific (Scientific, isInteger,
toBoundedInteger, toRealFloat)
import qualified Data.Text as T
import Data.Text.Encoding (encodeUtf8)
import qualified Data.Text.Lazy as TL
import Data.Text.Lazy.Encoding (decodeUtf8')
import qualified Data.Vector as Vec
import Data.JsonStream.TokenParser
objectKeyStringLimit :: Int
objectKeyStringLimit = 65536
data ParseResult v = MoreData (Parser v, BS.ByteString -> TokenResult)
| Failed String
| Done (Maybe Element) TokenResult
| Yield v (ParseResult v)
instance Functor ParseResult where
fmap f (MoreData (np, ntok)) = MoreData (fmap f np, ntok)
fmap _ (Failed err) = Failed err
fmap _ (Done el tok) = Done el tok
fmap f (Yield v np) = Yield (f v) (fmap f np)
newtype Parser a = Parser {
callParse :: TokenResult -> ParseResult a
}
instance Functor Parser where
fmap f (Parser p) = Parser $ \d -> fmap f (p d)
instance Applicative Parser where
pure x = Parser $ \tok -> process (callParse ignoreVal tok)
where
process (Failed err) = Failed err
process (Done Nothing tok) = Yield x (Done Nothing tok)
process (Done (Just el) tok) = Done (Just el) tok
process (MoreData (np, ntok)) = MoreData (Parser (process . callParse np), ntok)
process _ = Failed "Internal error in pure, ignoreVal doesn't yield"
(<*>) m1 m2 = Parser $ \tok -> process ([], []) (callParse m1 tok) (callParse m2 tok)
where
process ([], _) (Done el ntok) _ = Done el ntok
process (lst1, lst2) (Yield v np1) p2 = process (v:lst1, lst2) np1 p2
process (lst1, lst2) p1 (Yield v np2) = process (lst1, v:lst2) p1 np2
process (lst1, lst2) (Done el ntok) (Done _ _) =
yieldResults [ mx my | mx <- lst1, my <- lst2 ] (Done el ntok)
process lsts (MoreData (np1, ntok1)) (MoreData (np2, _)) =
MoreData (Parser (\tok -> process lsts (callParse np1 tok) (callParse np2 tok)), ntok1)
process _ (Failed err) _ = Failed err
process _ _ (Failed err) = Failed err
process _ _ _ = Failed "Unexpected error in parallel processing <*>."
yieldResults values end = foldr Yield end values
instance Alternative Parser where
empty = ignoreVal
(<|>) m1 m2 = Parser $ \tok -> process (callParse m1 tok) (callParse m2 tok)
where
process (Yield v np1) p2 = Yield v (process np1 p2)
process p1 (Yield v np2) = Yield v (process p1 np2)
process (Done el ntok) (Done _ _) = Done el ntok
process (MoreData (np1, ntok)) (MoreData (np2, _)) =
MoreData (Parser $ \tok -> process (callParse np1 tok) (callParse np2 tok), ntok)
process (Failed err) _ = Failed err
process _ (Failed err) = Failed err
process _ _ = error "Unexpected error in parallel processing <|>"
array' :: (Int -> Parser a) -> Parser a
array' valparse = Parser $ \tp ->
case tp of
(PartialResult ArrayBegin ntp _) -> arrcontent 0 (callParse (valparse 0) ntp)
(PartialResult el ntp _)
| el == ArrayEnd || el == ObjectEnd -> Done (Just el) ntp
| otherwise -> callParse ignoreVal tp
(TokMoreData ntok _) -> MoreData (array' valparse, ntok)
(TokFailed _) -> Failed "Array - token failed"
where
arrcontent i (Done Nothing ntp) = arrcontent (i+1) (callParse (valparse (i + 1)) ntp)
arrcontent i (MoreData (Parser np, ntp)) = MoreData (Parser (arrcontent i . np), ntp)
arrcontent i (Yield v np) = Yield v (arrcontent i np)
arrcontent _ (Failed err) = Failed err
arrcontent _ (Done (Just ArrayEnd) ntp) = Done Nothing ntp
arrcontent _ (Done (Just el) _) = Failed ("Array - UnexpectedEnd: " ++ show el)
arrayOf :: Parser a -> Parser a
arrayOf valparse = array' (const valparse)
arrayWithIndexOf :: Int -> Parser a -> Parser a
arrayWithIndexOf idx valparse = array' itemFn
where
itemFn aidx
| aidx == idx = valparse
| otherwise = ignoreVal
indexedArrayOf :: Parser a -> Parser (Int, a)
indexedArrayOf valparse = array' (\(!key) -> (key,) <$> valparse)
object' :: Bool -> (T.Text -> Parser a) -> Parser a
object' once valparse = Parser $ moreData object''
where
object'' tok el ntok =
case el of
ObjectBegin -> objcontent False (moreData keyValue ntok)
ArrayEnd -> Done (Just el) ntok
ObjectEnd -> Done (Just el) ntok
_ -> callParse ignoreVal tok
objcontent yielded (Done Nothing ntp)
| once && yielded = callParse (ignoreVal' 1) ntp
| otherwise = objcontent yielded (moreData keyValue ntp)
objcontent yielded (MoreData (Parser np, ntok)) = MoreData (Parser (objcontent yielded. np), ntok)
objcontent _ (Yield v np) = Yield v (objcontent True np)
objcontent _ (Failed err) = Failed err
objcontent _ (Done (Just ObjectEnd) ntp) = Done Nothing ntp
objcontent _ (Done (Just el) _) = Failed ("Object - UnexpectedEnd: " ++ show el)
keyValue _ el ntok =
case el of
JValue (AE.String key) -> callParse (valparse key) ntok
StringBegin str -> moreData (getLongKey [str] (BS.length str)) ntok
_| el == ArrayEnd || el == ObjectEnd -> Done (Just el) ntok
| otherwise -> Failed ("Object - unexpected token: " ++ show el)
getLongKey acc len _ el ntok =
case el of
StringEnd
| Right key <- decodeUtf8' (BL.fromChunks $ reverse acc) ->
callParse (valparse $ T.concat $ TL.toChunks key) ntok
| otherwise -> Failed "Error decoding UTF8"
StringContent str
| len > objectKeyStringLimit -> callParse (ignoreStrRestThen ignoreVal) ntok
| otherwise -> moreData (getLongKey (str:acc) (len + BS.length str)) ntok
_ -> Failed "Object longstr - unexpected token."
moreData :: (TokenResult -> Element -> TokenResult -> ParseResult v) -> TokenResult -> ParseResult v
moreData parser tok =
case tok of
PartialResult el ntok _ -> parser tok el ntok
TokMoreData ntok _ -> MoreData (Parser (moreData parser), ntok)
TokFailed _ -> Failed "Object longstr - unexpected token."
objectItems :: Parser a -> Parser (T.Text, a)
objectItems valparse = object' False $ \(!key) -> (key,) <$> valparse
objectValues :: Parser a -> Parser a
objectValues valparse = object' False (const valparse)
objectWithKey :: T.Text -> Parser a -> Parser a
objectWithKey name valparse = object' True itemFn
where
itemFn key
| key == name = valparse
| otherwise = ignoreVal
aeValue :: Parser AE.Value
aeValue = Parser $ moreData value'
where
value' tok el ntok =
case el of
JValue val -> Yield val (Done Nothing ntok)
StringBegin _ -> callParse (AE.String <$> longString Nothing) tok
ArrayBegin -> AE.Array . Vec.fromList <$> callParse (toList (arrayOf aeValue)) tok
ObjectBegin -> AE.Object . HMap.fromList <$> callParse (toList (objectItems aeValue)) tok
ArrayEnd -> Done (Just el) ntok
ObjectEnd -> Done (Just el) ntok
_ -> Failed ("aeValue - unexpected token: " ++ show el)
jvalue :: (AE.Value -> Maybe a) -> Parser a
jvalue convert = Parser (moreData value')
where
value' tok el ntok =
case el of
JValue val
| Just convValue <- convert val -> Yield convValue (Done Nothing ntok)
| otherwise -> Done Nothing ntok
ArrayEnd -> Done (Just el) ntok
ObjectEnd -> Done (Just el) ntok
_ -> callParse ignoreVal tok
longString :: Maybe Int -> Parser T.Text
longString mbounds = Parser $ moreData (handle [] 0)
where
handle acc len tok el ntok =
case el of
JValue (AE.String str) -> Yield str (Done Nothing ntok)
StringBegin str -> moreData (handle [str] (BS.length str)) ntok
StringContent str
| (Just bounds) <- mbounds, len > bounds
-> callParse (ignoreVal' 1) ntok
| otherwise -> moreData (handle (str:acc) (len + BS.length str)) ntok
StringEnd
| Right val <- decodeUtf8' (BL.fromChunks $ reverse acc)
-> Yield (T.concat $ TL.toChunks val) (Done Nothing ntok)
| otherwise -> Failed "Error decoding UTF8"
_ -> callParse ignoreVal tok
bytestring :: Parser BL.ByteString
bytestring = Parser $ moreData (handle [])
where
handle acc tok el ntok =
case el of
JValue (AE.String str) -> Yield (BL.fromChunks [encodeUtf8 str]) (Done Nothing ntok)
StringBegin str -> moreData (handle [str]) ntok
StringContent str -> moreData (handle (str:acc)) ntok
StringEnd -> Yield (BL.fromChunks $ reverse acc) (Done Nothing ntok)
_ -> callParse ignoreVal tok
string :: Parser T.Text
string = longString Nothing
safeString :: Int -> Parser T.Text
safeString limit = longString (Just limit)
number :: Parser Scientific
number = jvalue cvt
where
cvt (AE.Number num) = Just num
cvt _ = Nothing
integer :: (Integral i, Bounded i) => Parser i
integer = jvalue cvt
where
cvt (AE.Number num)
| isInteger num = toBoundedInteger num
cvt _ = Nothing
real :: RealFloat a => Parser a
real = jvalue cvt
where
cvt (AE.Number num) = Just $ toRealFloat num
cvt _ = Nothing
bool :: Parser Bool
bool = jvalue cvt
where
cvt (AE.Bool b) = Just b
cvt _ = Nothing
jNull :: Parser ()
jNull = jvalue cvt
where
cvt (AE.Null) = Just ()
cvt _ = Nothing
nullable :: Parser a -> Parser (Maybe a)
nullable valparse = Parser (moreData value')
where
value' _ (JValue AE.Null) ntok = Yield Nothing (Done Nothing ntok)
value' tok _ _ = callParse (Just <$> valparse) tok
value :: AE.FromJSON a => Parser a
value = Parser $ \ntok -> loop (callParse aeValue ntok)
where
loop (Done el ntp) = Done el ntp
loop (Failed err) = Failed err
loop (MoreData (Parser np, ntok)) = MoreData (Parser (loop . np), ntok)
loop (Yield v np) =
case AE.fromJSON v of
AE.Error _ -> loop np
AE.Success res -> Yield res (loop np)
takeI :: Int -> Parser a -> Parser a
takeI num valparse = Parser $ \tok -> loop num (callParse valparse tok)
where
loop _ (Done el ntp) = Done el ntp
loop _ (Failed err) = Failed err
loop n (MoreData (Parser np, ntok)) = MoreData (Parser (loop n . np), ntok)
loop 0 (Yield _ np) = loop 0 np
loop n (Yield v np) = Yield v (loop (n1) np)
ignoreStrRestThen :: Parser a -> Parser a
ignoreStrRestThen next = Parser $ moreData handle
where
handle _ el ntok =
case el of
StringContent _ -> moreData handle ntok
StringEnd -> callParse next ntok
_ -> Failed "Unexpected result in ignoreStrRestPlusOne"
ignoreVal :: Parser a
ignoreVal = ignoreVal' 0
ignoreVal' :: Int -> Parser a
ignoreVal' stval = Parser $ moreData (handleTok stval)
where
handleTok :: Int -> TokenResult -> Element -> TokenResult -> ParseResult a
handleTok 0 _ (JValue _) ntok = Done Nothing ntok
handleTok 0 _ elm ntok
| elm == ArrayEnd || elm == ObjectEnd = Done (Just elm) ntok
handleTok 1 _ elm ntok
| elm == ArrayEnd || elm == ObjectEnd || elm == StringEnd = Done Nothing ntok
handleTok level _ el ntok =
case el of
JValue _ -> moreData (handleTok level) ntok
StringBegin _ -> moreData (handleTok (level + 1)) ntok
StringEnd -> moreData (handleTok (level 1)) ntok
StringContent _ -> moreData (handleTok level) ntok
_| el == ArrayBegin || el == ObjectBegin -> moreData (handleTok (level + 1)) ntok
| el == ArrayEnd || el == ObjectEnd -> moreData (handleTok (level 1)) ntok
| otherwise -> Failed "UnexpectedEnd "
toList :: Parser a -> Parser [a]
toList f = Parser $ \ntok -> loop [] (callParse f ntok)
where
loop acc (Done el ntp) = Yield (reverse acc) (Done el ntp)
loop acc (MoreData (Parser np, ntok)) = MoreData (Parser (loop acc . np), ntok)
loop acc (Yield v np) = loop (v:acc) np
loop _ (Failed err) = Failed err
filterI :: (a -> Bool) -> Parser a -> Parser a
filterI cond valparse = Parser $ \ntok -> loop (callParse valparse ntok)
where
loop (Done el ntp) = Done el ntp
loop (Failed err) = Failed err
loop (MoreData (Parser np, ntok)) = MoreData (Parser (loop . np), ntok)
loop (Yield v np)
| cond v = Yield v (loop np)
| otherwise = loop np
defaultValue :: a -> Parser a -> Parser a
defaultValue defvalue valparse = Parser $ \ntok -> loop False (callParse valparse ntok)
where
loop False (Done Nothing ntp) = Yield defvalue (Done Nothing ntp)
loop _ (Done el ntp) = Done el ntp
loop _ (Failed err) = Failed err
loop found (MoreData (Parser np, ntok)) = MoreData (Parser (loop found . np), ntok)
loop _ (Yield v np) = Yield v (loop True np)
(.:) :: T.Text -> Parser a -> Parser a
(.:) = objectWithKey
infixr 7 .:
(.:?) :: T.Text -> Parser a -> Parser (Maybe a)
key .:? val = defaultValue Nothing (key .: nullable val)
infixr 7 .:?
(.!=) :: Parser (Maybe a) -> a -> Parser a
nullval .!= defval = fromMaybe defval <$> nullval
infixl 6 .!=
(.!) :: Int -> Parser a -> Parser a
(.!) = arrayWithIndexOf
infixr 7 .!
data ParseOutput a = ParseYield a (ParseOutput a)
| ParseNeedData (BS.ByteString -> ParseOutput a)
| ParseFailed String
| ParseDone BS.ByteString
runParser' :: Parser a -> BS.ByteString -> ParseOutput a
runParser' parser startdata = parse $ callParse parser (tokenParser startdata)
where
parse (MoreData (np, ntok)) = ParseNeedData (parse . callParse np .ntok)
parse (Failed err) = ParseFailed err
parse (Yield v np) = ParseYield v (parse np)
parse (Done Nothing (PartialResult _ _ rest)) = ParseDone rest
parse (Done Nothing (TokFailed rest)) = ParseDone rest
parse (Done Nothing (TokMoreData _ rest)) = ParseDone rest
parse (Done (Just el) _) = ParseFailed $ "UnexpectedEnd item: " ++ show el
runParser :: Parser a -> ParseOutput a
runParser parser = runParser' parser BS.empty
parseByteString :: Parser a -> BS.ByteString -> [a]
parseByteString parser startdata = loop (runParser' parser startdata)
where
loop (ParseNeedData _) = error "Not enough data."
loop (ParseDone _) = []
loop (ParseFailed err) = error err
loop (ParseYield v np) = v : loop np
parseLazyByteString :: Parser a -> BL.ByteString -> [a]
parseLazyByteString parser input = loop chunks (runParser parser)
where
chunks = BL.toChunks input
loop [] (ParseNeedData _) = error "Not enough data."
loop (dta:rest) (ParseNeedData np) = loop rest (np dta)
loop _ (ParseDone _) = []
loop _ (ParseFailed err) = error err
loop rest (ParseYield v np) = v : loop rest np