{-# LANGUAGE BangPatterns      #-}
{-# LANGUAGE MultiWayIf        #-}
{-# LANGUAGE OverloadedStrings #-}

module Data.JsonStream.TokenParser (
    Element(..)
  , TokenResult(..)
  , tokenParser
) where

import           Control.Applicative
import           Control.Monad         (replicateM, when, (>=>))
import qualified Data.Aeson            as AE
import qualified Data.ByteString       as BSW
import qualified Data.ByteString.Char8 as BS
import           Data.Char             (isDigit, isDigit, isLower, isSpace)
import           Data.Scientific       (scientific)
import qualified Data.Text             as T
import           Data.Text.Encoding    (decodeUtf8', encodeUtf8)

data Element = ArrayBegin | ArrayEnd | ObjectBegin | ObjectEnd
               | StringBegin BS.ByteString | StringContent BS.ByteString | StringEnd
               | JValue AE.Value
               deriving (Show, Eq)

-- Internal Interface for parsing monad
data TokenResult' a =  TokMoreData' (BS.ByteString -> TokenParser a) BS.ByteString
                 | PartialResult' Element (TokenParser a) BS.ByteString
                 -- ^ found element, continuation, actual parsing view - so that we can report the unparsed
                 -- data when the parsing finishes.
                 | TokFailed' BS.ByteString
                 | Intermediate' a


-- | Public interface for parsing JSON tokens.
data TokenResult =  TokMoreData (BS.ByteString -> TokenResult) BS.ByteString
                  | PartialResult Element (TokenResult) BS.ByteString
                  -- ^ found element, continuation, actual parsing view - so that we can report the unparsed
                  -- data when the parsing finishes.
                  | TokFailed BS.ByteString

-- For debugging purposes
instance Show TokenResult where
  show (TokMoreData _ ctx) = "(TokMoreData' + " ++ show ctx ++ ")"
  show (TokFailed _) = "TokFailed'"
  show (PartialResult el _ rest) = "(PartialResult' " ++ show el ++ " " ++ show rest ++ ")"

data State = State {
    stData    :: BS.ByteString
  , stContext :: BS.ByteString
}

newtype TokenParser a = TokenParser {
    runTokParser :: State -> (TokenResult' a, State)
}

instance Monad TokenParser where
  return x = TokenParser $ \s -> (Intermediate' x, s)
  {-# INLINE return #-}
  m >>= mpost = TokenParser $ \s ->
                let (res, newstate) = runTokParser m s
                in case res of
                    TokMoreData' cont context -> (TokMoreData' (cont >=> mpost) context, newstate)
                    PartialResult' el tokp context -> (PartialResult' el (tokp >>= mpost) context, newstate)
                    TokFailed' context -> (TokFailed' context, newstate)
                    Intermediate' result -> runTokParser (mpost result) newstate
  {-# INLINE (>>=) #-}

instance Functor TokenResult' where
  fmap f (TokMoreData' newp ctx) = TokMoreData' (fmap f . newp) ctx
  fmap f (PartialResult' el tok ctx) = PartialResult' el (fmap f tok) ctx
  fmap _ (TokFailed' ctx) = TokFailed' ctx
  fmap f (Intermediate' a) = Intermediate' (f a)

instance Applicative TokenParser where
  pure = return
  f <*> param = do
    mf <- f
    mparam <- param
    return (mf mparam)

instance Functor TokenParser where
  fmap f tokp = TokenParser $ \s ->
              let (res, newstate) = runTokParser tokp s
              in (fmap f res, newstate)

failTok :: TokenParser a
failTok = TokenParser $ \s -> (TokFailed' (stContext s), s)

{-# INLINE isBreakChar #-}
isBreakChar :: Char -> Bool
isBreakChar c = isSpace c || (c == '{') || (c == '[') || (c == '}') || (c == ']') || (c == ',')

{-# INLINE peekChar #-}
peekChar :: TokenParser Char
peekChar = TokenParser handle
  where
    -- handle :: State -> (TokenResult' a, State)
    handle st@(State dta context)
      | BS.null dta = (TokMoreData' (\newdta -> TokenParser $ \_ -> handle (State newdta (BS.append context newdta)))
                                     context
                        , st)
      | otherwise   = (Intermediate' (BS.head dta), st)

{-# INLINE pickChar #-}
pickChar :: TokenParser Char
pickChar = TokenParser handle
  where
    handle st@(State dta context)
      | BS.null dta = (TokMoreData' (\newdta -> TokenParser $ \_ -> handle (State newdta (BS.append context newdta)))
                                     context
                        , st)
      | otherwise   = (Intermediate' (BS.head dta), State (BS.tail dta) context)

{-# INLINE yield #-}
yield :: Element -> TokenParser ()
yield el = TokenParser $ \state@(State dta ctx) -> (PartialResult' el (contparse dta) ctx, state)
  where
    -- Use data as new context
    contparse dta = TokenParser $ const (Intermediate' (), State dta dta )

-- | Return SOME input satisfying predicate or none, if the next element does not satisfy
-- Return tuple (str satisfying predicate, true_if_next_char_does_not_satisfy)
{-# INLINE getWhile' #-}
getWhile' :: (Char -> Bool) -> TokenParser (BS.ByteString, Bool)
getWhile' predicate = do
  char <- peekChar
  if predicate char then getBuf
                    else return ("", True)
  where
    getBuf = TokenParser $ \(State dta ctx) ->
        let (st,rest) = BS.span predicate dta
        in (Intermediate' (st, not (BS.null rest)), State rest ctx)

-- | Read ALL input satisfying predicate
{-# INLINE getWhile #-}
getWhile :: (Char -> Bool) -> TokenParser BS.ByteString
getWhile predicate = do
  (dta, complete) <- getWhile' predicate
  if complete
    then return dta
    else loop [dta]
  where
    loop acc = do
      (dta, complete) <- getWhile' predicate
      if complete
        then return $! BS.concat $ reverse (dta:acc)
        else loop (dta:acc)

-- | Parse unquoted identifier - true/false/null
parseIdent :: TokenParser ()
parseIdent = do
    ident <- getWhile isLower
    nextchar <- peekChar
    if | isBreakChar nextchar -> toTemp ident -- We found a barrier -> parse
       | otherwise -> failTok
  where
    toTemp "true" = yield $ JValue $ AE.Bool True
    toTemp "false" = yield $ JValue $ AE.Bool False
    toTemp "null" = yield $ JValue AE.Null
    toTemp _ = failTok

parseUnicode :: TokenParser Char
parseUnicode = do
    lst <- replicateM 4 pickChar
    return $! toEnum $ foldl1 (\a b -> 16 * a + b) $ map hexCharToInt lst
  where
    hexCharToInt :: Char -> Int
    hexCharToInt c
      | c >= 'A' && c <= 'F' = 10 + (fromEnum c - fromEnum 'A')
      | c >= 'a' && c <= 'f' = 10 + (fromEnum c - fromEnum 'a')
      | isDigit c = fromEnum c - fromEnum '0'
      | otherwise = error "Incorrect hex input, internal error."

-- | Parse string, when finished check if we are object in dict (followed by :) or just a string
parseString :: TokenParser ()
parseString = do
    -- leading '"' removed upstream
    (firstpart, _) <- getWhile' (\c -> c /= '"' && c /= '\\' )
    chr <- peekChar
    if chr == '"'
      then pickChar >> handleDecode firstpart
      else do
        yield $ StringBegin firstpart
        handleString
  where
    handleDecode str = case decodeUtf8' str of
          Left _ -> failTok
          Right val -> yield $ JValue $ AE.String val
    handleString = do
      chr <- peekChar
      case chr of
        '"' -> do
            _ <- pickChar
            yield StringEnd
        '\\' -> do
            _ <- pickChar
            specchr <- pickChar
            nchr <- parseSpecChar specchr
            yield $ StringContent $ encodeUtf8 (T.singleton nchr)
            handleString
        _ -> do
          (dstr, _) <- getWhile' (\c -> c /= '"' && c /= '\\' )
          yield $ StringContent dstr
          handleString

    parseSpecChar '"' = return '"'
    parseSpecChar '\\' = return '\\'
    parseSpecChar '/' = return '/'
    parseSpecChar 'b' = return '\b'
    parseSpecChar 'f' = return '\f'
    parseSpecChar 'n' = return '\n'
    parseSpecChar 'r' = return '\r'
    parseSpecChar 't' = return '\t'
    parseSpecChar 'u' = parseUnicode
    parseSpecChar c = return c

parseNumber :: TokenParser ()
parseNumber = do
    tnumber <- getWhile (\c -> isDigit c || c == '.' || c == '+' || c == '-' || c == 'e' || c == 'E')
    let
      (csign, r1) = parseSign tnumber :: (Int, BS.ByteString)
      ((num, numdigits), r2) = parseDecimal r1 :: ((Integer, Int), BS.ByteString)
      ((frac, frdigits), r3) = parseFract r2 :: ((Int, Int), BS.ByteString)
      (texp, rest) = parseE r3

    when (numdigits == 0 || not (BS.null rest)) failTok

    let dpart = fromIntegral csign * (num * (10 ^ frdigits) + fromIntegral frac) :: Integer
        e = texp - frdigits
    yield $ JValue $ AE.Number $ scientific dpart e
  where
    parseFract txt
      | BS.null txt = ((0, 0), txt)
      | BS.head txt == '.' = parseDecimal (BS.tail txt)
      | otherwise = ((0,0), txt)

    parseE txt
      | BS.null txt = (0, txt)
      | firstc == 'e' || firstc == 'E' =
              let (sign, rest) = parseSign (BS.tail txt)
                  ((dnum, _), trest) = parseDecimal rest :: ((Int, Int), BS.ByteString)
              in (dnum * sign, trest)
      | otherwise = (0, txt)
      where
        firstc = BS.head txt

    parseSign txt
      | BS.null txt = (1, txt)
      | BS.head txt == '+' = (1, BS.tail txt)
      | BS.head txt == '-' = (-1, BS.tail txt)
      | otherwise = (1, txt)

    parseDecimal txt
      | BS.null txt = ((0, 0), txt)
      | otherwise = parseNum txt (0,0)

    -- parseNum :: BS.ByteString -> (Integer, Int) -> ((Integer, Int), BS.ByteString)
    parseNum txt (!start, !digits)
      | BS.null txt = ((start, digits), txt)
      | dchr >= 48 && dchr <= 57 = parseNum (BS.tail txt) (start * 10 + fromIntegral (dchr - 48), digits + 1)
      | otherwise = ((start, digits), txt)
      where
        dchr = BSW.head txt

{-# INLINE peekCharInMain #-}
-- Specialized version of peek char for main function so that we get faster performance
peekCharInMain :: TokenParser Char
peekCharInMain = TokenParser handle
  where
    handle st@(State dta ctx)
      | BS.null dta = (TokMoreData' (\newdta -> TokenParser $ \_ -> handle (State newdta (BS.append ctx newdta)))
                                     ctx
                        , st)
      | chr == '[' = (PartialResult' ArrayBegin contparse ctx, st)
      | chr == ']' = (PartialResult' ArrayEnd contparse ctx, st)
      | chr == '{' = (PartialResult' ObjectBegin contparse ctx, st)
      | chr == '}' = (PartialResult' ObjectEnd contparse ctx, st)
      | isBlankChar chr = handle (State (BS.dropWhile isBlankChar dta) ctx)
      | chr == '"' = runTokParser (parseString >> peekCharInMain) (State rest ctx)
      | otherwise   = (Intermediate' (BS.head dta), st)
      where
        chr = BS.head dta
        rest = BS.tail dta
        -- Use data as new context
        contparse = TokenParser $ const $ handle (State rest rest)
        isBlankChar c = c == ',' || c == ':' || isSpace c

{-# INLINE mainParser #-}
mainParser :: TokenParser ()
mainParser = do
  chr <- peekCharInMain
  case chr of
    't' -> parseIdent
    'f' -> parseIdent
    'n' -> parseIdent
    '-' -> parseNumber
    _| isDigit chr -> parseNumber
     | otherwise -> failTok

-- | Incremental lexer
tokenParser :: BS.ByteString -> TokenResult
tokenParser dta = handle $ runTokParser mainParser (State dta dta)
  where
    handle (TokMoreData' ntp ctx, st) = TokMoreData (\ndta -> handle $ runTokParser (ntp ndta) st) ctx
    handle (PartialResult' el ntp ctx, st) = PartialResult el (handle $ runTokParser ntp st) ctx
    handle (TokFailed' ctx, _) = TokFailed ctx
    handle (Intermediate' _, st) = handle $ runTokParser mainParser st