{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnboxedTuples #-}
-- |
-- Conversions from Bencoded @ByteString@s to Haskell values.
--
module Data.Bencode.Decode
  (
    -- * Quick start
    -- $quick

    -- * Parser
    Parser
  , decode
  , decodeMaybe

    -- * String parsers
  , string
  , stringEq
  , text
  , textEq

    -- * Integer parsers
  , integer
  , int
  , intEq
  , int64
  , int32
  , int16
  , int8
  , word
  , word64
  , word32
  , word16
  , word8

    -- * List parsers
  , list
  , index
  , elem
  , list'
  , Elems

    -- * Dictionary parsers
  , dict
  , field
  , field'
  , dict'
  , Fields

    -- * Miscellaneous
  , value
  , fail
  , mapMaybe
  , mapOrFail

    -- * Recipes #recipes#
    -- $recipes
  ) where

import Prelude hiding (elem, fail)
import Control.Applicative
import Control.Monad hiding (fail)
import Control.Monad.ST
import Control.Monad.Trans.Class
import Control.Monad.Trans.Reader
import Control.Monad.Trans.State.Strict
import Data.Int
import Data.Word
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC
import qualified Data.Foldable as F
import qualified Data.IntSet as IS
import qualified Data.Map as M
import qualified Data.Primitive.Array as A
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Vector as V
import qualified Data.Vector.Mutable as VM
import qualified GHC.Exts as X

import Data.Bencode.Type (Value(..))
import qualified Data.Bencode.Util as Util
import qualified Data.Bencode.AST as AST

newtype ParseResult a = ParseResult { unParseResult :: Either String a }
  deriving (Functor, Applicative, Monad)

failResult :: String -> ParseResult a
failResult = ParseResult . Left
{-# INLINE failResult #-}

instance Alternative ParseResult where
  empty = failResult "Alternative.empty"
  l <|> r = ParseResult $ unParseResult l <> unParseResult r
  -- Discards left error, not ideal

instance MonadPlus ParseResult
-- Does not satisfy MonadPlus laws because of the failure String
-- But required for Alternative (StateT _ ParseResult)

-- | A parser from a Bencode value to a Haskell value.
newtype Parser a = Parser { runParser_ :: ReaderT AST.Value ParseResult a }
  deriving (Functor, Applicative, Alternative, Monad)

runParser :: Parser a -> AST.Value -> ParseResult a
runParser = runReaderT . runParser_
{-# INLINE runParser #-}

liftP :: ParseResult a -> Parser a
liftP = Parser . lift
{-# INLINE liftP #-}

failParser :: String -> Parser a
failParser = liftP . failResult
{-# INLINE failParser #-}

-- | Decode a value from the given @ByteString@. If decoding fails, returns
-- @Left@ with a failure message. The message is a short human-readable error
-- description and should not be relied on programmatically.
decode :: Parser a -> B.ByteString -> Either String a
decode p s = AST.parseOnly s >>= unParseResult . runParser p

-- | Decode a value from the given @ByteString@. If decoding fails, returns
-- @Nothing@.
decodeMaybe :: Parser a -> B.ByteString -> Maybe a
decodeMaybe p = either (const Nothing) Just . decode p

errTypeMismatch :: String -> AST.Value -> ParseResult a
errTypeMismatch a b = failResult $ "TypeMismatch " ++ a ++ " " ++ b'
  where
    b' = case b of
      AST.String _  -> "String"
      AST.Integer _ -> "Integer"
      AST.List _    -> "List"
      AST.Dict _    -> "Dict"

-- Parsers below are all marked INLINE because they match on the AST
-- constructor and return Eithers. When inlined, GHC is able to optimize the
-- nested case matches using "case merging" and get rid of the intemeditate
-- Eithers using "case-of-case".

stringDirect :: Parser B.ByteString
stringDirect = Parser $ ReaderT $ \v -> case v of
  AST.String s -> pure s
  _            -> errTypeMismatch "String" v
{-# INLINE stringDirect #-}

integerDirect :: Parser B.ByteString
integerDirect = Parser $ ReaderT $ \v -> case v of
  AST.Integer s -> pure s
  _             -> errTypeMismatch "Integer" v
{-# INLINE integerDirect #-}

listDirect :: Parser (A.Array AST.Value)
listDirect = Parser $ ReaderT $ \v -> case v of
  AST.List a -> pure a
  _          -> errTypeMismatch "List" v
{-# INLINE listDirect #-}

dictDirect :: Parser (A.Array AST.KeyValue)
dictDirect = Parser $ ReaderT $ \v -> case v of
  AST.Dict a -> pure a
  _          -> errTypeMismatch "Dict" v
{-# INLINE dictDirect #-}

-- | Decode a Bencode string as a ByteString. Fails on a non-string.
string :: Parser B.ByteString
string = stringDirect
{-# INLINE string #-}

-- | Decode a Bencode integer as an Integer. Fails on a non-integer.
integer :: Parser Integer
integer = toI <$!> integerDirect
  where
    -- BC.readInteger will be making redundant digit checks since we already
    -- know it to be a valid integer.
    -- But it has an efficient divide-and-conquer algorithm compared to the
    -- simple but O(n^2) foldl' (\acc x -> acc * 10 + x) 0.
    -- We can reimplement the algorithm without the redundant checks if we
    -- really want.
    toI s = case BC.readInteger s of
      Nothing    -> error "Data.Bencode.Decode.integer: should not happen"
      Just (i,_) -> i
{-# INLINE integer #-}

-- | Decode a Bencode list with the given parser for elements. Fails on a
-- non-list or if any element in the list fails to parse.
list :: Parser a -> Parser (V.Vector a)
list p = listDirect >>= liftP . traverseAToV (runParser p)
{-# INLINE list #-}

traverseAToV :: (a -> ParseResult b) -> A.Array a -> ParseResult (V.Vector b)
traverseAToV f a = runST $ do
  let n = A.sizeofArray a
  v <- VM.new n
  let loop i | i == n = pure Nothing
      loop i = case f (A.indexArray a i) of
        ParseResult (Left e)  -> pure (Just e)
        ParseResult (Right x) -> VM.write v i x *> loop (i+1)
  res <- loop 0
  maybe (pure <$> V.unsafeFreeze v) (pure . failResult) res
{-# INLINABLE traverseAToV #-}

-- | Decode a Bencode dict with the given parser for values. Fails on a
-- non-dict or if any value in the dict fails to parse.
dict :: Parser a -> Parser (M.Map B.ByteString a)
dict p =
  dictDirect >>= liftP . fmap M.fromDistinctAscList . traverse f .  F.toList
  where
    f (AST.KeyValue k v) = (,) k <$> runParser p v
{-# INLINE dict #-}

-- | Succeeds only on a Bencode string that equals the given string.
stringEq :: B.ByteString -> Parser ()
stringEq s = stringDirect >>= \s' ->
  if s == s'
  then pure ()
  else failParser $ "StringNotEq " ++ show s ++ " " ++ show s'
{-# INLINE stringEq #-}

-- | Decode a bencode string as UTF-8 text. Fails on a non-string or if the
-- string is not valid UTF-8.
text :: Parser T.Text
text =
  stringDirect >>=
  either (const (failParser "UTF8DecodeFailure")) pure . T.decodeUtf8'
{-# INLINE text #-}

-- | Succeeds only on a Bencode string that equals the given text.
textEq :: T.Text -> Parser ()
textEq t = text >>= \t' ->
  if t == t'
  then pure ()
  else failParser $ "TextNotEq " ++ show t ++ " " ++ show t'
{-# INLINE textEq #-}

-- | Decode a Bencode integer as an @Int@. Fails on a non-integer or if the
-- integer is out of bounds for an @Int@.
int :: Parser Int
int = integerDirect >>= maybe (failParser "IntOutOfBounds") pure . go
  where
    go s = case BC.uncons s of
      Just ('-', s') -> Util.readKnownNaturalAsInt True s'
      _              -> Util.readKnownNaturalAsInt False s
{-# INLINE int #-}

-- | Succeeds only on a Bencode integer that equals the given value.
intEq :: Int -> Parser ()
intEq i = int >>= \i' ->
  if i == i'
  then pure ()
  else failParser $ "IntNotEq " ++ show i ++ " " ++ show i'
{-# INLINE intEq #-}

-- | Decode a Bencode integer as a @Word@. Fails on a non-integer or if the
-- integer is out of bounds for a @Word@.
word :: Parser Word
word = integerDirect >>= maybe (failParser "WordOutOfBounds") pure . go
  where
    go s = case BC.uncons s of
      Just ('-',_) -> Nothing
      _            -> Util.readKnownNaturalAsWord s
{-# INLINE word #-}

-- | Decode a @Value@. Always succeeds for valid Bencode.
value :: Parser Value
value = String  <$> string
    <|> Integer <$> integer
    <|> List    <$> list value
    <|> Dict    <$> dict value

-- | Always fails with the given message.
fail :: String -> Parser a
fail = failParser . ("Fail: " ++ )
{-# INLINE fail #-}

-- | Decode a value with the given parser for the given key. Fails on a
-- non-dict, if the key is absent, or if the value parser fails.
--
-- If keys should not be left over in the dict, use 'field'' and 'dict''
-- instead.
--
-- ==== __Examples__
-- @
-- data File = File { name :: Text, size :: Int }
--
-- fileParser :: D.'Parser' File
-- fileParser =
--   File \<$> D.field "name" D.'text'
--        \<*> D.field "size" D.'int'
-- @
field :: B.ByteString -> Parser a -> Parser a
field k p = do
  a <- dictDirect
  case binarySearch k a of
    (# _ |            #) -> failParser $ "KeyNotFound " ++ show k
    (#   | (# _, x #) #) -> liftP $ runParser p x
{-# INLINE field #-}

-- | Decode a value with the given parser for the given key. Convert to a
-- @Parser@ with 'dict''.
--
-- @since 0.1.1.0
field' :: B.ByteString -> Parser a -> Fields a
field' k p = Fields $ ReaderT $ \a -> case binarySearch k a of
  (# _ |             #) -> lift . failResult $ "KeyNotFound " ++ show k
  (#   | (# i#, v #) #) -> lift (runParser p v) <* modify' (IS.insert (X.I# i#))
{-# INLINE field' #-}

-- | Create a @Parser@ from a 'Fields'. Fails on a non-dict, if a key is
-- absent, or if any value fails to parse. Also fails if there are leftover
-- unparsed keys in the dict.
--
-- If leftover keys should be ignored, use 'field' instead.
--
-- ==== __Examples__
-- @
-- data File = File { name :: Text, size :: Int }
--
-- fileParser :: D.'Parser' File
-- fileParser = D.dict' $
--   File \<$> D.'field'' "name" D.'text'
--        \<*> D.'field'' "size" D.'int'
-- @
--
-- @since 0.1.1.0
dict' :: Fields a -> Parser a
dict' fs = do
  a <- dictDirect
  liftP $ do
    (v, is) <- runStateT (runReaderT (runFields fs) a) IS.empty
    if IS.size is == A.sizeofArray a
    then pure v
    else let i = head $ filter (`IS.notMember` is) [0..]
             AST.KeyValue k _ = A.indexArray a i
         in failResult $ "UnrecognizedKey " ++ show k
{-# INLINE dict' #-}

-- | Key-value parsers. See 'dict'' and 'field''.
newtype Fields a = Fields
  { runFields ::
      ReaderT (A.Array AST.KeyValue) (StateT IS.IntSet ParseResult) a
  } deriving (Functor, Applicative, Alternative, Monad)
-- We could use WriterT (CPS) but StateT is a teeny bit more efficient because
-- we can do IS.insert x instead of IS.union (IS.singleton x).

-- | Decode a list element with the given parser at the given (0-based) index.
-- Fails on a non-list, if the index is out of bounds, or if the element parser
-- fails.
--
-- Also see 'elem' and 'list''.
--
-- ==== __Examples__
-- @
-- data File = File { name :: Text, size :: Int }
--
-- fileParser :: D.'Parser' File
-- fileParser =
--   File \<$> D.index 0 D.'text'
--        \<*> D.index 1 D.'int'
-- @
--
-- @since 0.1.1.0
index :: Int -> Parser a -> Parser a
index i _ | i < 0 = failParser "IndexOutOfBounds"
index i p = do
  a <- listDirect
  if i < A.sizeofArray a
  then liftP $ runParser p (A.indexArray a i)
  else failParser "IndexOutOfBounds"
{-# INLINE index #-}

-- | Decode the next list element with the given parser. Convert to a @Parser@
-- with 'list''.
--
-- @since 0.1.1.0
elem :: Parser a -> Elems a
elem p = Elems $ ReaderT $ \a -> do
  i <- get
  if i < A.sizeofArray a
  then lift (runParser p (A.indexArray a i)) <* (put $! i+1)
  else lift $ failResult "ListElemsExhausted"
{-# INLINE elem #-}

-- | Create a @Parser@ from an @Elems@. Fails on a non-list, if the number of
-- elements does not match the @Elems@ exactly, or if any element parser fails.
--
-- ==== __Examples__
-- @
-- data File = File { name :: Text, size :: Int }
--
-- fileParser :: D.'Parser' File
-- fileParser = D.list' $
--   File \<$> D.'elem' D.'text'
--        \<*> D.'elem' D.'int'
-- @
--
-- @since 0.1.1.0
list' :: Elems a -> Parser a
list' es = do
  a <- listDirect
  liftP $ do
    (x, i) <- runStateT (runReaderT (runElems es) a) 0
    if i == A.sizeofArray a
    then pure x
    else failResult $ "ListElemsLeft"
{-# INLINE list' #-}

-- | List elements parser. See 'elem' and 'list''.
newtype Elems a = Elems
  { runElems :: ReaderT (A.Array AST.Value) (StateT Int ParseResult) a
  } deriving (Functor, Applicative, Alternative, Monad)

-- | Decode a Bencode integer as an @Int64@. Fails on a non-integer or if the
-- integer is out of bounds for an @Int64@.
--
-- @since 0.1.1.0
int64 :: Parser Int64
int64 = integerDirect >>= maybe (failParser "IntOutOfBounds") pure . go
  where
    go s = case BC.uncons s of
      Just ('-', s') -> Util.readKnownNaturalAsInt64 True s'
      _              -> Util.readKnownNaturalAsInt64 False s
{-# INLINE int64 #-}

-- | Decode a Bencode integer as an @Int32@. Fails on a non-integer or if the
-- integer is out of bounds for an @Int32@.
--
-- @since 0.1.1.0
int32 :: Parser Int32
int32 = intL32
{-# INLINE int32 #-}

-- | Decode a Bencode integer as an @Int16@. Fails on a non-integer or if the
-- integer is out of bounds for an @Int16@.
--
-- @since 0.1.1.0
int16 :: Parser Int16
int16 = intL32
{-# INLINE int16 #-}

-- | Decode a Bencode integer as an @Int8@. Fails on a non-integer or if the
-- integer is out of bounds for an @Int8@.
--
-- @since 0.1.1.0
int8 :: Parser Int8
int8 = intL32
{-# INLINE int8 #-}

-- Parse an Int(<=32) via Int.
intL32 :: forall a. (Bounded a, Integral a) => Parser a
intL32 = int >>= \i ->
  if fromIntegral (minBound :: a) <= i && i <= fromIntegral (maxBound :: a)
  then pure $! fromIntegral i
  else failParser "IntOutOfBounds"
{-# INLINE intL32 #-}

-- | Decode a Bencode integer as a @Word64@. Fails on a non-integer or if the
-- integer is out of bounds for a @Word64@.
--
-- @since 0.1.1.0
word64 :: Parser Word64
word64 = integerDirect >>= maybe (failParser "WordOutOfBounds") pure . go
  where
    go s = case BC.uncons s of
      Just ('-', _) -> Nothing
      _             -> Util.readKnownNaturalAsWord64 s
{-# INLINE word64 #-}

-- | Decode a Bencode integer as a @Word32@. Fails on a non-integer or if the
-- integer is out of bounds for a @Word32@.
--
-- @since 0.1.1.0
word32 :: Parser Word32
word32 = wordL32
{-# INLINE word32 #-}

-- | Decode a Bencode integer as a @Word16@. Fails on a non-integer or if the
-- integer is out of bounds for a @Word16@.
--
-- @since 0.1.1.0
word16 :: Parser Word16
word16 = wordL32
{-# INLINE word16 #-}

-- | Decode a Bencode integer as a @Word8@. Fails on a non-integer or if the
-- integer is out of bounds for a @Word8@.
--
-- @since 0.1.1.0
word8 :: Parser Word8
word8 = wordL32
{-# INLINE word8 #-}

-- Parse a Word(<=32) via Word.
wordL32 :: forall a. (Bounded a, Integral a) => Parser a
wordL32 = word >>= \i ->
  if fromIntegral (minBound :: a) <= i && i <= fromIntegral (maxBound :: a)
  then pure $! fromIntegral i
  else failParser "WordOutOfBounds"
{-# INLINE wordL32 #-}

-- | Run the function on the parsed value, fail with 'empty' if the result is
-- @Nothing@.
--
-- @since 0.1.1.0
mapMaybe :: (a -> Maybe b) -> Parser a -> Parser b
mapMaybe f p = p >>= maybe empty pure . f
{-# INLINE mapMaybe #-}

-- | Run the function on the parsed value, fail with 'fail' if the result is a
-- @Left@.
--
-- @since 0.1.1.0
mapOrFail :: (a -> Either String b) -> Parser a -> Parser b
mapOrFail f p = p >>= either fail pure . f
{-# INLINE mapOrFail #-}

-- | Binary search. The array must be sorted by key.
binarySearch
  :: B.ByteString
  -> A.Array AST.KeyValue
  -> (# (# #) | (# X.Int#,  AST.Value #) #)
binarySearch k a = go 0 (A.sizeofArray a)
  where
    go l r | l == r = (# (# #) | #)
    go l r = case compare k k' of
      LT -> go l m
      EQ -> (# | (# case m of X.I# m# -> m#, v #) #)
      GT -> go (m+1) r
      where
        -- Overflow, careful!
        m = fromIntegral ((fromIntegral (l+r) :: Word) `div` 2) :: Int
        AST.KeyValue k' v = A.indexArray a m
{-# INLINABLE binarySearch #-}
-- binarySearch returns an unboxed type, which serves as an equivalent of
-- Maybe (Int, Value). This is to avoid allocating the (,) and the I#. This
-- won't be necessary if GHC gets
-- https://gitlab.haskell.org/ghc/ghc/-/issues/14259.
-- A (not-so-good) alternative is to inline binarySearch.

------------------------------
-- Documentation
------------------------------

-- $quick
-- Decoding is done using parsers. This module defines parsers that can be
-- composed to build parsers for arbitrary types.
--
-- @
-- data File = File
--   { hash :: ByteString
--   , size :: Integer
--   , tags :: Vector Text
--   } deriving Show
-- @
--
-- Assuming a @File@ is encoded as a Bencode dictionary with the field names as
-- keys and appropriate value types, a parser for @File@ can be defined as
--
-- @
-- {-# LANGUAGE OverloadedStrings #-}
-- import qualified Data.Bencode.Decode as D
--
-- fileParser :: D.'Parser' File
-- fileParser =
--   File \<\$> D.'field' "hash" D.'string'
--        \<*> D.'field' "size" D.'integer'
--        \<*> D.'field' "tags" (D.'list' D.'text')
-- @
--
-- The parser can then be run on a @ByteString@ with 'decode'.
--
-- >>> D.decode fileParser "d4:hash4:xxxx4:sizei1024e4:tagsl4:work6:backupee"
-- Right (File {hash = "xxxx", size = 1024, tags = ["work","backup"]})
--
-- Of course, invalid Bencode or Bencode that does not satisfy the @File@ parser
-- will fail to decode.
--
-- >>> D.decode fileParser "d4:hash4:xxxx4:tagsl4:work6:backupee"
-- Left "KeyNotFound \"size\""
--
-- For more examples, see the [Recipes](#g:recipes) section at the end of this
-- page.


-- $recipes
-- Recipes for some common and uncommon usages.
--
-- The following preface is assumed.
--
-- @
-- {-# LANGUAGE OverloadedStrings #-}
-- import Data.ByteString (ByteString)
-- import Data.Text (Text)
-- import qualified Data.Bencode.Decode as D
-- @
--
-- === Decode an optional field
--
-- @
-- import Control.Applicative ('optional')
--
-- data File = File { name :: Text, size :: Maybe Int } deriving Show
--
-- fileParser :: D.'Parser' File
-- fileParser =
--   File
--     \<$> D.'field' "name" D.'text'
--     \<*> optional (D.'field' "size" D.'int')
-- @
--
-- >>> D.decode fileParser "d4:name9:hello.txt4:sizei16ee"
-- Right (File {name = "hello.txt", size = Just 16})
-- >>> D.decode fileParser "d4:name9:hello.txte"
-- Right (File {name = "hello.txt", size = Nothing})
--
-- === Decode an enum
--
-- @
-- import Control.Applicative ('(<|>)')
--
-- data Color = Red | Green | Blue deriving Show
--
-- colorParser :: D.'Parser' Color
-- colorParser =
--       Red   \<$ D.'stringEq' "red"
--   \<|> Green \<$ D.'stringEq' "green"
--   \<|> Blue  \<$ D.'stringEq' "blue"
--   \<|> D.'fail' "unknown color"
-- @
--
-- >>> D.decode colorParser "5:green"
-- Right Green
-- >>> D.decode colorParser "5:black"
-- Left "Fail: unknown color"
--
-- === Decode a dict, failing on leftover keys
--
-- @
-- data File = File { name :: Text, size :: Int } deriving Show
--
-- fileParser :: D.'Parser' File
-- fileParser = D.'dict'' $
--   File
--     \<$> D.'field'' "name" D.'text'
--     \<*> D.'field'' "size" D.'int'
-- @
--
-- >>> D.decode fileParser "d4:name9:hello.txt4:sizei32ee"
-- Right (File {name = "hello.txt", size = 32})
-- >>> D.decode fileParser "d6:hiddeni1e4:name9:hello.txt4:sizei32ee"
-- Left "UnrecognizedKey \"hidden\""
--
-- === Decode differently based on dict contents
--
-- @
-- data Response = Response
--   { id_    :: Int
--   , result :: Either Text ByteString
--   } deriving Show
--
-- responseParser :: D.'Parser' Response
-- responseParser = do
--   id_ <- D.'field' "id" D.'int'
--   status <- D.'field' "status" D.'string'
--   case status of
--     "failure" -> do
--       reason <- D.'field' "reason" D.'text'
--       pure $ Response id_ (Left reason)
--     "success" -> do
--       data_ <- D.'field' "data" D.'string'
--       pure $ Response id_ (Right data_)
--     _ -> D.'fail' "unknown status"
-- @
--
-- >>> D.decode responseParser "d2:idi42e6:reason12:unauthorized6:status7:failuree"
-- Right (Response {id_ = 42, result = Left "unauthorized"})
-- >>> D.decode responseParser "d4:data4:00002:idi42e6:status7:successe"
-- Right (Response {id_ = 42, result = Right "0000"})
--
-- === Decode nested dicts
--
-- @
-- data File = File { name :: Text, size :: Int } deriving Show
--
-- fileParser :: D.'Parser' File
-- fileParser =
--   File
--     \<$> D.'field' "name" D.'text'
--     \<*> D.'field' "metadata" (D.'field' "info" (D.'field' "size" D.'int'))
-- @
--
-- >>> D.decode fileParser "d8:metadatad4:infod4:sizei32eee4:name9:hello.txte"
-- Right (File {name = "hello.txt", size = 32})
--
-- === Decode a heterogeneous list
--
-- @
-- data File = File { name :: Text, size :: Int } deriving Show
--
-- fileParser :: D.'Parser' File
-- fileParser = D.'list'' $
--   File
--     \<$> D.'elem' D.'text'
--     \<*> D.'elem' D.'int'
-- @
--
-- >>> D.decode fileParser "l9:hello.txti32ee"
-- Right (File {name = "hello.txt", size = 32})
--