{-# LANGUAGE DerivingVia #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-- |
-- Conversions from Bencoded @ByteString@s to Haskell values.
--
-- == Introduction
--
-- Decoding is done using parsers. There are parsers for the four Bencode types:
--
-- * 'string' decodes Bencode strings as 'B.ByteString's
-- * 'integer' decodes Bencode integers as 'Prelude.Integer's
-- * 'list' decodes Bencode lists as 'V.Vector's
-- * 'dict' decodes Bencode dictionaries as 'M.Map's with 'B.ByteString' keys.
--
-- These can be used to build more complex parsers for arbitrary types.
--
-- @
-- data File = File
--   { hash :: ByteString
--   , size :: Integer
--   , tags :: Vector Text
--   } deriving Show
-- @
--
-- Assuming a @File@ is encoded as a Bencode dictionary with the field names as
-- keys and appropriate value types, a parser for @File@ can be defined as
--
-- @
-- {-# LANGUAGE OverloadedStrings #-}
-- import qualified Data.Bencode.Decode as D
--
-- fileParser :: D.'Parser' File
-- fileParser =
--   File \<\$> D.'field' "hash" D.'string'
--        \<*> D.'field' "size" D.'integer'
--        \<*> D.'field' "tags" (D.'list' D.'text')
-- @
--
-- The parser can then be run on a @ByteString@ with 'decode'.
--
-- >>> D.decode fileParser "d4:hash4:xxxx4:sizei1024e4:tagsl4:work6:backupee"
-- Right (File {hash = "xxxx", size = 1024, tags = ["work","backup"]})
--
-- Of course, invalid Bencode or Bencode that does not satisfy our @File@ parser
-- will fail to decode.
--
-- >>> D.decode fileParser "d4:hash4:xxxx4:tagsl4:work6:backupee"
-- Left "KeyNotFound \"size\""
--
-- For more examples, see the \"Recipes\" section at the end of this page.
--
module Data.Bencode.Decode
  ( -- * Parser
    Parser
  , decode
  , decodeMaybe

    -- * Primary parsers
  , string
  , integer
  , list
  , dict

    -- * More parsers
  , stringEq
  , text
  , textEq
  , int
  , intEq
  , word
  , field
  , value
  , fail

    -- * Recipes
    --
    -- $recipes
  ) where

import Prelude hiding (fail)
import Control.Applicative
import Control.Monad hiding (fail)
import Control.Monad.ST
import Control.Monad.Trans.Reader
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC
import qualified Data.Foldable as F
import qualified Data.Map as M
import qualified Data.Primitive.Array as A
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Vector as V
import qualified Data.Vector.Mutable as VM

import Data.Bencode.Type (Value(..))
import Data.Bencode.Util (readKnownNaturalAsInt, readKnownNaturalAsWord)
import qualified Data.Bencode.AST as AST

newtype ParseResult a = ParseResult { unParseResult :: Either String a }
  deriving (Functor, Applicative, Monad)

failResult :: String -> ParseResult a
failResult = ParseResult . Left
{-# INLINE failResult #-}

instance Alternative ParseResult where
  empty = failResult "Alternative.empty"
  l <|> r = ParseResult $ unParseResult l <> unParseResult r
  -- Discards left error, not ideal

-- | A parser from a Bencode value to a Haskell value.
newtype Parser a = Parser { runParser :: AST.Value -> ParseResult a }
  deriving (Functor, Applicative, Alternative, Monad)
    via ReaderT AST.Value ParseResult

lift :: ParseResult a -> Parser a
lift = Parser . const
{-# INLINE lift #-}

failParser :: String -> Parser a
failParser = lift . failResult
{-# INLINE failParser #-}

-- | Decode a value from the given @ByteString@. If decoding fails, returns
-- @Left@ with a failure message.
decode :: Parser a -> B.ByteString -> Either String a
decode p s = AST.parseOnly s >>= unParseResult . runParser p

-- | Decode a value from the given @ByteString@. If decoding fails, returns
-- @Nothing@.
decodeMaybe :: Parser a -> B.ByteString -> Maybe a
decodeMaybe p = either (const Nothing) Just . decode p

errTypeMismatch :: String -> AST.Value -> ParseResult a
errTypeMismatch a b = failResult $ "TypeMismatch " ++ a ++ " " ++ b'
  where
    b' = case b of
      AST.String _  -> "String"
      AST.Integer _ -> "Integer"
      AST.List _    -> "List"
      AST.Dict _    -> "Dict"

-- Parsers below are all marked INLINE because they match on the AST
-- constructor and return Eithers. When inlined, GHC is able to optimize the
-- nested case matches using "case merging" and get rid of the intemeditate
-- Eithers using "case-of-case".

stringDirect :: Parser B.ByteString
stringDirect = Parser $ \v -> case v of
  AST.String s -> pure s
  _ -> errTypeMismatch "String" v
{-# INLINE stringDirect #-}

integerDirect :: Parser B.ByteString
integerDirect = Parser $ \v -> case v of
  AST.Integer s -> pure s
  _ -> errTypeMismatch "Integer" v
{-# INLINE integerDirect #-}

listDirect :: Parser (A.Array AST.Value)
listDirect = Parser $ \v -> case v of
  AST.List a -> pure a
  _ -> errTypeMismatch "List" v
{-# INLINE listDirect #-}

dictDirect :: Parser (A.Array AST.KeyValue)
dictDirect = Parser $ \v -> case v of
  AST.Dict a -> pure a
  _ -> errTypeMismatch "Dict" v
{-# INLINE dictDirect #-}

-- | Decode a Bencode string as a ByteString. Fails on a non-string.
string :: Parser B.ByteString
string = stringDirect
{-# INLINE string #-}

-- | Decode a Bencode integer as an Integer. Fails on a non-integer.
integer :: Parser Integer
integer = toI <$!> integerDirect
  where
    -- BC.readInteger will be making redundant digit checks since we already
    -- know it to be a valid integer.
    -- But it has an efficient divide-and-conquer algorithm compared to the
    -- simple but O(n^2) foldl' (\acc x -> acc * 10 + x) 0.
    -- We can reimplement the algorithm without the redundant checks if we
    -- really want.
    toI s = case BC.readInteger s of
      Nothing    -> error "Data.Bencode.Decode.integer: should not happen"
      Just (i,_) -> i
{-# INLINE integer #-}

-- | Decode a Bencode list with the given parser for elements. Fails on a
-- non-list or if any element in the list fails to parse.
list :: Parser a -> Parser (V.Vector a)
list p = listDirect >>= lift . traverseAToV (runParser p)
{-# INLINE list #-}

traverseAToV :: (a -> ParseResult b) -> A.Array a -> ParseResult (V.Vector b)
traverseAToV f a = runST $ do
  let n = A.sizeofArray a
  v <- VM.new n
  let loop i | i == n = pure Nothing
      loop i = case f (A.indexArray a i) of
        ParseResult (Left e)  -> pure (Just e)
        ParseResult (Right x) -> VM.write v i x *> loop (i+1)
  res <- loop 0
  maybe (pure <$> V.unsafeFreeze v) (pure . failResult) res
{-# INLINABLE traverseAToV #-}

-- | Decode a Bencode dict with the given parser for values. Fails on a
-- non-dict or if any value in the dict fails to parse.
dict :: Parser a -> Parser (M.Map B.ByteString a)
dict p =
  dictDirect >>= lift . fmap M.fromDistinctAscList . traverse f .  F.toList
  where
    f (AST.KeyValue k v) = (,) k <$> runParser p v
{-# INLINE dict #-}

-- | Succeeds only on a Bencode string that equals the given string.
stringEq :: B.ByteString -> Parser ()
stringEq s = stringDirect >>= \s' ->
  if s == s'
  then pure ()
  else failParser $ "StringNotEq " ++ show s ++ " " ++ show s'
{-# INLINE stringEq #-}

-- | Decode a bencode string as UTF-8 text. Fails on a non-string or if the
-- string is not valid UTF-8.
text :: Parser T.Text
text =
  stringDirect >>=
  either (const (failParser "UTF8DecodeFailure")) pure . T.decodeUtf8'
{-# INLINE text #-}

-- | Succeeds only on a Bencode string that equals the given text.
textEq :: T.Text -> Parser ()
textEq t = text >>= \t' ->
  if t == t'
  then pure ()
  else failParser $ "TextNotEq " ++ show t ++ " " ++ show t'
{-# INLINE textEq #-}

-- | Decode a Bencode integer as an @Int@. Fails on a non-integer or if the
-- integer is out of bounds for an @Int@.
int :: Parser Int
int = integerDirect >>= maybe (failParser "IntOutOfBounds") pure . go
  where
    go s = case BC.uncons s of
      Just ('-', s') -> readKnownNaturalAsInt True s'
      _              -> readKnownNaturalAsInt False s
{-# INLINE int #-}

-- | Succeeds only on a Bencode integer that equals the given value.
intEq :: Int -> Parser ()
intEq i = int >>= \i' ->
  if i == i'
  then pure ()
  else failParser $ "IntNotEq " ++ show i ++ " " ++ show i'
{-# INLINE intEq #-}

-- | Decode a Bencode integer as a @Word@. Fails on a non-integer or if the
-- integer is out of bounds for a @Word@.
word :: Parser Word
word = integerDirect >>= maybe (failParser "WordOutOfBounds") pure . go
  where
    go s = case BC.uncons s of
      Just ('-',_) -> Nothing
      _            -> readKnownNaturalAsWord s
{-# INLINE word #-}

-- | Decode a @Value@. Always succeeds for valid Bencode.
value :: Parser Value
value = String  <$> string
    <|> Integer <$> integer
    <|> List    <$> list value
    <|> Dict    <$> dict value

-- | Always fails with the given message.
fail :: String -> Parser a
fail = failParser . ("Fail: " ++ )
{-# INLINE fail #-}

-- | Decode a value with the given parser for the given key. Fails on a
-- non-dict, if the key is absent, or if the value parser fails.
field :: B.ByteString -> Parser a -> Parser a
field k p =
  dictDirect >>=
  maybe (failParser $ "KeyNotFound " ++ show k) pure . binarySearch k >>=
  lift . runParser p
{-# INLINE field #-}

----------
-- Utils

-- | Binary search. The array must be sorted by key.
binarySearch :: B.ByteString -> A.Array AST.KeyValue -> Maybe AST.Value
binarySearch k a = go 0 (A.sizeofArray a)
  where
    go l r | l == r = Nothing
    go l r = case compare k k' of
      LT -> go l m
      EQ -> Just v
      GT -> go (m+1) r
      where
        -- Overflow, careful!
        m = fromIntegral ((fromIntegral (l+r) :: Word) `div` 2) :: Int
        AST.KeyValue k' v = A.indexArray a m
{-# INLINABLE binarySearch #-}

-- $recipes
-- Recipes for some common and uncommon usages.
--
-- The following preface is assumed.
--
-- @
-- {-# LANGUAGE OverloadedStrings #-}
-- import Data.ByteString (ByteString)
-- import Data.Text (Text)
-- import qualified Data.Bencode.Decode as D
-- @
--
-- === Decode an optional field
--
-- @
-- import Control.Applicative ('optional')
--
-- data File = File { name :: Text, size :: Maybe Int } deriving Show
--
-- fileParser :: D.'Parser' File
-- fileParser =
--   File
--     \<$> D.'field' "name" D.'text'
--     \<*> optional (D.'field' "size" D.'int')
-- @
--
-- >>> D.decode fileParser "d4:name9:hello.txt4:sizei16ee"
-- Right (File {name = "hello.txt", size = Just 16})
-- >>> D.decode fileParser "d4:name9:hello.txte"
-- Right (File {name = "hello.txt", size = Nothing})
--
-- === Decode an enum
--
-- @
-- import Control.Applicative ('(<|>)')
--
-- data Color = Red | Green | Blue deriving Show
--
-- colorParser :: D.'Parser' Color
-- colorParser =
--       Red   \<$ D.'stringEq' "red"
--   \<|> Green \<$ D.'stringEq' "green"
--   \<|> Blue  \<$ D.'stringEq' "blue"
--   \<|> D.'fail' "unknown color"
-- @
--
-- >>> D.decode colorParser "5:green"
-- Right Green
-- >>> D.decode colorParser "5:black"
-- Left "Fail: unknown color"
--
-- === Decode differently based on dict contents
--
-- @
-- import Control.Applicative ('(<|>)')
--
-- data Response = Response
--   { id_    :: Int
--   , result :: Either Text ByteString
--   } deriving Show
--
-- responseParser :: D.'Parser' Response
-- responseParser = do
--   id_ <- D.'field' "id" D.'int'
--   success <- D.'field' "status" $
--         False \<$ D.'stringEq' "failure"
--     \<|> True  \<$ D.'stringEq' "success"
--     \<|> D.'fail' "unknown status"
--   Response id_
--     \<$> if success
--         then Right \<$> D.'field' "data" D.'string'
--         else Left  \<$> D.'field' "reason" D.'text'
-- @
--
-- >>> D.decode responseParser "d2:idi42e6:reason12:unauthorized6:status7:failuree"
-- Right (Response {id_ = 42, result = Left "unauthorized"})
-- >>> D.decode responseParser "d4:data4:00002:idi42e6:status7:successe"
-- Right (Response {id_ = 42, result = Right "0000"})
--
-- === Decode nested dicts
--
-- @
-- data File = File { name :: Text, size :: Int } deriving Show
--
-- fileParser :: D.'Parser' File
-- fileParser =
--   File
--     \<$> D.'field' "name" D.'text'
--     \<*> D.'field' "metadata" (D.'field' "info" (D.'field' "size" D.'int'))
-- @
--
-- >>> D.decode fileParser "d8:metadatad4:infod4:sizei32eee4:name9:hello.txte"
-- Right (File {name = "hello.txt", size = 32})
--