{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE TupleSections #-}

-- |
-- Module: Data.Medea
-- Description: A JSON schema language validator.
-- Copyright: (C) Juspay Technologies Pvt Ltd, 2020
-- License: MIT
-- Maintainer: koz.ross@retro-freedom.nz
-- Stability: Experimental
-- Portability: GHC only
--
-- This module contains the reference Haskell implementation of a Medea
-- validator, providing both schema graph file loading and validation, with some
-- convenience functions.
--
-- A minimal example of use follows. This example first attempts to load a Medea
-- schema graph file from @\/path\/to\/schema.medea@, and, if successful, attempts
-- to validate the JSON file at @\/path\/to\/my.json@ against the schemata so
-- loaded.
--
-- > import Data.Medea (loadSchemaFromFile, validateFromFile)
-- >
-- > main :: IO ()
-- > main = do
-- >   -- try to load the schema graph file
-- >   loaded <- loadSchemaFromFile "/path/to/schema.medea"
-- >   case loaded of
-- >      Left err -> print err -- or some other handling
-- >      Right scm -> do
-- >        -- try to validate
-- >        validated <- validateFromFile scm "/path/to/my.json"
-- >        case validated of
-- >          Left err -> print err -- or some other handling
-- >          Right validJson -> print validJson -- or some other useful thing
--
-- For more details about how to create Medea schema graph files, see
-- @TUTORIAL.md@ and @SPEC.md@.
module Data.Medea
  ( -- * Schema loading
    Schema,
    LoaderError (..),
    ParseError (..),
    buildSchema,
    loadSchemaFromFile,
    loadSchemaFromHandle,

    -- * Schema validation
    JSONType (..),
    SchemaInformation (..),
    ValidationError (..),
    ValidatedJSON,
    toValue,
    validAgainst,
    validate,
    validateFromFile,
    validateFromHandle,
  )
where

import Control.Applicative (Alternative (..))
import Control.Comonad.Cofree (Cofree (..))
import Control.DeepSeq (NFData (..))
import Control.Monad (unless)
import Control.Monad.Except (MonadError (..))
import Control.Monad.IO.Class (MonadIO (..))
import Control.Monad.RWS.Strict (RWST (..), evalRWST)
import Control.Monad.Reader (MonadReader, asks)
import Control.Monad.State.Strict (MonadState (..), gets)
import Data.Aeson (Array, Object, Value (..), decodeStrict)
import qualified Data.ByteString as BS
import Data.ByteString (ByteString)
import Data.Can (Can (..))
import Data.Coerce (coerce)
import Data.Data (Data)
import Data.Foldable (asum, traverse_)
import Data.Functor (($>))
import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as HM
import Data.Hashable (Hashable (..))
import qualified Data.Map.Strict as M
import Data.Medea.Analysis (ArrayType (..), CompiledSchema (..), TypeNode (..), arrayBounds)
import Data.Medea.JSONType (JSONType (..), typeOf)
import Data.Medea.Loader
  ( LoaderError (..),
    buildSchema,
    loadSchemaFromFile,
    loadSchemaFromHandle,
  )
import Data.Medea.Parser.Primitive (Identifier (..), ReservedIdentifier (..), identFromReserved)
import Data.Medea.Parser.Types (ParseError (..))
import Data.Medea.Schema (Schema (..))
import Data.Medea.ValidJSON (ValidJSONF (..))
import qualified Data.Set as S
import Data.Set.NonEmpty
  ( NESet,
    dropWhileAntitone,
    findMin,
    member,
    singleton,
  )
import Data.Text (Text)
import qualified Data.Vector as V
import GHC.Generics (Generic)
import System.IO (Handle)

-- | An annotation, describing which schema a given chunk of JSON was deemed to
-- be valid against.
data SchemaInformation
  = -- | No requirements were placed on this chunk.
    AnySchema
  | -- | Validated as JSON @null@.
    NullSchema
  | -- | Validated as JSON boolean.
    BooleanSchema
  | -- | Validated as JSON number.
    NumberSchema
  | -- | Validated as JSON string.
    StringSchema
  | -- | Validated as JSON array.
    ArraySchema
  | -- | Validated as JSON object.
    ObjectSchema
  | -- | Validated against the start schema.
    StartSchema
  | -- | Validated against the schema with the given name.
    UserDefined {-# UNPACK #-} !Text
  deriving stock (Eq, Data, Show, Generic)
  deriving anyclass (Hashable, NFData)

-- | JSON, annotated with what schemata it was deemed valid against.
newtype ValidatedJSON = ValidatedJSON (Cofree ValidJSONF SchemaInformation)
  deriving stock (Data)
  deriving newtype (Eq, Show)

-- Can't coerce-erase the constructor fmap, sigh
instance NFData ValidatedJSON where
  {-# INLINE rnf #-}
  rnf (ValidatedJSON (x :< f)) =
    rnf x `seq` (rnf . fmap ValidatedJSON $ f)

-- Nor here
instance Hashable ValidatedJSON where
  {-# INLINE hashWithSalt #-}
  hashWithSalt salt (ValidatedJSON (x :< f)) =
    salt `hashWithSalt` x `hashWithSalt` fmap ValidatedJSON f

-- | Convert to an Aeson 'Value', throwing away all schema information.
toValue :: ValidatedJSON -> Value
toValue (ValidatedJSON (_ :< f)) = case f of
  AnythingF v -> v
  NullF -> Null
  BooleanF b -> Bool b
  NumberF n -> Number n
  StringF s -> String s
  ArrayF v -> Array . fmap (toValue . coerce) $ v
  ObjectF hm -> Object . fmap (toValue . coerce) $ hm

-- | What schema did this validate against?
validAgainst :: ValidatedJSON -> SchemaInformation
validAgainst (ValidatedJSON (label :< _)) = label -- TODO: This is a bit useless right now.

-- | All possible validation errors.
data ValidationError
  = EmptyError
  | -- | We could not parse JSON out of what we were provided.
    NotJSON
  | -- | We got a type different to what we expected.
    WrongType
      !Value
      -- ^ The chunk of JSON.
      !JSONType
      -- ^ What we expected the type to be.
  | -- | We expected one of several possibilities, but got something that fits
    -- none.
    NotOneOfOptions !Value
  | -- | We found a JSON object with a property that wasn't specified in its
    -- schema, and additional properties are forbidden.
    AdditionalPropFoundButBanned
      {-# UNPACK #-} !Text
      -- ^ The property in question.
      {-# UNPACK #-} !Text
      -- ^ The name of the specifying schema.
  | -- | We found a JSON object which is missing a property its schema requires.
    RequiredPropertyIsMissing
      {-# UNPACK #-} !Text
      -- ^ The property in question.
      {-# UNPACK #-} !Text
      -- ^ The name of the specifying schema.
  | -- | We found a JSON array which falls outside of the minimum or maximum
    -- length constraints its corresponding schema demands.
    OutOfBoundsArrayLength
      {-# UNPACK #-} !Text
      -- ^ The name of the specifying schema.
      !Value
      -- ^ The JSON chunk corresponding to the invalid array.
  | -- | This is a bug - please report it to us!
    ImplementationError
      {-# UNPACK #-} !Text -- some descriptive text
  deriving stock (Eq, Show, Generic)
  deriving anyclass (Hashable)

instance Semigroup ValidationError where
  EmptyError <> x = x
  x <> _ = x

instance Monoid ValidationError where
  mempty = EmptyError

-- | Attempt to construct validated JSON from a strict bytestring.
-- This will attempt to decode using Aeson before validating.
--
-- If this fails, it will return the first failure condition; that is, the one
-- caused by the first node in a depth-first, right-to-left, document-order
-- traversal of the input JSON.
validate :: Schema -> ByteString -> Either ValidationError ValidatedJSON
validate scm bs = case decodeStrict bs of
  Nothing -> throwError NotJSON
  Just v -> ValidatedJSON <$> go v
  where
    go v =
      fmap fst . evalRWST (runValidationM . checkTypes $ v) scm $ (initialSet, Nothing)
    initialSet = singleton . CustomNode . identFromReserved $ RStart

-- | Helper for construction of validated JSON from a JSON file.
-- This will attempt to decode using Aeson before validating. This will return
-- errors on failure in the same way as 'validate' does.
--
-- This will clean up any file handle(s) if any exceptions are thrown.
validateFromFile ::
  (MonadIO m) =>
  Schema ->
  FilePath ->
  m (Either ValidationError ValidatedJSON)
validateFromFile scm = fmap (validate scm) . liftIO . BS.readFile

-- | Helper for construction of validated JSON from a 'Handle'. This will
-- attempt to decode using Aeson before validating. This will return errors on
-- failure in the same way as 'validate' does.
--
-- This will close the 'Handle' upon finding EOF, or if an exception is thrown.
validateFromHandle ::
  (MonadIO m) =>
  Schema ->
  Handle ->
  m (Either ValidationError ValidatedJSON)
validateFromHandle scm = fmap (validate scm) . liftIO . BS.hGetContents

-- Helpers

newtype ValidationM a = ValidationM
  { runValidationM ::
      RWST
        Schema
        ()
        (NESet TypeNode, Maybe Identifier)
        (Either ValidationError)
        a
  }
  deriving newtype
    ( Functor,
      Applicative,
      Monad,
      MonadReader Schema,
      MonadState (NESet TypeNode, Maybe Identifier),
      MonadError ValidationError
    )

instance Alternative ValidationM where
  empty = ValidationM . RWST $ \_ _ -> Left EmptyError
  ValidationM comp1 <|> ValidationM comp2 = ValidationM . RWST $ go
    where
      go r s = case runRWST comp1 r s of
        Left err -> case runRWST comp2 r s of
          Left _ -> Left err
          Right res -> Right res
        Right res -> Right res

failWith :: ValidationError -> ValidationM a
failWith err = ValidationM . RWST $ \_ _ -> Left err

-- We have 3 different cases:
-- 1. If we are checking against AnyNode, we ALWAYS succeed.
-- 2. If we are checking against PrimitiveNode, we can match with EXACTLY ONE
--    kind of PrimitiveNode.
-- 3. If we are checking against CustomNode, we can match against ANY CustomNode.
--    Thus, we must try all of them.
checkTypes :: Value -> ValidationM (Cofree ValidJSONF SchemaInformation)
checkTypes v = checkAny v <|> checkPrim v <|> checkCustoms v

-- checkAny throws EmptyError if AnyNode is not found. This lets checkTypes
-- use the error thrown by checkPrim/checkCustoms if checkAny fails.
checkAny :: Value -> ValidationM (Cofree ValidJSONF SchemaInformation)
checkAny v = do
  minNode <- gets (findMin . fst)
  case minNode of
    AnyNode -> pure (AnySchema :< AnythingF v)
    _ -> failWith EmptyError

-- checkPrim searches the NESet for the PrimitiveNode corresponding to the Value, otherwise throws an error.
checkPrim :: Value -> ValidationM (Cofree ValidJSONF SchemaInformation)
checkPrim v = do
  (nodes, par) <- get
  unless (member (PrimitiveNode . typeOf $ v) nodes) (failWith . NotOneOfOptions $ v)
  case v of
    Null -> pure (NullSchema :< NullF)
    Bool b -> pure (BooleanSchema :< BooleanF b)
    Number n -> pure (NumberSchema :< NumberF n)
    String s -> case par of
      -- if we are checking a dependent string, we match against the supplied
      -- values
      Nothing -> pure (StringSchema :< StringF s)
      Just parIdent -> do
        scm <- lookupSchema parIdent
        let validVals = stringVals scm
        if
            | V.length validVals == 0 -> pure (StringSchema :< StringF s)
            | s `V.elem` validVals -> pure (StringSchema :< StringF s)
            | otherwise -> failWith . NotOneOfOptions $ v
    Array arr -> case par of
      Nothing -> put (anySet, Nothing) >> (ArraySchema :<) . ArrayF <$> traverse checkTypes arr
      Just parIdent -> checkArray arr parIdent
    Object obj -> case par of
      -- Fast path (no object spec)
      Nothing ->
        put (anySet, Nothing) >> (ObjectSchema :<) . ObjectF <$> traverse checkTypes obj
      Just parIdent -> checkObject obj parIdent

-- check if the array satisfies the corresponding specification.
checkArray :: Array -> Identifier -> ValidationM (Cofree ValidJSONF SchemaInformation)
checkArray arr parIdent = do
  scm <- lookupSchema parIdent
  let arrLen = fromIntegral . V.length $ arr
  maybe (failWith outOfBounds) pure $ case arrayBounds scm of
    Non -> Just () -- no bounds, so any array will do
    One lo -> unless (arrLen >= lo) Nothing
    Eno hi -> unless (arrLen <= hi) Nothing
    Two lo hi -> unless (arrLen >= lo && arrLen <= hi) Nothing
  let valsAndTypes = pairValsWithTypes . arrayTypes $ scm
  checkedArray <- traverse go valsAndTypes
  pure (ArraySchema :< ArrayF checkedArray)
  where
    outOfBounds = OutOfBoundsArrayLength (textify parIdent) . Array $ arr
    pairValsWithTypes = \case
      Nothing -> (,AnyNode) <$> arr
      Just (ListType node) -> (,node) <$> arr
      Just (TupleType nodes) -> V.zip arr nodes
    go (val, typeNode) = do
      put (singleton typeNode, Nothing)
      checkTypes val

-- check if object properties satisfy the corresponding specification.
checkObject :: Object -> Identifier -> ValidationM (Cofree ValidJSONF SchemaInformation)
checkObject obj parIdent = do
  valsAndTypes <- pairPropertySchemaAndVal obj parIdent
  checkedObj <- traverse go valsAndTypes
  pure (ObjectSchema :< ObjectF checkedObj)
  where
    go (val, typeNode) = do
      put (singleton typeNode, Nothing)
      checkTypes val

pairPropertySchemaAndVal ::
  HashMap Text Value -> Identifier -> ValidationM (HashMap Text (Value, TypeNode))
pairPropertySchemaAndVal obj parIdent = do
  scm <- lookupSchema parIdent
  mappedObj <- traverse (pairProperty scm) . HM.mapWithKey (,) $ obj
  traverse_ isMatched . HM.mapWithKey (,) . props $ scm
  pure mappedObj
  where
    -- maps each property value with the schema it should validate against
    pairProperty scm (propName, v) = case HM.lookup propName . props $ scm of
      Just (typeNode, _) -> pure (v, typeNode)
      Nothing ->
        if additionalProps scm
          then pure (v, additionalPropSchema scm)
          else failWith . AdditionalPropFoundButBanned (textify parIdent) $ propName
    -- throws an error if a non-optional property was not found in the object
    isMatched (propName, (_, optional)) = case HM.lookup propName obj of
      Nothing ->
        unless optional . failWith . RequiredPropertyIsMissing (textify parIdent) $ propName
      Just _ -> pure ()

-- checkCustoms removes all non custom nodes from the typeNode set and
-- checks the Value against each until one succeeds.
checkCustoms :: Value -> ValidationM (Cofree ValidJSONF SchemaInformation)
checkCustoms v = do
  -- Here we drop all non-custom nodes
  customNodes <- gets (dropWhileAntitone (not . isCustom) . fst)
  asum . fmap checkCustom . S.toList $ customNodes
  where
    checkCustom = \case
      CustomNode ident -> do
        neighbourhood <- typesAs <$> lookupSchema ident
        put (neighbourhood, Just ident)
        ($> (UserDefined . textify $ ident)) <$> checkTypes v
      -- TODO: Implement a safer filter to avoid having this.
      _ -> failWith . ImplementationError $ "Unreachable code: these nodes must be custom."

lookupSchema ::
  (MonadReader Schema m, MonadError ValidationError m) => Identifier -> m CompiledSchema
lookupSchema ident = do
  x <- asks $ M.lookup ident . compiledSchemata
  case x of
    Just scm -> pure scm
    Nothing -> throwError . ImplementationError $ "Unreachable state: We should be able to find this schema"

anySet :: NESet TypeNode
anySet = singleton AnyNode

textify :: Identifier -> Text
textify (Identifier t) = t

isCustom :: TypeNode -> Bool
isCustom (CustomNode _) = True
isCustom _ = False