{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE TupleSections #-}

-- |
-- Module: Data.Medea
-- Description: A JSON schema language validator.
-- Copyright: (C) Juspay Technologies Pvt Ltd, 2020
-- License: MIT
-- Maintainer: koz.ross@retro-freedom.nz
-- Stability: Experimental
-- Portability: GHC only
--
-- This module contains the reference Haskell implementation of a Medea
-- validator, providing both schema graph file loading and validation, with some
-- convenience functions.
--
-- A minimal example of use follows. This example first attempts to load a Medea
-- schema graph file from @\/path\/to\/schema.medea@, and, if successful, attempts
-- to validate the JSON file at @\/path\/to\/my.json@ against the schemata so
-- loaded.
--
-- > import Data.Medea (loadSchemaFromFile, validateFromFile)
-- > import Control.Monad.Except (runExceptT)
-- >
-- > main :: IO ()
-- > main = do
-- >   -- try to load the schema graph file
-- >   loaded <- runExceptT . loadSchemaFromFile $ "/path/to/schema.medea"
-- >   case loaded of
-- >      Left err -> print err -- or some other handling
-- >      Right scm -> do
-- >        -- try to validate
-- >        validated <- runExceptT . validateFromFile scm $ "/path/to/my.json"
-- >        case validated of
-- >          Left err -> print err -- or some other handling
-- >          Right validJson -> print validJson -- or some other useful thing
--
-- For more details about how to create Medea schema graph files, see
-- @TUTORIAL.md@ and @SPEC.md@.
module Data.Medea
  ( -- * Schema loading
    Schema,
    LoaderError (..),
    ParseError (..),
    buildSchema,
    loadSchemaFromFile,
    loadSchemaFromHandle,

    -- * Schema validation
    JSONType (..),
    SchemaInformation (..),
    ValidationError (..),
    ValidatedJSON,
    toValue,
    validAgainst,
    validate,
    validateFromFile,
    validateFromHandle,
  )
where

import Control.Applicative ((<|>), Alternative)
import Control.Comonad.Cofree (Cofree (..))
import Control.DeepSeq (NFData (..))
import Control.Monad (MonadPlus, unless, when)
import Control.Monad.Except (MonadError (..))
import Control.Monad.IO.Class (MonadIO (..))
import Control.Monad.Reader (MonadReader, asks, runReaderT)
import Control.Monad.State.Strict (MonadState (..), evalStateT, gets)
import Data.Aeson (Array, Object, Value (..), decode)
import qualified Data.ByteString.Lazy as BS
import Data.ByteString.Lazy (ByteString)
import Data.Coerce (coerce)
import Data.Data (Data)
import Data.Foldable (asum, traverse_)
import Data.Functor (($>))
import qualified Data.HashMap.Strict as HM
import Data.Hashable (Hashable (..))
import qualified Data.Map.Strict as M
import Data.Maybe (isNothing)
import Data.Medea.Analysis (ArrayType (..), CompiledSchema (..), TypeNode (..))
import Data.Medea.JSONType (JSONType (..), typeOf)
import Data.Medea.Loader
  ( LoaderError (..),
    buildSchema,
    loadSchemaFromFile,
    loadSchemaFromHandle,
  )
import Data.Medea.Parser.Primitive (Identifier (..), ReservedIdentifier (..), identFromReserved)
import Data.Medea.Parser.Types (ParseError(..))
import Data.Medea.Schema (Schema (..))
import Data.Medea.ValidJSON (ValidJSONF (..))
import qualified Data.Set as S
import Data.Set.NonEmpty
  ( NESet,
    dropWhileAntitone,
    findMin,
    member,
    singleton,
  )
import Data.Text (Text)
import qualified Data.Vector as V
import GHC.Generics (Generic)
import System.IO (Handle, hSetBinaryMode)

-- | An annotation, describing which schema a given chunk of JSON was deemed to
-- be valid against.
data SchemaInformation
  = -- | No requirements were placed on this chunk.
    AnySchema
  | -- | Validated as JSON @null@.
    NullSchema
  | -- | Validated as JSON boolean.
    BooleanSchema
  | -- | Validated as JSON number.
    NumberSchema
  | -- | Validated as JSON string.
    StringSchema
  | -- | Validated as JSON array.
    ArraySchema
  | -- | Validated as JSON object.
    ObjectSchema
  | -- | Validated against the start schema.
    StartSchema
  | -- | Validated against the schema with the given name.
    UserDefined {-# UNPACK #-} !Text
  deriving stock (Eq, Data, Show, Generic)
  deriving anyclass (Hashable, NFData)

-- | JSON, annotated with what schemata it was deemed valid against.
newtype ValidatedJSON = ValidatedJSON (Cofree ValidJSONF SchemaInformation)
  deriving stock (Data)
  deriving newtype (Eq, Show)

-- Can't coerce-erase the constructor fmap, sigh
instance NFData ValidatedJSON where
  {-# INLINE rnf #-}
  rnf (ValidatedJSON (x :< f)) =
    rnf x `seq` (rnf . fmap ValidatedJSON $ f)

-- Nor here
instance Hashable ValidatedJSON where
  {-# INLINE hashWithSalt #-}
  hashWithSalt salt (ValidatedJSON (x :< f)) =
    salt `hashWithSalt` x `hashWithSalt` fmap ValidatedJSON f

-- | Convert to an Aeson 'Value', throwing away all schema information.
toValue :: ValidatedJSON -> Value
toValue (ValidatedJSON (_ :< f)) = case f of
  AnythingF v -> v
  NullF -> Null
  BooleanF b -> Bool b
  NumberF n -> Number n
  StringF s -> String s
  ArrayF v -> Array . fmap (toValue . coerce) $ v
  ObjectF hm -> Object . fmap (toValue . coerce) $ hm

-- | What schema did this validate against?
validAgainst :: ValidatedJSON -> SchemaInformation
validAgainst (ValidatedJSON (label :< _)) = label -- TODO: This is a bit useless right now.

-- | All possible validation errors.
data ValidationError
  = EmptyError
  | -- | We could not parse JSON out of what we were provided.
    NotJSON
  | -- | We got a type different to what we expected.
    WrongType
      !Value -- ^ The chunk of JSON. 
      !JSONType -- ^ What we expected the type to be.
  | -- | We expected one of several possibilities, but got something that fits
    -- none.
    NotOneOfOptions !Value
  | -- | We found a JSON object with a property that wasn't specified in its
    -- schema, and additional properties are forbidden.
    AdditionalPropFoundButBanned
      {-# UNPACK #-} !Text -- ^ The property in question. 
      {-# UNPACK #-} !Text -- ^ The name of the specifying schema.
  | -- | We found a JSON object which is missing a property its schema requires.
    RequiredPropertyIsMissing
      {-# UNPACK #-} !Text -- ^ The property in question.
      {-# UNPACK #-} !Text -- ^ The name of the specifying schema.
  | -- | We found a JSON array which falls outside of the minimum or maximum 
    -- length constraints its corresponding schema demands.
    OutOfBoundsArrayLength
      {-# UNPACK #-} !Text -- ^ The name of the specifying schema. 
      !Value -- ^ The JSON chunk corresponding to the invalid array.
  | -- | This is a bug - please report it to us!
    ImplementationError
      {-# UNPACK #-} !Text -- some descriptive text
  deriving stock (Eq, Show, Generic)
  deriving anyclass (Hashable)

instance Semigroup ValidationError where
  EmptyError <> x = x
  x <> _ = x

instance Monoid ValidationError where
  mempty = EmptyError

-- | Attempt to construct validated JSON from a bytestring.
-- This will attempt to decode using Aeson before validating.
validate ::
  (MonadPlus m, MonadError ValidationError m) =>
  Schema ->
  ByteString ->
  m ValidatedJSON
validate scm bs = case decode bs of
  Nothing -> throwError NotJSON
  Just v -> ValidatedJSON <$> go v
  where
    go v = runReaderT (evalStateT (checkTypes v) (initialSet, Nothing)) scm
    initialSet = singleton . CustomNode . identFromReserved $ RStart

-- | Helper for construction of validated JSON from a JSON file.
-- This will attempt to decode using Aeson before validating.
validateFromFile ::
  (MonadPlus m, MonadError ValidationError m, MonadIO m) =>
  Schema ->
  FilePath ->
  m ValidatedJSON
validateFromFile scm fp = do
  bs <- liftIO (BS.readFile fp)
  validate scm bs

-- | Helper for construction of validated JSON from a 'Handle'.
-- This will set the argument 'Handle' to binary mode, as this function won't
-- work otherwise. This function will close the 'Handle' once it finds EOF.
-- This will attempt to decode using Aeson before validating.
validateFromHandle ::
  (MonadPlus m, MonadError ValidationError m, MonadIO m) =>
  Schema ->
  Handle ->
  m ValidatedJSON
validateFromHandle scm h = do
  liftIO (hSetBinaryMode h True)
  bs <- liftIO (BS.hGetContents h)
  validate scm bs

-- Helpers

-- We have 3 different cases:
-- 1. If we are checking against AnyNode, we ALWAYS succeed.
-- 2. If we are checking against PrimitiveNode, we can match with EXACTLY ONE
--    kind of PrimitiveNode.
-- 3. If we are checking against CustomNode, we can match against ANY CustomNode.
--    Thus, we must try all of them.
checkTypes ::
  (Alternative m, MonadReader Schema m, MonadState (NESet TypeNode, Maybe Identifier) m, MonadError ValidationError m) =>
  Value ->
  m (Cofree ValidJSONF SchemaInformation)
checkTypes v = checkAny v <|> checkPrim v <|> checkCustoms v

-- checkAny throws EmptyError if AnyNode is not found. This lets checkTypes
-- use the error thrown by checkPrim/checkCustoms if checkAny fails.
checkAny ::
  (Alternative m, MonadState (NESet TypeNode, Maybe Identifier) m, MonadError ValidationError m) =>
  Value ->
  m (Cofree ValidJSONF SchemaInformation)
checkAny v = do
  minNode <- gets $ findMin . fst -- AnyNode is the smallest possible TypeNode.
  case minNode of
    AnyNode -> pure $ AnySchema :< AnythingF v
    _ -> throwError EmptyError

-- checkPrim searches the NESet for the PrimitiveNode corresponding to the Value, otherwise throws an error.
checkPrim ::
  (Alternative m, MonadReader Schema m, MonadState (NESet TypeNode, Maybe Identifier) m, MonadError ValidationError m) =>
  Value ->
  m (Cofree ValidJSONF SchemaInformation)
checkPrim v = do
  (nodes, par) <- gets id
  unless (member (PrimitiveNode . typeOf $ v) nodes) $ throwError . NotOneOfOptions $ v
  case v of
    Null -> pure $ NullSchema :< NullF
    Bool b -> pure $ BooleanSchema :< BooleanF b
    Number n -> pure $ NumberSchema :< NumberF n
    String s -> case par of
      -- if we are checking against a dependant string, we match against the supplied values
      Nothing -> pure $ StringSchema :< StringF s
      Just parIdent -> do
        scm <- lookupSchema parIdent
        let validVals = stringVals scm
        if s `V.elem` validVals || null validVals
          then pure $ StringSchema :< StringF s
          else throwError $ NotOneOfOptions v
    Array arr -> case par of
      Nothing -> put (anySet, Nothing) >> (ArraySchema :<) . ArrayF <$> traverse checkTypes arr
      Just parIdent -> checkArray arr parIdent
    Object obj -> case par of
      -- Fast Path (no object spec)
      Nothing -> put (anySet, Nothing) >> (ObjectSchema :<) . ObjectF <$> traverse checkTypes obj
      Just parIdent -> checkObject obj parIdent

-- check if the array length is within the specification range.
checkArray ::
  (Alternative m, MonadReader Schema m, MonadState (NESet TypeNode, Maybe Identifier) m, MonadError ValidationError m) =>
  Array ->
  Identifier ->
  m (Cofree ValidJSONF SchemaInformation)
checkArray arr parIdent = do
  scm <- lookupSchema parIdent
  let arrLen = fromIntegral $ V.length arr
  when
    ( maybe False (arrLen <) (minArrayLen scm)
        || maybe False (arrLen >) (maxArrayLen scm)
    )
    $ throwError . OutOfBoundsArrayLength (textify parIdent) . Array
    $ arr
  let valsAndTypes = pairValsWithTypes $ arrayTypes scm
  checkedArray <- traverse (\(val, typeNode) -> put (singleton typeNode, Nothing) >> checkTypes val) valsAndTypes
  pure $ ArraySchema :< ArrayF checkedArray
  where
    pairValsWithTypes Nothing = fmap (,AnyNode) arr
    pairValsWithTypes (Just (ListType node)) = fmap (,node) arr
    pairValsWithTypes (Just (TupleType nodes)) = V.zip arr nodes

-- check if object properties satisfy the corresponding specification.
checkObject ::
  (Alternative m, MonadReader Schema m, MonadState (NESet TypeNode, Maybe Identifier) m, MonadError ValidationError m) =>
  Object ->
  Identifier ->
  m (Cofree ValidJSONF SchemaInformation)
checkObject obj parIdent = do
  valsAndTypes <- pairPropertySchemaAndVal obj parIdent
  checkedObj <- traverse (\(val, typeNode) -> put (singleton typeNode, Nothing) >> checkTypes val) valsAndTypes
  pure $ ObjectSchema :< ObjectF checkedObj

pairPropertySchemaAndVal ::
  (Alternative m, MonadReader Schema m, MonadError ValidationError m) =>
  HM.HashMap Text Value ->
  Identifier ->
  m (HM.HashMap Text (Value, TypeNode))
pairPropertySchemaAndVal obj parIdent = do
  scm <- lookupSchema parIdent
  mappedObj <- traverse (pairProperty scm) $ HM.mapWithKey (,) obj
  traverse_ isMatched . HM.mapWithKey (,) $ props scm
  pure mappedObj
  where
    -- maps each property-value with the schema(typeNode) it should validate against
    pairProperty scm (propName, v) = case HM.lookup propName $ props scm of
      Just (typeNode, _) -> pure (v, typeNode)
      Nothing
        | additionalProps scm -> pure (v, additionalPropSchema scm)
        | otherwise -> throwError . AdditionalPropFoundButBanned (textify parIdent) $ propName
    -- throws ann error if a non-optional property was not found in the object
    isMatched (propName, (_, optional)) =
      when (isNothing (HM.lookup propName obj) && not optional)
        $ throwError . RequiredPropertyIsMissing (textify parIdent)
        $ propName

-- checkCustoms removes all non custom nodes from the typeNode set and
-- checks the Value against each until one succeeds.
checkCustoms ::
  (Alternative m, MonadReader Schema m, MonadState (NESet TypeNode, Maybe Identifier) m, MonadError ValidationError m) =>
  Value ->
  m (Cofree ValidJSONF SchemaInformation)
checkCustoms v = do
  -- Here we drop all non custom nodes.
  customNodes <- gets $ dropWhileAntitone (not . isCustom) . fst
  asum . fmap checkCustom . S.toList $ customNodes
  where
    -- Check value against successfors of a custom node.
    checkCustom (CustomNode ident) = do
      neighbourhood <- typesAs <$> lookupSchema ident
      put (neighbourhood, Just ident)
      ($> (UserDefined . textify $ ident)) <$> checkTypes v
    checkCustom _ = throwError $ ImplementationError "Unreachable code: All these nodes MUST be custom."

lookupSchema ::
  (MonadReader Schema m, MonadError ValidationError m) => Identifier -> m CompiledSchema
lookupSchema ident = do
  x <- asks $ M.lookup ident . compiledSchemata
  case x of
    Just scm -> pure scm
    Nothing -> throwError . ImplementationError $ "Unreachable state: We should be able to find this schema"

anySet :: NESet TypeNode
anySet = singleton AnyNode

textify :: Identifier -> Text
textify (Identifier t) = t

isCustom :: TypeNode -> Bool
isCustom (CustomNode _) = True
isCustom _ = False