{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE StrictData #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# OPTIONS_GHC -fno-warn-unused-imports #-}
{-# OPTIONS_GHC -fno-warn-unused-matches #-}

-- Derived from AWS service descriptions, licensed under Apache 2.0.

-- |
-- Module      : Amazonka.SageMaker.Types.DataProcessing
-- Copyright   : (c) 2013-2023 Brendan Hay
-- License     : Mozilla Public License, v. 2.0.
-- Maintainer  : Brendan Hay
-- Stability   : auto-generated
-- Portability : non-portable (GHC extensions)
module Amazonka.SageMaker.Types.DataProcessing where

import qualified Amazonka.Core as Core
import qualified Amazonka.Core.Lens.Internal as Lens
import qualified Amazonka.Data as Data
import qualified Amazonka.Prelude as Prelude
import Amazonka.SageMaker.Types.JoinSource

-- | The data structure used to specify the data to be used for inference in
-- a batch transform job and to associate the data that is relevant to the
-- prediction results in the output. The input filter provided allows you
-- to exclude input data that is not needed for inference in a batch
-- transform job. The output filter provided allows you to include input
-- data relevant to interpreting the predictions in the output from the
-- job. For more information, see
-- <https://docs.aws.amazon.com/sagemaker/latest/dg/batch-transform-data-processing.html Associate Prediction Results with their Corresponding Input Records>.
--
-- /See:/ 'newDataProcessing' smart constructor.
data DataProcessing = DataProcessing'
  { -- | A
    -- <https://docs.aws.amazon.com/sagemaker/latest/dg/batch-transform-data-processing.html#data-processing-operators JSONPath>
    -- expression used to select a portion of the input data to pass to the
    -- algorithm. Use the @InputFilter@ parameter to exclude fields, such as an
    -- ID column, from the input. If you want SageMaker to pass the entire
    -- input dataset to the algorithm, accept the default value @$@.
    --
    -- Examples: @\"$\"@, @\"$[1:]\"@, @\"$.features\"@
    inputFilter :: Prelude.Maybe Prelude.Text,
    -- | Specifies the source of the data to join with the transformed data. The
    -- valid values are @None@ and @Input@. The default value is @None@, which
    -- specifies not to join the input with the transformed data. If you want
    -- the batch transform job to join the original input data with the
    -- transformed data, set @JoinSource@ to @Input@. You can specify
    -- @OutputFilter@ as an additional filter to select a portion of the joined
    -- dataset and store it in the output file.
    --
    -- For JSON or JSONLines objects, such as a JSON array, SageMaker adds the
    -- transformed data to the input JSON object in an attribute called
    -- @SageMakerOutput@. The joined result for JSON must be a key-value pair
    -- object. If the input is not a key-value pair object, SageMaker creates a
    -- new JSON file. In the new JSON file, the input data is stored under
    -- the @SageMakerInput@ key and the results are stored in
    -- @SageMakerOutput@.
    --
    -- For CSV data, SageMaker takes each row as a JSON array and joins the
    -- transformed data with the input by appending each transformed row to the
    -- end of the input. The joined data has the original input data followed
    -- by the transformed data and the output is a CSV file.
    --
    -- For information on how joining is applied, see
    -- <https://docs.aws.amazon.com/sagemaker/latest/dg/batch-transform-data-processing.html#batch-transform-data-processing-workflow Workflow for Associating Inferences with Input Records>.
    joinSource :: Prelude.Maybe JoinSource,
    -- | A
    -- <https://docs.aws.amazon.com/sagemaker/latest/dg/batch-transform-data-processing.html#data-processing-operators JSONPath>
    -- expression used to select a portion of the joined dataset to save in the
    -- output file for a batch transform job. If you want SageMaker to store
    -- the entire input dataset in the output file, leave the default value,
    -- @$@. If you specify indexes that aren\'t within the dimension size of
    -- the joined dataset, you get an error.
    --
    -- Examples: @\"$\"@, @\"$[0,5:]\"@, @\"$[\'id\',\'SageMakerOutput\']\"@
    outputFilter :: Prelude.Maybe Prelude.Text
  }
  deriving (Prelude.Eq, Prelude.Read, Prelude.Show, Prelude.Generic)

-- |
-- Create a value of 'DataProcessing' with all optional fields omitted.
--
-- Use <https://hackage.haskell.org/package/generic-lens generic-lens> or <https://hackage.haskell.org/package/optics optics> to modify other optional fields.
--
-- The following record fields are available, with the corresponding lenses provided
-- for backwards compatibility:
--
-- 'inputFilter', 'dataProcessing_inputFilter' - A
-- <https://docs.aws.amazon.com/sagemaker/latest/dg/batch-transform-data-processing.html#data-processing-operators JSONPath>
-- expression used to select a portion of the input data to pass to the
-- algorithm. Use the @InputFilter@ parameter to exclude fields, such as an
-- ID column, from the input. If you want SageMaker to pass the entire
-- input dataset to the algorithm, accept the default value @$@.
--
-- Examples: @\"$\"@, @\"$[1:]\"@, @\"$.features\"@
--
-- 'joinSource', 'dataProcessing_joinSource' - Specifies the source of the data to join with the transformed data. The
-- valid values are @None@ and @Input@. The default value is @None@, which
-- specifies not to join the input with the transformed data. If you want
-- the batch transform job to join the original input data with the
-- transformed data, set @JoinSource@ to @Input@. You can specify
-- @OutputFilter@ as an additional filter to select a portion of the joined
-- dataset and store it in the output file.
--
-- For JSON or JSONLines objects, such as a JSON array, SageMaker adds the
-- transformed data to the input JSON object in an attribute called
-- @SageMakerOutput@. The joined result for JSON must be a key-value pair
-- object. If the input is not a key-value pair object, SageMaker creates a
-- new JSON file. In the new JSON file, the input data is stored under
-- the @SageMakerInput@ key and the results are stored in
-- @SageMakerOutput@.
--
-- For CSV data, SageMaker takes each row as a JSON array and joins the
-- transformed data with the input by appending each transformed row to the
-- end of the input. The joined data has the original input data followed
-- by the transformed data and the output is a CSV file.
--
-- For information on how joining is applied, see
-- <https://docs.aws.amazon.com/sagemaker/latest/dg/batch-transform-data-processing.html#batch-transform-data-processing-workflow Workflow for Associating Inferences with Input Records>.
--
-- 'outputFilter', 'dataProcessing_outputFilter' - A
-- <https://docs.aws.amazon.com/sagemaker/latest/dg/batch-transform-data-processing.html#data-processing-operators JSONPath>
-- expression used to select a portion of the joined dataset to save in the
-- output file for a batch transform job. If you want SageMaker to store
-- the entire input dataset in the output file, leave the default value,
-- @$@. If you specify indexes that aren\'t within the dimension size of
-- the joined dataset, you get an error.
--
-- Examples: @\"$\"@, @\"$[0,5:]\"@, @\"$[\'id\',\'SageMakerOutput\']\"@
newDataProcessing ::
  DataProcessing
newDataProcessing =
  -- All three fields are optional, so the constructor takes no arguments
  -- and every field starts out as 'Prelude.Nothing'.
  DataProcessing'
    { inputFilter = Prelude.Nothing,
      joinSource = Prelude.Nothing,
      outputFilter = Prelude.Nothing
    }

-- | A
-- <https://docs.aws.amazon.com/sagemaker/latest/dg/batch-transform-data-processing.html#data-processing-operators JSONPath>
-- expression used to select a portion of the input data to pass to the
-- algorithm. Use the @InputFilter@ parameter to exclude fields, such as an
-- ID column, from the input. If you want SageMaker to pass the entire
-- input dataset to the algorithm, accept the default value @$@.
--
-- Examples: @\"$\"@, @\"$[1:]\"@, @\"$.features\"@
dataProcessing_inputFilter :: Lens.Lens' DataProcessing (Prelude.Maybe Prelude.Text)
dataProcessing_inputFilter =
  Lens.lens
    -- Getter: read the field through a NamedFieldPuns pattern.
    (\DataProcessing' {inputFilter} -> inputFilter)
    -- Setter: the trailing annotation pins the record type of the update
    -- (this module enables DuplicateRecordFields).
    (\s@DataProcessing' {} a -> s {inputFilter = a} :: DataProcessing)

-- | Specifies the source of the data to join with the transformed data. The
-- valid values are @None@ and @Input@. The default value is @None@, which
-- specifies not to join the input with the transformed data. If you want
-- the batch transform job to join the original input data with the
-- transformed data, set @JoinSource@ to @Input@. You can specify
-- @OutputFilter@ as an additional filter to select a portion of the joined
-- dataset and store it in the output file.
--
-- For JSON or JSONLines objects, such as a JSON array, SageMaker adds the
-- transformed data to the input JSON object in an attribute called
-- @SageMakerOutput@. The joined result for JSON must be a key-value pair
-- object. If the input is not a key-value pair object, SageMaker creates a
-- new JSON file. In the new JSON file, the input data is stored under
-- the @SageMakerInput@ key and the results are stored in
-- @SageMakerOutput@.
--
-- For CSV data, SageMaker takes each row as a JSON array and joins the
-- transformed data with the input by appending each transformed row to the
-- end of the input. The joined data has the original input data followed
-- by the transformed data and the output is a CSV file.
--
-- For information on how joining is applied, see
-- <https://docs.aws.amazon.com/sagemaker/latest/dg/batch-transform-data-processing.html#batch-transform-data-processing-workflow Workflow for Associating Inferences with Input Records>.
dataProcessing_joinSource :: Lens.Lens' DataProcessing (Prelude.Maybe JoinSource)
dataProcessing_joinSource =
  Lens.lens
    -- Getter: read the field through a NamedFieldPuns pattern.
    (\DataProcessing' {joinSource} -> joinSource)
    -- Setter: the trailing annotation pins the record type of the update
    -- (this module enables DuplicateRecordFields).
    (\s@DataProcessing' {} a -> s {joinSource = a} :: DataProcessing)

-- | A
-- <https://docs.aws.amazon.com/sagemaker/latest/dg/batch-transform-data-processing.html#data-processing-operators JSONPath>
-- expression used to select a portion of the joined dataset to save in the
-- output file for a batch transform job. If you want SageMaker to store
-- the entire input dataset in the output file, leave the default value,
-- @$@. If you specify indexes that aren\'t within the dimension size of
-- the joined dataset, you get an error.
--
-- Examples: @\"$\"@, @\"$[0,5:]\"@, @\"$[\'id\',\'SageMakerOutput\']\"@
dataProcessing_outputFilter :: Lens.Lens' DataProcessing (Prelude.Maybe Prelude.Text)
dataProcessing_outputFilter =
  Lens.lens
    -- Getter: read the field through a NamedFieldPuns pattern.
    (\DataProcessing' {outputFilter} -> outputFilter)
    -- Setter: the trailing annotation pins the record type of the update
    -- (this module enables DuplicateRecordFields).
    (\s@DataProcessing' {} a -> s {outputFilter = a} :: DataProcessing)

-- Decodes a JSON object into a 'DataProcessing'. Each key is looked up
-- with '.:?', so a missing key yields 'Nothing' for that field.
instance Data.FromJSON DataProcessing where
  parseJSON =
    Data.withObject
      "DataProcessing"
      ( \x ->
          -- Argument order must match the field order of 'DataProcessing''.
          DataProcessing'
            Prelude.<$> (x Data..:? "InputFilter")
            Prelude.<*> (x Data..:? "JoinSource")
            Prelude.<*> (x Data..:? "OutputFilter")
      )

-- Hashes a 'DataProcessing' by threading the salt through the three
-- fields in declaration order.
instance Prelude.Hashable DataProcessing where
  hashWithSalt _salt DataProcessing' {..} =
    _salt
      `Prelude.hashWithSalt` inputFilter
      `Prelude.hashWithSalt` joinSource
      `Prelude.hashWithSalt` outputFilter

-- Fully evaluates a 'DataProcessing' by applying 'Prelude.rnf' to each
-- field, chained with 'Prelude.seq'.
instance Prelude.NFData DataProcessing where
  rnf DataProcessing' {..} =
    Prelude.rnf inputFilter
      `Prelude.seq` Prelude.rnf joinSource
      `Prelude.seq` Prelude.rnf outputFilter

-- Encodes a 'DataProcessing' as a JSON object. Fields that are 'Nothing'
-- are dropped by 'Prelude.catMaybes', so only populated fields are emitted.
instance Data.ToJSON DataProcessing where
  toJSON DataProcessing' {..} =
    Data.object
      ( Prelude.catMaybes
          [ ("InputFilter" Data..=) Prelude.<$> inputFilter,
            ("JoinSource" Data..=) Prelude.<$> joinSource,
            ("OutputFilter" Data..=) Prelude.<$> outputFilter
          ]
      )