{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE StrictData #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# OPTIONS_GHC -fno-warn-unused-imports #-}
{-# OPTIONS_GHC -fno-warn-unused-matches #-}

-- Derived from AWS service descriptions, licensed under Apache 2.0.

-- |
-- Module      : Amazonka.SageMaker.Types.DataProcessing
-- Copyright   : (c) 2013-2023 Brendan Hay
-- License     : Mozilla Public License, v. 2.0.
-- Maintainer  : Brendan Hay
-- Stability   : auto-generated
-- Portability : non-portable (GHC extensions)
module Amazonka.SageMaker.Types.DataProcessing where

import qualified Amazonka.Core as Core
import qualified Amazonka.Core.Lens.Internal as Lens
import qualified Amazonka.Data as Data
import qualified Amazonka.Prelude as Prelude
import Amazonka.SageMaker.Types.JoinSource

-- | The data structure used to specify the data to be used for inference in
-- a batch transform job and to associate the data that is relevant to the
-- prediction results in the output. The input filter provided allows you
-- to exclude input data that is not needed for inference in a batch
-- transform job. The output filter provided allows you to include input
-- data relevant to interpreting the predictions in the output from the
-- job. For more information, see
-- .
--
-- /See:/ 'newDataProcessing' smart constructor.
data DataProcessing = DataProcessing'
  { -- | A
    --
    -- expression used to select a portion of the input data to pass to the
    -- algorithm. Use the @InputFilter@ parameter to exclude fields, such as an
    -- ID column, from the input. If you want SageMaker to pass the entire
    -- input dataset to the algorithm, accept the default value @$@.
    --
    -- Examples: @\"$\"@, @\"$[1:]\"@, @\"$.features\"@
    inputFilter :: Prelude.Maybe Prelude.Text,
    -- | Specifies the source of the data to join with the transformed data. The
    -- valid values are @None@ and @Input@. The default value is @None@, which
    -- specifies not to join the input with the transformed data. If you want
    -- the batch transform job to join the original input data with the
    -- transformed data, set @JoinSource@ to @Input@. You can specify
    -- @OutputFilter@ as an additional filter to select a portion of the joined
    -- dataset and store it in the output file.
    --
    -- For JSON or JSONLines objects, such as a JSON array, SageMaker adds the
    -- transformed data to the input JSON object in an attribute called
    -- @SageMakerOutput@. The joined result for JSON must be a key-value pair
    -- object. If the input is not a key-value pair object, SageMaker creates a
    -- new JSON file. In the new JSON file, the input data is stored under
    -- the @SageMakerInput@ key and the results are stored in
    -- @SageMakerOutput@.
    --
    -- For CSV data, SageMaker takes each row as a JSON array and joins the
    -- transformed data with the input by appending each transformed row to the
    -- end of the input. The joined data has the original input data followed
    -- by the transformed data and the output is a CSV file.
    --
    -- For information on how joining is applied, see
    -- .
    joinSource :: Prelude.Maybe JoinSource,
    -- | A
    --
    -- expression used to select a portion of the joined dataset to save in the
    -- output file for a batch transform job. If you want SageMaker to store
    -- the entire input dataset in the output file, leave the default value,
    -- @$@. If you specify indexes that aren\'t within the dimension size of
    -- the joined dataset, you get an error.
    --
    -- Examples: @\"$\"@, @\"$[0,5:]\"@, @\"$[\'id\',\'SageMakerOutput\']\"@
    outputFilter :: Prelude.Maybe Prelude.Text
  }
  deriving (Prelude.Eq, Prelude.Read, Prelude.Show, Prelude.Generic)

-- |
-- Create a value of 'DataProcessing' with all optional fields omitted.
--
-- Use or to modify other optional fields.
--
-- The following record fields are available, with the corresponding lenses provided
-- for backwards compatibility:
--
-- 'inputFilter', 'dataProcessing_inputFilter' - A
--
-- expression used to select a portion of the input data to pass to the
-- algorithm. Use the @InputFilter@ parameter to exclude fields, such as an
-- ID column, from the input. If you want SageMaker to pass the entire
-- input dataset to the algorithm, accept the default value @$@.
--
-- Examples: @\"$\"@, @\"$[1:]\"@, @\"$.features\"@
--
-- 'joinSource', 'dataProcessing_joinSource' - Specifies the source of the data to join with the transformed data. The
-- valid values are @None@ and @Input@. The default value is @None@, which
-- specifies not to join the input with the transformed data. If you want
-- the batch transform job to join the original input data with the
-- transformed data, set @JoinSource@ to @Input@. You can specify
-- @OutputFilter@ as an additional filter to select a portion of the joined
-- dataset and store it in the output file.
--
-- For JSON or JSONLines objects, such as a JSON array, SageMaker adds the
-- transformed data to the input JSON object in an attribute called
-- @SageMakerOutput@. The joined result for JSON must be a key-value pair
-- object. If the input is not a key-value pair object, SageMaker creates a
-- new JSON file. In the new JSON file, the input data is stored under
-- the @SageMakerInput@ key and the results are stored in
-- @SageMakerOutput@.
--
-- For CSV data, SageMaker takes each row as a JSON array and joins the
-- transformed data with the input by appending each transformed row to the
-- end of the input. The joined data has the original input data followed
-- by the transformed data and the output is a CSV file.
--
-- For information on how joining is applied, see
-- .
--
-- 'outputFilter', 'dataProcessing_outputFilter' - A
--
-- expression used to select a portion of the joined dataset to save in the
-- output file for a batch transform job. If you want SageMaker to store
-- the entire input dataset in the output file, leave the default value,
-- @$@. If you specify indexes that aren\'t within the dimension size of
-- the joined dataset, you get an error.
--
-- Examples: @\"$\"@, @\"$[0,5:]\"@, @\"$[\'id\',\'SageMakerOutput\']\"@
newDataProcessing ::
  DataProcessing
newDataProcessing =
  DataProcessing'
    { inputFilter = Prelude.Nothing,
      joinSource = Prelude.Nothing,
      outputFilter = Prelude.Nothing
    }

-- | A
--
-- expression used to select a portion of the input data to pass to the
-- algorithm. Use the @InputFilter@ parameter to exclude fields, such as an
-- ID column, from the input. If you want SageMaker to pass the entire
-- input dataset to the algorithm, accept the default value @$@.
--
-- Examples: @\"$\"@, @\"$[1:]\"@, @\"$.features\"@
dataProcessing_inputFilter :: Lens.Lens' DataProcessing (Prelude.Maybe Prelude.Text)
dataProcessing_inputFilter = Lens.lens (\DataProcessing' {inputFilter} -> inputFilter) (\s@DataProcessing' {} a -> s {inputFilter = a} :: DataProcessing)

-- | Specifies the source of the data to join with the transformed data. The
-- valid values are @None@ and @Input@. The default value is @None@, which
-- specifies not to join the input with the transformed data. If you want
-- the batch transform job to join the original input data with the
-- transformed data, set @JoinSource@ to @Input@. You can specify
-- @OutputFilter@ as an additional filter to select a portion of the joined
-- dataset and store it in the output file.
--
-- For JSON or JSONLines objects, such as a JSON array, SageMaker adds the
-- transformed data to the input JSON object in an attribute called
-- @SageMakerOutput@. The joined result for JSON must be a key-value pair
-- object. If the input is not a key-value pair object, SageMaker creates a
-- new JSON file. In the new JSON file, the input data is stored under
-- the @SageMakerInput@ key and the results are stored in
-- @SageMakerOutput@.
--
-- For CSV data, SageMaker takes each row as a JSON array and joins the
-- transformed data with the input by appending each transformed row to the
-- end of the input. The joined data has the original input data followed
-- by the transformed data and the output is a CSV file.
--
-- For information on how joining is applied, see
-- .
dataProcessing_joinSource :: Lens.Lens' DataProcessing (Prelude.Maybe JoinSource)
dataProcessing_joinSource = Lens.lens (\DataProcessing' {joinSource} -> joinSource) (\s@DataProcessing' {} a -> s {joinSource = a} :: DataProcessing)

-- | A
--
-- expression used to select a portion of the joined dataset to save in the
-- output file for a batch transform job. If you want SageMaker to store
-- the entire input dataset in the output file, leave the default value,
-- @$@. If you specify indexes that aren\'t within the dimension size of
-- the joined dataset, you get an error.
--
-- Examples: @\"$\"@, @\"$[0,5:]\"@, @\"$[\'id\',\'SageMakerOutput\']\"@
dataProcessing_outputFilter :: Lens.Lens' DataProcessing (Prelude.Maybe Prelude.Text)
dataProcessing_outputFilter = Lens.lens (\DataProcessing' {outputFilter} -> outputFilter) (\s@DataProcessing' {} a -> s {outputFilter = a} :: DataProcessing)

instance Data.FromJSON DataProcessing where
  parseJSON =
    Data.withObject
      "DataProcessing"
      ( \x ->
          DataProcessing'
            Prelude.<$> (x Data..:? "InputFilter")
            Prelude.<*> (x Data..:? "JoinSource")
            Prelude.<*> (x Data..:? "OutputFilter")
      )

instance Prelude.Hashable DataProcessing where
  hashWithSalt _salt DataProcessing' {..} =
    _salt
      `Prelude.hashWithSalt` inputFilter
      `Prelude.hashWithSalt` joinSource
      `Prelude.hashWithSalt` outputFilter

instance Prelude.NFData DataProcessing where
  rnf DataProcessing' {..} =
    Prelude.rnf inputFilter
      `Prelude.seq` Prelude.rnf joinSource
      `Prelude.seq` Prelude.rnf outputFilter

instance Data.ToJSON DataProcessing where
  toJSON DataProcessing' {..} =
    Data.object
      ( Prelude.catMaybes
          [ ("InputFilter" Data..=) Prelude.<$> inputFilter,
            ("JoinSource" Data..=) Prelude.<$> joinSource,
            ("OutputFilter" Data..=) Prelude.<$> outputFilter
          ]
      )