{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE StrictData #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# OPTIONS_GHC -fno-warn-unused-imports #-}
{-# OPTIONS_GHC -fno-warn-unused-matches #-}

-- Derived from AWS service descriptions, licensed under Apache 2.0.

-- |
-- Module      : Amazonka.Comprehend.Types.EntityRecognizerInputDataConfig
-- Copyright   : (c) 2013-2023 Brendan Hay
-- License     : Mozilla Public License, v. 2.0.
-- Maintainer  : Brendan Hay
-- Stability   : auto-generated
-- Portability : non-portable (GHC extensions)
module Amazonka.Comprehend.Types.EntityRecognizerInputDataConfig where

import Amazonka.Comprehend.Types.AugmentedManifestsListItem
import Amazonka.Comprehend.Types.EntityRecognizerAnnotations
import Amazonka.Comprehend.Types.EntityRecognizerDataFormat
import Amazonka.Comprehend.Types.EntityRecognizerDocuments
import Amazonka.Comprehend.Types.EntityRecognizerEntityList
import Amazonka.Comprehend.Types.EntityTypesListItem
import qualified Amazonka.Core as Core
import qualified Amazonka.Core.Lens.Internal as Lens
import qualified Amazonka.Data as Data
import qualified Amazonka.Prelude as Prelude

-- | Specifies the format and location of the input data.
--
-- /See:/ 'newEntityRecognizerInputDataConfig' smart constructor.
data EntityRecognizerInputDataConfig = EntityRecognizerInputDataConfig'
  { -- | The S3 location of the CSV file that annotates your training
    -- documents.
    annotations :: Prelude.Maybe EntityRecognizerAnnotations,
    -- | A list of augmented manifest files that provide training data for
    -- your custom model. An augmented manifest file is a labeled dataset
    -- that is produced by Amazon SageMaker Ground Truth.
    --
    -- This parameter is required if you set @DataFormat@ to
    -- @AUGMENTED_MANIFEST@.
    augmentedManifests :: Prelude.Maybe [AugmentedManifestsListItem],
    -- | The format of your training data:
    --
    -- -   @COMPREHEND_CSV@: A CSV file that supplements your training
    --     documents. The CSV file contains information about the custom
    --     entities that your trained model will detect. The required format
    --     of the file depends on whether you are providing annotations or
    --     an entity list.
    --
    --     If you use this value, you must provide your CSV file by using
    --     either the @Annotations@ or @EntityList@ parameters. You must
    --     provide your training documents by using the @Documents@
    --     parameter.
    --
    -- -   @AUGMENTED_MANIFEST@: A labeled dataset that is produced by Amazon
    --     SageMaker Ground Truth. This file is in JSON lines format. Each
    --     line is a complete JSON object that contains a training document
    --     and its labels. Each label annotates a named entity in the
    --     training document.
    --
    --     If you use this value, you must provide the @AugmentedManifests@
    --     parameter in your request.
    --
    -- If you don\'t specify a value, Amazon Comprehend uses @COMPREHEND_CSV@
    -- as the default.
    dataFormat :: Prelude.Maybe EntityRecognizerDataFormat,
    -- | The S3 location of the folder that contains the training documents
    -- for your custom entity recognizer.
    --
    -- This parameter is required if you set @DataFormat@ to
    -- @COMPREHEND_CSV@.
    documents :: Prelude.Maybe EntityRecognizerDocuments,
    -- | The S3 location of the CSV file that has the entity list for your
    -- custom entity recognizer.
    entityList :: Prelude.Maybe EntityRecognizerEntityList,
    -- | The entity types in the labeled training data that Amazon Comprehend
    -- uses to train the custom entity recognizer. Any entity types that you
    -- don\'t specify are ignored.
    --
    -- A maximum of 25 entity types can be used at one time to train an
    -- entity recognizer. Entity types must not contain the following
    -- invalid characters: \\n (line break), \\\\n (escaped line break), \\r
    -- (carriage return), \\\\r (escaped carriage return), \\t (tab), \\\\t
    -- (escaped tab), space, and , (comma).
    entityTypes :: [EntityTypesListItem]
  }
  deriving (Prelude.Eq, Prelude.Read, Prelude.Show, Prelude.Generic)

-- | Create a value of 'EntityRecognizerInputDataConfig' with all optional
-- fields omitted: every @Maybe@ field starts as 'Prelude.Nothing' and
-- 'entityTypes' starts as the empty list.
--
-- Use record update syntax or the lenses named below to set the remaining
-- fields.
--
-- The following record fields are available, with the corresponding lenses
-- provided for backwards compatibility. Each entry is a short summary; the
-- complete description is attached to the record field itself.
--
-- 'annotations', 'entityRecognizerInputDataConfig_annotations' - The S3
-- location of the CSV file that annotates your training documents.
--
-- 'augmentedManifests', 'entityRecognizerInputDataConfig_augmentedManifests' -
-- A list of augmented manifest files that provide training data for your
-- custom model. Required if you set @DataFormat@ to @AUGMENTED_MANIFEST@.
--
-- 'dataFormat', 'entityRecognizerInputDataConfig_dataFormat' - The format of
-- your training data, either @COMPREHEND_CSV@ or @AUGMENTED_MANIFEST@. If
-- you don\'t specify a value, Amazon Comprehend uses @COMPREHEND_CSV@ as the
-- default.
--
-- 'documents', 'entityRecognizerInputDataConfig_documents' - The S3 location
-- of the folder that contains the training documents for your custom entity
-- recognizer. Required if you set @DataFormat@ to @COMPREHEND_CSV@.
--
-- 'entityList', 'entityRecognizerInputDataConfig_entityList' - The S3
-- location of the CSV file that has the entity list for your custom entity
-- recognizer.
--
-- 'entityTypes', 'entityRecognizerInputDataConfig_entityTypes' - The entity
-- types in the labeled training data that Amazon Comprehend uses to train
-- the custom entity recognizer. A maximum of 25 entity types can be used at
-- one time.
newEntityRecognizerInputDataConfig ::
  EntityRecognizerInputDataConfig
newEntityRecognizerInputDataConfig =
  EntityRecognizerInputDataConfig'
    { annotations = Prelude.Nothing,
      augmentedManifests = Prelude.Nothing,
      dataFormat = Prelude.Nothing,
      documents = Prelude.Nothing,
      entityList = Prelude.Nothing,
      entityTypes = []
    }

-- | The S3 location of the CSV file that annotates your training documents.
entityRecognizerInputDataConfig_annotations :: Lens.Lens' EntityRecognizerInputDataConfig (Prelude.Maybe EntityRecognizerAnnotations)
entityRecognizerInputDataConfig_annotations = Lens.lens getField setField
  where
    getField EntityRecognizerInputDataConfig' {annotations = current} = current
    -- The result annotation selects the record type for the update, since
    -- DuplicateRecordFields is enabled in this module.
    setField config@EntityRecognizerInputDataConfig' {} replacement =
      config {annotations = replacement} :: EntityRecognizerInputDataConfig

-- | A list of augmented manifest files that provide training data for your
-- custom model. An augmented manifest file is a labeled dataset that is
-- produced by Amazon SageMaker Ground Truth.
--
-- This parameter is required if you set @DataFormat@ to
-- @AUGMENTED_MANIFEST@.
entityRecognizerInputDataConfig_augmentedManifests :: Lens.Lens' EntityRecognizerInputDataConfig (Prelude.Maybe [AugmentedManifestsListItem])
entityRecognizerInputDataConfig_augmentedManifests =
  Lens.lens getField setField Prelude.. Lens.mapping Lens.coerced
  where
    getField EntityRecognizerInputDataConfig' {augmentedManifests = current} = current
    setField config@EntityRecognizerInputDataConfig' {} replacement =
      config {augmentedManifests = replacement} :: EntityRecognizerInputDataConfig

-- | The format of your training data:
--
-- -   @COMPREHEND_CSV@: A CSV file that supplements your training
--     documents. The CSV file contains information about the custom
--     entities that your trained model will detect. The required format of
--     the file depends on whether you are providing annotations or an
--     entity list.
--
--     If you use this value, you must provide your CSV file by using
--     either the @Annotations@ or @EntityList@ parameters. You must
--     provide your training documents by using the @Documents@ parameter.
--
-- -   @AUGMENTED_MANIFEST@: A labeled dataset that is produced by Amazon
--     SageMaker Ground Truth. This file is in JSON lines format. Each line
--     is a complete JSON object that contains a training document and its
--     labels. Each label annotates a named entity in the training
--     document.
--
--     If you use this value, you must provide the @AugmentedManifests@
--     parameter in your request.
--
-- If you don\'t specify a value, Amazon Comprehend uses @COMPREHEND_CSV@
-- as the default.
entityRecognizerInputDataConfig_dataFormat :: Lens.Lens' EntityRecognizerInputDataConfig (Prelude.Maybe EntityRecognizerDataFormat)
entityRecognizerInputDataConfig_dataFormat = Lens.lens getField setField
  where
    getField EntityRecognizerInputDataConfig' {dataFormat = current} = current
    setField config@EntityRecognizerInputDataConfig' {} replacement =
      config {dataFormat = replacement} :: EntityRecognizerInputDataConfig

-- | The S3 location of the folder that contains the training documents for
-- your custom entity recognizer.
--
-- This parameter is required if you set @DataFormat@ to @COMPREHEND_CSV@.
entityRecognizerInputDataConfig_documents :: Lens.Lens' EntityRecognizerInputDataConfig (Prelude.Maybe EntityRecognizerDocuments)
entityRecognizerInputDataConfig_documents = Lens.lens getField setField
  where
    getField EntityRecognizerInputDataConfig' {documents = current} = current
    setField config@EntityRecognizerInputDataConfig' {} replacement =
      config {documents = replacement} :: EntityRecognizerInputDataConfig

-- | The S3 location of the CSV file that has the entity list for your custom
-- entity recognizer.
entityRecognizerInputDataConfig_entityList :: Lens.Lens' EntityRecognizerInputDataConfig (Prelude.Maybe EntityRecognizerEntityList)
entityRecognizerInputDataConfig_entityList = Lens.lens getField setField
  where
    getField EntityRecognizerInputDataConfig' {entityList = current} = current
    setField config@EntityRecognizerInputDataConfig' {} replacement =
      config {entityList = replacement} :: EntityRecognizerInputDataConfig

-- | The entity types in the labeled training data that Amazon Comprehend
-- uses to train the custom entity recognizer. Any entity types that you
-- don\'t specify are ignored.
--
-- A maximum of 25 entity types can be used at one time to train an entity
-- recognizer. Entity types must not contain the following invalid
-- characters: \\n (line break), \\\\n (escaped line break), \\r (carriage
-- return), \\\\r (escaped carriage return), \\t (tab), \\\\t (escaped
-- tab), space, and , (comma).
entityRecognizerInputDataConfig_entityTypes :: Lens.Lens' EntityRecognizerInputDataConfig [EntityTypesListItem]
entityRecognizerInputDataConfig_entityTypes =
  Lens.lens getField setField Prelude.. Lens.coerced
  where
    getField EntityRecognizerInputDataConfig' {entityTypes = current} = current
    -- The result annotation selects the record type for the update, since
    -- DuplicateRecordFields is enabled in this module.
    setField config@EntityRecognizerInputDataConfig' {} replacement =
      config {entityTypes = replacement} :: EntityRecognizerInputDataConfig

-- Decodes the configuration from a JSON object. Every key is looked up with
-- the optional-key combinator; "AugmentedManifests" and "EntityTypes" fall
-- back to 'Prelude.mempty' (for the @Maybe@-typed field that is
-- 'Prelude.Nothing', for the list field it is the empty list).
instance Data.FromJSON EntityRecognizerInputDataConfig where
  parseJSON =
    Data.withObject "EntityRecognizerInputDataConfig" parseFields
    where
      parseFields obj = do
        annotations <- obj Data..:? "Annotations"
        augmentedManifests <-
          obj Data..:? "AugmentedManifests" Data..!= Prelude.mempty
        dataFormat <- obj Data..:? "DataFormat"
        documents <- obj Data..:? "Documents"
        entityList <- obj Data..:? "EntityList"
        entityTypes <- obj Data..:? "EntityTypes" Data..!= Prelude.mempty
        Prelude.pure EntityRecognizerInputDataConfig' {..}

-- Feeds the fields into the salt in declaration order.
instance Prelude.Hashable EntityRecognizerInputDataConfig where
  hashWithSalt
    salt
    EntityRecognizerInputDataConfig'
      { annotations,
        augmentedManifests,
        dataFormat,
        documents,
        entityList,
        entityTypes
      } =
      salt
        `Prelude.hashWithSalt` annotations
        `Prelude.hashWithSalt` augmentedManifests
        `Prelude.hashWithSalt` dataFormat
        `Prelude.hashWithSalt` documents
        `Prelude.hashWithSalt` entityList
        `Prelude.hashWithSalt` entityTypes

-- Forces each field in declaration order.
instance Prelude.NFData EntityRecognizerInputDataConfig where
  rnf
    EntityRecognizerInputDataConfig'
      { annotations,
        augmentedManifests,
        dataFormat,
        documents,
        entityList,
        entityTypes
      } =
      Prelude.rnf annotations
        `Prelude.seq` Prelude.rnf augmentedManifests
        `Prelude.seq` Prelude.rnf dataFormat
        `Prelude.seq` Prelude.rnf documents
        `Prelude.seq` Prelude.rnf entityList
        `Prelude.seq` Prelude.rnf entityTypes

-- Encodes to a JSON object. Optional fields that are 'Prelude.Nothing' are
-- left out of the object; "EntityTypes" is always emitted.
instance Data.ToJSON EntityRecognizerInputDataConfig where
  toJSON EntityRecognizerInputDataConfig' {..} =
    Data.object (Prelude.catMaybes fields)
    where
      fields =
        [ Prelude.fmap ("Annotations" Data..=) annotations,
          Prelude.fmap ("AugmentedManifests" Data..=) augmentedManifests,
          Prelude.fmap ("DataFormat" Data..=) dataFormat,
          Prelude.fmap ("Documents" Data..=) documents,
          Prelude.fmap ("EntityList" Data..=) entityList,
          Prelude.Just ("EntityTypes" Data..= entityTypes)
        ]