{-# LANGUAGE DataKinds           #-}
{-# LANGUAGE DeriveGeneric       #-}
{-# LANGUAGE FlexibleContexts    #-}
{-# LANGUAGE OverloadedLabels    #-}
{-# LANGUAGE OverloadedStrings   #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications    #-}

-- The same example than example1 in 'porcupine-core', but with s3 access
-- enabled by 'runPipelineTask'. Don't forget to map locations to s3 urls in the
-- 'porcupine.yaml' generated by calling 'exampleS3 write-config-template', or
-- else it will act exactly like example1.
--
-- Don't forget to enable OverloadedLabels and import Data.Locations.Accessors.AWS

import           Data.Aeson
import           Data.DocRecord
import qualified Data.HashMap.Strict          as HM
import qualified Data.Text                    as T
import           GHC.Generics
import           Porcupine

import           Data.Locations.Accessors.AWS


data User = User { userName    :: T.Text
                 , userSurname :: T.Text
                 , userAge     :: Int }
  deriving (Generic)
instance FromJSON User

newtype Analysis = Analysis { numLetters :: HM.HashMap Char Int }
  deriving (Generic)
instance ToJSON Analysis

-- | How to load users
userFile :: DataSource User
userFile = dataSource ["Inputs", "User"]
                      (somePureDeserial JSONSerial)

-- | How to write analysis
analysisFile :: DataSink Analysis
analysisFile = dataSink ["Outputs", "Analysis"]
                        (somePureSerial JSONSerial)

-- | The simple computation we want to perform
computeAnalysis :: User -> Analysis
computeAnalysis (User name surname _) = Analysis $
  HM.fromListWith (+) $ [(c,1) | c <- T.unpack name]
                     ++ [(c,1) | c <- T.unpack surname]

-- | The task combining the three previous operations.
--
-- This task may look very opaque from the outside, having no parameters and no
-- return value. But we will be able to reuse it over different users without
-- having to change it at all.
analyseOneUser :: (LogThrow m) => PTask m () ()
analyseOneUser =
  loadData userFile >>> arr computeAnalysis >>> writeData analysisFile

mainTask :: (LogThrow m) => PTask m () ()
mainTask =
  -- First we get the ids of the users that we want to analyse. We need only one
  -- field that will contain a range of values, see IndexRange. By default, this
  -- range contains just one value, zero.
  getOption ["Settings"] (docField @"users" (oneIndex (0::Int)) "The user ids to load")
  -- We turn the range we read into a full lazy list:
  >>> arr enumTRIndices
  -- Then we just map over these ids and call analyseOneUser each time:
  >>> parMapTask_ "userId" analyseOneUser

main :: IO ()
main = runPipelineTask (FullConfig "exampleS3" "porcupine.yaml" "porcupine-core/examples/data" ())
                       (  #aws <-- useAWS Discover
                            -- We just add #aws on top of the
                            -- baseContexts. Credentials will be discovered.
                       :& baseContexts "")
                       mainTask ()