{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE ScopedTypeVariables #-}

module Spark.Core.Internal.ColumnStructures where

import Control.Arrow ((&&&))
import Data.Function(on)
import Data.Vector(Vector)

import Spark.Core.Internal.DatasetStructures
import Spark.Core.Internal.DatasetFunctions()
import Spark.Core.Internal.RowStructures
import Spark.Core.Internal.TypesStructures
import Spark.Core.Internal.OpStructures
import Spark.Core.StructuresInternal
import Spark.Core.Try

{-| The data structure that implements the notion of data columns.

The type on this one may either be a Cell or a proper type.

A column of data from a dataset
The ref is a reference potentially to the originating
dataset, but it may be more general than that to perform
type-safe tricks.

Unlike Spark, columns are always attached to a reference dataset or dataframe.
One cannot materialize a column out of thin air. In order to broadcast a value
along a given column, the `broadcast` function is provided.

TODO: try something like this https://www.vidarholen.net/contents/junk/catbag.html
-}
data ColumnData ref a = ColumnData {
  _cOrigin :: !UntypedDataset,
  _cType :: !DataType,
  _cOp :: !GeneralizedColOp,
  -- The name in the dataset.
  -- If not set, it will be deduced from the operation.
  _cReferingPath :: !(Maybe FieldName)
}

{-| A generalization of the column operation.

This structure is useful to performn some extra operations not supported by
the Spark engine:
 - express joins with an observable
 - keep track of DAGs of column operations (not implemented yet)
-}
data GeneralizedColOp =
    GenColExtraction !FieldPath
  | GenColFunction !SqlFunctionName !(Vector GeneralizedColOp)
  | GenColLit !DataType !Cell
    -- This is the extra operation that needs to be flattened with a broadcast.
  | BroadcastColOp !UntypedLocalData
  | GenColStruct !(Vector GeneralizedTransField)
  deriving (Eq, Show)

data GeneralizedTransField = GeneralizedTransField {
  gtfName :: !FieldName,
  gtfValue :: !GeneralizedColOp
} deriving (Eq, Show)

{-| A column of data from a dataset or a dataframe.

This column is typed: the operations on this column will be
validdated by Haskell' type inferenc.
-}
type Column ref a = ColumnData ref a

{-| An untyped column of data from a dataset or a dataframe.

This column is untyped and may not be properly constructed. Any error
will be found during the analysis phase at runtime.
-}
type DynColumn = Try (ColumnData UnknownReference Cell)


-- | (dev)
-- The type of untyped column data.
type UntypedColumnData = ColumnData UnknownReference Cell

{-| (dev)
A column for which the type of the cells is unavailable (at the type level),
 but for which the origin is available at the type level.
-}
type GenericColumn ref = Column ref Cell

{-| A dummy data type that indicates the data referenc is missing.
-}
data UnknownReference

{-| A tag that carries the reference information of a column at a
type level. This is useful when creating column.

See ref and colRef.
-}
data ColumnReference a = ColumnReference

instance forall ref a. Eq (ColumnData ref a) where
  (==) = (==) `on` (_cOrigin &&& _cType &&& _cOp &&& _cReferingPath)