{-# LANGUAGE OverloadedStrings #-}

module Spark.Core.ColumnSpec where

import Test.Hspec

import Spark.Core.Column
import Spark.Core.Dataset
import Spark.Core.Functions
import Spark.Core.Types
import Spark.Core.ColumnFunctions
import Spark.Core.Internal.Utilities

-- Placeholder phantom types (no constructors, unused in this spec).
data Z
data Y

-- Centers a column on its mean and divides by the mean of the squared deviations.
myScaler :: Column ref Double -> Column ref Double
myScaler col =
  let cnt = asDouble (countCol col)
      m = sumCol col / cnt
      centered = col .- m
      stdDev = sumCol (centered * centered) / cnt
  in centered ./ stdDev

spec :: Spec
spec = do
  describe "ColumnSpec: ensure rules compile correctly" $ do
    let ds = dataset [(1,2)] :: Dataset (Int, Int)
    let c1 = ds // _1
    let c2 = ds // _2
    let c1' = untypedCol c1
    let c2' = untypedCol c2
    let i1 = 3 :: Int
    let o1 = constant 4 :: LocalData Int
    let o2 = 5 :: LocalData Int
    let o1' = asLocalObservable o1
    let o2' = asLocalObservable o2
    -- These tests only need to type-check; the trivial assertion keeps hspec satisfied.
    it "+ should not blow up" $ do
      let z1 = c1 + c2
      let z2 = c1' + c2'
      let z3 = c1 + 1
      let z4 = 1 + c1
      'a' `shouldBe` 'a'
    it ".+ should not blow up with columns" $ do
      let z1 = c1 .+ c2
      let z2 = c1' .+ c2'
      let z3 = c1 .+ c2'
      let z4 = c1' .+ c2
      let z5 = c1 .+ o1
      let z6 = c1 .+ o1'
      'a' `shouldBe` 'a'
    it "simple aggregations" $ do
      let c3 = c1 + (c2 .+ sumCol c2)
      let ds2 = pack1 c3
      nodeType ds2 `shouldBe` (buildType :: SQLType Int)
    it "mean" $ do
      let ds' = dataset [1, 2] :: Dataset Double
      let c = asCol ds'
      let cnt = asDouble (countCol c)
      let m = traceHint "m=" $ sumCol c / cnt
      let centered = c .- m
      let stdDev = sumCol (centered * centered) / cnt
      let scaled = traceHint "scaled=" $ centered ./ stdDev
      let ds2 = pack1 scaled
      nodeType ds2 `shouldBe` (buildType :: SQLType Double)
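    -- A hedged sketch (not part of the original spec): myScaler is defined above
    -- but never exercised. This compile-level check assumes it composes with
    -- asCol, pack1 and nodeType exactly like the hand-inlined version in the
    -- "mean" test; the test name is an illustrative addition.
    it "myScaler should compile against a Double column" $ do
      let ds' = dataset [1, 2, 3] :: Dataset Double
      let scaled = myScaler (asCol ds')
      let ds2 = pack1 scaled
      nodeType ds2 `shouldBe` (buildType :: SQLType Double)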