module Spark.Core.Internal.TypesFunctions(
isNullable,
iInnerStrictType,
columnType,
unsafeCastType,
intType,
arrayType,
compatibleTypes,
arrayType',
frameTypeFromCol,
colTypeFromFrame,
canNull,
structField,
structType,
structTypeFromFields,
structTypeTuple,
structTypeTuple',
tupleType,
structName,
iSingleField,
) where
import Control.Monad.Except
import qualified Data.List.NonEmpty as N
import Control.Arrow(second)
import Data.Function(on)
import Data.List(sort, nub, sortBy)
import qualified Data.Map.Strict as M
import qualified Data.Text as T
import Data.Text(Text, intercalate)
import qualified Data.Vector as V
import Formatting
import Spark.Core.Internal.TypesStructures
import Spark.Core.StructuresInternal
import Spark.Core.Internal.RowGenericsFrom(FromSQL(..), TryS)
import Spark.Core.Internal.Utilities
import Spark.Core.Internal.TypesStructuresRepr(DataTypeRepr, DataTypeElementRepr)
import qualified Spark.Core.Internal.TypesStructuresRepr as DTR
import Spark.Core.Try
unsafeCastType :: SQLType a -> SQLType b
unsafeCastType (SQLType dt) = SQLType dt
columnType :: SQLType a -> DataType
columnType (SQLType dt) = dt
isNullable :: DataType -> Bool
isNullable (StrictType _) = False
isNullable (NullableType _) = True
frameTypeFromCol :: DataType -> StructType
frameTypeFromCol (StrictType (Struct struct)) = struct
frameTypeFromCol dt = _structFromUnfields [("value", dt)]
colTypeFromFrame :: StructType -> DataType
colTypeFromFrame st @ (StructType fs) = case V.toList fs of
[StructField {
structFieldName = fname,
structFieldType = (StrictType dt)}] | fname == "value" ->
StrictType dt
_ -> StrictType (Struct st)
compatibleTypes :: DataType -> DataType -> Bool
compatibleTypes (StrictType sdt) (StrictType sdt') = _compatibleTypesStrict sdt sdt'
compatibleTypes (NullableType sdt) (NullableType sdt') = _compatibleTypesStrict sdt sdt'
compatibleTypes _ _ = False
instance FromSQL DataType where
_cellToValue = _cellToValue >=> _sDataTypeFromRepr
_sDataTypeFromRepr :: DataTypeRepr -> TryS DataType
_sDataTypeFromRepr (DTR.DataTypeRepr l) = snd <$> _sToTreeRepr l
_sToTreeRepr :: [DataTypeElementRepr] -> TryS (Int, DataType)
_sToTreeRepr [] = throwError $ sformat "_sToTreeRepr: empty list"
_sToTreeRepr [dtr] | null (DTR.fieldPath dtr) =
_decodeLeaf dtr []
_sToTreeRepr l = do
let f dtr = case DTR.fieldPath dtr of
[] -> []
(h : t) -> [(h, dtr')] where dtr' = dtr { DTR.fieldPath = t }
let hDtrt = case filter (null . DTR.fieldPath) l of
[dtr] -> pure dtr
_ ->
throwError $ sformat ("_decodeList: invalid top with "%sh) l
let withHeads = concatMap f l
let g = myGroupBy withHeads
let groupst = M.toList g <&> \(h, l') ->
_sToTreeRepr l' <&> second (StructField (FieldName h))
groups <- sequence groupst
checkedGroups <- _packWithIndex groups
hDtr <- hDtrt
_decodeLeaf hDtr checkedGroups
_packWithIndex :: (Show t) => [(Int, t)] -> TryS [t]
_packWithIndex l = _check 0 $ sortBy (compare `on` fst) l
_check :: (Show t) => Int -> [(Int, t)] -> TryS [t]
_check _ [] = pure []
_check n ((n', x):t) =
if n == n'
then (x : ) <$> _check (n+1) t
else
throwError $ sformat ("_check: could not match arguments at index "%sh%" for argument "%sh) n ((n', x):t)
_decodeLeaf :: DataTypeElementRepr -> [StructField] -> TryS (Int, DataType)
_decodeLeaf dtr l = _decodeLeafStrict dtr l <&> \sdt ->
if DTR.isNullable dtr
then (DTR.fieldIndex dtr, NullableType sdt)
else (DTR.fieldIndex dtr, StrictType sdt)
_decodeLeafStrict :: DataTypeElementRepr -> [StructField] -> TryS StrictDataType
_decodeLeafStrict dtr [sf] | DTR.typeId dtr == 11 =
pure $ ArrayType (structFieldType sf)
_decodeLeafStrict dtr l | DTR.typeId dtr == 10 =
pure . Struct . StructType . V.fromList $ l
_decodeLeafStrict dtr [] = case DTR.typeId dtr of
0 -> pure IntType
1 -> pure StringType
2 -> pure BoolType
n -> throwError $ sformat ("_decodeLeafStrict: unknown type magic id "%sh) n
_decodeLeafStrict dtr l =
throwError $ sformat ("_decodeLeafStrict: cannot interpret dtr="%sh%" and fields="%sh) dtr l
_compatibleTypesStrict :: StrictDataType -> StrictDataType -> Bool
_compatibleTypesStrict IntType IntType = True
_compatibleTypesStrict DoubleType DoubleType = True
_compatibleTypesStrict StringType StringType = True
_compatibleTypesStrict (ArrayType et) (ArrayType et') = compatibleTypes et et'
_compatibleTypesStrict (Struct (StructType v)) (Struct (StructType v')) =
(length v == length v') &&
and (V.zipWith compatibleTypes (structFieldType <$> v) (structFieldType <$> v'))
_compatibleTypesStrict _ _ = False
tupleType :: SQLType a -> SQLType b -> SQLType (a, b)
tupleType (SQLType dt1) (SQLType dt2) =
SQLType $ structType [structField "_1" dt1, structField "_2" dt2]
intType :: DataType
intType = StrictType IntType
structField :: T.Text -> DataType -> StructField
structField txt = StructField (FieldName txt)
structType :: [StructField] -> DataType
structType = StrictType . Struct . StructType . V.fromList
arrayType' :: DataType -> DataType
arrayType' = StrictType . ArrayType
canNull :: DataType -> DataType
canNull = NullableType . iInnerStrictType
arrayType :: SQLType a -> SQLType [a]
arrayType (SQLType dt) = SQLType (arrayType' dt)
iInnerStrictType :: DataType -> StrictDataType
iInnerStrictType (StrictType st) = st
iInnerStrictType (NullableType st) = st
iSingleField :: DataType -> Maybe DataType
iSingleField (StrictType (Struct (StructType fields))) = case V.toList fields of
[StructField _ dt] -> Just dt
_ -> Nothing
iSingleField _ = Nothing
structName :: StructType -> Text
structName (StructType fields) =
"struct(" <> intercalate "," (unFieldName . structFieldName <$> V.toList fields) <> ")"
structTypeTuple :: N.NonEmpty DataType -> StructType
structTypeTuple dts =
let numFields = length dts
rawFieldNames = ("_" <> ) . show' <$> (1 N.:| [2..numFields])
fieldNames = N.toList $ unsafeFieldName <$> rawFieldNames
fieldTypes = N.toList dts
in forceRight $ structTypeFromFields (zip fieldNames fieldTypes)
structTypeTuple' :: N.NonEmpty DataType -> DataType
structTypeTuple' = StrictType . Struct . structTypeTuple
structTypeFromFields :: [(FieldName, DataType)] -> Try StructType
structTypeFromFields [] = tryError "You cannot build an empty structure"
structTypeFromFields ((hfn, hdt):t) =
let fs = (hfn, hdt) : t
ct = StructType $ uncurry StructField <$> V.fromList fs
names = fst <$> fs
numNames = length names
numDistincts = length . nub $ names
in if numNames == numDistincts
then return ct
else tryError $ sformat ("Duplicate field names when building the struct: "%sh) (sort names)
_structFromUnfields :: [(T.Text, DataType)] -> StructType
_structFromUnfields l = StructType . V.fromList $ x where
x = [StructField (FieldName name) dt | (name, dt) <- l]