{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE ViewPatterns #-}
module Data.Avro.Schema
(
Schema, Type(..)
, Field(..), Order(..)
, TypeName(..)
, renderFullname
, parseFullname
, mkEnum, mkUnion
, validateSchema
, typeName
, buildTypeEnvironment
, extractBindings
, Result(..)
, badValue
, resultToEither
, matches
, parseBytes
, serializeBytes
, parseAvroJSON
, overlay
, subdefinition
, expandNamedTypes
) where
import Control.Applicative
import Control.DeepSeq (NFData)
import Control.Monad.Except
import qualified Control.Monad.Fail as MF
import Control.Monad.State.Strict
import Data.Aeson (FromJSON (..), ToJSON (..), object, (.!=), (.:), (.:!), (.:?), (.=))
import qualified Data.Aeson as A
import Data.Aeson.Types (Parser, typeMismatch)
import qualified Data.Avro.Types as Ty
import qualified Data.ByteString as B
import qualified Data.ByteString.Base16 as Base16
import qualified Data.Char as Char
import Data.Function (on)
import Data.Hashable
import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as HashMap
import Data.Int
import qualified Data.IntMap as IM
import qualified Data.List as L
import Data.List.NonEmpty (NonEmpty (..))
import qualified Data.List.NonEmpty as NE
import Data.Maybe (catMaybes, fromMaybe, isJust)
import Data.Monoid (First (..))
import Data.Semigroup
import qualified Data.Set as S
import Data.String
import Data.Text (Text)
import qualified Data.Text as T
import Data.Text.Encoding as T
import qualified Data.Vector as V
import Prelude as P
import GHC.Generics (Generic)
type Schema = Type
data Type
=
Null
| Boolean
| Int | Long
| Float | Double
| Bytes | String
| Array { item :: Type }
| Map { values :: Type }
| NamedType TypeName
| Record { name :: TypeName
, aliases :: [TypeName]
, doc :: Maybe Text
, order :: Maybe Order
, fields :: [Field]
}
| Enum { name :: TypeName
, aliases :: [TypeName]
, doc :: Maybe Text
, symbols :: V.Vector Text
}
| Union { options :: V.Vector Type
}
| Fixed { name :: TypeName
, aliases :: [TypeName]
, size :: Int
}
deriving (Show, Generic, NFData)
instance Eq Type where
Null == Null = True
Boolean == Boolean = True
Int == Int = True
Long == Long = True
Float == Float = True
Double == Double = True
Bytes == Bytes = True
String == String = True
Array ty == Array ty2 = ty == ty2
Map ty == Map ty2 = ty == ty2
NamedType t == NamedType t2 = t == t2
Record name1 _ _ _ fs1 == Record name2 _ _ _ fs2 =
and [name1 == name2, fs1 == fs2]
Enum name1 _ _ s == Enum name2 _ _ s2 =
and [name1 == name2, s == s2]
Union a == Union b = a == b
Fixed name1 _ s == Fixed name2 _ s2 =
and [name1 == name2, s == s2]
_ == _ = False
mkEnum :: TypeName
-> [TypeName]
-> Maybe Text
-> [Text]
-> Type
mkEnum name aliases doc symbols = Enum name aliases doc (V.fromList symbols)
mkUnion :: NonEmpty Type -> Type
mkUnion = Union . V.fromList . NE.toList
data TypeName = TN { baseName :: T.Text
, namespace :: [T.Text]
}
deriving (Eq, Ord, Generic, NFData)
instance Show TypeName where
show = show . renderFullname
renderFullname :: TypeName -> T.Text
renderFullname TN { baseName, namespace } =
T.intercalate "." $ namespace ++ [baseName]
parseFullname :: T.Text -> TypeName
parseFullname (T.splitOn "." -> components) = TN { baseName, namespace }
where
baseName = last components
namespace = filter (/= "") (init components)
mkTypeName :: Maybe TypeName
-> Text
-> Maybe Text
-> TypeName
mkTypeName context name ns
| isFullName name = parseFullname name
| otherwise = case ns of
Just ns -> TN name $ filter (/= "") (T.splitOn "." ns)
Nothing -> TN name $ fromMaybe [] $ namespace <$> context
where isFullName = isJust . T.find (== '.')
instance IsString TypeName where
fromString = parseFullname . fromString
instance Hashable TypeName where
hashWithSalt s (renderFullname -> name) =
hashWithSalt (hashWithSalt s ("AvroTypeName" :: Text)) name
typeName :: Type -> Text
typeName bt =
case bt of
Null -> "null"
Boolean -> "boolean"
Int -> "int"
Long -> "long"
Float -> "float"
Double -> "double"
Bytes -> "bytes"
String -> "string"
Array _ -> "array"
Map _ -> "map"
NamedType name -> renderFullname name
Union ts -> typeName (V.head ts)
_ -> renderFullname $ name bt
data Field = Field { fldName :: Text
, fldAliases :: [Text]
, fldDoc :: Maybe Text
, fldOrder :: Maybe Order
, fldType :: Type
, fldDefault :: Maybe (Ty.Value Type)
}
deriving (Eq, Show, Generic, NFData)
data Order = Ascending | Descending | Ignore
deriving (Eq, Ord, Show, Generic, NFData)
instance FromJSON Type where
parseJSON = parseSchemaJSON Nothing
parseSchemaJSON :: Maybe TypeName
-> A.Value
-> Parser Schema
parseSchemaJSON context = \case
A.String s -> case s of
"null" -> return Null
"boolean" -> return Boolean
"int" -> return Int
"long" -> return Long
"float" -> return Float
"double" -> return Double
"bytes" -> return Bytes
"string" -> return String
somename -> return $ NamedType $ mkTypeName context somename Nothing
A.Array arr
| V.length arr > 0 ->
Union <$> V.mapM (parseSchemaJSON context) arr
| otherwise -> fail "Unions must have at least one type."
A.Object o -> do
logicalType :: Maybe Text <- o .:? "logicalType"
ty <- o .: "type"
case logicalType of
Just _ -> parseJSON (A.String ty)
Nothing -> case ty of
"map" -> Map <$> (parseSchemaJSON context =<< o .: "values")
"array" -> Array <$> (parseSchemaJSON context =<< o .: "items")
"record" -> do
name <- o .: "name"
namespace <- o .:? "namespace"
let typeName = mkTypeName context name namespace
mkAlias name = mkTypeName (Just typeName) name Nothing
aliases <- mkAliases typeName <$> (o .:? "aliases" .!= [])
doc <- o .:? "doc"
order <- o .:? "order" .!= Just Ascending
fields <- mapM (parseField typeName) =<< o .: "fields"
pure $ Record typeName aliases doc order fields
"enum" -> do
name <- o .: "name"
namespace <- o .:? "namespace"
let typeName = mkTypeName context name namespace
mkAlias name = mkTypeName (Just typeName) name Nothing
aliases <- mkAliases typeName <$> (o .:? "aliases" .!= [])
doc <- o .:? "doc"
symbols <- o .: "symbols"
pure $ mkEnum typeName aliases doc symbols
"fixed" -> do
name <- o .: "name"
namespace <- o .:? "namespace"
let typeName = mkTypeName context name namespace
mkAlias name = mkTypeName (Just typeName) name Nothing
aliases <- mkAliases typeName <$> (o .:? "aliases" .!= [])
size <- o .: "size"
pure $ Fixed typeName aliases size
"null" -> pure Null
"boolean" -> pure Boolean
"int" -> pure Int
"long" -> pure Long
"float" -> pure Float
"double" -> pure Double
"bytes" -> pure Bytes
"string" -> pure String
s -> fail $ "Unrecognized object type: " <> T.unpack s
invalid -> typeMismatch "Invalid JSON for Avro Schema" invalid
mkAliases :: TypeName
-> [Text]
-> [TypeName]
mkAliases context = map $ \ name ->
mkTypeName (Just context) name Nothing
parseField :: TypeName
-> A.Value
-> Parser Field
parseField record = \case
A.Object o -> do
name <- o .: "name"
doc <- o .:? "doc"
ty <- parseSchemaJSON (Just record) =<< o .: "type"
let err = error "Haskell Avro bindings does not support default for aliased or recursive types at this time."
defM <- o .:! "default"
def <- case parseFieldDefault err ty <$> defM of
Just (Success x) -> return (Just x)
Just (Error e) -> fail e
Nothing -> return Nothing
order <- o .:? ("order" :: Text) .!= Just Ascending
let mkAlias name = mkTypeName (Just record) name Nothing
aliases <- o .:? "aliases" .!= []
return $ Field name aliases doc order ty def
invalid -> typeMismatch "Field" invalid
instance ToJSON Type where
toJSON = schemaToJSON Nothing
schemaToJSON :: Maybe TypeName
-> Schema
-> A.Value
schemaToJSON context = \case
Null -> A.String "null"
Boolean -> A.String "boolean"
Int -> A.String "int"
Long -> A.String "long"
Float -> A.String "float"
Double -> A.String "double"
Bytes -> A.String "bytes"
String -> A.String "string"
Array tn ->
object [ "type" .= ("array" :: Text), "items" .= schemaToJSON context tn ]
Map tn ->
object [ "type" .= ("map" :: Text), "values" .= schemaToJSON context tn ]
NamedType name -> toJSON $ render context name
Record {..} ->
let opts = catMaybes
[ ("order" .=) <$> order
, ("doc" .=) <$> doc
]
in object $ opts ++
[ "type" .= ("record" :: Text)
, "name" .= render context name
, "aliases" .= (render (Just name) <$> aliases)
, "fields" .= (fieldToJSON name <$> fields)
]
Enum {..} ->
let opts = catMaybes [("doc" .=) <$> doc]
in object $ opts ++
[ "type" .= ("enum" :: Text)
, "name" .= render context name
, "aliases" .= (render (Just name) <$> aliases)
, "symbols" .= symbols
]
Union {..} -> toJSON $ schemaToJSON context <$> options
Fixed {..} ->
object [ "type" .= ("fixed" :: Text)
, "name" .= render context name
, "aliases" .= (render (Just name) <$> aliases)
, "size" .= size
]
where render context typeName
| Just ctx <- context
, namespace ctx == namespace typeName = baseName typeName
| otherwise = renderFullname typeName
fieldToJSON context Field {..} =
let opts = catMaybes
[ ("order" .=) <$> fldOrder
, ("doc" .=) <$> fldDoc
, ("default" .=) <$> fldDefault
]
in object $ opts ++
[ "name" .= fldName
, "type" .= schemaToJSON (Just context) fldType
, "aliases" .= fldAliases
]
instance ToJSON (Ty.Value Type) where
toJSON av =
case av of
Ty.Null -> A.Null
Ty.Boolean b -> A.Bool b
Ty.Int i -> A.Number (fromIntegral i)
Ty.Long i -> A.Number (fromIntegral i)
Ty.Float f -> A.Number (realToFrac f)
Ty.Double d -> A.Number (realToFrac d)
Ty.Bytes bs -> A.String (serializeBytes bs)
Ty.String t -> A.String t
Ty.Array vec -> A.Array (V.map toJSON vec)
Ty.Map mp -> A.Object (HashMap.map toJSON mp)
Ty.Record _ flds -> A.Object (HashMap.map toJSON flds)
Ty.Union _ _ Ty.Null -> A.Null
Ty.Union _ ty val -> object [ typeName ty .= val ]
Ty.Fixed _ bs -> A.String (serializeBytes bs)
Ty.Enum _ _ txt -> A.String txt
data Result a = Success a | Error String
deriving (Eq, Ord, Show)
badValue :: Show t => t -> String -> Result a
badValue v t = fail $ "Unexpected value for '" <> t <> "': " <> show v
resultToEither :: Result b -> Either String b
resultToEither r =
case r of
Success v -> Right v
Error err -> Left err
{-# INLINE resultToEither #-}
instance Monad Result where
return = pure
Success a >>= k = k a
Error e >>= _ = Error e
#if !MIN_VERSION_base(4,13,0)
fail = MF.fail
#endif
instance Functor Result where
fmap f (Success x) = Success (f x)
fmap _ (Error e) = Error e
instance MF.MonadFail Result where
fail = Error
instance MonadError String Result where
throwError = fail
catchError a@(Success _) _ = a
catchError (Error e) k = k e
instance Applicative Result where
pure = Success
(<*>) = ap
instance Alternative Result where
empty = mzero
(<|>) = mplus
instance MonadPlus Result where
mzero = fail "mzero"
mplus a@(Success _) _ = a
mplus _ b = b
instance Semigroup (Result a) where
(<>) = mplus
instance Monoid (Result a) where
mempty = fail "Empty Result"
mappend = (<>)
instance Foldable Result where
foldMap _ (Error _) = mempty
foldMap f (Success y) = f y
foldr _ z (Error _) = z
foldr f z (Success y) = f y z
instance Traversable Result where
traverse _ (Error err) = pure (Error err)
traverse f (Success v) = Success <$> f v
parseFieldDefault :: (TypeName -> Maybe Type)
-> Schema
-> A.Value
-> Result (Ty.Value Schema)
parseFieldDefault env schema value = parseAvroJSON defaultUnion env schema value
where defaultUnion (Union ts) val = Ty.Union ts (V.head ts) <$> parseFieldDefault env (V.head ts) val
defaultUnion _ _ = error "Impossible: not Union."
parseAvroJSON :: (Type -> A.Value -> Result (Ty.Value Type))
-> (TypeName -> Maybe Type)
-> Type
-> A.Value
-> Result (Ty.Value Type)
parseAvroJSON union env (NamedType name) av =
case env name of
Nothing -> fail $ "Could not resolve type name for " <> T.unpack (renderFullname name)
Just t -> parseAvroJSON union env t av
parseAvroJSON union _ u@Union{} av = union u av
parseAvroJSON union env ty av =
case av of
A.String s ->
case ty of
String -> return $ Ty.String s
Enum {..} ->
case s `V.elemIndex` symbols of
Just i -> pure $ Ty.Enum ty i s
Nothing -> fail $ "JSON string is not one of the expected symbols for enum '" <> show name <> "': " <> T.unpack s
Bytes -> Ty.Bytes <$> parseBytes s
Fixed {..} -> do
bytes <- parseBytes s
let len = B.length bytes
when (len /= size) $
fail $ "Fixed string wrong size. Expected " <> show size <> " but got " <> show len
return $ Ty.Fixed ty bytes
A.Bool b -> case ty of
Boolean -> return $ Ty.Boolean b
_ -> avroTypeMismatch ty "boolean"
A.Number i ->
case ty of
Int -> return $ Ty.Int (floor i)
Long -> return $ Ty.Long (floor i)
Float -> return $ Ty.Float (realToFrac i)
Double -> return $ Ty.Double (realToFrac i)
_ -> avroTypeMismatch ty "number"
A.Array vec ->
case ty of
Array t -> Ty.Array <$> V.mapM (parseAvroJSON union env t) vec
_ -> avroTypeMismatch ty "array"
A.Object obj ->
case ty of
Map mTy -> Ty.Map <$> mapM (parseAvroJSON union env mTy) obj
Record {..} ->
do let lkAndParse f =
case HashMap.lookup (fldName f) obj of
Nothing -> case fldDefault f of
Just v -> return v
Nothing -> fail $ "Decode failure: No record field '" <> T.unpack (fldName f) <> "' and no default in schema."
Just v -> parseAvroJSON union env (fldType f) v
Ty.Record ty . HashMap.fromList <$> mapM (\f -> (fldName f,) <$> lkAndParse f) fields
_ -> avroTypeMismatch ty "object"
A.Null -> case ty of
Null -> return Ty.Null
_ -> avroTypeMismatch ty "null"
parseBytes :: Text -> Result B.ByteString
parseBytes bytes = case T.find (not . inRange) bytes of
Just badChar -> fail $ "Invalid character in bytes or fixed string representation: " <> show badChar
Nothing -> return $ B.pack $ fromIntegral . Char.ord <$> T.unpack bytes
where inRange (Char.ord -> c) = c >= 0x00 && c <= 0xFF
serializeBytes :: B.ByteString -> Text
serializeBytes = T.pack . map (Char.chr . fromIntegral) . B.unpack
avroTypeMismatch :: Type -> Text -> Result a
avroTypeMismatch expected actual =
fail $ "Could not resolve type '" <> T.unpack actual <> "' with expected type: " <> show expected
instance ToJSON Order where
toJSON o =
case o of
Ascending -> A.String "ascending"
Descending -> A.String "descending"
Ignore -> A.String "ignore"
instance FromJSON Order where
parseJSON (A.String s) =
case s of
"ascending" -> return Ascending
"descending" -> return Descending
"ignore" -> return Ignore
_ -> fail $ "Unknown string for order: " <> T.unpack s
parseJSON j = typeMismatch "Order" j
validateSchema :: Schema -> Parser ()
validateSchema _sch = return ()
buildTypeEnvironment :: Applicative m
=> (TypeName -> m Type)
-> Schema
-> (TypeName -> m Type)
buildTypeEnvironment failure from =
\ forTy -> case HashMap.lookup forTy env of
Nothing -> failure forTy
Just res -> pure res
where
env = extractBindings from
matches :: Type -> Type -> Bool
matches n@NamedType{} t = typeName n == typeName t
matches t n@NamedType{} = typeName t == typeName n
matches (Array itemA) (Array itemB) = matches itemA itemB
matches a@Record{} b@Record{} =
and [ name a == name b
, length (fields a) == length (fields b)
, and $ zipWith fieldMatches (fields a) (fields b)
]
where fieldMatches = matches `on` fldType
matches a@Union{} b@Union{} = and $ V.zipWith matches (options a) (options b)
matches t1 t2 = t1 == t2
extractBindings :: Type -> HashMap.HashMap TypeName Type
extractBindings = \case
t@Record{..} ->
let withRecord = HashMap.fromList $ (name : aliases) `zip` repeat t
in HashMap.unions $ withRecord : (extractBindings . fldType <$> fields)
e@Enum{..} -> HashMap.fromList $ (name : aliases) `zip` repeat e
Union{..} -> HashMap.unions $ V.toList $ extractBindings <$> options
f@Fixed{..} -> HashMap.fromList $ (name : aliases) `zip` repeat f
Array{..} -> extractBindings item
Map{..} -> extractBindings values
_ -> HashMap.empty
expandNamedTypes :: Schema -> Schema
expandNamedTypes =
flip evalState HashMap.empty . go
where
expandField f@Field{fldType} = (\x -> f { fldType = x }) <$> go fldType
go = \case
t@(NamedType n) -> fromMaybe t <$> gets (HashMap.lookup n)
a@Array{item} -> (\x -> a { item = x }) <$> go item
m@Map{values} -> (\x -> m { values = x }) <$> go values
u@Union{options} -> Union <$> traverse go options
r@Record{name, fields} -> do
fields' <- traverse expandField fields
let r' = r { fields = fields' }
modify' (HashMap.insert name r')
pure r'
r@Enum{name} -> do
modify' (HashMap.insert name r)
pure r
other -> pure other
overlay :: Type -> Type -> Type
overlay input supplement = overlayType input
where
overlayField f@Field{..} = f { fldType = overlayType fldType }
overlayType a@Array{..} = a { item = overlayType item }
overlayType m@Map{..} = m { values = overlayType values }
overlayType r@Record{..} = r { fields = map overlayField fields }
overlayType u@Union{..} = Union (V.map overlayType options)
overlayType nt@(NamedType _) = rebind nt
overlayType other = other
rebind (NamedType tn) = HashMap.lookupDefault (NamedType tn) tn bindings
bindings = extractBindings supplement
subdefinition :: Type -> Text -> Maybe Type
subdefinition schema name = mkTypeName Nothing name Nothing `HashMap.lookup` extractBindings schema