{-# LANGUAGE OverloadedStrings   #-}
{-# LANGUAGE ScopedTypeVariables #-}
-- | Avro supports a JSON representation of Avro objects alongside the
-- Avro binary format. An Avro schema can be used to generate and
-- validate JSON representations of Avro objects.
--
-- The JSON format is the same format as used for default values in
-- schemas except unions are encoded differently. Non-union values are
-- encoded as follows:
--
-- +--------------+----------+----------+
-- |Avro Type     |JSON Type |Example   |
-- +==============+==========+==========+
-- |null          |null      |null      |
-- +--------------+----------+----------+
-- |boolean       |boolean   |true      |
-- +--------------+----------+----------+
-- |int, long     |integer   |1         |
-- +--------------+----------+----------+
-- |float, double |number    |1.1       |
-- +--------------+----------+----------+
-- |bytes         |string    |"\u00FF"  |
-- +--------------+----------+----------+
-- |string        |string    |"foo"     |
-- +--------------+----------+----------+
-- |record        |object    |{"a":1}   |
-- +--------------+----------+----------+
-- |enum          |string    |"FOO"     |
-- +--------------+----------+----------+
-- |array         |array     |[1]       |
-- +--------------+----------+----------+
-- |map           |object    |{"a":1}   |
-- +--------------+----------+----------+
-- |fixed         |string    |"\u00FF"  |
-- +--------------+----------+----------+
--
-- (Table from the Avro 1.8.2 specification:
-- <https://avro.apache.org/docs/1.8.2/spec.html#schema_record>)
--
-- Bytes and fixed are encoded as JSON strings where each byte is
-- translated into the corresponding Unicode codepoint between 0–255,
-- which includes non-printable characters. Note that this encoding
-- happens at the Unicode code-point level, meaning it is independent
-- of text encoding. (JSON is, by definition, encoded in UTF-8.)
--
-- Unions are encoded as an object with a single field that specifies
-- the "branch" of the union. If the branch is a primitive type like
-- @"string"@, the name of the primitive type is used:
--
-- @
-- { "string" : "foo" }
-- @
--
-- For named types (record, enum and fixed), the name of the type is
-- used:
--
-- @
-- { "MyRecord" : { ... } }
-- @
module Data.Avro.JSON where

import           Data.Semigroup       ((<>))

import qualified Data.Aeson           as Aeson
import           Data.ByteString.Lazy (ByteString)
import           Data.HashMap.Strict  ((!))
import qualified Data.HashMap.Strict  as HashMap
import           Data.List.NonEmpty   (NonEmpty (..))
import qualified Data.List.NonEmpty   as NE
import           Data.Tagged
import qualified Data.Text            as Text

import           Data.Avro            (FromAvro (..), Result (..), ToAvro (..))
import qualified Data.Avro            as Avro
import           Data.Avro.Schema     (Schema, parseAvroJSON)
import qualified Data.Avro.Schema     as Schema
import qualified Data.Avro.Types      as Avro

import qualified Data.Vector          as V

-- | Decode a JSON value into an Avro value, guided by the given schema.
--
-- The work is delegated to 'parseAvroJSON'; this function supplies the
-- type environment built from the schema and the handler used for
-- union schemas. The union handler accepts:
--
-- * JSON @null@, when 'Schema.Null' is one of the union's branches;
--
-- * a JSON object with exactly one field, whose key names the branch
--   and whose value is decoded against that branch's schema.
--
-- Any other JSON value, an empty object, an object with more than one
-- field, or a branch name that is not part of the union yields a
-- failed 'Result'.
decodeAvroJSON :: Schema -> Aeson.Value -> Result (Avro.Value Schema)
decodeAvroJSON schema json =
  parseAvroJSON union env schema json
  where
    env =
      Schema.buildTypeEnvironment missing schema

    missing name =
      fail ("Type " <> show name <> " not in schema")

    union (Schema.Union schemas) Aeson.Null
      | Schema.Null `elem` schemas =
          pure $ Avro.Union schemas Schema.Null Avro.Null
      | otherwise =
          fail "Null not in union."
    union (Schema.Union schemas) (Aeson.Object obj) =
      -- Matching on the field list makes the "exactly one field" rule
      -- explicit and avoids partial lookups ('head', '!') on the object.
      case HashMap.toList obj of
        [] ->
          fail "Invalid encoding of union: empty object ({})."
        [(branch, value)] ->
          case HashMap.lookup (canonicalize branch) names of
            Just t -> do
              nested <- parseAvroJSON union env t value
              return (Avro.Union schemas t nested)
            Nothing ->
              fail ("Type '" <> Text.unpack branch <> "' not in union: " <> show schemas)
        _ ->
          fail "Invalid encoding of union: object with too many fields."
      where
        -- Built-in type names are looked up verbatim; any other name is
        -- parsed as a full name and rendered again before the lookup.
        canonicalize name
          | isBuiltIn name = name
          | otherwise      = Schema.renderFullname $ Schema.parseFullname name

        -- Branch schemas of this union, indexed by their type name.
        names =
          HashMap.fromList [(Schema.typeName t, t) | t <- V.toList schemas]
    union Schema.Union{} _ =
      Avro.Error "Invalid JSON representation for union: has to be a JSON object with exactly one field."
    union _ _ =
      error "Impossible: function given non-union schema."

    isBuiltIn name = name `elem` [ "null", "boolean", "int", "long", "float"
                                 , "double", "bytes", "string", "array", "map" ]

-- | Convert a 'Aeson.Value' into a type that has an Avro schema. The
-- schema is used to validate the JSON and will return an 'Error' if
-- the JSON object is not encoded correctly or does not match the schema.
fromJSON :: forall a. (FromAvro a) => Aeson.Value -> Result a
fromJSON json = decodeAvroJSON schema json >>= fromAvro
  where
    -- The schema is selected by the requested result type @a@.
    schema = untag (Avro.schema :: Tagged a Schema)

-- | Parse a 'ByteString' as JSON and convert it to a type with an
-- Avro schema. Will return 'Error' if the input is not valid JSON or
-- the JSON does not convert with the specified schema.
parseJSON :: forall a. (FromAvro a) => ByteString -> Result a
parseJSON input = either Error fromJSON (Aeson.eitherDecode input)

-- | Convert an object with an Avro schema to JSON using that schema.
--
-- We always need the schema to /encode/ to JSON because representing
-- unions requires using the names of named types.
toJSON :: forall a. (ToAvro a) => a -> Aeson.Value
toJSON = Aeson.toJSON . toAvro