{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
module Language.Avro.Parser
(
parseProtocol,
readWithImports,
parseAliases,
parseAnnotation,
parseDecimal,
parseImport,
parseMethod,
parseNamespace,
parseOrder,
parseSchema,
)
where
import Control.Monad (filterM)
import Data.Avro
import Data.Avro.Schema
import Data.Either (partitionEithers)
import qualified Data.Set as S
import qualified Data.Text as T
import qualified Data.Text.IO as T
import qualified Data.Vector as V
import Language.Avro.Types
import System.Directory (doesFileExist)
import System.FilePath
import Text.Megaparsec
import Text.Megaparsec.Char
import qualified Text.Megaparsec.Char.Lexer as L
spaces :: MonadParsec Char T.Text m => m ()
spaces = L.space space1 (L.skipLineComment "//") (L.skipBlockComment "/*" "*/")
lexeme :: MonadParsec Char T.Text m => m a -> m a
lexeme = L.lexeme spaces
symbol :: MonadParsec Char T.Text m => T.Text -> m T.Text
symbol = L.symbol spaces
reserved :: MonadParsec Char T.Text m => T.Text -> m T.Text
reserved = lexeme . chunk
number :: (MonadParsec Char T.Text m, Integral a) => m a
number = L.signed spaces (lexeme L.decimal) <|> lexeme L.octal <|> lexeme L.hexadecimal
floating :: (MonadParsec Char T.Text m, RealFloat a) => m a
floating = L.signed spaces (lexeme L.float)
strlit :: MonadParsec Char T.Text m => m T.Text
strlit = T.pack <$> (char '"' >> manyTill L.charLiteral (char '"'))
braces :: MonadParsec Char T.Text m => m a -> m a
braces = between (symbol "{") (symbol "}")
brackets :: MonadParsec Char T.Text m => m a -> m a
brackets = between (symbol "[") (symbol "]")
parens :: MonadParsec Char T.Text m => m a -> m a
parens = between (symbol "(") (symbol ")")
diamonds :: MonadParsec Char T.Text m => m a -> m a
diamonds = between (symbol "<") (symbol ">")
backticks :: MonadParsec Char T.Text m => m T.Text
backticks = T.pack <$> (char '`' >> manyTill L.charLiteral (char '`'))
ident :: MonadParsec Char T.Text m => m T.Text
ident = T.pack <$> ((:) <$> letterChar <*> many (alphaNumChar <|> char '_' <|> char '-'))
identifier :: MonadParsec Char T.Text m => m T.Text
identifier = lexeme (ident <|> backticks)
toNamedType :: [T.Text] -> TypeName
toNamedType [] = error "named types cannot be empty"
toNamedType xs = TN {baseName, namespace}
where
baseName = last xs
namespace = filter (/= "") $ init xs
multiNamedTypes :: [T.Text] -> [TypeName]
multiNamedTypes = fmap $ toNamedType . T.splitOn "."
parseAnnotation :: MonadParsec Char T.Text m => m Annotation
parseAnnotation = Annotation <$ symbol "@" <*> identifier <*> parens strlit
parseNamespace :: MonadParsec Char T.Text m => m Namespace
parseNamespace = toNs <$ (symbol "@" *> reserved "namespace") <*> parens strlit
where
toNs :: T.Text -> Namespace
toNs = Namespace . T.splitOn "."
parseAliases :: MonadParsec Char T.Text m => m Aliases
parseAliases = multiNamedTypes <$> parseFieldAlias
parseImport :: MonadParsec Char T.Text m => m ImportType
parseImport =
reserved "import"
*> ( impHelper IdlImport "idl"
<|> impHelper ProtocolImport "protocol"
<|> impHelper SchemaImport "schema"
)
where
impHelper :: MonadParsec Char T.Text m => (T.Text -> a) -> T.Text -> m a
impHelper ct t = ct <$> (reserved t *> strlit <* symbol ";")
parseProtocol :: MonadParsec Char T.Text m => m Protocol
parseProtocol =
buildProtocol <$ spaces <*> optional parseNamespace <* reserved "protocol"
<*> identifier
<*> braces (many serviceThing)
where
buildProtocol :: Maybe Namespace -> T.Text -> [ProtocolThing] -> Protocol
buildProtocol ns name things =
Protocol
ns
name
(S.fromList [i | ProtocolThingImport i <- things])
(S.fromList [t | ProtocolThingType t <- things])
(S.fromList [m | ProtocolThingMethod m <- things])
data ProtocolThing
= ProtocolThingImport ImportType
| ProtocolThingType Schema
| ProtocolThingMethod Method
serviceThing :: MonadParsec Char T.Text m => m ProtocolThing
serviceThing =
try (ProtocolThingImport <$> parseImport)
<|> try (ProtocolThingMethod <$> parseMethod)
<|> ProtocolThingType <$> parseSchema <* optional (symbol ";")
parseVector :: MonadParsec Char T.Text m => m a -> m (V.Vector a)
parseVector t = V.fromList <$> braces (lexeme $ sepBy1 t $ symbol ",")
parseTypeName :: MonadParsec Char T.Text m => m TypeName
parseTypeName = toNamedType . pure <$> identifier
parseOrder :: MonadParsec Char T.Text m => m Order
parseOrder =
symbol "@" *> reserved "order"
*> parens
( Ascending <$ string "\"ascending\""
<|> Descending <$ string "\"descending\""
<|> Ignore <$ string "\"ignore\""
)
parseFieldAlias :: MonadParsec Char T.Text m => m [T.Text]
parseFieldAlias =
symbol "@" *> reserved "aliases"
*> parens (brackets $ lexeme $ sepBy1 strlit $ symbol ",")
parseField :: MonadParsec Char T.Text m => m Field
parseField =
(\o t a n -> Field n a Nothing o t Nothing)
<$> optional parseOrder
<*> parseSchema
<*> option [] parseFieldAlias
<*> identifier
<* (symbol ";" <|> symbol "=" <* manyTill anySingle (symbol ";"))
parseArgument :: MonadParsec Char T.Text m => m Argument
parseArgument = Argument <$> parseSchema <*> identifier
parseMethod :: MonadParsec Char T.Text m => m Method
parseMethod =
(\r n a t o -> Method n a r t o)
<$> parseSchema
<*> identifier
<*> parens (option [] (lexeme $ sepBy1 parseArgument $ symbol ","))
<*> option Null (reserved "throws" *> parseSchema)
<*> option False (True <$ reserved "oneway")
<* symbol ";"
parseDecimal :: MonadParsec Char T.Text m => m Decimal
parseDecimal = toDec <$ reserved "decimal" <*> parens (lexeme $ sepBy1 number $ symbol ",")
where
toDec :: [Int] -> Decimal
toDec [precision] = Decimal (fromIntegral precision) 0
toDec [precision, scale] = Decimal (fromIntegral precision) (fromIntegral scale)
toDec _ = error "decimal types can only be specified using two numbers!"
parseSchema :: MonadParsec Char T.Text m => m Schema
parseSchema =
Null <$ (reserved "null" <|> reserved "void")
<|> Boolean <$ reserved "boolean"
<|> Int' <$ reserved "int"
<|> Int (Just Date) <$ reserved "date"
<|> Int (Just TimeMillis) <$ reserved "time_ms"
<|> Long' <$ reserved "long"
<|> Long . Just . DecimalL <$> parseDecimal
<|> Long (Just TimestampMillis) <$ reserved "timestamp_ms"
<|> Float <$ reserved "float"
<|> Double <$ reserved "double"
<|> Bytes' <$ reserved "bytes"
<|> String (Just UUID) <$ reserved "uuid"
<|> String' <$ reserved "string"
<|> Array <$ reserved "array" <*> diamonds parseSchema
<|> Map <$ reserved "map" <*> diamonds parseSchema
<|> Union <$ reserved "union" <*> parseVector parseSchema
<|> try
( flip Fixed
<$> option [] parseAliases <* reserved "fixed"
<*> parseTypeName
<*> parens number
<*> pure Nothing
)
<|> try
( flip Enum
<$> option [] parseAliases <* reserved "enum"
<*> parseTypeName
<*> pure Nothing
<*> parseVector identifier
)
<|> try
( flip Record
<$> option [] parseAliases <* (reserved "record" <|> reserved "error")
<*> parseTypeName
<*> pure Nothing
<*> optional parseOrder
<*> option [] (braces . many $ parseField)
)
<|> NamedType . toNamedType <$> lexeme (sepBy1 identifier $ char '.')
parseFile :: Parsec e T.Text a -> String -> IO (Either (ParseErrorBundle T.Text e) a)
parseFile p file = runParser p file <$> T.readFile file
(>>>=) :: Applicative m => Either a b -> (b -> m (Either a c)) -> m (Either a c)
Left x >>>= _ = pure (Left x)
Right y >>>= f = f y
readWithImports ::
FilePath ->
FilePath ->
IO (Either (ParseErrorBundle T.Text Char) Protocol)
readWithImports baseDir initialFile = do
initial <- parseFile parseProtocol (baseDir </> initialFile)
case initial of
Left e -> pure $ Left e
Right p -> do
possibleImps <- traverse (oneOfTwo . T.unpack) [i | IdlImport i <- S.toList $ imports p]
(lefts, rights) <- partitionEithers <$> traverse (>>>= readWithImports baseDir) possibleImps
pure $ case lefts of
e : _ -> Left e
_ -> Right $ S.foldl' (<>) p (S.fromList rights)
where
oneOfTwo :: FilePath -> IO (Either (ParseErrorBundle T.Text Char) FilePath)
oneOfTwo p = do
let dir = takeDirectory initialFile
path1 = baseDir </> p
path2 = baseDir </> dir </> p
options <- (,) <$> doesFileExist path1 <*> doesFileExist path2
pure $ case options of
(True, False) -> Right p
(False, True) -> Right $ dir </> p
(False, False) -> runParser (fail $ "Import not found: " ++ p) initialFile ""
(True, True)
| normalise dir == "." -> Right p
| otherwise -> runParser (fail $ "Duplicate files found: " ++ p) initialFile ""