{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-}
module FlatBuffers.Internal.Compiler.Parser where
import Control.Monad ( when )
import qualified Control.Monad.Combinators.NonEmpty as NE
import qualified Data.ByteString as BS
import Data.Coerce ( coerce )
import Data.Functor ( (<&>), void )
import Data.List.NonEmpty ( NonEmpty((:|)) )
import qualified Data.List.NonEmpty as NE
import qualified Data.Map.Strict as Map
import Data.Maybe ( catMaybes )
import Data.Scientific ( Scientific )
import Data.Text ( Text )
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import Data.Void ( Void )
import Data.Word ( Word8 )
import FlatBuffers.Internal.Compiler.SyntaxTree
import FlatBuffers.Internal.Constants ( fileIdentifierSize )
import Text.Megaparsec
import Text.Megaparsec.Char
import qualified Text.Megaparsec.Char.Lexer as L
import Text.Read ( readMaybe )
type Parser = Parsec Void String
schema :: Parser Schema
schema = do
sc
includes <- catMaybes <$> many (Just <$> include <|> Nothing <$ nativeInclude)
decls <- many (decl <|> failOnInclude)
eof
pure $ Schema includes (catMaybes decls)
where
failOnInclude =
rword "include" *> fail "\"include\" statements must be at the beginning of the file."
<|> (rword "native_include" *> fail "\"native_include\" statements must be at the beginning of the file.")
decl :: Parser (Maybe Decl)
decl =
choice
[ Just . DeclN <$> namespaceDecl
, Just . DeclT <$> tableDecl
, Just . DeclS <$> structDecl
, Just . DeclE <$> enumDecl
, Just . DeclU <$> unionDecl
, Just . DeclR <$> rootDecl
, Just . DeclFI <$> fileIdentifierDecl
, Just . DeclA <$> attributeDecl
, Nothing <$ fileExtensionDecl
, Nothing <$ jsonObj
, Nothing <$ rpcDecl
]
sc :: Parser ()
sc = L.space space1 lineCmnt blockCmnt
where
lineCmnt = L.skipLineComment "//"
blockCmnt = L.skipBlockComment "/*" "*/"
lexeme :: Parser a -> Parser a
lexeme = L.lexeme sc
symbol :: String -> Parser String
symbol = L.symbol sc
rword :: String -> Parser ()
rword w = (lexeme . try) (string w *> notFollowedBy (alphaNumChar <|> char '_'))
curly, square, parens :: Parser a -> Parser a
curly = between (symbol "{") (symbol "}")
square = between (symbol "[") (symbol "]")
parens = between (symbol "(") (symbol ")")
commaSep :: Parser a -> Parser [a]
commaSep p = sepBy p (symbol ",")
commaSep1 :: Parser a -> Parser (NonEmpty a)
commaSep1 p = NE.sepBy1 p (symbol ",")
semi, colon :: Parser ()
semi = void $ symbol ";"
colon = void $ symbol ":"
ident :: Parser Ident
ident = label "identifier" $ (lexeme . try) identifier
where
identifier = fmap (Ident . T.pack) $ (:) <$> letterChar <*> many (alphaNumChar <|> char '_')
typ :: Parser Type
typ =
TInt8 <$ (rword "int8" <|> rword "byte") <|>
TInt16 <$ (rword "int16" <|> rword "short") <|>
TInt32 <$ (rword "int32" <|> rword "int") <|>
TInt64 <$ (rword "int64" <|> rword "long") <|>
TWord8 <$ (rword "uint8" <|> rword "ubyte") <|>
TWord16 <$ (rword "uint16" <|> rword "ushort") <|>
TWord32 <$ (rword "uint32" <|> rword "uint") <|>
TWord64 <$ (rword "uint64" <|> rword "ulong") <|>
TFloat <$ (rword "float32" <|> rword "float") <|>
TDouble <$ (rword "float64" <|> rword "double") <|>
TBool <$ rword "bool" <|>
TString <$ rword "string" <|>
label "type identifier" (TRef <$> typeRef) <|>
label "vector type" vector
where
vector = TVector <$> between
(symbol "[" *> (notFollowedBy (symbol "[") <|> fail "nested vector types not supported" ))
(symbol "]")
typ
typeRef :: Parser TypeRef
typeRef = do
idents <- many (try (ident <* symbol "."))
i <- ident
pure $ TypeRef (Namespace (coerce idents)) i
tableField :: Parser TableField
tableField = do
i <- ident
colon
t <- typ
def <- optional (symbol "=" *> defaultVal)
md <- metadata
semi
pure $ TableField i t def md
structField :: Parser StructField
structField = do
i <- ident
colon
t <- typ
md <- metadata
semi
pure $ StructField i t md
tableDecl :: Parser TableDecl
tableDecl = do
rword "table"
i <- ident
md <- metadata
fs <- curly (many tableField)
pure $ TableDecl i md fs
structDecl :: Parser StructDecl
structDecl = do
rword "struct"
i <- ident
md <- metadata
fs <- curly (NE.some structField)
pure $ StructDecl i md fs
enumDecl :: Parser EnumDecl
enumDecl = do
rword "enum"
i <- ident
colon
t <- typ
md <- metadata
v <- curly (commaSep1 enumVal)
pure $ EnumDecl i t md v
enumVal :: Parser EnumVal
enumVal = EnumVal <$> ident <*> optional (symbol "=" *> intLiteral)
unionDecl :: Parser UnionDecl
unionDecl = do
rword "union"
i <- ident
md <- metadata
v <- curly (commaSep1 unionVal)
pure $ UnionDecl i md v
unionVal :: Parser UnionVal
unionVal = UnionVal <$> optional (try (ident <* colon)) <*> typeRef
namespaceDecl :: Parser NamespaceDecl
namespaceDecl =
NamespaceDecl . Namespace . coerce <$>
(rword "namespace" *> sepBy ident (symbol ".") <* semi)
stringLiteral :: Parser StringLiteral
stringLiteral =
label "string literal" $
fmap (StringLiteral . T.pack) . lexeme $
char '"' >> manyTill L.charLiteral (char '"')
intLiteral :: Parser IntLiteral
intLiteral =
label "integer literal" . lexeme $
L.signed sc L.decimal
attributeVal :: Parser AttributeVal
attributeVal =
choice
[ AttrI . unIntLiteral <$> intLiteral
, AttrS . unStringLiteral <$> stringLiteral
]
defaultVal :: Parser DefaultVal
defaultVal =
choice
[ DefaultBool True <$ rword "true"
, DefaultBool False <$ rword "false"
, DefaultNum <$> label "number literal" (lexeme (L.signed sc L.scientific))
, ident <&> \(Ident ref) -> DefaultRef (ref :| [])
, stringLiteral >>= \(StringLiteral str) ->
case T.strip str of
"true" -> pure $ DefaultBool True
"false" -> pure $ DefaultBool False
other ->
case readMaybe @Scientific (T.unpack other) of
Just n -> pure $ DefaultNum n
Nothing ->
case NE.nonEmpty (T.words str) of
Just refs -> pure $ DefaultRef refs
Nothing -> fail "Expected 'true', 'false', a number, or one or more identifiers"
]
metadata :: Parser Metadata
metadata =
label "metadata"
. fmap (Metadata . Map.fromList . maybe [] NE.toList)
. optional
. parens
. commaSep1 $
(,) <$> attributeName <*> optional (colon *> attributeVal)
include :: Parser Include
include = Include <$> (rword "include" *> stringLiteral <* semi)
nativeInclude :: Parser ()
nativeInclude = void (rword "native_include" >> stringLiteral >> semi)
rootDecl :: Parser RootDecl
rootDecl = RootDecl <$> (rword "root_type" *> typeRef <* semi)
fileExtensionDecl :: Parser ()
fileExtensionDecl = void (rword "file_extension" *> stringLiteral <* semi)
fileIdentifierDecl :: Parser FileIdentifierDecl
fileIdentifierDecl = do
rword "file_identifier"
fi <- coerce stringLiteral
let byteCount = BS.length (T.encodeUtf8 fi)
let codePointCount = T.length fi
when (byteCount /= fileIdentifierSize) $
if codePointCount == byteCount
then fail $ "file_identifier must be exactly " <> show (fileIdentifierSize @Word8) <> " characters"
else fail $ "file_identifier must be exactly " <> show (fileIdentifierSize @Word8) <> " UTF-8 code units"
semi
pure (FileIdentifierDecl fi)
attributeDecl :: Parser AttributeDecl
attributeDecl = AttributeDecl <$> (rword "attribute" *> attributeName <* semi)
attributeName :: Parser Text
attributeName = coerce stringLiteral <|> coerce ident
jsonObj :: Parser ()
jsonObj =
label "JSON object" (void jobject)
where
json = choice [void jstring, void jnumber, jbool, jnull, void jarray, void jobject]
jnull = rword "null"
jbool = rword "true" <|> rword "false"
jstring = stringLiteral
jnumber = lexeme $ L.signed sc L.scientific
jarray = square (commaSep json)
jobject = curly (commaSep keyValuePair)
keyValuePair = do
void stringLiteral <|> void ident
colon
json
rpcDecl :: Parser ()
rpcDecl = void $ rword "rpc_service" >> ident >> curly (NE.some rpcMethod)
rpcMethod :: Parser ()
rpcMethod = ident >> parens ident >> colon >> ident >> metadata >> void semi