{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE CPP #-}
module Data.TPTP.Parse.Combinators (
whitespace,
atom,
var,
distinctObject,
function,
predicate,
sort,
tff1Sort,
type_,
number,
term,
literal,
clause,
unsortedFirstOrder,
sortedFirstOrder,
monomorphicFirstOrder,
polymorphicFirstOrder,
unit,
tptp,
intro,
parent,
source,
info
) where
#if MIN_VERSION_base(4, 8, 0)
import Prelude hiding (pure, (<$>), (<*>), (*>), (<*))
#endif
import Control.Applicative (pure, (<*>), (*>), (<*), (<|>), optional)
import Data.Char (isAscii, isAsciiLower, isAsciiUpper, isDigit, isPrint)
import Data.Function (on)
import Data.Functor ((<$>), ($>))
import Data.List (sortBy, genericLength)
import Data.List.NonEmpty (NonEmpty)
import qualified Data.List.NonEmpty as NEL (fromList, toList)
import qualified Data.Scientific as Sci (base10Exponent, coefficient)
import Data.Text (Text)
import qualified Data.Text as Text (pack, unpack, cons)
import Data.Attoparsec.Text as Atto (
Parser,
(<?>), char, string, decimal, scientific, signed, isEndOfLine, endOfLine,
satisfy, option, eitherP, choice, manyTill, takeWhile, skipSpace, skipMany,
skipWhile, endOfInput, sepBy, sepBy1
)
import Data.TPTP hiding (name, clause)
import qualified Data.TPTP as TPTP (name)
comment :: Parser ()
comment = char '%' *> skipWhile (not . isEndOfLine)
*> (endOfLine <|> endOfInput)
<?> "comment"
blockComment :: Parser ()
blockComment = string "/*" *> bc <?> "block comment"
where
bc = skipWhile (/= '*') *> (string "*/" $> () <|> bc)
whitespace :: Parser ()
whitespace = skipSpace *> skipMany ((comment <|> blockComment) *> skipSpace)
<?> "whitespace"
lexeme :: Parser a -> Parser a
lexeme p = p <* whitespace
{-# INLINE lexeme #-}
integer :: Parser Integer
integer = lexeme decimal <?> "integer"
{-# INLINE integer #-}
token :: Text -> Parser Text
token t = lexeme (string t) <?> "token " ++ Text.unpack t
{-# INLINE token #-}
op :: Char -> Parser Char
op c = lexeme (char c) <?> "operator " ++ [c]
{-# INLINE op #-}
parens :: Parser a -> Parser a
parens p = op '(' *> p <* op ')' <?> "parens"
{-# INLINE parens #-}
optionalParens :: Parser a -> Parser a
optionalParens p = parens p <|> p
{-# INLINE optionalParens #-}
brackets :: Parser a -> Parser a
brackets p = op '[' *> p <* op ']' <?> "brackets"
{-# INLINE brackets #-}
bracketList :: Parser a -> Parser [a]
bracketList p = brackets (p `sepBy` op ',') <?> "bracket list"
{-# INLINE bracketList #-}
bracketList1 :: Parser a -> Parser (NonEmpty a)
bracketList1 p = NEL.fromList <$> brackets (p `sepBy1` op ',')
<?> "bracket list 1"
{-# INLINE bracketList1 #-}
application :: Parser f -> Parser a -> Parser (f, [a])
application f a = (,) <$> f <*> option [] (parens (a `sepBy1` op ','))
{-# INLINE application #-}
labeled :: Text -> Parser a -> Parser a
labeled l p = token l *> parens p
{-# INLINE labeled #-}
comma :: Parser a -> Parser a
comma p = op ',' *> p
{-# INLINE comma #-}
maybeP :: Parser a -> Parser (Maybe a)
maybeP = optional . comma
{-# INLINE maybeP #-}
enum :: (Named a, Enum a, Bounded a) => Parser a
enum = choice
$ fmap (\(n, c) -> token n $> c <?> "reserved " ++ Text.unpack n)
$ sortBy (flip compare `on` fst)
$ fmap (\c -> (TPTP.name c, c)) [minBound..]
isAlphaNumeric :: Char -> Bool
isAlphaNumeric c = isAsciiLower c || isAsciiUpper c || isDigit c || c == '_'
isAsciiPrint :: Char -> Bool
isAsciiPrint c = isAscii c && isPrint c
lowerWord, upperWord :: Parser Text
lowerWord = Text.cons <$> satisfy isAsciiLower <*> Atto.takeWhile isAlphaNumeric
upperWord = Text.cons <$> satisfy isAsciiUpper <*> Atto.takeWhile isAlphaNumeric
quoted :: Char -> Parser Text
quoted q = Text.pack <$> (char q *> manyTill escaped (char q))
<?> "quoted " ++ [q]
where
escaped = char '\\' *> (char q $> q <|> char '\\' $> '\\')
<|> satisfy isAsciiPrint
atom :: Parser Atom
atom = Atom <$> lexeme (quoted '\'' <|> lowerWord) <?> "atom"
{-# INLINE atom #-}
var :: Parser Var
var = Var <$> lexeme upperWord <?> "var"
{-# INLINE var #-}
distinctObject :: Parser DistinctObject
distinctObject = DistinctObject <$> lexeme (quoted '"') <?> "distinct object"
{-# INLINE distinctObject #-}
reserved :: (Named a, Enum a, Bounded a) => Parser (Reserved a)
reserved = extended <$> lexeme lowerWord <?> "reserved"
{-# INLINE reserved #-}
name :: (Named a, Enum a, Bounded a) => Parser (Name a)
name = Reserved <$> (char '$' *> reserved)
<|> Defined <$> atom
<?> "name"
function :: Parser (Name Function)
function = name <?> "function"
{-# INLINE function #-}
predicate :: Parser (Name Predicate)
predicate = name <?> "predicate"
{-# INLINE predicate #-}
sort :: Parser (Name Sort)
sort = name <?> "sort"
{-# INLINE sort #-}
tff1Sort :: Parser TFF1Sort
tff1Sort = SortVariable <$> var
<|> uncurry TFF1Sort <$> application sort tff1Sort
<?> "tff1 sort"
mapping :: Parser a -> Parser ([a], a)
mapping s = (,) <$> option [] (args <* op '>') <*> s
where
args = fmap (:[]) s <|> parens (s `sepBy1` op '*')
type_ :: Parser Type
type_ = uncurry . tff1Type
<$> (maybe [] NEL.toList <$> optional prefix) <*> matrix
<?> "type"
where
prefix = token "!>" *> bracketList1 sortVar <* op ':'
sortVar = var <* op ':' <* token "$tType"
matrix = optionalParens (mapping tff1Sort)
number :: Parser Number
number = RationalConstant <$> signed integer <* char '/' <*> integer
<|> real <$> lexeme scientific
<?> "number"
where
real n
| Sci.base10Exponent n == 0 = IntegerConstant (Sci.coefficient n)
| otherwise = RealConstant n
term :: Parser Term
term = parens term
<|> uncurry Function <$> application function term
<|> Variable <$> var
<|> Number <$> number
<|> DistinctTerm <$> distinctObject
<?> "term"
eq :: Parser Sign
eq = enum <?> "eq"
{-# INLINE eq #-}
literal :: Parser Literal
literal = parens literal
<|> Equality <$> term <*> eq <*> term
<|> uncurry Predicate <$> application predicate term
<?> "literal"
sign :: Parser Sign
sign = option Positive (op '~' $> Negative)
{-# INLINE sign #-}
signedLiteral :: Parser (Sign, Literal)
signedLiteral = (,) <$> sign <*> literal <?> "signed literal"
{-# INLINE signedLiteral #-}
clause :: Parser Clause
clause = parens clause
<|> Clause . NEL.fromList <$> signedLiteral `sepBy1` op '|'
<?> "clause"
quantifier :: Parser Quantifier
quantifier = enum <?> "quantifier"
{-# INLINE quantifier #-}
connective :: Parser Connective
connective = enum <?> "connective"
{-# INLINE connective #-}
firstOrder :: Parser s -> Parser (FirstOrder s)
firstOrder p = do
f <- unitary
option f (Connected f <$> connective <*> firstOrder p)
where
unitary = parens (firstOrder p)
<|> Atomic <$> literal
<|> Quantified <$> quantifier <*> vs <* op ':' <*> unitary
<|> Negated <$> (op '~' *> unitary)
<?> "unitary first order"
vs = bracketList1 $ (,) <$> var <*> p
unsortedFirstOrder :: Parser UnsortedFirstOrder
unsortedFirstOrder = firstOrder unsorted
where unsorted = pure (Unsorted ()) <?> "unsorted"
sorted :: Parser s -> Parser (Sorted s)
sorted s = Sorted <$> optional (op ':' *> s) <?> "sorted"
sortedFirstOrder :: Parser SortedFirstOrder
sortedFirstOrder = monomorphicFirstOrder
monomorphicFirstOrder :: Parser MonomorphicFirstOrder
monomorphicFirstOrder = firstOrder (sorted sort) <?> "tff0"
quantifiedSort :: Parser QuantifiedSort
quantifiedSort = token "$tType" $> QuantifiedSort ()
polymorphicFirstOrder :: Parser PolymorphicFirstOrder
polymorphicFirstOrder = firstOrder (sorted (eitherP quantifiedSort tff1Sort))
<?> "tff1"
formula :: Language -> Parser Formula
formula = \case
CNF_ -> CNF <$> clause <?> "cnf"
FOF_ -> FOF <$> unsortedFirstOrder <?> "fof"
TFF_ -> tff <$> polymorphicFirstOrder <?> "tff"
where
tff f = case monomorphizeFirstOrder f of
Just f' -> TFF0 f'
Nothing -> TFF1 f
role :: Parser (Reserved Role)
role = reserved <?> "role"
{-# INLINE role #-}
language :: Parser Language
language = enum <?> "language"
{-# INLINE language #-}
declaration :: Language -> Parser Declaration
declaration l = token "type" *> comma (optionalParens typeDeclaration)
<|> Formula <$> role <*> comma (formula l)
<?> "declaration"
typeDeclaration :: Parser Declaration
typeDeclaration = Sort <$> atom <* op ':' <*> arity
<|> Typing <$> atom <* op ':' <*> type_
<?> "type declaration"
where
arity = genericLength . fst <$> mapping (token "$tType")
unitName :: Parser (Either Atom Integer)
unitName = eitherP atom (signed integer) <?> "unit name"
{-# INLINE unitName #-}
unitNames :: Parser (NonEmpty UnitName)
unitNames = bracketList1 unitName <?> "unit names"
{-# INLINE unitNames #-}
include :: Parser Unit
include = labeled "include" (Include <$> atom <*> maybeP unitNames) <* op '.'
<?> "include"
annotatedUnit :: Parser Unit
annotatedUnit = do
l <- language
let n = unitName
let d = declaration l
let a = maybeP annotation
parens (Unit <$> n <*> comma d <*> a) <* op '.'
<?> "annotated unit"
unit :: Parser Unit
unit = include <|> annotatedUnit <?> "unit"
tptp :: Parser TPTP
tptp = TPTP <$> manyTill unit endOfInput <?> "derivation"
intro :: Parser Intro
intro = enum <?> "intro"
{-# INLINE intro #-}
info :: Parser Info
info = labeled "description" (Description <$> atom)
<|> labeled "iquote" (Iquote <$> atom)
<|> labeled "status" (Status <$> reserved)
<|> labeled "assumptions" (Assumptions <$> unitNames)
<|> labeled "refutation" (Refutation <$> atom)
<|> labeled "new_symbols" (NewSymbols <$> atom <*> comma symbols)
<|> labeled "bind" (Bind <$> var <*> comma expr)
<|> Expression <$> expr
<|> uncurry Application <$> application atom info
<|> InfoNumber <$> number
<|> Infos <$> infos
where
symbols = bracketList (eitherP var atom)
infos :: Parser [Info]
infos = bracketList info <?> "infos"
{-# INLINE infos #-}
expr :: Parser Expression
expr = char '$' *> (labeled "fot" (Term <$> term)
<|> Logical <$> (language >>= parens . formula))
<?> "expression"
parent :: Parser Parent
parent = Parent <$> source <*> option [] (op ':' *> infos) <?> "parent"
source :: Parser Source
source = token "unknown" $> UnknownSource
<|> labeled "file" (File <$> atom <*> maybeP unitName)
<|> labeled "theory" (Theory <$> atom <*> maybeP infos)
<|> labeled "creator" (Creator <$> atom <*> maybeP infos)
<|> labeled "introduced" (Introduced <$> reserved <*> maybeP infos)
<|> labeled "inference" (Inference <$> atom <*> comma infos
<*> comma (bracketList parent))
<|> UnitSource <$> unitName
<?> "source"
annotation :: Parser Annotation
annotation = (,) <$> source <*> maybeP infos <?> "annotation"