{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Data.Tree.Parser.Penn.Megaparsec.Char (
pTree,
ParsableAsTerm(..),
PennTreeParserT,
PennTreeParser,
parse,
parseMaybe,
parseTest,
runParser,
runParser',
runParserT,
runParserT'
) where
import Data.Char as DCh
import Data.Tree
import Data.Void (Void)
import Data.Proxy (Proxy(..))
import Text.Megaparsec
import qualified Text.Megaparsec.Char as MC
import qualified Text.Megaparsec.Char.Lexer as Lex
import Control.Monad (forM_)
import Control.Monad.Trans.Class (lift)
import Control.Monad.Identity (Identity)
import Data.Tree.Parser.Penn.Megaparsec.Internal
spaceConsumer :: (MonadParsec err str m, Token str ~ Char) => m ()
spaceConsumer
= Lex.space
MC.space1
empty
empty
lexer :: (MonadParsec err str m, Token str ~ Char)
=> m term
-> m term
lexer = Lex.lexeme spaceConsumer
pParens :: (
MonadParsec err str m,
Token str ~ Char,
Tokens str ~ str
) => m term
-> m term
pParens =
between
(lexer $ single '(')
(lexer $ single ')')
invokeLabelParserRaw :: (MonadParsec err str m, Token str ~ Char)
=> m (Tokens str)
invokeLabelParserRaw
= takeWhile1P
(Just "Literal String")
(\x -> x /= '(' && x /= ')' && not (DCh.isSpace x))
invokeLabelParser :: (
Ord err,
ParsableAsTerm str term,
Monad m,
Token str ~ Char,
Tokens str ~ str
) => ParsecT err str m term
-> State str err
-> ParsecT err str m term
invokeLabelParser labelParser substate = do
(_, res) <- lift $ runParserT' (labelParser <* eof) substate
case res of
Left b@(ParseErrorBundle errors _) -> do
forM_ errors registerParseError
return undefined
Right label -> return label
pTree ::
(
Ord err,
ParsableAsTerm str term,
Monad m,
Token str ~ Char,
Tokens str ~ str
) => ParsecT err str m (Tree term)
pTree
= pParens pTreeInside <|> pTerminalNode <?> "Parsed Tree"
where
pTreeInside :: forall str. forall err. forall m. forall term. (
Ord err,
ParsableAsTerm str term,
Monad m,
Token str ~ Char,
Tokens str ~ str
) => ParsecT err str m (Tree term)
pTreeInside = do
state <- getParserState
maybeLabelRaw <- lexer $ optional invokeLabelParserRaw
let substate = state {
stateInput
= case maybeLabelRaw of
Just labelRaw -> labelRaw
Nothing -> tokensToChunk pxy []
,
stateOffset = 0
}
labelParsed <- invokeLabelParser pNonTerm substate
children <- many $ lexer pTree
return $ Node labelParsed children
where
pxy :: Proxy str
pxy = Proxy
pTerminalNode :: (
Ord err,
ParsableAsTerm str term,
Monad m,
Token str ~ Char,
Tokens str ~ str
) => ParsecT err str m (Tree term)
pTerminalNode = do
state <- getParserState
labelRaw <- lexer $ invokeLabelParserRaw
labelParsed <- do
let substate = state {
stateInput = labelRaw,
stateOffset = 0
}
invokeLabelParser pTerm substate
return $ Node labelParsed []
type PennTreeParserT str m term = ParsecT Void str m (Tree term)
type PennTreeParser str term = PennTreeParserT str Identity term