{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TypeApplications #-} module Descript.Lex.Parse ( parse_ , parse ) where import Prelude hiding (lex) import Descript.Lex.Data import Descript.Misc import Text.Megaparsec hiding (parse) import Text.Megaparsec.Char import qualified Text.Megaparsec.Char.Lexer as L import Data.Ratio import Core.Data.List import Data.Text (Text) import qualified Data.Text as Text type Parser a = Parsec RangedError Text a type RParser a = Parser (a Range) parse_ :: ParseAction () (RangeStream Lex) parse_ file () = parse file parse :: SFile -> ParseResult (RangeStream Lex) parse = runFirstParser lexemes lexemes :: Parser (RangeStream Lex) lexemes = stripSeps <$> (white *> many lexeme) lexeme :: RParser Lex lexeme = LexPunc <$> punc <|> LexSymbol <$> symbol <|> LexPrim <$> prim "lexeme" punc :: RParser Punc punc = ranged puncFullRange <|> DeclModule <$> rword "module" <|> DeclImport <$> rword "import" "punctuation" puncFullRange :: Parser (an -> Punc an) puncFullRange = PhaseSep <$ genPuncSep "---" <|> Period <$ genPuncSuffix "." <|> Colon <$ genPuncInfix ":" <|> Question <$ genPuncSuffix "?" <|> Union <$ genPuncInfix "|" <|> PathFwd <$ genPuncInfix ">" <|> PathBwd <$ genPuncInfix "<" <|> ArrowEqFwd <$ genPuncInfix "=>" <|> OpenBracket <$ genPuncPrefix "[" <|> CloseBracket <$ genPuncSuffix "]" <|> OpenBrace <$ genPuncPrefix "{" <|> CloseBrace <$ genPuncSuffix "}" <|> Sep <$ sep symbol :: RParser Symbol symbol = genLexeme symbol' prim :: RParser Prim prim = genLexeme prim' symbol' :: Parser (an -> Symbol an) symbol' = Symbol <$@> symbolStr "symbol" prim' :: Parser (an -> Prim an) prim' = PrimNumber <$@> number <|> PrimText <$@> primText "primitive" symbolStr :: Parser String symbolStr = (:) <$> symStartChar <*> many symNextChar symStartChar :: Parser Char symStartChar = letterChar <|> char '_' <|> char '#' symNextChar :: Parser Char symNextChar = alphaNumChar <|> char '_' primText :: Parser Text primText = Text.pack <$> (char '"' *> manyTill L.charLiteral (char '"')) "string" unsignedNumber :: Parser Rational unsignedNumber = try (toRational @Double <$> L.float) <|> try (string "0x") *> (toRational @Integer <$> L.hexadecimal) <|> try (string "0o") *> (toRational @Integer <$> L.octal) <|> (%) <$> L.decimal <*> (char '/' *> L.decimal <|> pure 1) "unsigned number" number :: Parser Rational number = char '-' *> (negate <$> unsignedNumber) <|> unsignedNumber "number" sep :: Parser () sep = genPuncPrefix "," <|> genPuncPrefix "\n" "separator" rword :: Text -> Parser Range rword word = expRanged_ (Text.length word) $ genPuncPrefix (word `Text.snoc` ' ') genPuncInfix :: Text -> Parser () genPuncInfix lit = () <$ L.symbol white lit genPuncPrefix :: Text -> Parser () genPuncPrefix = genPuncInfix genPuncSuffix :: Text -> Parser () genPuncSuffix lit = () <$ try (white *> L.symbol whiteNoSep lit) genPuncSep :: Text -> Parser () genPuncSep lit = () <$ L.symbol whiteNoSep lit genLexeme :: Parser (Range -> a) -> Parser a genLexeme = L.lexeme whiteNoSep . ranged spaceNoSep :: Parser () spaceNoSep = () <$ oneOf [' ', '\t'] whiteNoSep :: Parser () whiteNoSep = L.space spaceNoSep lineComment blockComment white :: Parser () white = L.space space1 lineComment blockComment lineComment :: Parser () lineComment = L.skipLineComment "//" blockComment :: Parser () blockComment = L.skipBlockComment "/*" "*/" stripSeps :: RangeStream Lex -> RangeStream Lex stripSeps = strip $ (== LexPunc (Sep ())) . remAnns