{-|
This module contains lexer and error message primitives for a simple lambda calculus parser. It
demonstrates a simple but decently informative implementation of error message propagation.
-}

{-# language StrictData #-}

module FlatParse.Examples.BasicLambda.Lexer where

import FlatParse.Basic hiding (Parser, runParser, string, char, cut)

import qualified FlatParse.Basic as FP
import qualified Data.ByteString as B
import Language.Haskell.TH

import Data.String
import qualified Data.Set as S

import qualified Data.ByteString.UTF8 as UTF8

--------------------------------------------------------------------------------

-- | An expected item which is displayed in error messages.
data Expected
  = Msg String  -- ^ An error message.
  | Lit String  -- ^ A literal expected thing.
  deriving (Expected -> Expected -> Bool
(Expected -> Expected -> Bool)
-> (Expected -> Expected -> Bool) -> Eq Expected
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Expected -> Expected -> Bool
== :: Expected -> Expected -> Bool
$c/= :: Expected -> Expected -> Bool
/= :: Expected -> Expected -> Bool
Eq, Int -> Expected -> ShowS
[Expected] -> ShowS
Expected -> [Char]
(Int -> Expected -> ShowS)
-> (Expected -> [Char]) -> ([Expected] -> ShowS) -> Show Expected
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Expected -> ShowS
showsPrec :: Int -> Expected -> ShowS
$cshow :: Expected -> [Char]
show :: Expected -> [Char]
$cshowList :: [Expected] -> ShowS
showList :: [Expected] -> ShowS
Show, Eq Expected
Eq Expected =>
(Expected -> Expected -> Ordering)
-> (Expected -> Expected -> Bool)
-> (Expected -> Expected -> Bool)
-> (Expected -> Expected -> Bool)
-> (Expected -> Expected -> Bool)
-> (Expected -> Expected -> Expected)
-> (Expected -> Expected -> Expected)
-> Ord Expected
Expected -> Expected -> Bool
Expected -> Expected -> Ordering
Expected -> Expected -> Expected
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: Expected -> Expected -> Ordering
compare :: Expected -> Expected -> Ordering
$c< :: Expected -> Expected -> Bool
< :: Expected -> Expected -> Bool
$c<= :: Expected -> Expected -> Bool
<= :: Expected -> Expected -> Bool
$c> :: Expected -> Expected -> Bool
> :: Expected -> Expected -> Bool
$c>= :: Expected -> Expected -> Bool
>= :: Expected -> Expected -> Bool
$cmax :: Expected -> Expected -> Expected
max :: Expected -> Expected -> Expected
$cmin :: Expected -> Expected -> Expected
min :: Expected -> Expected -> Expected
Ord)

instance IsString Expected where fromString :: [Char] -> Expected
fromString = [Char] -> Expected
Lit

-- | A parsing error.
data Error
  = Precise Pos Expected     -- ^ A precisely known error, like leaving out "in" from "let".
  | Imprecise Pos [Expected] -- ^ An imprecise error, when we expect a number of different things,
                             --   but parse something else.
  deriving Int -> Error -> ShowS
[Error] -> ShowS
Error -> [Char]
(Int -> Error -> ShowS)
-> (Error -> [Char]) -> ([Error] -> ShowS) -> Show Error
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Error -> ShowS
showsPrec :: Int -> Error -> ShowS
$cshow :: Error -> [Char]
show :: Error -> [Char]
$cshowList :: [Error] -> ShowS
showList :: [Error] -> ShowS
Show

errorPos :: Error -> Pos
errorPos :: Error -> Pos
errorPos (Precise Pos
p Expected
_)   = Pos
p
errorPos (Imprecise Pos
p [Expected]
_) = Pos
p

-- | Merge two errors. Inner errors (which were thrown at points with more consumed inputs)
--   are preferred. If errors are thrown at identical input positions, we prefer precise errors
--   to imprecise ones.
--
--   The point of prioritizing inner and precise errors is to suppress the deluge of "expected"
--   items, and instead try to point to a concrete issue to fix.
merge :: Error -> Error -> Error
merge :: Error -> Error -> Error
merge Error
e Error
e' = case (Error -> Pos
errorPos Error
e, Error -> Pos
errorPos Error
e') of
  (Pos
p, Pos
p') | Pos
p Pos -> Pos -> Bool
forall a. Ord a => a -> a -> Bool
< Pos
p' -> Error
e'
  (Pos
p, Pos
p') | Pos
p Pos -> Pos -> Bool
forall a. Ord a => a -> a -> Bool
> Pos
p' -> Error
e
  (Pos
p, Pos
p')          -> case (Error
e, Error
e') of
    (Precise{}      , Error
_               ) -> Error
e
    (Error
_              , Precise{}       ) -> Error
e'
    (Imprecise Pos
_ [Expected]
es , Imprecise Pos
_ [Expected]
es' ) -> Pos -> [Expected] -> Error
Imprecise Pos
p ([Expected]
es [Expected] -> [Expected] -> [Expected]
forall a. [a] -> [a] -> [a]
++ [Expected]
es')
{-# noinline merge #-} -- merge is "cold" code, so we shouldn't inline it.

type Parser = FP.Parser Error

-- | Pretty print an error. The `B.ByteString` input is the source file. The offending line from the
--   source is displayed in the output.
prettyError :: B.ByteString -> Error -> String
prettyError :: ByteString -> Error -> [Char]
prettyError ByteString
b Error
e =

  let pos :: Pos
      pos :: Pos
pos      = case Error
e of Imprecise Pos
pos [Expected]
e -> Pos
pos
                           Precise Pos
pos Expected
e   -> Pos
pos
      ls :: [[Char]]
ls       = ByteString -> [[Char]]
FP.linesUtf8 ByteString
b
      (Int
l, Int
c)   = [(Int, Int)] -> (Int, Int)
forall a. HasCallStack => [a] -> a
head ([(Int, Int)] -> (Int, Int)) -> [(Int, Int)] -> (Int, Int)
forall a b. (a -> b) -> a -> b
$ ByteString -> [Pos] -> [(Int, Int)]
FP.posLineCols ByteString
b [Pos
pos]
      line :: [Char]
line     = if Int
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< [[Char]] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [[Char]]
ls then [[Char]]
ls [[Char]] -> Int -> [Char]
forall a. HasCallStack => [a] -> Int -> a
!! Int
l else [Char]
""
      linum :: [Char]
linum    = Int -> [Char]
forall a. Show a => a -> [Char]
show Int
l
      lpad :: [Char]
lpad     = (Char -> Char) -> ShowS
forall a b. (a -> b) -> [a] -> [b]
map (Char -> Char -> Char
forall a b. a -> b -> a
const Char
' ') [Char]
linum

      expected :: Expected -> [Char]
expected (Lit [Char]
s) = ShowS
forall a. Show a => a -> [Char]
show [Char]
s
      expected (Msg [Char]
s) = [Char]
s

      err :: Error -> [Char]
err (Precise Pos
_ Expected
e)    = Expected -> [Char]
expected Expected
e
      err (Imprecise Pos
_ [Expected]
es) = [Expected] -> [Char]
imprec ([Expected] -> [Char]) -> [Expected] -> [Char]
forall a b. (a -> b) -> a -> b
$ Set Expected -> [Expected]
forall a. Set a -> [a]
S.toList (Set Expected -> [Expected]) -> Set Expected -> [Expected]
forall a b. (a -> b) -> a -> b
$ [Expected] -> Set Expected
forall a. Ord a => [a] -> Set a
S.fromList [Expected]
es

      imprec :: [Expected] -> String
      imprec :: [Expected] -> [Char]
imprec []     = ShowS
forall a. HasCallStack => [Char] -> a
error [Char]
"impossible"
      imprec [Expected
e]    = Expected -> [Char]
expected Expected
e
      imprec (Expected
e:[Expected]
es) = Expected -> [Char]
expected Expected
e [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Expected] -> [Char]
go [Expected]
es where
        go :: [Expected] -> [Char]
go []     = [Char]
""
        go [Expected
e]    = [Char]
" or " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Expected -> [Char]
expected Expected
e
        go (Expected
e:[Expected]
es) = [Char]
", " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Expected -> [Char]
expected Expected
e [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Expected] -> [Char]
go [Expected]
es

  in Int -> [Char]
forall a. Show a => a -> [Char]
show Int
l [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
":" [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
c [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
":\n" [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++
     [Char]
lpad   [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
"|\n" [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++
     [Char]
linum  [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
"| " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
line [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
"\n" [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++
     [Char]
lpad   [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
"| " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> Char -> [Char]
forall a. Int -> a -> [a]
replicate Int
c Char
' ' [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
"^\n" [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++
     [Char]
"parse error: expected " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++
     Error -> [Char]
err Error
e

-- | Imprecise cut: we slap a list of items on inner errors.
cut :: Parser a -> [Expected] -> Parser a
cut :: forall a. Parser a -> [Expected] -> Parser a
cut Parser a
p [Expected]
es = do
  Pos
pos <- ParserT PureMode Error Pos
forall (st :: ZeroBitType) e. ParserT st e Pos
getPos
  Parser a -> Error -> (Error -> Error -> Error) -> Parser a
forall (st :: ZeroBitType) e a.
ParserT st e a -> e -> (e -> e -> e) -> ParserT st e a
FP.cutting Parser a
p (Pos -> [Expected] -> Error
Imprecise Pos
pos [Expected]
es) Error -> Error -> Error
merge

-- | Precise cut: we propagate at most a single error.
cut' :: Parser a -> Expected -> Parser a
cut' :: forall a. Parser a -> Expected -> Parser a
cut' Parser a
p Expected
e = do
  Pos
pos <- ParserT PureMode Error Pos
forall (st :: ZeroBitType) e. ParserT st e Pos
getPos
  Parser a -> Error -> (Error -> Error -> Error) -> Parser a
forall (st :: ZeroBitType) e a.
ParserT st e a -> e -> (e -> e -> e) -> ParserT st e a
FP.cutting Parser a
p (Pos -> Expected -> Error
Precise Pos
pos Expected
e) Error -> Error -> Error
merge

runParser :: Parser a -> B.ByteString -> Result Error a
runParser :: forall a. Parser a -> ByteString -> Result Error a
runParser = Parser Error a -> ByteString -> Result Error a
forall e a. Parser e a -> ByteString -> Result e a
FP.runParser

-- | Run parser, print pretty error on failure.
testParser :: Show a => Parser a -> String -> IO ()
testParser :: forall a. Show a => Parser a -> [Char] -> IO ()
testParser Parser a
p [Char]
str = case [Char] -> ByteString
UTF8.fromString [Char]
str of
  ByteString
b -> case Parser a -> ByteString -> Result Error a
forall a. Parser a -> ByteString -> Result Error a
runParser Parser a
p ByteString
b of
    Err Error
e  -> [Char] -> IO ()
putStrLn ([Char] -> IO ()) -> [Char] -> IO ()
forall a b. (a -> b) -> a -> b
$ ByteString -> Error -> [Char]
prettyError ByteString
b Error
e
    OK a
a ByteString
_ -> a -> IO ()
forall a. Show a => a -> IO ()
print a
a
    Result Error a
Fail   -> [Char] -> IO ()
putStrLn [Char]
"uncaught parse error"

-- | Parse a line comment.
lineComment :: Parser ()
lineComment :: Parser ()
lineComment =
  ParserT PureMode Error Word8
-> (Word8 -> Parser ()) -> Parser () -> Parser ()
forall (st :: ZeroBitType) e a r.
ParserT st e a
-> (a -> ParserT st e r) -> ParserT st e r -> ParserT st e r
withOption ParserT PureMode Error Word8
forall (st :: ZeroBitType) e. ParserT st e Word8
anyWord8
    (\case Word8
10 -> Parser ()
ws
           Word8
_  -> Parser ()
lineComment)
    (() -> Parser ()
forall a. a -> ParserT PureMode Error a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ())

-- | Parse a potentially nested multiline comment.
multilineComment :: Parser ()
multilineComment :: Parser ()
multilineComment = Int -> Parser ()
forall {a}. (Eq a, Num a) => a -> Parser ()
go (Int
1 :: Int) where
  go :: a -> Parser ()
go a
0 = Parser ()
ws
  go a
n = $(switch [| case _ of
    "-}" -> go (n - 1)
    "{-" -> go (n + 1)
    _    -> branch anyWord8 (go n) (pure ()) |])

-- | Consume whitespace.
ws :: Parser ()
ws :: Parser ()
ws = $(switch [| case _ of
  " "  -> ws
  "\n" -> ws
  "\t" -> ws
  "\r" -> ws
  "--" -> lineComment
  "{-" -> multilineComment
  _    -> pure () |])

-- | Consume whitespace after running a parser.
token :: Parser a -> Parser a
token :: forall a. Parser a -> Parser a
token Parser a
p = Parser a
p Parser a -> Parser () -> Parser a
forall a b.
ParserT PureMode Error a
-> ParserT PureMode Error b -> ParserT PureMode Error a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
ws
{-# inline token #-}

-- | Read a starting character of an identifier.
identStartChar :: Parser Char
identStartChar :: Parser Char
identStartChar = (Char -> Bool) -> Parser Char
forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
satisfyAscii Char -> Bool
isLatinLetter
{-# inline identStartChar #-}

-- | Read a non-starting character of an identifier.
identChar :: Parser Char
identChar :: Parser Char
identChar = (Char -> Bool) -> Parser Char
forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
satisfyAscii (\Char
c -> Char -> Bool
isLatinLetter Char
c Bool -> Bool -> Bool
|| Char -> Bool
isDigit Char
c)
{-# inline identChar #-}

-- | Check whether a `Span` contains exactly a keyword. Does not change parsing state.
isKeyword :: Span -> Parser ()
isKeyword :: Span -> Parser ()
isKeyword Span
span = Span -> Parser () -> Parser ()
forall (st :: ZeroBitType) e a.
Span -> ParserT st e a -> ParserT st e a
inSpan Span
span do
  $(FP.switch [| case _ of
      "lam"   -> pure ()
      "let"   -> pure ()
      "in"    -> pure ()
      "if"    -> pure ()
      "then"  -> pure ()
      "else"  -> pure ()
      "true"  -> pure ()
      "false" -> pure ()  |])
  Parser ()
forall (st :: ZeroBitType) e. ParserT st e ()
eof

-- | Parse a non-keyword string.
symbol :: String -> Q Exp
symbol :: [Char] -> Q Exp
symbol [Char]
str = [| token $([Char] -> Q Exp
FP.string [Char]
str) |]

-- | Parser a non-keyword string, throw precise error on failure.
symbol' :: String -> Q Exp
symbol' :: [Char] -> Q Exp
symbol' [Char]
str = [| $([Char] -> Q Exp
symbol [Char]
str) `cut'` Lit str |]

-- | Parse a keyword string.
keyword :: String -> Q Exp
keyword :: [Char] -> Q Exp
keyword [Char]
str = [| token ($([Char] -> Q Exp
FP.string [Char]
str) `notFollowedBy` identChar) |]

-- | Parse a keyword string, throw precise error on failure.
keyword' :: String -> Q Exp
keyword' :: [Char] -> Q Exp
keyword' [Char]
str = [| $([Char] -> Q Exp
keyword [Char]
str) `cut'` Lit str |]