{-|
This module contains lexer and error message primitives for a simple lambda calculus parser. It
demonstrates a simple but decently informative implementation of error message propagation.
-}

module FlatParse.Examples.BasicLambda.Lexer where

import FlatParse.Basic hiding (Parser, runParser, string, char, cut, err)

import qualified FlatParse.Basic as FP
import qualified Data.ByteString as B
import Language.Haskell.TH

import qualified Data.Set as S

--------------------------------------------------------------------------------

-- | An expected item which is displayed in error messages.
data Expected
  = Lit String  -- ^ An expected literal string.
  | Msg String  -- ^ A description of what's expected.
  deriving (Expected -> Expected -> Bool
(Expected -> Expected -> Bool)
-> (Expected -> Expected -> Bool) -> Eq Expected
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Expected -> Expected -> Bool
$c/= :: Expected -> Expected -> Bool
== :: Expected -> Expected -> Bool
$c== :: Expected -> Expected -> Bool
Eq, Int -> Expected -> ShowS
[Expected] -> ShowS
Expected -> String
(Int -> Expected -> ShowS)
-> (Expected -> String) -> ([Expected] -> ShowS) -> Show Expected
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Expected] -> ShowS
$cshowList :: [Expected] -> ShowS
show :: Expected -> String
$cshow :: Expected -> String
showsPrec :: Int -> Expected -> ShowS
$cshowsPrec :: Int -> Expected -> ShowS
Show, Eq Expected
Eq Expected
-> (Expected -> Expected -> Ordering)
-> (Expected -> Expected -> Bool)
-> (Expected -> Expected -> Bool)
-> (Expected -> Expected -> Bool)
-> (Expected -> Expected -> Bool)
-> (Expected -> Expected -> Expected)
-> (Expected -> Expected -> Expected)
-> Ord Expected
Expected -> Expected -> Bool
Expected -> Expected -> Ordering
Expected -> Expected -> Expected
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Expected -> Expected -> Expected
$cmin :: Expected -> Expected -> Expected
max :: Expected -> Expected -> Expected
$cmax :: Expected -> Expected -> Expected
>= :: Expected -> Expected -> Bool
$c>= :: Expected -> Expected -> Bool
> :: Expected -> Expected -> Bool
$c> :: Expected -> Expected -> Bool
<= :: Expected -> Expected -> Bool
$c<= :: Expected -> Expected -> Bool
< :: Expected -> Expected -> Bool
$c< :: Expected -> Expected -> Bool
compare :: Expected -> Expected -> Ordering
$ccompare :: Expected -> Expected -> Ordering
$cp1Ord :: Eq Expected
Ord)

-- | A parsing error, without source position.
data Error'
  = Precise Expected     -- ^ A precisely known error, like leaving out "in" from "let".
  | Imprecise [Expected] -- ^ An imprecise error, when we expect a number of different things,
                         --   but parse something else.
  deriving Int -> Error' -> ShowS
[Error'] -> ShowS
Error' -> String
(Int -> Error' -> ShowS)
-> (Error' -> String) -> ([Error'] -> ShowS) -> Show Error'
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Error'] -> ShowS
$cshowList :: [Error'] -> ShowS
show :: Error' -> String
$cshow :: Error' -> String
showsPrec :: Int -> Error' -> ShowS
$cshowsPrec :: Int -> Error' -> ShowS
Show

-- | A source-annotated error.
data Error = Error !Pos !Error'
  deriving Int -> Error -> ShowS
[Error] -> ShowS
Error -> String
(Int -> Error -> ShowS)
-> (Error -> String) -> ([Error] -> ShowS) -> Show Error
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Error] -> ShowS
$cshowList :: [Error] -> ShowS
show :: Error -> String
$cshow :: Error -> String
showsPrec :: Int -> Error -> ShowS
$cshowsPrec :: Int -> Error -> ShowS
Show

-- | Merge two errors. Imprecise errors are merged by appending lists of expected items.  If we have
--   a precise and an imprecise error, we throw away the imprecise one. If we have two precise
--   errors, we choose the left one, which is by convention the one throw by an inner parser.
--
--   The point of prioritizing inner and precise errors is to suppress the deluge of "expected"
--   items, and instead try to point to a concrete issue to fix.
merge :: Error -> Error -> Error
merge :: Error -> Error -> Error
merge err :: Error
err@(Error Pos
p Error'
e) err' :: Error
err'@(Error Pos
p' Error'
e') = case (Error'
e, Error'
e') of
  (Precise Expected
_, Error'
_)                -> Error
err   -- pick the inner concrete error
  (Error'
_, Precise Expected
_)                -> Error
err'  -- pick the outer concrete error
  (Imprecise [Expected]
ss, Imprecise [Expected]
ss') -> Pos -> Error' -> Error
Error Pos
p ([Expected] -> Error'
Imprecise ([Expected]
ss [Expected] -> [Expected] -> [Expected]
forall a. [a] -> [a] -> [a]
++ [Expected]
ss'))
   -- note: we never recover from errors, so all merged errors will in fact have exactly the same
   -- Pos. So we can simply throw away one of the two here.
{-# noinline merge #-} -- merge is "cold" code, so we shouldn't inline it.

type Parser = FP.Parser () Error

-- | Pretty print an error. The `B.ByteString` input is the source file. The offending line from the
--   source is displayed in the output.
prettyError :: B.ByteString -> Error -> String
prettyError :: ByteString -> Error -> String
prettyError ByteString
b (Error Pos
pos Error'
e) =

  let ls :: [String]
ls       = ByteString -> [String]
FP.lines ByteString
b
      [(Int
l, Int
c)] = ByteString -> [Pos] -> [(Int, Int)]
posLineCols ByteString
b [Pos
pos]
      line :: String
line     = if [String] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [String]
ls then String
"" else [String]
ls [String] -> Int -> String
forall a. [a] -> Int -> a
!! Int
l
      linum :: String
linum    = Int -> String
forall a. Show a => a -> String
show Int
l
      lpad :: String
lpad     = (Char -> Char) -> ShowS
forall a b. (a -> b) -> [a] -> [b]
map (Char -> Char -> Char
forall a b. a -> b -> a
const Char
' ') String
linum

      expected :: Expected -> String
expected (Lit String
s) = ShowS
forall a. Show a => a -> String
show String
s
      expected (Msg String
s) = String
s

      err :: Error' -> String
err (Precise Expected
exp)    = Expected -> String
expected Expected
exp
      err (Imprecise [Expected]
exps) = [Expected] -> String
imprec ([Expected] -> String) -> [Expected] -> String
forall a b. (a -> b) -> a -> b
$ Set Expected -> [Expected]
forall a. Set a -> [a]
S.toList (Set Expected -> [Expected]) -> Set Expected -> [Expected]
forall a b. (a -> b) -> a -> b
$ [Expected] -> Set Expected
forall a. Ord a => [a] -> Set a
S.fromList [Expected]
exps

      imprec :: [Expected] -> String
      imprec :: [Expected] -> String
imprec []     = ShowS
forall a. HasCallStack => String -> a
error String
"impossible"
      imprec [Expected
s]    = Expected -> String
expected Expected
s
      imprec (Expected
s:[Expected]
ss) = Expected -> String
expected Expected
s String -> ShowS
forall a. [a] -> [a] -> [a]
++ [Expected] -> String
go [Expected]
ss where
        go :: [Expected] -> String
go []     = String
""
        go [Expected
s]    = String
" or " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Expected -> String
expected Expected
s
        go (Expected
s:[Expected]
ss) = String
", " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Expected -> String
expected Expected
s String -> ShowS
forall a. [a] -> [a] -> [a]
++ [Expected] -> String
go [Expected]
ss

  in Int -> String
forall a. Show a => a -> String
show Int
l String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
":" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
c String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
":\n" String -> ShowS
forall a. [a] -> [a] -> [a]
++
     String
lpad   String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"|\n" String -> ShowS
forall a. [a] -> [a] -> [a]
++
     String
linum  String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"| " String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
line String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"\n" String -> ShowS
forall a. [a] -> [a] -> [a]
++
     String
lpad   String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"| " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> Char -> String
forall a. Int -> a -> [a]
replicate Int
c Char
' ' String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"^\n" String -> ShowS
forall a. [a] -> [a] -> [a]
++
     String
"parse error: expected " String -> ShowS
forall a. [a] -> [a] -> [a]
++
     Error' -> String
err Error'
e

-- | Imprecise cut: we slap a list of expected things on inner errors.
cut :: Parser a -> [Expected] -> Parser a
cut :: Parser a -> [Expected] -> Parser a
cut Parser a
p [Expected]
exps = do
  Pos
pos <- Parser () Error Pos
forall r e. Parser r e Pos
getPos
  Parser a -> Error -> (Error -> Error -> Error) -> Parser a
forall r e a. Parser r e a -> e -> (e -> e -> e) -> Parser r e a
FP.cutting Parser a
p (Pos -> Error' -> Error
Error Pos
pos ([Expected] -> Error'
Imprecise [Expected]
exps)) Error -> Error -> Error
merge

-- | Precise cut: we propagate at most a single expected thing.
cut' :: Parser a -> Expected -> Parser a
cut' :: Parser a -> Expected -> Parser a
cut' Parser a
p Expected
exp = do
  Pos
pos <- Parser () Error Pos
forall r e. Parser r e Pos
getPos
  Parser a -> Error -> (Error -> Error -> Error) -> Parser a
forall r e a. Parser r e a -> e -> (e -> e -> e) -> Parser r e a
FP.cutting Parser a
p (Pos -> Error' -> Error
Error Pos
pos (Expected -> Error'
Precise Expected
exp)) Error -> Error -> Error
merge

runParser :: Parser a -> B.ByteString -> Result Error a
runParser :: Parser a -> ByteString -> Result Error a
runParser Parser a
p = Parser a -> () -> ByteString -> Result Error a
forall r e a. Parser r e a -> r -> ByteString -> Result e a
FP.runParser Parser a
p ()

-- | Run parser, print pretty error on failure.
testParser :: Show a => Parser a -> String -> IO ()
testParser :: Parser a -> String -> IO ()
testParser Parser a
p String
str = case String -> ByteString
packUTF8 String
str of
  ByteString
b -> case Parser a -> ByteString -> Result Error a
forall a. Parser a -> ByteString -> Result Error a
runParser Parser a
p ByteString
b of
    Err Error
e  -> String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ ByteString -> Error -> String
prettyError ByteString
b Error
e
    OK a
a ByteString
_ -> a -> IO ()
forall a. Show a => a -> IO ()
print a
a
    Result Error a
Fail   -> String -> IO ()
putStrLn String
"uncaught parse error"

-- | Parse a line comment.
lineComment :: Parser ()
lineComment :: Parser ()
lineComment =
  Parser () Error Word8
-> (Word8 -> Parser ()) -> Parser () -> Parser ()
forall r e a b.
Parser r e a -> (a -> Parser r e b) -> Parser r e b -> Parser r e b
optioned Parser () Error Word8
forall r e. Parser r e Word8
anyWord8
    (\case Word8
10 -> Parser ()
ws
           Word8
_  -> Parser ()
lineComment)
    (() -> Parser ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ())

-- | Parse a potentially nested multiline comment.
multilineComment :: Parser ()
multilineComment :: Parser ()
multilineComment = Int -> Parser ()
forall a. (Eq a, Num a) => a -> Parser ()
go (Int
1 :: Int) where
  go :: a -> Parser ()
go a
0 = Parser ()
ws
  go a
n = $(switch [| case _ of
    "-}" -> go (n - 1)
    "{-" -> go (n + 1)
    _    -> branch anyWord8 (go n) (pure ()) |])

-- | Consume whitespace.
ws :: Parser ()
ws :: Parser ()
ws = $(switch [| case _ of
  " "  -> ws
  "\n" -> ws
  "\t" -> ws
  "\r" -> ws
  "--" -> lineComment
  "{-" -> multilineComment
  _    -> pure () |])

-- | Consume whitespace after running a parser.
token :: Parser a -> Parser a
token :: Parser a -> Parser a
token Parser a
p = Parser a
p Parser a -> Parser () -> Parser a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
ws
{-# inline token #-}

-- | Read a starting character of an identifier.
identStartChar :: Parser Char
identStartChar :: Parser Char
identStartChar = (Char -> Bool) -> Parser Char
forall r e. (Char -> Bool) -> Parser r e Char
satisfyASCII Char -> Bool
isLatinLetter
{-# inline identStartChar #-}

-- | Read a non-starting character of an identifier.
identChar :: Parser Char
identChar :: Parser Char
identChar = (Char -> Bool) -> Parser Char
forall r e. (Char -> Bool) -> Parser r e Char
satisfyASCII (\Char
c -> Char -> Bool
isLatinLetter Char
c Bool -> Bool -> Bool
|| Char -> Bool
isDigit Char
c)
{-# inline identChar #-}

-- | Check whether a `Span` contains exactly a keyword. Does not change parsing state.
isKeyword :: Span -> Parser ()
isKeyword :: Span -> Parser ()
isKeyword Span
span = Span -> Parser () -> Parser ()
forall r e a. Span -> Parser r e a -> Parser r e a
inSpan Span
span do
  $(FP.switch [| case _ of
      "lam"   -> pure ()
      "let"   -> pure ()
      "in"    -> pure ()
      "if"    -> pure ()
      "then"  -> pure ()
      "else"  -> pure ()
      "true"  -> pure ()
      "false" -> pure ()  |])
  Parser ()
forall r e. Parser r e ()
eof

-- | Parse a non-keyword string.
symbol :: String -> Q Exp
symbol :: String -> Q Exp
symbol String
str = [| token $(FP.string str) |]

-- | Parser a non-keyword string, throw precise error on failure.
cutSymbol :: String -> Q Exp
cutSymbol :: String -> Q Exp
cutSymbol String
str = [| $(symbol str) `cut'` Lit str |]

-- | Parse a keyword string.
keyword :: String -> Q Exp
keyword :: String -> Q Exp
keyword String
str = [| token ($(FP.string str) `notFollowedBy` identChar) |]

-- | Parse a keyword string, throw precise error on failure.
cutKeyword :: String -> Q Exp
cutKeyword :: String -> Q Exp
cutKeyword String
str = [| $(keyword str) `cut'` Lit str |]