{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedLabels #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# OPTIONS_GHC -Wno-name-shadowing #-}
{-# OPTIONS_GHC -Wno-x-partial #-}

-- | TH stage restriction guff for flatparsing
module DotParse.FlatParse.TH where

import Data.ByteString hiding (head, length, reverse)
import Data.ByteString.Char8 qualified as B
import Data.Char hiding (isDigit)
import Data.Functor
import FlatParse.Basic
import Language.Haskell.TH

-- | Consume whitespace.
ws :: Parser e ()
ws =
  $( switch
       [|
         case _ of
           -- order matters
           " " -> ws
           "\\\n" -> ws
           "\\\\\\n" -> ws
           "\n" -> ws
           "\t" -> ws
           "\r" -> ws
           ";" -> ws
           "//" -> lineComment
           "/*" -> multilineComment
           _ -> pure ()
         |]
   )

-- | Consume whitespace after running a parser.
token :: Parser e a -> Parser e a
token p = p <* ws
{-# INLINE token #-}

-- | Parse a line comment.
lineComment :: Parser e ()
lineComment =
  withOption
    anyWord8
    ( \case
        10 -> ws
        _ -> lineComment
    )
    (pure ())

-- | Parse a potentially nested multiline comment.
multilineComment :: Parser e ()
multilineComment = go (1 :: Int)
  where
    go 0 = ws
    go n =
      $( switch
           [|
             case _ of
               "*/" -> go (n - 1)
               "/*" -> go (n + 1)
               _ -> branch anyWord8 (go n) (pure ())
             |]
       )

-- | Parse a HTML-Like string by counting the angle brackets
htmlLike :: Parser e String
htmlLike = ws *> $(char '<') *> go (1 :: Int) "<"
  where
    go 0 acc = ws $> reverse acc
    go n acc =
      ($(char '>') *> go (n - 1) ('>' : acc))
        <|> ($(char '<') *> go (n + 1) ('<' : acc))
        <|> (anyChar >>= (\c -> go n (c : acc)))

-- | First character of a dot identifier.
isValidStartChar :: Char -> Bool
isValidStartChar c =
  isAsciiUpper c
    || isAsciiLower c
    || ('\200' <= c && c <= '\377')
    || (c == '_')

-- | character of a dot identifier.
isValidChar :: Char -> Bool
isValidChar c = isValidStartChar c || isDigit c

-- | Read a starting character of an identifier.
identStartChar :: Parser e Char
identStartChar = satisfy isValidStartChar

-- | Read a non-starting character of an identifier.
identChar :: Parser e Char
identChar = satisfy isValidChar

-- | Parse a non-keyword string.
symbol :: String -> Q Exp
symbol str = [|token $(string str)|]

-- | Parse a keyword string.
keyword :: String -> Q Exp
keyword str = [|token ($(string str) `notFollowedBy` identChar)|]

-- | Parser a non-keyword string, throw precise error on failure.
symbol' :: String -> Q Exp
symbol' str = [|$(symbol str) `cut'` strToUtf8 str|]

-- | Parse a keyword string, throw precise error on failure.
keyword' :: String -> Q Exp
keyword' str = [|$(keyword str) `cut'` strToUtf8 str|]

-- | Parse an identifier.
ident :: Parser e ByteString
ident =
  token $
    byteStringOf $
      identStartChar *> skipMany identChar

-- | Parse an identifier, throw a precise error on failure.
ident' :: Parser Error ByteString
ident' = ident `cut'` "identifier"

-- | A parsing error.
data Error
  = -- | A precisely known error, like leaving out "in" from "let".
    Precise Pos ByteString
  | -- | An imprecise error, when we expect a number of different things,
    --   but parse something else.
    Imprecise Pos [ByteString]
  deriving (Eq, Show)

-- | position of error
errorPos :: Error -> Pos
errorPos (Precise p _) = p
errorPos (Imprecise p _) = p

-- | Merge two errors. Inner errors (which were thrown at points with more consumed inputs)
--   are preferred. If errors are thrown at identical input positions, we prefer precise errors
--   to imprecise ones.
--
--   The point of prioritizing inner and precise errors is to suppress the deluge of "expected"
--   items, and instead try to point to a concrete issue to fix.
merge :: Error -> Error -> Error
merge e e' = case (errorPos e, errorPos e') of
  (p, p') | p < p' -> e'
  (p, p') | p > p' -> e
  (p, _) -> case (e, e') of
    (Precise {}, _) -> e
    (_, Precise {}) -> e'
    (Imprecise _ es, Imprecise _ es') -> Imprecise p (es <> es')
{-# NOINLINE merge #-} -- merge is "cold" code, so we shouldn't inline it.

-- | Pretty print an error. The `ByteString` input is the source file. The offending line from the
--   source is displayed in the output.
prettyError :: ByteString -> Error -> ByteString
prettyError b e =
  let pos :: Pos
      pos = case e of
        Imprecise pos _ -> pos
        Precise pos _ -> pos
      ls = B.lines b
      (l, c) = head $ posLineCols b [pos]
      line = if l < length ls then ls !! l else ""
      linum = strToUtf8 $ show l
      lpad = B.replicate (B.length linum) ' '

      err (Precise _ e) = e
      err (Imprecise _ es) = imprec es

      imprec :: [ByteString] -> ByteString
      imprec [] = error "impossible"
      imprec [e] = e
      imprec (e : es) = e <> go es
        where
          go [] = ""
          go [e] = " or " <> e
          go (e : es) = ", " <> e <> go es
   in strToUtf8 (show l)
        <> ":"
        <> strToUtf8 (show c)
        <> ":\n"
        <> lpad
        <> "|\n"
        <> linum
        <> "| "
        <> line
        <> "\n"
        <> lpad
        <> "| "
        <> B.replicate c ' '
        <> "^\n"
        <> "parse error: expected "
        <> err e

-- | Imprecise cut: we slap a list of items on inner errors.
cut :: Parser Error a -> [ByteString] -> Parser Error a
cut p es = do
  pos <- getPos
  cutting p (Imprecise pos es) merge

-- | Precise cut: we propagate at most a single error.
cut' :: Parser Error a -> ByteString -> Parser Error a
cut' p e = do
  pos <- getPos
  cutting p (Precise pos e) merge