{-|
Module      : Formatter
Description : This module defines the formatter for rzk files.

The formatter is designed in a way that can be consumed both by the CLI and the
LSP server.
-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternSynonyms   #-}

module Rzk.Format (
  FormattingEdit (FormattingEdit),
  formatTextEdits,
  format, formatFile, formatFileWrite,
  isWellFormatted, isWellFormattedFile,
) where

import           Control.Monad              ((<$!>))
import           Data.List                  (elemIndex, foldl', sort)

import           Language.Rzk.Syntax        (tryExtractMarkdownCodeBlocks)
import           Language.Rzk.Syntax.Layout (resolveLayout)
import           Language.Rzk.Syntax.Lex    (Posn (Pn),
                                             Tok (TK, T_VarIdentToken),
                                             TokSymbol (TokSymbol), Token (PT),
                                             tokens)

-- | All indices are 1-based (as received from the lexer)
-- Note: LSP uses 0-based indices
data FormattingEdit = FormattingEdit Int Int Int Int String
  deriving (Eq, Ord)

-- TODO: more patterns, e.g. for identifiers and literals
pattern Symbol :: String -> Tok
pattern Symbol s <- TK (TokSymbol s _)

pattern Token :: String -> Int -> Int -> Token
pattern Token s line col <- PT (Pn _ line col) (Symbol s)

-- pattern TokenSym :: String -> Int -> Int -> Token
-- pattern TokenSym s line col <- PT (Pn _ line col) (Symbol s)

pattern TokenIdent :: String -> Int -> Int -> Token
pattern TokenIdent s line col <- PT (Pn _ line col) (T_VarIdentToken s)

data FormatState = FormatState
  { parensDepth  :: Int  -- ^ The level of parentheses nesting
  , definingName :: Bool -- ^ After #define, in name or assumptions (to detect the : for the type)
  , lambdaArrow  :: Bool -- ^ After a lambda '\', in the parameters (to leave its -> on the same line)
  }

-- TODO: replace all tabs with 1 space before processing
formatTextEdits :: String -> [FormattingEdit]
formatTextEdits contents = go initialState toks
  where
    initialState = FormatState { parensDepth = 0, definingName = False, lambdaArrow = False }
    incParensDepth s = s { parensDepth = parensDepth s + 1 }
    decParensDepth s = s { parensDepth = parensDepth s - 1 }
    rzkBlocks = tryExtractMarkdownCodeBlocks "rzk" contents -- TODO: replace tabs with spaces
    contentLines line = lines rzkBlocks !! (line - 1) -- Sorry
    toks = resolveLayout True (tokens rzkBlocks)
    unicodeTokens =
      [ ("->", "→")
      , ("|->", "↦")
      , ("===", "≡")
      , ("<=", "≤")
      , ("/\\", "∧")
      , ("\\/", "∨")
      , ("Sigma", "Σ")
      , ("∑", "Σ")
      , ("*_1", "*₁")
      , ("0_2", "0₂")
      , ("1_2", "1₂")
      , ("*", "×")
      ]
    go :: FormatState -> [Token] -> [FormattingEdit]
    go _ [] = []
    go s (Token "#lang" langLine langCol : Token "rzk-1" rzkLine rzkCol : tks)
      -- FIXME: Tab characters break this because BNFC increases the column number to the next multiple of 8
      -- Should probably check the first field of Pn (always incremented by 1)
      -- Or `tabSize` param sent along the formatting request
      -- But we should probably convert tabs to spaces first before any other formatting
      = edits ++ go s tks
      where
        edits = map snd $ filter fst
          -- Remove extra spaces before #lang
          [ (langCol > 1, FormattingEdit langLine 1 langLine langCol "")
          -- Remove extra spaces between #lang and rzk-1
          , (rzkLine > langLine || rzkCol > langCol + 5 + 1,
              FormattingEdit langLine (langCol + 5) rzkLine rzkCol " ")
          ]

    go s (Token "#postulate" _ _ : tks) = go (s {definingName = True}) tks

    go s (Token "#define" defLine defCol : TokenIdent _name nameLine nameCol : tks)
      = edits ++ go (s {definingName = True}) tks
      where
        edits = map snd $ filter fst
          -- Remove any space before #define
          [ (defCol > 1, FormattingEdit defLine 1 defLine defCol "")
          -- Ensure exactly one space after #define
          , (nameLine /= defLine || nameCol > defCol + 7 + 1,
              FormattingEdit defLine (defCol + 7) nameLine nameCol " ")
          ]
    -- #def is an alias for #define
    go s (Token "#def" line col : tks) = go s (PT (Pn 0 line col) (TK (TokSymbol "#define" 0)):tks)
    -- TODO: similarly for other commands

    -- Ensure exactly one space after the first open paren of a line
    go s (Token "(" line col : tks)
      | precededBySingleCharOnly && spacesAfter /= 1 && not isLastNonSpaceChar
        = FormattingEdit line spaceCol line (spaceCol + spacesAfter) " "
        : go (incParensDepth s) tks
      -- Remove extra spaces if it's not the first open paren on a new line
      | not precededBySingleCharOnly && spacesAfter > 0
        = FormattingEdit line spaceCol line (spaceCol + spacesAfter) ""
        : go (incParensDepth s) tks
      | otherwise = go (incParensDepth s) tks
      -- TODO: Split after 80 chars
      where
        spaceCol = col + 1
        lineContent = contentLines line
        -- | This is similar to (\\) but removes all occurrences (not just the first one)
        setDifference xs excludes = filter (not . (`elem` excludes)) xs
        precededBySingleCharOnly = null $ foldl' setDifference (take (col - 1) lineContent) punctuations
        punctuations = concat
          [ map fst unicodeTokens -- ASCII sequences will be converted soon
          , map snd unicodeTokens
          , ["(", ":", ",", "="]
          , [" ", "\t"]
          ]
        spacesAfter = length $ takeWhile (== ' ') (drop col lineContent)
        isLastNonSpaceChar = all (== ' ') (drop col lineContent)

    -- Remove any space before the closing paren
    go s (Token ")" line col : tks)
      = edits ++ go (decParensDepth s) tks
      where
        lineContent = contentLines line
        isFirstNonSpaceChar = all (== ' ') (take (col - 1) lineContent)
        spacesBefore = length $ takeWhile (== ' ') (reverse $ take (col - 1) lineContent)
        edits = map snd $ filter fst
          [ (not isFirstNonSpaceChar && spacesBefore > 0,
              FormattingEdit line (col - spacesBefore) line col "")
          ]

    -- line break before : (only the top-level one) and one space after
    go s (Token ":" line col : tks)
      | isDefinitionTypeSeparator = typeSepEdits ++ go (s {definingName = False}) tks
      | otherwise                 = normalEdits ++ go s tks
      where
        isDefinitionTypeSeparator = parensDepth s == 0 && definingName s
        lineContent = contentLines line
        isFirstNonSpaceChar = all (== ' ') (take (col - 1) lineContent)
        isLastNonSpaceChar = all (== ' ') (drop col lineContent)
        spacesBefore = length $ takeWhile (== ' ') (reverse $ take (col - 1) lineContent)
        spaceCol = col + 1
        spacesAfter = length $ takeWhile (== ' ') (drop col lineContent)
        typeSepEdits = map snd $ filter fst
          -- Ensure line break before : (and remove any spaces before)
          [ (not isFirstNonSpaceChar, FormattingEdit line (col - spacesBefore) line col "\n  ")
          -- Ensure 2 spaces before : (if already on a new line)
          , (isFirstNonSpaceChar && spacesBefore /= 2, FormattingEdit line 1 line col "  ")
          -- Ensure 1 space after
          , (not isLastNonSpaceChar && spacesAfter /= 1,
              FormattingEdit line spaceCol line (spaceCol + spacesAfter) " ")
          ]
        normalEdits = map snd $ filter fst
          -- 1 space before :
          [ (spacesBefore /= 1, FormattingEdit line (col - spacesBefore) line col " ")
          -- 1 space after
          , (not isLastNonSpaceChar && spacesAfter /= 1,
              FormattingEdit line spaceCol line (spaceCol + spacesAfter) " ")
          ]

    -- Line break before := and one space after
    go s (Token ":=" line col : tks)
      = edits ++ go s tks
      where
        lineContent = contentLines line
        isFirstNonSpaceChar = all (== ' ') (take (col - 1) lineContent)
        spacesAfter = length $ takeWhile (== ' ') (drop (col + 1) lineContent)
        spacesBefore = length $ takeWhile (== ' ') (reverse $ take (col - 1) lineContent)
        edits = map snd $ filter fst
            -- Ensure line break before `:=`
          [ (not isFirstNonSpaceChar, FormattingEdit line (col - spacesBefore) line col "\n  ")
            -- Ensure 2 spaces before `:=` (if already on a new line)
          , (isFirstNonSpaceChar && spacesBefore /= 2,
              FormattingEdit line 1 line col "  ")
            -- Ensure exactly one space after
          , (length lineContent > col + 2 && spacesAfter /= 1,
              FormattingEdit line (col + 2) line (col + 2 + spacesAfter) " ")
          ]

    -- One space after \
    go s (Token "\\" line col : tks)
      = edits ++ go (s { lambdaArrow = True }) tks
      where
        lineContent = contentLines line
        spacesAfter = length $ takeWhile (== ' ') (drop col lineContent)
        isLastNonSpaceChar = all (== ' ') (drop col lineContent)
        spaceCol = col + 1
        edits = map snd $ filter fst
          [ (not isLastNonSpaceChar && spacesAfter /= 1,
              FormattingEdit line spaceCol line (spaceCol + spacesAfter) " ")
          ]

    -- Reset any state necessary after finishing a command
    go s (Token ";" _ _ : tks) = go s tks

    -- One space (or new line) around binary operators and replace ASCII w/ unicode
    go s (Token tk line col : tks) = edits ++ go s' tks
      where
        s' | isArrow = s { lambdaArrow = False } -- reset flag after reaching the arrow
           | otherwise = s
        isArrow = tk `elem` ["->", "→"]
        lineContent = contentLines line
        spacesBefore = length $ takeWhile (== ' ') (reverse $ take (col - 1) lineContent)
        spacesAfter = length $ takeWhile (== ' ') (drop (col + length tk - 1) lineContent)
        isFirstNonSpaceChar = all (== ' ') (take (col - 1) lineContent)
        isLastNonSpaceChar = all (== ' ') (drop (col + length tk - 1) lineContent)
        prevLine
          | line > 0 = contentLines (line - 1)
          | otherwise = ""
        nextLine
          | line + 1 < length (lines rzkBlocks) = contentLines (line + 1)
          | otherwise = ""
        spacesNextLine = length $ takeWhile (== ' ') nextLine
        edits = spaceEdits ++ unicodeEdits
        spaceEdits
          | tk `elem` ["->", "→", ",", "*", "×", "="] = map snd $ filter fst
              -- Ensure exactly one space before (unless first char in line, or about to move to next line)
              [ (not isFirstNonSpaceChar && spacesBefore /= 1 && not isLastNonSpaceChar,
                  FormattingEdit line (col - spacesBefore) line col " ")
              -- Ensure exactly one space after (unless last char in line)
              , (not isLastNonSpaceChar && spacesAfter /= 1,
                  FormattingEdit line (col + length tk) line (col + length tk + spacesAfter) " ")
              -- If last char in line, move it to next line (except for lambda arrow)
              , (isLastNonSpaceChar && not (lambdaArrow s),
                  FormattingEdit line (col - spacesBefore) (line + 1) (spacesNextLine + 1) $
                    "\n" ++ replicate (2 `max` (spacesNextLine - (spacesNextLine `min` 2))) ' ' ++ tk ++ " ")
              -- If lambda -> is first char in line, move it to the previous line
              , (isFirstNonSpaceChar && isArrow && lambdaArrow s,
                  FormattingEdit (line - 1) (length prevLine + 1) line (col + length tk + spacesAfter) $
                    " " ++ tk ++ "\n" ++ replicate spacesBefore ' ')
              ]
          | otherwise = []
        unicodeEdits
          | Just unicodeToken <- tk `lookup` unicodeTokens =
              [ FormattingEdit line col line (col + length tk) unicodeToken
              ]
          | otherwise = []

    go s (_:tks) = go s tks

-- Adapted from https://hackage.haskell.org/package/lsp-types-2.1.0.0/docs/Language-LSP-Protocol-Types.html#g:7
applyTextEdit :: FormattingEdit -> String -> String
applyTextEdit (FormattingEdit sl sc el ec newText) oldText =
  let (_, afterEnd) = splitAtPos (el-1, ec -1) oldText
      (beforeStart, _) = splitAtPos (sl-1, sc-1) oldText
   in mconcat [beforeStart, newText, afterEnd]
 where
  splitAtPos :: (Int, Int) -> String -> (String, String)
  splitAtPos (l, c) t = let index = c + startLineIndex l t in splitAt index t

  startLineIndex :: Int -> String -> Int
  startLineIndex 0 _ = 0
  startLineIndex line t' =
    case elemIndex '\n' t' of
      Just i  -> i + 1 + startLineIndex (line - 1) (drop (i + 1) t')
      Nothing -> length t'

applyTextEdits :: [FormattingEdit] -> String -> String
applyTextEdits edits contents = foldl' (flip applyTextEdit) contents (reverse $ sort edits)

-- | Format Rzk code, returning the formatted version.
format :: String -> String
format = applyTextEdits =<< formatTextEdits

-- | Format Rzk code from a file
formatFile :: FilePath -> IO String
formatFile path = format <$!> readFile path -- strict because possibility of writing to same file

-- | Format the file and write the result back to the file.
formatFileWrite :: FilePath -> IO ()
formatFileWrite path = formatFile path >>= writeFile path

-- | Check if the given Rzk source code is well formatted.
--   This is useful for automation tasks.
isWellFormatted :: String -> Bool
isWellFormatted src = null (formatTextEdits src)

-- | Same as 'isWellFormatted', but reads the source code from a file.
isWellFormattedFile :: FilePath -> IO Bool
isWellFormattedFile path = isWellFormatted <$> readFile path