{-# LANGUAGE DeriveLift, ScopedTypeVariables, LambdaCase #-}
module TreeSitter.Symbol
( TSSymbol
, fromTSSymbol
, SymbolType(..)
, Symbol(..)
, symbolToName
, escapeOperatorPunctuation
) where

import Data.Char (isAlpha, toUpper, isControl)
import Data.Function ((&))
import Data.Ix (Ix)
import Data.List.Split (condense, split, whenElt)
import Data.Word (Word16)
import Language.Haskell.TH.Syntax

type TSSymbol = Word16

-- | Map a 'TSSymbol' to the corresponding value of a 'Symbol' datatype.
--
--   This should be used instead of 'toEnum' to perform this conversion, because tree-sitter represents parse errors with the unsigned short @65535@, which is generally not contiguous with the other symbols.
fromTSSymbol :: forall symbol . Symbol symbol => TSSymbol -> symbol
fromTSSymbol symbol = toEnum (min (fromIntegral symbol) (fromEnum (maxBound :: symbol)))


data SymbolType = Regular | Anonymous | Auxiliary
  deriving (Enum, Eq, Lift, Ord, Show)

class (Bounded s, Enum s, Ix s, Ord s, Show s) => Symbol s where
  symbolType :: s -> SymbolType


symbolToName :: SymbolType -> String -> String
symbolToName ty name
  = prefixHidden name
  & toWords
  & filter (not . all (== '_'))
  & map escapeOperatorPunctuation
  & (>>= initUpper)
  & (prefix ++)
  where toWords = split (condense (whenElt (not . isAlpha)))

        prefixHidden s@('_':_) = "Hidden" ++ s
        prefixHidden s         = s

        initUpper (c:cs) = toUpper c : cs
        initUpper ""     = ""

        prefix = case ty of
          Regular   -> ""
          Anonymous -> "Anon"
          Auxiliary -> "Aux"

-- Ensures that we generate valid Haskell identifiers from
-- the literal characters used for infix operators and punctuation.
escapeOperatorPunctuation :: String -> String
escapeOperatorPunctuation = concatMap $ \case
  '{'  -> "LBrace"
  '}'  -> "RBrace"
  '('  -> "LParen"
  ')'  -> "RParen"
  '.'  -> "Dot"
  ':'  -> "Colon"
  ','  -> "Comma"
  '|'  -> "Pipe"
  ';'  -> "Semicolon"
  '*'  -> "Star"
  '&'  -> "Ampersand"
  '='  -> "Equal"
  '<'  -> "LAngle"
  '>'  -> "RAngle"
  '['  -> "LBracket"
  ']'  -> "RBracket"
  '+'  -> "Plus"
  '-'  -> "Minus"
  '/'  -> "Slash"
  '\\' -> "Backslash"
  '^'  -> "Caret"
  '!'  -> "Bang"
  '%'  -> "Percent"
  '@'  -> "At"
  '~'  -> "Tilde"
  '?'  -> "Question"
  '`'  -> "Backtick"
  '#'  -> "Hash"
  '$'  -> "Dollar"
  '"'  -> "DQuote"
  '\'' -> "SQuote"
  '\t' -> "Tab"
  '\n' -> "LF"
  '\r' -> "CR"
  other
    | isControl other -> escapeOperatorPunctuation (show other)
    | otherwise       -> [other]