{-# LANGUAGE DeriveLift, LambdaCase, ScopedTypeVariables #-}

module TreeSitter.Symbol
  ( TSSymbol
  , fromTSSymbol
  , SymbolType(..)
  , Symbol(..)
  , symbolToName
  , toHaskellCamelCaseIdentifier
  , toHaskellPascalCaseIdentifier
  , escapeOperatorPunctuation
  , camelCase
  , capitalize
  ) where

import           Data.Char (isAlpha, isControl, toUpper)
import           Data.Function ((&))
import qualified Data.HashSet as HashSet
import           Data.Ix (Ix)
import           Data.List.Split (condense, split, whenElt)
import           Data.Word (Word16)
import           Language.Haskell.TH.Syntax

type TSSymbol = Word16

-- | Map a 'TSSymbol' to the corresponding value of a 'Symbol' datatype.
--
--   This should be used instead of 'toEnum' to perform this conversion, because tree-sitter represents parse errors with the unsigned short @65535@, which is generally not contiguous with the other symbols.
fromTSSymbol :: forall symbol. Symbol symbol => TSSymbol -> symbol
fromTSSymbol :: forall symbol. Symbol symbol => TSSymbol -> symbol
fromTSSymbol TSSymbol
symbol = Int -> symbol
forall a. Enum a => Int -> a
toEnum (Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (TSSymbol -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral TSSymbol
symbol) (symbol -> Int
forall a. Enum a => a -> Int
fromEnum (symbol
forall a. Bounded a => a
maxBound :: symbol)))


data SymbolType = Regular | Anonymous | Auxiliary
  deriving (Int -> SymbolType
SymbolType -> Int
SymbolType -> [SymbolType]
SymbolType -> SymbolType
SymbolType -> SymbolType -> [SymbolType]
SymbolType -> SymbolType -> SymbolType -> [SymbolType]
(SymbolType -> SymbolType)
-> (SymbolType -> SymbolType)
-> (Int -> SymbolType)
-> (SymbolType -> Int)
-> (SymbolType -> [SymbolType])
-> (SymbolType -> SymbolType -> [SymbolType])
-> (SymbolType -> SymbolType -> [SymbolType])
-> (SymbolType -> SymbolType -> SymbolType -> [SymbolType])
-> Enum SymbolType
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
enumFromThenTo :: SymbolType -> SymbolType -> SymbolType -> [SymbolType]
$cenumFromThenTo :: SymbolType -> SymbolType -> SymbolType -> [SymbolType]
enumFromTo :: SymbolType -> SymbolType -> [SymbolType]
$cenumFromTo :: SymbolType -> SymbolType -> [SymbolType]
enumFromThen :: SymbolType -> SymbolType -> [SymbolType]
$cenumFromThen :: SymbolType -> SymbolType -> [SymbolType]
enumFrom :: SymbolType -> [SymbolType]
$cenumFrom :: SymbolType -> [SymbolType]
fromEnum :: SymbolType -> Int
$cfromEnum :: SymbolType -> Int
toEnum :: Int -> SymbolType
$ctoEnum :: Int -> SymbolType
pred :: SymbolType -> SymbolType
$cpred :: SymbolType -> SymbolType
succ :: SymbolType -> SymbolType
$csucc :: SymbolType -> SymbolType
Enum, SymbolType -> SymbolType -> Bool
(SymbolType -> SymbolType -> Bool)
-> (SymbolType -> SymbolType -> Bool) -> Eq SymbolType
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: SymbolType -> SymbolType -> Bool
$c/= :: SymbolType -> SymbolType -> Bool
== :: SymbolType -> SymbolType -> Bool
$c== :: SymbolType -> SymbolType -> Bool
Eq, (forall (m :: * -> *). Quote m => SymbolType -> m Exp)
-> (forall (m :: * -> *).
    Quote m =>
    SymbolType -> Code m SymbolType)
-> Lift SymbolType
forall t.
(forall (m :: * -> *). Quote m => t -> m Exp)
-> (forall (m :: * -> *). Quote m => t -> Code m t) -> Lift t
forall (m :: * -> *). Quote m => SymbolType -> m Exp
forall (m :: * -> *). Quote m => SymbolType -> Code m SymbolType
liftTyped :: forall (m :: * -> *). Quote m => SymbolType -> Code m SymbolType
$cliftTyped :: forall (m :: * -> *). Quote m => SymbolType -> Code m SymbolType
lift :: forall (m :: * -> *). Quote m => SymbolType -> m Exp
$clift :: forall (m :: * -> *). Quote m => SymbolType -> m Exp
Lift, Eq SymbolType
Eq SymbolType
-> (SymbolType -> SymbolType -> Ordering)
-> (SymbolType -> SymbolType -> Bool)
-> (SymbolType -> SymbolType -> Bool)
-> (SymbolType -> SymbolType -> Bool)
-> (SymbolType -> SymbolType -> Bool)
-> (SymbolType -> SymbolType -> SymbolType)
-> (SymbolType -> SymbolType -> SymbolType)
-> Ord SymbolType
SymbolType -> SymbolType -> Bool
SymbolType -> SymbolType -> Ordering
SymbolType -> SymbolType -> SymbolType
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: SymbolType -> SymbolType -> SymbolType
$cmin :: SymbolType -> SymbolType -> SymbolType
max :: SymbolType -> SymbolType -> SymbolType
$cmax :: SymbolType -> SymbolType -> SymbolType
>= :: SymbolType -> SymbolType -> Bool
$c>= :: SymbolType -> SymbolType -> Bool
> :: SymbolType -> SymbolType -> Bool
$c> :: SymbolType -> SymbolType -> Bool
<= :: SymbolType -> SymbolType -> Bool
$c<= :: SymbolType -> SymbolType -> Bool
< :: SymbolType -> SymbolType -> Bool
$c< :: SymbolType -> SymbolType -> Bool
compare :: SymbolType -> SymbolType -> Ordering
$ccompare :: SymbolType -> SymbolType -> Ordering
Ord, Int -> SymbolType -> ShowS
[SymbolType] -> ShowS
SymbolType -> String
(Int -> SymbolType -> ShowS)
-> (SymbolType -> String)
-> ([SymbolType] -> ShowS)
-> Show SymbolType
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [SymbolType] -> ShowS
$cshowList :: [SymbolType] -> ShowS
show :: SymbolType -> String
$cshow :: SymbolType -> String
showsPrec :: Int -> SymbolType -> ShowS
$cshowsPrec :: Int -> SymbolType -> ShowS
Show)

class (Bounded s, Enum s, Ix s, Ord s, Show s) => Symbol s where
  symbolType :: s -> SymbolType


symbolToName :: SymbolType -> String -> String
symbolToName :: SymbolType -> ShowS
symbolToName SymbolType
ty String
name
  = ShowS
prefixHidden String
name
  String -> (String -> [String]) -> [String]
forall a b. a -> (a -> b) -> b
& String -> [String]
toWords
  [String] -> ([String] -> [String]) -> [String]
forall a b. a -> (a -> b) -> b
& (String -> Bool) -> [String] -> [String]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (String -> Bool) -> String -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_'))
  [String] -> ([String] -> [String]) -> [String]
forall a b. a -> (a -> b) -> b
& ShowS -> [String] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map ShowS
escapeOperatorPunctuation
  [String] -> ([String] -> String) -> String
forall a b. a -> (a -> b) -> b
& ([String] -> ShowS -> String
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ShowS
capitalize)
  String -> ShowS -> String
forall a b. a -> (a -> b) -> b
& (String
prefix String -> ShowS
forall a. [a] -> [a] -> [a]
++)
  where
    toWords :: String -> [String]
toWords = Splitter Char -> String -> [String]
forall a. Splitter a -> [a] -> [[a]]
split (Splitter Char -> Splitter Char
forall a. Splitter a -> Splitter a
condense ((Char -> Bool) -> Splitter Char
forall a. (a -> Bool) -> Splitter a
whenElt (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isAlpha)))

    prefixHidden :: ShowS
prefixHidden s :: String
s@(Char
'_':String
_) = String
"Hidden" String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
s
    prefixHidden String
s         = String
s

    prefix :: String
prefix = case SymbolType
ty of
      SymbolType
Regular   -> String
""
      SymbolType
Anonymous -> String
"Anon"
      SymbolType
Auxiliary -> String
"Aux"

toHaskellCamelCaseIdentifier :: String -> String
toHaskellCamelCaseIdentifier :: ShowS
toHaskellCamelCaseIdentifier = ShowS
addTickIfNecessary ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
escapeOperatorPunctuation ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
camelCase

addTickIfNecessary :: String -> String
addTickIfNecessary :: ShowS
addTickIfNecessary String
s
  | String -> HashSet String -> Bool
forall a. (Eq a, Hashable a) => a -> HashSet a -> Bool
HashSet.member String
s HashSet String
reservedNames = String
s String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
"'"
  | Bool
otherwise = String
s
  where
    reservedNames :: HashSet.HashSet String
    reservedNames :: HashSet String
reservedNames = [String] -> HashSet String
forall a. (Eq a, Hashable a) => [a] -> HashSet a
HashSet.fromList [
        String
"as", String
"case", String
"class", String
"data", String
"default", String
"deriving", String
"do", String
"forall",
        String
"foreign", String
"hiding", String
"if", String
"then", String
"else", String
"import", String
"infix", String
"infixl",
        String
"infixr", String
"instance", String
"let", String
"in", String
"mdo", String
"module", String
"newtype", String
"proc",
        String
"qualified", String
"rec", String
"type", String
"where"
      ]

toHaskellPascalCaseIdentifier :: String -> String
toHaskellPascalCaseIdentifier :: ShowS
toHaskellPascalCaseIdentifier = ShowS
addTickIfNecessary ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
capitalize ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
escapeOperatorPunctuation ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
camelCase

-- Ensures that we generate valid Haskell identifiers from
-- the literal characters used for infix operators and punctuation.
escapeOperatorPunctuation :: String -> String
escapeOperatorPunctuation :: ShowS
escapeOperatorPunctuation = (Char -> String) -> ShowS
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap ((Char -> String) -> ShowS) -> (Char -> String) -> ShowS
forall a b. (a -> b) -> a -> b
$ \case
  Char
'{' -> String
"LBrace"
  Char
'}' -> String
"RBrace"
  Char
'(' -> String
"LParen"
  Char
')' -> String
"RParen"
  Char
'.' -> String
"Dot"
  Char
':' -> String
"Colon"
  Char
',' -> String
"Comma"
  Char
'|' -> String
"Pipe"
  Char
';' -> String
"Semicolon"
  Char
'*' -> String
"Star"
  Char
'&' -> String
"Ampersand"
  Char
'=' -> String
"Equal"
  Char
'<' -> String
"LAngle"
  Char
'>' -> String
"RAngle"
  Char
'[' -> String
"LBracket"
  Char
']' -> String
"RBracket"
  Char
'+' -> String
"Plus"
  Char
'-' -> String
"Minus"
  Char
'/' -> String
"Slash"
  Char
'\\' -> String
"Backslash"
  Char
'^' -> String
"Caret"
  Char
'!' -> String
"Bang"
  Char
'%' -> String
"Percent"
  Char
'@' -> String
"At"
  Char
'~' -> String
"Tilde"
  Char
'?' -> String
"Question"
  Char
'`' -> String
"Backtick"
  Char
'#' -> String
"Hash"
  Char
'$' -> String
"Dollar"
  Char
'"' -> String
"DQuote"
  Char
'\'' -> String
"SQuote"
  Char
'\t' -> String
"Tab"
  Char
'\n' -> String
"LF"
  Char
'\r' -> String
"CR"
  Char
' ' -> String
"Space"
  Char
other
    | Char -> Bool
isControl Char
other -> ShowS
escapeOperatorPunctuation (Char -> String
forall a. Show a => a -> String
show Char
other)
    | Bool
otherwise -> [Char
other]

-- | Convert a snake_case String to camelCase
camelCase :: String -> String
camelCase :: ShowS
camelCase = ShowS
go
  where
    go :: ShowS
go (Char
'_':Char
'_':String
xs) = String
"Underscore" String -> ShowS
forall a. Semigroup a => a -> a -> a
<> ShowS
go String
xs
    go (Char
'_':String
xs)     = ShowS
go (ShowS
capitalize String
xs)
    go (Char
x:String
xs)       = Char
x Char -> ShowS
forall a. a -> [a] -> [a]
: ShowS
go String
xs
    go String
""           = String
""

-- | Capitalize a String
capitalize :: String -> String
capitalize :: ShowS
capitalize (Char
c:String
cs) = Char -> Char
toUpper Char
c Char -> ShowS
forall a. a -> [a] -> [a]
: String
cs
capitalize []     = []