-- (c) The GHC Team
--
-- Functions to evaluate whether or not a string is a valid identifier.
-- There is considerable overlap between the logic here and the logic
-- in Lexer.x, but sadly there seems to be no way to merge them.

module Lexeme (
          -- * Lexical characteristics of Haskell names

          -- | Use these functions to figure out what kind of name a
          -- 'FastString' represents; these functions do /not/ check that
          -- the identifier is valid.

        isLexCon, isLexVar, isLexId, isLexSym,
        isLexConId, isLexConSym, isLexVarId, isLexVarSym,
        startsVarSym, startsVarId, startsConSym, startsConId,

          -- * Validating identifiers

          -- | These functions (working over plain old 'String's) check
          -- to make sure that the identifier is valid.
        okVarOcc, okConOcc, okTcOcc,
        okVarIdOcc, okVarSymOcc, okConIdOcc, okConSymOcc

        -- Some of the exports above are not used within GHC, but may
        -- be of value to GHC API users.

  ) where

import GhcPrelude

import FastString

import Data.Char
import qualified Data.Set as Set

import GHC.Lexeme

{-

************************************************************************
*                                                                      *
    Lexical categories
*                                                                      *
************************************************************************

These functions test strings to see if they fit the lexical categories
defined in the Haskell report.

Note [Classification of generated names]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Some names generated for internal use can show up in debugging output,
e.g.  when using -ddump-simpl. These generated names start with a $
but should still be pretty-printed using prefix notation. We make sure
this is the case in isLexVarSym by only classifying a name as a symbol
if all its characters are symbols, not just its first one.
-}

-- Shared signatures for all eight lexical classifiers defined below.
isLexCon,   isLexVar,    isLexId,    isLexSym    :: FastString -> Bool
isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool

-- Constructor-like names: either the alphanumeric or the symbolic
-- constructor test succeeds.
isLexCon cs = isLexConId  cs || isLexConSym cs
-- Variable-like names: either the alphanumeric or the symbolic
-- variable test succeeds.
isLexVar cs = isLexVarId  cs || isLexVarSym cs

-- Alphanumeric (prefix) names: constructor identifier or variable
-- identifier.
isLexId  cs = isLexConId  cs || isLexVarId  cs
-- Symbolic (infix) names: constructor symbol or variable symbol.
isLexSym cs = isLexConSym cs || isLexVarSym cs

-------------
-- Prefix type or data constructors, e.g. "Foo", "[]", "(,)".
-- The empty name is rejected, "[]" is accepted by name, and every other
-- name is classified by its first character only.
isLexConId cs
  | nullFS cs          = False
  | cs == (fsLit "[]") = True
  | otherwise          = startsConId (headFS cs)

-- Ordinary prefix identifiers, e.g. "x", "_x".
-- The empty name is rejected; otherwise only the first character is
-- inspected.
isLexVarId cs
  | nullFS cs = False
  | otherwise = startsVarId (headFS cs)

-- Infix type or data constructors, e.g. ":-:", ":", "->".
-- The empty name is rejected, "->" is accepted by name, and every other
-- name is classified by its first character only.
isLexConSym cs
  | nullFS cs          = False
  | cs == (fsLit "->") = True
  | otherwise          = startsConSym (headFS cs)

-- Infix identifiers, e.g. "+".
-- "~R#" is accepted by name.  Any other name must be non-empty, start
-- with a symbol-start character, and consist solely of symbol characters
-- after that.
isLexVarSym fs
  | fs == (fsLit "~R#") = True
  | otherwise
  = case (if nullFS fs then [] else unpackFS fs) of
      []     -> False
      (c:cs) -> startsVarSym c && all isVarSymChar cs
        -- Checking every character, not just the first, is deliberate:
        -- see Note [Classification of generated names]

{-

************************************************************************
*                                                                      *
    Detecting valid names for Template Haskell
*                                                                      *
************************************************************************

-}

----------------------
-- External interface
----------------------

-- | Is this an acceptable variable name?
--
-- The first character selects which validator runs: 'okVarIdOcc' for an
-- identifier start, 'okVarSymOcc' for a symbol start.  The empty string,
-- and any string whose first character is neither, is rejected.
okVarOcc :: String -> Bool
okVarOcc str@(c:_)
  | startsVarId c
  = okVarIdOcc str
  | startsVarSym c
  = okVarSymOcc str
okVarOcc _ = False

-- | Is this an acceptable constructor name?
--
-- The first character selects which validator runs: 'okConIdOcc' for a
-- constructor-identifier start, 'okConSymOcc' for a constructor-symbol
-- start.  The list constructor @"[]"@ is accepted by name when neither
-- of those guards fires.  Everything else, including the empty string,
-- is rejected.
okConOcc :: String -> Bool
okConOcc str@(c:_)
  | startsConId c
  = okConIdOcc str
  | startsConSym c
  = okConSymOcc str
  | str == "[]"
  = True
okConOcc _ = False

-- | Is this an acceptable type name?
--
-- @"[]"@, @"->"@ and @"~"@ are accepted by name.  Otherwise the first
-- character selects the validator: 'okConIdOcc', 'okConSymOcc', or
-- 'okVarSymOcc' for a name that starts like a variable symbol.
-- Everything else, including the empty string, is rejected.
okTcOcc :: String -> Bool
okTcOcc "[]" = True
okTcOcc "->" = True
okTcOcc "~"  = True
okTcOcc str@(c:_)
  | startsConId c
  = okConIdOcc str
  | startsConSym c
  = okConSymOcc str
  | startsVarSym c
  = okVarSymOcc str
okTcOcc _ = False

-- | Is this an acceptable alphanumeric variable name, assuming it starts
-- with an acceptable letter?
--
-- The name must pass 'okIdOcc' and must not be a member of
-- 'reservedIds', with one exception described below.
okVarIdOcc :: String -> Bool
okVarIdOcc str = okIdOcc str &&
                 -- admit "_" as a valid identifier.  Required to support typed
                 -- holes in Template Haskell.  See #10267
                 (str == "_" || not (str `Set.member` reservedIds))

-- | Is this an acceptable symbolic variable name, assuming it starts
-- with an acceptable character?
--
-- Every character must satisfy 'okSymChar', the name must not be a
-- member of 'reservedOps', and it must not be a run of two or more
-- dashes (see 'isDashes').
okVarSymOcc :: String -> Bool
okVarSymOcc str = all okSymChar str &&
                  not (str `Set.member` reservedOps) &&
                  not (isDashes str)

-- | Is this an acceptable alphanumeric constructor name, assuming it
-- starts with an acceptable letter?
--
-- Besides ordinary identifiers (see 'okIdOcc'), this accepts the names
-- of boxed tuples, unboxed tuples and unboxed sums.
okConIdOcc :: String -> Bool
okConIdOcc str = okIdOcc str ||
                   -- An ordinary identifier...
                 is_tuple_name1 True  str ||
                   -- ...or a boxed tuple...
                 is_tuple_name1 False str ||
                   -- ...or an unboxed tuple (Trac #12407)...
                 is_sum_name1 str
                   -- ...or an unboxed sum (Trac #12514)?
  where
    -- Check for a tuple name, starting at the beginning.  The flag is
    -- True for boxed tuples, opened by "(", and False for unboxed
    -- tuples, opened by "(#".
    is_tuple_name1 True  ('(' : rest)       = is_tuple_name2 True  rest
    is_tuple_name1 False ('(' : '#' : rest) = is_tuple_name2 False rest
    is_tuple_name1 _     _                  = False

    -- Check for a tuple tail: any mix of commas and whitespace followed
    -- by the closing bracket that matches the boxity flag.
    is_tuple_name2 True  ")"          = True
    is_tuple_name2 False "#)"         = True
    is_tuple_name2 boxed (',' : rest) = is_tuple_name2 boxed rest
    is_tuple_name2 boxed (ws  : rest)
      | isSpace ws                    = is_tuple_name2 boxed rest
    is_tuple_name2 _     _            = False

    -- Check for a sum name, starting at the beginning.  The flag handed
    -- to is_sum_name2 starts out False: no underscore seen yet.
    is_sum_name1 ('(' : '#' : rest) = is_sum_name2 False rest
    is_sum_name1 _                  = False

    -- Check for a sum tail, only allowing at most one underscore.  A
    -- second underscore matches none of the equations that recurse, so
    -- it falls through to the final False.
    is_sum_name2 _          "#)"         = True
    is_sum_name2 underscore ('|' : rest) = is_sum_name2 underscore rest
    is_sum_name2 False      ('_' : rest) = is_sum_name2 True rest
    is_sum_name2 underscore (ws  : rest)
      | isSpace ws                       = is_sum_name2 underscore rest
    is_sum_name2 _          _            = False

-- | Is this an acceptable symbolic constructor name, assuming it
-- starts with an acceptable character?
--
-- @":"@ is accepted by name even though it is listed in 'reservedOps'.
-- Any other name must consist only of 'okSymChar' characters and must
-- not be a member of 'reservedOps'.
okConSymOcc :: String -> Bool
okConSymOcc ":" = True
okConSymOcc str = all okSymChar str &&
                  not (str `Set.member` reservedOps)

----------------------
-- Internal functions
----------------------

-- | Is this string an acceptable id, possibly with a suffix of hashes,
-- but not worrying about case or clashing with reserved words?
--
-- The longest prefix of 'okIdChar' characters is dropped; whatever is
-- left over must consist only of @#@ characters.
okIdOcc :: String -> Bool
okIdOcc str
  = all (== '#') (dropWhile okIdChar str)
      -- -XMagicHash allows a suffix of hashes; `all` says "True" to an
      -- empty list, so a name with no hashes is accepted too.

-- | Is this character acceptable in an identifier (after the first letter)?
-- See alexGetByte in Lexer.x
--
-- Acceptance is decided by the character's Unicode general category;
-- outside the listed categories only the apostrophe and the underscore
-- are allowed.
okIdChar :: Char -> Bool
okIdChar c = case generalCategory c of
  UppercaseLetter -> True
  LowercaseLetter -> True
  TitlecaseLetter -> True
  ModifierLetter  -> True -- See #10196
  OtherLetter     -> True -- See #1103
  NonSpacingMark  -> True -- See #7650
  DecimalNumber   -> True
  OtherNumber     -> True -- See #4373
  _               -> c == '\'' || c == '_'

-- | All reserved identifiers. Taken from section 2.4 of the 2010 Report.
--
-- The wildcard @"_"@ is part of the set.
reservedIds :: Set.Set String
reservedIds = Set.fromList [ "case", "class", "data", "default", "deriving"
                           , "do", "else", "foreign", "if", "import", "in"
                           , "infix", "infixl", "infixr", "instance", "let"
                           , "module", "newtype", "of", "then", "type", "where"
                           , "_" ]

-- | All reserved operators. Taken from section 2.4 of the 2010 Report.
reservedOps :: Set.Set String
reservedOps = Set.fromList [ "..", ":", "::", "=", "\\", "|", "<-", "->"
                           , "@", "~", "=>" ]

-- | Does this string contain only dashes and has at least 2 of them?
--
-- The first two characters are matched explicitly, so the empty string
-- and a lone dash fall through to the second equation and are rejected.
isDashes :: String -> Bool
isDashes ('-' : '-' : rest) = all (== '-') rest
isDashes _                  = False