-- (c) The GHC Team
--
-- Functions to evaluate whether or not a string is a valid identifier.
-- There is considerable overlap between the logic here and the logic
-- in GHC.Parser.Lexer, but sadly there seems to be no way to merge them.

module GHC.Utils.Lexeme (
          -- * Lexical characteristics of Haskell names

          -- | Use these functions to figure out what kind of name a
          -- 'FastString' represents; these functions do /not/ check that
          -- the identifier is valid.

        isLexCon, isLexVar, isLexId, isLexSym,
        isLexConId, isLexConSym, isLexVarId, isLexVarSym,
        startsVarSym, startsVarId, startsConSym, startsConId,

          -- * Validating identifiers

          -- | These functions (working over plain old 'String's) check
          -- to make sure that the identifier is valid.
        okVarOcc, okConOcc, okTcOcc,
        okVarIdOcc, okVarSymOcc, okConIdOcc, okConSymOcc

        -- Some of the exports above are not used within GHC, but may
        -- be of value to GHC API users.

  ) where

import GHC.Prelude

import GHC.Data.FastString

import Data.Char
import qualified Data.Set as Set

import GHC.Lexeme

{-

************************************************************************
*                                                                      *
    Lexical categories
*                                                                      *
************************************************************************

These functions test strings to see if they fit the lexical categories
defined in the Haskell report.

Note [Classification of generated names]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Some names generated for internal use can show up in debugging output,
e.g.  when using -ddump-simpl. These generated names start with a $
but should still be pretty-printed using prefix notation. We make sure
this is the case in isLexVarSym by only classifying a name as a symbol
if all its characters are symbols, not just its first one.
-}

isLexCon,   isLexVar,    isLexId,    isLexSym    :: FastString -> Bool
isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool

-- Constructor name: alphanumeric ('isLexConId') or symbolic ('isLexConSym').
isLexCon cs = isLexConId  cs || isLexConSym cs

-- Variable name: alphanumeric ('isLexVarId') or symbolic ('isLexVarSym').
isLexVar cs = isLexVarId  cs || isLexVarSym cs

-- Alphanumeric name: constructor ('isLexConId') or variable ('isLexVarId').
isLexId  cs = isLexConId  cs || isLexVarId  cs

-- Symbolic name: constructor ('isLexConSym') or variable ('isLexVarSym').
isLexSym cs = isLexConSym cs || isLexVarSym cs

-------------
-- Prefix type or data constructors, e.g. "Foo", "[]", "(,)".
-- The empty name is rejected.  Otherwise the name must be exactly "[]"
-- or its first character must satisfy 'startsConId'; the remaining
-- characters are not inspected.
isLexConId cs = case unpackFS cs of
  []    -> False
  c : _ -> cs == fsLit "[]" || startsConId c

-- Ordinary prefix identifiers, e.g. "x", "_x".
-- The empty name is rejected.  Otherwise only the first character is
-- examined, using 'startsVarId'.
isLexVarId cs = case unpackFS cs of
  []    -> False
  c : _ -> startsVarId c

-- Infix type or data constructors, e.g. ":-:", ":", "->".
-- The empty name is rejected.  Otherwise the name must be exactly "->"
-- or its first character must satisfy 'startsConSym'; the remaining
-- characters are not inspected.
isLexConSym cs = case unpackFS cs of
  []    -> False
  c : _ -> cs == fsLit "->" || startsConSym c

-- Infix identifiers, e.g. "+".
-- The name "~R#" is accepted outright, before any per-character test.
-- Any other name must be non-empty, start with a character satisfying
-- 'startsVarSym', and consist of 'isVarSymChar' characters thereafter.
isLexVarSym fs
  | fs == fsLit "~R#" = True
  | otherwise
  = case (if nullFS fs then [] else unpackFS fs) of
      []       -> False
      (c : cs) -> startsVarSym c && all isVarSymChar cs
        -- Every character is checked, not just the first.
        -- See Note [Classification of generated names]

{-

************************************************************************
*                                                                      *
    Detecting valid names for Template Haskell
*                                                                      *
************************************************************************

-}

----------------------
-- External interface
----------------------

-- | Is this an acceptable variable name?
--
-- Dispatches on the first character: names starting like an alphanumeric
-- variable are checked by 'okVarIdOcc', names starting like a symbolic
-- variable by 'okVarSymOcc'.  The empty string, and any name whose first
-- character fits neither class, is rejected.
okVarOcc :: String -> Bool
okVarOcc str@(c : _)
  | startsVarId c
  = okVarIdOcc str
  | startsVarSym c
  = okVarSymOcc str
okVarOcc _ = False

-- | Is this an acceptable constructor name?
--
-- Dispatches on the first character: names starting like an alphanumeric
-- constructor are checked by 'okConIdOcc', names starting like a symbolic
-- constructor by 'okConSymOcc'.  The list constructor @[]@ is accepted as
-- a special case.  Everything else, including the empty string, is
-- rejected.
okConOcc :: String -> Bool
okConOcc str@(c : _)
  | startsConId c
  = okConIdOcc str
  | startsConSym c
  = okConSymOcc str
  | str == "[]"
  = True
okConOcc _ = False

-- | Is this an acceptable type name?
--
-- The names @[]@, @->@ and @~@ are accepted outright.  Any other
-- non-empty name is dispatched on its first character to 'okConIdOcc',
-- 'okConSymOcc' or 'okVarSymOcc' (tried in that order).  Everything else,
-- including the empty string, is rejected.
okTcOcc :: String -> Bool
okTcOcc "[]" = True
okTcOcc "->" = True
okTcOcc "~"  = True
okTcOcc str@(c : _)
  | startsConId c
  = okConIdOcc str
  | startsConSym c
  = okConSymOcc str
  | startsVarSym c
  = okVarSymOcc str
okTcOcc _ = False

-- | Is this an acceptable alphanumeric variable name, assuming it starts
-- with an acceptable letter?
--
-- The name must pass 'okIdOcc' and must not be one of 'reservedIds',
-- with the single exception of @_@.
okVarIdOcc :: String -> Bool
okVarIdOcc str = okIdOcc str &&
                 -- admit "_" as a valid identifier.  Required to support typed
                 -- holes in Template Haskell.  See #10267
                 (str == "_" || not (str `Set.member` reservedIds))

-- | Is this an acceptable symbolic variable name, assuming it starts
-- with an acceptable character?
--
-- Every character must satisfy 'okSymChar', the name must not be one of
-- 'reservedOps', and it must not consist solely of two or more dashes
-- (see 'isDashes').
okVarSymOcc :: String -> Bool
okVarSymOcc str = all okSymChar str &&
                  not (str `Set.member` reservedOps) &&
                  not (isDashes str)

-- | Is this an acceptable alphanumeric constructor name, assuming it
-- starts with an acceptable letter?
--
-- Besides ordinary identifiers (checked by 'okIdOcc'), this admits the
-- names of boxed tuples, unboxed tuples and unboxed sums.
okConIdOcc :: String -> Bool
okConIdOcc str = okIdOcc str ||
                   -- Is it a boxed tuple...
                 is_tuple_name1 True  str ||
                   -- ...or an unboxed tuple (#12407)...
                 is_tuple_name1 False str ||
                   -- ...or an unboxed sum (#12514)?
                 is_sum_name1 str
  where
    -- check for tuple name, starting at the beginning;
    -- the flag is True for boxed tuples and False for unboxed ones
    is_tuple_name1 :: Bool -> String -> Bool
    is_tuple_name1 True  ('(' : rest)       = is_tuple_name2 True  rest
    is_tuple_name1 False ('(' : '#' : rest) = is_tuple_name2 False rest
    is_tuple_name1 _     _                  = False

    -- check for tuple tail: commas and whitespace are skipped until the
    -- closing bracket that matches the boxity flag
    is_tuple_name2 :: Bool -> String -> Bool
    is_tuple_name2 True  ")"          = True
    is_tuple_name2 False "#)"         = True
    is_tuple_name2 boxed (',' : rest) = is_tuple_name2 boxed rest
    is_tuple_name2 boxed (ws  : rest)
      | isSpace ws                    = is_tuple_name2 boxed rest
    is_tuple_name2 _     _            = False

    -- check for sum name, starting at the beginning
    is_sum_name1 :: String -> Bool
    is_sum_name1 ('(' : '#' : rest) = is_sum_name2 False rest
    is_sum_name1 _                  = False

    -- check for sum tail, only allowing at most one underscore;
    -- the flag records whether an underscore has already been seen
    is_sum_name2 :: Bool -> String -> Bool
    is_sum_name2 _          "#)"         = True
    is_sum_name2 underscore ('|' : rest) = is_sum_name2 underscore rest
    is_sum_name2 False      ('_' : rest) = is_sum_name2 True rest
    is_sum_name2 underscore (ws  : rest)
      | isSpace ws                       = is_sum_name2 underscore rest
    is_sum_name2 _          _            = False

-- | Is this an acceptable symbolic constructor name, assuming it
-- starts with an acceptable character?
--
-- The name @:@ is accepted outright even though it appears in
-- 'reservedOps'.  Any other name must consist only of 'okSymChar'
-- characters and must not be one of 'reservedOps'.
okConSymOcc :: String -> Bool
okConSymOcc ":" = True
okConSymOcc str = all okSymChar str &&
                  not (str `Set.member` reservedOps)

----------------------
-- Internal functions
----------------------

-- | Is this string an acceptable id, possibly with a suffix of hashes,
-- but not worrying about case or clashing with reserved words?
--
-- The longest prefix of 'okIdChar' characters is skipped; whatever
-- remains must consist solely of @#@ characters.
okIdOcc :: String -> Bool
okIdOcc str = all (== '#') hashes
    -- -XMagicHash allows a suffix of hashes
    -- of course, `all` says "True" to an empty list
  where
    -- what is left once the leading identifier characters are dropped
    hashes :: String
    hashes = dropWhile okIdChar str

-- | Is this character acceptable in an identifier (after the first letter)?
-- See alexGetByte in GHC.Parser.Lexer
--
-- Accepts the Unicode letter, mark and number categories listed below,
-- plus the apostrophe and the underscore.
okIdChar :: Char -> Bool
okIdChar c = case generalCategory c of
  UppercaseLetter -> True
  LowercaseLetter -> True
  TitlecaseLetter -> True
  ModifierLetter  -> True -- See #10196
  OtherLetter     -> True -- See #1103
  NonSpacingMark  -> True -- See #7650
  DecimalNumber   -> True
  OtherNumber     -> True -- See #4373
  _               -> c == '\'' || c == '_'

-- | All reserved identifiers. Taken from section 2.4 of the 2010 Report,
-- plus the GHC-specific @forall@ keyword (see GHC Proposal #281).
reservedIds :: Set.Set String
reservedIds = Set.fromList [ "case", "class", "data", "default", "deriving"
                           , "do", "else", "forall", "foreign", "if", "import"
                           , "in", "infix", "infixl", "infixr", "instance"
                           , "let", "module", "newtype", "of", "then", "type"
                           , "where", "_" ]

-- | All reserved operators. Taken from section 2.4 of the 2010 Report,
-- excluding @\@@ and @~@ that are allowed by GHC (see GHC Proposal #229).
reservedOps :: Set.Set String
reservedOps = Set.fromList [ "..", ":", "::", "=", "\\", "|", "<-", "->"
                           , "=>" ]

-- | Does this string contain only dashes and has at least 2 of them?
isDashes :: String -> Bool
isDashes ('-' : '-' : rest) = all (== '-') rest
isDashes _                  = False