{-# LANGUAGE OverloadedStrings #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
module Haddock.Backends.Hyperlinker.Parser (parse) where

import Control.Applicative ( Alternative(..) )
import Data.List           ( isPrefixOf, isSuffixOf )

import qualified Data.ByteString as BS

import BasicTypes          ( IntegralLit(..) )
import DynFlags
import ErrUtils            ( pprLocErrMsg )
import FastString          ( mkFastString )
import Lexer               ( P(..), ParseResult(..), PState(..), Token(..)
                           , mkPStatePure, lexer, mkParserFlags', getErrorMessages, addFatalError )
import Bag                 ( bagToList )
import Outputable          ( showSDoc, panic, text, ($$) )
import SrcLoc
import StringBuffer        ( StringBuffer, atEnd )

import Haddock.Backends.Hyperlinker.Types as T
import Haddock.GhcUtils

-- | Turn source code string into a stream of more descriptive tokens.
--
-- Result should retain original file layout (including comments,
-- whitespace, and CPP).
parse
  :: DynFlags      -- ^ Flags for this module
  -> FilePath      -- ^ Path to the source of this module
  -> BS.ByteString -- ^ Raw UTF-8 encoded source of this module
  -> [T.Token]
parse :: DynFlags -> FilePath -> ByteString -> [Token]
parse DynFlags
dflags FilePath
fpath ByteString
bs = case P [Token] -> PState -> ParseResult [Token]
forall a. P a -> PState -> ParseResult a
unP (Bool -> [Token] -> P [Token]
go Bool
False []) PState
initState of
    POk PState
_ [Token]
toks -> [Token] -> [Token]
forall a. [a] -> [a]
reverse [Token]
toks
    PFailed PState
pst ->
      let ErrMsg
err:[ErrMsg]
_ = Bag ErrMsg -> [ErrMsg]
forall a. Bag a -> [a]
bagToList (PState -> DynFlags -> Bag ErrMsg
getErrorMessages PState
pst DynFlags
dflags) in
      FilePath -> [Token]
forall a. FilePath -> a
panic (FilePath -> [Token]) -> FilePath -> [Token]
forall a b. (a -> b) -> a -> b
$ DynFlags -> SDoc -> FilePath
showSDoc DynFlags
dflags (SDoc -> FilePath) -> SDoc -> FilePath
forall a b. (a -> b) -> a -> b
$
        FilePath -> SDoc
text FilePath
"Hyperlinker parse error:" SDoc -> SDoc -> SDoc
$$ ErrMsg -> SDoc
pprLocErrMsg ErrMsg
err
  where

    initState :: PState
initState = ParserFlags -> StringBuffer -> RealSrcLoc -> PState
mkPStatePure ParserFlags
pflags StringBuffer
buf RealSrcLoc
start
    buf :: StringBuffer
buf = ByteString -> StringBuffer
stringBufferFromByteString ByteString
bs
    start :: RealSrcLoc
start = FastString -> Int -> Int -> RealSrcLoc
mkRealSrcLoc (FilePath -> FastString
mkFastString FilePath
fpath) Int
1 Int
1
    pflags :: ParserFlags
pflags = EnumSet WarningFlag
-> EnumSet Extension
-> UnitId
-> Bool
-> Bool
-> Bool
-> Bool
-> ParserFlags
mkParserFlags' (DynFlags -> EnumSet WarningFlag
warningFlags DynFlags
dflags)
                            (DynFlags -> EnumSet Extension
extensionFlags DynFlags
dflags)
                            (DynFlags -> UnitId
thisPackage DynFlags
dflags)
                            (DynFlags -> Bool
safeImportsOn DynFlags
dflags)
                            Bool
False -- lex Haddocks as comment tokens
                            Bool
True  -- produce comment tokens
                            Bool
False -- produce position pragmas tokens

    go :: Bool        -- ^ are we currently in a pragma?
       -> [T.Token]   -- ^ tokens accumulated so far (in reverse)
       -> P [T.Token]
    go :: Bool -> [Token] -> P [Token]
go Bool
inPrag [Token]
toks = do
      (StringBuffer
b, RealSrcLoc
_) <- P (StringBuffer, RealSrcLoc)
getInput
      if Bool -> Bool
not (StringBuffer -> Bool
atEnd StringBuffer
b)
        then do
          ([Token]
newToks, Bool
inPrag') <- P ([Token], Bool)
parseCppLine P ([Token], Bool) -> P ([Token], Bool) -> P ([Token], Bool)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Bool -> P ([Token], Bool)
parsePlainTok Bool
inPrag P ([Token], Bool) -> P ([Token], Bool) -> P ([Token], Bool)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> P ([Token], Bool)
unknownLine
          Bool -> [Token] -> P [Token]
go Bool
inPrag' ([Token]
newToks [Token] -> [Token] -> [Token]
forall a. [a] -> [a] -> [a]
++ [Token]
toks)
        else
          [Token] -> P [Token]
forall (f :: * -> *) a. Applicative f => a -> f a
pure [Token]
toks

    -- | Like 'Lexer.lexer', but slower, with a better API, and filtering out empty tokens
    wrappedLexer :: P (RealLocated Lexer.Token)
    wrappedLexer :: P (RealLocated Token)
wrappedLexer = Bool
-> (Located Token -> P (RealLocated Token))
-> P (RealLocated Token)
forall a. Bool -> (Located Token -> P a) -> P a
Lexer.lexer Bool
False Located Token -> P (RealLocated Token)
andThen
      where andThen :: Located Token -> P (RealLocated Token)
andThen (L (RealSrcSpan RealSrcSpan
s) Token
t)
              | RealSrcSpan -> Int
srcSpanStartLine RealSrcSpan
s Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= RealSrcSpan -> Int
srcSpanEndLine RealSrcSpan
s Bool -> Bool -> Bool
||
                RealSrcSpan -> Int
srcSpanStartCol RealSrcSpan
s Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= RealSrcSpan -> Int
srcSpanEndCol RealSrcSpan
s
              = RealLocated Token -> P (RealLocated Token)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (RealSrcSpan -> Token -> RealLocated Token
forall l e. l -> e -> GenLocated l e
L RealSrcSpan
s Token
t)
            andThen (L (RealSrcSpan RealSrcSpan
s) Token
ITeof) = RealLocated Token -> P (RealLocated Token)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (RealSrcSpan -> Token -> RealLocated Token
forall l e. l -> e -> GenLocated l e
L RealSrcSpan
s Token
ITeof)
            andThen Located Token
_ = P (RealLocated Token)
wrappedLexer

    -- | Try to parse a CPP line (can fail)
    parseCppLine :: P ([T.Token], Bool)
    parseCppLine :: P ([Token], Bool)
parseCppLine = do
      (StringBuffer
b, RealSrcLoc
l) <- P (StringBuffer, RealSrcLoc)
getInput
      case RealSrcLoc
-> StringBuffer -> Maybe (ByteString, RealSrcLoc, StringBuffer)
tryCppLine RealSrcLoc
l StringBuffer
b of
        Just (ByteString
cppBStr, RealSrcLoc
l', StringBuffer
b')
             -> let cppTok :: Token
cppTok = Token :: TokenType -> ByteString -> RealSrcSpan -> Token
T.Token { tkType :: TokenType
tkType = TokenType
TkCpp
                                     , tkValue :: ByteString
tkValue = ByteString
cppBStr
                                     , tkSpan :: RealSrcSpan
tkSpan = RealSrcLoc -> RealSrcLoc -> RealSrcSpan
mkRealSrcSpan RealSrcLoc
l RealSrcLoc
l' }
                in (StringBuffer, RealSrcLoc) -> P ()
setInput (StringBuffer
b', RealSrcLoc
l') P () -> P ([Token], Bool) -> P ([Token], Bool)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ([Token], Bool) -> P ([Token], Bool)
forall (f :: * -> *) a. Applicative f => a -> f a
pure ([Token
cppTok], Bool
False)
        Maybe (ByteString, RealSrcLoc, StringBuffer)
_    -> P ([Token], Bool)
forall (f :: * -> *) a. Alternative f => f a
empty

    -- | Try to parse a regular old token (can fail)
    parsePlainTok :: Bool -> P ([T.Token], Bool)  -- return list is only ever 0-2 elements
    parsePlainTok :: Bool -> P ([Token], Bool)
parsePlainTok Bool
inPrag = do
      (StringBuffer
bInit, RealSrcLoc
lInit) <- P (StringBuffer, RealSrcLoc)
getInput
      L SrcSpan
sp Token
tok <- Bool -> (Located Token -> P (Located Token)) -> P (Located Token)
forall a. Bool -> (Located Token -> P a) -> P a
Lexer.lexer Bool
False Located Token -> P (Located Token)
forall (m :: * -> *) a. Monad m => a -> m a
return
      (StringBuffer
bEnd, RealSrcLoc
_) <- P (StringBuffer, RealSrcLoc)
getInput
      case SrcSpan
sp of
        UnhelpfulSpan FastString
_ -> ([Token], Bool) -> P ([Token], Bool)
forall (f :: * -> *) a. Applicative f => a -> f a
pure ([], Bool
False) -- pretend the token never existed
        RealSrcSpan RealSrcSpan
rsp -> do
          let typ :: TokenType
typ = if Bool
inPrag then TokenType
TkPragma else Token -> TokenType
classify Token
tok
              RealSrcLoc RealSrcLoc
lStart = SrcSpan -> SrcLoc
srcSpanStart SrcSpan
sp -- safe since @sp@ is real
              (ByteString
spaceBStr, StringBuffer
bStart) = RealSrcLoc
-> RealSrcLoc -> StringBuffer -> (ByteString, StringBuffer)
spanPosition RealSrcLoc
lInit RealSrcLoc
lStart StringBuffer
bInit
              inPragDef :: Bool
inPragDef = Bool -> Token -> Bool
inPragma Bool
inPrag Token
tok

          (StringBuffer
bEnd', Bool
inPrag') <- case Token
tok of

            -- Update internal line + file position if this is a LINE pragma
            ITline_prag SourceText
_ -> (StringBuffer, Bool)
-> P (StringBuffer, Bool) -> P (StringBuffer, Bool)
forall a. a -> P a -> P a
tryOrElse (StringBuffer
bEnd, Bool
inPragDef) (P (StringBuffer, Bool) -> P (StringBuffer, Bool))
-> P (StringBuffer, Bool) -> P (StringBuffer, Bool)
forall a b. (a -> b) -> a -> b
$ do
              L RealSrcSpan
_ (ITinteger (IL { il_value :: IntegralLit -> Integer
il_value = Integer
line })) <- P (RealLocated Token)
wrappedLexer
              L RealSrcSpan
_ (ITstring SourceText
_ FastString
file)                    <- P (RealLocated Token)
wrappedLexer
              L RealSrcSpan
spF Token
ITclose_prag                       <- P (RealLocated Token)
wrappedLexer

              let newLoc :: RealSrcLoc
newLoc = FastString -> Int -> Int -> RealSrcLoc
mkRealSrcLoc FastString
file (Integer -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Integer
line Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) (RealSrcSpan -> Int
srcSpanEndCol RealSrcSpan
spF)
              (StringBuffer
bEnd'', RealSrcLoc
_) <- P (StringBuffer, RealSrcLoc)
getInput
              (StringBuffer, RealSrcLoc) -> P ()
setInput (StringBuffer
bEnd'', RealSrcLoc
newLoc)

              (StringBuffer, Bool) -> P (StringBuffer, Bool)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (StringBuffer
bEnd'', Bool
False)

            -- Update internal column position if this is a COLUMN pragma
            ITcolumn_prag SourceText
_ -> (StringBuffer, Bool)
-> P (StringBuffer, Bool) -> P (StringBuffer, Bool)
forall a. a -> P a -> P a
tryOrElse (StringBuffer
bEnd, Bool
inPragDef) (P (StringBuffer, Bool) -> P (StringBuffer, Bool))
-> P (StringBuffer, Bool) -> P (StringBuffer, Bool)
forall a b. (a -> b) -> a -> b
$ do
              L RealSrcSpan
_ (ITinteger (IL { il_value :: IntegralLit -> Integer
il_value = Integer
col }))  <- P (RealLocated Token)
wrappedLexer
              L RealSrcSpan
spF Token
ITclose_prag                       <- P (RealLocated Token)
wrappedLexer

              let newLoc :: RealSrcLoc
newLoc = FastString -> Int -> Int -> RealSrcLoc
mkRealSrcLoc (RealSrcSpan -> FastString
srcSpanFile RealSrcSpan
spF) (RealSrcSpan -> Int
srcSpanEndLine RealSrcSpan
spF) (Integer -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Integer
col)
              (StringBuffer
bEnd'', RealSrcLoc
_) <- P (StringBuffer, RealSrcLoc)
getInput
              (StringBuffer, RealSrcLoc) -> P ()
setInput (StringBuffer
bEnd'', RealSrcLoc
newLoc)

              (StringBuffer, Bool) -> P (StringBuffer, Bool)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (StringBuffer
bEnd'', Bool
False)

            Token
_ -> (StringBuffer, Bool) -> P (StringBuffer, Bool)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (StringBuffer
bEnd, Bool
inPragDef)

          let tokBStr :: ByteString
tokBStr = StringBuffer -> StringBuffer -> ByteString
splitStringBuffer StringBuffer
bStart StringBuffer
bEnd'
              plainTok :: Token
plainTok = Token :: TokenType -> ByteString -> RealSrcSpan -> Token
T.Token { tkType :: TokenType
tkType = TokenType
typ
                                 , tkValue :: ByteString
tkValue = ByteString
tokBStr
                                 , tkSpan :: RealSrcSpan
tkSpan = RealSrcSpan
rsp }
              spaceTok :: Token
spaceTok = Token :: TokenType -> ByteString -> RealSrcSpan -> Token
T.Token { tkType :: TokenType
tkType = TokenType
TkSpace
                                 , tkValue :: ByteString
tkValue = ByteString
spaceBStr
                                 , tkSpan :: RealSrcSpan
tkSpan = RealSrcLoc -> RealSrcLoc -> RealSrcSpan
mkRealSrcSpan RealSrcLoc
lInit RealSrcLoc
lStart }

          ([Token], Bool) -> P ([Token], Bool)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Token
plainTok Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: [ Token
spaceTok | Bool -> Bool
not (ByteString -> Bool
BS.null ByteString
spaceBStr) ], Bool
inPrag')

    -- | Parse whatever remains of the line as an unknown token (can't fail)
    unknownLine :: P ([T.Token], Bool)
    unknownLine :: P ([Token], Bool)
unknownLine = do
      (StringBuffer
b, RealSrcLoc
l) <- P (StringBuffer, RealSrcLoc)
getInput
      let (ByteString
unkBStr, RealSrcLoc
l', StringBuffer
b') = RealSrcLoc
-> StringBuffer -> (ByteString, RealSrcLoc, StringBuffer)
spanLine RealSrcLoc
l StringBuffer
b
          unkTok :: Token
unkTok = Token :: TokenType -> ByteString -> RealSrcSpan -> Token
T.Token { tkType :: TokenType
tkType = TokenType
TkUnknown
                           , tkValue :: ByteString
tkValue = ByteString
unkBStr
                           , tkSpan :: RealSrcSpan
tkSpan = RealSrcLoc -> RealSrcLoc -> RealSrcSpan
mkRealSrcSpan RealSrcLoc
l RealSrcLoc
l' }
      (StringBuffer, RealSrcLoc) -> P ()
setInput (StringBuffer
b', RealSrcLoc
l')
      ([Token], Bool) -> P ([Token], Bool)
forall (f :: * -> *) a. Applicative f => a -> f a
pure ([Token
unkTok], Bool
False)


-- | Get the input
getInput :: P (StringBuffer, RealSrcLoc)
getInput :: P (StringBuffer, RealSrcLoc)
getInput = (PState -> ParseResult (StringBuffer, RealSrcLoc))
-> P (StringBuffer, RealSrcLoc)
forall a. (PState -> ParseResult a) -> P a
P ((PState -> ParseResult (StringBuffer, RealSrcLoc))
 -> P (StringBuffer, RealSrcLoc))
-> (PState -> ParseResult (StringBuffer, RealSrcLoc))
-> P (StringBuffer, RealSrcLoc)
forall a b. (a -> b) -> a -> b
$ \p :: PState
p@PState { buffer :: PState -> StringBuffer
buffer = StringBuffer
buf, loc :: PState -> RealSrcLoc
loc = RealSrcLoc
srcLoc } -> PState
-> (StringBuffer, RealSrcLoc)
-> ParseResult (StringBuffer, RealSrcLoc)
forall a. PState -> a -> ParseResult a
POk PState
p (StringBuffer
buf, RealSrcLoc
srcLoc)

-- | Set the input
setInput :: (StringBuffer, RealSrcLoc) -> P ()
setInput :: (StringBuffer, RealSrcLoc) -> P ()
setInput (StringBuffer
buf, RealSrcLoc
srcLoc) = (PState -> ParseResult ()) -> P ()
forall a. (PState -> ParseResult a) -> P a
P ((PState -> ParseResult ()) -> P ())
-> (PState -> ParseResult ()) -> P ()
forall a b. (a -> b) -> a -> b
$ \PState
p -> PState -> () -> ParseResult ()
forall a. PState -> a -> ParseResult a
POk (PState
p { buffer :: StringBuffer
buffer = StringBuffer
buf, loc :: RealSrcLoc
loc = RealSrcLoc
srcLoc }) ()


-- | Orphan instance that adds backtracking to 'P'
instance Alternative P where
  empty :: P a
empty = SrcSpan -> SDoc -> P a
forall (m :: * -> *) a. MonadP m => SrcSpan -> SDoc -> m a
addFatalError SrcSpan
noSrcSpan (FilePath -> SDoc
text FilePath
"Alterative.empty")
  P PState -> ParseResult a
x <|> :: P a -> P a -> P a
<|> P PState -> ParseResult a
y = (PState -> ParseResult a) -> P a
forall a. (PState -> ParseResult a) -> P a
P ((PState -> ParseResult a) -> P a)
-> (PState -> ParseResult a) -> P a
forall a b. (a -> b) -> a -> b
$ \PState
s -> case PState -> ParseResult a
x PState
s of { p :: ParseResult a
p@POk{} -> ParseResult a
p
                                      ; ParseResult a
_ -> PState -> ParseResult a
y PState
s }

-- | Try a parser. If it fails, backtrack and return the pure value.
tryOrElse :: a -> P a -> P a
tryOrElse :: a -> P a -> P a
tryOrElse a
x P a
p = P a
p P a -> P a -> P a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> a -> P a
forall (f :: * -> *) a. Applicative f => a -> f a
pure a
x

-- | Classify given tokens as appropriate Haskell token type.
classify :: Lexer.Token -> TokenType
classify :: Token -> TokenType
classify Token
tok =
  case Token
tok of
    Token
ITas                   -> TokenType
TkKeyword
    Token
ITcase                 -> TokenType
TkKeyword
    Token
ITclass                -> TokenType
TkKeyword
    Token
ITdata                 -> TokenType
TkKeyword
    Token
ITdefault              -> TokenType
TkKeyword
    Token
ITderiving             -> TokenType
TkKeyword
    ITdo                {} -> TokenType
TkKeyword
    Token
ITelse                 -> TokenType
TkKeyword
    Token
IThiding               -> TokenType
TkKeyword
    Token
ITforeign              -> TokenType
TkKeyword
    Token
ITif                   -> TokenType
TkKeyword
    Token
ITimport               -> TokenType
TkKeyword
    Token
ITin                   -> TokenType
TkKeyword
    Token
ITinfix                -> TokenType
TkKeyword
    Token
ITinfixl               -> TokenType
TkKeyword
    Token
ITinfixr               -> TokenType
TkKeyword
    Token
ITinstance             -> TokenType
TkKeyword
    Token
ITlet                  -> TokenType
TkKeyword
    Token
ITmodule               -> TokenType
TkKeyword
    Token
ITnewtype              -> TokenType
TkKeyword
    Token
ITof                   -> TokenType
TkKeyword
    Token
ITqualified            -> TokenType
TkKeyword
    Token
ITthen                 -> TokenType
TkKeyword
    Token
ITtype                 -> TokenType
TkKeyword
    Token
ITvia                  -> TokenType
TkKeyword
    Token
ITwhere                -> TokenType
TkKeyword

    ITforall            {} -> TokenType
TkKeyword
    Token
ITexport               -> TokenType
TkKeyword
    Token
ITlabel                -> TokenType
TkKeyword
    Token
ITdynamic              -> TokenType
TkKeyword
    Token
ITsafe                 -> TokenType
TkKeyword
    Token
ITinterruptible        -> TokenType
TkKeyword
    Token
ITunsafe               -> TokenType
TkKeyword
    Token
ITstdcallconv          -> TokenType
TkKeyword
    Token
ITccallconv            -> TokenType
TkKeyword
    Token
ITcapiconv             -> TokenType
TkKeyword
    Token
ITprimcallconv         -> TokenType
TkKeyword
    Token
ITjavascriptcallconv   -> TokenType
TkKeyword
    ITmdo               {} -> TokenType
TkKeyword
    Token
ITfamily               -> TokenType
TkKeyword
    Token
ITrole                 -> TokenType
TkKeyword
    Token
ITgroup                -> TokenType
TkKeyword
    Token
ITby                   -> TokenType
TkKeyword
    Token
ITusing                -> TokenType
TkKeyword
    Token
ITpattern              -> TokenType
TkKeyword
    Token
ITstatic               -> TokenType
TkKeyword
    Token
ITstock                -> TokenType
TkKeyword
    Token
ITanyclass             -> TokenType
TkKeyword

    Token
ITunit                 -> TokenType
TkKeyword
    Token
ITsignature            -> TokenType
TkKeyword
    Token
ITdependency           -> TokenType
TkKeyword
    Token
ITrequires             -> TokenType
TkKeyword

    ITinline_prag       {} -> TokenType
TkPragma
    ITspec_prag         {} -> TokenType
TkPragma
    ITspec_inline_prag  {} -> TokenType
TkPragma
    ITsource_prag       {} -> TokenType
TkPragma
    ITrules_prag        {} -> TokenType
TkPragma
    ITwarning_prag      {} -> TokenType
TkPragma
    ITdeprecated_prag   {} -> TokenType
TkPragma
    ITline_prag         {} -> TokenType
TkPragma
    ITcolumn_prag       {} -> TokenType
TkPragma
    ITscc_prag          {} -> TokenType
TkPragma
    ITgenerated_prag    {} -> TokenType
TkPragma
    ITcore_prag         {} -> TokenType
TkPragma
    ITunpack_prag       {} -> TokenType
TkPragma
    ITnounpack_prag     {} -> TokenType
TkPragma
    ITann_prag          {} -> TokenType
TkPragma
    ITcomplete_prag     {} -> TokenType
TkPragma
    Token
ITclose_prag           -> TokenType
TkPragma
    IToptions_prag      {} -> TokenType
TkPragma
    ITinclude_prag      {} -> TokenType
TkPragma
    Token
ITlanguage_prag        -> TokenType
TkPragma
    ITminimal_prag      {} -> TokenType
TkPragma
    IToverlappable_prag {} -> TokenType
TkPragma
    IToverlapping_prag  {} -> TokenType
TkPragma
    IToverlaps_prag     {} -> TokenType
TkPragma
    ITincoherent_prag   {} -> TokenType
TkPragma
    ITctype             {} -> TokenType
TkPragma

    Token
ITdotdot               -> TokenType
TkGlyph
    Token
ITcolon                -> TokenType
TkGlyph
    ITdcolon            {} -> TokenType
TkGlyph
    Token
ITequal                -> TokenType
TkGlyph
    Token
ITlam                  -> TokenType
TkGlyph
    Token
ITlcase                -> TokenType
TkGlyph
    Token
ITvbar                 -> TokenType
TkGlyph
    ITlarrow            {} -> TokenType
TkGlyph
    ITrarrow            {} -> TokenType
TkGlyph
    Token
ITat                   -> TokenType
TkGlyph
    Token
ITtilde                -> TokenType
TkGlyph
    ITdarrow            {} -> TokenType
TkGlyph
    Token
ITminus                -> TokenType
TkGlyph
    Token
ITbang                 -> TokenType
TkGlyph
    Token
ITdot                  -> TokenType
TkOperator
    ITstar              {} -> TokenType
TkOperator
    Token
ITtypeApp              -> TokenType
TkGlyph

    Token
ITbiglam               -> TokenType
TkGlyph

    Token
ITocurly               -> TokenType
TkSpecial
    Token
ITccurly               -> TokenType
TkSpecial
    Token
ITvocurly              -> TokenType
TkSpecial
    Token
ITvccurly              -> TokenType
TkSpecial
    Token
ITobrack               -> TokenType
TkSpecial
    Token
ITopabrack             -> TokenType
TkSpecial
    Token
ITcpabrack             -> TokenType
TkSpecial
    Token
ITcbrack               -> TokenType
TkSpecial
    Token
IToparen               -> TokenType
TkSpecial
    Token
ITcparen               -> TokenType
TkSpecial
    Token
IToubxparen            -> TokenType
TkSpecial
    Token
ITcubxparen            -> TokenType
TkSpecial
    Token
ITsemi                 -> TokenType
TkSpecial
    Token
ITcomma                -> TokenType
TkSpecial
    Token
ITunderscore           -> TokenType
TkIdentifier
    Token
ITbackquote            -> TokenType
TkSpecial
    Token
ITsimpleQuote          -> TokenType
TkSpecial

    ITvarid             {} -> TokenType
TkIdentifier
    ITconid             {} -> TokenType
TkIdentifier
    ITvarsym            {} -> TokenType
TkOperator
    ITconsym            {} -> TokenType
TkOperator
    ITqvarid            {} -> TokenType
TkIdentifier
    ITqconid            {} -> TokenType
TkIdentifier
    ITqvarsym           {} -> TokenType
TkOperator
    ITqconsym           {} -> TokenType
TkOperator

    ITdupipvarid        {} -> TokenType
TkUnknown
    ITlabelvarid        {} -> TokenType
TkUnknown

    ITchar              {} -> TokenType
TkChar
    ITstring            {} -> TokenType
TkString
    ITinteger           {} -> TokenType
TkNumber
    ITrational          {} -> TokenType
TkNumber

    ITprimchar          {} -> TokenType
TkChar
    ITprimstring        {} -> TokenType
TkString
    ITprimint           {} -> TokenType
TkNumber
    ITprimword          {} -> TokenType
TkNumber
    ITprimfloat         {} -> TokenType
TkNumber
    ITprimdouble        {} -> TokenType
TkNumber

    ITopenExpQuote      {} -> TokenType
TkSpecial
    Token
ITopenPatQuote         -> TokenType
TkSpecial
    Token
ITopenDecQuote         -> TokenType
TkSpecial
    Token
ITopenTypQuote         -> TokenType
TkSpecial
    ITcloseQuote        {} -> TokenType
TkSpecial
    ITopenTExpQuote     {} -> TokenType
TkSpecial
    Token
ITcloseTExpQuote       -> TokenType
TkSpecial
    ITidEscape          {} -> TokenType
TkUnknown
    Token
ITparenEscape          -> TokenType
TkSpecial
    ITidTyEscape        {} -> TokenType
TkUnknown
    Token
ITparenTyEscape        -> TokenType
TkSpecial
    Token
ITtyQuote              -> TokenType
TkSpecial
    ITquasiQuote        {} -> TokenType
TkUnknown
    ITqQuasiQuote       {} -> TokenType
TkUnknown

    Token
ITproc                 -> TokenType
TkKeyword
    Token
ITrec                  -> TokenType
TkKeyword
    IToparenbar         {} -> TokenType
TkGlyph
    ITcparenbar         {} -> TokenType
TkGlyph
    ITlarrowtail        {} -> TokenType
TkGlyph
    ITrarrowtail        {} -> TokenType
TkGlyph
    ITLarrowtail        {} -> TokenType
TkGlyph
    ITRarrowtail        {} -> TokenType
TkGlyph

    Token
ITcomment_line_prag    -> TokenType
TkUnknown
    ITunknown           {} -> TokenType
TkUnknown
    Token
ITeof                  -> TokenType
TkUnknown

    ITlineComment       {} -> TokenType
TkComment
    ITdocCommentNext    {} -> TokenType
TkComment
    ITdocCommentPrev    {} -> TokenType
TkComment
    ITdocCommentNamed   {} -> TokenType
TkComment
    ITdocSection        {} -> TokenType
TkComment
    ITdocOptions        {} -> TokenType
TkComment

    -- The lexer considers top-level pragmas as comments (see `pragState` in
    -- the GHC lexer for more), so we have to manually reverse this. The
    -- following is a hammer: it smashes _all_ pragma-like block comments into
    -- pragmas.
    ITblockComment FilePath
c
      | FilePath -> FilePath -> Bool
forall a. Eq a => [a] -> [a] -> Bool
isPrefixOf FilePath
"{-#" FilePath
c
      , FilePath -> FilePath -> Bool
forall a. Eq a => [a] -> [a] -> Bool
isSuffixOf FilePath
"#-}" FilePath
c -> TokenType
TkPragma
      | Bool
otherwise          -> TokenType
TkComment

-- | Classify given tokens as beginning pragmas (or not).
inPragma :: Bool           -- ^ currently in pragma
         -> Lexer.Token    -- ^ current token
         -> Bool           -- ^ new information about whether we are in a pragma
inPragma :: Bool -> Token -> Bool
inPragma Bool
_ Token
ITclose_prag = Bool
False
inPragma Bool
True Token
_ = Bool
True
inPragma Bool
False Token
tok =
  case Token
tok of
    ITinline_prag       {} -> Bool
True
    ITspec_prag         {} -> Bool
True
    ITspec_inline_prag  {} -> Bool
True
    ITsource_prag       {} -> Bool
True
    ITrules_prag        {} -> Bool
True
    ITwarning_prag      {} -> Bool
True
    ITdeprecated_prag   {} -> Bool
True
    ITline_prag         {} -> Bool
True
    ITcolumn_prag       {} -> Bool
True
    ITscc_prag          {} -> Bool
True
    ITgenerated_prag    {} -> Bool
True
    ITcore_prag         {} -> Bool
True
    ITunpack_prag       {} -> Bool
True
    ITnounpack_prag     {} -> Bool
True
    ITann_prag          {} -> Bool
True
    ITcomplete_prag     {} -> Bool
True
    IToptions_prag      {} -> Bool
True
    ITinclude_prag      {} -> Bool
True
    Token
ITlanguage_prag        -> Bool
True
    ITminimal_prag      {} -> Bool
True
    IToverlappable_prag {} -> Bool
True
    IToverlapping_prag  {} -> Bool
True
    IToverlaps_prag     {} -> Bool
True
    ITincoherent_prag   {} -> Bool
True
    ITctype             {} -> Bool
True

    Token
_                      -> Bool
False