--  C->Haskell Compiler: Lexer for CHS Files
--
--  Author : Manuel M T Chakravarty
--  Created: 13 August 99
--
--  Version $Revision: 1.1 $ from $Date: 2004/11/21 21:05:35 $
--
--  Copyright (c) [1999..2004] Manuel M T Chakravarty
--
--  This file is free software; you can redistribute it and/or modify
--  it under the terms of the GNU General Public License as published by
--  the Free Software Foundation; either version 2 of the License, or
--  (at your option) any later version.
--
--  This file is distributed in the hope that it will be useful,
--  but WITHOUT ANY WARRANTY; without even the implied warranty of
--  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
--  GNU General Public License for more details.
--
--- DESCRIPTION ---------------------------------------------------------------
--
--  Lexer for CHS files; the tokens are only partially recognised.
--
--- DOCU ----------------------------------------------------------------------
--
--  language: Haskell 98
--
--  * CHS files are assumed to be Haskell 98 files that include C2HS binding
--    hooks.
--
--  * Haskell code is not tokenised, but binding hooks (delimited by `{#'and 
--    `#}') are analysed.  Therefore the lexer operates in two states
--    (realised as two lexer coupled by meta actions) depending on whether
--    Haskell code or a binding hook is currently read.  The lexer reading
--    Haskell code is called `base lexer'; the other one, `binding-hook
--    lexer'.  In addition, there is a inline-c lexer, which, as the
--    binding-hook lexer, can be triggered from the base lexer.
--
--  * Base lexer:
--
--      haskell -> (inline \\ special)*
--               | special \\ `"'
--               | comment
--               | nested
--               | hstring
--               | '{#'
--               | cpp
--      special -> `(' | `{' | `-' | `"'
--      ctrl    -> `\n' | `\f' | `\r' | `\t' | `\v'
--
--      inline  -> any \\ ctrl
--      any     -> '\0'..'\255'
--
--    Within the base lexer control codes appear as separate tokens in the
--    token list.
--
--    NOTE: It is important that `{' is an extra lexeme and not added as an
--          optional component at the end of the first alternative for
--          `haskell'.  Otherwise, the principle of the longest match will
--          divide `foo {#' into the tokens `foo {' and `#' instead of `foo '
--          and `{#'.
--
--    One line comments are handled by
--
--      comment -> `--' (any \\ `\n')* `\n'
--
--    and nested comments by
--
--      nested -> `{-' any* `-}'
--
--    where `any*' may contain _balanced_ occurrences of `{-' and `-}'.
--
--      hstring -> `"' inhstr* `"'
--      inhstr  -> ` '..`\127' \\ `"'
--               | `\"'
--
--    Pre-precessor directives as well as the switch to inline-C code are
--    formed as follows:
--
--      cpp     -> `\n#' (inline | `\t')* `\n'
--               | `\n#c' (' ' | '\t')* `\n'
--
--    We allow whitespace between the `#' and the actual directive, but in `#c'
--    and `#endc' the directive must immediately follow the `#'.  This might
--    be regarded as a not entirely orthogonal design, but simplifies matters
--    especially for `#endc'.
--
--  * On encountering the lexeme `{#', a meta action in the base lexer
--    transfers control to the following binding-hook lexer:
--
--      ident       -> letter (letter | digit | `\'')*
--                   | `\'' letter (letter | digit)* `\''
--      reservedid  -> `as' | `call' | `class' | `context' | `deriving' 
--                   | `enum' | `foreign' | `fun' | `get' | `lib' 
--                   | `newtype' | `pointer' | `prefix' | `pure' | `set'
--                   | `sizeof' | `stable' | `type' | `underscoreToCase' 
--                   | `unsafe' | `with' | 'lock' | 'unlock'
--      reservedsym -> `{#' | `#}' | `{' | `}' | `,' | `.' | `->' | `=' 
--                   | `=>' | '-' | `*' | `&' | `^'
--      string      -> `"' instr* `"'
--      verbhs      -> `\`' instr* `\''
--      instr       -> ` '..`\127' \\ `"'
--      comment     -> `--' (any \\ `\n')* `\n'
--
--    Control characters, white space, and comments are discarded in the
--    binding-hook lexer.  Nested comments are not allowed in a binding hook.
--    Identifiers can be enclosed in single quotes to avoid collision with
--    C->Haskell keywords.
--
--  * In the binding-hook lexer, the lexeme `#}' transfers control back to the 
--    base lexer.  An occurence of the lexeme `{#' inside the binding-hook
--    lexer triggers an error.  The symbol `{#' is not explcitly represented
--    in the resulting token stream.  However, the occurrence of a token
--    representing one of the reserved identifiers `call', `context', `enum',
--    and `field' marks the start of a binding hook.  Strictly speaking, `#}'
--    need also not occur in the token stream, as the next `haskell' token
--    marks a hook's end.  It is, however, useful for producing accurate error 
--    messages (in case an hook is closed to early) to have a token
--    representing `#}'.
--
--  * The rule `ident' describes Haskell identifiers, but without
--    distinguishing between variable and constructor identifers (ie, those
--    starting with a lowercase and those starting with an uppercase letter).
--    However, we use it also to scan C identifiers; although, strictly
--    speaking, it is too general for them.  In the case of C identifiers,
--    this should not have any impact on the range of descriptions accepted by
--    the tool, as illegal identifier will never occur in a C header file that
--    is accepted by the C lexer.  In the case of Haskell identifiers, a
--    confusion between variable and constructor identifiers will be noted by
--    the Haskell compiler translating the code generated by c2hs.  Moreover,
--    identifiers can be enclosed in single quotes to avoid collision with
--    C->Haskell keywords, but those may not contain apostrophes.
--
--  * Any line starting with the character `#' is regarded to be a C
--    preprocessor directive.  With the exception of `#c' and `#endc', which
--    delimit a set of lines containing inline C code.  Hence, in the base
--    lexer, the lexeme `#c' triggers a meta action transferring control to the
--    following inline-C lexer:
--
--      c  -> inline* \\ `\n#endc'
--
--    We do neither treat C strings nor C comments specially.  Hence, if the
--    string "\n#endc" occurs in a comment, we will mistakenly regard it as
--    the end of the inline C code.  Note that the problem cannot happen with
--    strings, as C does not permit strings that extend over multiple lines.
--    At the moment, it just seems not to be worth the effort required to
--    treat this situation more accurately.
--
--    The inline-C lexer also doesn't handle pre-processor directives
--    specially.  Hence, structural pre-processor directives (namely,
--    conditionals) may occur within inline-C code only properly nested.
--
--  Shortcomings
--  ~~~~~~~~~~~~
--  Some lexemes that include single and double quote characters are not lexed
--  correctly.  See the implementation comment at `haskell' for details.
--
--
--- TODO ----------------------------------------------------------------------
--
--  * In `haskell', the case of a single `"' (without a matching second one)
--    is caught by an eplicit error raising rule.  This shouldn't be
--    necessary, but for some strange reason, the lexer otherwise hangs when a 
--    single `"' appears in the input.
--
--  * Comments in the "gap" of a string are not yet supported.
--

module CHSLexer (CHSToken(..), lexCHS) 
where 

import Data.List         ((\\))
import Data.Char         (isDigit)
import Control.Monad     (liftM)
import Numeric   (readDec, readOct, readHex)

import Position  (Position(..), Pos(posOf), incPos, retPos, tabPos)
import Errors    (ErrorLvl(..), Error, makeError)
import UNames    (NameSupply, Name, names)
import Idents    (Ident, lexemeToIdent, identToLexeme)
import Lexers    (Regexp, Lexer, Action, epsilon, char, (+>), lexaction,
                  lexactionErr, lexmeta, (>|<), (>||<), ctrlLexer, star, plus,
                  quest, alt, string, LexerState, execLexer)

import C2HSState (CST, raise, raiseError, nop, getNameSupply) 


-- token definition
-- ----------------

-- possible tokens (EXPORTED)
--
data CHSToken = CHSTokArrow   Position          -- `->'
              | CHSTokDArrow  Position          -- `=>'
              | CHSTokDot     Position          -- `.'
              | CHSTokComma   Position          -- `,'
              | CHSTokEqual   Position          -- `='
              | CHSTokMinus   Position          -- `-'
              | CHSTokStar    Position          -- `*'
              | CHSTokAmp     Position          -- `&'
              | CHSTokHat     Position          -- `^'
              | CHSTokLBrace  Position          -- `{'
              | CHSTokRBrace  Position          -- `}'
              | CHSTokLParen  Position          -- `('
              | CHSTokRParen  Position          -- `)'
              | CHSTokEndHook Position          -- `#}'
              | CHSTokAs      Position          -- `as'
              | CHSTokCall    Position          -- `call'
              | CHSTokClass   Position          -- `class'
              | CHSTokContext Position          -- `context'
              | CHSTokDerive  Position          -- `deriving'
              | CHSTokEnum    Position          -- `enum'
              | CHSTokForeign Position          -- `foreign'
              | CHSTokFun     Position          -- `fun'
              | CHSTokGet     Position          -- `get'
              | CHSTokImport  Position          -- `import'
              | CHSTokLib     Position          -- `lib'
              | CHSTokNewtype Position          -- `newtype'
              | CHSTokPointer Position          -- `pointer'
              | CHSTokPrefix  Position          -- `prefix'
              | CHSTokPure    Position          -- `pure'
              | CHSTokQualif  Position          -- `qualified'
              | CHSTokSet     Position          -- `set'
              | CHSTokSizeof  Position          -- `sizeof'
              | CHSTokStable  Position          -- `stable'
              | CHSTokType    Position          -- `type'
              | CHSTok_2Case  Position          -- `underscoreToCase'
              | CHSTokUnsafe  Position          -- `unsafe'
              | CHSTokWith    Position          -- `with'
              | CHSTokLock    Position          -- `lock'
              | CHSTokNolock  Position          -- `nolock'
              | CHSTokString  Position String   -- string 
              | CHSTokHSVerb  Position String   -- verbatim Haskell (`...')
              | CHSTokIdent   Position Ident    -- identifier
              | CHSTokHaskell Position String   -- verbatim Haskell code
              | CHSTokCPP     Position String   -- pre-processor directive
              | CHSTokLine    Position          -- line pragma
              | CHSTokC       Position String   -- verbatim C code
              | CHSTokCtrl    Position Char     -- control code
              | CHSTokPragma  Position          -- '{-# LANGUAGE' language pragma begin
              | CHSTokPragEnd Position          -- '#-}' language pragma end

instance Pos CHSToken where
  posOf :: CHSToken -> Position
posOf (CHSTokArrow   Position
pos  ) = Position
pos
  posOf (CHSTokDArrow  Position
pos  ) = Position
pos
  posOf (CHSTokDot     Position
pos  ) = Position
pos
  posOf (CHSTokComma   Position
pos  ) = Position
pos
  posOf (CHSTokEqual   Position
pos  ) = Position
pos
  posOf (CHSTokMinus   Position
pos  ) = Position
pos
  posOf (CHSTokStar    Position
pos  ) = Position
pos
  posOf (CHSTokAmp     Position
pos  ) = Position
pos
  posOf (CHSTokHat     Position
pos  ) = Position
pos
  posOf (CHSTokLBrace  Position
pos  ) = Position
pos
  posOf (CHSTokRBrace  Position
pos  ) = Position
pos
  posOf (CHSTokLParen  Position
pos  ) = Position
pos
  posOf (CHSTokRParen  Position
pos  ) = Position
pos
  posOf (CHSTokEndHook Position
pos  ) = Position
pos
  posOf (CHSTokAs      Position
pos  ) = Position
pos
  posOf (CHSTokCall    Position
pos  ) = Position
pos
  posOf (CHSTokClass   Position
pos  ) = Position
pos
  posOf (CHSTokContext Position
pos  ) = Position
pos
  posOf (CHSTokDerive  Position
pos  ) = Position
pos
  posOf (CHSTokEnum    Position
pos  ) = Position
pos
  posOf (CHSTokForeign Position
pos  ) = Position
pos
  posOf (CHSTokFun     Position
pos  ) = Position
pos
  posOf (CHSTokGet     Position
pos  ) = Position
pos
  posOf (CHSTokImport  Position
pos  ) = Position
pos
  posOf (CHSTokLib     Position
pos  ) = Position
pos
  posOf (CHSTokNewtype Position
pos  ) = Position
pos
  posOf (CHSTokPointer Position
pos  ) = Position
pos
  posOf (CHSTokPrefix  Position
pos  ) = Position
pos
  posOf (CHSTokPure    Position
pos  ) = Position
pos
  posOf (CHSTokQualif  Position
pos  ) = Position
pos
  posOf (CHSTokSet     Position
pos  ) = Position
pos
  posOf (CHSTokSizeof  Position
pos  ) = Position
pos
  posOf (CHSTokStable  Position
pos  ) = Position
pos
  posOf (CHSTokType    Position
pos  ) = Position
pos
  posOf (CHSTok_2Case  Position
pos  ) = Position
pos
  posOf (CHSTokUnsafe  Position
pos  ) = Position
pos
  posOf (CHSTokWith    Position
pos  ) = Position
pos
  posOf (CHSTokLock    Position
pos  ) = Position
pos
  posOf (CHSTokNolock  Position
pos  ) = Position
pos
  posOf (CHSTokString  Position
pos String
_) = Position
pos
  posOf (CHSTokHSVerb  Position
pos String
_) = Position
pos
  posOf (CHSTokIdent   Position
pos Ident
_) = Position
pos
  posOf (CHSTokHaskell Position
pos String
_) = Position
pos
  posOf (CHSTokCPP     Position
pos String
_) = Position
pos
  posOf (CHSTokC       Position
pos String
_) = Position
pos
  posOf (CHSTokCtrl    Position
pos Char
_) = Position
pos
  posOf (CHSTokPragma  Position
pos  ) = Position
pos
  posOf (CHSTokPragEnd Position
pos  ) = Position
pos

instance Eq CHSToken where
  (CHSTokArrow    Position
_  ) == :: CHSToken -> CHSToken -> Bool
== (CHSTokArrow    Position
_  ) = Bool
True
  (CHSTokDArrow   Position
_  ) == (CHSTokDArrow   Position
_  ) = Bool
True
  (CHSTokDot      Position
_  ) == (CHSTokDot      Position
_  ) = Bool
True
  (CHSTokComma    Position
_  ) == (CHSTokComma    Position
_  ) = Bool
True
  (CHSTokEqual    Position
_  ) == (CHSTokEqual    Position
_  ) = Bool
True
  (CHSTokMinus    Position
_  ) == (CHSTokMinus    Position
_  ) = Bool
True
  (CHSTokStar     Position
_  ) == (CHSTokStar     Position
_  ) = Bool
True
  (CHSTokAmp      Position
_  ) == (CHSTokAmp      Position
_  ) = Bool
True
  (CHSTokHat      Position
_  ) == (CHSTokHat      Position
_  ) = Bool
True
  (CHSTokLBrace   Position
_  ) == (CHSTokLBrace   Position
_  ) = Bool
True
  (CHSTokRBrace   Position
_  ) == (CHSTokRBrace   Position
_  ) = Bool
True
  (CHSTokLParen   Position
_  ) == (CHSTokLParen   Position
_  ) = Bool
True
  (CHSTokRParen   Position
_  ) == (CHSTokRParen   Position
_  ) = Bool
True
  (CHSTokEndHook  Position
_  ) == (CHSTokEndHook  Position
_  ) = Bool
True
  (CHSTokAs       Position
_  ) == (CHSTokAs       Position
_  ) = Bool
True
  (CHSTokCall     Position
_  ) == (CHSTokCall     Position
_  ) = Bool
True
  (CHSTokClass    Position
_  ) == (CHSTokClass    Position
_  ) = Bool
True
  (CHSTokContext  Position
_  ) == (CHSTokContext  Position
_  ) = Bool
True
  (CHSTokDerive   Position
_  ) == (CHSTokDerive   Position
_  ) = Bool
True
  (CHSTokEnum     Position
_  ) == (CHSTokEnum     Position
_  ) = Bool
True
  (CHSTokForeign  Position
_  ) == (CHSTokForeign  Position
_  ) = Bool
True
  (CHSTokFun      Position
_  ) == (CHSTokFun      Position
_  ) = Bool
True
  (CHSTokGet      Position
_  ) == (CHSTokGet      Position
_  ) = Bool
True
  (CHSTokImport   Position
_  ) == (CHSTokImport   Position
_  ) = Bool
True
  (CHSTokLib      Position
_  ) == (CHSTokLib      Position
_  ) = Bool
True
  (CHSTokNewtype  Position
_  ) == (CHSTokNewtype  Position
_  ) = Bool
True
  (CHSTokPointer  Position
_  ) == (CHSTokPointer  Position
_  ) = Bool
True
  (CHSTokPrefix   Position
_  ) == (CHSTokPrefix   Position
_  ) = Bool
True
  (CHSTokPure     Position
_  ) == (CHSTokPure     Position
_  ) = Bool
True
  (CHSTokQualif   Position
_  ) == (CHSTokQualif   Position
_  ) = Bool
True
  (CHSTokSet      Position
_  ) == (CHSTokSet      Position
_  ) = Bool
True
  (CHSTokSizeof   Position
_  ) == (CHSTokSizeof   Position
_  ) = Bool
True
  (CHSTokStable   Position
_  ) == (CHSTokStable   Position
_  ) = Bool
True
  (CHSTokType     Position
_  ) == (CHSTokType     Position
_  ) = Bool
True
  (CHSTok_2Case   Position
_  ) == (CHSTok_2Case   Position
_  ) = Bool
True
  (CHSTokUnsafe   Position
_  ) == (CHSTokUnsafe   Position
_  ) = Bool
True
  (CHSTokWith     Position
_  ) == (CHSTokWith     Position
_  ) = Bool
True
  (CHSTokLock     Position
_  ) == (CHSTokLock     Position
_  ) = Bool
True
  (CHSTokNolock   Position
_  ) == (CHSTokNolock   Position
_  ) = Bool
True
  (CHSTokString   Position
_ String
_) == (CHSTokString   Position
_ String
_) = Bool
True
  (CHSTokHSVerb   Position
_ String
_) == (CHSTokHSVerb   Position
_ String
_) = Bool
True
  (CHSTokIdent    Position
_ Ident
_) == (CHSTokIdent    Position
_ Ident
_) = Bool
True
  (CHSTokHaskell  Position
_ String
_) == (CHSTokHaskell  Position
_ String
_) = Bool
True
  (CHSTokCPP      Position
_ String
_) == (CHSTokCPP      Position
_ String
_) = Bool
True
  (CHSTokC        Position
_ String
_) == (CHSTokC        Position
_ String
_) = Bool
True
  (CHSTokCtrl     Position
_ Char
_) == (CHSTokCtrl     Position
_ Char
_) = Bool
True
  (CHSTokPragma   Position
_  ) == (CHSTokPragma   Position
_  ) = Bool
True
  (CHSTokPragEnd  Position
_  ) == (CHSTokPragEnd  Position
_  ) = Bool
True
  CHSToken
_                    == CHSToken
_                    = Bool
False

instance Show CHSToken where
  showsPrec :: Int -> CHSToken -> ShowS
showsPrec Int
_ (CHSTokArrow   Position
_  ) = String -> ShowS
showString String
"->"
  showsPrec Int
_ (CHSTokDArrow  Position
_  ) = String -> ShowS
showString String
"=>"
  showsPrec Int
_ (CHSTokDot     Position
_  ) = String -> ShowS
showString String
"."
  showsPrec Int
_ (CHSTokComma   Position
_  ) = String -> ShowS
showString String
","
  showsPrec Int
_ (CHSTokEqual   Position
_  ) = String -> ShowS
showString String
"="
  showsPrec Int
_ (CHSTokMinus   Position
_  ) = String -> ShowS
showString String
"-"
  showsPrec Int
_ (CHSTokStar    Position
_  ) = String -> ShowS
showString String
"*"
  showsPrec Int
_ (CHSTokAmp     Position
_  ) = String -> ShowS
showString String
"&"
  showsPrec Int
_ (CHSTokHat     Position
_  ) = String -> ShowS
showString String
"^"
  showsPrec Int
_ (CHSTokLBrace  Position
_  ) = String -> ShowS
showString String
"{"
  showsPrec Int
_ (CHSTokRBrace  Position
_  ) = String -> ShowS
showString String
"}"
  showsPrec Int
_ (CHSTokLParen  Position
_  ) = String -> ShowS
showString String
"("
  showsPrec Int
_ (CHSTokRParen  Position
_  ) = String -> ShowS
showString String
")"
  showsPrec Int
_ (CHSTokEndHook Position
_  ) = String -> ShowS
showString String
"#}"
  showsPrec Int
_ (CHSTokAs      Position
_  ) = String -> ShowS
showString String
"as"
  showsPrec Int
_ (CHSTokCall    Position
_  ) = String -> ShowS
showString String
"call"
  showsPrec Int
_ (CHSTokClass   Position
_  ) = String -> ShowS
showString String
"class"
  showsPrec Int
_ (CHSTokContext Position
_  ) = String -> ShowS
showString String
"context"
  showsPrec Int
_ (CHSTokDerive  Position
_  ) = String -> ShowS
showString String
"deriving"
  showsPrec Int
_ (CHSTokEnum    Position
_  ) = String -> ShowS
showString String
"enum"
  showsPrec Int
_ (CHSTokForeign Position
_  ) = String -> ShowS
showString String
"foreign"
  showsPrec Int
_ (CHSTokFun     Position
_  ) = String -> ShowS
showString String
"fun"
  showsPrec Int
_ (CHSTokGet     Position
_  ) = String -> ShowS
showString String
"get"
  showsPrec Int
_ (CHSTokImport  Position
_  ) = String -> ShowS
showString String
"import"
  showsPrec Int
_ (CHSTokLib     Position
_  ) = String -> ShowS
showString String
"lib"
  showsPrec Int
_ (CHSTokNewtype Position
_  ) = String -> ShowS
showString String
"newtype"
  showsPrec Int
_ (CHSTokPointer Position
_  ) = String -> ShowS
showString String
"pointer"
  showsPrec Int
_ (CHSTokPrefix  Position
_  ) = String -> ShowS
showString String
"prefix"
  showsPrec Int
_ (CHSTokPure    Position
_  ) = String -> ShowS
showString String
"pure"
  showsPrec Int
_ (CHSTokQualif  Position
_  ) = String -> ShowS
showString String
"qualified"
  showsPrec Int
_ (CHSTokSet     Position
_  ) = String -> ShowS
showString String
"set"
  showsPrec Int
_ (CHSTokSizeof  Position
_  ) = String -> ShowS
showString String
"sizeof"
  showsPrec Int
_ (CHSTokStable  Position
_  ) = String -> ShowS
showString String
"stable"
  showsPrec Int
_ (CHSTokType    Position
_  ) = String -> ShowS
showString String
"type"
  showsPrec Int
_ (CHSTok_2Case  Position
_  ) = String -> ShowS
showString String
"underscoreToCase"
  showsPrec Int
_ (CHSTokUnsafe  Position
_  ) = String -> ShowS
showString String
"unsafe"
  showsPrec Int
_ (CHSTokWith    Position
_  ) = String -> ShowS
showString String
"with"
  showsPrec Int
_ (CHSTokLock    Position
_  ) = String -> ShowS
showString String
"lock"
  showsPrec Int
_ (CHSTokNolock  Position
_  ) = String -> ShowS
showString String
"nolock"
  showsPrec Int
_ (CHSTokString  Position
_ String
s) = String -> ShowS
showString (String
"\"" forall a. [a] -> [a] -> [a]
++ String
s forall a. [a] -> [a] -> [a]
++ String
"\"")
  showsPrec Int
_ (CHSTokHSVerb  Position
_ String
s) = String -> ShowS
showString (String
"`" forall a. [a] -> [a] -> [a]
++ String
s forall a. [a] -> [a] -> [a]
++ String
"'")
  showsPrec Int
_ (CHSTokIdent   Position
_ Ident
i) = (String -> ShowS
showString forall b c a. (b -> c) -> (a -> b) -> a -> c
. Ident -> String
identToLexeme) Ident
i
  showsPrec Int
_ (CHSTokHaskell Position
_ String
s) = String -> ShowS
showString String
s
  showsPrec Int
_ (CHSTokCPP     Position
_ String
s) = String -> ShowS
showString String
s
  showsPrec Int
_ (CHSTokC       Position
_ String
s) = String -> ShowS
showString String
s
  showsPrec Int
_ (CHSTokCtrl    Position
_ Char
c) = Char -> ShowS
showChar Char
c
  showsPrec Int
_ (CHSTokPragma  Position
_  ) = String -> ShowS
showString String
"{-# LANGUAGE"
  showsPrec Int
_ (CHSTokPragEnd Position
_  ) = String -> ShowS
showString String
"#-}"


-- lexer state
-- -----------

-- state threaded through the lexer
--
data CHSLexerState = CHSLS {
                       CHSLexerState -> Int
nestLvl :: Int,   -- nesting depth of nested comments
                       CHSLexerState -> Bool
inHook  :: Bool,  -- within a binding hook?
                       CHSLexerState -> [Name]
namesup :: [Name] -- supply of unique names
                     }

-- initial state
--
initialState :: CST s CHSLexerState
initialState :: forall s. CST s CHSLexerState
initialState  = do
                  [Name]
namesup <- forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM NameSupply -> [Name]
names forall e s. PreCST e s NameSupply
getNameSupply
                  forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ CHSLS {
                             nestLvl :: Int
nestLvl = Int
0,
                             inHook :: Bool
inHook  = Bool
False,
                             namesup :: [Name]
namesup = [Name]
namesup
                           }

-- raise an error if the given state is not a final state
--
assertFinalState :: Position -> CHSLexerState -> CST s ()
assertFinalState :: forall s. Position -> CHSLexerState -> CST s ()
assertFinalState Position
pos CHSLS {nestLvl :: CHSLexerState -> Int
nestLvl = Int
nestLvl, inHook :: CHSLexerState -> Bool
inHook = Bool
inHook} 
  | Int
nestLvl forall a. Ord a => a -> a -> Bool
> Int
0 = forall e s. Position -> [String] -> PreCST e s ()
raiseError Position
pos [String
"Unexpected end of file!",
                                  String
"Unclosed nested comment."]
  | Bool
inHook      = forall e s. Position -> [String] -> PreCST e s ()
raiseError Position
pos [String
"Unexpected end of file!",
                                  String
"Unclosed binding hook."]
  | Bool
otherwise   = forall e s. PreCST e s ()
nop

-- lexer and action type used throughout this specification
--
type CHSLexer  = Lexer  CHSLexerState CHSToken
type CHSAction = Action               CHSToken
type CHSRegexp = Regexp CHSLexerState CHSToken

-- for actions that need a new unique name
--
infixl 3 `lexactionName`
lexactionName :: CHSRegexp 
              -> (String -> Position -> Name -> CHSToken) 
              -> CHSLexer
CHSRegexp
re lexactionName :: CHSRegexp -> (String -> Position -> Name -> CHSToken) -> CHSLexer
`lexactionName` String -> Position -> Name -> CHSToken
action = CHSRegexp
re forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` forall {a} {a}.
String
-> Position
-> CHSLexerState
-> (Maybe (Either a CHSToken), Position, CHSLexerState, Maybe a)
action'
  where
    action' :: String
-> Position
-> CHSLexerState
-> (Maybe (Either a CHSToken), Position, CHSLexerState, Maybe a)
action' String
str Position
pos CHSLexerState
state = let Name
name:[Name]
ns = CHSLexerState -> [Name]
namesup CHSLexerState
state
                            in
                            (forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall a b. b -> Either a b
Right (String -> Position -> Name -> CHSToken
action String
str Position
pos Name
name),
                             Position -> Int -> Position
incPos Position
pos (forall (t :: * -> *) a. Foldable t => t a -> Int
length String
str),
                             CHSLexerState
state {namesup :: [Name]
namesup = [Name]
ns},
                             forall a. Maybe a
Nothing)


-- lexical specification
-- ---------------------

-- the lexical definition of the tokens (the base lexer)
--
--
chslexer :: CHSLexer
chslexer :: CHSLexer
chslexer  =      CHSLexer
pragma         -- LANGUAGE pragma
            forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
haskell        -- Haskell code
            forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
nested         -- nested comments
            forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
ctrl           -- control code (that has to be preserved)
            forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
hook           -- start of a binding hook
            forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
cpp            -- a pre-processor directive (or `#c')

-- the LANGUAGE pragma
pragma :: CHSLexer
pragma :: CHSLexer
pragma = forall s t. String -> Regexp s t
string String
"{-# LANGUAGE" forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` \String
_ Position
pos CHSLexerState
s ->
         (forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall a b. b -> Either a b
Right (Position -> CHSToken
CHSTokPragma Position
pos), Position -> Int -> Position
incPos Position
pos Int
12, CHSLexerState
s, forall a. a -> Maybe a
Just CHSLexer
langLexer)

langLexer :: CHSLexer
langLexer :: CHSLexer
langLexer = CHSLexer
whitespace forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
identOrKW forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
symbol forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||<
            (forall s t. String -> Regexp s t
string String
"#-}" forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` \String
_ Position
pos CHSLexerState
s ->
            (forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall a b. b -> Either a b
Right (Position -> CHSToken
CHSTokPragEnd Position
pos), Position -> Int -> Position
incPos Position
pos Int
3, CHSLexerState
s, forall a. a -> Maybe a
Just CHSLexer
chslexer))

-- stream of Haskell code (terminated by a control character or binding hook)
--
haskell :: CHSLexer
--
-- NB: We need to make sure that '"' is not regarded as the beginning of a
--     string; however, we cannot really lex character literals properly
--     without lexing identifiers (as the latter may containing single quotes
--     as part of their lexeme).  Thus, we special case '"'.  This is still a
--     kludge, as a program fragment, such as
--
--       foo'"'strange string"
--
--     will not be handled correctly.
--
haskell :: CHSLexer
haskell  = (    forall {s} {t}. Regexp s t
anyButSpecialforall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` forall {s} {t}. Regexp s t
epsilon
            forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< forall {s} {t}. Regexp s t
specialButQuotes
            forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< forall s t. Char -> Regexp s t
char Char
'"'  forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> forall {s} {t}. Regexp s t
inhstrforall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` forall s t. Char -> Regexp s t
char Char
'"'
            forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< forall s t. String -> Regexp s t
string String
"'\"'"                           -- special case of "
            forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< forall s t. String -> Regexp s t
string String
"--" forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> forall {s} {t}. Regexp s t
anyButNLforall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` forall {s} {t}. Regexp s t
epsilon   -- comment
           )
           forall s t. Regexp s t -> Action t -> Lexer s t
`lexaction` CHSAction
copyVerbatim
           forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< forall s t. Char -> Regexp s t
char Char
'"'                                -- this is a bad kludge
                forall s t. Regexp s t -> ActionErr t -> Lexer s t
`lexactionErr` 
                  \String
_ Position
pos -> (forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ ErrorLvl -> Position -> [String] -> Error
makeError ErrorLvl
ErrorErr Position
pos
                                              [String
"Lexical error!", 
                                              String
"Unclosed string."])
           where
             anyButSpecial :: Regexp s t
anyButSpecial    = forall s t. String -> Regexp s t
alt (String
inlineSet forall a. Eq a => [a] -> [a] -> [a]
\\ String
specialSet)
             specialButQuotes :: Regexp s t
specialButQuotes = forall s t. String -> Regexp s t
alt (String
specialSet forall a. Eq a => [a] -> [a] -> [a]
\\ [Char
'"'])
             anyButNL :: Regexp s t
anyButNL         = forall s t. String -> Regexp s t
alt (String
anySet forall a. Eq a => [a] -> [a] -> [a]
\\ [Char
'\n'])
             inhstr :: Regexp s t
inhstr           = forall {s} {t}. Regexp s t
instr forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< forall s t. Char -> Regexp s t
char Char
'\\' forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< forall s t. String -> Regexp s t
string String
"\\\"" forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< forall {s} {t}. Regexp s t
gap
             gap :: Regexp s t
gap              = forall s t. Char -> Regexp s t
char Char
'\\' forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> forall s t. String -> Regexp s t
alt (Char
' 'forall a. a -> [a] -> [a]
:String
ctrlSet)forall s t. Regexp s t -> Regexp s t -> Regexp s t
`plus` forall s t. Char -> Regexp s t
char Char
'\\'

-- action copying the input verbatim to `CHSTokHaskell' tokens
--
copyVerbatim        :: CHSAction 
copyVerbatim :: CHSAction
copyVerbatim String
cs Position
pos  = forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ Position -> String -> CHSToken
CHSTokHaskell Position
pos String
cs

-- nested comments
--
nested :: CHSLexer
nested :: CHSLexer
nested  =
       forall s t. String -> Regexp s t
string String
"{-"              {- for Haskell emacs mode :-( -}
       forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` forall {a}.
String
-> Position
-> CHSLexerState
-> (Maybe (Either a CHSToken), Position, CHSLexerState,
    Maybe CHSLexer)
enterComment
  forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||<
       forall s t. String -> Regexp s t
string String
"-}"
       forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` String
-> Position
-> CHSLexerState
-> (Maybe (Either Error CHSToken), Position, CHSLexerState,
    Maybe CHSLexer)
leaveComment
  where
    enterComment :: String
-> Position
-> CHSLexerState
-> (Maybe (Either a CHSToken), Position, CHSLexerState,
    Maybe CHSLexer)
enterComment String
cs Position
pos CHSLexerState
s =
      (forall {a}. String -> Position -> Maybe (Either a CHSToken)
copyVerbatim' String
cs Position
pos,                    -- collect the lexeme
       Position -> Int -> Position
incPos Position
pos Int
2,                            -- advance current position
       CHSLexerState
s {nestLvl :: Int
nestLvl = CHSLexerState -> Int
nestLvl CHSLexerState
s forall a. Num a => a -> a -> a
+ Int
1},             -- increase nesting level
       forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ CHSLexer
inNestedComment)                  -- continue in comment lexer
    --
    leaveComment :: String
-> Position
-> CHSLexerState
-> (Maybe (Either Error CHSToken), Position, CHSLexerState,
    Maybe CHSLexer)
leaveComment String
cs Position
pos CHSLexerState
s =
      case CHSLexerState -> Int
nestLvl CHSLexerState
s of
        Int
0 -> (forall {b}. Position -> Maybe (Either Error b)
commentCloseErr Position
pos,              -- 0: -} outside comment => err
              Position -> Int -> Position
incPos Position
pos Int
2,                     -- advance current position
              CHSLexerState
s,
              forall a. Maybe a
Nothing)
        Int
1 -> (forall {a}. String -> Position -> Maybe (Either a CHSToken)
copyVerbatim' String
cs Position
pos,             -- collect the lexeme
              Position -> Int -> Position
incPos Position
pos Int
2,                     -- advance current position
              CHSLexerState
s {nestLvl :: Int
nestLvl = CHSLexerState -> Int
nestLvl CHSLexerState
s forall a. Num a => a -> a -> a
- Int
1},      -- decrease nesting level
              forall a. a -> Maybe a
Just CHSLexer
chslexer)                    -- 1: continue with root lexer
        Int
_ -> (forall {a}. String -> Position -> Maybe (Either a CHSToken)
copyVerbatim' String
cs Position
pos,             -- collect the lexeme
              Position -> Int -> Position
incPos Position
pos Int
2,                     -- advance current position
              CHSLexerState
s {nestLvl :: Int
nestLvl = CHSLexerState -> Int
nestLvl CHSLexerState
s forall a. Num a => a -> a -> a
- Int
1},      -- decrease nesting level
              forall a. Maybe a
Nothing)                          -- _: cont with comment lexer
    --
    copyVerbatim' :: String -> Position -> Maybe (Either a CHSToken)
copyVerbatim' String
cs Position
pos  = forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall a b. b -> Either a b
Right (Position -> String -> CHSToken
CHSTokHaskell Position
pos String
cs)
    --
    commentCloseErr :: Position -> Maybe (Either Error b)
commentCloseErr Position
pos =
      forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall a b. a -> Either a b
Left (ErrorLvl -> Position -> [String] -> Error
makeError ErrorLvl
ErrorErr Position
pos
                             [String
"Lexical error!", 
                             String
"`-}' not preceded by a matching `{-'."])
                             {- for Haskell emacs mode :-( -}


-- lexer processing the inner of a comment
--
inNestedComment :: CHSLexer
inNestedComment :: CHSLexer
inNestedComment  =      CHSLexer
commentInterior         -- inside a comment
                   forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
nested                  -- nested comments
                   forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
ctrl                    -- control code (preserved)

-- standard characters in a nested comment
--
commentInterior :: CHSLexer
commentInterior :: CHSLexer
commentInterior  = (    forall {s} {t}. Regexp s t
anyButSpecialforall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` forall {s} {t}. Regexp s t
epsilon
                    forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< forall {s} {t}. Regexp s t
special
                   )
                   forall s t. Regexp s t -> Action t -> Lexer s t
`lexaction` CHSAction
copyVerbatim
                   where
                     anyButSpecial :: Regexp s t
anyButSpecial = forall s t. String -> Regexp s t
alt (String
inlineSet forall a. Eq a => [a] -> [a] -> [a]
\\ String
commentSpecialSet)
                     special :: Regexp s t
special       = forall s t. String -> Regexp s t
alt String
commentSpecialSet

-- control code in the base lexer (is turned into a token)
--
--  * this covers exactly the same set of characters as contained in `ctrlSet'
--   and `Lexers.ctrlLexer' and advances positions also like the `ctrlLexer'
--
ctrl :: CHSLexer
ctrl :: CHSLexer
ctrl  =     
       forall s t. Char -> Regexp s t
char Char
'\n' forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` forall {c} {a} {a}.
String
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
newline
  forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< forall s t. Char -> Regexp s t
char Char
'\r' forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` forall {c} {a} {a}.
String
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
newline
  forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< forall s t. Char -> Regexp s t
char Char
'\v' forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` forall {c} {a} {a}.
String
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
newline
  forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< forall s t. Char -> Regexp s t
char Char
'\f' forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` forall {c} {a} {a}.
String
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
formfeed
  forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< forall s t. Char -> Regexp s t
char Char
'\t' forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` forall {c} {a} {a}.
String
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
tab
  where
    newline :: String
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
newline  [Char
c] Position
pos = forall {b} {c} {a} {a}.
Position
-> Char -> b -> c -> (Maybe (Either a CHSToken), b, c, Maybe a)
ctrlResult Position
pos Char
c (Position -> Position
retPos Position
pos)
    formfeed :: String
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
formfeed [Char
c] Position
pos = forall {b} {c} {a} {a}.
Position
-> Char -> b -> c -> (Maybe (Either a CHSToken), b, c, Maybe a)
ctrlResult Position
pos Char
c (Position -> Int -> Position
incPos Position
pos Int
1)
    tab :: String
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
tab      [Char
c] Position
pos = forall {b} {c} {a} {a}.
Position
-> Char -> b -> c -> (Maybe (Either a CHSToken), b, c, Maybe a)
ctrlResult Position
pos Char
c (Position -> Position
tabPos Position
pos)

    ctrlResult :: Position
-> Char -> b -> c -> (Maybe (Either a CHSToken), b, c, Maybe a)
ctrlResult Position
pos Char
c b
pos' c
s = 
      (forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall a b. b -> Either a b
Right (Position -> Char -> CHSToken
CHSTokCtrl Position
pos Char
c), b
pos', c
s, forall a. Maybe a
Nothing)

-- start of a binding hook (ie, enter the binding hook lexer)
--
hook :: CHSLexer
hook :: CHSLexer
hook  = forall s t. String -> Regexp s t
string String
"{#"
        forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` \String
_ Position
pos CHSLexerState
s -> (forall a. Maybe a
Nothing, Position -> Int -> Position
incPos Position
pos Int
2, CHSLexerState
s, forall a. a -> Maybe a
Just CHSLexer
bhLexer)

-- pre-processor directives and `#c'
--
--  * we lex `#c' as a directive and special case it in the action
--
--  * we lex C line number pragmas and special case it in the action
--
cpp :: CHSLexer
cpp :: CHSLexer
cpp = CHSLexer
directive
      where
        directive :: CHSLexer
directive = 
          forall s t. String -> Regexp s t
string String
"\n#" forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> forall s t. String -> Regexp s t
alt (Char
'\t'forall a. a -> [a] -> [a]
:String
inlineSet)forall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` forall {s} {t}. Regexp s t
epsilon
          forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` 
             \(Char
_:Char
_:String
dir) Position
pos CHSLexerState
s ->        -- strip off the "\n#"
               case String
dir of
                 [Char
'c']                      ->          -- #c
                   (forall a. Maybe a
Nothing, Position -> Position
retPos Position
pos, CHSLexerState
s, forall a. a -> Maybe a
Just CHSLexer
cLexer)
                 -- a #c may be followed by whitespace
                 Char
'c':Char
sp:String
_ | Char
sp forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
" \t" ->          -- #c
                   (forall a. Maybe a
Nothing, Position -> Position
retPos Position
pos, CHSLexerState
s, forall a. a -> Maybe a
Just CHSLexer
cLexer)
                 Char
' ':line :: String
line@(Char
n:String
_) | Char -> Bool
isDigit Char
n ->                 -- C line pragma
                   let pos' :: Position
pos' = String -> Position -> Position
adjustPosByCLinePragma String
line Position
pos
                    in (forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall a b. b -> Either a b
Right (Position -> CHSToken
CHSTokLine Position
pos'), Position
pos', CHSLexerState
s, forall a. Maybe a
Nothing)
                 String
_                            ->        -- CPP directive
                   (forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall a b. b -> Either a b
Right (Position -> String -> CHSToken
CHSTokCPP Position
pos String
dir), 
                    Position -> Position
retPos Position
pos, CHSLexerState
s, forall a. Maybe a
Nothing)

adjustPosByCLinePragma :: String -> Position -> Position
adjustPosByCLinePragma :: String -> Position -> Position
adjustPosByCLinePragma String
str (Position String
fname Int
_ Int
_) = 
  (String -> Int -> Int -> Position
Position String
fname' Int
row' Int
0)
  where
    str' :: String
str'            = ShowS
dropWhite String
str
    (String
rowStr, String
str'') = forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isDigit String
str'
    row' :: Int
row'            = forall a. Read a => String -> a
read String
rowStr
    str''' :: String
str'''          = ShowS
dropWhite String
str''
    fnameStr :: String
fnameStr        = forall a. (a -> Bool) -> [a] -> [a]
takeWhile (forall a. Eq a => a -> a -> Bool
/= Char
'"') forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Int -> [a] -> [a]
drop Int
1 forall a b. (a -> b) -> a -> b
$ String
str'''
    fname' :: String
fname'          | forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
str''' Bool -> Bool -> Bool
|| forall a. [a] -> a
head String
str''' forall a. Eq a => a -> a -> Bool
/= Char
'"' = String
fname
                    -- try and get more sharing of file name strings
                    | String
fnameStr forall a. Eq a => a -> a -> Bool
== String
fname                 = String
fname
                    | Bool
otherwise                         = String
fnameStr
    --
    dropWhite :: ShowS
dropWhite = forall a. (a -> Bool) -> [a] -> [a]
dropWhile (\Char
c -> Char
c forall a. Eq a => a -> a -> Bool
== Char
' ' Bool -> Bool -> Bool
|| Char
c forall a. Eq a => a -> a -> Bool
== Char
'\t')

-- the binding hook lexer
--
bhLexer :: CHSLexer
bhLexer :: CHSLexer
bhLexer  =      CHSLexer
identOrKW
           forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
symbol
           forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
strlit
           forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
hsverb
           forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
whitespace
           forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
endOfHook
           forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< forall s t. String -> Regexp s t
string String
"--" forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> forall {s} {t}. Regexp s t
anyButNLforall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` forall s t. Char -> Regexp s t
char Char
'\n'   -- comment
                forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` \String
_ Position
pos CHSLexerState
s -> (forall a. Maybe a
Nothing, Position -> Position
retPos Position
pos, CHSLexerState
s, forall a. Maybe a
Nothing)
           where
             anyButNL :: Regexp s t
anyButNL  = forall s t. String -> Regexp s t
alt (String
anySet forall a. Eq a => [a] -> [a] -> [a]
\\ [Char
'\n'])
             endOfHook :: CHSLexer
endOfHook = forall s t. String -> Regexp s t
string String
"#}"
                         forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` 
                          \String
_ Position
pos CHSLexerState
s -> (forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall a b. b -> Either a b
Right (Position -> CHSToken
CHSTokEndHook Position
pos), 
                                       Position -> Int -> Position
incPos Position
pos Int
2, CHSLexerState
s, forall a. a -> Maybe a
Just CHSLexer
chslexer)

-- the inline-C lexer
--
cLexer :: CHSLexer
cLexer :: CHSLexer
cLexer =      forall {s}. Lexer s CHSToken
inlineC                     -- inline C code
         forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
ctrl                        -- control code (preserved)
         forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< forall s t. String -> Regexp s t
string String
"\n#endc"            -- end of inline C code...
              forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta`                   -- ...preserve '\n' as control token
              \String
_ Position
pos CHSLexerState
s -> (forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall a b. b -> Either a b
Right (Position -> Char -> CHSToken
CHSTokCtrl Position
pos Char
'\n'), Position -> Position
retPos Position
pos, CHSLexerState
s, 
                           forall a. a -> Maybe a
Just CHSLexer
chslexer)
         where
           inlineC :: Lexer s CHSToken
inlineC = forall s t. String -> Regexp s t
alt String
inlineSet forall s t. Regexp s t -> Action t -> Lexer s t
`lexaction` CHSAction
copyVerbatimC
           --
           copyVerbatimC :: CHSAction 
           copyVerbatimC :: CHSAction
copyVerbatimC String
cs Position
pos = forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ Position -> String -> CHSToken
CHSTokC Position
pos String
cs

-- whitespace
--
--  * horizontal and vertical tabs, newlines, and form feeds are filter out by
--   `Lexers.ctrlLexer' 
--
whitespace :: CHSLexer
whitespace :: CHSLexer
whitespace  =      (forall s t. Char -> Regexp s t
char Char
' ' forall s t. Regexp s t -> Action t -> Lexer s t
`lexaction` \String
_ Position
_ -> forall a. Maybe a
Nothing)
              forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< forall s t. Lexer s t
ctrlLexer

-- identifiers and keywords
--
identOrKW :: CHSLexer
--
-- the strictness annotations seem to help a bit
--
identOrKW :: CHSLexer
identOrKW  = 
       -- identifier or keyword
       (forall {s} {t}. Regexp s t
letter forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> (forall {s} {t}. Regexp s t
letter forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< forall {s} {t}. Regexp s t
digit forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< forall s t. Char -> Regexp s t
char Char
'\'')forall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` forall {s} {t}. Regexp s t
epsilon
       CHSRegexp -> (String -> Position -> Name -> CHSToken) -> CHSLexer
`lexactionName` \String
cs Position
pos Name
name -> (Position -> String -> Name -> CHSToken
idkwtok forall a b. (a -> b) -> a -> b
$!Position
pos) String
cs Name
name)
  forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< -- identifier in single quotes
       (forall s t. Char -> Regexp s t
char Char
'\'' forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> forall {s} {t}. Regexp s t
letter forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> (forall {s} {t}. Regexp s t
letter forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< forall {s} {t}. Regexp s t
digit)forall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` forall s t. Char -> Regexp s t
char Char
'\''
       CHSRegexp -> (String -> Position -> Name -> CHSToken) -> CHSLexer
`lexactionName` \String
cs Position
pos Name
name -> (Position -> String -> Name -> CHSToken
mkid forall a b. (a -> b) -> a -> b
$!Position
pos) String
cs Name
name)
       -- NB: quotes are removed by lexemeToIdent
  where
    idkwtok :: Position -> String -> Name -> CHSToken
idkwtok Position
pos String
"as"               Name
_    = Position -> CHSToken
CHSTokAs      Position
pos
    idkwtok Position
pos String
"call"             Name
_    = Position -> CHSToken
CHSTokCall    Position
pos
    idkwtok Position
pos String
"class"            Name
_    = Position -> CHSToken
CHSTokClass   Position
pos
    idkwtok Position
pos String
"context"          Name
_    = Position -> CHSToken
CHSTokContext Position
pos
    idkwtok Position
pos String
"deriving"         Name
_    = Position -> CHSToken
CHSTokDerive  Position
pos
    idkwtok Position
pos String
"enum"             Name
_    = Position -> CHSToken
CHSTokEnum    Position
pos
    idkwtok Position
pos String
"foreign"          Name
_    = Position -> CHSToken
CHSTokForeign Position
pos
    idkwtok Position
pos String
"fun"              Name
_    = Position -> CHSToken
CHSTokFun     Position
pos
    idkwtok Position
pos String
"get"              Name
_    = Position -> CHSToken
CHSTokGet     Position
pos
    idkwtok Position
pos String
"import"           Name
_    = Position -> CHSToken
CHSTokImport  Position
pos
    idkwtok Position
pos String
"lib"              Name
_    = Position -> CHSToken
CHSTokLib     Position
pos
    idkwtok Position
pos String
"newtype"          Name
_    = Position -> CHSToken
CHSTokNewtype Position
pos
    idkwtok Position
pos String
"pointer"          Name
_    = Position -> CHSToken
CHSTokPointer Position
pos
    idkwtok Position
pos String
"prefix"           Name
_    = Position -> CHSToken
CHSTokPrefix  Position
pos
    idkwtok Position
pos String
"pure"             Name
_    = Position -> CHSToken
CHSTokPure    Position
pos
    idkwtok Position
pos String
"qualified"        Name
_    = Position -> CHSToken
CHSTokQualif  Position
pos
    idkwtok Position
pos String
"set"              Name
_    = Position -> CHSToken
CHSTokSet     Position
pos
    idkwtok Position
pos String
"sizeof"           Name
_    = Position -> CHSToken
CHSTokSizeof  Position
pos
    idkwtok Position
pos String
"stable"           Name
_    = Position -> CHSToken
CHSTokStable  Position
pos
    idkwtok Position
pos String
"type"             Name
_    = Position -> CHSToken
CHSTokType    Position
pos
    idkwtok Position
pos String
"underscoreToCase" Name
_    = Position -> CHSToken
CHSTok_2Case  Position
pos
    idkwtok Position
pos String
"unsafe"           Name
_    = Position -> CHSToken
CHSTokUnsafe  Position
pos
    idkwtok Position
pos String
"with"             Name
_    = Position -> CHSToken
CHSTokWith    Position
pos
    idkwtok Position
pos String
"lock"             Name
_    = Position -> CHSToken
CHSTokLock    Position
pos
    idkwtok Position
pos String
"nolock"           Name
_    = Position -> CHSToken
CHSTokNolock  Position
pos
    idkwtok Position
pos String
cs                 Name
name = Position -> String -> Name -> CHSToken
mkid Position
pos String
cs Name
name
    --
    mkid :: Position -> String -> Name -> CHSToken
mkid Position
pos String
cs Name
name = Position -> Ident -> CHSToken
CHSTokIdent Position
pos (Position -> String -> Name -> Ident
lexemeToIdent Position
pos String
cs Name
name)

-- reserved symbols
--
symbol :: CHSLexer
symbol :: CHSLexer
symbol  =      forall {t} {s}. String -> (Position -> t) -> Lexer s t
sym String
"->" Position -> CHSToken
CHSTokArrow
          forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< forall {t} {s}. String -> (Position -> t) -> Lexer s t
sym String
"=>" Position -> CHSToken
CHSTokDArrow
          forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< forall {t} {s}. String -> (Position -> t) -> Lexer s t
sym String
"."  Position -> CHSToken
CHSTokDot
          forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< forall {t} {s}. String -> (Position -> t) -> Lexer s t
sym String
","  Position -> CHSToken
CHSTokComma
          forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< forall {t} {s}. String -> (Position -> t) -> Lexer s t
sym String
"="  Position -> CHSToken
CHSTokEqual
          forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< forall {t} {s}. String -> (Position -> t) -> Lexer s t
sym String
"-"  Position -> CHSToken
CHSTokMinus
          forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< forall {t} {s}. String -> (Position -> t) -> Lexer s t
sym String
"*"  Position -> CHSToken
CHSTokStar
          forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< forall {t} {s}. String -> (Position -> t) -> Lexer s t
sym String
"&"  Position -> CHSToken
CHSTokAmp
          forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< forall {t} {s}. String -> (Position -> t) -> Lexer s t
sym String
"^"  Position -> CHSToken
CHSTokHat
          forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< forall {t} {s}. String -> (Position -> t) -> Lexer s t
sym String
"{"  Position -> CHSToken
CHSTokLBrace
          forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< forall {t} {s}. String -> (Position -> t) -> Lexer s t
sym String
"}"  Position -> CHSToken
CHSTokRBrace
          forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< forall {t} {s}. String -> (Position -> t) -> Lexer s t
sym String
"("  Position -> CHSToken
CHSTokLParen
          forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< forall {t} {s}. String -> (Position -> t) -> Lexer s t
sym String
")"  Position -> CHSToken
CHSTokRParen
          where
            sym :: String -> (Position -> t) -> Lexer s t
sym String
cs Position -> t
con = forall s t. String -> Regexp s t
string String
cs forall s t. Regexp s t -> Action t -> Lexer s t
`lexaction` \String
_ Position
pos -> forall a. a -> Maybe a
Just (Position -> t
con Position
pos)

-- string
--
strlit :: CHSLexer
strlit :: CHSLexer
strlit  = forall s t. Char -> Regexp s t
char Char
'"' forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> (forall {s} {t}. Regexp s t
instr forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< forall s t. Char -> Regexp s t
char Char
'\\')forall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` forall s t. Char -> Regexp s t
char Char
'"'
          forall s t. Regexp s t -> Action t -> Lexer s t
`lexaction` \String
cs Position
pos -> forall a. a -> Maybe a
Just (Position -> String -> CHSToken
CHSTokString Position
pos (forall a. [a] -> [a]
init forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> [a]
tail forall a b. (a -> b) -> a -> b
$ String
cs))

-- verbatim code
--
hsverb :: CHSLexer
hsverb :: CHSLexer
hsverb  = forall s t. Char -> Regexp s t
char Char
'`' forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> forall {s} {t}. Regexp s t
inhsverbforall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` forall s t. Char -> Regexp s t
char Char
'\''
          forall s t. Regexp s t -> Action t -> Lexer s t
`lexaction` \String
cs Position
pos -> forall a. a -> Maybe a
Just (Position -> String -> CHSToken
CHSTokHSVerb Position
pos (forall a. [a] -> [a]
init forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> [a]
tail forall a b. (a -> b) -> a -> b
$ String
cs))


-- regular expressions
--
letter, digit, instr, inchar, inhsverb :: Regexp s t
letter :: forall {s} {t}. Regexp s t
letter   = forall s t. String -> Regexp s t
alt [Char
'a'..Char
'z'] forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< forall s t. String -> Regexp s t
alt [Char
'A'..Char
'Z'] forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< forall s t. Char -> Regexp s t
char Char
'_'
digit :: forall {s} {t}. Regexp s t
digit    = forall s t. String -> Regexp s t
alt [Char
'0'..Char
'9']
instr :: forall {s} {t}. Regexp s t
instr    = forall s t. String -> Regexp s t
alt ([Char
' '..Char
'\127'] forall a. Eq a => [a] -> [a] -> [a]
\\ String
"\"\\")
inchar :: forall {s} {t}. Regexp s t
inchar   = forall s t. String -> Regexp s t
alt ([Char
' '..Char
'\127'] forall a. Eq a => [a] -> [a] -> [a]
\\ String
"\'")
inhsverb :: forall {s} {t}. Regexp s t
inhsverb = forall s t. String -> Regexp s t
alt ([Char
' '..Char
'\127'] forall a. Eq a => [a] -> [a] -> [a]
\\ String
"\'")

-- character sets
--
anySet, inlineSet, specialSet, commentSpecialSet, ctrlSet :: [Char]
anySet :: String
anySet            = [Char
'\0'..Char
'\255']
inlineSet :: String
inlineSet         = String
anySet forall a. Eq a => [a] -> [a] -> [a]
\\ String
ctrlSet
specialSet :: String
specialSet        = [Char
'{', Char
'-', Char
'"', Char
'\'']
commentSpecialSet :: String
commentSpecialSet = [Char
'{', Char
'-']
ctrlSet :: String
ctrlSet           = [Char
'\n', Char
'\f', Char
'\r', Char
'\t', Char
'\v']


-- main lexing routine
-- -------------------

-- generate a token sequence out of a string denoting a CHS file
-- (EXPORTED) 
--
--  * the given position is attributed to the first character in the string
--
--  * errors are entered into the compiler state
--
lexCHS        :: String -> Position -> CST s [CHSToken]
lexCHS :: forall s. String -> Position -> CST s [CHSToken]
lexCHS String
cs Position
pos  = 
  do
    CHSLexerState
state <- forall s. CST s CHSLexerState
initialState
    let ([CHSToken]
ts, LexerState CHSLexerState
lstate, [Error]
errs) = forall s t.
Lexer s t -> LexerState s -> ([t], LexerState s, [Error])
execLexer CHSLexer
chslexer (String
cs, Position
pos, CHSLexerState
state)
        (String
_, Position
pos', CHSLexerState
state')  = LexerState CHSLexerState
lstate
    forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM forall e s. Error -> PreCST e s ()
raise [Error]
errs
    forall s. Position -> CHSLexerState -> CST s ()
assertFinalState Position
pos' CHSLexerState
state'
    forall (m :: * -> *) a. Monad m => a -> m a
return [CHSToken]
ts