--  C->Haskell Compiler: Lexer for CHS Files
--
--  Author : Manuel M T Chakravarty
--  Created: 13 August 99
--
--  Version $Revision: 1.1 $ from $Date: 2004/11/21 21:05:35 $
--
--  Copyright (c) [1999..2004] Manuel M T Chakravarty
--
--  This file is free software; you can redistribute it and/or modify
--  it under the terms of the GNU General Public License as published by
--  the Free Software Foundation; either version 2 of the License, or
--  (at your option) any later version.
--
--  This file is distributed in the hope that it will be useful,
--  but WITHOUT ANY WARRANTY; without even the implied warranty of
--  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
--  GNU General Public License for more details.
--
--- DESCRIPTION ---------------------------------------------------------------
--
--  Lexer for CHS files; the tokens are only partially recognised.
--
--- DOCU ----------------------------------------------------------------------
--
--  language: Haskell 98
--
--  * CHS files are assumed to be Haskell 98 files that include C2HS binding
--    hooks.
--
--  * Haskell code is not tokenised, but binding hooks (delimited by `{#'and 
--    `#}') are analysed.  Therefore the lexer operates in two states
--    (realised as two lexer coupled by meta actions) depending on whether
--    Haskell code or a binding hook is currently read.  The lexer reading
--    Haskell code is called `base lexer'; the other one, `binding-hook
--    lexer'.  In addition, there is a inline-c lexer, which, as the
--    binding-hook lexer, can be triggered from the base lexer.
--
--  * Base lexer:
--
--      haskell -> (inline \\ special)*
--               | special \\ `"'
--               | comment
--               | nested
--               | hstring
--               | '{#'
--               | cpp
--      special -> `(' | `{' | `-' | `"'
--      ctrl    -> `\n' | `\f' | `\r' | `\t' | `\v'
--
--      inline  -> any \\ ctrl
--      any     -> '\0'..'\255'
--
--    Within the base lexer control codes appear as separate tokens in the
--    token list.
--
--    NOTE: It is important that `{' is an extra lexeme and not added as an
--          optional component at the end of the first alternative for
--          `haskell'.  Otherwise, the principle of the longest match will
--          divide `foo {#' into the tokens `foo {' and `#' instead of `foo '
--          and `{#'.
--
--    One line comments are handled by
--
--      comment -> `--' (any \\ `\n')* `\n'
--
--    and nested comments by
--
--      nested -> `{-' any* `-}'
--
--    where `any*' may contain _balanced_ occurrences of `{-' and `-}'.
--
--      hstring -> `"' inhstr* `"'
--      inhstr  -> ` '..`\127' \\ `"'
--               | `\"'
--
--    Pre-precessor directives as well as the switch to inline-C code are
--    formed as follows:
--
--      cpp     -> `\n#' (inline | `\t')* `\n'
--               | `\n#c' (' ' | '\t')* `\n'
--
--    We allow whitespace between the `#' and the actual directive, but in `#c'
--    and `#endc' the directive must immediately follow the `#'.  This might
--    be regarded as a not entirely orthogonal design, but simplifies matters
--    especially for `#endc'.
--
--  * On encountering the lexeme `{#', a meta action in the base lexer
--    transfers control to the following binding-hook lexer:
--
--      ident       -> letter (letter | digit | `\'')*
--                   | `\'' letter (letter | digit)* `\''
--      reservedid  -> `as' | `call' | `class' | `context' | `deriving' 
--                   | `enum' | `foreign' | `fun' | `get' | `lib' 
--                   | `newtype' | `pointer' | `prefix' | `pure' | `set'
--                   | `sizeof' | `stable' | `type' | `underscoreToCase' 
--                   | `unsafe' | `with' | 'lock' | 'unlock'
--      reservedsym -> `{#' | `#}' | `{' | `}' | `,' | `.' | `->' | `=' 
--                   | `=>' | '-' | `*' | `&' | `^'
--      string      -> `"' instr* `"'
--      verbhs      -> `\`' instr* `\''
--      instr       -> ` '..`\127' \\ `"'
--      comment     -> `--' (any \\ `\n')* `\n'
--
--    Control characters, white space, and comments are discarded in the
--    binding-hook lexer.  Nested comments are not allowed in a binding hook.
--    Identifiers can be enclosed in single quotes to avoid collision with
--    C->Haskell keywords.
--
--  * In the binding-hook lexer, the lexeme `#}' transfers control back to the 
--    base lexer.  An occurence of the lexeme `{#' inside the binding-hook
--    lexer triggers an error.  The symbol `{#' is not explcitly represented
--    in the resulting token stream.  However, the occurrence of a token
--    representing one of the reserved identifiers `call', `context', `enum',
--    and `field' marks the start of a binding hook.  Strictly speaking, `#}'
--    need also not occur in the token stream, as the next `haskell' token
--    marks a hook's end.  It is, however, useful for producing accurate error 
--    messages (in case an hook is closed to early) to have a token
--    representing `#}'.
--
--  * The rule `ident' describes Haskell identifiers, but without
--    distinguishing between variable and constructor identifers (ie, those
--    starting with a lowercase and those starting with an uppercase letter).
--    However, we use it also to scan C identifiers; although, strictly
--    speaking, it is too general for them.  In the case of C identifiers,
--    this should not have any impact on the range of descriptions accepted by
--    the tool, as illegal identifier will never occur in a C header file that
--    is accepted by the C lexer.  In the case of Haskell identifiers, a
--    confusion between variable and constructor identifiers will be noted by
--    the Haskell compiler translating the code generated by c2hs.  Moreover,
--    identifiers can be enclosed in single quotes to avoid collision with
--    C->Haskell keywords, but those may not contain apostrophes.
--
--  * Any line starting with the character `#' is regarded to be a C
--    preprocessor directive.  With the exception of `#c' and `#endc', which
--    delimit a set of lines containing inline C code.  Hence, in the base
--    lexer, the lexeme `#c' triggers a meta action transferring control to the
--    following inline-C lexer:
--
--      c  -> inline* \\ `\n#endc'
--
--    We do neither treat C strings nor C comments specially.  Hence, if the
--    string "\n#endc" occurs in a comment, we will mistakenly regard it as
--    the end of the inline C code.  Note that the problem cannot happen with
--    strings, as C does not permit strings that extend over multiple lines.
--    At the moment, it just seems not to be worth the effort required to
--    treat this situation more accurately.
--
--    The inline-C lexer also doesn't handle pre-processor directives
--    specially.  Hence, structural pre-processor directives (namely,
--    conditionals) may occur within inline-C code only properly nested.
--
--  Shortcomings
--  ~~~~~~~~~~~~
--  Some lexemes that include single and double quote characters are not lexed
--  correctly.  See the implementation comment at `haskell' for details.
--
--
--- TODO ----------------------------------------------------------------------
--
--  * In `haskell', the case of a single `"' (without a matching second one)
--    is caught by an eplicit error raising rule.  This shouldn't be
--    necessary, but for some strange reason, the lexer otherwise hangs when a 
--    single `"' appears in the input.
--
--  * Comments in the "gap" of a string are not yet supported.
--

module CHSLexer (CHSToken(..), lexCHS) 
where 

import Data.List         ((\\))
import Data.Char         (isDigit)
import Control.Monad     (liftM)
import Numeric   (readDec, readOct, readHex)

import Position  (Position(..), Pos(posOf), incPos, retPos, tabPos)
import Errors    (ErrorLvl(..), Error, makeError)
import UNames    (NameSupply, Name, names)
import Idents    (Ident, lexemeToIdent, identToLexeme)
import Lexers    (Regexp, Lexer, Action, epsilon, char, (+>), lexaction,
                  lexactionErr, lexmeta, (>|<), (>||<), ctrlLexer, star, plus,
                  quest, alt, string, LexerState, execLexer)

import C2HSState (CST, raise, raiseError, nop, getNameSupply) 


-- token definition
-- ----------------

-- possible tokens (EXPORTED)
--
data CHSToken = CHSTokArrow   Position          -- `->'
              | CHSTokDArrow  Position          -- `=>'
              | CHSTokDot     Position          -- `.'
              | CHSTokComma   Position          -- `,'
              | CHSTokEqual   Position          -- `='
              | CHSTokMinus   Position          -- `-'
              | CHSTokStar    Position          -- `*'
              | CHSTokAmp     Position          -- `&'
              | CHSTokHat     Position          -- `^'
              | CHSTokLBrace  Position          -- `{'
              | CHSTokRBrace  Position          -- `}'
              | CHSTokLParen  Position          -- `('
              | CHSTokRParen  Position          -- `)'
              | CHSTokEndHook Position          -- `#}'
              | CHSTokAs      Position          -- `as'
              | CHSTokCall    Position          -- `call'
              | CHSTokClass   Position          -- `class'
              | CHSTokContext Position          -- `context'
              | CHSTokDerive  Position          -- `deriving'
              | CHSTokEnum    Position          -- `enum'
              | CHSTokForeign Position          -- `foreign'
              | CHSTokFun     Position          -- `fun'
              | CHSTokGet     Position          -- `get'
              | CHSTokImport  Position          -- `import'
              | CHSTokLib     Position          -- `lib'
              | CHSTokNewtype Position          -- `newtype'
              | CHSTokPointer Position          -- `pointer'
              | CHSTokPrefix  Position          -- `prefix'
              | CHSTokPure    Position          -- `pure'
              | CHSTokQualif  Position          -- `qualified'
              | CHSTokSet     Position          -- `set'
              | CHSTokSizeof  Position          -- `sizeof'
              | CHSTokStable  Position          -- `stable'
              | CHSTokType    Position          -- `type'
              | CHSTok_2Case  Position          -- `underscoreToCase'
              | CHSTokUnsafe  Position          -- `unsafe'
              | CHSTokWith    Position          -- `with'
              | CHSTokLock    Position          -- `lock'
              | CHSTokNolock  Position          -- `nolock'
              | CHSTokString  Position String   -- string 
              | CHSTokHSVerb  Position String   -- verbatim Haskell (`...')
              | CHSTokIdent   Position Ident    -- identifier
              | CHSTokHaskell Position String   -- verbatim Haskell code
              | CHSTokCPP     Position String   -- pre-processor directive
              | CHSTokLine    Position          -- line pragma
              | CHSTokC       Position String   -- verbatim C code
              | CHSTokCtrl    Position Char     -- control code
              | CHSTokPragma  Position          -- '{-# LANGUAGE' language pragma begin
              | CHSTokPragEnd Position          -- '#-}' language pragma end

instance Pos CHSToken where
  posOf :: CHSToken -> Position
posOf (CHSTokArrow   Position
pos  ) = Position
pos
  posOf (CHSTokDArrow  Position
pos  ) = Position
pos
  posOf (CHSTokDot     Position
pos  ) = Position
pos
  posOf (CHSTokComma   Position
pos  ) = Position
pos
  posOf (CHSTokEqual   Position
pos  ) = Position
pos
  posOf (CHSTokMinus   Position
pos  ) = Position
pos
  posOf (CHSTokStar    Position
pos  ) = Position
pos
  posOf (CHSTokAmp     Position
pos  ) = Position
pos
  posOf (CHSTokHat     Position
pos  ) = Position
pos
  posOf (CHSTokLBrace  Position
pos  ) = Position
pos
  posOf (CHSTokRBrace  Position
pos  ) = Position
pos
  posOf (CHSTokLParen  Position
pos  ) = Position
pos
  posOf (CHSTokRParen  Position
pos  ) = Position
pos
  posOf (CHSTokEndHook Position
pos  ) = Position
pos
  posOf (CHSTokAs      Position
pos  ) = Position
pos
  posOf (CHSTokCall    Position
pos  ) = Position
pos
  posOf (CHSTokClass   Position
pos  ) = Position
pos
  posOf (CHSTokContext Position
pos  ) = Position
pos
  posOf (CHSTokDerive  Position
pos  ) = Position
pos
  posOf (CHSTokEnum    Position
pos  ) = Position
pos
  posOf (CHSTokForeign Position
pos  ) = Position
pos
  posOf (CHSTokFun     Position
pos  ) = Position
pos
  posOf (CHSTokGet     Position
pos  ) = Position
pos
  posOf (CHSTokImport  Position
pos  ) = Position
pos
  posOf (CHSTokLib     Position
pos  ) = Position
pos
  posOf (CHSTokNewtype Position
pos  ) = Position
pos
  posOf (CHSTokPointer Position
pos  ) = Position
pos
  posOf (CHSTokPrefix  Position
pos  ) = Position
pos
  posOf (CHSTokPure    Position
pos  ) = Position
pos
  posOf (CHSTokQualif  Position
pos  ) = Position
pos
  posOf (CHSTokSet     Position
pos  ) = Position
pos
  posOf (CHSTokSizeof  Position
pos  ) = Position
pos
  posOf (CHSTokStable  Position
pos  ) = Position
pos
  posOf (CHSTokType    Position
pos  ) = Position
pos
  posOf (CHSTok_2Case  Position
pos  ) = Position
pos
  posOf (CHSTokUnsafe  Position
pos  ) = Position
pos
  posOf (CHSTokWith    Position
pos  ) = Position
pos
  posOf (CHSTokLock    Position
pos  ) = Position
pos
  posOf (CHSTokNolock  Position
pos  ) = Position
pos
  posOf (CHSTokString  Position
pos String
_) = Position
pos
  posOf (CHSTokHSVerb  Position
pos String
_) = Position
pos
  posOf (CHSTokIdent   Position
pos Ident
_) = Position
pos
  posOf (CHSTokHaskell Position
pos String
_) = Position
pos
  posOf (CHSTokCPP     Position
pos String
_) = Position
pos
  posOf (CHSTokC       Position
pos String
_) = Position
pos
  posOf (CHSTokCtrl    Position
pos Char
_) = Position
pos
  posOf (CHSTokPragma  Position
pos  ) = Position
pos
  posOf (CHSTokPragEnd Position
pos  ) = Position
pos

instance Eq CHSToken where
  (CHSTokArrow    Position
_  ) == :: CHSToken -> CHSToken -> Bool
== (CHSTokArrow    Position
_  ) = Bool
True
  (CHSTokDArrow   Position
_  ) == (CHSTokDArrow   Position
_  ) = Bool
True
  (CHSTokDot      Position
_  ) == (CHSTokDot      Position
_  ) = Bool
True
  (CHSTokComma    Position
_  ) == (CHSTokComma    Position
_  ) = Bool
True
  (CHSTokEqual    Position
_  ) == (CHSTokEqual    Position
_  ) = Bool
True
  (CHSTokMinus    Position
_  ) == (CHSTokMinus    Position
_  ) = Bool
True
  (CHSTokStar     Position
_  ) == (CHSTokStar     Position
_  ) = Bool
True
  (CHSTokAmp      Position
_  ) == (CHSTokAmp      Position
_  ) = Bool
True
  (CHSTokHat      Position
_  ) == (CHSTokHat      Position
_  ) = Bool
True
  (CHSTokLBrace   Position
_  ) == (CHSTokLBrace   Position
_  ) = Bool
True
  (CHSTokRBrace   Position
_  ) == (CHSTokRBrace   Position
_  ) = Bool
True
  (CHSTokLParen   Position
_  ) == (CHSTokLParen   Position
_  ) = Bool
True
  (CHSTokRParen   Position
_  ) == (CHSTokRParen   Position
_  ) = Bool
True
  (CHSTokEndHook  Position
_  ) == (CHSTokEndHook  Position
_  ) = Bool
True
  (CHSTokAs       Position
_  ) == (CHSTokAs       Position
_  ) = Bool
True
  (CHSTokCall     Position
_  ) == (CHSTokCall     Position
_  ) = Bool
True
  (CHSTokClass    Position
_  ) == (CHSTokClass    Position
_  ) = Bool
True
  (CHSTokContext  Position
_  ) == (CHSTokContext  Position
_  ) = Bool
True
  (CHSTokDerive   Position
_  ) == (CHSTokDerive   Position
_  ) = Bool
True
  (CHSTokEnum     Position
_  ) == (CHSTokEnum     Position
_  ) = Bool
True
  (CHSTokForeign  Position
_  ) == (CHSTokForeign  Position
_  ) = Bool
True
  (CHSTokFun      Position
_  ) == (CHSTokFun      Position
_  ) = Bool
True
  (CHSTokGet      Position
_  ) == (CHSTokGet      Position
_  ) = Bool
True
  (CHSTokImport   Position
_  ) == (CHSTokImport   Position
_  ) = Bool
True
  (CHSTokLib      Position
_  ) == (CHSTokLib      Position
_  ) = Bool
True
  (CHSTokNewtype  Position
_  ) == (CHSTokNewtype  Position
_  ) = Bool
True
  (CHSTokPointer  Position
_  ) == (CHSTokPointer  Position
_  ) = Bool
True
  (CHSTokPrefix   Position
_  ) == (CHSTokPrefix   Position
_  ) = Bool
True
  (CHSTokPure     Position
_  ) == (CHSTokPure     Position
_  ) = Bool
True
  (CHSTokQualif   Position
_  ) == (CHSTokQualif   Position
_  ) = Bool
True
  (CHSTokSet      Position
_  ) == (CHSTokSet      Position
_  ) = Bool
True
  (CHSTokSizeof   Position
_  ) == (CHSTokSizeof   Position
_  ) = Bool
True
  (CHSTokStable   Position
_  ) == (CHSTokStable   Position
_  ) = Bool
True
  (CHSTokType     Position
_  ) == (CHSTokType     Position
_  ) = Bool
True
  (CHSTok_2Case   Position
_  ) == (CHSTok_2Case   Position
_  ) = Bool
True
  (CHSTokUnsafe   Position
_  ) == (CHSTokUnsafe   Position
_  ) = Bool
True
  (CHSTokWith     Position
_  ) == (CHSTokWith     Position
_  ) = Bool
True
  (CHSTokLock     Position
_  ) == (CHSTokLock     Position
_  ) = Bool
True
  (CHSTokNolock   Position
_  ) == (CHSTokNolock   Position
_  ) = Bool
True
  (CHSTokString   Position
_ String
_) == (CHSTokString   Position
_ String
_) = Bool
True
  (CHSTokHSVerb   Position
_ String
_) == (CHSTokHSVerb   Position
_ String
_) = Bool
True
  (CHSTokIdent    Position
_ Ident
_) == (CHSTokIdent    Position
_ Ident
_) = Bool
True
  (CHSTokHaskell  Position
_ String
_) == (CHSTokHaskell  Position
_ String
_) = Bool
True
  (CHSTokCPP      Position
_ String
_) == (CHSTokCPP      Position
_ String
_) = Bool
True
  (CHSTokC        Position
_ String
_) == (CHSTokC        Position
_ String
_) = Bool
True
  (CHSTokCtrl     Position
_ Char
_) == (CHSTokCtrl     Position
_ Char
_) = Bool
True
  (CHSTokPragma   Position
_  ) == (CHSTokPragma   Position
_  ) = Bool
True
  (CHSTokPragEnd  Position
_  ) == (CHSTokPragEnd  Position
_  ) = Bool
True
  CHSToken
_                    == CHSToken
_                    = Bool
False

instance Show CHSToken where
  showsPrec :: Int -> CHSToken -> ShowS
showsPrec Int
_ (CHSTokArrow   Position
_  ) = String -> ShowS
showString String
"->"
  showsPrec Int
_ (CHSTokDArrow  Position
_  ) = String -> ShowS
showString String
"=>"
  showsPrec Int
_ (CHSTokDot     Position
_  ) = String -> ShowS
showString String
"."
  showsPrec Int
_ (CHSTokComma   Position
_  ) = String -> ShowS
showString String
","
  showsPrec Int
_ (CHSTokEqual   Position
_  ) = String -> ShowS
showString String
"="
  showsPrec Int
_ (CHSTokMinus   Position
_  ) = String -> ShowS
showString String
"-"
  showsPrec Int
_ (CHSTokStar    Position
_  ) = String -> ShowS
showString String
"*"
  showsPrec Int
_ (CHSTokAmp     Position
_  ) = String -> ShowS
showString String
"&"
  showsPrec Int
_ (CHSTokHat     Position
_  ) = String -> ShowS
showString String
"^"
  showsPrec Int
_ (CHSTokLBrace  Position
_  ) = String -> ShowS
showString String
"{"
  showsPrec Int
_ (CHSTokRBrace  Position
_  ) = String -> ShowS
showString String
"}"
  showsPrec Int
_ (CHSTokLParen  Position
_  ) = String -> ShowS
showString String
"("
  showsPrec Int
_ (CHSTokRParen  Position
_  ) = String -> ShowS
showString String
")"
  showsPrec Int
_ (CHSTokEndHook Position
_  ) = String -> ShowS
showString String
"#}"
  showsPrec Int
_ (CHSTokAs      Position
_  ) = String -> ShowS
showString String
"as"
  showsPrec Int
_ (CHSTokCall    Position
_  ) = String -> ShowS
showString String
"call"
  showsPrec Int
_ (CHSTokClass   Position
_  ) = String -> ShowS
showString String
"class"
  showsPrec Int
_ (CHSTokContext Position
_  ) = String -> ShowS
showString String
"context"
  showsPrec Int
_ (CHSTokDerive  Position
_  ) = String -> ShowS
showString String
"deriving"
  showsPrec Int
_ (CHSTokEnum    Position
_  ) = String -> ShowS
showString String
"enum"
  showsPrec Int
_ (CHSTokForeign Position
_  ) = String -> ShowS
showString String
"foreign"
  showsPrec Int
_ (CHSTokFun     Position
_  ) = String -> ShowS
showString String
"fun"
  showsPrec Int
_ (CHSTokGet     Position
_  ) = String -> ShowS
showString String
"get"
  showsPrec Int
_ (CHSTokImport  Position
_  ) = String -> ShowS
showString String
"import"
  showsPrec Int
_ (CHSTokLib     Position
_  ) = String -> ShowS
showString String
"lib"
  showsPrec Int
_ (CHSTokNewtype Position
_  ) = String -> ShowS
showString String
"newtype"
  showsPrec Int
_ (CHSTokPointer Position
_  ) = String -> ShowS
showString String
"pointer"
  showsPrec Int
_ (CHSTokPrefix  Position
_  ) = String -> ShowS
showString String
"prefix"
  showsPrec Int
_ (CHSTokPure    Position
_  ) = String -> ShowS
showString String
"pure"
  showsPrec Int
_ (CHSTokQualif  Position
_  ) = String -> ShowS
showString String
"qualified"
  showsPrec Int
_ (CHSTokSet     Position
_  ) = String -> ShowS
showString String
"set"
  showsPrec Int
_ (CHSTokSizeof  Position
_  ) = String -> ShowS
showString String
"sizeof"
  showsPrec Int
_ (CHSTokStable  Position
_  ) = String -> ShowS
showString String
"stable"
  showsPrec Int
_ (CHSTokType    Position
_  ) = String -> ShowS
showString String
"type"
  showsPrec Int
_ (CHSTok_2Case  Position
_  ) = String -> ShowS
showString String
"underscoreToCase"
  showsPrec Int
_ (CHSTokUnsafe  Position
_  ) = String -> ShowS
showString String
"unsafe"
  showsPrec Int
_ (CHSTokWith    Position
_  ) = String -> ShowS
showString String
"with"
  showsPrec Int
_ (CHSTokLock    Position
_  ) = String -> ShowS
showString String
"lock"
  showsPrec Int
_ (CHSTokNolock  Position
_  ) = String -> ShowS
showString String
"nolock"
  showsPrec Int
_ (CHSTokString  Position
_ String
s) = String -> ShowS
showString (String
"\"" String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
s String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"\"")
  showsPrec Int
_ (CHSTokHSVerb  Position
_ String
s) = String -> ShowS
showString (String
"`" String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
s String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"'")
  showsPrec Int
_ (CHSTokIdent   Position
_ Ident
i) = (String -> ShowS
showString (String -> ShowS) -> (Ident -> String) -> Ident -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Ident -> String
identToLexeme) Ident
i
  showsPrec Int
_ (CHSTokHaskell Position
_ String
s) = String -> ShowS
showString String
s
  showsPrec Int
_ (CHSTokCPP     Position
_ String
s) = String -> ShowS
showString String
s
  showsPrec Int
_ (CHSTokC       Position
_ String
s) = String -> ShowS
showString String
s
  showsPrec Int
_ (CHSTokCtrl    Position
_ Char
c) = Char -> ShowS
showChar Char
c
  showsPrec Int
_ (CHSTokPragma  Position
_  ) = String -> ShowS
showString String
"{-# LANGUAGE"
  showsPrec Int
_ (CHSTokPragEnd Position
_  ) = String -> ShowS
showString String
"#-}"


-- lexer state
-- -----------

-- state threaded through the lexer
--
data CHSLexerState = CHSLS {
                       CHSLexerState -> Int
nestLvl :: Int,   -- nesting depth of nested comments
                       CHSLexerState -> Bool
inHook  :: Bool,  -- within a binding hook?
                       CHSLexerState -> [Name]
namesup :: [Name] -- supply of unique names
                     }

-- initial state
--
initialState :: CST s CHSLexerState
initialState :: CST s CHSLexerState
initialState  = do
                  [Name]
namesup <- (NameSupply -> [Name])
-> PreCST SwitchBoard s NameSupply -> PreCST SwitchBoard s [Name]
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM NameSupply -> [Name]
names PreCST SwitchBoard s NameSupply
forall e s. PreCST e s NameSupply
getNameSupply
                  CHSLexerState -> CST s CHSLexerState
forall (m :: * -> *) a. Monad m => a -> m a
return (CHSLexerState -> CST s CHSLexerState)
-> CHSLexerState -> CST s CHSLexerState
forall a b. (a -> b) -> a -> b
$ CHSLS :: Int -> Bool -> [Name] -> CHSLexerState
CHSLS {
                             nestLvl :: Int
nestLvl = Int
0,
                             inHook :: Bool
inHook  = Bool
False,
                             namesup :: [Name]
namesup = [Name]
namesup
                           }

-- raise an error if the given state is not a final state
--
assertFinalState :: Position -> CHSLexerState -> CST s ()
assertFinalState :: Position -> CHSLexerState -> CST s ()
assertFinalState Position
pos CHSLS {nestLvl :: CHSLexerState -> Int
nestLvl = Int
nestLvl, inHook :: CHSLexerState -> Bool
inHook = Bool
inHook} 
  | Int
nestLvl Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0 = Position -> [String] -> CST s ()
forall e s. Position -> [String] -> PreCST e s ()
raiseError Position
pos [String
"Unexpected end of file!",
                                  String
"Unclosed nested comment."]
  | Bool
inHook      = Position -> [String] -> CST s ()
forall e s. Position -> [String] -> PreCST e s ()
raiseError Position
pos [String
"Unexpected end of file!",
                                  String
"Unclosed binding hook."]
  | Bool
otherwise   = CST s ()
forall e s. PreCST e s ()
nop

-- lexer and action type used throughout this specification
--
type CHSLexer  = Lexer  CHSLexerState CHSToken
type CHSAction = Action               CHSToken
type CHSRegexp = Regexp CHSLexerState CHSToken

-- for actions that need a new unique name
--
infixl 3 `lexactionName`
lexactionName :: CHSRegexp 
              -> (String -> Position -> Name -> CHSToken) 
              -> CHSLexer
CHSRegexp
re lexactionName :: CHSRegexp -> (String -> Position -> Name -> CHSToken) -> CHSLexer
`lexactionName` String -> Position -> Name -> CHSToken
action = CHSRegexp
re CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` Meta CHSLexerState CHSToken
forall a a.
String
-> Position
-> CHSLexerState
-> (Maybe (Either a CHSToken), Position, CHSLexerState, Maybe a)
action'
  where
    action' :: String
-> Position
-> CHSLexerState
-> (Maybe (Either a CHSToken), Position, CHSLexerState, Maybe a)
action' String
str Position
pos CHSLexerState
state = let Name
name:[Name]
ns = CHSLexerState -> [Name]
namesup CHSLexerState
state
                            in
                            (Either a CHSToken -> Maybe (Either a CHSToken)
forall a. a -> Maybe a
Just (Either a CHSToken -> Maybe (Either a CHSToken))
-> Either a CHSToken -> Maybe (Either a CHSToken)
forall a b. (a -> b) -> a -> b
$ CHSToken -> Either a CHSToken
forall a b. b -> Either a b
Right (String -> Position -> Name -> CHSToken
action String
str Position
pos Name
name),
                             Position -> Int -> Position
incPos Position
pos (String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
str),
                             CHSLexerState
state {namesup :: [Name]
namesup = [Name]
ns},
                             Maybe a
forall a. Maybe a
Nothing)


-- lexical specification
-- ---------------------

-- the lexical definition of the tokens (the base lexer)
--
--
chslexer :: CHSLexer
chslexer :: CHSLexer
chslexer  =      CHSLexer
pragma         -- LANGUAGE pragma
            CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
haskell        -- Haskell code
            CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
nested         -- nested comments
            CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
ctrl           -- control code (that has to be preserved)
            CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
hook           -- start of a binding hook
            CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
cpp            -- a pre-processor directive (or `#c')

-- the LANGUAGE pragma
pragma :: CHSLexer
pragma :: CHSLexer
pragma = String -> CHSRegexp
forall s t. String -> Regexp s t
string String
"{-# LANGUAGE" CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` \String
_ Position
pos CHSLexerState
s ->
         (Either Error CHSToken -> Maybe (Either Error CHSToken)
forall a. a -> Maybe a
Just (Either Error CHSToken -> Maybe (Either Error CHSToken))
-> Either Error CHSToken -> Maybe (Either Error CHSToken)
forall a b. (a -> b) -> a -> b
$ CHSToken -> Either Error CHSToken
forall a b. b -> Either a b
Right (Position -> CHSToken
CHSTokPragma Position
pos), Position -> Int -> Position
incPos Position
pos Int
12, CHSLexerState
s, CHSLexer -> Maybe CHSLexer
forall a. a -> Maybe a
Just CHSLexer
langLexer)

langLexer :: CHSLexer
langLexer :: CHSLexer
langLexer = CHSLexer
whitespace CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
identOrKW CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
symbol CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||<
            (String -> CHSRegexp
forall s t. String -> Regexp s t
string String
"#-}" CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` \String
_ Position
pos CHSLexerState
s ->
            (Either Error CHSToken -> Maybe (Either Error CHSToken)
forall a. a -> Maybe a
Just (Either Error CHSToken -> Maybe (Either Error CHSToken))
-> Either Error CHSToken -> Maybe (Either Error CHSToken)
forall a b. (a -> b) -> a -> b
$ CHSToken -> Either Error CHSToken
forall a b. b -> Either a b
Right (Position -> CHSToken
CHSTokPragEnd Position
pos), Position -> Int -> Position
incPos Position
pos Int
3, CHSLexerState
s, CHSLexer -> Maybe CHSLexer
forall a. a -> Maybe a
Just CHSLexer
chslexer))

-- stream of Haskell code (terminated by a control character or binding hook)
--
haskell :: CHSLexer
--
-- NB: We need to make sure that '"' is not regarded as the beginning of a
--     string; however, we cannot really lex character literals properly
--     without lexing identifiers (as the latter may containing single quotes
--     as part of their lexeme).  Thus, we special case '"'.  This is still a
--     kludge, as a program fragment, such as
--
--       foo'"'strange string"
--
--     will not be handled correctly.
--
haskell :: CHSLexer
haskell  = (    CHSRegexp
forall s t. Regexp s t
anyButSpecialCHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` CHSRegexp
forall s t. Regexp s t
epsilon
            CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< CHSRegexp
forall s t. Regexp s t
specialButQuotes
            CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< Char -> CHSRegexp
forall s t. Char -> Regexp s t
char Char
'"'  CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> CHSRegexp
forall s t. Regexp s t
inhstrCHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` Char -> CHSRegexp
forall s t. Char -> Regexp s t
char Char
'"'
            CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< String -> CHSRegexp
forall s t. String -> Regexp s t
string String
"'\"'"                           -- special case of "
            CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< String -> CHSRegexp
forall s t. String -> Regexp s t
string String
"--" CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> CHSRegexp
forall s t. Regexp s t
anyButNLCHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` CHSRegexp
forall s t. Regexp s t
epsilon   -- comment
           )
           CHSRegexp -> Action CHSToken -> CHSLexer
forall s t. Regexp s t -> Action t -> Lexer s t
`lexaction` Action CHSToken
copyVerbatim
           CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< Char -> CHSRegexp
forall s t. Char -> Regexp s t
char Char
'"'                                -- this is a bad kludge
                CHSRegexp -> ActionErr CHSToken -> CHSLexer
forall s t. Regexp s t -> ActionErr t -> Lexer s t
`lexactionErr` 
                  \String
_ Position
pos -> (Error -> Either Error CHSToken
forall a b. a -> Either a b
Left (Error -> Either Error CHSToken) -> Error -> Either Error CHSToken
forall a b. (a -> b) -> a -> b
$ ErrorLvl -> Position -> [String] -> Error
makeError ErrorLvl
ErrorErr Position
pos
                                              [String
"Lexical error!", 
                                              String
"Unclosed string."])
           where
             anyButSpecial :: Regexp s t
anyButSpecial    = String -> Regexp s t
forall s t. String -> Regexp s t
alt (String
inlineSet String -> ShowS
forall a. Eq a => [a] -> [a] -> [a]
\\ String
specialSet)
             specialButQuotes :: Regexp s t
specialButQuotes = String -> Regexp s t
forall s t. String -> Regexp s t
alt (String
specialSet String -> ShowS
forall a. Eq a => [a] -> [a] -> [a]
\\ [Char
'"'])
             anyButNL :: Regexp s t
anyButNL         = String -> Regexp s t
forall s t. String -> Regexp s t
alt (String
anySet String -> ShowS
forall a. Eq a => [a] -> [a] -> [a]
\\ [Char
'\n'])
             inhstr :: Regexp s t
inhstr           = Regexp s t
forall s t. Regexp s t
instr Regexp s t -> Regexp s t -> Regexp s t
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< Char -> Regexp s t
forall s t. Char -> Regexp s t
char Char
'\\' Regexp s t -> Regexp s t -> Regexp s t
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< String -> Regexp s t
forall s t. String -> Regexp s t
string String
"\\\"" Regexp s t -> Regexp s t -> Regexp s t
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< Regexp s t
forall s t. Regexp s t
gap
             gap :: Regexp s t
gap              = Char -> Regexp s t
forall s t. Char -> Regexp s t
char Char
'\\' Regexp s t -> Regexp s t -> Regexp s t
forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> String -> Regexp s t
forall s t. String -> Regexp s t
alt (Char
' 'Char -> ShowS
forall a. a -> [a] -> [a]
:String
ctrlSet)Regexp s t -> Regexp s t -> Regexp s t
forall s t. Regexp s t -> Regexp s t -> Regexp s t
`plus` Char -> Regexp s t
forall s t. Char -> Regexp s t
char Char
'\\'

-- action copying the input verbatim to `CHSTokHaskell' tokens
--
copyVerbatim        :: CHSAction 
copyVerbatim :: Action CHSToken
copyVerbatim String
cs Position
pos  = CHSToken -> Maybe CHSToken
forall a. a -> Maybe a
Just (CHSToken -> Maybe CHSToken) -> CHSToken -> Maybe CHSToken
forall a b. (a -> b) -> a -> b
$ Position -> String -> CHSToken
CHSTokHaskell Position
pos String
cs

-- nested comments
--
nested :: CHSLexer
nested :: CHSLexer
nested  =
       String -> CHSRegexp
forall s t. String -> Regexp s t
string String
"{-"              {- for Haskell emacs mode :-( -}
       CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` Meta CHSLexerState CHSToken
forall a.
String
-> Position
-> CHSLexerState
-> (Maybe (Either a CHSToken), Position, CHSLexerState,
    Maybe CHSLexer)
enterComment
  CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||<
       String -> CHSRegexp
forall s t. String -> Regexp s t
string String
"-}"
       CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` Meta CHSLexerState CHSToken
leaveComment
  where
    enterComment :: String
-> Position
-> CHSLexerState
-> (Maybe (Either a CHSToken), Position, CHSLexerState,
    Maybe CHSLexer)
enterComment String
cs Position
pos CHSLexerState
s =
      (String -> Position -> Maybe (Either a CHSToken)
forall a. String -> Position -> Maybe (Either a CHSToken)
copyVerbatim' String
cs Position
pos,                    -- collect the lexeme
       Position -> Int -> Position
incPos Position
pos Int
2,                            -- advance current position
       CHSLexerState
s {nestLvl :: Int
nestLvl = CHSLexerState -> Int
nestLvl CHSLexerState
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1},             -- increase nesting level
       CHSLexer -> Maybe CHSLexer
forall a. a -> Maybe a
Just (CHSLexer -> Maybe CHSLexer) -> CHSLexer -> Maybe CHSLexer
forall a b. (a -> b) -> a -> b
$ CHSLexer
inNestedComment)                  -- continue in comment lexer
    --
    leaveComment :: Meta CHSLexerState CHSToken
leaveComment String
cs Position
pos CHSLexerState
s =
      case CHSLexerState -> Int
nestLvl CHSLexerState
s of
        Int
0 -> (Position -> Maybe (Either Error CHSToken)
forall b. Position -> Maybe (Either Error b)
commentCloseErr Position
pos,              -- 0: -} outside comment => err
              Position -> Int -> Position
incPos Position
pos Int
2,                     -- advance current position
              CHSLexerState
s,
              Maybe CHSLexer
forall a. Maybe a
Nothing)
        Int
1 -> (String -> Position -> Maybe (Either Error CHSToken)
forall a. String -> Position -> Maybe (Either a CHSToken)
copyVerbatim' String
cs Position
pos,             -- collect the lexeme
              Position -> Int -> Position
incPos Position
pos Int
2,                     -- advance current position
              CHSLexerState
s {nestLvl :: Int
nestLvl = CHSLexerState -> Int
nestLvl CHSLexerState
s Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1},      -- decrease nesting level
              CHSLexer -> Maybe CHSLexer
forall a. a -> Maybe a
Just CHSLexer
chslexer)                    -- 1: continue with root lexer
        Int
_ -> (String -> Position -> Maybe (Either Error CHSToken)
forall a. String -> Position -> Maybe (Either a CHSToken)
copyVerbatim' String
cs Position
pos,             -- collect the lexeme
              Position -> Int -> Position
incPos Position
pos Int
2,                     -- advance current position
              CHSLexerState
s {nestLvl :: Int
nestLvl = CHSLexerState -> Int
nestLvl CHSLexerState
s Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1},      -- decrease nesting level
              Maybe CHSLexer
forall a. Maybe a
Nothing)                          -- _: cont with comment lexer
    --
    copyVerbatim' :: String -> Position -> Maybe (Either a CHSToken)
copyVerbatim' String
cs Position
pos  = Either a CHSToken -> Maybe (Either a CHSToken)
forall a. a -> Maybe a
Just (Either a CHSToken -> Maybe (Either a CHSToken))
-> Either a CHSToken -> Maybe (Either a CHSToken)
forall a b. (a -> b) -> a -> b
$ CHSToken -> Either a CHSToken
forall a b. b -> Either a b
Right (Position -> String -> CHSToken
CHSTokHaskell Position
pos String
cs)
    --
    commentCloseErr :: Position -> Maybe (Either Error b)
commentCloseErr Position
pos =
      Either Error b -> Maybe (Either Error b)
forall a. a -> Maybe a
Just (Either Error b -> Maybe (Either Error b))
-> Either Error b -> Maybe (Either Error b)
forall a b. (a -> b) -> a -> b
$ Error -> Either Error b
forall a b. a -> Either a b
Left (ErrorLvl -> Position -> [String] -> Error
makeError ErrorLvl
ErrorErr Position
pos
                             [String
"Lexical error!", 
                             String
"`-}' not preceded by a matching `{-'."])
                             {- for Haskell emacs mode :-( -}


-- lexer processing the inner of a comment
--
inNestedComment :: CHSLexer
inNestedComment :: CHSLexer
inNestedComment  =      CHSLexer
commentInterior         -- inside a comment
                   CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
nested                  -- nested comments
                   CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
ctrl                    -- control code (preserved)

-- standard characters in a nested comment
--
commentInterior :: CHSLexer
commentInterior :: CHSLexer
commentInterior  = (    CHSRegexp
forall s t. Regexp s t
anyButSpecialCHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` CHSRegexp
forall s t. Regexp s t
epsilon
                    CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< CHSRegexp
forall s t. Regexp s t
special
                   )
                   CHSRegexp -> Action CHSToken -> CHSLexer
forall s t. Regexp s t -> Action t -> Lexer s t
`lexaction` Action CHSToken
copyVerbatim
                   where
                     anyButSpecial :: Regexp s t
anyButSpecial = String -> Regexp s t
forall s t. String -> Regexp s t
alt (String
inlineSet String -> ShowS
forall a. Eq a => [a] -> [a] -> [a]
\\ String
commentSpecialSet)
                     special :: Regexp s t
special       = String -> Regexp s t
forall s t. String -> Regexp s t
alt String
commentSpecialSet

-- control code in the base lexer (is turned into a token)
--
--  * this covers exactly the same set of characters as contained in `ctrlSet'
--   and `Lexers.ctrlLexer' and advances positions also like the `ctrlLexer'
--
ctrl :: CHSLexer
ctrl :: CHSLexer
ctrl  =     
       Char -> CHSRegexp
forall s t. Char -> Regexp s t
char Char
'\n' CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` Meta CHSLexerState CHSToken
forall c a a.
String
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
newline
  CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< Char -> CHSRegexp
forall s t. Char -> Regexp s t
char Char
'\r' CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` Meta CHSLexerState CHSToken
forall c a a.
String
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
newline
  CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< Char -> CHSRegexp
forall s t. Char -> Regexp s t
char Char
'\v' CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` Meta CHSLexerState CHSToken
forall c a a.
String
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
newline
  CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< Char -> CHSRegexp
forall s t. Char -> Regexp s t
char Char
'\f' CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` Meta CHSLexerState CHSToken
forall c a a.
String
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
formfeed
  CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< Char -> CHSRegexp
forall s t. Char -> Regexp s t
char Char
'\t' CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` Meta CHSLexerState CHSToken
forall c a a.
String
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
tab
  where
    newline :: String
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
newline  [Char
c] Position
pos = Position
-> Char
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
forall b c a a.
Position
-> Char -> b -> c -> (Maybe (Either a CHSToken), b, c, Maybe a)
ctrlResult Position
pos Char
c (Position -> Position
retPos Position
pos)
    formfeed :: String
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
formfeed [Char
c] Position
pos = Position
-> Char
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
forall b c a a.
Position
-> Char -> b -> c -> (Maybe (Either a CHSToken), b, c, Maybe a)
ctrlResult Position
pos Char
c (Position -> Int -> Position
incPos Position
pos Int
1)
    tab :: String
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
tab      [Char
c] Position
pos = Position
-> Char
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
forall b c a a.
Position
-> Char -> b -> c -> (Maybe (Either a CHSToken), b, c, Maybe a)
ctrlResult Position
pos Char
c (Position -> Position
tabPos Position
pos)

    ctrlResult :: Position
-> Char -> b -> c -> (Maybe (Either a CHSToken), b, c, Maybe a)
ctrlResult Position
pos Char
c b
pos' c
s = 
      (Either a CHSToken -> Maybe (Either a CHSToken)
forall a. a -> Maybe a
Just (Either a CHSToken -> Maybe (Either a CHSToken))
-> Either a CHSToken -> Maybe (Either a CHSToken)
forall a b. (a -> b) -> a -> b
$ CHSToken -> Either a CHSToken
forall a b. b -> Either a b
Right (Position -> Char -> CHSToken
CHSTokCtrl Position
pos Char
c), b
pos', c
s, Maybe a
forall a. Maybe a
Nothing)

-- start of a binding hook (ie, enter the binding hook lexer)
--
hook :: CHSLexer
hook :: CHSLexer
hook  = String -> CHSRegexp
forall s t. String -> Regexp s t
string String
"{#"
        CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` \String
_ Position
pos CHSLexerState
s -> (Maybe (Either Error CHSToken)
forall a. Maybe a
Nothing, Position -> Int -> Position
incPos Position
pos Int
2, CHSLexerState
s, CHSLexer -> Maybe CHSLexer
forall a. a -> Maybe a
Just CHSLexer
bhLexer)

-- pre-processor directives and `#c'
--
--  * we lex `#c' as a directive and special case it in the action
--
--  * we lex C line number pragmas and special case it in the action
--
cpp :: CHSLexer
cpp :: CHSLexer
cpp = CHSLexer
directive
      where
        directive :: CHSLexer
directive = 
          String -> CHSRegexp
forall s t. String -> Regexp s t
string String
"\n#" CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> String -> CHSRegexp
forall s t. String -> Regexp s t
alt (Char
'\t'Char -> ShowS
forall a. a -> [a] -> [a]
:String
inlineSet)CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` CHSRegexp
forall s t. Regexp s t
epsilon
          CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` 
             \(Char
_:Char
_:String
dir) Position
pos CHSLexerState
s ->        -- strip off the "\n#"
               case String
dir of
                 [Char
'c']                      ->          -- #c
                   (Maybe (Either Error CHSToken)
forall a. Maybe a
Nothing, Position -> Position
retPos Position
pos, CHSLexerState
s, CHSLexer -> Maybe CHSLexer
forall a. a -> Maybe a
Just CHSLexer
cLexer)
                 -- a #c may be followed by whitespace
                 Char
'c':Char
sp:String
_ | Char
sp Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
" \t" ->          -- #c
                   (Maybe (Either Error CHSToken)
forall a. Maybe a
Nothing, Position -> Position
retPos Position
pos, CHSLexerState
s, CHSLexer -> Maybe CHSLexer
forall a. a -> Maybe a
Just CHSLexer
cLexer)
                 Char
' ':line :: String
line@(Char
n:String
_) | Char -> Bool
isDigit Char
n ->                 -- C line pragma
                   let pos' :: Position
pos' = String -> Position -> Position
adjustPosByCLinePragma String
line Position
pos
                    in (Either Error CHSToken -> Maybe (Either Error CHSToken)
forall a. a -> Maybe a
Just (Either Error CHSToken -> Maybe (Either Error CHSToken))
-> Either Error CHSToken -> Maybe (Either Error CHSToken)
forall a b. (a -> b) -> a -> b
$ CHSToken -> Either Error CHSToken
forall a b. b -> Either a b
Right (Position -> CHSToken
CHSTokLine Position
pos'), Position
pos', CHSLexerState
s, Maybe CHSLexer
forall a. Maybe a
Nothing)
                 String
_                            ->        -- CPP directive
                   (Either Error CHSToken -> Maybe (Either Error CHSToken)
forall a. a -> Maybe a
Just (Either Error CHSToken -> Maybe (Either Error CHSToken))
-> Either Error CHSToken -> Maybe (Either Error CHSToken)
forall a b. (a -> b) -> a -> b
$ CHSToken -> Either Error CHSToken
forall a b. b -> Either a b
Right (Position -> String -> CHSToken
CHSTokCPP Position
pos String
dir), 
                    Position -> Position
retPos Position
pos, CHSLexerState
s, Maybe CHSLexer
forall a. Maybe a
Nothing)

adjustPosByCLinePragma :: String -> Position -> Position
adjustPosByCLinePragma :: String -> Position -> Position
adjustPosByCLinePragma String
str (Position String
fname Int
_ Int
_) = 
  (String -> Int -> Int -> Position
Position String
fname' Int
row' Int
0)
  where
    str' :: String
str'            = ShowS
dropWhite String
str
    (String
rowStr, String
str'') = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isDigit String
str'
    row' :: Int
row'            = String -> Int
forall a. Read a => String -> a
read String
rowStr
    str''' :: String
str'''          = ShowS
dropWhite String
str''
    fnameStr :: String
fnameStr        = (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'"') ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> ShowS
forall a. Int -> [a] -> [a]
drop Int
1 ShowS -> ShowS
forall a b. (a -> b) -> a -> b
$ String
str'''
    fname' :: String
fname'          | String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
str''' Bool -> Bool -> Bool
|| String -> Char
forall a. [a] -> a
head String
str''' Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'"' = String
fname
                    -- try and get more sharing of file name strings
                    | String
fnameStr String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
fname                 = String
fname
                    | Bool
otherwise                         = String
fnameStr
    --
    dropWhite :: ShowS
dropWhite = (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
dropWhile (\Char
c -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
' ' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\t')

-- the binding hook lexer
--
bhLexer :: CHSLexer
bhLexer :: CHSLexer
bhLexer  =      CHSLexer
identOrKW
           CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
symbol
           CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
strlit
           CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
hsverb
           CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
whitespace
           CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
endOfHook
           CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> CHSRegexp
forall s t. String -> Regexp s t
string String
"--" CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> CHSRegexp
forall s t. Regexp s t
anyButNLCHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` Char -> CHSRegexp
forall s t. Char -> Regexp s t
char Char
'\n'   -- comment
                CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` \String
_ Position
pos CHSLexerState
s -> (Maybe (Either Error CHSToken)
forall a. Maybe a
Nothing, Position -> Position
retPos Position
pos, CHSLexerState
s, Maybe CHSLexer
forall a. Maybe a
Nothing)
           where
             anyButNL :: Regexp s t
anyButNL  = String -> Regexp s t
forall s t. String -> Regexp s t
alt (String
anySet String -> ShowS
forall a. Eq a => [a] -> [a] -> [a]
\\ [Char
'\n'])
             endOfHook :: CHSLexer
endOfHook = String -> CHSRegexp
forall s t. String -> Regexp s t
string String
"#}"
                         CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` 
                          \String
_ Position
pos CHSLexerState
s -> (Either Error CHSToken -> Maybe (Either Error CHSToken)
forall a. a -> Maybe a
Just (Either Error CHSToken -> Maybe (Either Error CHSToken))
-> Either Error CHSToken -> Maybe (Either Error CHSToken)
forall a b. (a -> b) -> a -> b
$ CHSToken -> Either Error CHSToken
forall a b. b -> Either a b
Right (Position -> CHSToken
CHSTokEndHook Position
pos), 
                                       Position -> Int -> Position
incPos Position
pos Int
2, CHSLexerState
s, CHSLexer -> Maybe CHSLexer
forall a. a -> Maybe a
Just CHSLexer
chslexer)

-- the inline-C lexer
--
cLexer :: CHSLexer
cLexer :: CHSLexer
cLexer =      CHSLexer
forall s. Lexer s CHSToken
inlineC                     -- inline C code
         CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
ctrl                        -- control code (preserved)
         CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> CHSRegexp
forall s t. String -> Regexp s t
string String
"\n#endc"            -- end of inline C code...
              CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta`                   -- ...preserve '\n' as control token
              \String
_ Position
pos CHSLexerState
s -> (Either Error CHSToken -> Maybe (Either Error CHSToken)
forall a. a -> Maybe a
Just (Either Error CHSToken -> Maybe (Either Error CHSToken))
-> Either Error CHSToken -> Maybe (Either Error CHSToken)
forall a b. (a -> b) -> a -> b
$ CHSToken -> Either Error CHSToken
forall a b. b -> Either a b
Right (Position -> Char -> CHSToken
CHSTokCtrl Position
pos Char
'\n'), Position -> Position
retPos Position
pos, CHSLexerState
s, 
                           CHSLexer -> Maybe CHSLexer
forall a. a -> Maybe a
Just CHSLexer
chslexer)
         where
           inlineC :: Lexer s CHSToken
inlineC = String -> Regexp s CHSToken
forall s t. String -> Regexp s t
alt String
inlineSet Regexp s CHSToken -> Action CHSToken -> Lexer s CHSToken
forall s t. Regexp s t -> Action t -> Lexer s t
`lexaction` Action CHSToken
copyVerbatimC
           --
           copyVerbatimC :: CHSAction 
           copyVerbatimC :: Action CHSToken
copyVerbatimC String
cs Position
pos = CHSToken -> Maybe CHSToken
forall a. a -> Maybe a
Just (CHSToken -> Maybe CHSToken) -> CHSToken -> Maybe CHSToken
forall a b. (a -> b) -> a -> b
$ Position -> String -> CHSToken
CHSTokC Position
pos String
cs

-- whitespace
--
--  * horizontal and vertical tabs, newlines, and form feeds are filter out by
--   `Lexers.ctrlLexer' 
--
whitespace :: CHSLexer
whitespace :: CHSLexer
whitespace  =      (Char -> CHSRegexp
forall s t. Char -> Regexp s t
char Char
' ' CHSRegexp -> Action CHSToken -> CHSLexer
forall s t. Regexp s t -> Action t -> Lexer s t
`lexaction` \String
_ Position
_ -> Maybe CHSToken
forall a. Maybe a
Nothing)
              CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
forall s t. Lexer s t
ctrlLexer

-- identifiers and keywords
--
identOrKW :: CHSLexer
--
-- the strictness annotations seem to help a bit
--
identOrKW :: CHSLexer
identOrKW  = 
       -- identifier or keyword
       (CHSRegexp
forall s t. Regexp s t
letter CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> (CHSRegexp
forall s t. Regexp s t
letter CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< CHSRegexp
forall s t. Regexp s t
digit CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< Char -> CHSRegexp
forall s t. Char -> Regexp s t
char Char
'\'')CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` CHSRegexp
forall s t. Regexp s t
epsilon
       CHSRegexp -> (String -> Position -> Name -> CHSToken) -> CHSLexer
`lexactionName` \String
cs Position
pos Name
name -> (Position -> String -> Name -> CHSToken
idkwtok (Position -> String -> Name -> CHSToken)
-> Position -> String -> Name -> CHSToken
forall a b. (a -> b) -> a -> b
$!Position
pos) String
cs Name
name)
  CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< -- identifier in single quotes
       (Char -> CHSRegexp
forall s t. Char -> Regexp s t
char Char
'\'' CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> CHSRegexp
forall s t. Regexp s t
letter CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> (CHSRegexp
forall s t. Regexp s t
letter CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< CHSRegexp
forall s t. Regexp s t
digit)CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` Char -> CHSRegexp
forall s t. Char -> Regexp s t
char Char
'\''
       CHSRegexp -> (String -> Position -> Name -> CHSToken) -> CHSLexer
`lexactionName` \String
cs Position
pos Name
name -> (Position -> String -> Name -> CHSToken
mkid (Position -> String -> Name -> CHSToken)
-> Position -> String -> Name -> CHSToken
forall a b. (a -> b) -> a -> b
$!Position
pos) String
cs Name
name)
       -- NB: quotes are removed by lexemeToIdent
  where
    idkwtok :: Position -> String -> Name -> CHSToken
idkwtok Position
pos String
"as"               Name
_    = Position -> CHSToken
CHSTokAs      Position
pos
    idkwtok Position
pos String
"call"             Name
_    = Position -> CHSToken
CHSTokCall    Position
pos
    idkwtok Position
pos String
"class"            Name
_    = Position -> CHSToken
CHSTokClass   Position
pos
    idkwtok Position
pos String
"context"          Name
_    = Position -> CHSToken
CHSTokContext Position
pos
    idkwtok Position
pos String
"deriving"         Name
_    = Position -> CHSToken
CHSTokDerive  Position
pos
    idkwtok Position
pos String
"enum"             Name
_    = Position -> CHSToken
CHSTokEnum    Position
pos
    idkwtok Position
pos String
"foreign"          Name
_    = Position -> CHSToken
CHSTokForeign Position
pos
    idkwtok Position
pos String
"fun"              Name
_    = Position -> CHSToken
CHSTokFun     Position
pos
    idkwtok Position
pos String
"get"              Name
_    = Position -> CHSToken
CHSTokGet     Position
pos
    idkwtok Position
pos String
"import"           Name
_    = Position -> CHSToken
CHSTokImport  Position
pos
    idkwtok Position
pos String
"lib"              Name
_    = Position -> CHSToken
CHSTokLib     Position
pos
    idkwtok Position
pos String
"newtype"          Name
_    = Position -> CHSToken
CHSTokNewtype Position
pos
    idkwtok Position
pos String
"pointer"          Name
_    = Position -> CHSToken
CHSTokPointer Position
pos
    idkwtok Position
pos String
"prefix"           Name
_    = Position -> CHSToken
CHSTokPrefix  Position
pos
    idkwtok Position
pos String
"pure"             Name
_    = Position -> CHSToken
CHSTokPure    Position
pos
    idkwtok Position
pos String
"qualified"        Name
_    = Position -> CHSToken
CHSTokQualif  Position
pos
    idkwtok Position
pos String
"set"              Name
_    = Position -> CHSToken
CHSTokSet     Position
pos
    idkwtok Position
pos String
"sizeof"           Name
_    = Position -> CHSToken
CHSTokSizeof  Position
pos
    idkwtok Position
pos String
"stable"           Name
_    = Position -> CHSToken
CHSTokStable  Position
pos
    idkwtok Position
pos String
"type"             Name
_    = Position -> CHSToken
CHSTokType    Position
pos
    idkwtok Position
pos String
"underscoreToCase" Name
_    = Position -> CHSToken
CHSTok_2Case  Position
pos
    idkwtok Position
pos String
"unsafe"           Name
_    = Position -> CHSToken
CHSTokUnsafe  Position
pos
    idkwtok Position
pos String
"with"             Name
_    = Position -> CHSToken
CHSTokWith    Position
pos
    idkwtok Position
pos String
"lock"             Name
_    = Position -> CHSToken
CHSTokLock    Position
pos
    idkwtok Position
pos String
"nolock"           Name
_    = Position -> CHSToken
CHSTokNolock  Position
pos
    idkwtok Position
pos String
cs                 Name
name = Position -> String -> Name -> CHSToken
mkid Position
pos String
cs Name
name
    --
    mkid :: Position -> String -> Name -> CHSToken
mkid Position
pos String
cs Name
name = Position -> Ident -> CHSToken
CHSTokIdent Position
pos (Position -> String -> Name -> Ident
lexemeToIdent Position
pos String
cs Name
name)

-- reserved symbols
--
symbol :: CHSLexer
symbol :: CHSLexer
symbol  =      String -> (Position -> CHSToken) -> CHSLexer
forall t s. String -> (Position -> t) -> Lexer s t
sym String
"->" Position -> CHSToken
CHSTokArrow
          CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> (Position -> CHSToken) -> CHSLexer
forall t s. String -> (Position -> t) -> Lexer s t
sym String
"=>" Position -> CHSToken
CHSTokDArrow
          CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> (Position -> CHSToken) -> CHSLexer
forall t s. String -> (Position -> t) -> Lexer s t
sym String
"."  Position -> CHSToken
CHSTokDot
          CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> (Position -> CHSToken) -> CHSLexer
forall t s. String -> (Position -> t) -> Lexer s t
sym String
","  Position -> CHSToken
CHSTokComma
          CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> (Position -> CHSToken) -> CHSLexer
forall t s. String -> (Position -> t) -> Lexer s t
sym String
"="  Position -> CHSToken
CHSTokEqual
          CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> (Position -> CHSToken) -> CHSLexer
forall t s. String -> (Position -> t) -> Lexer s t
sym String
"-"  Position -> CHSToken
CHSTokMinus
          CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> (Position -> CHSToken) -> CHSLexer
forall t s. String -> (Position -> t) -> Lexer s t
sym String
"*"  Position -> CHSToken
CHSTokStar
          CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> (Position -> CHSToken) -> CHSLexer
forall t s. String -> (Position -> t) -> Lexer s t
sym String
"&"  Position -> CHSToken
CHSTokAmp
          CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> (Position -> CHSToken) -> CHSLexer
forall t s. String -> (Position -> t) -> Lexer s t
sym String
"^"  Position -> CHSToken
CHSTokHat
          CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> (Position -> CHSToken) -> CHSLexer
forall t s. String -> (Position -> t) -> Lexer s t
sym String
"{"  Position -> CHSToken
CHSTokLBrace
          CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> (Position -> CHSToken) -> CHSLexer
forall t s. String -> (Position -> t) -> Lexer s t
sym String
"}"  Position -> CHSToken
CHSTokRBrace
          CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> (Position -> CHSToken) -> CHSLexer
forall t s. String -> (Position -> t) -> Lexer s t
sym String
"("  Position -> CHSToken
CHSTokLParen
          CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> (Position -> CHSToken) -> CHSLexer
forall t s. String -> (Position -> t) -> Lexer s t
sym String
")"  Position -> CHSToken
CHSTokRParen
          where
            sym :: String -> (Position -> t) -> Lexer s t
sym String
cs Position -> t
con = String -> Regexp s t
forall s t. String -> Regexp s t
string String
cs Regexp s t -> Action t -> Lexer s t
forall s t. Regexp s t -> Action t -> Lexer s t
`lexaction` \String
_ Position
pos -> t -> Maybe t
forall a. a -> Maybe a
Just (Position -> t
con Position
pos)

-- string
--
strlit :: CHSLexer
strlit :: CHSLexer
strlit  = Char -> CHSRegexp
forall s t. Char -> Regexp s t
char Char
'"' CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> (CHSRegexp
forall s t. Regexp s t
instr CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< Char -> CHSRegexp
forall s t. Char -> Regexp s t
char Char
'\\')CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` Char -> CHSRegexp
forall s t. Char -> Regexp s t
char Char
'"'
          CHSRegexp -> Action CHSToken -> CHSLexer
forall s t. Regexp s t -> Action t -> Lexer s t
`lexaction` \String
cs Position
pos -> CHSToken -> Maybe CHSToken
forall a. a -> Maybe a
Just (Position -> String -> CHSToken
CHSTokString Position
pos (ShowS
forall a. [a] -> [a]
init ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
forall a. [a] -> [a]
tail ShowS -> ShowS
forall a b. (a -> b) -> a -> b
$ String
cs))

-- verbatim code
--
hsverb :: CHSLexer
hsverb :: CHSLexer
hsverb  = Char -> CHSRegexp
forall s t. Char -> Regexp s t
char Char
'`' CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> CHSRegexp
forall s t. Regexp s t
inhsverbCHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` Char -> CHSRegexp
forall s t. Char -> Regexp s t
char Char
'\''
          CHSRegexp -> Action CHSToken -> CHSLexer
forall s t. Regexp s t -> Action t -> Lexer s t
`lexaction` \String
cs Position
pos -> CHSToken -> Maybe CHSToken
forall a. a -> Maybe a
Just (Position -> String -> CHSToken
CHSTokHSVerb Position
pos (ShowS
forall a. [a] -> [a]
init ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
forall a. [a] -> [a]
tail ShowS -> ShowS
forall a b. (a -> b) -> a -> b
$ String
cs))


-- regular expressions
--
letter, digit, instr, inchar, inhsverb :: Regexp s t
letter :: Regexp s t
letter   = String -> Regexp s t
forall s t. String -> Regexp s t
alt [Char
'a'..Char
'z'] Regexp s t -> Regexp s t -> Regexp s t
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< String -> Regexp s t
forall s t. String -> Regexp s t
alt [Char
'A'..Char
'Z'] Regexp s t -> Regexp s t -> Regexp s t
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< Char -> Regexp s t
forall s t. Char -> Regexp s t
char Char
'_'
digit :: Regexp s t
digit    = String -> Regexp s t
forall s t. String -> Regexp s t
alt [Char
'0'..Char
'9']
instr :: Regexp s t
instr    = String -> Regexp s t
forall s t. String -> Regexp s t
alt ([Char
' '..Char
'\127'] String -> ShowS
forall a. Eq a => [a] -> [a] -> [a]
\\ String
"\"\\")
inchar :: Regexp s t
inchar   = String -> Regexp s t
forall s t. String -> Regexp s t
alt ([Char
' '..Char
'\127'] String -> ShowS
forall a. Eq a => [a] -> [a] -> [a]
\\ String
"\'")
inhsverb :: Regexp s t
inhsverb = String -> Regexp s t
forall s t. String -> Regexp s t
alt ([Char
' '..Char
'\127'] String -> ShowS
forall a. Eq a => [a] -> [a] -> [a]
\\ String
"\'")

-- character sets
--
anySet, inlineSet, specialSet, commentSpecialSet, ctrlSet :: [Char]
anySet :: String
anySet            = [Char
'\0'..Char
'\255']
inlineSet :: String
inlineSet         = String
anySet String -> ShowS
forall a. Eq a => [a] -> [a] -> [a]
\\ String
ctrlSet
specialSet :: String
specialSet        = [Char
'{', Char
'-', Char
'"', Char
'\'']
commentSpecialSet :: String
commentSpecialSet = [Char
'{', Char
'-']
ctrlSet :: String
ctrlSet           = [Char
'\n', Char
'\f', Char
'\r', Char
'\t', Char
'\v']


-- main lexing routine
-- -------------------

-- generate a token sequence out of a string denoting a CHS file
-- (EXPORTED) 
--
--  * the given position is attributed to the first character in the string
--
--  * errors are entered into the compiler state
--
lexCHS        :: String -> Position -> CST s [CHSToken]
lexCHS :: String -> Position -> CST s [CHSToken]
lexCHS String
cs Position
pos  = 
  do
    CHSLexerState
state <- CST s CHSLexerState
forall s. CST s CHSLexerState
initialState
    let ([CHSToken]
ts, LexerState CHSLexerState
lstate, [Error]
errs) = CHSLexer
-> LexerState CHSLexerState
-> ([CHSToken], LexerState CHSLexerState, [Error])
forall s t.
Lexer s t -> LexerState s -> ([t], LexerState s, [Error])
execLexer CHSLexer
chslexer (String
cs, Position
pos, CHSLexerState
state)
        (String
_, Position
pos', CHSLexerState
state')  = LexerState CHSLexerState
lstate
    (Error -> PreCST SwitchBoard s ())
-> [Error] -> PreCST SwitchBoard s [()]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM Error -> PreCST SwitchBoard s ()
forall e s. Error -> PreCST e s ()
raise [Error]
errs
    Position -> CHSLexerState -> PreCST SwitchBoard s ()
forall s. Position -> CHSLexerState -> CST s ()
assertFinalState Position
pos' CHSLexerState
state'
    [CHSToken] -> CST s [CHSToken]
forall (m :: * -> *) a. Monad m => a -> m a
return [CHSToken]
ts