{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}

{-
    BNF Converter: C flex generator
    Copyright (C) 2004  Author:  Michael Pellauer
    Copyright (C) 2020  Andreas Abel

    Description   : This module generates the Flex file. It is
                    similar to JLex but with a few peculiarities.

    Author        : Michael Pellauer
    Created       : 5 August, 2003
-}

module BNFC.Backend.C.CFtoFlexC
  ( cf2flex
  , ParserMode(..), parserName, parserPackage, cParser, stlParser, parserHExt
  , preludeForBuffer  -- C code defining a buffer for lexing string literals.
  , cMacros           -- Lexer definitions.
  , commentStates     -- Stream of names for lexer states for comments.
  , lexComments       -- Lexing rules for comments.
  , lexStrings        -- Lexing rules for string literals.
  , lexChars          -- Lexing rules for character literals.
  ) where

import Prelude hiding                ( (<>) )
import Data.Bifunctor                ( first )
import Data.Char                     ( isAlphaNum, isAscii )
import Data.List                     ( isInfixOf )
import Data.Maybe                    ( fromMaybe, maybeToList )
import qualified Data.Map as Map
import System.FilePath               ( (<.>) )

import BNFC.CF
import BNFC.Backend.C.Common         ( posixC )
import BNFC.Backend.C.RegToFlex
import BNFC.Backend.Common.NamedVariables
import BNFC.Options                  ( InPackage )
import BNFC.PrettyPrint
import BNFC.Utils                    ( cstring, symbolToName, unless, when )

data ParserMode
  = CParser Bool String    -- ^ @C@ (@False@) or @C++ no STL@ (@True@) mode, with @name@ to use as prefix.
  | CppParser InPackage String    -- ^ @C++@ mode, with optional package name

parserName :: ParserMode -> String
parserName :: ParserMode -> [Char]
parserName = \case
  CParser   Bool
_ [Char]
n -> [Char]
n
  CppParser InPackage
p [Char]
n -> [Char] -> InPackage -> [Char]
forall a. a -> Maybe a -> a
fromMaybe [Char]
n InPackage
p

parserPackage :: ParserMode -> InPackage
parserPackage :: ParserMode -> InPackage
parserPackage = \case
  CParser   Bool
_ [Char]
_ -> InPackage
forall a. Maybe a
Nothing
  CppParser InPackage
p [Char]
_ -> InPackage
p

cParser :: ParserMode -> Bool
cParser :: ParserMode -> Bool
cParser = \case
  CParser   Bool
b [Char]
_ -> Bool -> Bool
not Bool
b
  CppParser InPackage
_ [Char]
_ -> Bool
False

stlParser :: ParserMode -> Bool
stlParser :: ParserMode -> Bool
stlParser = \case
  CParser   Bool
_ [Char]
_ -> Bool
False
  CppParser InPackage
_ [Char]
_ -> Bool
True

parserHExt :: ParserMode -> String
parserHExt :: ParserMode -> [Char]
parserHExt = \case
  CParser   Bool
b [Char]
_ -> if Bool
b then [Char]
"H" else [Char]
"h"
  CppParser InPackage
_ [Char]
_ -> [Char]
"H"

-- | Entrypoint.
cf2flex :: ParserMode -> CF -> (String, SymMap) -- The environment is reused by the parser.
cf2flex :: ParserMode -> CF -> ([Char], SymMap)
cf2flex ParserMode
mode CF
cf = (, SymMap
env) ([Char] -> ([Char], SymMap)) -> [Char] -> ([Char], SymMap)
forall a b. (a -> b) -> a -> b
$ [[Char]] -> [Char]
unlines
    [ Bool -> ParserMode -> [Char]
prelude Bool
stringLiterals ParserMode
mode
    , CF -> [Char]
cMacros CF
cf
    , KeywordEnv -> [Char]
lexSymbols KeywordEnv
env1
    , InPackage -> CF -> SymMap -> [Char]
restOfFlex (ParserMode -> InPackage
parserPackage ParserMode
mode) CF
cf SymMap
env
    , [Char]
footer -- mode
    ]
  where
    env :: SymMap
env  = [(SymKey, [Char])] -> SymMap
forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList [(SymKey, [Char])]
env2
    env0 :: KeywordEnv
env0 = [[Char]] -> [Int] -> KeywordEnv
makeSymEnv (CF -> [[Char]]
forall function. CFG function -> [[Char]]
cfgSymbols CF
cf) [Int
0 :: Int ..]
    env1 :: KeywordEnv
env1 = KeywordEnv
env0 KeywordEnv -> KeywordEnv -> KeywordEnv
forall a. [a] -> [a] -> [a]
++ [[Char]] -> [Int] -> KeywordEnv
makeKwEnv (CF -> [[Char]]
forall function. CFG function -> [[Char]]
reservedWords CF
cf) [KeywordEnv -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length KeywordEnv
env0 ..]
    env2 :: [(SymKey, [Char])]
env2 = (([Char], [Char]) -> (SymKey, [Char]))
-> KeywordEnv -> [(SymKey, [Char])]
forall a b. (a -> b) -> [a] -> [b]
map (([Char] -> SymKey) -> ([Char], [Char]) -> (SymKey, [Char])
forall a b c. (a -> b) -> (a, c) -> (b, c)
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first [Char] -> SymKey
Keyword) KeywordEnv
env1 [(SymKey, [Char])] -> [(SymKey, [Char])] -> [(SymKey, [Char])]
forall a. [a] -> [a] -> [a]
++ ([Char] -> (SymKey, [Char])) -> [[Char]] -> [(SymKey, [Char])]
forall a b. (a -> b) -> [a] -> [b]
map (\ [Char]
x -> ([Char] -> SymKey
Tokentype [Char]
x, [Char]
"T_" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
x)) (CF -> [[Char]]
forall function. CFG function -> [[Char]]
tokenNames CF
cf)
    makeSymEnv :: [[Char]] -> [Int] -> KeywordEnv
makeSymEnv     = ([Char] -> Int -> ([Char], [Char]))
-> [[Char]] -> [Int] -> KeywordEnv
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (([Char] -> Int -> ([Char], [Char]))
 -> [[Char]] -> [Int] -> KeywordEnv)
-> ([Char] -> Int -> ([Char], [Char]))
-> [[Char]]
-> [Int]
-> KeywordEnv
forall a b. (a -> b) -> a -> b
$ \ [Char]
s Int
n -> ([Char]
s, Char
'_' Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
: [Char] -> InPackage -> [Char]
forall a. a -> Maybe a -> a
fromMaybe ([Char]
"SYMB_" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
n) ([Char] -> InPackage
symbolToName [Char]
s))
    makeKwEnv :: [[Char]] -> [Int] -> KeywordEnv
makeKwEnv      = ([Char] -> Int -> ([Char], [Char]))
-> [[Char]] -> [Int] -> KeywordEnv
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (([Char] -> Int -> ([Char], [Char]))
 -> [[Char]] -> [Int] -> KeywordEnv)
-> ([Char] -> Int -> ([Char], [Char]))
-> [[Char]]
-> [Int]
-> KeywordEnv
forall a b. (a -> b) -> a -> b
$ \ [Char]
s Int
n -> ([Char]
s, [Char]
"_KW_" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ if (Char -> Bool) -> [Char] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (\ Char
c -> Char -> Bool
isAlphaNum Char
c Bool -> Bool -> Bool
&& Char -> Bool
isAscii Char
c) [Char]
s then [Char]
s else Int -> [Char]
forall a. Show a => a -> [Char]
show Int
n)
    stringLiterals :: Bool
stringLiterals = CF -> Cat -> Bool
forall f. CFG f -> Cat -> Bool
isUsedCat CF
cf ([Char] -> Cat
TokenCat [Char]
catString)

prelude :: Bool -> ParserMode -> String
prelude :: Bool -> ParserMode -> [Char]
prelude Bool
stringLiterals ParserMode
mode = [[Char]] -> [Char]
unlines ([[Char]] -> [Char]) -> [[Char]] -> [Char]
forall a b. (a -> b) -> a -> b
$ [[[Char]]] -> [[Char]]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat
  [ [ [Char]
"/* Lexer definition for use with FLex */"
    , [Char]
""
    -- noinput and nounput are most often unused
    -- https://stackoverflow.com/questions/39075510/option-noinput-nounput-what-are-they-for
    , [Char]
"%option noyywrap noinput nounput"
    , [Char]
"%option reentrant bison-bridge bison-locations"
    , [Char]
""
    ]
  , Bool -> [[Char]] -> [[Char]]
forall m. Monoid m => Bool -> m -> m
when Bool
stringLiterals
    [ [Char]
"/* Additional data for the lexer: a buffer for lexing string literals. */"
    , [Char]
"%option extra-type=\"Buffer\""
    , [Char]
""
    ]
  , InPackage -> [[Char]]
forall a. Maybe a -> [a]
maybeToList (InPackage -> [[Char]]) -> InPackage -> [[Char]]
forall a b. (a -> b) -> a -> b
$ ([Char]
"%option prefix=\"" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++) ([Char] -> [Char]) -> ([Char] -> [Char]) -> [Char] -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"\"" ) ([Char] -> [Char]) -> InPackage -> InPackage
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParserMode -> InPackage
parserPackage ParserMode
mode
  , Bool -> [[Char]] -> [[Char]]
forall m. Monoid m => Bool -> m -> m
when (ParserMode -> Bool
cParser ParserMode
mode) ([[Char]] -> [[Char]]) -> [[Char]] -> [[Char]]
forall a b. (a -> b) -> a -> b
$ [[[Char]]] -> [[Char]]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat
    -- The following #define needs to be at the top before the automatic #include <stdlib.h>
    [ [ [Char]
"%top{" ]
    , [[Char]]
posixC
    , [ [Char]
"}" ]
    ]
  , [ [Char]
"%{"
    , [Char]
"#include \"" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ([Char]
"Absyn" [Char] -> [Char] -> [Char]
<.> [Char]
h) [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"\""
    , [Char]
"#include \"" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ([Char]
"Bison" [Char] -> [Char] -> [Char]
<.> [Char]
h) [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"\""
    , [Char]
""
    ]
  , [ [Char]
"#define initialize_lexer " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ParserMode -> [Char]
parserName ParserMode
mode [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"_initialize_lexer"
    , [Char]
""
    ]
  , Bool -> [[Char]] -> [[Char]]
forall m. Monoid m => Bool -> m -> m
when Bool
stringLiterals ([[Char]] -> [[Char]]) -> [[Char]] -> [[Char]]
forall a b. (a -> b) -> a -> b
$ [Char] -> [[Char]]
preludeForBuffer ([Char] -> [[Char]]) -> [Char] -> [[Char]]
forall a b. (a -> b) -> a -> b
$ [Char]
"Buffer" [Char] -> [Char] -> [Char]
<.> [Char]
h
    -- https://www.gnu.org/software/bison/manual/html_node/Token-Locations.html
    -- Flex is responsible for keeping tracking of the yylloc for Bison.
    -- Flex also doesn't do this automatically so we need this function
    -- https://stackoverflow.com/a/22125500/425756
  , [ [Char]
"static void update_loc(YYLTYPE* loc, char* text)"
    , [Char]
"{"
    , [Char]
"  loc->first_line = loc->last_line;"
    , [Char]
"  loc->first_column = loc->last_column;"
    , [Char]
"  int i = 0;"  -- put this here as @for (int i...)@ is only allowed in C99
    , [Char]
"  for (; text[i] != '\\0'; ++i) {"
    , [Char]
"      if (text[i] == '\\n') {"        -- Checking for \n is good enough to also support \r\n (but not \r)
    , [Char]
"          ++loc->last_line;"
    , [Char]
"          loc->last_column = 0; "
    , [Char]
"      } else {"
    , [Char]
"          ++loc->last_column; "
    , [Char]
"      }"
    , [Char]
"  }"
    , [Char]
"}"
    , [Char]
"#define YY_USER_ACTION update_loc(yylloc, yytext);"
    , [Char]
""
    , [Char]
"%}"
    ]
  ]
  where
  h :: [Char]
h = ParserMode -> [Char]
parserHExt ParserMode
mode

-- | Part of the lexer prelude needed when string literals are to be lexed.
--   Defines an interface to the Buffer.
preludeForBuffer :: String -> [String]
preludeForBuffer :: [Char] -> [[Char]]
preludeForBuffer [Char]
bufferH =
    [ [Char]
"/* BEGIN extensible string buffer */"
    , [Char]
""
    , [Char]
"#include \"" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
bufferH [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"\""
    , [Char]
""
    , [Char]
"/* The initial size of the buffer to lex string literals. */"
    , [Char]
"#define LITERAL_BUFFER_INITIAL_SIZE 1024"
    , [Char]
""
    , [Char]
"/* The pointer to the literal buffer. */"
    , [Char]
"#define literal_buffer yyextra"
    , [Char]
""
    , [Char]
"/* Initialize the literal buffer. */"
    , [Char]
"#define LITERAL_BUFFER_CREATE() literal_buffer = newBuffer(LITERAL_BUFFER_INITIAL_SIZE)"
    , [Char]
""
    , [Char]
"/* Append characters at the end of the buffer. */"
    , [Char]
"#define LITERAL_BUFFER_APPEND(s) bufferAppendString(literal_buffer, s)"
    , [Char]
""
    , [Char]
"/* Append a character at the end of the buffer. */"
    , [Char]
"#define LITERAL_BUFFER_APPEND_CHAR(c) bufferAppendChar(literal_buffer, c)"
    , [Char]
""
    , [Char]
"/* Release the buffer, returning a pointer to its content. */"
    , [Char]
"#define LITERAL_BUFFER_HARVEST() releaseBuffer(literal_buffer)"
    , [Char]
""
    , [Char]
"/* In exceptional cases, e.g. when reaching EOF, we have to free the buffer. */"
    , [Char]
"#define LITERAL_BUFFER_FREE() freeBuffer(literal_buffer)"
    , [Char]
""
    , [Char]
"/* END extensible string buffer */"
    , [Char]
""
    ]

-- For now all categories are included.
-- Optimally only the ones that are used should be generated.
cMacros :: CF ->  String
cMacros :: CF -> [Char]
cMacros CF
cf = [[Char]] -> [Char]
unlines
  [ [Char]
"LETTER [a-zA-Z]"
  , [Char]
"CAPITAL [A-Z]"
  , [Char]
"SMALL [a-z]"
  , [Char]
"DIGIT [0-9]"
  , [Char]
"IDENT [a-zA-Z0-9'_]"
  , [[Char]] -> [Char]
unwords ([[Char]] -> [Char]) -> [[Char]] -> [Char]
forall a b. (a -> b) -> a -> b
$ [[[Char]]] -> [[Char]]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat
      [ [ [Char]
"%START CHAR CHARESC CHAREND STRING ESCAPED" ]
      , Int -> [[Char]] -> [[Char]]
forall a. Int -> [a] -> [a]
take (CF -> Int
numberOfBlockCommentForms CF
cf) [[Char]]
commentStates
      ]
  , [Char]
""
  , [Char]
"%%  /* Rules. */"
  ]

lexSymbols :: KeywordEnv -> String
lexSymbols :: KeywordEnv -> [Char]
lexSymbols KeywordEnv
ss = (([Char], [Char]) -> [Char]) -> KeywordEnv -> [Char]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap ([Char], [Char]) -> [Char]
transSym KeywordEnv
ss
  where
    transSym :: ([Char], [Char]) -> [Char]
transSym ([Char]
s,[Char]
r) =
      [Char]
"<INITIAL>\"" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
s' [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"\"      \t return " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
r [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
";\n"
        where
         s' :: [Char]
s' = [Char] -> [Char]
escapeChars [Char]
s

restOfFlex :: InPackage -> CF -> SymMap -> String
restOfFlex :: InPackage -> CF -> SymMap -> [Char]
restOfFlex InPackage
_inPackage CF
cf SymMap
env = [[Char]] -> [Char]
unlines ([[Char]] -> [Char]) -> [[Char]] -> [Char]
forall a b. (a -> b) -> a -> b
$ [[[Char]]] -> [[Char]]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat
  [ [ Doc -> [Char]
render (Doc -> [Char]) -> Doc -> [Char]
forall a b. (a -> b) -> a -> b
$ (KeywordEnv, [[Char]]) -> Doc
lexComments ((KeywordEnv, [[Char]]) -> Doc) -> (KeywordEnv, [[Char]]) -> Doc
forall a b. (a -> b) -> a -> b
$ CF -> (KeywordEnv, [[Char]])
comments CF
cf
    , [Char]
""
    ]
  , [[Char]]
userDefTokens
  , [Char] -> [[Char]] -> [[Char]]
forall {a}. [Char] -> [a] -> [a]
ifC [Char]
catString  ([[Char]] -> [[Char]]) -> [[Char]] -> [[Char]]
forall a b. (a -> b) -> a -> b
$ [Char] -> [Char] -> [Char] -> [[Char]]
lexStrings [Char]
"yylval" [Char]
"_STRING_" [Char]
"_ERROR_"
  , [Char] -> [[Char]] -> [[Char]]
forall {a}. [Char] -> [a] -> [a]
ifC [Char]
catChar    ([[Char]] -> [[Char]]) -> [[Char]] -> [[Char]]
forall a b. (a -> b) -> a -> b
$ [Char] -> [Char] -> [[Char]]
lexChars   [Char]
"yylval" [Char]
"_CHAR_"
  , [Char] -> [[Char]] -> [[Char]]
forall {a}. [Char] -> [a] -> [a]
ifC [Char]
catDouble  [ [Char]
"<INITIAL>{DIGIT}+\".\"{DIGIT}+(\"e\"(\\-)?{DIGIT}+)?      \t yylval->_double = atof(yytext); return _DOUBLE_;" ]
  , [Char] -> [[Char]] -> [[Char]]
forall {a}. [Char] -> [a] -> [a]
ifC [Char]
catInteger [ [Char]
"<INITIAL>{DIGIT}+      \t yylval->_int = atoi(yytext); return _INTEGER_;" ]
  , [Char] -> [[Char]] -> [[Char]]
forall {a}. [Char] -> [a] -> [a]
ifC [Char]
catIdent   [ [Char]
"<INITIAL>{LETTER}{IDENT}*      \t yylval->_string = strdup(yytext); return _IDENT_;" ]
  , [ [Char]
"<INITIAL>[ \\t\\r\\n\\f]      \t /* ignore white space. */;"
    , [Char]
"<INITIAL>.      \t return _ERROR_;"
    , [Char]
""
    , [Char]
"%%  /* Initialization code. */"
    ]
  ]
  where
  ifC :: [Char] -> [a] -> [a]
ifC [Char]
cat [a]
s = if CF -> Cat -> Bool
forall f. CFG f -> Cat -> Bool
isUsedCat CF
cf ([Char] -> Cat
TokenCat [Char]
cat) then [a]
s else []
  userDefTokens :: [[Char]]
userDefTokens =
    [ [Char]
"<INITIAL>" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Reg -> [Char]
printRegFlex Reg
exp [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++
       [Char]
"    \t yylval->_string = strdup(yytext); return " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char] -> [Char]
sName [Char]
name [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
";"
    | ([Char]
name, Reg
exp) <- CF -> [([Char], Reg)]
forall f. CFG f -> [([Char], Reg)]
tokenPragmas CF
cf
    ]
    where sName :: [Char] -> [Char]
sName [Char]
n = [Char] -> InPackage -> [Char]
forall a. a -> Maybe a -> a
fromMaybe [Char]
n (InPackage -> [Char]) -> InPackage -> [Char]
forall a b. (a -> b) -> a -> b
$ SymKey -> SymMap -> InPackage
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup ([Char] -> SymKey
Tokentype [Char]
n) SymMap
env

footer :: String
footer :: [Char]
footer = [[Char]] -> [Char]
unlines
    [ [Char]
"yyscan_t initialize_lexer(FILE *inp)"
    , [Char]
"{"
    , [Char]
"  yyscan_t scanner;"
    , [Char]
"  if (yylex_init_extra(NULL, &scanner)) return 0;"
    , [Char]
"  if (inp) yyrestart(inp, scanner);"
    , [Char]
"  return scanner;"
    , [Char]
"}"
    ]

-- | Lexing of strings, converting escaped characters.
lexStrings :: String -> String -> String -> [String]
lexStrings :: [Char] -> [Char] -> [Char] -> [[Char]]
lexStrings [Char]
yylval [Char]
stringToken [Char]
errorToken =
    [ [Char]
"<INITIAL>\"\\\"\"        \t LITERAL_BUFFER_CREATE(); BEGIN STRING;"
    , [Char]
"<STRING>\\\\             \t BEGIN ESCAPED;"
    , [Char]
"<STRING>\\\"             \t " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
yylval [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"->_string = LITERAL_BUFFER_HARVEST(); BEGIN INITIAL; return " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
stringToken [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
";"
    , [Char]
"<STRING>.              \t LITERAL_BUFFER_APPEND_CHAR(yytext[0]);"
    , [Char]
"<ESCAPED>f             \t LITERAL_BUFFER_APPEND_CHAR('\\f'); BEGIN STRING;"
    , [Char]
"<ESCAPED>n             \t LITERAL_BUFFER_APPEND_CHAR('\\n'); BEGIN STRING;"
    , [Char]
"<ESCAPED>r             \t LITERAL_BUFFER_APPEND_CHAR('\\r'); BEGIN STRING;"
    , [Char]
"<ESCAPED>t             \t LITERAL_BUFFER_APPEND_CHAR('\\t'); BEGIN STRING;"
    , [Char]
"<ESCAPED>\\\"            \t LITERAL_BUFFER_APPEND_CHAR('\"');  BEGIN STRING;"
    , [Char]
"<ESCAPED>\\\\            \t LITERAL_BUFFER_APPEND_CHAR('\\\\'); BEGIN STRING;"
    , [Char]
"<ESCAPED>.             \t LITERAL_BUFFER_APPEND(yytext);    BEGIN STRING;"
    , [Char]
"<STRING,ESCAPED><<EOF>>\t LITERAL_BUFFER_FREE(); return " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
errorToken [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
";"
    ]

-- | Lexing of characters, converting escaped characters.
lexChars :: String -> String -> [String]
lexChars :: [Char] -> [Char] -> [[Char]]
lexChars [Char]
yylval [Char]
charToken =
    [ [Char]
"<INITIAL>\"'\" \tBEGIN CHAR;"
    , [Char]
"<CHAR>\\\\      \t BEGIN CHARESC;"
    , [Char]
"<CHAR>[^']      \t BEGIN CHAREND; " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
yylval [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"->_char = yytext[0]; return " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
charToken [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
";"
    , [Char]
"<CHARESC>f      \t BEGIN CHAREND; " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
yylval [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"->_char = '\\f';     return " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
charToken [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
";"
    , [Char]
"<CHARESC>n      \t BEGIN CHAREND; " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
yylval [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"->_char = '\\n';     return " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
charToken [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
";"
    , [Char]
"<CHARESC>r      \t BEGIN CHAREND; " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
yylval [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"->_char = '\\r';     return " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
charToken [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
";"
    , [Char]
"<CHARESC>t      \t BEGIN CHAREND; " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
yylval [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"->_char = '\\t';     return " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
charToken [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
";"
    , [Char]
"<CHARESC>.      \t BEGIN CHAREND; " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
yylval [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"->_char = yytext[0]; return " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
charToken [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
";"
    , [Char]
"<CHAREND>\"'\"      \t BEGIN INITIAL;"
    ]

-- ---------------------------------------------------------------------------
-- Comments

-- | Create flex rules for single-line and multi-lines comments.
-- The first argument is an optional namespace (for C++); the second
-- argument is the set of comment delimiters as returned by BNFC.CF.comments.
--
-- This function is only compiling the results of applying either
-- lexSingleComment or lexMultiComment on each comment delimiter or pair of
-- delimiters.
--
-- >>> lexComments ([("{-","-}")],["--"])
-- <INITIAL>"--"[^\n]* /* skip */; /* BNFC: comment "--" */
-- <INITIAL>"{-" BEGIN COMMENT; /* BNFC: block comment "{-" "-}" */
-- <COMMENT>"-}" BEGIN INITIAL;
-- <COMMENT>.    /* skip */;
-- <COMMENT>[\n] /* skip */;
lexComments :: ([(String, String)], [String]) -> Doc
lexComments :: (KeywordEnv, [[Char]]) -> Doc
lexComments (KeywordEnv
m,[[Char]]
s) = [Doc] -> Doc
vcat ([Doc] -> Doc) -> [Doc] -> Doc
forall a b. (a -> b) -> a -> b
$ [[Doc]] -> [Doc]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat
  [ ([Char] -> Doc) -> [[Char]] -> [Doc]
forall a b. (a -> b) -> [a] -> [b]
map [Char] -> Doc
lexSingleComment [[Char]]
s
  , (([Char], [Char]) -> [Char] -> Doc)
-> KeywordEnv -> [[Char]] -> [Doc]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith ([Char], [Char]) -> [Char] -> Doc
lexMultiComment KeywordEnv
m [[Char]]
commentStates
  ]

-- | If we have several block comments, we need different COMMENT lexing states.
commentStates :: [String]
commentStates :: [[Char]]
commentStates = ([Char] -> [Char]) -> [[Char]] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map ([Char]
"COMMENT" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++) ([[Char]] -> [[Char]]) -> [[Char]] -> [[Char]]
forall a b. (a -> b) -> a -> b
$ [Char]
"" [Char] -> [[Char]] -> [[Char]]
forall a. a -> [a] -> [a]
: (Int -> [Char]) -> [Int] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map Int -> [Char]
forall a. Show a => a -> [Char]
show [Int
1::Int ..]

-- | Create a lexer rule for single-line comments.
-- The first argument is -- an optional c++ namespace
-- The second argument is the delimiter that marks the beginning of the
-- comment.
--
-- >>> lexSingleComment "--"
-- <INITIAL>"--"[^\n]* /* skip */; /* BNFC: comment "--" */
--
-- >>> lexSingleComment "\""
-- <INITIAL>"\""[^\n]* /* skip */; /* BNFC: comment "\"" */
lexSingleComment :: String -> Doc
lexSingleComment :: [Char] -> Doc
lexSingleComment [Char]
c =
    Doc
"<INITIAL>" Doc -> Doc -> Doc
<> [Char] -> Doc
cstring [Char]
c Doc -> Doc -> Doc
<> Doc
"[^\\n]*"
    Doc -> Doc -> Doc
<+> Doc
"/* skip */;"
    Doc -> Doc -> Doc
<+> Bool -> Doc -> Doc
forall m. Monoid m => Bool -> m -> m
unless ([Char] -> Bool
containsCCommentMarker [Char]
c) (Doc
"/* BNFC: comment" Doc -> Doc -> Doc
<+> [Char] -> Doc
cstring [Char]
c Doc -> Doc -> Doc
<+> Doc
"*/")

containsCCommentMarker :: String -> Bool
containsCCommentMarker :: [Char] -> Bool
containsCCommentMarker [Char]
s = [Char]
"/*" [Char] -> [Char] -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isInfixOf` [Char]
s Bool -> Bool -> Bool
|| [Char]
"*/" [Char] -> [Char] -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isInfixOf` [Char]
s

-- | Create a lexer rule for multi-lines comments.
-- The first argument is -- an optional c++ namespace
-- The second arguments is the pair of delimiter for the multi-lines comment:
-- start deleminiter and end delimiter.
-- There might be a possible bug here if a language includes 2 multi-line
-- comments. They could possibly start a comment with one character and end it
-- with another.  However this seems rare.
--
-- >>> lexMultiComment ("{-", "-}") "COMMENT"
-- <INITIAL>"{-" BEGIN COMMENT; /* BNFC: block comment "{-" "-}" */
-- <COMMENT>"-}" BEGIN INITIAL;
-- <COMMENT>.    /* skip */;
-- <COMMENT>[\n] /* skip */;
--
-- >>> lexMultiComment ("\"'", "'\"") "COMMENT"
-- <INITIAL>"\"'" BEGIN COMMENT; /* BNFC: block comment "\"'" "'\"" */
-- <COMMENT>"'\"" BEGIN INITIAL;
-- <COMMENT>.    /* skip */;
-- <COMMENT>[\n] /* skip */;
lexMultiComment :: (String, String) -> String -> Doc
lexMultiComment :: ([Char], [Char]) -> [Char] -> Doc
lexMultiComment ([Char]
b,[Char]
e) [Char]
comment = [Doc] -> Doc
vcat
    [ Doc
"<INITIAL>" Doc -> Doc -> Doc
<> [Char] -> Doc
cstring [Char]
b Doc -> Doc -> Doc
<+> Doc
"BEGIN" Doc -> Doc -> Doc
<+> [Char] -> Doc
text [Char]
comment Doc -> Doc -> Doc
<> Doc
";"
      Doc -> Doc -> Doc
<+> Bool -> Doc -> Doc
forall m. Monoid m => Bool -> m -> m
unless ([Char] -> Bool
containsCCommentMarker [Char]
b Bool -> Bool -> Bool
|| [Char] -> Bool
containsCCommentMarker [Char]
e)
          (Doc
"/* BNFC: block comment" Doc -> Doc -> Doc
<+> [Char] -> Doc
cstring [Char]
b Doc -> Doc -> Doc
<+> [Char] -> Doc
cstring [Char]
e Doc -> Doc -> Doc
<+> Doc
"*/")
    , Doc
commentTag Doc -> Doc -> Doc
<> [Char] -> Doc
cstring [Char]
e Doc -> Doc -> Doc
<+> Doc
"BEGIN INITIAL;"
    , Doc
commentTag Doc -> Doc -> Doc
<> Doc
".    /* skip */;"
    , Doc
commentTag Doc -> Doc -> Doc
<> Doc
"[\\n] /* skip */;"
    ]
  where
  commentTag :: Doc
commentTag = [Char] -> Doc
text ([Char] -> Doc) -> [Char] -> Doc
forall a b. (a -> b) -> a -> b
$ [Char]
"<" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
comment [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
">"

-- | Helper function that escapes characters in strings.
escapeChars :: String -> String
escapeChars :: [Char] -> [Char]
escapeChars [] = []
escapeChars (Char
'\\':[Char]
xs) = Char
'\\' Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
: (Char
'\\' Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
: ([Char] -> [Char]
escapeChars [Char]
xs))
escapeChars (Char
'\"':[Char]
xs) = Char
'\\' Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
: (Char
'\"' Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
: ([Char] -> [Char]
escapeChars [Char]
xs))
escapeChars (Char
x:[Char]
xs) = Char
x Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
: ([Char] -> [Char]
escapeChars [Char]
xs)