-- | Utilies for the Haskell lexer specification. module BNFC.Backend.Haskell.Utilities.Lexer where import BNFC.Backend.Haskell.Utilities.Utils import BNFC.CF import BNFC.Prelude import qualified Data.Map as Map import Data.String (fromString) import Prettyprinter tokenName :: Token -> Doc () tokenName (Builtin BString) = fromString "TL" tokenName (Builtin BInteger) = fromString "TI" tokenName (Builtin BDouble) = fromString "TD" tokenName (Builtin BChar) = fromString "TC" tokenName Identifier = fromString "TV" tokenName (UserDefined s) = fromString $ ("T_" <>) $ toList s tokenComment :: Token -> Doc () tokenComment (Builtin BString) = fromString " -- ^ String literal." tokenComment (Builtin BInteger) = fromString " -- ^ Integer literal." tokenComment (Builtin BDouble) = fromString " -- ^ Float literal." tokenComment (Builtin BChar) = fromString " -- ^ Character literal." tokenComment Identifier = fromString " -- ^ Identifier." tokenComment (UserDefined _) = emptyDoc isUserDefined :: Token -> Bool isUserDefined (Builtin _) = False isUserDefined Identifier = False isUserDefined (UserDefined _) = True -------------------------------------------------------- -- Andreas, 2020-10-08, issue #292: -- Since the produced lexer for Haskell and Ocaml only recognizes ASCII identifiers, -- but _lbnfKeywords also contains those using unicode characters, -- we have to reclassify any keyword using non-ASCII characters -- as symbol. unicodeAndSymbols :: LBNF -> [String] unicodeAndSymbols lbnf = keywords ++ symbols where -- keywords containing unicode characters keywords :: [String] keywords = toList . theKeyword <$> Map.keys (Map.filterWithKey -- does keyword contain unicode characters? (\k _ -> (not . all isAscii) (theKeyword k)) (_lbnfKeywords lbnf)) symbols :: [String] symbols = toList . theSymbol <$> Map.keys (_lbnfSymbols lbnf) asciiKeywords :: LBNF -> [String] asciiKeywords lbnf = toList . theKeyword <$> Map.keys (Map.filterWithKey (\k _-> all isAscii (theKeyword k)) (_lbnfKeywords lbnf))