{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}

{- Generates a Pygments lexer from a BNF grammar.
 -
 - Resources:
 - * Pygments: http://pygments.org/
 - * Lexer development: http://pygments.org/docs/lexerdevelopment/
 - * Token types: http://pygments.org/docs/tokens/
 - -}
module BNFC.Backend.Pygments where

import Prelude hiding ((<>))

import BNFC.Abs (Reg(..))
import BNFC.Backend.Base (mkfile, Backend)
import BNFC.CF
import BNFC.Lexing
import BNFC.Options hiding (Backend)
import BNFC.Utils
import BNFC.PrettyPrint

makePygments :: SharedOptions -> CF -> Backend
makePygments :: SharedOptions -> CF -> Backend
makePygments SharedOptions
opts CF
cf = do
    let lexerfile :: [Char]
lexerfile = Doc -> [Char]
render ([Char] -> Doc
lowerCase [Char]
name Doc -> Doc -> Doc
<> Doc
"/__init__.py")
        setupfile :: [Char]
setupfile = [Char]
"setup.py"
    [Char] -> MakeComment -> Doc -> Backend
forall c. FileContent c => [Char] -> MakeComment -> c -> Backend
mkfile [Char]
lexerfile MakeComment
comment ([Char] -> CF -> Doc
lexer [Char]
name CF
cf)
    [Char] -> MakeComment -> Doc -> Backend
forall c. FileContent c => [Char] -> MakeComment -> c -> Backend
mkfile [Char]
setupfile MakeComment
comment ([Char] -> Doc
setup [Char]
name)
  where name :: [Char]
name = SharedOptions -> [Char]
lang SharedOptions
opts

comment :: String -> String
comment :: MakeComment
comment = ([Char]
"# " [Char] -> MakeComment
forall a. [a] -> [a] -> [a]
++)

setup :: String -> Doc
setup :: [Char] -> Doc
setup [Char]
name = [Doc] -> Doc
vcat
    [ Doc
"from setuptools import setup, find_packages"
    , Doc
"setup" Doc -> Doc -> Doc
<> Doc -> Doc
parens ([Doc] -> Doc
fsep (Doc -> [Doc] -> [Doc]
punctuate Doc
","
        [ Doc
"name" Doc -> Doc -> Doc
<=> Doc -> Doc
quotes (Doc
"pygment-"Doc -> Doc -> Doc
<>[Char] -> Doc
lowerCase [Char]
name)
        , Doc
"version" Doc -> Doc -> Doc
<=> Doc
"0.1"
        , Doc
"packages" Doc -> Doc -> Doc
<=> Doc -> Doc
brackets (Doc -> Doc
quotes Doc
moduleName)
        , Doc
"entry_points" Doc -> Doc -> Doc
<=> Doc
entryPoints
        , Doc
"install_requires = ['pygments']"
        ]))
    ]
  where
    className :: Doc
className = [Char] -> Doc
camelCase [Char]
name Doc -> Doc -> Doc
<> Doc
"Lexer"
    moduleName :: Doc
moduleName = [Char] -> Doc
lowerCase [Char]
name
    entryPoints :: Doc
entryPoints =
        Doc -> Doc
braces( Doc
"'pygments.lexers':"
              Doc -> Doc -> Doc
<> Doc -> Doc
doubleQuotes (Doc
moduleName Doc -> Doc -> Doc
<> Doc
"=" Doc -> Doc -> Doc
<> Doc
moduleName Doc -> Doc -> Doc
<> Doc
":" Doc -> Doc -> Doc
<> Doc
className))

lexer :: String -> CF -> Doc
lexer :: [Char] -> CF -> Doc
lexer [Char]
name CF
cf = [Doc] -> Doc
vcat
    -- Import statments
    [ Doc
"import pygments.lexer"
    , Doc
"from pygments.token import *"
    -- Declare our lexer
    , Doc
"__all__" Doc -> Doc -> Doc
<=> Doc -> Doc
brackets (Doc -> Doc
doubleQuotes Doc
className)
    -- define lexer
    , Doc
"class" Doc -> Doc -> Doc
<+> Doc
className Doc -> Doc -> Doc
<> Doc -> Doc
parens Doc
"pygments.lexer.RegexLexer" Doc -> Doc -> Doc
<> Doc
":"
    , [Doc] -> Doc
indent
        [ Doc
"name" Doc -> Doc -> Doc
<=> Doc -> Doc
quotes ([Char] -> Doc
text [Char]
name)
        , Doc
"aliases" Doc -> Doc -> Doc
<=> Doc -> Doc
brackets (Doc -> Doc
quotes ([Char] -> Doc
lowerCase [Char]
name))
        -- filenames = ['*.cf', '*lbnf']
        , Doc
"KEYWORDS" Doc -> Doc -> Doc
<=> Doc -> Doc
brackets Doc
keywords
        -- We override the get_tokens_unprocessed method to filter keywords
        -- from identifiers
        , Doc
"def get_tokens_unprocessed(self, text):"
        , [Doc] -> Doc
indent
            [ Doc
"for index, token, value in super(" Doc -> Doc -> Doc
<> Doc
className Doc -> Doc -> Doc
<> Doc
",self).get_tokens_unprocessed(text):"
            , [Doc] -> Doc
indent
                [ Doc
"if token is Name and value in self.KEYWORDS:"
                , [Doc] -> Doc
indent [ Doc
"yield index, Keyword, value" ]
                , Doc
"else:"
                , [Doc] -> Doc
indent [ Doc
"yield index, token, value" ]
                ]
            ]
        -- The token is defined using regex
        , Doc
"tokens = {"
        , [Doc] -> Doc
indent
            [ Doc
"'root': ["
            , [Doc] -> Doc
indent (((Reg, LexType) -> Doc) -> [(Reg, LexType)] -> [Doc]
forall a b. (a -> b) -> [a] -> [b]
map (Reg, LexType) -> Doc
prLexRule (CF -> [(Reg, LexType)]
mkLexer CF
cf) [Doc] -> [Doc] -> [Doc]
forall a. [a] -> [a] -> [a]
++ [Doc
"(r'\\s+', Token.Space)"])
            , Doc
"]"
            ]
        , Doc
"}"
        ]
    ]
  where
    className :: Doc
className = [Char] -> Doc
camelCase [Char]
name Doc -> Doc -> Doc
<> Doc
"Lexer"
    keywords :: Doc
keywords = [Doc] -> Doc
fsep (Doc -> [Doc] -> [Doc]
punctuate Doc
"," (([Char] -> Doc) -> [[Char]] -> [Doc]
forall a b. (a -> b) -> [a] -> [b]
map (Doc -> Doc
quotes (Doc -> Doc) -> ([Char] -> Doc) -> [Char] -> Doc
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> Doc
text) (CF -> [[Char]]
forall f. CFG f -> [[Char]]
reservedWords CF
cf)))
    indent :: [Doc] -> Doc
indent = Int -> Doc -> Doc
nest Int
4 (Doc -> Doc) -> ([Doc] -> Doc) -> [Doc] -> Doc
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Doc] -> Doc
vcat
    prLexRule :: (Reg, LexType) -> Doc
prLexRule (Reg
reg,LexType
ltype) =
        Doc -> Doc
parens (Doc
"r" Doc -> Doc -> Doc
<> Doc -> Doc
quotes (Reg -> Doc
pyRegex Reg
reg) Doc -> Doc -> Doc
<> Doc
"," Doc -> Doc -> Doc
<+> LexType -> Doc
forall {p}. IsString p => LexType -> p
pyToken LexType
ltype) Doc -> Doc -> Doc
<> Doc
","
    pyToken :: LexType -> p
pyToken LexType
LexComment = p
"Comment"
    pyToken LexType
LexSymbols = p
"Operator"
    pyToken (LexToken [Char]
"Integer") = p
"Number.Integer"
    pyToken (LexToken [Char]
"Double") = p
"Number.Float"
    pyToken (LexToken [Char]
"Char") = p
"String.Char"
    pyToken (LexToken [Char]
"String") = p
"String.Double"
    pyToken (LexToken [Char]
_) = p
"Name"



-- | Convert a Reg to a python regex
-- >>> pyRegex (RSeqs "abc")
-- abc
-- >>> pyRegex (RAlt (RSeqs "::=") (RChar '.'))
-- ::=|\.
-- >>> pyRegex (RChar '=')
-- =
-- >>> pyRegex RAny
-- .
-- >>> pyRegex (RStar RAny)
-- .*
-- >>> pyRegex (RPlus (RSeqs "xxx"))
-- (xxx)+
-- >>> pyRegex (ROpt (RSeqs "abc"))
-- (abc)?
-- >>> pyRegex (RSeq (RSeqs "--") (RSeq (RStar RAny) (RChar '\n')))
-- --.*\n
-- >>> pyRegex (RStar (RSeq (RSeqs "abc") (RChar '*')))
-- (abc\*)*
-- >>> pyRegex REps
-- <BLANKLINE>
-- >>> pyRegex (RAlts "abc[].")
-- [abc\[\]\.]
-- >>> pyRegex RDigit
-- \d
-- >>> pyRegex RLetter
-- [a-zA-Z]
-- >>> pyRegex RUpper
-- [A-Z]
-- >>> pyRegex RLower
-- [a-z]
-- >>> pyRegex (RMinus RAny RDigit)
-- (.)(?<!\d)
-- >>> pyRegex (RSeq (RAlt (RChar 'a') RAny) (RAlt (RChar 'b') (RChar 'c')))
-- (a|.)(b|c)
pyRegex :: Reg -> Doc
pyRegex :: Reg -> Doc
pyRegex Reg
reg = case Reg
reg of
    RSeqs [Char]
s       -> [Char] -> Doc
text ((Char -> [Char]) -> MakeComment
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Char -> [Char]
escape [Char]
s)
    RAlt Reg
r1 Reg
r2    -> Reg -> Doc
pyRegex Reg
r1 Doc -> Doc -> Doc
<> Doc
"|" Doc -> Doc -> Doc
<> Reg -> Doc
pyRegex Reg
r2
    RChar Char
c       -> [Char] -> Doc
text (Char -> [Char]
escape Char
c)
    Reg
RAny          -> Char -> Doc
char Char
'.'
    RStar Reg
RAny    -> Doc
".*"
    RStar Reg
re      -> Doc -> Doc
parens (Reg -> Doc
pyRegex Reg
re) Doc -> Doc -> Doc
<> Char -> Doc
char Char
'*'
    RPlus Reg
re      -> Doc -> Doc
parens (Reg -> Doc
pyRegex Reg
re) Doc -> Doc -> Doc
<> Char -> Doc
char Char
'+'
    ROpt Reg
re       -> Doc -> Doc
parens (Reg -> Doc
pyRegex Reg
re) Doc -> Doc -> Doc
<> Char -> Doc
char Char
'?'
    RSeq Reg
r1 Reg
r2    -> Reg -> Doc
pyRegex' Reg
r1 Doc -> Doc -> Doc
<> Reg -> Doc
pyRegex' Reg
r2
    Reg
REps          -> Doc
empty
    RAlts [Char]
cs      -> Doc -> Doc
brackets ([Doc] -> Doc
hcat ((Char -> Doc) -> [Char] -> [Doc]
forall a b. (a -> b) -> [a] -> [b]
map (Reg -> Doc
pyRegex (Reg -> Doc) -> (Char -> Reg) -> Char -> Doc
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Reg
RChar) [Char]
cs))
    Reg
RDigit        -> Doc
"\\d"
    Reg
RUpper        -> Doc
"[A-Z]"
    Reg
RLower        -> Doc
"[a-z]"
    Reg
RLetter       -> Doc
"[a-zA-Z]"
    RMinus Reg
r1 Reg
r2  -> Doc -> Doc
parens (Reg -> Doc
pyRegex Reg
r1) Doc -> Doc -> Doc
<> Doc -> Doc
parens (Doc
"?<!" Doc -> Doc -> Doc
<> Reg -> Doc
pyRegex Reg
r2)
  where
    escape :: Char -> [Char]
escape Char
'\n' = [Char]
"\\n"
    escape Char
'\t' = [Char]
"\\t"
    escape Char
c | Char
c Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ([Char]
".'[]()|*+?{}\\" :: String) = [Char
'\\',Char
c]
    escape Char
c = [Char
c]
    pyRegex' :: Reg -> Doc
pyRegex' r :: Reg
r@(RAlt{}) = Doc -> Doc
parens (Reg -> Doc
pyRegex Reg
r)
    pyRegex' Reg
r = Reg -> Doc
pyRegex Reg
r