{-# LANGUAGE OverloadedStrings #-} module BNFC.Backend.Latex.Latex where import BNFC.Prelude import Control.Monad.State import Data.Foldable (fold) import Data.List (intercalate, intersperse) import Data.List.Split (chunksOf) import qualified Data.Map as Map import Data.String (fromString) import Prettyprinter import System.FilePath (takeBaseName, (<.>)) import BNFC.CF import BNFC.Backend.CommonInterface.Backend import BNFC.Backend.Common.Utils as Utils import BNFC.Backend.Latex.State import BNFC.Options.GlobalOptions import BNFC.Types.Position import BNFC.Types.Regex import qualified BNFC.Utils.List2 as List2 latex :: LBNF -> State LatexBackendState Result latex lbnf = do st <- get let cfName = takeBaseName $ optInput $ globalOpt st return [(cfName <.> "tex", cf2string lbnf cfName)] cf2string :: LBNF -> String -> String cf2string lbnf cfName = docToString defaultLayoutOptions $ cf2doc lbnf cfName cf2doc :: LBNF -> String -> Doc () cf2doc lbnf cfName = vsep . intersperse emptyDoc $ [ "%% File generated by the BNF Converter." , beginning cfName , macros , "This document was automatically generated by the {\\em BNF-Converter}." , printTerminals lbnf cfName , printGrammar lbnf cfName , "\\end{document}" ] beginning :: String -> Doc () beginning cfName = vsep [ "\\batchmode" , emptyDoc , "\\documentclass[a4paper,11pt]{article}" , "\\usepackage[T1]{fontenc}" , "\\usepackage[utf8x]{inputenc}" , "\\setlength{\\parindent}{0mm}" , "\\setlength{\\parskip}{1mm}" , emptyDoc , "\\title{The Language" <+> fromString cfName <> "}" , "\\author{BNF-converter}" , emptyDoc , "\\begin{document}" , "\\maketitle" ] macros :: Doc () macros = vsep [ "\\newcommand{\\emptyP}{\\mbox{$\\epsilon$}}" , "\\newcommand{\\terminal}[1]{\\mbox{{\\texttt {#1}}}}" , "\\newcommand{\\nonterminal}[1]{\\mbox{$\\langle \\mbox{{\\sl #1 }} \\! \\rangle$}}" , "\\newcommand{\\arrow}{\\mbox{::=}}" , "\\newcommand{\\delimit}{\\mbox{$|$}}" , "\\newcommand{\\reserved}[1]{\\mbox{{\\texttt {#1}}}}" , "\\newcommand{\\literal}[1]{\\mbox{{\\texttt {#1}}}}" , "\\newcommand{\\symb}[1]{\\mbox{{\\texttt {#1}}}}" ] printTerminals :: LBNF -> String -> Doc () printTerminals lbnf cfName = vsep . intersperse emptyDoc $ [ "\\section*{The lexical structure of" <+> fromString cfName <> "}" ] ++ Utils.when hasBuiltins [ "\\subsection*{Literals}" , vsep . intersperse emptyDoc $ printBuiltin <$> usedBuiltins ] ++ Utils.when hasTokens [ vsep . intersperse emptyDoc $ printToken <$> tokens ] ++ [ "\\subsection*{Reserved words and symbols}" , reservedWords , printKeywords cfName keywords , printSymbols cfName symbols ] ++ [ printComments lineComments blockComments ] where hasBuiltins :: Bool hasBuiltins = not $ Map.null $ _lbnfASTBuiltins lbnf usedBuiltins :: [BuiltinCat] usedBuiltins = Map.keys $ _lbnfASTBuiltins lbnf hasTokens :: Bool hasTokens = not $ Map.null $ _lbnfTokenDefs lbnf tokens :: [(CatName, WithPosition TokenDef)] tokens = Map.toList $ _lbnfTokenDefs lbnf keywords :: [String] keywords = toList . theKeyword <$> Map.keys (_lbnfKeywords lbnf) symbols :: [String] symbols = toList . theSymbol <$> Map.keys (_lbnfSymbols lbnf) lineComments :: [String] lineComments = (\(LineComment s1) -> toList s1) <$> Map.elems (_lbnfLineComments lbnf) blockComments :: [(String, String)] blockComments = (\(BlockComment s1 s2) -> (toList s1, toList s2)) <$> Map.elems (_lbnfBlockComments lbnf) printBuiltin :: BuiltinCat -> Doc () printBuiltin = \case BChar -> vsep [ "Character literals \\nonterminal{Char}\\ have the form" <+> "\\terminal{'}$c$\\terminal{'}, where $c$ is any single character." ] BDouble -> vsep [ "Double-precision float literals \\nonterminal{Double}\\ have the structure" <+> "indicated by the regular expression" <+> "$\\nonterminal{digit}+ \\mbox{{\\it `.'}} \\nonterminal{digit}+ (\\mbox{{\\it `e'}} \\mbox{{\\it `-'}}? \\nonterminal{digit}+)?$ i.e.\\" <+> "two sequences of digits separated by a decimal point, optionally" <+> "followed by an unsigned or negative exponent." ] BInteger -> "Integer literals \\nonterminal{Int}\\ are nonempty sequences of digits." BString -> vsep [ "String literals \\nonterminal{String}\\ have the form" <+> "\\terminal{\"}$x$\\terminal{\"}, where $x$ is any sequence of any characters" <+> "except \\terminal{\"}\\ unless preceded by \\verb6\\6." ] printToken :: (CatName, WithPosition TokenDef) -> Doc () printToken (cName, WithPosition _ tokenDef) = vsep [ fromString (toList cName) <+> "literals are recognized by the regular expression", "\\(" <> printRegLatex (regexToken tokenDef) <> "\\)"] reservedWords :: Doc () reservedWords = vsep [ "The set of reserved words is the set of terminals " , "appearing in the grammar. Those reserved words " , "that consist of non-letter characters are called symbols, and " , "they are treated in a different way from those that " , "are similar to identifiers. The lexer " , "follows rules familiar from languages " , "like Haskell, C, and Java, including longest match " , "and spacing conventions." ] tabular :: Doc () -> Doc () tabular d = vsep [ "\\begin{tabular}{lll}" , d , "\\end{tabular} \\\\" ] printKeywords :: String -> [String] -> Doc () printKeywords cfName keywords = if null keywords then "There are no reserved words in" <+> fromString cfName <> dot else vsep [ "The reserved words used in" <+> fromString cfName <+> "are the following: \\\\" , emptyDoc , tabular $ vsep (printLine <$> chunksOf 3 keywords) ] where printLine :: [String] -> Doc () printLine l = if length l == 3 then (hsep . intersperse "&" $ reserved <$> l) <+> "\\\\" else (hsep . intersperse "&") (reserved <$> l) <+> hsep (replicate (3 - length l) (space <> "&")) <+> "\\\\" reserved :: String -> Doc () reserved s = "{\\reserved{" <> fromString (printEscape s) <> "}}" printSymbols :: String -> [String] -> Doc () printSymbols cfName symbols = if null symbols then "There are no symbols in" <+> fromString cfName <> dot else vsep [ "The symbols used in" <+> fromString cfName <+> "are the following: \\\\" , emptyDoc , tabular $ vsep (printLine <$> chunksOf 3 symbols) ] where printLine :: [String] -> Doc () printLine l = if length l == 3 then (hsep . intersperse "&") (symbol <$> l) <+> "\\\\" else (hsep . intersperse "&") (symbol <$> l) <+> hsep (replicate (3 - length l) (space <> "&")) <+> "\\\\" symbol :: String -> Doc () symbol s = "{\\symb{" <> fromString (printEscape s) <> "}}" printEscape :: String -> String printEscape = concatMap escape where escape '\\' = "$\\backslash$" escape '~' = "\\~{}" escape '^' = "{\\textasciicircum}" escape c | c `elem` ("$&%#_{}" :: String) = ['\\', c] escape c | c `elem` ("+=|<>-" :: String) = "{$" ++ [c] ++ "$}" escape c = [c] printComments :: [String] -> [(String, String)] -> Doc () printComments linecomments blockcomments = vsep [ "\\subsection*{Comments}" , if null linecomments then "There are no single-line comments in the grammar." else "Single-line comments begin with" <+> (fold . intercalate [comma <> space]) (map (: []) (symbol <$> linecomments)) <> dot , if null blockcomments then "There are no multiple-line comments in the grammar." else "Multiple-line comments are enclosed with" <+> (fold . intercalate [comma <> space]) (map (: []) ((\(s1,s2) -> symbol s1 <+> "and" <+> symbol s2) <$> blockcomments)) <> dot ] printGrammar :: LBNF -> String -> Doc () printGrammar lbnf cfName = vsep [ "\\section*{The syntactic structure of" <+> fromString cfName <> "}" , emptyDoc , "Non-terminals are enclosed between $\\langle$ and $\\rangle$." , "The symbols {\\arrow} (production), {\\delimit} (union)" , "and {\\emptyP} (empty rule) belong to the BNF notation." , "All other symbols are terminals.\\\\" , emptyDoc , if null rules then "There are no rules in this grammar." else vsep $ intersperse emptyDoc $ tabular . printRule <$> rules ] where rules :: [(Cat, [ARHS])] rules = map (\(c,m) -> (c,getARHS <$> Map.toList m)) (Map.toList (_lbnfASTRules lbnf)) getARHS :: (Label, WithPosition ARuleRHS) -> ARHS getARHS (_, WithPosition _ (ARuleRHS _ _ arhs)) = arhs printRule :: (Cat, [ARHS]) -> Doc () printRule = \case (c, []) -> nonterminal c <+> "&" <+> fromString "{\\arrow}" <> space <> space <> "&" <> fromString "{\\emptyP}" <> " \\\\" (c, [a]) -> nonterminal c <+> "&" <+> fromString "{\\arrow}" <> space <> space <> "&" <> printARHS a <> " \\\\" (c, a:as) -> vsep [ nonterminal c <+> "&" <+> fromString "{\\arrow}" <> space <> space <> "&" <> printARHS a <> " \\\\" , indent 1 $ vsep $ (\arhs -> "&" <+> fromString "{\\delimit}" <> space <> space <> "&" <> printARHS arhs <> " \\\\") <$> as ] terminal :: String1 -> Doc () terminal s = "{\\terminal{" <> fromString (printEscape (toList s)) <> "}}" nonterminal :: Cat -> Doc () nonterminal c = "{\\nonterminal{" <> fromString (map escape (printCatNamePrec' c)) <> "}}" where escape :: Char -> Char escape char = case char of '_' -> '-' _ -> char printItem :: Item' String1 -> Doc () printItem = \case (Terminal s) -> terminal s (NTerminal c) -> nonterminal c printARHS :: ARHS -> Doc () printARHS [] = fromString "{\\emptyP}" printARHS arhs = hsep $ printItem <$> arhs printRegLatex :: Regex -> Doc () printRegLatex = prt 0 class Print a where prt :: Int -> a -> Doc () instance {-# OVERLAPPABLE #-} Print a => Print [a] where prt i as = hsep $ map (prt i) as instance Print Char where prt _ c = fromString $ "\\mbox{`" ++ printEscape [c] ++ "'}" instance Print Regex where prt i e = case e of RChar (CMinus yes no) -> prPrec i 1 $ if isEmpty no then prt 2 yes else prt 2 yes <+> fromString "-" <+> prt 2 no RAlts regs -> prPrec i 1 $ hsep $ intersperse "\\mid" $ map (prt 1) $ List2.toList regs RMinus reg1 reg2 -> prPrec i 1 $ prt 2 reg1 <+> fromString "-" <+> prt 2 reg2 REps -> "\\epsilon" RSeqs regs -> prPrec i 2 $ prt 2 $ List2.toList regs RStar reg -> prt 3 reg <> fromString "*" RPlus reg -> prt 3 reg <> fromString "+" ROpt reg -> prt 3 reg <> fromString "?" instance Print CharClassUnion where prt i e = case e of CAny -> "{\\nonterminal{anychar}}" CAlt [] -> panic "CharClass shouldn't be empty" CAlt [a] -> prt 0 a CAlt alts -> hsep $ intersperse "\\mid" $ map (prt i) alts instance Print CharClassAtom where prt _ e = case e of CChar c -> prt 0 c CDigit -> fromString "{\\nonterminal{digit}}" CLower -> fromString "{\\nonterminal{lower}}" CUpper -> fromString "{\\nonterminal{upper}}"