{-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} module BNFC.Backend.Txt2Tags.Txt2Tags where import BNFC.Prelude import Prettyprinter import Control.Monad.State import Data.Foldable (fold) import Data.List (intercalate, intersperse) import Data.List.Split (chunksOf) import qualified Data.Map as Map import Data.String (fromString) import System.FilePath (takeBaseName, (<.>)) import BNFC.Backend.CommonInterface.Backend import BNFC.Backend.Common.Utils as Utils import BNFC.Backend.Txt2Tags.State import BNFC.CF import BNFC.Options.GlobalOptions import BNFC.Types.Position import BNFC.Types.Regex import qualified BNFC.Utils.List2 as List2 txt2tags :: LBNF -> State Txt2TagsBackendState Result txt2tags lbnf = do st <- get let cfName = takeBaseName $ optInput $ globalOpt st return [(cfName <.> "t2t", cf2string lbnf cfName)] cf2string :: LBNF -> String -> String cf2string lbnf cfName = docToString defaultLayoutOptions $ cf2doc lbnf cfName cf2doc :: LBNF -> String -> Doc () cf2doc lbnf cfName = vsep . intersperse emptyDoc $ [ introduction cfName , printTerminals lbnf cfName , printGrammar lbnf cfName ] introduction :: String -> Doc () introduction cfName = vsep [ "The Language" <+> fromString cfName , "BNF Converter" , emptyDoc , emptyDoc , "% File generated by the BNF Converter." , emptyDoc , emptyDoc , "This document was automatically generated by the //BNF-Converter//." ] printTerminals :: LBNF -> String -> Doc () printTerminals lbnf cfName = vsep . intersperse emptyDoc $ [ "==The lexical structure of" <+> fromString cfName <> "==" ] ++ Utils.when hasBuiltins [ "===Literals===" , vsep . intersperse emptyDoc $ printBuiltin <$> usedBuiltins ] ++ Utils.when hasTokens [ vsep . intersperse emptyDoc $ printToken <$> tokens ] ++ [ "===Reserved words and symbols===" , reservedWords , printKeywords cfName keywords , printSymbols cfName symbols ] ++ [ "===Comments===" , printComments lineComments blockComments ] where hasBuiltins :: Bool hasBuiltins = not $ Map.null $ _lbnfASTBuiltins lbnf usedBuiltins :: [BuiltinCat] usedBuiltins = Map.keys $ _lbnfASTBuiltins lbnf hasTokens :: Bool hasTokens = not $ Map.null $ _lbnfTokenDefs lbnf tokens :: [(CatName, WithPosition TokenDef)] tokens = Map.toList $ _lbnfTokenDefs lbnf keywords :: [String] keywords = toList . theKeyword <$> Map.keys (_lbnfKeywords lbnf) symbols :: [String] symbols = toList . theSymbol <$> Map.keys (_lbnfSymbols lbnf) lineComments :: [String] lineComments = (\(LineComment s1) -> toList s1) <$> Map.elems (_lbnfLineComments lbnf) blockComments :: [(String, String)] blockComments = (\(BlockComment s1 s2) -> (toList s1, toList s2)) <$> Map.elems (_lbnfBlockComments lbnf) printBuiltin :: BuiltinCat -> Doc () printBuiltin = \case BChar -> vsep [ "Character literals //Char// have the form" <+> "``'``//c//``'``, where //c// is any single character." ] BDouble -> vsep [ "Double-precision float literals //Double// have the structure" <+> "indicated by the regular expression" <+> "``digit+ '.' digit+ ('e' ('-')? digit+)?``" <+> "i.e. two sequences of digits separated by a decimal point, optionally" <+> "followed by an unsigned or negative exponent." ] BInteger -> "Integer literals //Integer// are nonempty sequences of digits." BString -> vsep [ "Double-precision float literals //Double// have the structure" <+> "indicated by the regular expression" <+> "``digit+ '.' digit+ ('e' ('-')? digit+)?``" <+> "i.e. two sequences of digits separated by a decimal point, optionally" <+> "followed by an unsigned or negative exponent." ] reservedWords :: Doc () reservedWords = vsep [ "The set of reserved words is the set of terminals " , "appearing in the grammar. Those reserved words " , "that consist of non-letter characters are called symbols, and " , "they are treated in a different way from those that " , "are similar to identifiers. The lexer " , "follows rules familiar from languages " , "like Haskell, C, and Java, including longest match " , "and spacing conventions." ] printKeywords :: String -> [String] -> Doc () printKeywords cfName keywords = if null keywords then "There are no reserved words in" <+> fromString cfName <> dot else vsep [ "The reserved words used in" <+> fromString cfName <+> "are the following:" , indent 2 $ vsep (printLine <$> chunksOf 4 keywords) ] where printLine :: [String] -> Doc () printLine l = if length l == 4 then pipe <+> (hsep . intersperse pipe) ((\k -> "``" <> fromString k <> "``") <$> l) else pipe <+> (hsep . intersperse pipe) ((\k -> "``" <> fromString k <> "``") <$> l) <+> hsep (replicate (4 - length l) (space <> pipe)) printSymbols :: String -> [String] -> Doc () printSymbols cfName symbols = if null symbols then "There are no symbols in" <+> fromString cfName <> dot else vsep ["The symbols used in" <+> fromString cfName <+> " the following:" , indent 2 $ vsep (printLine <$> chunksOf 4 symbols) ] where printLine :: [String] -> Doc () printLine l = if length l == 4 then pipe <+> (hsep . intersperse pipe) (fromString <$> l) else pipe <+> (hsep . intersperse pipe) (fromString <$> l) <+> hsep (replicate (4 - length l) (space <> pipe)) printComments :: [String] -> [(String, String)] -> Doc () printComments linecomments blockcomments = vsep [ if null linecomments then "There are no single-line comments in the grammar." else "Single-line comments begin with" <+> (fold . intercalate [comma <> space]) (map (: []) (fromString <$> linecomments)) <> dot , if null blockcomments then "There are no multiple-line comments in the grammar." else "Multiple-line comments are enclosed with" <+> (fold . intercalate [comma <> space]) (map (: []) ((\(s1,s2) -> fromString s1 <+> "and" <+> fromString s2) <$> blockcomments)) <> dot ] printToken :: (CatName, WithPosition TokenDef) -> Doc () printToken (cName, WithPosition _ tokenDef) = vsep [ fromString (toList cName) <+> "literals are recognized by the regular expression" , "`````" <> printRegTxt2Tags (regexToken tokenDef) <> "`````" ] printGrammar :: LBNF -> String -> Doc () printGrammar lbnf cfName = vsep [ "==The syntactic structure of" <+> fromString cfName <+> "==" , "Non-terminals are enclosed between < and >." , "The symbols **->** (production), **|** (union)" , "and **eps** (empty rule) belong to the BNF notation." , "All other symbols are terminals." , emptyDoc , indent 2 $ vsep $ printRule <$> rules ] where rules :: [(Cat, [ARHS])] rules = map (\(c,m) -> (c,getARHS <$> Map.toList m)) (Map.toList (_lbnfASTRules lbnf)) getARHS :: (Label, WithPosition ARuleRHS) -> ARHS getARHS (_, WithPosition _ (ARuleRHS _ _ arhs)) = arhs printRule :: (Cat, [ARHS]) -> Doc () printRule (c, []) = pipe <+> "//" <> fromString (printCat c) <> "// | -> | **eps**" printRule (c, a:as) = vsep $ pipe <+> "//" <> fromString (printCat c) <> "//" <+> "| ->" <+> pipe <+> printARHS a : Utils.when (not (null as)) [ vsep (((pipe <> space <> space <> "| **|** |") <+>) . printARHS <$> as)] printARHS :: ARHS -> Doc () printARHS arhs = if null arhs then "**eps**" else hsep $ printItem <$> arhs printItem :: Item' String1 -> Doc () printItem (Terminal s) = "``" <> fromString (toList s) <> "``" printItem (NTerminal c) = "//" <> fromString (printCat c) <> "//" printCat :: Cat -> String printCat = \case Cat b -> printBaseCatName b ListCat c -> "[" ++ printCat c ++ "]" CoerceCat c i -> toList c ++ show i printRegTxt2Tags :: Regex -> Doc () printRegTxt2Tags = prt 0 class Print a where prt :: Int -> a -> Doc () instance {-# OVERLAPPABLE #-} Print a => Print [a] where prt i as = hsep $ map (prt i) as instance Print Char where prt _ c = squotes $ fromString [c] instance Print Regex where prt i e = case e of RChar (CMinus yes no) -> prPrec i 1 $ if isEmpty no then prt 2 yes else prt 2 yes <+> fromString "-" <+> prt 2 no RAlts regs -> prPrec i 1 $ hsep $ intersperse "|" $ map (prt 1) $ List2.toList regs RMinus reg1 reg2 -> prPrec i 1 $ prt 2 reg1 <+> fromString "-" <+> prt 2 reg2 REps -> "eps" RSeqs regs -> prPrec i 2 $ prt 2 $ List2.toList regs RStar reg -> prt 3 reg <> fromString "*" RPlus reg -> prt 3 reg <> fromString "+" ROpt reg -> prt 3 reg <> fromString "?" instance Print CharClassUnion where prt i e = case e of CAny -> "char" CAlt [] -> panic "CharClass shouldn't be empty" CAlt [a] -> prt 0 a CAlt alts -> hsep $ intersperse "|" $ map (prt i) alts instance Print CharClassAtom where prt _ e = case e of CChar c -> prt 0 c CDigit -> fromString "digit" CLower -> fromString "lower" CUpper -> fromString "upper"