{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}

module BNFC.Backend.Txt2Tags.Txt2Tags where

import BNFC.Prelude

import Prettyprinter

import Control.Monad.State

import           Data.Foldable   (fold)
import           Data.List       (intercalate, intersperse)
import           Data.List.Split (chunksOf)
import qualified Data.Map        as Map
import           Data.String     (fromString)

import System.FilePath (takeBaseName, (<.>))

import BNFC.Backend.CommonInterface.Backend
import BNFC.Backend.Common.Utils as Utils
import BNFC.Backend.Txt2Tags.State

import BNFC.CF

import BNFC.Options.GlobalOptions

import BNFC.Types.Position
import BNFC.Types.Regex

import qualified BNFC.Utils.List2 as List2

txt2tags :: LBNF -> State Txt2TagsBackendState Result
txt2tags lbnf = do
  st <- get
  let cfName = takeBaseName $ optInput $ globalOpt st
  return [(cfName <.> "t2t", cf2string lbnf cfName)]

cf2string :: LBNF -> String -> String
cf2string lbnf cfName = docToString defaultLayoutOptions $ cf2doc lbnf cfName

cf2doc :: LBNF -> String -> Doc ()
cf2doc lbnf cfName = vsep . intersperse emptyDoc $
  [ introduction cfName
  , printTerminals lbnf cfName
  , printGrammar lbnf cfName
  ]

introduction :: String -> Doc ()
introduction cfName = vsep
  [ "The Language" <+> fromString cfName
  , "BNF Converter"
  , emptyDoc
  , emptyDoc
  , "% File generated by the BNF Converter."
  , emptyDoc
  , emptyDoc
  , "This document was automatically generated by the //BNF-Converter//."
  ]

printTerminals :: LBNF -> String -> Doc ()
printTerminals lbnf cfName  = vsep . intersperse emptyDoc $
  [ "==The lexical structure of" <+> fromString cfName <> "==" ]
  ++ Utils.when hasBuiltins
    [ "===Literals==="
    , vsep . intersperse emptyDoc $ printBuiltin <$> usedBuiltins
    ]
  ++ Utils.when hasTokens
    [ vsep . intersperse emptyDoc $ printToken <$> tokens ]
  ++ [ "===Reserved words and symbols==="
     , reservedWords
     , printKeywords cfName keywords
     , printSymbols cfName symbols ]
  ++ [ "===Comments==="
     , printComments lineComments blockComments ]

  where

    hasBuiltins :: Bool
    hasBuiltins = not $ Map.null $ _lbnfASTBuiltins lbnf

    usedBuiltins :: [BuiltinCat]
    usedBuiltins = Map.keys $ _lbnfASTBuiltins lbnf

    hasTokens :: Bool
    hasTokens = not $ Map.null $ _lbnfTokenDefs lbnf

    tokens :: [(CatName, WithPosition TokenDef)]
    tokens = Map.toList $ _lbnfTokenDefs lbnf

    keywords :: [String]
    keywords = toList . theKeyword <$> Map.keys (_lbnfKeywords lbnf)

    symbols :: [String]
    symbols = toList . theSymbol <$> Map.keys (_lbnfSymbols lbnf)

    lineComments :: [String]
    lineComments =
      (\(LineComment s1) -> toList s1) <$> Map.elems (_lbnfLineComments lbnf)

    blockComments :: [(String, String)]
    blockComments =
      (\(BlockComment s1 s2) -> (toList s1, toList s2))
      <$>
      Map.elems (_lbnfBlockComments lbnf)

printBuiltin :: BuiltinCat -> Doc ()
printBuiltin = \case
 BChar -> vsep
  [ "Character literals //Char// have the form" <+>
    "``'``//c//``'``, where //c// is any single character."
  ]
 BDouble -> vsep
  [ "Double-precision float literals //Double// have the structure" <+>
    "indicated by the regular expression" <+>
    "``digit+ '.' digit+ ('e' ('-')? digit+)?``" <+>
    "i.e. two sequences of digits separated by a decimal point, optionally" <+>
    "followed by an unsigned or negative exponent."
  ]
 BInteger -> "Integer literals //Integer// are nonempty sequences of digits."
 BString -> vsep
  [ "Double-precision float literals //Double// have the structure" <+>
    "indicated by the regular expression" <+>
    "``digit+ '.' digit+ ('e' ('-')? digit+)?``" <+>
    "i.e. two sequences of digits separated by a decimal point, optionally" <+>
    "followed by an unsigned or negative exponent."
  ]

reservedWords :: Doc ()
reservedWords = vsep
  [ "The set of reserved words is the set of terminals "
  , "appearing in the grammar. Those reserved words "
  , "that consist of non-letter characters are called symbols, and "
  , "they are treated in a different way from those that "
  , "are similar to identifiers. The lexer "
  , "follows rules familiar from languages "
  , "like Haskell, C, and Java, including longest match "
  , "and spacing conventions."
  ]

printKeywords :: String -> [String] -> Doc ()
printKeywords cfName keywords =
  if null keywords
  then "There are no reserved words in" <+> fromString cfName <> dot
  else vsep
    [ "The reserved words used in" <+> fromString cfName <+> "are the following:"
    , indent 2 $ vsep (printLine <$> chunksOf 4 keywords)
    ]
  where

    printLine :: [String] -> Doc ()
    printLine l =
      if length l == 4
      then pipe <+>
        (hsep . intersperse pipe) ((\k -> "``" <> fromString k <> "``") <$> l)
      else pipe <+>
        (hsep . intersperse pipe) ((\k -> "``" <> fromString k <> "``") <$> l) <+>
        hsep (replicate (4 - length l) (space <> pipe))

printSymbols :: String -> [String] -> Doc ()
printSymbols cfName symbols =
  if null symbols
  then "There are no symbols in" <+> fromString cfName <> dot
  else vsep
  ["The symbols used in" <+> fromString cfName <+> " the following:"
  , indent 2 $ vsep (printLine <$> chunksOf 4 symbols)
  ]
  where

    printLine :: [String] -> Doc ()
    printLine l =
      if length l == 4
      then pipe <+> (hsep . intersperse pipe) (fromString <$> l)
      else pipe <+> (hsep . intersperse pipe) (fromString <$> l) <+>
        hsep (replicate (4 - length l) (space <> pipe))

printComments :: [String] -> [(String, String)] -> Doc ()
printComments linecomments blockcomments = vsep
  [ if null linecomments
    then "There are no single-line comments in the grammar."
    else "Single-line comments begin with" <+>
      (fold . intercalate [comma <> space]) (map (: []) (fromString <$> linecomments)) <> dot
  , if null blockcomments
    then "There are no multiple-line comments in the grammar."
    else "Multiple-line comments are  enclosed with" <+>
      (fold . intercalate [comma <> space])
      (map (: []) ((\(s1,s2) -> fromString s1 <+> "and" <+> fromString s2) <$> blockcomments)) <> dot
  ]

printToken :: (CatName, WithPosition TokenDef) -> Doc ()
printToken (cName, WithPosition _ tokenDef) = vsep
  [ fromString (toList cName) <+> "literals are recognized by the regular expression"
  , "`````" <> printRegTxt2Tags (regexToken tokenDef) <> "`````"
  ]

printGrammar :: LBNF -> String -> Doc ()
printGrammar lbnf cfName = vsep
  [ "==The syntactic structure of" <+> fromString cfName <+> "=="
  , "Non-terminals are enclosed between < and >."
  , "The symbols **->** (production), **|** (union)"
  , "and **eps** (empty rule) belong to the BNF notation."
  , "All other symbols are terminals."
  , emptyDoc
  , indent 2 $ vsep $ printRule <$> rules
  ]

  where

    rules :: [(Cat, [ARHS])]
    rules = map
            (\(c,m) -> (c,getARHS <$> Map.toList m))
            (Map.toList (_lbnfASTRules lbnf))

    getARHS :: (Label, WithPosition ARuleRHS) -> ARHS
    getARHS (_, WithPosition _ (ARuleRHS _ _ arhs)) = arhs

printRule :: (Cat, [ARHS]) -> Doc ()
printRule (c, []) = pipe <+> "//" <> fromString (printCat c) <> "// | -> | **eps**"
printRule (c, a:as) = vsep $
  pipe <+> "//" <> fromString (printCat c) <> "//" <+>
    "| ->" <+> pipe <+> printARHS a
  :
  Utils.when (not (null as))
    [ vsep (((pipe <> space <> space <> "|  **|**  |") <+>) . printARHS <$> as)]

printARHS :: ARHS -> Doc ()
printARHS arhs =
  if null arhs
  then "**eps**"
  else hsep $ printItem <$> arhs

printItem :: Item' String1 -> Doc ()
printItem (Terminal s)  = "``" <> fromString (toList s) <> "``"
printItem (NTerminal c) = "//" <> fromString (printCat c) <> "//"

printCat :: Cat -> String
printCat = \case
  Cat       b   -> printBaseCatName b
  ListCat   c   -> "[" ++ printCat c ++ "]"
  CoerceCat c i -> toList c ++ show i

printRegTxt2Tags :: Regex -> Doc ()
printRegTxt2Tags = prt 0

class Print a where
  prt :: Int -> a -> Doc ()

instance {-# OVERLAPPABLE #-} Print a => Print [a] where
  prt i as = hsep $ map (prt i) as

instance Print Char where
  prt _ c = squotes $ fromString [c]

instance Print Regex where
  prt i e = case e of

    RChar (CMinus yes no) -> prPrec i 1 $
      if isEmpty no
      then prt 2 yes
      else
        prt 2 yes <+>
        fromString "-" <+>
        prt 2 no

    RAlts  regs -> prPrec i 1 $ hsep $ intersperse "|" $
      map (prt 1) $ List2.toList regs

    RMinus reg1 reg2 -> prPrec i 1 $
      prt 2 reg1 <+>
      fromString "-" <+>
      prt 2 reg2

    REps -> "eps"

    RSeqs  regs -> prPrec i 2 $ prt 2 $ List2.toList regs

    RStar  reg  -> prt 3 reg <> fromString "*"

    RPlus  reg  -> prt 3 reg <> fromString "+"

    ROpt   reg  -> prt 3 reg <> fromString "?"

instance Print CharClassUnion where
  prt i e = case e of
    CAny      -> "char"
    CAlt []   -> panic "CharClass shouldn't be empty"
    CAlt [a]  -> prt 0 a
    CAlt alts -> hsep $ intersperse "|" $ map (prt i) alts

instance Print CharClassAtom where
  prt _ e = case e of
    CChar c -> prt 0 c
    CDigit  -> fromString "digit"
    CLower  -> fromString "lower"
    CUpper  -> fromString "upper"