{-
    BNF Converter: Alex 3.x Generator
    Copyright (C) 2012  Author:  Antti-Juhani Kaijanaho
    Copyright (C) 2004  Author:  Peter Gammie
    (C)opyright 2003, {aarne,markus,peteg} at cs dot chalmers dot se

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
-}

module BNFC.Backend.Haskell.CFtoAlex3 (cf2alex3) where

import Data.Char
import qualified Data.List as List

import AbsBNF
import BNFC.CF
import BNFC.Lexing  (mkRegMultilineComment)
import BNFC.Options (TokenText(..))
-- import BNFC.Utils   (unless)

import BNFC.Backend.Common (unicodeAndSymbols)
import BNFC.Backend.Haskell.Utils

cf2alex3 :: String -> String -> Bool -> TokenText -> CF -> String
cf2alex3 name shareMod shareStrings tokenText cf =
  unlines $ concat $
  [ prelude name shareMod shareStrings tokenText
  , cMacros
  , rMacros cf
  , restOfAlex shareMod shareStrings tokenText cf
  ]

prelude :: String -> String -> Bool -> TokenText -> [String]
prelude name shareMod shareStrings tokenText = concat
  [ [ "-- -*- haskell -*-"
    , "-- This Alex file was machine-generated by the BNF converter"
    , "{"
    , "{-# OPTIONS -fno-warn-incomplete-patterns #-}"
    , "{-# OPTIONS_GHC -w #-}"
    , "module " ++ name ++ " where"
    , ""
    ]
  , [ "import " ++ shareMod | shareStrings ]
  , tokenTextImport tokenText
  , [ "import qualified Data.Bits"
    , "import Data.Word (Word8)"
    , "import Data.Char (ord)"
    , "}"
    , ""
    ]
  ]

cMacros :: [String]
cMacros =
  [ "$c = [A-Z\\192-\\221] # [\\215]  -- capital isolatin1 letter (215 = \\times) FIXME"
  , "$s = [a-z\\222-\\255] # [\\247]  -- small   isolatin1 letter (247 = \\div  ) FIXME"
  , "$l = [$c $s]         -- letter"
  , "$d = [0-9]           -- digit"
  , "$i = [$l $d _ ']     -- identifier character"
  , "$u = [. \\n]          -- universal: any character"
  ]

rMacros :: CF -> [String]
rMacros cf = if null symbs then [] else
  [ "@rsyms =    -- symbols and non-identifier-like reserved words"
  , "   " ++ List.intercalate " | " (map mkEsc symbs)
  ]
  where
  symbs = unicodeAndSymbols cf
  mkEsc = unwords . esc
  esc s = if null a then rest else show a : rest
      where (a,r) = span (\ c -> isAscii c && isAlphaNum c) s
            rest = case r of
                       [] -> []
                       (c:xs) -> s : esc xs
                         where s = if isPrint c then ['\\',c]
                                                else '\\':show (ord c)

restOfAlex :: String -> Bool -> TokenText -> CF -> [String]
restOfAlex _ shareStrings tokenText cf = [
  ":-",
  "",
  lexComments (comments cf),
  "$white+ ;",
  pTSpec (unicodeAndSymbols cf),

  userDefTokenTypes,
  ident,

  ifC catString ("\\\" ([$u # [\\\" \\\\ \\n]] | (\\\\ (\\\" | \\\\ | \\' | n | t | r | f)))* \\\"" ++
                  "\n    { tok (\\p s -> PT p (TL $ share $ unescapeInitTail s)) }"),
  ifC catChar    "\\\' ($u # [\\\' \\\\] | \\\\ [\\\\ \\\' n t r f]) \\'\n    { tok (\\p s -> PT p (TC $ share s))  }",
  ifC catInteger "$d+\n    { tok (\\p s -> PT p (TI $ share s))    }",
  ifC catDouble  "$d+ \\. $d+ (e (\\-)? $d+)?\n    { tok (\\p s -> PT p (TD $ share s)) }",
  "",
  "{",
  "",
  "tok :: (Posn -> " ++ stringType ++ " -> Token) -> (Posn -> " ++ stringType ++ " -> Token)",
  "tok f p s = f p s",
  "",
  "share :: "++stringType++" -> "++stringType,
  "share = " ++ if shareStrings then "shareString" else "id",
  "",
  "data Tok =",
  "   TS !"++stringType++" !Int    -- reserved words and symbols",
  " | TL !"++stringType++"         -- string literals",
  " | TI !"++stringType++"         -- integer literals",
  " | TV !"++stringType++"         -- identifiers",
  " | TD !"++stringType++"         -- double precision float literals",
  " | TC !"++stringType++"         -- character literals",
  userDefTokenConstrs,
  " deriving (Eq,Show,Ord)",
  "",
  "data Token =",
  "   PT  Posn Tok",
  " | Err Posn",
  "  deriving (Eq,Show,Ord)",
  "",
  "printPosn :: Posn -> String",
  "printPosn (Pn _ l c) = \"line \" ++ show l ++ \", column \" ++ show c",
  "",
  "tokenPos :: [Token] -> String",
  "tokenPos (t:_) = printPosn (tokenPosn t)",
  "tokenPos [] = \"end of file\"",
  "",
  "tokenPosn :: Token -> Posn",
  "tokenPosn (PT p _) = p",
  "tokenPosn (Err p) = p",
  "",
  "tokenLineCol :: Token -> (Int, Int)",
  "tokenLineCol = posLineCol . tokenPosn",
  "",
  "posLineCol :: Posn -> (Int, Int)",
  "posLineCol (Pn _ l c) = (l,c)",
  "",
  "mkPosToken :: Token -> ((Int, Int), " ++ stringType ++ ")",
  "mkPosToken t@(PT p _) = (posLineCol p, tokenText t)",
  "",
  "tokenText :: Token -> " ++ stringType,
  "tokenText t = case t of",
  "  PT _ (TS s _) -> s",
  "  PT _ (TL s)   -> " ++ applyP stringPack "show s",
  "  PT _ (TI s)   -> s",
  "  PT _ (TV s)   -> s",
  "  PT _ (TD s)   -> s",
  "  PT _ (TC s)   -> s",
  "  Err _         -> " ++ apply stringPack "\"#error\"",
  userDefTokenPrint,
  "prToken :: Token -> String",
  "prToken t = " ++ applyP stringUnpack "tokenText t",
  "",
  "data BTree = N | B "++stringType++" Tok BTree BTree deriving (Show)",
  "",
  "eitherResIdent :: ("++stringType++" -> Tok) -> "++stringType++" -> Tok",
  "eitherResIdent tv s = treeFind resWords",
  "  where",
  "  treeFind N = tv s",
  "  treeFind (B a t left right) | s < a  = treeFind left",
  "                              | s > a  = treeFind right",
  "                              | s == a = t",
  "",
  "resWords :: BTree",
  "resWords = " ++ show (sorted2tree $ cfTokens cf),
  "   where b s n = let bs = "++ apply stringPack "s",
  "                 in  B bs (TS bs n)",
  "",
  "unescapeInitTail :: "++stringType++" -> "++stringType++"",
  "unescapeInitTail = "++stringPack++" . unesc . tail . "++stringUnpack,
  "  where",
  "  unesc s = case s of",
  "    '\\\\':c:cs | elem c ['\\\"', '\\\\', '\\\''] -> c : unesc cs",
  "    '\\\\':'n':cs  -> '\\n' : unesc cs",
  "    '\\\\':'t':cs  -> '\\t' : unesc cs",
  "    '\\\\':'r':cs  -> '\\r' : unesc cs",
  "    '\\\\':'f':cs  -> '\\f' : unesc cs",
  "    '\"':[]    -> []",
  "    c:cs      -> c : unesc cs",
  "    _         -> []",
  "",
  "-------------------------------------------------------------------",
  "-- Alex wrapper code.",
  "-- A modified \"posn\" wrapper.",
  "-------------------------------------------------------------------",
  "",
  "data Posn = Pn !Int !Int !Int",
  "      deriving (Eq, Show,Ord)",
  "",
  "alexStartPos :: Posn",
  "alexStartPos = Pn 0 1 1",
  "",
  "alexMove :: Posn -> Char -> Posn",
  "alexMove (Pn a l c) '\\t' = Pn (a+1)  l     (((c+7) `div` 8)*8+1)",
  "alexMove (Pn a l c) '\\n' = Pn (a+1) (l+1)   1",
  "alexMove (Pn a l c) _    = Pn (a+1)  l     (c+1)",
  "",
  "type Byte = Word8",
  "",
  "type AlexInput = (Posn,     -- current position,",
  "                  Char,     -- previous char",
  "                  [Byte],   -- pending bytes on the current char",
  "                  "++stringType++")   -- current input string",
  "",
  "tokens :: "++stringType++" -> [Token]",
  "tokens str = go (alexStartPos, '\\n', [], str)",
  "    where",
  "      go :: AlexInput -> [Token]",
  "      go inp@(pos, _, _, str) =",
  "               case alexScan inp 0 of",
  "                AlexEOF                   -> []",
  "                AlexError (pos, _, _, _)  -> [Err pos]",
  "                AlexSkip  inp' len        -> go inp'",
  "                AlexToken inp' len act    -> act pos ("++stringTake++" len str) : (go inp')",
  "",
  "alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)",
  "alexGetByte (p, c, (b:bs), s) = Just (b, (p, c, bs, s))",
  "alexGetByte (p, _, [], s) =",
  "  case " ++ apply stringUncons "s" ++ " of",
  "    "++stringNilP++"  -> Nothing",
  "    "++stringConsP++" ->",
  "             let p'     = alexMove p c",
  "                 (b:bs) = utf8Encode c",
  "              in p' `seq` Just (b, (p', c, bs, s))",
  "",
  "alexInputPrevChar :: AlexInput -> Char",
  "alexInputPrevChar (p, c, bs, s) = c",
  "",
  "-- | Encode a Haskell String to a list of Word8 values, in UTF8 format.",
  "utf8Encode :: Char -> [Word8]",
  "utf8Encode = map fromIntegral . go . ord",
  " where",
  "  go oc",
  "   | oc <= 0x7f       = [oc]",
  "",
  "   | oc <= 0x7ff      = [ 0xc0 + (oc `Data.Bits.shiftR` 6)",
  "                        , 0x80 + oc Data.Bits..&. 0x3f",
  "                        ]",
  "",
  "   | oc <= 0xffff     = [ 0xe0 + (oc `Data.Bits.shiftR` 12)",
  "                        , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f)",
  "                        , 0x80 + oc Data.Bits..&. 0x3f",
  "                        ]",
  "   | otherwise        = [ 0xf0 + (oc `Data.Bits.shiftR` 18)",
  "                        , 0x80 + ((oc `Data.Bits.shiftR` 12) Data.Bits..&. 0x3f)",
  "                        , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f)",
  "                        , 0x80 + oc Data.Bits..&. 0x3f",
  "                        ]",
  "}"
  ]
 where
   (stringType,stringTake,stringUncons,stringPack,stringUnpack,stringNilP,stringConsP) = case tokenText of
     StringToken     -> ("String",        "take",    "",          "id",      "id",        "[]",      "(c:s)"     )
     ByteStringToken -> ("BS.ByteString", "BS.take", "BS.uncons", "BS.pack", "BS.unpack", "Nothing", "Just (c,s)")
     TextToken       -> ("Data.Text.Text", "Data.Text.take", "Data.Text.uncons", "Data.Text.pack", "Data.Text.unpack", "Nothing", "Just (c,s)")

   apply :: String -> String -> String
   apply ""   s = s
   apply "id" s = s
   apply f    s = f ++ " " ++ s

   applyP :: String -> String -> String
   applyP ""   s = s
   applyP "id" s = s
   applyP f    s = f ++ " (" ++ s ++ ")"

   ifC :: TokenCat -> String -> String
   ifC cat s = if isUsedCat cf (TokenCat cat) then s else ""

   lexComments
     :: ( [(String, String)]  -- block comment delimiters
        , [String]            -- line  comment initiators
        ) -> String           -- Alex declarations
   lexComments (block, line) = unlines $ concat $
     [ [ "-- Line comments"  | not (null line)               ]
     , map lexLineComment line
     , [ ""                  | not (null line || null block) ]
     , [ "-- Block comments" | not (null block)              ]
     , map (uncurry lexBlockComment) block
     ]

   lexLineComment
     :: String   -- ^ Line comment start.
     -> String   -- ^ Alex declaration.
   lexLineComment s = concat [ "\"", s, "\" [.]* ;" ]

   lexBlockComment
     :: String   -- ^ Start of block comment.
     -> String   -- ^ End of block comment.
     -> String   -- ^ Alex declaration.
   lexBlockComment start end = printRegAlex (mkRegMultilineComment start end) ++ " ;"

   -- tokens consisting of special symbols
   pTSpec [] = ""
   pTSpec _ = "@rsyms\n    { tok (\\p s -> PT p (eitherResIdent (TV . share) s)) }"

   userDefTokenTypes = unlines
     [ printRegAlex exp ++
       "\n    { tok (\\p s -> PT p (eitherResIdent (T_"  ++ name ++ " . share) s)) }"
     | (name,exp) <- tokenPragmas cf
     ]

   userDefTokenConstrs = unlines
     [ " | T_" ++ name ++ " !"++stringType
     | name <- tokenNames cf
     ]

   userDefTokenPrint = unlines
     [ "  PT _ (T_" ++ name ++ " s) -> s"
     | name <- tokenNames cf
     ]

   ident =
     "$l $i*\n    { tok (\\p s -> PT p (eitherResIdent (TV . share) s)) }"
     --ifC "Ident"  "<ident>   ::= ^l ^i*   { ident  p = PT p . eitherResIdent TV }"


data BTree = N | B String Int BTree BTree

instance Show BTree where
  showsPrec _  N          = showString "N"
  showsPrec n (B s k l r) = mparens
      $ showString "b " . shows s
      . showChar ' ' . shows k
      . showChar ' ' . showsPrec 1 l
      . showChar ' ' . showsPrec 1 r
    where
    mparens f = if n > 0 then showChar '(' . f . showChar ')' else f

sorted2tree :: [(String,Int)] -> BTree
sorted2tree [] = N
sorted2tree xs = B x n (sorted2tree t1) (sorted2tree t2)
  where
  (t1, (x,n) : t2) = splitAt (length xs `div` 2) xs


-------------------------------------------------------------------
-- Inlined version of @BNFC.Backend.Haskell.RegToAlex@.
-- Syntax has changed...
-------------------------------------------------------------------

-- modified from pretty-printer generated by the BNF converter

-- the top-level printing method
printRegAlex :: Reg -> String
printRegAlex = render . prt 0

render :: [String] -> String
render = \case
    "["      : ts -> cons "["  $ render ts
    "("      : ts -> cons "("  $ render ts
    t  : "," : ts -> cons t    $ space "," $ render ts
    t  : ")" : ts -> cons t    $ cons ")"  $ render ts
    t  : "]" : ts -> cons t    $ cons "]"  $ render ts
    t        : ts -> space t   $ render ts
    _             -> ""
  where
  cons s t  = s ++ t
  space t s = if null s then t else t ++ " " ++ s

parenth :: [String] -> [String]
parenth ss = ["("] ++ ss ++ [")"]

-- the printer class does the job
class Print a where
  prt :: Int -> a -> [String]
  prtList :: [a] -> [String]
  prtList = concatMap (prt 0)

instance Print a => Print [a] where
  prt _ = prtList

instance Print Char where
  prt _ = \case
    '\n'             -> ["\\n"]
    '\t'             -> ["\\t"]
    '\r'             -> ["\\r"]
    '\f'             -> ["\\f"]
    c | isAlphaNum c -> [[c]]
    c | isPrint c    -> ['\\':[c]]  -- ['\'':c:'\'':[]] -- Does not work for )
    c                -> ['\\':show (ord c)]

  prtList = map (concat . prt 0)

prPrec :: Int -> Int -> [String] -> [String]
prPrec i j = if j<i then parenth else id

instance Print Identifier where
  prt _ (Identifier (_, i)) = [i]

instance Print Reg where
  prt i e = case e of
   RSeq reg0 reg    -> prPrec i 2 $ prt 2 reg0 ++ prt 3 reg
   RAlt reg0 reg    -> prPrec i 1 $ concat [prt 1 reg0 , ["|"] , prt 2 reg]
   RStar reg        -> prPrec i 3 $ prt 3 reg ++ ["*"]
   RPlus reg        -> prPrec i 3 $ prt 3 reg ++ ["+"]
   ROpt reg         -> prPrec i 3 $ prt 3 reg ++ ["?"]
   -- Atomic/parenthesized expressions
   RMinus reg0 reg  -> concat [ ["["], prt 2 reg0 , ["#"] , prt 2 reg, ["]"] ]
   REps             -> ["()"]
   RChar c          -> prt 0 c
   RAlts str        -> concat [["["],prt 0 str,["]"]]
   RSeqs str        -> prPrec i 2 $ prt 0 str
   RDigit           -> ["$d"]
   RLetter          -> ["$l"]
   RUpper           -> ["$c"]
   RLower           -> ["$s"]
   RAny             -> ["$u"]