-- | Erlang term parser and pretty printer.
module B9.Artifact.Content.ErlTerms
  ( parseErlTerm,
    erlTermParser,
    renderErlTerm,
    SimpleErlangTerm (..),
    arbitraryErlSimpleAtom,
    arbitraryErlString,
    arbitraryErlNumber,
    arbitraryErlNatural,
    arbitraryErlFloat,
    arbitraryErlNameChar,
  )
where

import B9.QCUtil
import B9.Text
import Control.Monad
import Control.Parallel.Strategies
import Data.Binary
import Data.Data
import Data.Function
import Data.Hashable
import GHC.Generics (Generic)
import Test.QuickCheck
import Text.Parsec
  ( (<|>),
    alphaNum,
    anyChar,
    between,
    char,
    choice,
    digit,
    hexDigit,
    lower,
    many,
    many1,
    noneOf,
    octDigit,
    option,
    parse,
    spaces,
    string,
    try,
  )
import Text.Parsec.Text
import qualified Text.PrettyPrint as PP
import Text.Printf
import Text.Show.Pretty

-- | Simplified Erlang term representation.
data SimpleErlangTerm
  = ErlString String
  | ErlFloat Double
  | ErlNatural Integer
  | ErlAtom String
  | ErlChar Char
  | ErlBinary String
  | ErlList [SimpleErlangTerm]
  | ErlTuple [SimpleErlangTerm]
  deriving (Eq, Ord, Read, Show, Data, Typeable, Generic)

instance Hashable SimpleErlangTerm

instance Binary SimpleErlangTerm

instance NFData SimpleErlangTerm

-- | Parse a subset of valid Erlang terms. It parses no maps and binaries are
-- restricted to either empty binaries or binaries with a string. The input
-- encoding must be restricted to ascii compatible 8-bit characters
-- (e.g. latin-1 or UTF8).
parseErlTerm :: String -> Text -> Either String SimpleErlangTerm
parseErlTerm src content =
  either (Left . ppShow) Right (parse erlTermParser src content)

-- | Convert an abstract Erlang term to a pretty byte string preserving the
-- encoding.
renderErlTerm :: SimpleErlangTerm -> Text
renderErlTerm s =
  unsafeRenderToText (PP.render (prettyPrintErlTerm s PP.<> PP.char '.'))

prettyPrintErlTerm :: SimpleErlangTerm -> PP.Doc
prettyPrintErlTerm (ErlString str) =
  PP.doubleQuotes (PP.text (toErlStringString str))
prettyPrintErlTerm (ErlNatural n) = PP.integer n
prettyPrintErlTerm (ErlFloat f) = PP.double f
prettyPrintErlTerm (ErlChar c) = PP.text ("$" ++ toErlAtomChar c)
prettyPrintErlTerm (ErlAtom a) = PP.text quotedAtom
  where
    quotedAtom = case toErlAtomString a of
      "" -> "''"
      a'@(firstChar : rest)
        | firstChar
            `elem` ['a' .. 'z']
            && all (`elem` atomCharsThatDontNeedQuoting) rest ->
          a'
      a' -> "'" ++ a' ++ "'"
    atomCharsThatDontNeedQuoting =
      ['a' .. 'z'] ++ ['A' .. 'Z'] ++ ['0' .. '9'] ++ "@_"
prettyPrintErlTerm (ErlBinary []) = PP.text "<<>>"
prettyPrintErlTerm (ErlBinary b) =
  PP.text ("<<\"" ++ toErlStringString b ++ "\">>")
prettyPrintErlTerm (ErlList xs) =
  PP.brackets (PP.sep (PP.punctuate PP.comma (prettyPrintErlTerm <$> xs)))
prettyPrintErlTerm (ErlTuple xs) =
  PP.braces (PP.sep (PP.punctuate PP.comma (prettyPrintErlTerm <$> xs)))

toErlStringString :: String -> String
toErlStringString = join . map toErlStringChar

toErlStringChar :: Char -> String
toErlStringChar = (table !!) . fromEnum
  where
    table =
      [printf "\\x{%x}" c | c <- [0 .. (31 :: Int)]]
        ++ (pure . toEnum <$> [32 .. 33])
        ++ ["\\\""]
        ++ (pure . toEnum <$> [35 .. 91])
        ++ ["\\\\"]
        ++ (pure . toEnum <$> [93 .. 126])
        ++ [printf "\\x{%x}" c | c <- [(127 :: Int) ..]]

toErlAtomString :: String -> String
toErlAtomString = join . map toErlAtomChar

toErlAtomChar :: Char -> String
toErlAtomChar = (table !!) . fromEnum
  where
    table =
      [printf "\\x{%x}" c | c <- [0 .. (31 :: Int)]]
        ++ (pure . toEnum <$> [32 .. 38])
        ++ ["\\'"]
        ++ (pure . toEnum <$> [40 .. 91])
        ++ ["\\\\"]
        ++ (pure . toEnum <$> [93 .. 126])
        ++ [printf "\\x{%x}" c | c <- [(127 :: Int) ..]]

instance Arbitrary SimpleErlangTerm where
  arbitrary =
    oneof
      [ sized aErlString,
        sized aErlNatural,
        sized aErlFloat,
        sized aErlChar,
        sized aErlAtomUnquoted,
        sized aErlAtomQuoted,
        sized aErlBinary,
        sized aErlList,
        sized aErlTuple
      ]
    where
      decrSize 0 = resize 0
      decrSize n = resize (n - 1)
      aErlString n =
        ErlString <$> decrSize n (listOf (choose (toEnum 0, toEnum 255)))
      aErlFloat n = do
        f <- decrSize n arbitrary :: Gen Float
        let d = fromRational (toRational f)
        return (ErlFloat d)
      aErlNatural n = ErlNatural <$> decrSize n arbitrary
      aErlChar n = ErlChar <$> decrSize n (choose (toEnum 0, toEnum 255))
      aErlAtomUnquoted n = do
        f <- choose ('a', 'z')
        rest <- decrSize n aErlNameString
        return (ErlAtom (f : rest))
      aErlAtomQuoted n = do
        cs <- decrSize n aParsableErlString
        return (ErlAtom ("'" ++ cs ++ "'"))
      aErlBinary n =
        ErlBinary <$> decrSize n (listOf (choose (toEnum 0, toEnum 255)))
      aParsableErlString =
        oneof
          [ aErlNameString,
            aErlEscapedCharString,
            aErlControlCharString,
            aErlOctalCharString,
            aErlHexCharString
          ]
      aErlNameString =
        listOf (elements (['a' .. 'z'] ++ ['A' .. 'Z'] ++ ['0' .. '9'] ++ "@_"))
      aErlEscapedCharString = elements (("\\" ++) . pure <$> "0bdefnrstv\\\"\'")
      aErlControlCharString =
        elements (("\\^" ++) . pure <$> (['a' .. 'z'] ++ ['A' .. 'Z']))
      aErlOctalCharString = do
        n <- choose (1, 3)
        os <- vectorOf n (choose (0, 7))
        return (join ("\\" : (show <$> (os :: [Int]))))
      aErlHexCharString = oneof [twoDigitHex, nDigitHex]
        where
          twoDigitHex = do
            d1 <- choose (0, 15) :: Gen Int
            d2 <- choose (0, 15) :: Gen Int
            return (printf "\\x%x%X" d1 d2)
          nDigitHex = do
            zs <- listOf (elements "0")
            v <- choose (0, 255) :: Gen Int
            return (printf "\\x{%s%x}" zs v)
      aErlList n = ErlList <$> resize (n `div` 2) (listOf arbitrary)
      aErlTuple n = ErlTuple <$> resize (n `div` 2) (listOf arbitrary)

erlTermParser :: Parser SimpleErlangTerm
erlTermParser = between spaces (char '.') erlExpressionParser

erlExpressionParser :: Parser SimpleErlangTerm
erlExpressionParser =
  erlAtomParser
    <|> erlCharParser
    <|> erlStringParser
    <|> erlBinaryParser
    <|> erlListParser
    <|> erlTupleParser
    <|> try erlFloatParser
    <|> erlNaturalParser

erlAtomParser :: Parser SimpleErlangTerm
erlAtomParser =
  ErlAtom
    <$> ( between (char '\'') (char '\'') (many (erlCharEscaped <|> noneOf "'"))
            <|> ((:) <$> lower <*> many erlNameChar)
        )

erlNameChar :: Parser Char
erlNameChar = alphaNum <|> char '@' <|> char '_'

erlCharParser :: Parser SimpleErlangTerm
erlCharParser = ErlChar <$> (char '$' >> (erlCharEscaped <|> anyChar))

erlFloatParser :: Parser SimpleErlangTerm
erlFloatParser = do
  -- Parse a float as string, then use read :: Double to 'parse' the floating
  -- point value. Calculating by hand is complicated because of precision
  -- issues.
  sign <- option "" ((char '-' >> return "-") <|> (char '+' >> return ""))
  s1 <- many digit
  _ <- char '.'
  s2 <- many1 digit
  e <-
    do
      expSym <- choice [char 'e', char 'E']
      expSign <-
        option
          ""
          ((char '-' >> return "-") <|> (char '+' >> return "+"))
      expAbs <- many1 digit
      return ([expSym] ++ expSign ++ expAbs)
      <|> return ""
  return (ErlFloat (read (sign ++ s1 ++ "." ++ s2 ++ e)))

erlNaturalParser :: Parser SimpleErlangTerm
erlNaturalParser = do
  sign <- signParser
  dec <- decimalLiteral
  return $ ErlNatural $ sign * dec

signParser :: Parser Integer
signParser = (char '-' >> return (-1)) <|> (char '+' >> return 1) <|> return 1

decimalLiteral :: Parser Integer
decimalLiteral =
  foldr
    ( \radix acc ->
        ( try (string (show radix ++ "#")) >> calcBE (toInteger radix)
            <$> many1
              (erlDigits radix)
        )
          <|> acc
    )
    (calcBE 10 <$> many1 (erlDigits 10))
    [2 .. 36]
  where
    calcBE a = foldl (\acc d -> a * acc + d) 0
    erlDigits k = choice (take k digitParsers)
    digitParsers =
      -- create parsers that consume/match '0' .. '9' and "aA" .. "zZ" and return 0 .. 35
      map
        (\(cs, v) -> choice (char <$> cs) >> return v)
        ( ( (pure <$> ['0' .. '9'])
              ++ zipWith ((++) `on` pure) ['a' .. 'z'] ['A' .. 'Z']
          )
            `zip` [0 ..]
        )

erlStringParser :: Parser SimpleErlangTerm
erlStringParser = do
  _ <- char '"'
  str <- many (erlCharEscaped <|> noneOf "\"")
  _ <- char '"'
  return (ErlString str)

erlCharEscaped :: Parser Char
erlCharEscaped =
  char '\\'
    >> ( do
           _ <- char '^'
           choice (zipWith escapedChar ccodes creplacements)
           <|> do
             _ <- char 'x'
             do
               ds <-
                 between
                   (char '{')
                   (char '}')
                   (fmap hexVal <$> many1 hexDigit)
               let val = foldl (\acc v -> acc * 16 + v) 0 ds
               return (toEnum val)
               <|> do
                 x1 <- hexVal <$> hexDigit
                 x2 <- hexVal <$> hexDigit
                 return (toEnum ((x1 * 16) + x2))
           <|> do
             o1 <- octVal <$> octDigit
             do
               o2 <- octVal <$> octDigit
               do
                 o3 <- octVal <$> octDigit
                 return (toEnum ((((o1 * 8) + o2) * 8) + o3))
                 <|> return (toEnum ((o1 * 8) + o2))
               <|> return (toEnum o1)
           <|> choice (zipWith escapedChar codes replacements)
       )
  where
    escapedChar code replacement = char code >> return replacement
    codes = "0bdefnrstv\\\"'"
    replacements = "\NUL\b\DEL\ESC\f\n\r \t\v\\\"'"
    ccodes = ['a' .. 'z'] ++ ['A' .. 'Z']
    creplacements = cycle ['\^A' .. '\^Z']
    hexVal v
      | v `elem` ['a' .. 'z'] = 0xA + (fromEnum v - fromEnum 'a')
      | v `elem` ['A' .. 'Z'] = 0xA + (fromEnum v - fromEnum 'A')
      | otherwise = fromEnum v - fromEnum '0'
    octVal = hexVal

erlBinaryParser :: Parser SimpleErlangTerm
erlBinaryParser = do
  _ <- string "<<"
  spaces
  ErlString str <- option (ErlString "") erlStringParser
  _ <- string ">>"
  spaces
  return (ErlBinary str)

erlListParser :: Parser SimpleErlangTerm
erlListParser = ErlList <$> erlNestedParser (char '[') (char ']')

erlTupleParser :: Parser SimpleErlangTerm
erlTupleParser = ErlTuple <$> erlNestedParser (char '{') (char '}')

erlNestedParser :: Parser a -> Parser b -> Parser [SimpleErlangTerm]
erlNestedParser open close =
  between (open >> spaces) (close >> spaces) (commaSep erlExpressionParser)

commaSep :: Parser a -> Parser [a]
commaSep p =
  do
    r <- p
    spaces
    rest <- option [] (char ',' >> spaces >> commaSep p)
    return (r : rest)
    <|> return []

arbitraryErlSimpleAtom :: Gen SimpleErlangTerm
arbitraryErlSimpleAtom =
  ErlAtom <$> ((:) <$> arbitraryLetterLower <*> listOf arbitraryErlNameChar)

arbitraryErlString :: Gen SimpleErlangTerm
arbitraryErlString =
  ErlString <$> listOf (oneof [arbitraryLetter, arbitraryDigit])

arbitraryErlNumber :: Gen SimpleErlangTerm
arbitraryErlNumber = oneof [arbitraryErlNatural, arbitraryErlFloat]

arbitraryErlNatural :: Gen SimpleErlangTerm
arbitraryErlNatural = ErlNatural <$> arbitrary

arbitraryErlFloat :: Gen SimpleErlangTerm
arbitraryErlFloat = ErlFloat <$> arbitrary

arbitraryErlNameChar :: Gen Char
arbitraryErlNameChar =
  oneof [arbitraryLetter, arbitraryDigit, pure '_', pure '@']