-- | Definition of the S-Expression tokens used to
-- (de)serialize What4 expressions.

{-# LANGUAGE DataKinds #-}
{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}

module What4.Serialize.SETokens
    ( Atom(..)
    , string, ident, int, nat, bitvec, bool, real, float
    , string', ident'
    , printAtom
    , printSExpr
    , parseSExpr
    )
    where

import qualified Data.Foldable as F
import qualified Data.Parameterized.NatRepr as PN
import qualified Data.SCargot as SC
import qualified Data.SCargot.Comments as SC
import qualified Data.SCargot.Repr as SC
import qualified Data.SCargot.Repr.WellFormed as SC
import           Data.Semigroup
import qualified Data.Sequence as Seq
import           Data.Text (Text)
import qualified Data.Text as T
import qualified LibBF as BF
import           Numeric.Natural ( Natural )
import qualified Text.Parsec as P
import           Text.Parsec.Text ( Parser )
import           Text.Printf ( printf )
import           Data.Ratio

import           Data.Parameterized.Some ( Some(..))
import qualified What4.BaseTypes as W4

import           Prelude

data Atom =
  AId Text
  -- ^ An identifier.
  | AStr (Some W4.StringInfoRepr) Text
  -- ^ A prefix followed by a string literal
  -- (.e.g, AStr "u" "Hello World" is serialize as `#u"Hello World"`).
  | AInt Integer
  -- ^ Integer (i.e., unbounded) literal.
  | ANat Natural
  -- ^ Natural (i.e., unbounded) literal
  | AReal Rational
  -- ^ Real (i.e., unbounded) literal.
  | AFloat (Some W4.FloatPrecisionRepr) BF.BigFloat
  -- ^ A floating point literal (with precision)
  | ABV Int Integer
  -- ^ Bitvector, width and then value.
  | ABool Bool
  -- ^ Boolean literal.
  deriving (Show, Eq, Ord)

type SExpr = SC.WellFormedSExpr Atom

string :: Some W4.StringInfoRepr -> Text -> SExpr
string strInfo str = SC.A $ AStr strInfo str

string' :: Some W4.StringInfoRepr -> String -> SExpr
string' strInfo str = SC.A $ AStr strInfo (T.pack str)

-- | Lift an unquoted identifier.
ident :: Text -> SExpr
ident = SC.A . AId

ident' :: String -> SExpr
ident' = SC.A . AId . T.pack

-- | Lift an integer.
int :: Integer -> SExpr
int = SC.A . AInt

-- | Lift a natural
nat :: Natural -> SExpr
nat = SC.A . ANat

-- | Lift a real
real :: Rational -> SExpr
real = SC.A . AReal

-- | Lift a float
float :: W4.FloatPrecisionRepr fpp -> BF.BigFloat -> SExpr
float rep bf = SC.A (AFloat (Some rep) bf)

-- | Lift a bitvector.
bitvec :: Natural -> Integer -> SExpr
bitvec w v = SC.A $ ABV (fromEnum w) v

-- | Lift a boolean.
bool :: Bool -> SExpr
bool = SC.A . ABool


-- * Output of the S-Expression Formula language


-- | Generates the the S-expression tokens represented by the sexpr
-- argument, preceeded by a list of strings output as comments.
printSExpr :: Seq.Seq String -> SExpr -> T.Text
printSExpr comments sexpr =
  let outputFmt = SC.setIndentAmount 1 $ SC.unconstrainedPrint printAtom
  in formatComment comments <> (SC.encodeOne outputFmt $ SC.fromWellFormed sexpr)


formatComment :: Seq.Seq String -> T.Text
formatComment c
  | Seq.null c = T.empty
  | otherwise = T.pack $ unlines $ fmap formatLine (F.toList c)
  where
    formatLine l = printf ";; %s" l


printAtom :: Atom -> T.Text
printAtom a =
  case a of
    AId s -> s
    AStr si s -> (stringInfoToPrefix si)<>"\""<>s<>"\""
    AInt i -> T.pack (show i)
    ANat n -> T.pack $ "#u"++(show n)
    AReal r -> T.pack $ "#r"++(show (numerator r))++"/"++(show (denominator r))
    ABV w val -> formatBV w val
    ABool b -> if b then "#true" else "#false"
    AFloat (Some rep) bf -> formatFloat rep bf

-- | Format a floating point value with no rounding in base 16
formatFloat :: W4.FloatPrecisionRepr fpp -> BF.BigFloat -> T.Text
formatFloat (W4.FloatingPointPrecisionRepr eb sb) bf =
  T.pack (printf "#f#%s#%s#%s" (show eb) (show sb) (BF.bfToString 16 (BF.showFree Nothing) bf))

formatBV :: Int -> Integer -> T.Text
formatBV w val = T.pack (prefix ++ printf fmt val)
  where
    (prefix, fmt)
      | w `rem` 4 == 0 = ("#x", "%0" ++ show (w `div` 4) ++ "x")
      | otherwise = ("#b", "%0" ++ show w ++ "b")


-- * Input and parse of the S-Expression Formula language

-- | This is only the base-level parsing of atoms.  The full language
-- parsing is handled by the base here and the Parser definitions.

parseId :: Parser Text
parseId = T.pack <$> ((:) <$> first <*> P.many rest)
  where first = P.letter P.<|> P.oneOf "@+-=<>_."
        rest = P.letter P.<|> P.digit P.<|> P.oneOf "+-=<>_."

stringInfoToPrefix :: Some W4.StringInfoRepr -> Text
stringInfoToPrefix (Some W4.Char16Repr) = "#char16"
stringInfoToPrefix (Some W4.Char8Repr) = "#char8"
stringInfoToPrefix (Some W4.UnicodeRepr) = ""


parseStrInfo :: Parser (Some W4.StringInfoRepr)
parseStrInfo =
  P.try (P.string "#char16" >> return (Some W4.Char16Repr))
  P.<|> P.try (P.string "#char8" >> return (Some W4.Char8Repr))
  P.<|> (return (Some W4.UnicodeRepr))


parseStr :: Parser (Some W4.StringInfoRepr, Text)
parseStr = do
  prefix <- parseStrInfo
  _ <- P.char '"'
  str <- concat <$> P.many ( do { _ <- P.char '\\'; c <- P.anyChar ; return ['\\',c]} P.<|> P.many1 (P.noneOf ('"':"\\")))
  _ <- P.char '"'
  return $ (prefix, T.pack str)

parseReal :: Parser Rational
parseReal = do
  _ <- P.string "#r"
  n <- (read :: (String -> Integer)) <$> P.many P.digit
  _ <- P.char '/'
  d <- (read :: (String -> Integer)) <$> P.many P.digit
  return $ n % d

parseInt :: Parser Integer
parseInt = do
  (read <$> P.many1 P.digit)
  P.<|> (*(-1)) . read <$> (P.char '-' >> P.many1 P.digit)


parseNat :: Parser Natural
parseNat = do
  _ <- P.string "#u"
  n <- P.many1 P.digit
  return $ read n

parseBool :: Parser Bool
parseBool = do 
  (P.try (P.string "#false" *> return False))
  P.<|> (P.string "#true" *> return True)
  
parseBV :: Parser (Int, Integer)
parseBV = P.char '#' >> ((P.char 'b' >> parseBin) P.<|> (P.char 'x' >> parseHex))
  where parseBin = P.oneOf "10" >>= \d -> parseBin' (1, if d == '1' then 1 else 0)

        parseBin' :: (Int, Integer) -> Parser (Int, Integer)
        parseBin' (bits, x) = do
          P.optionMaybe (P.oneOf "10") >>= \case
            Just d -> parseBin' (bits + 1, x * 2 + (if d == '1' then 1 else 0))
            Nothing -> return (bits, x)

        parseHex = (\s -> (length s * 4, read ("0x" ++ s))) <$> P.many1 P.hexDigit

parseFloat :: Parser (Some W4.FloatPrecisionRepr, BF.BigFloat)
parseFloat = do
  _ <- P.string "#f#"
  -- We printed the nat reprs out in decimal
  eb :: Natural
     <- read <$> P.many1 P.digit
  _ <- P.char '#'
  sb :: Natural
     <- read <$> P.many1 P.digit
  _ <- P.char '#'

  -- The float value itself is printed out as a hex literal
  hexDigits <- P.many1 P.hexDigit

  Some ebRepr <- return (PN.mkNatRepr eb)
  Some sbRepr <- return (PN.mkNatRepr sb)
  case (PN.testLeq (PN.knownNat @2) ebRepr, PN.testLeq (PN.knownNat @2) sbRepr) of
    (Just PN.LeqProof, Just PN.LeqProof) -> do
      let rep = W4.FloatingPointPrecisionRepr ebRepr sbRepr

      -- We know our format: it is determined by the exponent bits (eb) and the
      -- significand bits (sb) parsed above
      let fmt = BF.precBits (fromIntegral sb) <> BF.expBits (fromIntegral eb)
      let (bf, status) = BF.bfFromString 16 fmt hexDigits
      case status of
        BF.Ok -> return (Some rep, bf)
        _ -> P.unexpected ("Error parsing hex float: 0x" ++ hexDigits)
    _ -> P.unexpected ("Invalid exponent or significand size: " ++ show (eb, sb))


parseAtom :: Parser Atom
parseAtom
  =     P.try (ANat  <$> parseNat)
  P.<|> P.try (uncurry AFloat <$> parseFloat)
  P.<|> P.try (AReal <$> parseReal)
  P.<|> P.try (AInt  <$> parseInt)
  P.<|> P.try (AId   <$> parseId)
  P.<|> P.try (uncurry AStr  <$> parseStr)
  P.<|> P.try (ABool <$> parseBool)
  P.<|> P.try (uncurry ABV <$> parseBV)

parseSExpr :: T.Text -> Either String SExpr
parseSExpr = SC.decodeOne $ SC.asWellFormed $ SC.withLispComments (SC.mkParser parseAtom)