{-|
Module      : Language.Rust.Parser.Literals
Description : Parsing literals
Copyright   : (c) Alec Theriault, 2017-2018
License     : BSD-style
Maintainer  : alec.theriault@gmail.com
Stability   : experimental
Portability : portable

Functions for parsing literals from valid literal tokens. Note the functions in this module fail
badly is fed invalid 'LitTok's; it is expected their input is coming from Alex and is correct.
-}

module Language.Rust.Parser.Literals (
  translateLit
) where

import Language.Rust.Syntax.Token ( LitTok(..) )
import Language.Rust.Syntax.AST   ( IntRep(..), Lit(..), StrStyle(..), Suffix(..) )

import Data.Char                  ( chr, digitToInt, ord, isHexDigit, isSpace )
import Data.List                  ( unfoldr )
import Data.Maybe                 ( fromMaybe )
import Data.Word                  ( Word8 )

import Text.Read                  ( readMaybe )

-- | Parse a valid 'LitTok' into a 'Lit'.
translateLit :: LitTok -> Suffix -> a -> Lit a
translateLit (ByteTok s)         = Byte (unescapeByte' s)
translateLit (CharTok s)         = Char (unescapeChar' s)
translateLit (FloatTok s)        = Float (unescapeFloat s)
translateLit (StrTok s)          = Str (unfoldr (unescapeChar True) s) Cooked
translateLit (StrRawTok s n)     = Str s (Raw n)
translateLit (ByteStrTok s)      = ByteStr (unfoldr (unescapeByte True) s) Cooked
translateLit (ByteStrRawTok s n) = ByteStr (map (fromIntegral . ord) s) (Raw n)
translateLit (IntegerTok s)      = \suf -> case (suf, unescapeInteger s) of
                                             (F32, (Dec, n)) -> Float (fromInteger n) F32
                                             (F64, (Dec, n)) -> Float (fromInteger n) F64
                                             (_,   (rep, n)) -> Int rep n s suf

-- | Given a string of characters read from a Rust source, extract the next underlying char taking
-- into account escapes and unicode.
unescapeChar :: Bool                    -- ^ multi-line strings allowed
             -> String                  -- ^ input string
             -> Maybe (Char, String)
unescapeChar multiline ('\\':c:cs) = case c of
  'n'  -> pure ('\n', cs)
  'r'  -> pure ('\r', cs)
  't'  -> pure ('\t', cs)
  '\\' -> pure ('\\', cs)
  '\'' -> pure ('\'', cs)
  '"'  -> pure ('"',  cs)
  '0'  -> pure ('\0', cs)
  'x'  -> do (h,cs') <- readHex 2 cs; pure (chr h, cs')
  'X'  -> do (h,cs') <- readHex 2 cs; pure (chr h, cs')
  'U'  -> do (h,cs') <- readHex 8 cs; pure (chr h, cs')
  'u'  -> case cs of
    '{':x1:'}':cs'                -> do (h,_)   <- readHex 1 [x1];                pure (chr h, cs')
    '{':x1:x2:'}':cs'             -> do (h,_)   <- readHex 2 [x1,x2];             pure (chr h, cs')
    '{':x1:x2:x3:'}':cs'          -> do (h,_)   <- readHex 3 [x1,x2,x3];          pure (chr h, cs')
    '{':x1:x2:x3:x4:'}':cs'       -> do (h,_)   <- readHex 4 [x1,x2,x3,x4];       pure (chr h, cs')
    '{':x1:x2:x3:x4:x5:'}':cs'    -> do (h,_)   <- readHex 5 [x1,x2,x3,x4,x5];    pure (chr h, cs')
    '{':x1:x2:x3:x4:x5:x6:'}':cs' -> do (h,_)   <- readHex 6 [x1,x2,x3,x4,x5,x6]; pure (chr h, cs')
    _                             -> do (h,cs') <- readHex 4 cs;                  pure (chr h, cs')
  '\n' | multiline -> unescapeChar multiline $ dropWhile isSpace cs
  _ -> error "unescape char: bad escape sequence"
unescapeChar _ (c:cs) = Just (c, cs)
unescapeChar _ [] = fail "unescape char: empty string"

-- | Given a string of characters read from a Rust source, extract the next underlying byte taking
-- into account escapes.
unescapeByte :: Bool                    -- ^ multi-line strings allowed
             -> String                  -- ^ input string
             -> Maybe (Word8, String)
unescapeByte multiline ('\\':c:cs) = case c of
       'n'  -> pure (toEnum $ fromEnum '\n', cs)
       'r'  -> pure (toEnum $ fromEnum '\r', cs)
       't'  -> pure (toEnum $ fromEnum '\t', cs)
       '\\' -> pure (toEnum $ fromEnum '\\', cs)
       '\'' -> pure (toEnum $ fromEnum '\'', cs)
       '"'  -> pure (toEnum $ fromEnum '"',  cs)
       '0'  -> pure (toEnum $ fromEnum '\0', cs)
       'x'  -> do (h,cs') <- readHex 2 cs; pure (h, cs')
       'X'  -> do (h,cs') <- readHex 2 cs; pure (h, cs')
       '\n' | multiline -> unescapeByte multiline $ dropWhile isSpace cs
       _    -> error "unescape byte: bad escape sequence"
unescapeByte _ (c:cs) = Just (toEnum $ fromEnum c, cs)
unescapeByte _ [] = fail "unescape byte: empty string"

-- | Given a string Rust representation of a character, parse it into a character
unescapeChar' :: String -> Char
unescapeChar' s = case unescapeChar False s of
                    Just (c, "") -> c
                    _ -> error "unescape char: bad character literal"

-- | Given a string Rust representation of a byte, parse it into a byte
unescapeByte' :: String -> Word8
unescapeByte' s = case unescapeByte False s of
                    Just (w8, "") -> w8
                    _ -> error "unescape byte: bad byte literal"

-- | Given a string Rust representation of an integer, parse it into a number
unescapeInteger :: Num a => String -> (IntRep,a)
unescapeInteger ('0':'b':cs@(_:_)) | all (`elem` "_01") cs = (Bin, numBase 2 (filter (/= '_') cs))
unescapeInteger ('0':'o':cs@(_:_)) | all (`elem` "_01234567") cs = (Oct, numBase 8 (filter (/= '_') cs))
unescapeInteger ('0':'x':cs@(_:_)) | all (`elem` "_0123456789abcdefABCDEF") cs = (Hex, numBase 16 (filter (/= '_') cs))
unescapeInteger cs@(_:_)           | all (`elem` "_0123456789") cs = (Dec, numBase 10 (filter (/= '_') cs))
unescapeInteger _ = error "unescape integer: bad decimal literal"

-- | Given a string Rust representation of a float, parse it into a float.
-- NOTE: this is a bit hacky. Eventually, we might not do this and change the internal
-- representation of a float to a string (what language-c has opted to do).
unescapeFloat :: String -> Double
unescapeFloat cs = fromMaybe (error $ "unescape float: cannot parse float " ++ cs') (readMaybe cs')
  where cs' = filter (/= '_') (if last cs == '.' then cs ++ "0" else cs)

-- | Try to read a hexadecimal number of the specified length off of the front of the string
-- provided. If there are not enough characters to do this, or the characters don't fall in the
-- right range, this fails with 'Nothing'.
readHex :: Num a => Int -> String -> Maybe (a, String)
readHex n cs = let digits = take n cs
               in if length digits == n && all isHexDigit digits
                    then Just (numBase 16 digits, drop n cs)
                    else Nothing

-- | Convert a list of characters to the number they represent.
numBase :: Num a => a -> String -> a
numBase b = foldl (\n x -> fromIntegral (digitToInt x) + b * n) 0