{-# OPTIONS  #-}
-----------------------------------------------------------------------------
-- |
-- Module      : Language.Python.Common.LexerUtils 
-- Copyright   : (c) 2009 Bernie Pope 
-- License     : BSD-style
-- Maintainer  : bjpop@csse.unimelb.edu.au
-- Stability   : experimental
-- Portability : ghc
--
-- Various utilities to support the Python lexer. 
-----------------------------------------------------------------------------

module Language.Python.Common.LexerUtils where

import Control.Monad (liftM)
import Control.Monad.Error.Class (throwError)
import Data.List (foldl')
import Data.Word (Word8)
import Language.Python.Common.Token as Token
import Language.Python.Common.ParserMonad hiding (location)
import Language.Python.Common.SrcLocation
import Codec.Binary.UTF8.String as UTF8 (encode)

type Byte = Word8

-- Beginning of. BOF = beginning of file, BOL = beginning of line
data BO = BOF | BOL

-- Functions for building tokens 

type StartCode = Int
type Action = SrcSpan -> Int -> String -> P Token

lineJoin :: Action
lineJoin span _len _str =
   return $ LineJoinToken $ spanStartPoint span

endOfLine :: P Token -> Action
endOfLine lexToken span _len _str = do
   setLastEOL $ spanStartPoint span
   lexToken

bolEndOfLine :: P Token -> Int -> Action
bolEndOfLine lexToken bol span len inp = do
   pushStartCode bol
   endOfLine lexToken span len inp

dedentation :: P Token -> Action
dedentation lexToken span _len _str = do
   topIndent <- getIndent
   -- case compare (endCol span) topIndent of
   case compare (startCol span) topIndent of
      EQ -> do popStartCode
               lexToken
      LT -> do popIndent
               return dedentToken
      GT -> spanError span "indentation error"

indentation :: P Token -> Int -> BO -> Action
-- Check if we are at the EOF. If yes, we may need to generate a newline,
-- in case we came here from BOL (but not BOF).
indentation lexToken _dedentCode bo _loc _len [] = do
   popStartCode
   case bo of
      BOF -> lexToken
      BOL -> newlineToken
indentation lexToken dedentCode bo span _len _str = do
   popStartCode
   parenDepth <- getParenStackDepth
   if parenDepth > 0
      then lexToken
      else do
         topIndent <- getIndent
         -- case compare (endCol span) topIndent of
         case compare (startCol span) topIndent of
            EQ -> case bo of
                     BOF -> lexToken
                     BOL -> newlineToken
            LT -> do pushStartCode dedentCode
                     newlineToken
            -- GT -> do pushIndent (endCol span)
            GT -> do pushIndent (startCol span)
                     return indentToken
   where
   indentToken = IndentToken span

symbolToken :: (SrcSpan -> Token) -> Action
symbolToken mkToken location _ _ = return (mkToken location)

token :: (SrcSpan -> String -> a -> Token) -> (String -> a) -> Action
token mkToken read location len str
   = return $ mkToken location literal (read literal)
   where
   literal = take len str

-- special tokens for the end of file and end of line
endOfFileToken :: Token
endOfFileToken = EOFToken SpanEmpty
dedentToken = DedentToken SpanEmpty

newlineToken :: P Token
newlineToken = do
   loc <- getLastEOL
   return $ NewlineToken loc

-- Test if we are at the end of the line or file
atEOLorEOF :: a -> AlexInput -> Int -> AlexInput -> Bool
atEOLorEOF _user _inputBeforeToken _tokenLength (_loc, _bs, inputAfterToken)
   = null inputAfterToken || nextChar == '\n' || nextChar == '\r'
   where
   nextChar = head inputAfterToken

notEOF :: a -> AlexInput -> Int -> AlexInput -> Bool
notEOF _user _inputBeforeToken _tokenLength (_loc, _bs, inputAfterToken)
   = not (null inputAfterToken)

readBinary :: String -> Integer
readBinary
   = toBinary . drop 2
   where
   toBinary = foldl' acc 0
   acc b '0' = 2 * b
   acc b '1' = 2 * b + 1

readFloat :: String -> Double
readFloat str@('.':cs) = read ('0':readFloatRest str)
readFloat str = read (readFloatRest str)
readFloatRest :: String -> String
readFloatRest [] = []
readFloatRest ['.'] = ".0"
readFloatRest (c:cs) = c : readFloatRest cs

mkString :: (SrcSpan -> String -> Token) -> Action
mkString toToken loc len str = do
   return $ toToken loc (take len str)

stringToken :: SrcSpan -> String -> Token
stringToken = StringToken

rawStringToken :: SrcSpan -> String -> Token
rawStringToken = StringToken

byteStringToken :: SrcSpan -> String -> Token
byteStringToken = ByteStringToken

formatStringToken :: SrcSpan -> String -> Token
formatStringToken = StringToken

formatRawStringToken :: SrcSpan -> String -> Token
formatRawStringToken = StringToken

unicodeStringToken :: SrcSpan -> String -> Token
unicodeStringToken = UnicodeStringToken

rawByteStringToken :: SrcSpan -> String -> Token
rawByteStringToken = ByteStringToken

openParen :: (SrcSpan -> Token) -> Action
openParen mkToken loc _len _str = do
   let token = mkToken loc
   pushParen token
   return token

closeParen :: (SrcSpan -> Token) -> Action
closeParen mkToken loc _len _str = do
  let token = mkToken loc
  topParen <- getParen
  case topParen of
     Nothing -> spanError loc err1
     Just open -> if matchParen open token
                    then popParen >> return token
                    else spanError loc err2
   where
   -- XXX fix these error messages
   err1 = "Lexical error ! unmatched closing paren"
   err2 = "Lexical error ! unmatched closing paren"

matchParen :: Token -> Token -> Bool
matchParen (LeftRoundBracketToken {}) (RightRoundBracketToken {}) = True
matchParen (LeftBraceToken {}) (RightBraceToken {}) = True
matchParen (LeftSquareBracketToken {}) (RightSquareBracketToken {}) = True
matchParen _ _ = False

-- -----------------------------------------------------------------------------
-- Functionality required by Alex 

type AlexInput = (SrcLocation,  -- current src location
                 [Byte],        -- byte buffer for next character
                 String)        -- input string

alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar _ = error "alexInputPrevChar not used"

-- byte buffer should be empty here
alexGetChar :: AlexInput -> Maybe (Char, AlexInput)
alexGetChar (loc, [], input)
   | null input  = Nothing
   | otherwise = seq nextLoc (Just (nextChar, (nextLoc, [], rest)))
   where
   nextChar = head input
   rest = tail input
   nextLoc = moveChar nextChar loc
alexGetChar (loc, _:_, _) = error "alexGetChar called with non-empty byte buffer"

-- mapFst :: (a -> b) -> (a, c) -> (b, c)
-- mapFst f (a, c) = (f a, c)

alexGetByte :: AlexInput -> Maybe (Byte, AlexInput)
-- alexGetByte = fmap (mapFst (fromIntegral . ord)) . alexGetChar
alexGetByte (loc, b:bs, input) = Just (b, (loc, bs, input))
alexGetByte (loc, [], []) = Nothing
alexGetByte (loc, [], nextChar:rest) =
   seq nextLoc (Just (byte, (nextLoc, restBytes, rest)))
   where
   nextLoc = moveChar nextChar loc
   byte:restBytes = UTF8.encode [nextChar]

moveChar :: Char -> SrcLocation -> SrcLocation
moveChar '\n' = incLine 1
moveChar '\t' = incTab
moveChar '\r' = id
moveChar _    = incColumn 1

lexicalError :: P a
lexicalError = do
  location <- getLocation
  c <- liftM head getInput
  throwError $ UnexpectedChar c location

readOctNoO :: String -> Integer
readOctNoO (zero:rest) = read (zero:'O':rest)