module Text.Highlighter.Lexer (runLexer) where

import Control.Monad.Except (ExceptT, runExceptT, throwError, catchError)
import Control.Monad.State (State, gets, modify, evalState)
import Text.Regex.PCRE.Light hiding (compile)
import Text.Regex.PCRE.Light.Char8 (compile)
import qualified Data.ByteString as BS
import Data.Sequence (Seq, empty, singleton, (><), viewl, null, ViewL(..))
import Data.Monoid ((<>))
import Control.Applicative ((<$>))
import Data.Foldable (toList, foldr1, mapM_)
import Prelude hiding (lex, foldr1, mapM_, concat, head, drop, tail, reverse, dropWhile, null)
import qualified Prelude as P

import Text.Highlighter.Types


data LexerState =
    LexerState
        { lsLexer :: Lexer
        , lsInput :: BS.ByteString
        , lsState :: [TokenMatcher]
        , lsLexed :: (Seq Token)
        , lastNotNull :: Bool
        }
    deriving Show

type LexerM = ExceptT LexerError (State LexerState)

data LexerError
    = NoMatchFor BS.ByteString
    | OtherLexerError String
    deriving Show

runLexer :: Lexer -> BS.ByteString -> Either LexerError [Token]
runLexer l s =  toList <$> runLexer' l s

runLexer' :: Lexer -> BS.ByteString -> Either LexerError (Seq Token)
runLexer' l s = evalState (runExceptT lex) (LexerState l s [lStart l] empty True)

lex :: LexerM (Seq Token)
lex = do
    done <- gets (BS.null . lsInput)

    if done
        then gets lsLexed
        else do

    ms <- getState
    ts <- tryAll ms
    if null ts || (BS.null . tText . head $ ts)
       then modify $ \ls -> ls { lsLexed = lsLexed ls >< ts }
       else modify $ \ls -> ls { lsLexed = lsLexed ls >< ts 
                              , lastNotNull = (BS.last . tText . head $ ts) == 10
                              }
    lex
  where
    getState = gets (P.head . lsState)

isBOL :: LexerM Bool
isBOL = gets lastNotNull

head :: Seq a -> a
head x = let (b :< _) =  viewl x
         in b

tryAll :: [Match] -> LexerM (Seq Token)
tryAll [] = do
    i <- gets lsInput
    throwError (NoMatchFor i)
tryAll (AnyOf ms:ms') =
    tryAll (ms ++ ms')
tryAll (m:ms) = do
    atbol <- isBOL
    fs <- gets (lFlags . lsLexer)

    let opts
            | atbol     = [exec_anchored]
            | otherwise = [exec_anchored, exec_notbol]

    i <- gets lsInput
    case match (compile (mRegexp m) fs) i opts of
        Just [] -> do
            nextState (mNextState m) []
            return empty

        Just (s:ss) -> do
            modify $ \ls -> ls { lsInput = BS.drop (BS.length s) i }

            nextState (mNextState m) (s:ss)

            toTokens (s:ss) (mType m)

        Nothing ->
            tryAll ms `catchError` trySkipping
  where
    trySkipping (NoMatchFor _) = tryAllFirst (m:ms)
    trySkipping e = throwError e

tryAllFirst :: [Match] -> LexerM (Seq Token)
tryAllFirst [] = do
    i <- gets lsInput
    throwError (NoMatchFor i)
tryAllFirst (AnyOf ms:ms') =
    tryAllFirst (ms ++ ms')
tryAllFirst (m:ms) = do
    atbol <- isBOL
    fs <- gets (lFlags . lsLexer)

    let opts
            | atbol     = []
            | otherwise = [exec_notbol]

    i <- gets lsInput
    case match (compile (mRegexp m) fs) i opts of
        Just (s:ss) -> do
            let (skipped, next) = skipFailed i s
            modify $ \ls -> ls { lsInput = next }
            ts <- toTokens (s:ss) (mType m)
            return . singleton . Token Error $ (skipped <> (tText $ head ts))

        _ -> tryAllFirst ms

toTokens :: [BS.ByteString] -> TokenType -> LexerM (Seq Token)
toTokens (s:_) (Using l) = either throwError return (runLexer' l s)
toTokens (_:ss) (ByGroups ts) = foldr1 (><) <$> mapM (\(s,t) -> toTokens [s] t) (P.zip ss ts)
toTokens (s:_) t = return $ singleton $ Token t s
toTokens [] _ = return empty

-- Given the starting point, return the text preceding and after
-- the failing regexp match
skipFailed :: BS.ByteString -> BS.ByteString -> (BS.ByteString, BS.ByteString)
skipFailed i r
    | r `BS.isPrefixOf` i = (BS.empty, BS.drop (BS.length r) i)
    | otherwise =
        let (pre, next) = skipFailed (BS.tail i) r
        in (BS.cons (BS.head i) pre, next)

nextState :: NextState -> [BS.ByteString] -> LexerM ()
nextState Continue _ = return ()
nextState Pop _ =
    modify $ \ls -> ls { lsState = P.tail (lsState ls) }
nextState (PopNum n) _ =
    modify $ \ls -> ls { lsState = P.drop n (lsState ls) }
nextState Push _ =
    modify $ \ls -> ls { lsState = P.head (lsState ls) : lsState ls }
nextState (GoTo n) _ =
    modify $ \ls -> ls { lsState = n : lsState ls }
nextState (CapturesTo f) cs =
    modify $ \ls -> ls { lsState = f (map fromBS cs) : lsState ls }
  where
    fromBS = map (toEnum . fromEnum) . BS.unpack
nextState (DoAll nss) cs = mapM_ (flip nextState cs) nss
nextState (Combined nss) _ =
    modify $ \ls -> ls { lsState = P.concat nss : lsState ls }