{-# LANGUAGE OverloadedRecordDot #-}
{-# LANGUAGE OverloadedStrings   #-}

module Ide.Plugin.SemanticTokens.Tokenize (computeRangeHsSemanticTokenTypeList) where

import           Control.Lens                     (Identity (runIdentity))
import           Control.Monad                    (foldM, guard)
import           Control.Monad.State.Strict       (MonadState (get),
                                                   MonadTrans (lift),
                                                   evalStateT, modify, put)
import           Control.Monad.Trans.State.Strict (StateT, runStateT)
import           Data.Char                        (isAlphaNum)
import           Data.DList                       (DList)
import qualified Data.DList                       as DL
import qualified Data.Map.Strict                  as M
import qualified Data.Map.Strict                  as Map
import           Data.Text                        (Text)
import qualified Data.Text                        as T
import qualified Data.Text.Rope                   as Char
import qualified Data.Text.Utf16.Rope             as Utf16
import           Data.Text.Utf16.Rope.Mixed       (Rope)
import qualified Data.Text.Utf16.Rope.Mixed       as Rope
import           Development.IDE.GHC.Compat
import           Development.IDE.GHC.Error        (realSrcSpanToCodePointRange)
import           Ide.Plugin.SemanticTokens.Types  (HsSemanticTokenType (TModule),
                                                   RangeHsSemanticTokenTypes (..))
import           Language.LSP.Protocol.Types      (Position (Position),
                                                   Range (Range), UInt, mkRange)
import           Language.LSP.VFS                 hiding (line)
import           Prelude                          hiding (length, span)

type Tokenizer m a = StateT PTokenState m a
type HsSemanticLookup = Identifier -> Maybe HsSemanticTokenType


data PTokenState = PTokenState
  {
    rope             :: !Rope -- the remains of rope we are working on
    , cursor         :: !Char.Position -- the cursor position of the current rope to the start of the original file in code point position
    , columnsInUtf16 :: !UInt -- the column of the start of the current rope in utf16
  }

data SplitResult
  = NoSplit (Text, Range) -- does not need to split, token text, token range
  | Split (Text, Range, Range) -- token text, prefix range(module range), token range
  deriving (Show)

getSplitTokenText :: SplitResult -> Text
getSplitTokenText (NoSplit (t, _))  = t
getSplitTokenText (Split (t, _, _)) = t


mkPTokenState :: VirtualFile -> PTokenState
mkPTokenState vf =
  PTokenState
    {
      rope = vf._file_text,
      cursor = Char.Position 0 0,
      columnsInUtf16 = 0
    }

-- lift a Tokenizer Maybe a to Tokenizer m a,
-- if the Maybe is Nothing, do nothing, recover the state, and return the mempty value
-- if the Maybe is Just x, do the action, and keep the state, and return x
liftMaybeM :: (Monad m, Monoid a) => Tokenizer Maybe a -> Tokenizer m a
liftMaybeM p = do
  st <- get
  maybe (return mempty) (\(ans, st') -> put st' >> return ans) $ runStateT p st

foldMapM :: (Monad m, Monoid b, Foldable t) => (a -> m b) -> t a -> m b
foldMapM f ta = foldM (\b a -> mappend b <$> f a) mempty ta

computeRangeHsSemanticTokenTypeList :: HsSemanticLookup -> VirtualFile -> HieAST a -> RangeHsSemanticTokenTypes
computeRangeHsSemanticTokenTypeList lookupHsTokenType vf ast =
    RangeHsSemanticTokenTypes $ DL.toList $ runIdentity $ evalStateT (foldAst lookupHsTokenType ast) (mkPTokenState vf)
-- | foldAst
-- visit every leaf node in the ast in depth first order
foldAst :: (Monad m) => HsSemanticLookup -> HieAST t -> Tokenizer m (DList (Range, HsSemanticTokenType))
foldAst lookupHsTokenType ast = if null (nodeChildren ast)
  then liftMaybeM (visitLeafIds lookupHsTokenType ast)
  else foldMapM (foldAst lookupHsTokenType) $ nodeChildren ast

visitLeafIds :: HsSemanticLookup -> HieAST t -> Tokenizer Maybe (DList (Range, HsSemanticTokenType))
visitLeafIds lookupHsTokenType leaf = liftMaybeM $ do
  let span = nodeSpan leaf
  (ran, token) <- focusTokenAt leaf
  -- if `focusTokenAt` succeed, we can safely assume we have shift the cursor correctly
  -- we do not need to recover the cursor state, even if the following computation failed
  liftMaybeM $ do
    -- only handle the leaf node with single column token
    guard $ srcSpanStartLine span == srcSpanEndLine span
    splitResult <- lift $ splitRangeByText token ran
    foldMapM (combineNodeIds lookupHsTokenType ran splitResult) $ Map.filterWithKey (\k _ -> k == SourceInfo) $ getSourcedNodeInfo $ sourcedNodeInfo leaf
  where
    combineNodeIds :: (Monad m) => HsSemanticLookup -> Range -> SplitResult -> NodeInfo a -> Tokenizer m (DList (Range, HsSemanticTokenType))
    combineNodeIds lookupHsTokenType ran ranSplit (NodeInfo _ _ bd) =
        case (maybeTokenType, ranSplit) of
            (Nothing, _) -> return mempty
            (Just TModule, _) -> return $ DL.singleton (ran, TModule)
            (Just tokenType, NoSplit (_, tokenRan)) -> return $ DL.singleton (tokenRan, tokenType)
            (Just tokenType, Split (_, ranPrefix, tokenRan)) -> return $ DL.fromList [(ranPrefix, TModule),(tokenRan, tokenType)]
        where maybeTokenType = foldMap (getIdentifier lookupHsTokenType ranSplit) (M.keys bd)

    getIdentifier :: HsSemanticLookup -> SplitResult -> Identifier -> Maybe HsSemanticTokenType
    getIdentifier lookupHsTokenType ranSplit idt = do
      case idt of
        Left _moduleName -> Just TModule
        Right name -> do
          occStr <- T.pack <$> case (occNameString . nameOccName) name of
            -- the generated selector name with {-# LANGUAGE DuplicateRecordFields #-}
            '$' : 's' : 'e' : 'l' : ':' : xs -> Just $ takeWhile (/= ':') xs
            -- other generated names that should not be visible
            '$' : c : _ | isAlphaNum c       -> Nothing
            c : ':' : _ | isAlphaNum c       -> Nothing
            ns                               -> Just ns
          guard $ getSplitTokenText ranSplit == occStr
          lookupHsTokenType idt


focusTokenAt ::
  -- | leaf node we want to focus on
  HieAST a ->
  -- | (token, remains)
  Tokenizer Maybe (Range, Text)
focusTokenAt leaf = do
  PTokenState{cursor, rope, columnsInUtf16} <- get
  let span = nodeSpan leaf
  let (tokenStartPos, tokenEndPos) = srcSpanCharPositions span
  -- tokenStartOff: the offset position of the token start position to the cursor position
  tokenStartOff <- lift $ tokenStartPos `sub` cursor
  -- tokenOff: the offset position of the token end position to the token start position
  tokenOff <- lift $ tokenEndPos `sub` tokenStartPos
  (gap, tokenStartRope) <- lift $ charSplitAtPositionMaybe tokenStartOff rope
  (token, remains) <- lift $ charSplitAtPositionMaybe tokenOff tokenStartRope
  -- ncs: token start column in utf16
  let ncs = newColumn columnsInUtf16 gap
  -- nce: token end column in utf16
  let nce = newColumn ncs token
  -- compute the new range for utf16, tuning the columns is enough
  let ran = codePointRangeToRangeWith ncs nce $ realSrcSpanToCodePointRange span
  modify $ \s -> s {columnsInUtf16 = nce, rope = remains, cursor = tokenEndPos}
  return (ran, token)
  where
    srcSpanCharPositions :: RealSrcSpan -> (Char.Position, Char.Position)
    srcSpanCharPositions real =
        ( realSrcLocRopePosition $ realSrcSpanStart real,
          realSrcLocRopePosition $ realSrcSpanEnd real
        )
    charSplitAtPositionMaybe :: Char.Position -> Rope -> Maybe (Text, Rope)
    charSplitAtPositionMaybe tokenOff rpe = do
      let (prefix, suffix) = Rope.charSplitAtPosition tokenOff rpe
      guard $ Rope.charLengthAsPosition prefix == tokenOff
      return (Rope.toText prefix, suffix)
    sub :: Char.Position -> Char.Position -> Maybe Char.Position
    sub (Char.Position l1 c1) (Char.Position l2 c2)
      | l1 == l2 && c1 >= c2 = Just $ Char.Position 0 (c1 - c2)
      | l1 > l2 = Just $ Char.Position (l1 - l2) c1
      | otherwise = Nothing
    realSrcLocRopePosition :: RealSrcLoc -> Char.Position
    realSrcLocRopePosition real = Char.Position (fromIntegral $ srcLocLine real - 1) (fromIntegral $ srcLocCol real - 1)
    -- | newColumn
    -- rope do not treat single \n in our favor
    -- for example, the row length of "123\n" and "123" are both 1
    -- we are forced to use text to compute new column
    newColumn :: UInt -> Text -> UInt
    newColumn n rp = case T.breakOnEnd "\n" rp of
      ("", nEnd) -> n + utf16Length nEnd
      (_, nEnd)  -> utf16Length nEnd
    codePointRangeToRangeWith :: UInt -> UInt -> CodePointRange -> Range
    codePointRangeToRangeWith newStartCol newEndCol (CodePointRange (CodePointPosition startLine _) (CodePointPosition endLine _)) =
      Range (Position startLine newStartCol) (Position endLine newEndCol)

-- | splitRangeByText
-- split a qualified identifier into module name and identifier and/or strip the (), ``
-- for `ModuleA.b`, break it into `ModuleA.` and `b`
-- for `(b)`, strip `()`, and get `b`
-- for `(ModuleA.b)`, strip `()` and break it into `ModuleA.` and `b`
splitRangeByText :: Text -> Range -> Maybe SplitResult
splitRangeByText tk ran = do
  let (ran', tk') = case T.uncons tk of
        Just ('(', xs) -> (subOneRange ran, T.takeWhile (/= ')') xs)
        Just ('`', xs) -> (subOneRange ran, T.takeWhile (/= '`') xs)
        _              -> (ran, tk)
  let (prefix, tk'') = T.breakOnEnd "." tk'
  splitRange tk'' (utf16PositionPosition $ Rope.utf16LengthAsPosition $ Rope.fromText prefix) ran'
  where
    splitRange :: Text -> Position -> Range -> Maybe SplitResult
    splitRange tx (Position l c) r@(Range (Position l1 c1) (Position l2 c2))
      | l1 + l > l2 || (l1 + l == l2 && c > c2) = Nothing -- out of range
      | l==0 && c==0 = Just $ NoSplit (tx, r)
      | otherwise = let c' = if l <= 0 then c1+c else c
                    in Just $ Split (tx, mkRange l1 c1 (l1 + l) c', mkRange (l1 + l) c' l2 c2)
    subOneRange :: Range -> Range
    subOneRange (Range (Position l1 c1) (Position l2 c2)) = Range (Position l1 (c1 + 1)) (Position l2 (c2 - 1))
    utf16PositionPosition :: Utf16.Position -> Position
    utf16PositionPosition (Utf16.Position l c) = Position (fromIntegral l) (fromIntegral c)


utf16Length :: Integral i => Text -> i
utf16Length = fromIntegral . Utf16.length . Utf16.fromText