-- Alfred-Margaret: Fast Aho-Corasick string searching
-- Copyright 2019 Channable
--
-- Licensed under the 3-clause BSD license, see the LICENSE file in the
-- repository root.

{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE FlexibleInstances #-}

module Data.Text.BoyerMoore.Replacer
  ( -- Replacer
  replaceSingleLimited
  )
  where

import Data.Text (Text)
import Data.Text.BoyerMoore.Automaton (CodeUnitIndex, Automaton)

import qualified Data.Text as Text

import Data.Text.BoyerMoore.Automaton (CaseSensitivity (..))

import qualified Data.Text.BoyerMoore.Automaton as BoyerMoore
import qualified Data.Text.Utf16 as Utf16

-- | Replace all occurrences matched by the Boyer-Moore automaton
-- with the given replacement text in some haystack.
replaceSingleLimited
  :: CaseSensitivity
  -- ^ In case of 'IgnoreCase', the automaton must have been created with a lower-case needle
  -> Automaton -- ^ Matches the needles
  -> Text -- ^ Replacement string
  -> Text -- ^ Haystack
  -> CodeUnitIndex -- ^ Maximum number of code units in the returned text
  -> Maybe Text
replaceSingleLimited caseSensitivity needle replacement haystack maxLength
  | needleLength == 0 = Just $ if haystackLength == 0 then replacement else haystack
  | otherwise = finish $ case caseSensitivity of
      CaseSensitive -> BoyerMoore.runText initial foundMatch needle haystack
      IgnoreCase -> BoyerMoore.runLower initial foundMatch needle haystack
  where
    needleLength = BoyerMoore.patternLength needle
    haystackLength = Utf16.lengthUtf16 haystack
    replacementLength = Utf16.lengthUtf16 replacement

    initial = ReplaceState
      { rsChunks = []
      , rsPreviousMatchEnd = 0
      , rsLength = 0
      }

    foundMatch rs matchStart =
      let
        matchEnd = matchStart + needleLength

        -- Slice the part of the haystack between the end of the previous match
        -- and the start of the current match
        haystackPartLength = matchStart - rsPreviousMatchEnd rs
        haystackPart = Utf16.unsafeSliceUtf16 (rsPreviousMatchEnd rs) haystackPartLength haystack

        -- Add the preceding part of the haystack and the replacement in reverse
        -- order to the chunk list (all chunks will be reversed at once in the final step).
        newChunks = replacement : haystackPart : rsChunks rs
        newLength = replacementLength + haystackPartLength + rsLength rs

        newState = ReplaceState
          { rsChunks = newChunks
          , rsPreviousMatchEnd = matchEnd
          , rsLength = newLength
          }
      in
        if newLength > maxLength
          then BoyerMoore.Done newState
          else BoyerMoore.Step newState

    finish rs =
      let
        -- Slice the remaining part of the haystack from the end of the last match
        -- to the end of the haystack.
        haystackPartLength = haystackLength - rsPreviousMatchEnd rs
        finalChunks
            = Utf16.unsafeSliceUtf16 (rsPreviousMatchEnd rs) haystackPartLength haystack
            : rsChunks rs
        finalLength = rsLength rs + haystackPartLength
      in
        if finalLength > maxLength
          then Nothing
          else Just $ Text.concat $ reverse finalChunks

-- | Internal accumulator state for performing a replace while stepping an automaton
data ReplaceState = ReplaceState
  { rsChunks :: [Text]
    -- ^ Chunks of the final text, in reverse order so that we can efficiently prepend
  , rsPreviousMatchEnd :: !CodeUnitIndex
    -- ^ Index one past the end of the last match.
  , rsLength :: !CodeUnitIndex
    -- ^ Length of the newly build string so far, measured in CodeUnits
  }