-- Alfred-Margaret: Fast Aho-Corasick string searching
-- Copyright 2019 Channable
--
-- Licensed under the 3-clause BSD license, see the LICENSE file in the
-- repository root.

{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE FlexibleInstances #-}

module Data.Text.BoyerMoore.Replacer
    ( -- Replacer
      replaceSingleLimited
    ) where

import Data.Text.Utf8 (Text)
import Data.Text.BoyerMoore.Automaton (Automaton, CodeUnitIndex)

import qualified Data.Text.Utf8 as Text
import qualified Data.Text.Utf8 as Utf8
import qualified Data.Text.BoyerMoore.Automaton as BoyerMoore

-- | Replace all occurrences matched by the Boyer-Moore automaton
-- with the given replacement text in some haystack.
-- Performs case-sensitive replacement.
replaceSingleLimited
  :: Automaton -- ^ Matches the needles
  -> Text -- ^ Replacement string
  -> Text -- ^ Haystack
  -> CodeUnitIndex -- ^ Maximum number of code units in the returned text
  -> Maybe Text
replaceSingleLimited :: Automaton -> Text -> Text -> CodeUnitIndex -> Maybe Text
replaceSingleLimited Automaton
needle Text
replacement Text
haystack CodeUnitIndex
maxLength
  | CodeUnitIndex
needleLength forall a. Eq a => a -> a -> Bool
== CodeUnitIndex
0 = forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ if CodeUnitIndex
haystackLength forall a. Eq a => a -> a -> Bool
== CodeUnitIndex
0 then Text
replacement else Text
haystack
  | Bool
otherwise = ReplaceState -> Maybe Text
finish forall a b. (a -> b) -> a -> b
$ forall a.
a -> (a -> CodeUnitIndex -> Next a) -> Automaton -> Text -> a
BoyerMoore.runText ReplaceState
initial ReplaceState -> CodeUnitIndex -> Next ReplaceState
foundMatch Automaton
needle Text
haystack
  where
    needleLength :: CodeUnitIndex
needleLength = Automaton -> CodeUnitIndex
BoyerMoore.patternLength Automaton
needle
    haystackLength :: CodeUnitIndex
haystackLength = Text -> CodeUnitIndex
Utf8.lengthUtf8 Text
haystack
    replacementLength :: CodeUnitIndex
replacementLength = Text -> CodeUnitIndex
Utf8.lengthUtf8 Text
replacement

    initial :: ReplaceState
initial = ReplaceState
      { rsChunks :: [Text]
rsChunks = []
      , rsPreviousMatchEnd :: CodeUnitIndex
rsPreviousMatchEnd = CodeUnitIndex
0
      , rsLength :: CodeUnitIndex
rsLength = CodeUnitIndex
0
      }

    foundMatch :: ReplaceState -> CodeUnitIndex -> Next ReplaceState
foundMatch ReplaceState
rs CodeUnitIndex
matchStart =
      let
        matchEnd :: CodeUnitIndex
matchEnd = CodeUnitIndex
matchStart forall a. Num a => a -> a -> a
+ CodeUnitIndex
needleLength

        -- Slice the part of the haystack between the end of the previous match
        -- and the start of the current match
        haystackPartLength :: CodeUnitIndex
haystackPartLength = CodeUnitIndex
matchStart forall a. Num a => a -> a -> a
- ReplaceState -> CodeUnitIndex
rsPreviousMatchEnd ReplaceState
rs
        haystackPart :: Text
haystackPart = CodeUnitIndex -> CodeUnitIndex -> Text -> Text
Utf8.unsafeSliceUtf8 (ReplaceState -> CodeUnitIndex
rsPreviousMatchEnd ReplaceState
rs) CodeUnitIndex
haystackPartLength Text
haystack

        -- Add the preceding part of the haystack and the replacement in reverse
        -- order to the chunk list (all chunks will be reversed at once in the final step).
        newChunks :: [Text]
newChunks = Text
replacement forall a. a -> [a] -> [a]
: Text
haystackPart forall a. a -> [a] -> [a]
: ReplaceState -> [Text]
rsChunks ReplaceState
rs
        newLength :: CodeUnitIndex
newLength = CodeUnitIndex
replacementLength forall a. Num a => a -> a -> a
+ CodeUnitIndex
haystackPartLength forall a. Num a => a -> a -> a
+ ReplaceState -> CodeUnitIndex
rsLength ReplaceState
rs

        newState :: ReplaceState
newState = ReplaceState
          { rsChunks :: [Text]
rsChunks = [Text]
newChunks
          , rsPreviousMatchEnd :: CodeUnitIndex
rsPreviousMatchEnd = CodeUnitIndex
matchEnd
          , rsLength :: CodeUnitIndex
rsLength = CodeUnitIndex
newLength
          }
      in
        if CodeUnitIndex
newLength forall a. Ord a => a -> a -> Bool
> CodeUnitIndex
maxLength
          then forall a. a -> Next a
BoyerMoore.Done ReplaceState
newState
          else forall a. a -> Next a
BoyerMoore.Step ReplaceState
newState

    finish :: ReplaceState -> Maybe Text
finish ReplaceState
rs =
      let
        -- Slice the remaining part of the haystack from the end of the last match
        -- to the end of the haystack.
        haystackPartLength :: CodeUnitIndex
haystackPartLength = CodeUnitIndex
haystackLength forall a. Num a => a -> a -> a
- ReplaceState -> CodeUnitIndex
rsPreviousMatchEnd ReplaceState
rs
        finalChunks :: [Text]
finalChunks
            = CodeUnitIndex -> CodeUnitIndex -> Text -> Text
Utf8.unsafeSliceUtf8 (ReplaceState -> CodeUnitIndex
rsPreviousMatchEnd ReplaceState
rs) CodeUnitIndex
haystackPartLength Text
haystack
            forall a. a -> [a] -> [a]
: ReplaceState -> [Text]
rsChunks ReplaceState
rs
        finalLength :: CodeUnitIndex
finalLength = ReplaceState -> CodeUnitIndex
rsLength ReplaceState
rs forall a. Num a => a -> a -> a
+ CodeUnitIndex
haystackPartLength
      in
        if CodeUnitIndex
finalLength forall a. Ord a => a -> a -> Bool
> CodeUnitIndex
maxLength
          then forall a. Maybe a
Nothing
          else forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ [Text] -> Text
Text.concat forall a b. (a -> b) -> a -> b
$ forall a. [a] -> [a]
reverse [Text]
finalChunks

-- | Internal accumulator state for performing a replace while stepping an automaton
data ReplaceState = ReplaceState
  { ReplaceState -> [Text]
rsChunks :: [Text]
    -- ^ Chunks of the final text, in reverse order so that we can efficiently prepend
  , ReplaceState -> CodeUnitIndex
rsPreviousMatchEnd :: !CodeUnitIndex
    -- ^ Index one past the end of the last match.
  , ReplaceState -> CodeUnitIndex
rsLength :: !CodeUnitIndex
    -- ^ Length of the newly build string so far, measured in CodeUnits
  }