{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DerivingStrategies #-}
module Data.Text.AhoCorasick.Replacer
(
Needle
, Payload (..)
, Replacement
, Replacer (..)
, replacerCaseSensitivity
, build
, compose
, mapReplacement
, run
, runWithLimit
, setCaseSensitivity
) where
import Control.DeepSeq (NFData)
import Data.Hashable (Hashable)
import Data.List (sort)
import Data.Maybe (fromJust)
import GHC.Generics (Generic)
#if defined(HAS_AESON)
import qualified Data.Aeson as AE
#endif
import Data.Text.AhoCorasick.Searcher (Searcher)
import Data.Text.CaseSensitivity (CaseSensitivity (..))
import Data.Text.Utf8 (CodeUnitIndex (..), Text)
import qualified Data.Text as Text
import qualified Data.Text.AhoCorasick.Automaton as Aho
import qualified Data.Text.AhoCorasick.Searcher as Searcher
import qualified Data.Text.Utf8 as Utf8
type Needle = Text
type Replacement = Text
type Priority = Int
data Payload = Payload
{ Payload -> Priority
needlePriority :: {-# UNPACK #-} !Priority
, Payload -> CodeUnitIndex
needleLengthBytes :: {-# UNPACK #-} !CodeUnitIndex
, Payload -> Priority
needleLengthCodePoints :: {-# UNPACK #-} !Int
, Payload -> Replacement
needleReplacement :: !Replacement
}
#if defined(HAS_AESON)
deriving (Payload -> Payload -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Payload -> Payload -> Bool
$c/= :: Payload -> Payload -> Bool
== :: Payload -> Payload -> Bool
$c== :: Payload -> Payload -> Bool
Eq, forall x. Rep Payload x -> Payload
forall x. Payload -> Rep Payload x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Payload x -> Payload
$cfrom :: forall x. Payload -> Rep Payload x
Generic, Eq Payload
Priority -> Payload -> Priority
Payload -> Priority
forall a.
Eq a
-> (Priority -> a -> Priority) -> (a -> Priority) -> Hashable a
hash :: Payload -> Priority
$chash :: Payload -> Priority
hashWithSalt :: Priority -> Payload -> Priority
$chashWithSalt :: Priority -> Payload -> Priority
Hashable, Payload -> ()
forall a. (a -> ()) -> NFData a
rnf :: Payload -> ()
$crnf :: Payload -> ()
NFData, Priority -> Payload -> ShowS
[Payload] -> ShowS
Payload -> String
forall a.
(Priority -> a -> ShowS)
-> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Payload] -> ShowS
$cshowList :: [Payload] -> ShowS
show :: Payload -> String
$cshow :: Payload -> String
showsPrec :: Priority -> Payload -> ShowS
$cshowsPrec :: Priority -> Payload -> ShowS
Show, Value -> Parser [Payload]
Value -> Parser Payload
forall a.
(Value -> Parser a) -> (Value -> Parser [a]) -> FromJSON a
parseJSONList :: Value -> Parser [Payload]
$cparseJSONList :: Value -> Parser [Payload]
parseJSON :: Value -> Parser Payload
$cparseJSON :: Value -> Parser Payload
AE.FromJSON, [Payload] -> Encoding
[Payload] -> Value
Payload -> Encoding
Payload -> Value
forall a.
(a -> Value)
-> (a -> Encoding)
-> ([a] -> Value)
-> ([a] -> Encoding)
-> ToJSON a
toEncodingList :: [Payload] -> Encoding
$ctoEncodingList :: [Payload] -> Encoding
toJSONList :: [Payload] -> Value
$ctoJSONList :: [Payload] -> Value
toEncoding :: Payload -> Encoding
$ctoEncoding :: Payload -> Encoding
toJSON :: Payload -> Value
$ctoJSON :: Payload -> Value
AE.ToJSON)
#else
deriving (Eq, Generic, Hashable, NFData, Show)
#endif
data Replacer = Replacer
{ Replacer -> Searcher Payload
replacerSearcher :: Searcher Payload
}
deriving stock (Priority -> Replacer -> ShowS
[Replacer] -> ShowS
Replacer -> String
forall a.
(Priority -> a -> ShowS)
-> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Replacer] -> ShowS
$cshowList :: [Replacer] -> ShowS
show :: Replacer -> String
$cshow :: Replacer -> String
showsPrec :: Priority -> Replacer -> ShowS
$cshowsPrec :: Priority -> Replacer -> ShowS
Show, Replacer -> Replacer -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Replacer -> Replacer -> Bool
$c/= :: Replacer -> Replacer -> Bool
== :: Replacer -> Replacer -> Bool
$c== :: Replacer -> Replacer -> Bool
Eq, forall x. Rep Replacer x -> Replacer
forall x. Replacer -> Rep Replacer x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Replacer x -> Replacer
$cfrom :: forall x. Replacer -> Rep Replacer x
Generic)
#if defined(HAS_AESON)
deriving (Eq Replacer
Priority -> Replacer -> Priority
Replacer -> Priority
forall a.
Eq a
-> (Priority -> a -> Priority) -> (a -> Priority) -> Hashable a
hash :: Replacer -> Priority
$chash :: Replacer -> Priority
hashWithSalt :: Priority -> Replacer -> Priority
$chashWithSalt :: Priority -> Replacer -> Priority
Hashable, Replacer -> ()
forall a. (a -> ()) -> NFData a
rnf :: Replacer -> ()
$crnf :: Replacer -> ()
NFData, Value -> Parser [Replacer]
Value -> Parser Replacer
forall a.
(Value -> Parser a) -> (Value -> Parser [a]) -> FromJSON a
parseJSONList :: Value -> Parser [Replacer]
$cparseJSONList :: Value -> Parser [Replacer]
parseJSON :: Value -> Parser Replacer
$cparseJSON :: Value -> Parser Replacer
AE.FromJSON, [Replacer] -> Encoding
[Replacer] -> Value
Replacer -> Encoding
Replacer -> Value
forall a.
(a -> Value)
-> (a -> Encoding)
-> ([a] -> Value)
-> ([a] -> Encoding)
-> ToJSON a
toEncodingList :: [Replacer] -> Encoding
$ctoEncodingList :: [Replacer] -> Encoding
toJSONList :: [Replacer] -> Value
$ctoJSONList :: [Replacer] -> Value
toEncoding :: Replacer -> Encoding
$ctoEncoding :: Replacer -> Encoding
toJSON :: Replacer -> Value
$ctoJSON :: Replacer -> Value
AE.ToJSON)
#else
deriving (Hashable, NFData)
#endif
build :: CaseSensitivity -> [(Needle, Replacement)] -> Replacer
build :: CaseSensitivity -> [(Replacement, Replacement)] -> Replacer
build CaseSensitivity
caseSensitivity [(Replacement, Replacement)]
replaces = Searcher Payload -> Replacer
Replacer Searcher Payload
searcher
where
searcher :: Searcher Payload
searcher = forall v.
Hashable v =>
CaseSensitivity -> [(Replacement, v)] -> Searcher v
Searcher.buildWithValues CaseSensitivity
caseSensitivity forall a b. (a -> b) -> a -> b
$ forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith Priority -> (Replacement, Replacement) -> (Replacement, Payload)
mapNeedle [Priority
0..] [(Replacement, Replacement)]
replaces
mapNeedle :: Priority -> (Replacement, Replacement) -> (Replacement, Payload)
mapNeedle Priority
i (Replacement
needle, Replacement
replacement) =
let needle' :: Replacement
needle' = case forall v. Searcher v -> CaseSensitivity
Searcher.caseSensitivity Searcher Payload
searcher of
CaseSensitivity
CaseSensitive -> Replacement
needle
CaseSensitivity
IgnoreCase -> Replacement -> Replacement
Utf8.lowerUtf8 Replacement
needle
payload :: Payload
payload = Payload
{ needlePriority :: Priority
needlePriority = (-Priority
i)
, needleLengthBytes :: CodeUnitIndex
needleLengthBytes = Replacement -> CodeUnitIndex
Utf8.lengthUtf8 Replacement
needle
, needleLengthCodePoints :: Priority
needleLengthCodePoints = Replacement -> Priority
Text.length Replacement
needle
, needleReplacement :: Replacement
needleReplacement = Replacement
replacement
}
in (Replacement
needle', Payload
payload)
compose :: Replacer -> Replacer -> Maybe Replacer
compose :: Replacer -> Replacer -> Maybe Replacer
compose (Replacer Searcher Payload
searcher1) (Replacer Searcher Payload
searcher2)
| forall v. Searcher v -> CaseSensitivity
Searcher.caseSensitivity Searcher Payload
searcher1 forall a. Eq a => a -> a -> Bool
/= forall v. Searcher v -> CaseSensitivity
Searcher.caseSensitivity Searcher Payload
searcher2 = forall a. Maybe a
Nothing
| Bool
otherwise =
let
renumber :: Priority -> (a, Payload) -> (a, Payload)
renumber Priority
i (a
needle, Payload Priority
_ CodeUnitIndex
lenb Priority
lenc Replacement
replacement) = (a
needle, Priority -> CodeUnitIndex -> Priority -> Replacement -> Payload
Payload (-Priority
i) CodeUnitIndex
lenb Priority
lenc Replacement
replacement)
needles1 :: [(Replacement, Payload)]
needles1 = forall v. Searcher v -> [(Replacement, v)]
Searcher.needles Searcher Payload
searcher1
needles2 :: [(Replacement, Payload)]
needles2 = forall v. Searcher v -> [(Replacement, v)]
Searcher.needles Searcher Payload
searcher2
cs :: CaseSensitivity
cs = forall v. Searcher v -> CaseSensitivity
Searcher.caseSensitivity Searcher Payload
searcher1
searcher :: Searcher Payload
searcher = forall v.
Hashable v =>
CaseSensitivity -> [(Replacement, v)] -> Searcher v
Searcher.buildWithValues CaseSensitivity
cs forall a b. (a -> b) -> a -> b
$ forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith forall {a}. Priority -> (a, Payload) -> (a, Payload)
renumber [Priority
0..] ([(Replacement, Payload)]
needles1 forall a. [a] -> [a] -> [a]
++ [(Replacement, Payload)]
needles2)
in
forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ Searcher Payload -> Replacer
Replacer Searcher Payload
searcher
mapReplacement :: (Replacement -> Replacement) -> Replacer -> Replacer
mapReplacement :: (Replacement -> Replacement) -> Replacer -> Replacer
mapReplacement Replacement -> Replacement
f Replacer
replacer = Replacer
replacer{
replacerSearcher :: Searcher Payload
replacerSearcher = forall b a. Hashable b => (a -> b) -> Searcher a -> Searcher b
Searcher.mapSearcher
(\Payload
p -> Payload
p {needleReplacement :: Replacement
needleReplacement = Replacement -> Replacement
f (Payload -> Replacement
needleReplacement Payload
p)})
(Replacer -> Searcher Payload
replacerSearcher Replacer
replacer)
}
replacerCaseSensitivity :: Replacer -> CaseSensitivity
replacerCaseSensitivity :: Replacer -> CaseSensitivity
replacerCaseSensitivity (Replacer Searcher Payload
searcher) = forall v. Searcher v -> CaseSensitivity
Searcher.caseSensitivity Searcher Payload
searcher
setCaseSensitivity :: CaseSensitivity -> Replacer -> Replacer
setCaseSensitivity :: CaseSensitivity -> Replacer -> Replacer
setCaseSensitivity CaseSensitivity
case_ (Replacer Searcher Payload
searcher) =
Searcher Payload -> Replacer
Replacer (forall v. CaseSensitivity -> Searcher v -> Searcher v
Searcher.setCaseSensitivity CaseSensitivity
case_ Searcher Payload
searcher)
data Match = Match !CodeUnitIndex !CodeUnitIndex !Text deriving (Match -> Match -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Match -> Match -> Bool
$c/= :: Match -> Match -> Bool
== :: Match -> Match -> Bool
$c== :: Match -> Match -> Bool
Eq, Eq Match
Match -> Match -> Bool
Match -> Match -> Ordering
Match -> Match -> Match
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Match -> Match -> Match
$cmin :: Match -> Match -> Match
max :: Match -> Match -> Match
$cmax :: Match -> Match -> Match
>= :: Match -> Match -> Bool
$c>= :: Match -> Match -> Bool
> :: Match -> Match -> Bool
$c> :: Match -> Match -> Bool
<= :: Match -> Match -> Bool
$c<= :: Match -> Match -> Bool
< :: Match -> Match -> Bool
$c< :: Match -> Match -> Bool
compare :: Match -> Match -> Ordering
$ccompare :: Match -> Match -> Ordering
Ord, Priority -> Match -> ShowS
[Match] -> ShowS
Match -> String
forall a.
(Priority -> a -> ShowS)
-> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Match] -> ShowS
$cshowList :: [Match] -> ShowS
show :: Match -> String
$cshow :: Match -> String
showsPrec :: Priority -> Match -> ShowS
$cshowsPrec :: Priority -> Match -> ShowS
Show)
replace :: [Match] -> Text -> Text
replace :: [Match] -> Replacement -> Replacement
replace [Match]
matches Replacement
haystack = [Replacement] -> Replacement
Utf8.concat forall a b. (a -> b) -> a -> b
$ CodeUnitIndex -> [Match] -> Replacement -> [Replacement]
go CodeUnitIndex
0 [Match]
matches Replacement
haystack
where
go :: CodeUnitIndex -> [Match] -> Text -> [Text]
go :: CodeUnitIndex -> [Match] -> Replacement -> [Replacement]
go !CodeUnitIndex
_offset [] Replacement
remainder = [Replacement
remainder]
go !CodeUnitIndex
offset ((Match CodeUnitIndex
pos CodeUnitIndex
len Replacement
replacement) : [Match]
ms) Replacement
remainder =
let
(Replacement
prefix, Replacement
suffix) = CodeUnitIndex
-> CodeUnitIndex -> Replacement -> (Replacement, Replacement)
Utf8.unsafeCutUtf8 (CodeUnitIndex
pos forall a. Num a => a -> a -> a
- CodeUnitIndex
offset) CodeUnitIndex
len Replacement
remainder
in
Replacement
prefix forall a. a -> [a] -> [a]
: Replacement
replacement forall a. a -> [a] -> [a]
: CodeUnitIndex -> [Match] -> Replacement -> [Replacement]
go (CodeUnitIndex
pos forall a. Num a => a -> a -> a
+ CodeUnitIndex
len) [Match]
ms Replacement
suffix
replacementLength :: [Match] -> Text -> CodeUnitIndex
replacementLength :: [Match] -> Replacement -> CodeUnitIndex
replacementLength [Match]
matches Replacement
initial = [Match] -> CodeUnitIndex -> CodeUnitIndex
go [Match]
matches (Replacement -> CodeUnitIndex
Utf8.lengthUtf8 Replacement
initial)
where
go :: [Match] -> CodeUnitIndex -> CodeUnitIndex
go [] !CodeUnitIndex
acc = CodeUnitIndex
acc
go (Match CodeUnitIndex
_ CodeUnitIndex
matchLen Replacement
repl : [Match]
rest) !CodeUnitIndex
acc = [Match] -> CodeUnitIndex -> CodeUnitIndex
go [Match]
rest (CodeUnitIndex
acc forall a. Num a => a -> a -> a
- CodeUnitIndex
matchLen forall a. Num a => a -> a -> a
+ Replacement -> CodeUnitIndex
Utf8.lengthUtf8 Replacement
repl)
removeOverlap :: [Match] -> [Match]
removeOverlap :: [Match] -> [Match]
removeOverlap [Match]
matches = case [Match]
matches of
[] -> []
[Match
m] -> [Match
m]
(m0 :: Match
m0@(Match CodeUnitIndex
pos0 CodeUnitIndex
len0 Replacement
_) : m1 :: Match
m1@(Match CodeUnitIndex
pos1 CodeUnitIndex
_ Replacement
_) : [Match]
ms) ->
if CodeUnitIndex
pos1 forall a. Ord a => a -> a -> Bool
>= CodeUnitIndex
pos0 forall a. Num a => a -> a -> a
+ CodeUnitIndex
len0
then Match
m0 forall a. a -> [a] -> [a]
: [Match] -> [Match]
removeOverlap (Match
m1forall a. a -> [a] -> [a]
:[Match]
ms)
else [Match] -> [Match]
removeOverlap (Match
m0forall a. a -> [a] -> [a]
:[Match]
ms)
run :: Replacer -> Text -> Text
run :: Replacer -> Replacement -> Replacement
run Replacer
replacer = forall a. HasCallStack => Maybe a -> a
fromJust forall b c a. (b -> c) -> (a -> b) -> a -> c
. Replacer -> CodeUnitIndex -> Replacement -> Maybe Replacement
runWithLimit Replacer
replacer forall a. Bounded a => a
maxBound
{-# NOINLINE runWithLimit #-}
runWithLimit :: Replacer -> CodeUnitIndex -> Text -> Maybe Text
runWithLimit :: Replacer -> CodeUnitIndex -> Replacement -> Maybe Replacement
runWithLimit (Replacer Searcher Payload
searcher) CodeUnitIndex
maxLength = Priority -> Replacement -> Maybe Replacement
go Priority
initialThreshold
where
!automaton :: AcMachine Payload
automaton = forall v. Searcher v -> AcMachine v
Searcher.automaton Searcher Payload
searcher
!initialThreshold :: Priority
initialThreshold = Priority
1
!minPriority :: Priority
minPriority = Priority
1 forall a. Num a => a -> a -> a
- forall v. Searcher v -> Priority
Searcher.numNeedles Searcher Payload
searcher
go :: Priority -> Text -> Maybe Text
go :: Priority -> Replacement -> Maybe Replacement
go !Priority
threshold Replacement
haystack =
let
seed :: (Priority, [a])
seed = (forall a. Bounded a => a
minBound :: Priority, [])
matchesWithPriority :: (Priority, [Match])
matchesWithPriority = case forall v. Searcher v -> CaseSensitivity
Searcher.caseSensitivity Searcher Payload
searcher of
CaseSensitivity
CaseSensitive -> forall a v.
a -> (a -> Match v -> Next a) -> AcMachine v -> Replacement -> a
Aho.runText forall {a}. (Priority, [a])
seed (Priority
-> Replacement
-> (Priority, [Match])
-> Match Payload
-> Next (Priority, [Match])
prependMatch Priority
threshold Replacement
haystack) AcMachine Payload
automaton Replacement
haystack
CaseSensitivity
IgnoreCase -> forall a v.
a -> (a -> Match v -> Next a) -> AcMachine v -> Replacement -> a
Aho.runLower forall {a}. (Priority, [a])
seed (Priority
-> Replacement
-> (Priority, [Match])
-> Match Payload
-> Next (Priority, [Match])
prependMatch Priority
threshold Replacement
haystack) AcMachine Payload
automaton Replacement
haystack
in
case (Priority, [Match])
matchesWithPriority of
(Priority
_, []) -> forall a. a -> Maybe a
Just Replacement
haystack
(Priority
p, [Match]
matches)
| [Match] -> Replacement -> CodeUnitIndex
replacementLength [Match]
matches Replacement
haystack forall a. Ord a => a -> a -> Bool
> CodeUnitIndex
maxLength -> forall a. Maybe a
Nothing
| Priority
p forall a. Eq a => a -> a -> Bool
== Priority
minPriority -> forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ [Match] -> Replacement -> Replacement
replace ([Match] -> [Match]
removeOverlap forall a b. (a -> b) -> a -> b
$ forall a. Ord a => [a] -> [a]
sort [Match]
matches) Replacement
haystack
| Bool
otherwise -> Priority -> Replacement -> Maybe Replacement
go Priority
p forall a b. (a -> b) -> a -> b
$ [Match] -> Replacement -> Replacement
replace ([Match] -> [Match]
removeOverlap forall a b. (a -> b) -> a -> b
$ forall a. Ord a => [a] -> [a]
sort [Match]
matches) Replacement
haystack
prependMatch
:: Priority -> Text -> (Priority, [Match]) -> Aho.Match Payload -> Aho.Next (Priority, [Match])
{-# INLINE prependMatch #-}
prependMatch :: Priority
-> Replacement
-> (Priority, [Match])
-> Match Payload
-> Next (Priority, [Match])
prependMatch !Priority
threshold Replacement
haystack (!Priority
pBest, ![Match]
matches) (Aho.Match CodeUnitIndex
pos (Payload Priority
pMatch CodeUnitIndex
lenb Priority
lenc Replacement
replacement))
| Priority
pMatch forall a. Ord a => a -> a -> Bool
< Priority
threshold Bool -> Bool -> Bool
&& Priority
pMatch forall a. Ord a => a -> a -> Bool
> Priority
pBest =
forall a. a -> Next a
Aho.Step (Priority
pMatch, [Replacement
-> CodeUnitIndex
-> CodeUnitIndex
-> Priority
-> Replacement
-> Match
makeMatch Replacement
haystack CodeUnitIndex
pos CodeUnitIndex
lenb Priority
lenc Replacement
replacement])
| Priority
pMatch forall a. Ord a => a -> a -> Bool
< Priority
threshold Bool -> Bool -> Bool
&& Priority
pMatch forall a. Eq a => a -> a -> Bool
== Priority
pBest =
forall a. a -> Next a
Aho.Step (Priority
pMatch, Replacement
-> CodeUnitIndex
-> CodeUnitIndex
-> Priority
-> Replacement
-> Match
makeMatch Replacement
haystack CodeUnitIndex
pos CodeUnitIndex
lenb Priority
lenc Replacement
replacement forall a. a -> [a] -> [a]
: [Match]
matches)
| Bool
otherwise = forall a. a -> Next a
Aho.Step (Priority
pBest, [Match]
matches)
makeMatch :: Text -> CodeUnitIndex -> CodeUnitIndex -> Int -> Replacement -> Match
{-# INLINE makeMatch #-}
makeMatch :: Replacement
-> CodeUnitIndex
-> CodeUnitIndex
-> Priority
-> Replacement
-> Match
makeMatch = case forall v. Searcher v -> CaseSensitivity
Searcher.caseSensitivity Searcher Payload
searcher of
CaseSensitivity
CaseSensitive -> \Replacement
_ CodeUnitIndex
pos CodeUnitIndex
lenb Priority
_ Replacement
replacement ->
CodeUnitIndex -> CodeUnitIndex -> Replacement -> Match
Match (CodeUnitIndex
pos forall a. Num a => a -> a -> a
- CodeUnitIndex
lenb) CodeUnitIndex
lenb Replacement
replacement
CaseSensitivity
IgnoreCase -> \Replacement
haystack CodeUnitIndex
pos CodeUnitIndex
_ Priority
lenc Replacement
replacement ->
let start :: CodeUnitIndex
start = Replacement -> CodeUnitIndex -> Priority -> CodeUnitIndex
Utf8.skipCodePointsBackwards Replacement
haystack (CodeUnitIndex
posforall a. Num a => a -> a -> a
-CodeUnitIndex
1) (Priority
lencforall a. Num a => a -> a -> a
-Priority
1) in
CodeUnitIndex -> CodeUnitIndex -> Replacement -> Match
Match CodeUnitIndex
start (CodeUnitIndex
pos forall a. Num a => a -> a -> a
- CodeUnitIndex
start) Replacement
replacement