{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
module Data.Text.AhoCorasick.Splitter
( Splitter
, automaton
, build
, separator
, split
, splitIgnoreCase
, splitReverse
, splitReverseIgnoreCase
) where
import Control.DeepSeq (NFData (..))
import Data.Function (on)
import Data.Hashable (Hashable (..))
import Data.List.NonEmpty (NonEmpty ((:|)))
import Data.Text.Utf8 (Text)
#if defined(HAS_AESON)
import qualified Data.Aeson as AE
#endif
import qualified Data.List.NonEmpty as NonEmpty
import Data.Text.AhoCorasick.Automaton (AcMachine)
import qualified Data.Text.Utf8 as Utf8
import qualified Data.Text.AhoCorasick.Automaton as Aho
data Splitter =
Splitter
{ Splitter -> AcMachine ()
splitterAutomaton :: AcMachine ()
, Splitter -> Text
splitterSeparator :: Text
}
#if defined(HAS_AESON)
instance AE.ToJSON Splitter where
toJSON :: Splitter -> Value
toJSON = Text -> Value
forall a. ToJSON a => a -> Value
AE.toJSON (Text -> Value) -> (Splitter -> Text) -> Splitter -> Value
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Splitter -> Text
separator
instance AE.FromJSON Splitter where
parseJSON :: Value -> Parser Splitter
parseJSON Value
v = Text -> Splitter
build (Text -> Splitter) -> Parser Text -> Parser Splitter
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Value -> Parser Text
forall a. FromJSON a => Value -> Parser a
AE.parseJSON Value
v
#endif
{-# INLINE build #-}
build :: Text -> Splitter
build :: Text -> Splitter
build Text
sep =
let !auto :: AcMachine ()
auto = [(Text, ())] -> AcMachine ()
forall v. [(Text, v)] -> AcMachine v
Aho.build [(Text
sep, ())] in
AcMachine () -> Text -> Splitter
Splitter AcMachine ()
auto Text
sep
{-# INLINE automaton #-}
automaton :: Splitter -> AcMachine ()
automaton :: Splitter -> AcMachine ()
automaton = Splitter -> AcMachine ()
splitterAutomaton
{-# INLINE separator #-}
separator :: Splitter -> Text
separator :: Splitter -> Text
separator = Splitter -> Text
splitterSeparator
{-# INLINE split #-}
split :: Splitter -> Text -> NonEmpty Text
split :: Splitter -> Text -> NonEmpty Text
split = (NonEmpty Text -> NonEmpty Text
forall a. NonEmpty a -> NonEmpty a
NonEmpty.reverse (NonEmpty Text -> NonEmpty Text)
-> (Text -> NonEmpty Text) -> Text -> NonEmpty Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
.) ((Text -> NonEmpty Text) -> Text -> NonEmpty Text)
-> (Splitter -> Text -> NonEmpty Text)
-> Splitter
-> Text
-> NonEmpty Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Splitter -> Text -> NonEmpty Text
splitReverse
{-# INLINE splitIgnoreCase #-}
splitIgnoreCase :: Splitter -> Text -> NonEmpty Text
splitIgnoreCase :: Splitter -> Text -> NonEmpty Text
splitIgnoreCase = (NonEmpty Text -> NonEmpty Text
forall a. NonEmpty a -> NonEmpty a
NonEmpty.reverse (NonEmpty Text -> NonEmpty Text)
-> (Text -> NonEmpty Text) -> Text -> NonEmpty Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
.) ((Text -> NonEmpty Text) -> Text -> NonEmpty Text)
-> (Splitter -> Text -> NonEmpty Text)
-> Splitter
-> Text
-> NonEmpty Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Splitter -> Text -> NonEmpty Text
splitReverseIgnoreCase
{-# INLINE splitReverse #-}
splitReverse :: Splitter -> Text -> NonEmpty Text
splitReverse :: Splitter -> Text -> NonEmpty Text
splitReverse Splitter
s Text
t =
Accum -> NonEmpty Text
finalizeAccum (Accum -> NonEmpty Text) -> Accum -> NonEmpty Text
forall a b. (a -> b) -> a -> b
$
Accum
-> (Accum -> Match () -> Next Accum)
-> AcMachine ()
-> Text
-> Accum
forall a v.
a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
Aho.runText
(Text -> Text -> Accum
zeroAccum (Splitter -> Text
separator Splitter
s) Text
t)
Accum -> Match () -> Next Accum
forall v. Accum -> Match v -> Next Accum
stepAccum
(Splitter -> AcMachine ()
automaton Splitter
s)
Text
t
{-# INLINE splitReverseIgnoreCase #-}
splitReverseIgnoreCase :: Splitter -> Text -> NonEmpty Text
splitReverseIgnoreCase :: Splitter -> Text -> NonEmpty Text
splitReverseIgnoreCase Splitter
s Text
t =
Accum -> NonEmpty Text
finalizeAccum (Accum -> NonEmpty Text) -> Accum -> NonEmpty Text
forall a b. (a -> b) -> a -> b
$
Accum
-> (Accum -> Match () -> Next Accum)
-> AcMachine ()
-> Text
-> Accum
forall a v.
a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
Aho.runLower
(Text -> Text -> Accum
zeroAccum (Splitter -> Text
separator Splitter
s) Text
t)
Accum -> Match () -> Next Accum
forall v. Accum -> Match v -> Next Accum
stepAccum
(Splitter -> AcMachine ()
automaton Splitter
s)
Text
t
data Accum =
Accum
{ Accum -> CodeUnitIndex
_accumSepLen :: !Aho.CodeUnitIndex
, Accum -> Text
_accumHaystack :: !Text
, Accum -> [Text]
accumResult :: ![Text]
, Accum -> CodeUnitIndex
accumPrevEnd :: !Aho.CodeUnitIndex
}
{-# INLINE finalizeAccum #-}
finalizeAccum :: Accum -> NonEmpty Text
finalizeAccum :: Accum -> NonEmpty Text
finalizeAccum (Accum CodeUnitIndex
_ Text
hay [Text]
res CodeUnitIndex
prevEnd) =
let !str :: Text
str = CodeUnitIndex -> CodeUnitIndex -> Text -> Text
Utf8.unsafeSliceUtf8 CodeUnitIndex
prevEnd (Text -> CodeUnitIndex
Utf8.lengthUtf8 Text
hay CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
prevEnd) Text
hay in
Text
str Text -> [Text] -> NonEmpty Text
forall a. a -> [a] -> NonEmpty a
:| [Text]
res
{-# INLINE zeroAccum #-}
zeroAccum :: Text -> Text -> Accum
zeroAccum :: Text -> Text -> Accum
zeroAccum Text
sep Text
hay = CodeUnitIndex -> Text -> [Text] -> CodeUnitIndex -> Accum
Accum (Text -> CodeUnitIndex
Utf8.lengthUtf8 Text
sep) Text
hay [] CodeUnitIndex
0
{-# INLINE stepAccum #-}
stepAccum :: Accum -> Aho.Match v -> Aho.Next Accum
stepAccum :: Accum -> Match v -> Next Accum
stepAccum acc :: Accum
acc@(Accum CodeUnitIndex
sepLen Text
hay [Text]
res CodeUnitIndex
prevEnd) (Aho.Match CodeUnitIndex
sepEnd v
_)
| CodeUnitIndex
sepEnd CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
sepLen CodeUnitIndex -> CodeUnitIndex -> Bool
forall a. Ord a => a -> a -> Bool
< CodeUnitIndex
prevEnd =
Accum -> Next Accum
forall a. a -> Next a
Aho.Step Accum
acc
| Bool
otherwise =
let !str :: Text
str = CodeUnitIndex -> CodeUnitIndex -> Text -> Text
Utf8.unsafeSliceUtf8 CodeUnitIndex
prevEnd (CodeUnitIndex
sepEnd CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
sepLen CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
prevEnd) Text
hay in
Accum -> Next Accum
forall a. a -> Next a
Aho.Step Accum
acc { accumResult :: [Text]
accumResult = Text
str Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: [Text]
res, accumPrevEnd :: CodeUnitIndex
accumPrevEnd = CodeUnitIndex
sepEnd }
instance Eq Splitter where
{-# INLINE (==) #-}
== :: Splitter -> Splitter -> Bool
(==) = Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
(==) (Text -> Text -> Bool)
-> (Splitter -> Text) -> Splitter -> Splitter -> Bool
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` Splitter -> Text
separator
instance Ord Splitter where
{-# INLINE compare #-}
compare :: Splitter -> Splitter -> Ordering
compare = Text -> Text -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Text -> Text -> Ordering)
-> (Splitter -> Text) -> Splitter -> Splitter -> Ordering
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` Splitter -> Text
separator
instance Hashable Splitter where
{-# INLINE hashWithSalt #-}
hashWithSalt :: Int -> Splitter -> Int
hashWithSalt Int
salt Splitter
searcher =
Int
salt Int -> Text -> Int
forall a. Hashable a => Int -> a -> Int
`hashWithSalt` Splitter -> Text
separator Splitter
searcher
instance NFData Splitter where
{-# INLINE rnf #-}
rnf :: Splitter -> ()
rnf (Splitter AcMachine ()
searcher Text
sepLength) =
AcMachine () -> ()
forall a. NFData a => a -> ()
rnf AcMachine ()
searcher () -> () -> ()
`seq`
Text -> ()
forall a. NFData a => a -> ()
rnf Text
sepLength
instance Show Splitter where
showsPrec :: Int -> Splitter -> ShowS
showsPrec Int
p Splitter
splitter =
Bool -> ShowS -> ShowS
showParen (Int
p Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
10) (ShowS -> ShowS) -> ShowS -> ShowS
forall a b. (a -> b) -> a -> b
$
String -> ShowS
showString String
"build " ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
Int -> Text -> ShowS
forall a. Show a => Int -> a -> ShowS
showsPrec Int
11 (Splitter -> Text
separator Splitter
splitter)