{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
module Data.Text.AhoCorasick.Splitter
( Splitter
, automaton
, build
, separator
, split
, splitIgnoreCase
, splitReverse
, splitReverseIgnoreCase
) where
import Control.DeepSeq (NFData (..))
import Data.Function (on)
import Data.Hashable (Hashable (..))
import Data.List.NonEmpty (NonEmpty ((:|)))
import Data.Text.Utf8 (Text)
#if defined(HAS_AESON)
import qualified Data.Aeson as AE
#endif
import qualified Data.List.NonEmpty as NonEmpty
import qualified Data.Text as Text
import Data.Text.AhoCorasick.Automaton (AcMachine)
import qualified Data.Text.Utf8 as Utf8
import qualified Data.Text.AhoCorasick.Automaton as Aho
data Splitter =
Splitter
{ Splitter -> AcMachine ()
splitterAutomaton :: AcMachine ()
, Splitter -> Text
splitterSeparator :: Text
}
#if defined(HAS_AESON)
instance AE.ToJSON Splitter where
toJSON :: Splitter -> Value
toJSON = forall a. ToJSON a => a -> Value
AE.toJSON forall b c a. (b -> c) -> (a -> b) -> a -> c
. Splitter -> Text
separator
instance AE.FromJSON Splitter where
parseJSON :: Value -> Parser Splitter
parseJSON Value
v = Text -> Splitter
build forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall a. FromJSON a => Value -> Parser a
AE.parseJSON Value
v
#endif
{-# INLINE build #-}
build :: Text -> Splitter
build :: Text -> Splitter
build Text
sep =
let !auto :: AcMachine ()
auto = forall v. [(Text, v)] -> AcMachine v
Aho.build [(Text
sep, ())] in
AcMachine () -> Text -> Splitter
Splitter AcMachine ()
auto Text
sep
{-# INLINE automaton #-}
automaton :: Splitter -> AcMachine ()
automaton :: Splitter -> AcMachine ()
automaton = Splitter -> AcMachine ()
splitterAutomaton
{-# INLINE separator #-}
separator :: Splitter -> Text
separator :: Splitter -> Text
separator = Splitter -> Text
splitterSeparator
{-# INLINE split #-}
split :: Splitter -> Text -> NonEmpty Text
split :: Splitter -> Text -> NonEmpty Text
split = (forall a. NonEmpty a -> NonEmpty a
NonEmpty.reverse forall b c a. (b -> c) -> (a -> b) -> a -> c
.) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Splitter -> Text -> NonEmpty Text
splitReverse
{-# INLINE splitIgnoreCase #-}
splitIgnoreCase :: Splitter -> Text -> NonEmpty Text
splitIgnoreCase :: Splitter -> Text -> NonEmpty Text
splitIgnoreCase = (forall a. NonEmpty a -> NonEmpty a
NonEmpty.reverse forall b c a. (b -> c) -> (a -> b) -> a -> c
.) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Splitter -> Text -> NonEmpty Text
splitReverseIgnoreCase
{-# INLINE splitReverse #-}
splitReverse :: Splitter -> Text -> NonEmpty Text
splitReverse :: Splitter -> Text -> NonEmpty Text
splitReverse Splitter
s Text
t =
Text -> Accum -> NonEmpty Text
finalizeAccum Text
t forall a b. (a -> b) -> a -> b
$ forall a v.
a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
Aho.runText Accum
zeroAccum forall {v}. Accum -> Match v -> Next Accum
stepAccum' (Splitter -> AcMachine ()
automaton Splitter
s) Text
t
where
sepLength :: CodeUnitIndex
sepLength = Text -> CodeUnitIndex
Utf8.lengthUtf8 (Splitter -> Text
separator Splitter
s)
stepAccum' :: Accum -> Match v -> Next Accum
stepAccum' Accum
accum (Aho.Match CodeUnitIndex
newFragmentStart v
_) =
Text -> Accum -> CodeUnitIndex -> CodeUnitIndex -> Next Accum
stepAccum Text
t Accum
accum (CodeUnitIndex
newFragmentStart forall a. Num a => a -> a -> a
- CodeUnitIndex
sepLength) CodeUnitIndex
newFragmentStart
{-# INLINE splitReverseIgnoreCase #-}
splitReverseIgnoreCase :: Splitter -> Text -> NonEmpty Text
splitReverseIgnoreCase :: Splitter -> Text -> NonEmpty Text
splitReverseIgnoreCase Splitter
s Text
t =
Text -> Accum -> NonEmpty Text
finalizeAccum Text
t forall a b. (a -> b) -> a -> b
$ forall a v.
a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
Aho.runLower Accum
zeroAccum forall {v}. Accum -> Match v -> Next Accum
stepAccum' (Splitter -> AcMachine ()
automaton Splitter
s) Text
t
where
sepLength :: Int
sepLength = Text -> Int
Text.length (Splitter -> Text
separator Splitter
s)
stepAccum' :: Accum -> Match v -> Next Accum
stepAccum' Accum
accum (Aho.Match CodeUnitIndex
newFragmentStart v
_) =
let sepStart :: CodeUnitIndex
sepStart = Text -> CodeUnitIndex -> Int -> CodeUnitIndex
Utf8.skipCodePointsBackwards Text
t (CodeUnitIndex
newFragmentStartforall a. Num a => a -> a -> a
-CodeUnitIndex
1) (Int
sepLengthforall a. Num a => a -> a -> a
-Int
1) in
Text -> Accum -> CodeUnitIndex -> CodeUnitIndex -> Next Accum
stepAccum Text
t Accum
accum CodeUnitIndex
sepStart CodeUnitIndex
newFragmentStart
data Accum =
Accum
{ Accum -> [Text]
accumResult :: ![Text]
, Accum -> CodeUnitIndex
accumFragmentStart :: !Aho.CodeUnitIndex
}
{-# INLINE finalizeAccum #-}
finalizeAccum :: Text -> Accum -> NonEmpty Text
finalizeAccum :: Text -> Accum -> NonEmpty Text
finalizeAccum Text
hay (Accum [Text]
res CodeUnitIndex
prevEnd) =
let !str :: Text
str = CodeUnitIndex -> CodeUnitIndex -> Text -> Text
Utf8.unsafeSliceUtf8 CodeUnitIndex
prevEnd (Text -> CodeUnitIndex
Utf8.lengthUtf8 Text
hay forall a. Num a => a -> a -> a
- CodeUnitIndex
prevEnd) Text
hay in
Text
str forall a. a -> [a] -> NonEmpty a
:| [Text]
res
{-# INLINE zeroAccum #-}
zeroAccum :: Accum
zeroAccum :: Accum
zeroAccum = Accum { accumResult :: [Text]
accumResult = [], accumFragmentStart :: CodeUnitIndex
accumFragmentStart = CodeUnitIndex
0 }
{-# INLINE stepAccum #-}
stepAccum :: Text -> Accum -> Aho.CodeUnitIndex -> Aho.CodeUnitIndex -> Aho.Next Accum
stepAccum :: Text -> Accum -> CodeUnitIndex -> CodeUnitIndex -> Next Accum
stepAccum Text
hay acc :: Accum
acc@(Accum [Text]
res CodeUnitIndex
fragmentStart) CodeUnitIndex
sepStart CodeUnitIndex
newFragmentStart
| CodeUnitIndex
sepStart forall a. Ord a => a -> a -> Bool
< CodeUnitIndex
fragmentStart =
forall a. a -> Next a
Aho.Step Accum
acc
| Bool
otherwise =
let !str :: Text
str = CodeUnitIndex -> CodeUnitIndex -> Text -> Text
Utf8.unsafeSliceUtf8 CodeUnitIndex
fragmentStart (CodeUnitIndex
sepStart forall a. Num a => a -> a -> a
- CodeUnitIndex
fragmentStart) Text
hay in
forall a. a -> Next a
Aho.Step Accum
acc { accumResult :: [Text]
accumResult = Text
str forall a. a -> [a] -> [a]
: [Text]
res, accumFragmentStart :: CodeUnitIndex
accumFragmentStart = CodeUnitIndex
newFragmentStart }
instance Eq Splitter where
{-# INLINE (==) #-}
== :: Splitter -> Splitter -> Bool
(==) = forall a. Eq a => a -> a -> Bool
(==) forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` Splitter -> Text
separator
instance Ord Splitter where
{-# INLINE compare #-}
compare :: Splitter -> Splitter -> Ordering
compare = forall a. Ord a => a -> a -> Ordering
compare forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` Splitter -> Text
separator
instance Hashable Splitter where
{-# INLINE hashWithSalt #-}
hashWithSalt :: Int -> Splitter -> Int
hashWithSalt Int
salt Splitter
searcher =
Int
salt forall a. Hashable a => Int -> a -> Int
`hashWithSalt` Splitter -> Text
separator Splitter
searcher
instance NFData Splitter where
{-# INLINE rnf #-}
rnf :: Splitter -> ()
rnf (Splitter AcMachine ()
searcher Text
sepLength) =
forall a. NFData a => a -> ()
rnf AcMachine ()
searcher seq :: forall a b. a -> b -> b
`seq`
forall a. NFData a => a -> ()
rnf Text
sepLength
instance Show Splitter where
showsPrec :: Int -> Splitter -> ShowS
showsPrec Int
p Splitter
splitter =
Bool -> ShowS -> ShowS
showParen (Int
p forall a. Ord a => a -> a -> Bool
> Int
10) forall a b. (a -> b) -> a -> b
$
String -> ShowS
showString String
"build " forall b c a. (b -> c) -> (a -> b) -> a -> c
.
forall a. Show a => Int -> a -> ShowS
showsPrec Int
11 (Splitter -> Text
separator Splitter
splitter)