{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Data.Text.BoyerMooreCI.Automaton
( Automaton
, CaseSensitivity (..)
, CodeUnitIndex (..)
, Next (..)
, buildAutomaton
, patternLength
, patternText
, runText
, minimumSkipForCodePoint
) where
import Control.DeepSeq (NFData)
import Control.Monad.ST (runST)
import Data.Hashable (Hashable (..))
import Data.Text.Internal (Text (..))
import GHC.Generics (Generic)
#if defined(HAS_AESON)
import qualified Data.Aeson as AE
#endif
import Data.Text.CaseSensitivity (CaseSensitivity (..))
import Data.Text.Utf8 (BackwardsIter (..), CodePoint, CodeUnitIndex (..))
import Data.TypedByteArray (Prim, TypedByteArray)
import qualified Data.Char as Char
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text
import qualified Data.Text.Utf8 as Utf8
import qualified Data.TypedByteArray as TBA
data Next a
= Done !a
| Step !a
data Automaton = Automaton
{ Automaton -> TypedByteArray CodePoint
automatonPattern :: !(TypedByteArray CodePoint)
, Automaton -> Int
automatonPatternHash :: !Int
, Automaton -> SuffixTable
automatonSuffixTable :: !SuffixTable
, Automaton -> BadCharLookup
automatonBadCharLookup :: !BadCharLookup
, Automaton -> CodeUnitIndex
automatonMinPatternSkip :: !CodeUnitIndex
}
deriving stock (forall x. Rep Automaton x -> Automaton
forall x. Automaton -> Rep Automaton x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Automaton x -> Automaton
$cfrom :: forall x. Automaton -> Rep Automaton x
Generic, Int -> Automaton -> ShowS
[Automaton] -> ShowS
Automaton -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Automaton] -> ShowS
$cshowList :: [Automaton] -> ShowS
show :: Automaton -> String
$cshow :: Automaton -> String
showsPrec :: Int -> Automaton -> ShowS
$cshowsPrec :: Int -> Automaton -> ShowS
Show)
deriving anyclass (Automaton -> ()
forall a. (a -> ()) -> NFData a
rnf :: Automaton -> ()
$crnf :: Automaton -> ()
NFData)
instance Hashable Automaton where
hashWithSalt :: Int -> Automaton -> Int
hashWithSalt Int
salt = forall a. Hashable a => Int -> a -> Int
hashWithSalt Int
salt forall b c a. (b -> c) -> (a -> b) -> a -> c
. Automaton -> Int
automatonPatternHash
instance Eq Automaton where
Automaton
x == :: Automaton -> Automaton -> Bool
== Automaton
y = Automaton -> TypedByteArray CodePoint
automatonPattern Automaton
x forall a. Eq a => a -> a -> Bool
== Automaton -> TypedByteArray CodePoint
automatonPattern Automaton
y
#if defined(HAS_AESON)
instance AE.FromJSON Automaton where
parseJSON :: Value -> Parser Automaton
parseJSON Value
v = Text -> Automaton
buildAutomaton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall a. FromJSON a => Value -> Parser a
AE.parseJSON Value
v
instance AE.ToJSON Automaton where
toJSON :: Automaton -> Value
toJSON = forall a. ToJSON a => a -> Value
AE.toJSON forall b c a. (b -> c) -> (a -> b) -> a -> c
. Automaton -> Text
patternText
#endif
buildAutomaton :: Text -> Automaton
buildAutomaton :: Text -> Automaton
buildAutomaton Text
pattern_ =
Automaton
{ automatonPattern :: TypedByteArray CodePoint
automatonPattern = TypedByteArray CodePoint
patternVec
, automatonPatternHash :: Int
automatonPatternHash = forall a. Hashable a => a -> Int
hash Text
pattern_
, automatonSuffixTable :: SuffixTable
automatonSuffixTable = TypedByteArray CodePoint -> SuffixTable
buildSuffixTable TypedByteArray CodePoint
patternVec
, automatonBadCharLookup :: BadCharLookup
automatonBadCharLookup = TypedByteArray CodePoint -> BadCharLookup
buildBadCharLookup TypedByteArray CodePoint
patternVec
, automatonMinPatternSkip :: CodeUnitIndex
automatonMinPatternSkip = TypedByteArray CodePoint -> CodeUnitIndex
minimumSkipForVector TypedByteArray CodePoint
patternVec
}
where
patternVec :: TypedByteArray CodePoint
patternVec = forall a. Prim a => [a] -> TypedByteArray a
TBA.fromList (Text -> String
Text.unpack Text
pattern_)
runText :: forall a
. a
-> (a -> CodeUnitIndex -> CodeUnitIndex -> Next a)
-> Automaton
-> Text
-> a
{-# INLINE runText #-}
runText :: forall a.
a
-> (a -> CodeUnitIndex -> CodeUnitIndex -> Next a)
-> Automaton
-> Text
-> a
runText a
seed a -> CodeUnitIndex -> CodeUnitIndex -> Next a
f Automaton
automaton !Text
text
| forall a. TypedByteArray a -> Bool
TBA.null TypedByteArray CodePoint
pattern_ = a
seed
| Bool
otherwise = a -> CodeUnitIndex -> CodeUnitIndex -> a
alignPattern a
seed CodeUnitIndex
initialHaystackMin (CodeUnitIndex
initialHaystackMin forall a. Num a => a -> a -> a
+ CodeUnitIndex
minPatternSkip forall a. Num a => a -> a -> a
- CodeUnitIndex
1)
where
Automaton TypedByteArray CodePoint
pattern_ Int
_ SuffixTable
suffixTable BadCharLookup
badCharTable CodeUnitIndex
minPatternSkip = Automaton
automaton
haystackMax :: CodeUnitIndex
haystackMax = case Text
text of Text Array
_ Int
offset Int
len -> Int -> CodeUnitIndex
CodeUnitIndex (Int
offset forall a. Num a => a -> a -> a
+ Int
len forall a. Num a => a -> a -> a
- Int
1)
initialHaystackMin :: CodeUnitIndex
initialHaystackMin = case Text
text of Text Array
_ Int
offset Int
_ -> Int -> CodeUnitIndex
CodeUnitIndex Int
offset
alignPattern
:: a
-> CodeUnitIndex
-> CodeUnitIndex
-> a
{-# INLINE alignPattern #-}
alignPattern :: a -> CodeUnitIndex -> CodeUnitIndex -> a
alignPattern !a
result !CodeUnitIndex
haystackMin !CodeUnitIndex
alignmentEnd
| CodeUnitIndex
alignmentEnd forall a. Ord a => a -> a -> Bool
> CodeUnitIndex
haystackMax = a
result
| Bool
otherwise =
let
!iter :: BackwardsIter
iter = Array -> CodeUnitIndex -> BackwardsIter
Utf8.unsafeIndexAnywhereInCodePoint' (case Text
text of Text Array
d Int
_ Int
_ -> Array
d) CodeUnitIndex
alignmentEnd
!patternIndex :: Int
patternIndex = forall a. Prim a => TypedByteArray a -> Int
TBA.length TypedByteArray CodePoint
pattern_ forall a. Num a => a -> a -> a
- Int
1
!alignmentEnd' :: CodeUnitIndex
alignmentEnd' = BackwardsIter -> CodeUnitIndex
backwardsIterEndOfChar BackwardsIter
iter
in
a -> CodeUnitIndex -> CodeUnitIndex -> BackwardsIter -> Int -> a
matchLoop a
result CodeUnitIndex
haystackMin CodeUnitIndex
alignmentEnd' BackwardsIter
iter Int
patternIndex
matchLoop
:: a
-> CodeUnitIndex
-> CodeUnitIndex
-> BackwardsIter
-> Int
-> a
matchLoop :: a -> CodeUnitIndex -> CodeUnitIndex -> BackwardsIter -> Int -> a
matchLoop !a
result !CodeUnitIndex
haystackMin !CodeUnitIndex
alignmentEnd !BackwardsIter
iter !Int
patternIndex =
let
!haystackCodePointLower :: CodePoint
haystackCodePointLower = CodePoint -> CodePoint
Utf8.lowerCodePoint (BackwardsIter -> CodePoint
backwardsIterChar BackwardsIter
iter)
in
case CodePoint
haystackCodePointLower forall a. Eq a => a -> a -> Bool
== forall a. Prim a => TypedByteArray a -> Int -> a
TBA.unsafeIndex TypedByteArray CodePoint
pattern_ Int
patternIndex of
Bool
True | Int
patternIndex forall a. Eq a => a -> a -> Bool
== Int
0 ->
let !from :: CodeUnitIndex
from = BackwardsIter -> CodeUnitIndex
backwardsIterNext BackwardsIter
iter forall a. Num a => a -> a -> a
+ CodeUnitIndex
1 forall a. Num a => a -> a -> a
- CodeUnitIndex
initialHaystackMin
!to :: CodeUnitIndex
to = CodeUnitIndex
alignmentEnd forall a. Num a => a -> a -> a
- CodeUnitIndex
initialHaystackMin
in
case a -> CodeUnitIndex -> CodeUnitIndex -> Next a
f a
result CodeUnitIndex
from CodeUnitIndex
to of
Done a
final -> a
final
Step a
intermediate ->
let haystackMin' :: CodeUnitIndex
haystackMin' = CodeUnitIndex
alignmentEnd forall a. Num a => a -> a -> a
+ CodeUnitIndex
1
alignmentEnd' :: CodeUnitIndex
alignmentEnd' = CodeUnitIndex
alignmentEnd forall a. Num a => a -> a -> a
+ CodeUnitIndex
minPatternSkip
in a -> CodeUnitIndex -> CodeUnitIndex -> a
alignPattern a
intermediate CodeUnitIndex
haystackMin' CodeUnitIndex
alignmentEnd'
Bool
True | BackwardsIter -> CodeUnitIndex
backwardsIterNext BackwardsIter
iter forall a. Ord a => a -> a -> Bool
< CodeUnitIndex
haystackMin ->
a -> CodeUnitIndex -> CodeUnitIndex -> a
alignPattern a
result CodeUnitIndex
haystackMin (CodeUnitIndex
alignmentEnd forall a. Num a => a -> a -> a
+ CodeUnitIndex
1)
Bool
True ->
let
next :: CodeUnitIndex
next = BackwardsIter -> CodeUnitIndex
backwardsIterNext BackwardsIter
iter
!iter' :: BackwardsIter
iter' = Array -> CodeUnitIndex -> BackwardsIter
Utf8.unsafeIndexEndOfCodePoint' (case Text
text of Text Array
d Int
_ Int
_ -> Array
d) CodeUnitIndex
next
in
a -> CodeUnitIndex -> CodeUnitIndex -> BackwardsIter -> Int -> a
matchLoop a
result CodeUnitIndex
haystackMin CodeUnitIndex
alignmentEnd BackwardsIter
iter' (Int
patternIndex forall a. Num a => a -> a -> a
- Int
1)
Bool
False ->
let
!fromBadChar :: CodeUnitIndex
fromBadChar =
BackwardsIter -> CodeUnitIndex
backwardsIterEndOfChar BackwardsIter
iter forall a. Num a => a -> a -> a
+ BadCharLookup -> CodePoint -> CodeUnitIndex
badCharLookup BadCharLookup
badCharTable CodePoint
haystackCodePointLower
!fromSuffixLookup :: CodeUnitIndex
fromSuffixLookup =
CodeUnitIndex
alignmentEnd forall a. Num a => a -> a -> a
+ SuffixTable -> Int -> CodeUnitIndex
suffixLookup SuffixTable
suffixTable Int
patternIndex
!alignmentEnd' :: CodeUnitIndex
alignmentEnd' = forall a. Ord a => a -> a -> a
max CodeUnitIndex
fromBadChar CodeUnitIndex
fromSuffixLookup
in
a -> CodeUnitIndex -> CodeUnitIndex -> a
alignPattern a
result CodeUnitIndex
haystackMin CodeUnitIndex
alignmentEnd'
patternLength :: Automaton -> CodeUnitIndex
patternLength :: Automaton -> CodeUnitIndex
patternLength = Text -> CodeUnitIndex
Utf8.lengthUtf8 forall b c a. (b -> c) -> (a -> b) -> a -> c
. Automaton -> Text
patternText
patternText :: Automaton -> Text
patternText :: Automaton -> Text
patternText = String -> Text
Text.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Prim a => TypedByteArray a -> [a]
TBA.toList forall b c a. (b -> c) -> (a -> b) -> a -> c
. Automaton -> TypedByteArray CodePoint
automatonPattern
minimumSkipForCodePoint :: CodePoint -> CodeUnitIndex
minimumSkipForCodePoint :: CodePoint -> CodeUnitIndex
minimumSkipForCodePoint CodePoint
cp =
case CodePoint -> Int
Char.ord CodePoint
cp of
Int
c | Int
c forall a. Ord a => a -> a -> Bool
< Int
0x80 -> CodeUnitIndex
1
Int
c | Int
c forall a. Ord a => a -> a -> Bool
< Int
0x800 -> CodeUnitIndex
2
Int
0x2C65 -> CodeUnitIndex
2
Int
0x2C66 -> CodeUnitIndex
2
Int
c | Int
c forall a. Ord a => a -> a -> Bool
< Int
0x10000 -> CodeUnitIndex
3
Int
_ -> CodeUnitIndex
4
minimumSkipForVector :: TypedByteArray CodePoint -> CodeUnitIndex
minimumSkipForVector :: TypedByteArray CodePoint -> CodeUnitIndex
minimumSkipForVector = forall a b. Prim a => (a -> b -> b) -> b -> TypedByteArray a -> b
TBA.foldr (\CodePoint
cp CodeUnitIndex
s -> CodeUnitIndex
s forall a. Num a => a -> a -> a
+ CodePoint -> CodeUnitIndex
minimumSkipForCodePoint CodePoint
cp) CodeUnitIndex
0
newtype SuffixTable = SuffixTable (TypedByteArray CodeUnitIndex)
deriving stock (forall x. Rep SuffixTable x -> SuffixTable
forall x. SuffixTable -> Rep SuffixTable x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep SuffixTable x -> SuffixTable
$cfrom :: forall x. SuffixTable -> Rep SuffixTable x
Generic)
deriving anyclass (SuffixTable -> ()
forall a. (a -> ()) -> NFData a
rnf :: SuffixTable -> ()
$crnf :: SuffixTable -> ()
NFData)
instance Show SuffixTable where
show :: SuffixTable -> String
show (SuffixTable TypedByteArray CodeUnitIndex
table) = String
"SuffixTable (TBA.toList " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show (forall a. Prim a => TypedByteArray a -> [a]
TBA.toList TypedByteArray CodeUnitIndex
table) forall a. Semigroup a => a -> a -> a
<> String
")"
suffixLookup :: SuffixTable -> Int -> CodeUnitIndex
{-# INLINE suffixLookup #-}
suffixLookup :: SuffixTable -> Int -> CodeUnitIndex
suffixLookup (SuffixTable TypedByteArray CodeUnitIndex
table) = forall a. Prim a => TypedByteArray a -> Int -> a
indexTable TypedByteArray CodeUnitIndex
table
buildSuffixTable :: TypedByteArray CodePoint -> SuffixTable
buildSuffixTable :: TypedByteArray CodePoint -> SuffixTable
buildSuffixTable TypedByteArray CodePoint
pattern_ = forall a. (forall s. ST s a) -> a
runST forall a b. (a -> b) -> a -> b
$ do
let
patLen :: Int
patLen = forall a. Prim a => TypedByteArray a -> Int
TBA.length TypedByteArray CodePoint
pattern_
wholePatternSkip :: CodeUnitIndex
wholePatternSkip = TypedByteArray CodePoint -> CodeUnitIndex
minimumSkipForVector TypedByteArray CodePoint
pattern_
MutableTypedByteArray CodeUnitIndex (PrimState (ST s))
table <- forall a (m :: * -> *).
(Prim a, PrimMonad m) =>
Int -> m (MutableTypedByteArray a (PrimState m))
TBA.newTypedByteArray Int
patLen
let
init1 :: CodeUnitIndex -> Int -> ST s ()
init1 CodeUnitIndex
lastSkipBytes Int
p
| Int
p forall a. Ord a => a -> a -> Bool
>= Int
0 = do
let
skipBytes :: CodeUnitIndex
skipBytes = case TypedByteArray CodePoint -> Int -> Maybe CodeUnitIndex
suffixIsPrefix TypedByteArray CodePoint
pattern_ (Int
p forall a. Num a => a -> a -> a
+ Int
1) of
Maybe CodeUnitIndex
Nothing -> CodeUnitIndex
lastSkipBytes
Just CodeUnitIndex
nonSkippableBytes -> CodeUnitIndex
wholePatternSkip forall a. Num a => a -> a -> a
- CodeUnitIndex
nonSkippableBytes
forall a (m :: * -> *).
(Prim a, PrimMonad m) =>
MutableTypedByteArray a (PrimState m) -> Int -> a -> m ()
TBA.writeTypedByteArray MutableTypedByteArray CodeUnitIndex (PrimState (ST s))
table Int
p CodeUnitIndex
skipBytes
CodeUnitIndex -> Int -> ST s ()
init1 CodeUnitIndex
skipBytes (Int
p forall a. Num a => a -> a -> a
- Int
1)
| Bool
otherwise = forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
init2 :: Int -> CodeUnitIndex -> ST s ()
init2 Int
p CodeUnitIndex
skipBytes
| Int
p forall a. Ord a => a -> a -> Bool
< Int
patLen forall a. Num a => a -> a -> a
- Int
1 = do
let skipBytes' :: CodeUnitIndex
skipBytes' = CodeUnitIndex
skipBytes forall a. Num a => a -> a -> a
- CodePoint -> CodeUnitIndex
minimumSkipForCodePoint (forall a. Prim a => TypedByteArray a -> Int -> a
TBA.unsafeIndex TypedByteArray CodePoint
pattern_ Int
p)
case TypedByteArray CodePoint -> Int -> Maybe Int
substringIsSuffix TypedByteArray CodePoint
pattern_ Int
p of
Maybe Int
Nothing -> forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
Just Int
suffixLen -> do
forall a (m :: * -> *).
(Prim a, PrimMonad m) =>
MutableTypedByteArray a (PrimState m) -> Int -> a -> m ()
TBA.writeTypedByteArray MutableTypedByteArray CodeUnitIndex (PrimState (ST s))
table (Int
patLen forall a. Num a => a -> a -> a
- Int
1 forall a. Num a => a -> a -> a
- Int
suffixLen) CodeUnitIndex
skipBytes'
Int -> CodeUnitIndex -> ST s ()
init2 (Int
p forall a. Num a => a -> a -> a
+ Int
1) CodeUnitIndex
skipBytes'
| Bool
otherwise = forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
CodeUnitIndex -> Int -> ST s ()
init1 (CodeUnitIndex
wholePatternSkipforall a. Num a => a -> a -> a
-CodeUnitIndex
1) (Int
patLen forall a. Num a => a -> a -> a
- Int
1)
Int -> CodeUnitIndex -> ST s ()
init2 Int
0 CodeUnitIndex
wholePatternSkip
forall a (m :: * -> *).
(Prim a, PrimMonad m) =>
MutableTypedByteArray a (PrimState m) -> Int -> a -> m ()
TBA.writeTypedByteArray MutableTypedByteArray CodeUnitIndex (PrimState (ST s))
table (Int
patLen forall a. Num a => a -> a -> a
- Int
1) (Int -> CodeUnitIndex
CodeUnitIndex Int
1)
TypedByteArray CodeUnitIndex -> SuffixTable
SuffixTable forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a.
PrimMonad m =>
MutableTypedByteArray a (PrimState m) -> m (TypedByteArray a)
TBA.unsafeFreezeTypedByteArray MutableTypedByteArray CodeUnitIndex (PrimState (ST s))
table
suffixIsPrefix :: TypedByteArray CodePoint -> Int -> Maybe CodeUnitIndex
suffixIsPrefix :: TypedByteArray CodePoint -> Int -> Maybe CodeUnitIndex
suffixIsPrefix TypedByteArray CodePoint
pattern_ Int
pos = Int -> CodeUnitIndex -> Maybe CodeUnitIndex
go Int
0 (Int -> CodeUnitIndex
CodeUnitIndex Int
0)
where
suffixLen :: Int
suffixLen = forall a. Prim a => TypedByteArray a -> Int
TBA.length TypedByteArray CodePoint
pattern_ forall a. Num a => a -> a -> a
- Int
pos
go :: Int -> CodeUnitIndex -> Maybe CodeUnitIndex
go !Int
i !CodeUnitIndex
skipBytes
| Int
i forall a. Ord a => a -> a -> Bool
< Int
suffixLen =
let prefixChar :: CodePoint
prefixChar = forall a. Prim a => TypedByteArray a -> Int -> a
TBA.unsafeIndex TypedByteArray CodePoint
pattern_ Int
i in
if CodePoint
prefixChar forall a. Eq a => a -> a -> Bool
== forall a. Prim a => TypedByteArray a -> Int -> a
TBA.unsafeIndex TypedByteArray CodePoint
pattern_ (Int
pos forall a. Num a => a -> a -> a
+ Int
i)
then Int -> CodeUnitIndex -> Maybe CodeUnitIndex
go (Int
i forall a. Num a => a -> a -> a
+ Int
1) (CodeUnitIndex
skipBytes forall a. Num a => a -> a -> a
+ CodePoint -> CodeUnitIndex
minimumSkipForCodePoint CodePoint
prefixChar)
else forall a. Maybe a
Nothing
| Bool
otherwise = forall a. a -> Maybe a
Just CodeUnitIndex
skipBytes
substringIsSuffix :: TypedByteArray CodePoint -> Int -> Maybe Int
substringIsSuffix :: TypedByteArray CodePoint -> Int -> Maybe Int
substringIsSuffix TypedByteArray CodePoint
pattern_ Int
pos = Int -> Maybe Int
go Int
0
where
patLen :: Int
patLen = forall a. Prim a => TypedByteArray a -> Int
TBA.length TypedByteArray CodePoint
pattern_
go :: Int -> Maybe Int
go Int
i | Int
i forall a. Ord a => a -> a -> Bool
> Int
pos = forall a. Maybe a
Nothing
| forall a. Prim a => TypedByteArray a -> Int -> a
TBA.unsafeIndex TypedByteArray CodePoint
pattern_ (Int
pos forall a. Num a => a -> a -> a
- Int
i) forall a. Eq a => a -> a -> Bool
== forall a. Prim a => TypedByteArray a -> Int -> a
TBA.unsafeIndex TypedByteArray CodePoint
pattern_ (Int
patLen forall a. Num a => a -> a -> a
- Int
1 forall a. Num a => a -> a -> a
- Int
i) =
Int -> Maybe Int
go (Int
i forall a. Num a => a -> a -> a
+ Int
1)
| Int
i forall a. Eq a => a -> a -> Bool
== Int
0 = forall a. Maybe a
Nothing
| Bool
otherwise = forall a. a -> Maybe a
Just Int
i
data BadCharLookup = BadCharLookup
{ BadCharLookup -> TypedByteArray CodeUnitIndex
badCharLookupTable :: {-# UNPACK #-} !(TypedByteArray CodeUnitIndex)
, BadCharLookup -> HashMap CodePoint CodeUnitIndex
badCharLookupMap :: !(HashMap.HashMap CodePoint CodeUnitIndex)
, BadCharLookup -> CodeUnitIndex
badCharLookupDefault :: !CodeUnitIndex
}
deriving stock (forall x. Rep BadCharLookup x -> BadCharLookup
forall x. BadCharLookup -> Rep BadCharLookup x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep BadCharLookup x -> BadCharLookup
$cfrom :: forall x. BadCharLookup -> Rep BadCharLookup x
Generic, Int -> BadCharLookup -> ShowS
[BadCharLookup] -> ShowS
BadCharLookup -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [BadCharLookup] -> ShowS
$cshowList :: [BadCharLookup] -> ShowS
show :: BadCharLookup -> String
$cshow :: BadCharLookup -> String
showsPrec :: Int -> BadCharLookup -> ShowS
$cshowsPrec :: Int -> BadCharLookup -> ShowS
Show)
deriving anyclass (BadCharLookup -> ()
forall a. (a -> ()) -> NFData a
rnf :: BadCharLookup -> ()
$crnf :: BadCharLookup -> ()
NFData)
badCharTableSize :: Int
{-# INLINE badCharTableSize #-}
badCharTableSize :: Int
badCharTableSize = Int
256
badCharLookup :: BadCharLookup -> CodePoint -> CodeUnitIndex
{-# INLINE badCharLookup #-}
badCharLookup :: BadCharLookup -> CodePoint -> CodeUnitIndex
badCharLookup (BadCharLookup TypedByteArray CodeUnitIndex
bclTable HashMap CodePoint CodeUnitIndex
bclMap CodeUnitIndex
bclDefault) CodePoint
char
| Int
intChar forall a. Ord a => a -> a -> Bool
< Int
badCharTableSize = forall a. Prim a => TypedByteArray a -> Int -> a
indexTable TypedByteArray CodeUnitIndex
bclTable Int
intChar
| Bool
otherwise = forall k v. (Eq k, Hashable k) => v -> k -> HashMap k v -> v
HashMap.lookupDefault CodeUnitIndex
bclDefault CodePoint
char HashMap CodePoint CodeUnitIndex
bclMap
where
intChar :: Int
intChar = forall a. Enum a => a -> Int
fromEnum CodePoint
char
buildBadCharLookup :: TypedByteArray CodePoint -> BadCharLookup
buildBadCharLookup :: TypedByteArray CodePoint -> BadCharLookup
buildBadCharLookup TypedByteArray CodePoint
pattern_ = forall a. (forall s. ST s a) -> a
runST forall a b. (a -> b) -> a -> b
$ do
let
defaultSkip :: CodeUnitIndex
defaultSkip = TypedByteArray CodePoint -> CodeUnitIndex
minimumSkipForVector TypedByteArray CodePoint
pattern_
MutableTypedByteArray CodeUnitIndex (PrimState (ST s))
table <- (forall a (m :: * -> *).
(Prim a, PrimMonad m) =>
Int -> a -> m (MutableTypedByteArray a (PrimState m))
TBA.replicate Int
badCharTableSize CodeUnitIndex
defaultSkip)
let
fillTable :: HashMap CodePoint CodeUnitIndex
-> CodeUnitIndex
-> String
-> ST s (HashMap CodePoint CodeUnitIndex)
fillTable !HashMap CodePoint CodeUnitIndex
badCharMap !CodeUnitIndex
skipBytes = \case
[] -> forall (f :: * -> *) a. Applicative f => a -> f a
pure HashMap CodePoint CodeUnitIndex
badCharMap
[CodePoint
_] -> forall (f :: * -> *) a. Applicative f => a -> f a
pure HashMap CodePoint CodeUnitIndex
badCharMap
(!CodePoint
patChar : !String
patChars) ->
let skipBytes' :: CodeUnitIndex
skipBytes' = CodeUnitIndex
skipBytes forall a. Num a => a -> a -> a
- CodePoint -> CodeUnitIndex
minimumSkipForCodePoint CodePoint
patChar in
if forall a. Enum a => a -> Int
fromEnum CodePoint
patChar forall a. Ord a => a -> a -> Bool
< Int
badCharTableSize
then do
forall a (m :: * -> *).
(Prim a, PrimMonad m) =>
MutableTypedByteArray a (PrimState m) -> Int -> a -> m ()
TBA.writeTypedByteArray MutableTypedByteArray CodeUnitIndex (PrimState (ST s))
table (forall a. Enum a => a -> Int
fromEnum CodePoint
patChar) CodeUnitIndex
skipBytes'
HashMap CodePoint CodeUnitIndex
-> CodeUnitIndex
-> String
-> ST s (HashMap CodePoint CodeUnitIndex)
fillTable HashMap CodePoint CodeUnitIndex
badCharMap CodeUnitIndex
skipBytes' String
patChars
else
let badCharMap' :: HashMap CodePoint CodeUnitIndex
badCharMap' = forall k v.
(Eq k, Hashable k) =>
k -> v -> HashMap k v -> HashMap k v
HashMap.insert CodePoint
patChar CodeUnitIndex
skipBytes' HashMap CodePoint CodeUnitIndex
badCharMap
in HashMap CodePoint CodeUnitIndex
-> CodeUnitIndex
-> String
-> ST s (HashMap CodePoint CodeUnitIndex)
fillTable HashMap CodePoint CodeUnitIndex
badCharMap' CodeUnitIndex
skipBytes' String
patChars
HashMap CodePoint CodeUnitIndex
badCharMap <- HashMap CodePoint CodeUnitIndex
-> CodeUnitIndex
-> String
-> ST s (HashMap CodePoint CodeUnitIndex)
fillTable forall k v. HashMap k v
HashMap.empty CodeUnitIndex
defaultSkip (forall a. Prim a => TypedByteArray a -> [a]
TBA.toList TypedByteArray CodePoint
pattern_)
TypedByteArray CodeUnitIndex
tableFrozen <- forall (m :: * -> *) a.
PrimMonad m =>
MutableTypedByteArray a (PrimState m) -> m (TypedByteArray a)
TBA.unsafeFreezeTypedByteArray MutableTypedByteArray CodeUnitIndex (PrimState (ST s))
table
forall (f :: * -> *) a. Applicative f => a -> f a
pure BadCharLookup
{ badCharLookupTable :: TypedByteArray CodeUnitIndex
badCharLookupTable = TypedByteArray CodeUnitIndex
tableFrozen
, badCharLookupMap :: HashMap CodePoint CodeUnitIndex
badCharLookupMap = HashMap CodePoint CodeUnitIndex
badCharMap
, badCharLookupDefault :: CodeUnitIndex
badCharLookupDefault = CodeUnitIndex
defaultSkip
}
indexTable :: Prim a => TypedByteArray a -> Int -> a
{-# INLINE indexTable #-}
indexTable :: forall a. Prim a => TypedByteArray a -> Int -> a
indexTable = forall a. Prim a => TypedByteArray a -> Int -> a
TBA.unsafeIndex