{-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
module Data.Text.Internal.Lazy.Search
(
indices
) where
import qualified Data.Text.Array as A
import Data.Int (Int64)
import Data.Word (Word16, Word64)
import qualified Data.Text.Internal as T
import Data.Text.Internal.Fusion.Types (PairS(..))
import Data.Text.Internal.Lazy (Text(..), foldlChunks)
import Data.Bits ((.|.), (.&.))
import Data.Text.Internal.Unsafe.Shift (shiftL)
indices :: Text
-> Text
-> [Int64]
indices :: Text -> Text -> [Int64]
indices needle :: Text
needle@(Chunk Text
n Text
ns) _haystack :: Text
_haystack@(Chunk Text
k Text
ks)
| Int64
nlen Int64 -> Int64 -> Bool
forall a. Ord a => a -> a -> Bool
<= Int64
0 = []
| Int64
nlen Int64 -> Int64 -> Bool
forall a. Eq a => a -> a -> Bool
== Int64
1 = Word16 -> Int64 -> Text -> Text -> [Int64]
indicesOne (Int64 -> Word16
nindex Int64
0) Int64
0 Text
k Text
ks
| Bool
otherwise = Text -> Text -> Int64 -> Int64 -> [Int64]
advance Text
k Text
ks Int64
0 Int64
0
where
advance :: Text -> Text -> Int64 -> Int64 -> [Int64]
advance x :: Text
x@(T.Text Array
_ Int
_ Int
l) Text
xs = Int64 -> Int64 -> [Int64]
scan
where
scan :: Int64 -> Int64 -> [Int64]
scan !Int64
g !Int64
i
| Int64
i Int64 -> Int64 -> Bool
forall a. Ord a => a -> a -> Bool
>= Int64
m = case Text
xs of
Text
Empty -> []
Chunk y ys -> Text -> Text -> Int64 -> Int64 -> [Int64]
advance Text
y Text
ys Int64
g (Int64
iInt64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
-Int64
m)
| Int64 -> Text -> Text -> Bool
lackingHay (Int64
i Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+ Int64
nlen) Text
x Text
xs = []
| Word16
c Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
== Word16
z Bool -> Bool -> Bool
&& Int64 -> Bool
candidateMatch Int64
0 = Int64
g Int64 -> [Int64] -> [Int64]
forall a. a -> [a] -> [a]
: Int64 -> Int64 -> [Int64]
scan (Int64
gInt64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+Int64
nlen) (Int64
iInt64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+Int64
nlen)
| Bool
otherwise = Int64 -> Int64 -> [Int64]
scan (Int64
gInt64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+Int64
delta) (Int64
iInt64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+Int64
delta)
where
m :: Int64
m = Int -> Int64
intToInt64 Int
l
c :: Word16
c = Int64 -> Word16
hindex (Int64
i Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+ Int64
nlast)
delta :: Int64
delta | Bool
nextInPattern = Int64
nlen Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+ Int64
1
| Word16
c Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
== Word16
z = Int64
skip Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+ Int64
1
| Bool
otherwise = Int64
1
nextInPattern :: Bool
nextInPattern = Word64
mask Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.&. Word16 -> Word64
swizzle (Int64 -> Word16
hindex (Int64
iInt64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+Int64
nlen)) Word64 -> Word64 -> Bool
forall a. Eq a => a -> a -> Bool
== Word64
0
candidateMatch :: Int64 -> Bool
candidateMatch !Int64
j
| Int64
j Int64 -> Int64 -> Bool
forall a. Ord a => a -> a -> Bool
>= Int64
nlast = Bool
True
| Int64 -> Word16
hindex (Int64
iInt64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+Int64
j) Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
/= Int64 -> Word16
nindex Int64
j = Bool
False
| Bool
otherwise = Int64 -> Bool
candidateMatch (Int64
jInt64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+Int64
1)
hindex :: Int64 -> Word16
hindex = Text -> Text -> Int64 -> Word16
index Text
x Text
xs
nlen :: Int64
nlen = Text -> Int64
wordLength Text
needle
nlast :: Int64
nlast = Int64
nlen Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
- Int64
1
nindex :: Int64 -> Word16
nindex = Text -> Text -> Int64 -> Word16
index Text
n Text
ns
z :: Word16
z = (Word16 -> Text -> Word16) -> Word16 -> Text -> Word16
forall a. (a -> Text -> a) -> a -> Text -> a
foldlChunks Word16 -> Text -> Word16
forall p. p -> Text -> Word16
fin Word16
0 Text
needle
where fin :: p -> Text -> Word16
fin p
_ (T.Text Array
farr Int
foff Int
flen) = Array -> Int -> Word16
A.unsafeIndex Array
farr (Int
foffInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
flenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)
(Word64
mask :: Word64) :*: Int64
skip = Text
-> Text -> Int64 -> Int -> Word64 -> Int64 -> PairS Word64 Int64
buildTable Text
n Text
ns Int64
0 Int
0 Word64
0 (Int64
nlenInt64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
-Int64
2)
swizzle :: Word16 -> Word64
swizzle :: Word16 -> Word64
swizzle Word16
w = Word64
1 Word64 -> Int -> Word64
forall a. UnsafeShift a => a -> Int -> a
`shiftL` (Word16 -> Int
word16ToInt Word16
w Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3f)
buildTable :: Text
-> Text -> Int64 -> Int -> Word64 -> Int64 -> PairS Word64 Int64
buildTable (T.Text Array
xarr Int
xoff Int
xlen) Text
xs = Int64 -> Int -> Word64 -> Int64 -> PairS Word64 Int64
go
where
go :: Int64 -> Int -> Word64 -> Int64 -> PairS Word64 Int64
go !(Int64
g::Int64) !Int
i !Word64
msk !Int64
skp
| Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
xlast = case Text
xs of
Text
Empty -> (Word64
msk Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. Word16 -> Word64
swizzle Word16
z) Word64 -> Int64 -> PairS Word64 Int64
forall a b. a -> b -> PairS a b
:*: Int64
skp
Chunk y ys -> Text
-> Text -> Int64 -> Int -> Word64 -> Int64 -> PairS Word64 Int64
buildTable Text
y Text
ys Int64
g Int
0 Word64
msk' Int64
skp'
| Bool
otherwise = Int64 -> Int -> Word64 -> Int64 -> PairS Word64 Int64
go (Int64
gInt64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+Int64
1) (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Word64
msk' Int64
skp'
where c :: Word16
c = Array -> Int -> Word16
A.unsafeIndex Array
xarr (Int
xoffInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
i)
msk' :: Word64
msk' = Word64
msk Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. Word16 -> Word64
swizzle Word16
c
skp' :: Int64
skp' | Word16
c Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
== Word16
z = Int64
nlen Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
- Int64
g Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
- Int64
2
| Bool
otherwise = Int64
skp
xlast :: Int
xlast = Int
xlen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
lackingHay :: Int64 -> T.Text -> Text -> Bool
lackingHay :: Int64 -> Text -> Text -> Bool
lackingHay Int64
q = Int64 -> Text -> Text -> Bool
go Int64
0
where
go :: Int64 -> Text -> Text -> Bool
go Int64
p (T.Text Array
_ Int
_ Int
l) Text
ps = Int64
p' Int64 -> Int64 -> Bool
forall a. Ord a => a -> a -> Bool
< Int64
q Bool -> Bool -> Bool
&& case Text
ps of
Text
Empty -> Bool
True
Chunk Text
r Text
rs -> Int64 -> Text -> Text -> Bool
go Int64
p' Text
r Text
rs
where p' :: Int64
p' = Int64
p Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+ Int -> Int64
intToInt64 Int
l
indices Text
_ Text
_ = []
index :: T.Text -> Text -> Int64 -> Word16
index :: Text -> Text -> Int64 -> Word16
index (T.Text Array
arr Int
off Int
len) Text
xs !Int64
i
| Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
len = Array -> Int -> Word16
A.unsafeIndex Array
arr (Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
j)
| Bool
otherwise = case Text
xs of
Text
Empty
| Int
j Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
len -> Word16
0
| Bool
otherwise -> String -> Word16
forall a. String -> a
emptyError String
"index"
Chunk Text
c Text
cs -> Text -> Text -> Int64 -> Word16
index Text
c Text
cs (Int64
iInt64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
-Int -> Int64
intToInt64 Int
len)
where j :: Int
j = Int64 -> Int
int64ToInt Int64
i
indicesOne :: Word16 -> Int64 -> T.Text -> Text -> [Int64]
indicesOne :: Word16 -> Int64 -> Text -> Text -> [Int64]
indicesOne Word16
c = Int64 -> Text -> Text -> [Int64]
chunk
where
chunk :: Int64 -> T.Text -> Text -> [Int64]
chunk :: Int64 -> Text -> Text -> [Int64]
chunk !Int64
i (T.Text Array
oarr Int
ooff Int
olen) Text
os = Int -> [Int64]
go Int
0
where
go :: Int -> [Int64]
go Int
h | Int
h Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
olen = case Text
os of
Text
Empty -> []
Chunk Text
y Text
ys -> Int64 -> Text -> Text -> [Int64]
chunk (Int64
iInt64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+Int -> Int64
intToInt64 Int
olen) Text
y Text
ys
| Word16
on Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
== Word16
c = Int64
i Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+ Int -> Int64
intToInt64 Int
h Int64 -> [Int64] -> [Int64]
forall a. a -> [a] -> [a]
: Int -> [Int64]
go (Int
hInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
| Bool
otherwise = Int -> [Int64]
go (Int
hInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
where on :: Word16
on = Array -> Int -> Word16
A.unsafeIndex Array
oarr (Int
ooffInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
h)
wordLength :: Text -> Int64
wordLength :: Text -> Int64
wordLength = (Int64 -> Text -> Int64) -> Int64 -> Text -> Int64
forall a. (a -> Text -> a) -> a -> Text -> a
foldlChunks Int64 -> Text -> Int64
sumLength Int64
0
where
sumLength :: Int64 -> T.Text -> Int64
sumLength :: Int64 -> Text -> Int64
sumLength Int64
i (T.Text Array
_ Int
_ Int
l) = Int64
i Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+ Int -> Int64
intToInt64 Int
l
emptyError :: String -> a
emptyError :: String -> a
emptyError String
fun = String -> a
forall a. HasCallStack => String -> a
error (String
"Data.Text.Lazy.Search." String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
fun String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
": empty input")
intToInt64 :: Int -> Int64
intToInt64 :: Int -> Int64
intToInt64 = Int -> Int64
forall a b. (Integral a, Num b) => a -> b
fromIntegral
int64ToInt :: Int64 -> Int
int64ToInt :: Int64 -> Int
int64ToInt = Int64 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral
word16ToInt :: Word16 -> Int
word16ToInt :: Word16 -> Int
word16ToInt = Word16 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral