{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
module System.IO.Streams.Internal.Search
( search
, MatchInfo(..)
) where
import Control.Monad (when)
import Control.Monad.IO.Class (liftIO)
import Control.Monad.ST (ST)
import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as S
import qualified Data.ByteString.Unsafe as S
import qualified Data.Vector.Unboxed as V
import qualified Data.Vector.Unboxed.Mutable as MV
import Prelude (Bool (..), Either (..), Enum (..), Eq (..), IO, Int, Monad (..), Num (..), Ord (..), Show, either, id, maybe, not, otherwise, ($), ($!), (&&), (.), (||))
import System.IO.Streams.Internal (InputStream)
import qualified System.IO.Streams.Internal as Streams
data MatchInfo = Match {-# UNPACK #-} !ByteString
| NoMatch {-# UNPACK #-} !ByteString
deriving (Int -> MatchInfo -> ShowS
[MatchInfo] -> ShowS
MatchInfo -> String
(Int -> MatchInfo -> ShowS)
-> (MatchInfo -> String)
-> ([MatchInfo] -> ShowS)
-> Show MatchInfo
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [MatchInfo] -> ShowS
$cshowList :: [MatchInfo] -> ShowS
show :: MatchInfo -> String
$cshow :: MatchInfo -> String
showsPrec :: Int -> MatchInfo -> ShowS
$cshowsPrec :: Int -> MatchInfo -> ShowS
Show, MatchInfo -> MatchInfo -> Bool
(MatchInfo -> MatchInfo -> Bool)
-> (MatchInfo -> MatchInfo -> Bool) -> Eq MatchInfo
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: MatchInfo -> MatchInfo -> Bool
$c/= :: MatchInfo -> MatchInfo -> Bool
== :: MatchInfo -> MatchInfo -> Bool
$c== :: MatchInfo -> MatchInfo -> Bool
Eq)
matches :: ByteString
-> Int
-> Int
-> ByteString
-> Int
-> Int
-> Bool
matches :: ByteString -> Int -> Int -> ByteString -> Int -> Int -> Bool
matches !ByteString
needle !Int
nstart !Int
nend' !ByteString
haystack !Int
hstart !Int
hend' =
Int -> Int -> Bool
go Int
nend' Int
hend'
where
go :: Int -> Int -> Bool
go !Int
nend !Int
hend =
if Int
nend Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
nstart Bool -> Bool -> Bool
|| Int
hend Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
hstart
then Bool
True
else let !nc :: Word8
nc = ByteString -> Int -> Word8
S.unsafeIndex ByteString
needle Int
nend
!hc :: Word8
hc = ByteString -> Int -> Word8
S.unsafeIndex ByteString
haystack Int
hend
in if Word8
nc Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
hc
then Bool
False
else Int -> Int -> Bool
go (Int
nendInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) (Int
hendInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)
{-# INLINE matches #-}
search :: ByteString
-> InputStream ByteString
-> IO (InputStream MatchInfo)
search :: ByteString -> InputStream ByteString -> IO (InputStream MatchInfo)
search ByteString
needle InputStream ByteString
stream = Generator MatchInfo () -> IO (InputStream MatchInfo)
forall r a. Generator r a -> IO (InputStream r)
Streams.fromGenerator (Generator MatchInfo () -> IO (InputStream MatchInfo))
-> Generator MatchInfo () -> IO (InputStream MatchInfo)
forall a b. (a -> b) -> a -> b
$
Int -> Generator MatchInfo (Either ByteString ByteString)
forall (m :: * -> *).
MonadIO m =>
Int -> m (Either ByteString ByteString)
lookahead Int
nlen Generator MatchInfo (Either ByteString ByteString)
-> (Either ByteString ByteString -> Generator MatchInfo ())
-> Generator MatchInfo ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= (ByteString -> Generator MatchInfo ())
-> (ByteString -> Generator MatchInfo ())
-> Either ByteString ByteString
-> Generator MatchInfo ()
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either ByteString -> Generator MatchInfo ()
finishAndEOF ByteString -> Generator MatchInfo ()
startSearch
where
finishAndEOF :: ByteString -> Generator MatchInfo ()
finishAndEOF ByteString
x = if ByteString -> Bool
S.null ByteString
x
then () -> Generator MatchInfo ()
forall (m :: * -> *) a. Monad m => a -> m a
return (() -> Generator MatchInfo ()) -> () -> Generator MatchInfo ()
forall a b. (a -> b) -> a -> b
$! ()
else MatchInfo -> Generator MatchInfo ()
forall r. r -> Generator r ()
Streams.yield (MatchInfo -> Generator MatchInfo ())
-> MatchInfo -> Generator MatchInfo ()
forall a b. (a -> b) -> a -> b
$! ByteString -> MatchInfo
NoMatch ByteString
x
startSearch :: ByteString -> Generator MatchInfo ()
startSearch !ByteString
haystack =
if ByteString -> Bool
S.null ByteString
haystack
then Int -> Generator MatchInfo (Either ByteString ByteString)
forall (m :: * -> *).
MonadIO m =>
Int -> m (Either ByteString ByteString)
lookahead Int
nlen Generator MatchInfo (Either ByteString ByteString)
-> (Either ByteString ByteString -> Generator MatchInfo ())
-> Generator MatchInfo ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= (ByteString -> Generator MatchInfo ())
-> (ByteString -> Generator MatchInfo ())
-> Either ByteString ByteString
-> Generator MatchInfo ()
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either ByteString -> Generator MatchInfo ()
finishAndEOF ByteString -> Generator MatchInfo ()
startSearch
else Int -> Generator MatchInfo ()
go Int
0
where
!hlen :: Int
hlen = ByteString -> Int
S.length ByteString
haystack
go :: Int -> Generator MatchInfo ()
go !Int
hidx
| Int
hend Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
hlen = Int -> Generator MatchInfo ()
crossBound Int
hidx
| Bool
otherwise = do
let match :: Bool
match = ByteString -> Int -> Int -> ByteString -> Int -> Int -> Bool
matches ByteString
needle Int
0 Int
lastIdx ByteString
haystack Int
hidx Int
hend
if Bool
match
then do
let !nomatch :: ByteString
nomatch = Int -> ByteString -> ByteString
S.take Int
hidx ByteString
haystack
let !aftermatch :: ByteString
aftermatch = Int -> ByteString -> ByteString
S.drop (Int
hend Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) ByteString
haystack
ByteString -> ByteString -> Generator MatchInfo ()
produceMatch ByteString
nomatch ByteString
aftermatch
else do
let c :: Word8
c = ByteString -> Int -> Word8
S.unsafeIndex ByteString
haystack Int
hend
let !skip :: Int
skip = Vector Int -> Int -> Int
forall a. Unbox a => Vector a -> Int -> a
V.unsafeIndex Vector Int
table (Int -> Int) -> Int -> Int
forall a b. (a -> b) -> a -> b
$ Word8 -> Int
forall a. Enum a => a -> Int
fromEnum Word8
c
Int -> Generator MatchInfo ()
go (Int
hidx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
skip)
where
!hend :: Int
hend = Int
hidx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
nlen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
mkCoeff :: Int -> (Int, Int)
mkCoeff Int
hidx = let !ll :: Int
ll = Int
hlen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
hidx
!nm :: Int
nm = Int
nlen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
ll
in (Int
ll, Int
nm)
crossBound :: Int -> Generator MatchInfo ()
crossBound !Int
hidx0 = do
let (!Int
leftLen, Int
needMore) = Int -> (Int, Int)
mkCoeff Int
hidx0
Int -> Generator MatchInfo (Either ByteString ByteString)
forall (m :: * -> *).
MonadIO m =>
Int -> m (Either ByteString ByteString)
lookahead Int
needMore Generator MatchInfo (Either ByteString ByteString)
-> (Either ByteString ByteString -> Generator MatchInfo ())
-> Generator MatchInfo ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
(ByteString -> Generator MatchInfo ())
-> (ByteString -> Generator MatchInfo ())
-> Either ByteString ByteString
-> Generator MatchInfo ()
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (\ByteString
s -> ByteString -> Generator MatchInfo ()
finishAndEOF (ByteString -> Generator MatchInfo ())
-> ByteString -> Generator MatchInfo ()
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString -> ByteString
S.append ByteString
haystack ByteString
s)
(Int -> Int -> Int -> ByteString -> Generator MatchInfo ()
runNext Int
hidx0 Int
leftLen Int
needMore)
where
runNext :: Int -> Int -> Int -> ByteString -> Generator MatchInfo ()
runNext !Int
hidx !Int
leftLen !Int
needMore !ByteString
nextHaystack = do
let match1 :: Bool
match1 = ByteString -> Int -> Int -> ByteString -> Int -> Int -> Bool
matches ByteString
needle Int
leftLen Int
lastIdx ByteString
nextHaystack Int
0
(Int
needMoreInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)
let match2 :: Bool
match2 = ByteString -> Int -> Int -> ByteString -> Int -> Int -> Bool
matches ByteString
needle Int
0 (Int
leftLenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) ByteString
haystack Int
hidx
(Int
hlenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)
if Bool
match1 Bool -> Bool -> Bool
&& Bool
match2
then do
let !nomatch :: ByteString
nomatch = Int -> ByteString -> ByteString
S.take Int
hidx ByteString
haystack
let !aftermatch :: ByteString
aftermatch = Int -> ByteString -> ByteString
S.drop Int
needMore ByteString
nextHaystack
ByteString -> ByteString -> Generator MatchInfo ()
produceMatch ByteString
nomatch ByteString
aftermatch
else do
let c :: Word8
c = ByteString -> Int -> Word8
S.unsafeIndex ByteString
nextHaystack (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ Int
needMore Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
let p :: Int
p = Vector Int -> Int -> Int
forall a. Unbox a => Vector a -> Int -> a
V.unsafeIndex Vector Int
table (Word8 -> Int
forall a. Enum a => a -> Int
fromEnum Word8
c)
if Int
p Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
leftLen
then do
let !hidx' :: Int
hidx' = Int
hidx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
p
let (!Int
leftLen', Int
needMore') = Int -> (Int, Int)
mkCoeff Int
hidx'
let !nextlen :: Int
nextlen = ByteString -> Int
S.length ByteString
nextHaystack
if Int
nextlen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
needMore'
then
Int -> Generator MatchInfo (Either ByteString ByteString)
forall (m :: * -> *).
MonadIO m =>
Int -> m (Either ByteString ByteString)
lookahead (Int
needMore' Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
nextlen) Generator MatchInfo (Either ByteString ByteString)
-> (Either ByteString ByteString -> Generator MatchInfo ())
-> Generator MatchInfo ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
(ByteString -> Generator MatchInfo ())
-> (ByteString -> Generator MatchInfo ())
-> Either ByteString ByteString
-> Generator MatchInfo ()
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (\ByteString
s -> ByteString -> Generator MatchInfo ()
finishAndEOF (ByteString -> Generator MatchInfo ())
-> ByteString -> Generator MatchInfo ()
forall a b. (a -> b) -> a -> b
$
[ByteString] -> ByteString
S.concat [ ByteString
haystack
, ByteString
nextHaystack
, ByteString
s ])
(\ByteString
s -> Int -> Int -> Int -> ByteString -> Generator MatchInfo ()
runNext Int
hidx' Int
leftLen' Int
needMore' (ByteString -> Generator MatchInfo ())
-> ByteString -> Generator MatchInfo ()
forall a b. (a -> b) -> a -> b
$
ByteString -> ByteString -> ByteString
S.append ByteString
nextHaystack ByteString
s)
else Int -> Int -> Int -> ByteString -> Generator MatchInfo ()
runNext Int
hidx' Int
leftLen' Int
needMore' ByteString
nextHaystack
else do
let sidx :: Int
sidx = Int
p Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
leftLen
let (!ByteString
crumb, ByteString
rest) = Int -> ByteString -> (ByteString, ByteString)
S.splitAt Int
sidx ByteString
nextHaystack
MatchInfo -> Generator MatchInfo ()
forall r. r -> Generator r ()
Streams.yield (MatchInfo -> Generator MatchInfo ())
-> MatchInfo -> Generator MatchInfo ()
forall a b. (a -> b) -> a -> b
$! ByteString -> MatchInfo
NoMatch (ByteString -> MatchInfo) -> ByteString -> MatchInfo
forall a b. (a -> b) -> a -> b
$! ByteString -> ByteString -> ByteString
S.append ByteString
haystack ByteString
crumb
ByteString -> Generator MatchInfo ()
startSearch ByteString
rest
produceMatch :: ByteString -> ByteString -> Generator MatchInfo ()
produceMatch ByteString
nomatch ByteString
aftermatch = do
Bool -> Generator MatchInfo () -> Generator MatchInfo ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ ByteString -> Bool
S.null ByteString
nomatch) (Generator MatchInfo () -> Generator MatchInfo ())
-> Generator MatchInfo () -> Generator MatchInfo ()
forall a b. (a -> b) -> a -> b
$ MatchInfo -> Generator MatchInfo ()
forall r. r -> Generator r ()
Streams.yield (MatchInfo -> Generator MatchInfo ())
-> MatchInfo -> Generator MatchInfo ()
forall a b. (a -> b) -> a -> b
$! ByteString -> MatchInfo
NoMatch ByteString
nomatch
MatchInfo -> Generator MatchInfo ()
forall r. r -> Generator r ()
Streams.yield (MatchInfo -> Generator MatchInfo ())
-> MatchInfo -> Generator MatchInfo ()
forall a b. (a -> b) -> a -> b
$! ByteString -> MatchInfo
Match ByteString
needle
ByteString -> Generator MatchInfo ()
startSearch ByteString
aftermatch
!nlen :: Int
nlen = ByteString -> Int
S.length ByteString
needle
!lastIdx :: Int
lastIdx = Int
nlen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
!table :: Vector Int
table = (forall s. ST s (MVector s Int)) -> Vector Int
forall a. Unbox a => (forall s. ST s (MVector s a)) -> Vector a
V.create ((forall s. ST s (MVector s Int)) -> Vector Int)
-> (forall s. ST s (MVector s Int)) -> Vector Int
forall a b. (a -> b) -> a -> b
$ do
MVector s Int
t <- Int -> Int -> ST s (MVector (PrimState (ST s)) Int)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> a -> m (MVector (PrimState m) a)
MV.replicate Int
256 Int
nlen
MVector s Int -> ST s (MVector s Int)
forall s. MVector s Int -> ST s (MVector s Int)
go MVector s Int
t
where
go :: forall s . MV.MVector s Int -> ST s (MV.MVector s Int)
go :: MVector s Int -> ST s (MVector s Int)
go !MVector s Int
t = Int -> ST s (MVector s Int)
go' Int
0
where
go' :: Int -> ST s (MVector s Int)
go' !Int
i | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
lastIdx = MVector s Int -> ST s (MVector s Int)
forall (m :: * -> *) a. Monad m => a -> m a
return MVector s Int
t
| Bool
otherwise = do
let c :: Int
c = Word8 -> Int
forall a. Enum a => a -> Int
fromEnum (Word8 -> Int) -> Word8 -> Int
forall a b. (a -> b) -> a -> b
$ ByteString -> Int -> Word8
S.unsafeIndex ByteString
needle Int
i
MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
MV.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
t Int
c (Int
lastIdx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i)
Int -> ST s (MVector s Int)
go' (Int -> ST s (MVector s Int)) -> Int -> ST s (MVector s Int)
forall a b. (a -> b) -> a -> b
$! Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1
lookahead :: Int -> m (Either ByteString ByteString)
lookahead Int
n = ([ByteString] -> [ByteString])
-> Int -> m (Either ByteString ByteString)
forall (m :: * -> *).
MonadIO m =>
([ByteString] -> [ByteString])
-> Int -> m (Either ByteString ByteString)
go [ByteString] -> [ByteString]
forall a. a -> a
id Int
n
where
go :: ([ByteString] -> [ByteString])
-> Int -> m (Either ByteString ByteString)
go [ByteString] -> [ByteString]
dlist !Int
k = IO (Maybe ByteString) -> m (Maybe ByteString)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (InputStream ByteString -> IO (Maybe ByteString)
forall a. InputStream a -> IO (Maybe a)
Streams.read InputStream ByteString
stream) m (Maybe ByteString)
-> (Maybe ByteString -> m (Either ByteString ByteString))
-> m (Either ByteString ByteString)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= m (Either ByteString ByteString)
-> (ByteString -> m (Either ByteString ByteString))
-> Maybe ByteString
-> m (Either ByteString ByteString)
forall b a. b -> (a -> b) -> Maybe a -> b
maybe m (Either ByteString ByteString)
forall b. m (Either ByteString b)
eof ByteString -> m (Either ByteString ByteString)
chunk
where
eof :: m (Either ByteString b)
eof = Either ByteString b -> m (Either ByteString b)
forall (m :: * -> *) a. Monad m => a -> m a
return (Either ByteString b -> m (Either ByteString b))
-> Either ByteString b -> m (Either ByteString b)
forall a b. (a -> b) -> a -> b
$! ByteString -> Either ByteString b
forall a b. a -> Either a b
Left (ByteString -> Either ByteString b)
-> ByteString -> Either ByteString b
forall a b. (a -> b) -> a -> b
$! [ByteString] -> ByteString
S.concat ([ByteString] -> ByteString) -> [ByteString] -> ByteString
forall a b. (a -> b) -> a -> b
$ [ByteString] -> [ByteString]
dlist []
chunk :: ByteString -> m (Either ByteString ByteString)
chunk ByteString
x = if Int
r Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0
then Either ByteString ByteString -> m (Either ByteString ByteString)
forall (m :: * -> *) a. Monad m => a -> m a
return (Either ByteString ByteString -> m (Either ByteString ByteString))
-> Either ByteString ByteString -> m (Either ByteString ByteString)
forall a b. (a -> b) -> a -> b
$! ByteString -> Either ByteString ByteString
forall a b. b -> Either a b
Right (ByteString -> Either ByteString ByteString)
-> ByteString -> Either ByteString ByteString
forall a b. (a -> b) -> a -> b
$! [ByteString] -> ByteString
S.concat ([ByteString] -> ByteString) -> [ByteString] -> ByteString
forall a b. (a -> b) -> a -> b
$ [ByteString] -> [ByteString]
d' []
else ([ByteString] -> [ByteString])
-> Int -> m (Either ByteString ByteString)
go [ByteString] -> [ByteString]
d' Int
r
where
l :: Int
l = ByteString -> Int
S.length ByteString
x
r :: Int
r = Int
k Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
l
d' :: [ByteString] -> [ByteString]
d' = [ByteString] -> [ByteString]
dlist ([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString
xByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:)