{-# LANGUAGE BangPatterns, MagicHash #-}
module Data.Attoparsec.ByteString.FastSet
(
FastSet
, fromList
, set
, memberChar
, memberWord8
, fromSet
, charClass
) where
import Data.Bits ((.&.), (.|.))
import Foreign.Storable (peekByteOff, pokeByteOff)
import GHC.Exts (Int(I#), iShiftRA#, narrow8Word#, shiftL#)
import GHC.Word (Word8(W8#))
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Internal as I
import qualified Data.ByteString.Unsafe as U
data FastSet = Sorted { FastSet -> ByteString
fromSet :: !B.ByteString }
| Table { fromSet :: !B.ByteString }
deriving (FastSet -> FastSet -> Bool
(FastSet -> FastSet -> Bool)
-> (FastSet -> FastSet -> Bool) -> Eq FastSet
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: FastSet -> FastSet -> Bool
$c/= :: FastSet -> FastSet -> Bool
== :: FastSet -> FastSet -> Bool
$c== :: FastSet -> FastSet -> Bool
Eq, Eq FastSet
Eq FastSet
-> (FastSet -> FastSet -> Ordering)
-> (FastSet -> FastSet -> Bool)
-> (FastSet -> FastSet -> Bool)
-> (FastSet -> FastSet -> Bool)
-> (FastSet -> FastSet -> Bool)
-> (FastSet -> FastSet -> FastSet)
-> (FastSet -> FastSet -> FastSet)
-> Ord FastSet
FastSet -> FastSet -> Bool
FastSet -> FastSet -> Ordering
FastSet -> FastSet -> FastSet
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: FastSet -> FastSet -> FastSet
$cmin :: FastSet -> FastSet -> FastSet
max :: FastSet -> FastSet -> FastSet
$cmax :: FastSet -> FastSet -> FastSet
>= :: FastSet -> FastSet -> Bool
$c>= :: FastSet -> FastSet -> Bool
> :: FastSet -> FastSet -> Bool
$c> :: FastSet -> FastSet -> Bool
<= :: FastSet -> FastSet -> Bool
$c<= :: FastSet -> FastSet -> Bool
< :: FastSet -> FastSet -> Bool
$c< :: FastSet -> FastSet -> Bool
compare :: FastSet -> FastSet -> Ordering
$ccompare :: FastSet -> FastSet -> Ordering
$cp1Ord :: Eq FastSet
Ord)
instance Show FastSet where
show :: FastSet -> String
show (Sorted ByteString
s) = String
"FastSet Sorted " String -> ShowS
forall a. [a] -> [a] -> [a]
++ ShowS
forall a. Show a => a -> String
show (ByteString -> String
B8.unpack ByteString
s)
show (Table ByteString
_) = String
"FastSet Table"
tableCutoff :: Int
tableCutoff :: Int
tableCutoff = Int
8
set :: B.ByteString -> FastSet
set :: ByteString -> FastSet
set ByteString
s | ByteString -> Int
B.length ByteString
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
tableCutoff = ByteString -> FastSet
Sorted (ByteString -> FastSet)
-> (ByteString -> ByteString) -> ByteString -> FastSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
B.sort (ByteString -> FastSet) -> ByteString -> FastSet
forall a b. (a -> b) -> a -> b
$ ByteString
s
| Bool
otherwise = ByteString -> FastSet
Table (ByteString -> FastSet)
-> (ByteString -> ByteString) -> ByteString -> FastSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
mkTable (ByteString -> FastSet) -> ByteString -> FastSet
forall a b. (a -> b) -> a -> b
$ ByteString
s
fromList :: [Word8] -> FastSet
fromList :: [Word8] -> FastSet
fromList = ByteString -> FastSet
set (ByteString -> FastSet)
-> ([Word8] -> ByteString) -> [Word8] -> FastSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Word8] -> ByteString
B.pack
data I = I {-# UNPACK #-} !Int {-# UNPACK #-} !Word8
shiftR :: Int -> Int -> Int
shiftR :: Int -> Int -> Int
shiftR (I# Int#
x#) (I# Int#
i#) = Int# -> Int
I# (Int#
x# Int# -> Int# -> Int#
`iShiftRA#` Int#
i#)
shiftL :: Word8 -> Int -> Word8
shiftL :: Word8 -> Int -> Word8
shiftL (W8# Word#
x#) (I# Int#
i#) = Word# -> Word8
W8# (Word# -> Word#
narrow8Word# (Word#
x# Word# -> Int# -> Word#
`shiftL#` Int#
i#))
index :: Int -> I
index :: Int -> I
index Int
i = Int -> Word8 -> I
I (Int
i Int -> Int -> Int
`shiftR` Int
3) (Word8
1 Word8 -> Int -> Word8
`shiftL` (Int
i Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
7))
{-# INLINE index #-}
memberWord8 :: Word8 -> FastSet -> Bool
memberWord8 :: Word8 -> FastSet -> Bool
memberWord8 Word8
w (Table ByteString
t) =
let I Int
byte Word8
bit = Int -> I
index (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w)
in ByteString -> Int -> Word8
U.unsafeIndex ByteString
t Int
byte Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
bit Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0
memberWord8 Word8
w (Sorted ByteString
s) = Int -> Int -> Bool
search Int
0 (ByteString -> Int
B.length ByteString
s Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
where search :: Int -> Int -> Bool
search Int
lo Int
hi
| Int
hi Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
lo = Bool
False
| Bool
otherwise =
let mid :: Int
mid = (Int
lo Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
hi) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`quot` Int
2
in case Word8 -> Word8 -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Word8
w (ByteString -> Int -> Word8
U.unsafeIndex ByteString
s Int
mid) of
Ordering
GT -> Int -> Int -> Bool
search (Int
mid Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Int
hi
Ordering
LT -> Int -> Int -> Bool
search Int
lo (Int
mid Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
Ordering
_ -> Bool
True
memberChar :: Char -> FastSet -> Bool
memberChar :: Char -> FastSet -> Bool
memberChar Char
c = Word8 -> FastSet -> Bool
memberWord8 (Char -> Word8
I.c2w Char
c)
{-# INLINE memberChar #-}
mkTable :: B.ByteString -> B.ByteString
mkTable :: ByteString -> ByteString
mkTable ByteString
s = Int -> (Ptr Word8 -> IO ()) -> ByteString
I.unsafeCreate Int
32 ((Ptr Word8 -> IO ()) -> ByteString)
-> (Ptr Word8 -> IO ()) -> ByteString
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
t -> do
Ptr Word8
_ <- Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
I.memset Ptr Word8
t Word8
0 CSize
32
ByteString -> (CStringLen -> IO ()) -> IO ()
forall a. ByteString -> (CStringLen -> IO a) -> IO a
U.unsafeUseAsCStringLen ByteString
s ((CStringLen -> IO ()) -> IO ()) -> (CStringLen -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \(Ptr CChar
p, Int
l) ->
let loop :: Int -> IO ()
loop Int
n | Int
n Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
l = () -> IO ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
| Bool
otherwise = do
Word8
c <- Ptr CChar -> Int -> IO Word8
forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr CChar
p Int
n :: IO Word8
let I Int
byte Word8
bit = Int -> I
index (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
c)
Word8
prev <- Ptr Word8 -> Int -> IO Word8
forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr Word8
t Int
byte :: IO Word8
Ptr Word8 -> Int -> Word8 -> IO ()
forall a b. Storable a => Ptr b -> Int -> a -> IO ()
pokeByteOff Ptr Word8
t Int
byte (Word8
prev Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.|. Word8
bit)
Int -> IO ()
loop (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
in Int -> IO ()
loop Int
0
charClass :: String -> FastSet
charClass :: String -> FastSet
charClass = ByteString -> FastSet
set (ByteString -> FastSet)
-> (String -> ByteString) -> String -> FastSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ByteString
B8.pack (String -> ByteString) -> ShowS -> String -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
go
where go :: ShowS
go (Char
a:Char
'-':Char
b:String
xs) = [Char
a..Char
b] String -> ShowS
forall a. [a] -> [a] -> [a]
++ ShowS
go String
xs
go (Char
x:String
xs) = Char
x Char -> ShowS
forall a. a -> [a] -> [a]
: ShowS
go String
xs
go String
_ = String
""