{-# LANGUAGE BangPatterns, MagicHash #-}

-----------------------------------------------------------------------------
-- |
-- Module      :  Data.Attoparsec.ByteString.FastSet
-- Copyright   :  Bryan O'Sullivan 2007-2015
-- License     :  BSD3
--
-- Maintainer  :  bos@serpentine.com
-- Stability   :  experimental
-- Portability :  unknown
--
-- Fast set membership tests for 'Word8' and 8-bit 'Char' values.  The
-- set representation is unboxed for efficiency.  For small sets, we
-- test for membership using a binary search.  For larger sets, we use
-- a lookup table.
--
-----------------------------------------------------------------------------
module Data.Attoparsec.ByteString.FastSet
    (
    -- * Data type
      FastSet
    -- * Construction
    , fromList
    , set
    -- * Lookup
    , memberChar
    , memberWord8
    -- * Debugging
    , fromSet
    -- * Handy interface
    , charClass
    ) where

import Data.Bits ((.&.), (.|.))
import Foreign.Storable (peekByteOff, pokeByteOff)
import GHC.Exts (Int(I#), iShiftRA#, narrow8Word#, shiftL#)
import GHC.Word (Word8(W8#))
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Internal as I
import qualified Data.ByteString.Unsafe as U

data FastSet = Sorted { FastSet -> ByteString
fromSet :: !B.ByteString }
             | Table  { fromSet :: !B.ByteString }
    deriving (FastSet -> FastSet -> Bool
(FastSet -> FastSet -> Bool)
-> (FastSet -> FastSet -> Bool) -> Eq FastSet
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: FastSet -> FastSet -> Bool
$c/= :: FastSet -> FastSet -> Bool
== :: FastSet -> FastSet -> Bool
$c== :: FastSet -> FastSet -> Bool
Eq, Eq FastSet
Eq FastSet
-> (FastSet -> FastSet -> Ordering)
-> (FastSet -> FastSet -> Bool)
-> (FastSet -> FastSet -> Bool)
-> (FastSet -> FastSet -> Bool)
-> (FastSet -> FastSet -> Bool)
-> (FastSet -> FastSet -> FastSet)
-> (FastSet -> FastSet -> FastSet)
-> Ord FastSet
FastSet -> FastSet -> Bool
FastSet -> FastSet -> Ordering
FastSet -> FastSet -> FastSet
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: FastSet -> FastSet -> FastSet
$cmin :: FastSet -> FastSet -> FastSet
max :: FastSet -> FastSet -> FastSet
$cmax :: FastSet -> FastSet -> FastSet
>= :: FastSet -> FastSet -> Bool
$c>= :: FastSet -> FastSet -> Bool
> :: FastSet -> FastSet -> Bool
$c> :: FastSet -> FastSet -> Bool
<= :: FastSet -> FastSet -> Bool
$c<= :: FastSet -> FastSet -> Bool
< :: FastSet -> FastSet -> Bool
$c< :: FastSet -> FastSet -> Bool
compare :: FastSet -> FastSet -> Ordering
$ccompare :: FastSet -> FastSet -> Ordering
$cp1Ord :: Eq FastSet
Ord)

instance Show FastSet where
    show :: FastSet -> String
show (Sorted ByteString
s) = String
"FastSet Sorted " String -> ShowS
forall a. [a] -> [a] -> [a]
++ ShowS
forall a. Show a => a -> String
show (ByteString -> String
B8.unpack ByteString
s)
    show (Table ByteString
_) = String
"FastSet Table"

-- | The lower bound on the size of a lookup table.  We choose this to
-- balance table density against performance.
tableCutoff :: Int
tableCutoff :: Int
tableCutoff = Int
8

-- | Create a set.
set :: B.ByteString -> FastSet
set :: ByteString -> FastSet
set ByteString
s | ByteString -> Int
B.length ByteString
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
tableCutoff = ByteString -> FastSet
Sorted (ByteString -> FastSet)
-> (ByteString -> ByteString) -> ByteString -> FastSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
B.sort (ByteString -> FastSet) -> ByteString -> FastSet
forall a b. (a -> b) -> a -> b
$ ByteString
s
      | Bool
otherwise                = ByteString -> FastSet
Table (ByteString -> FastSet)
-> (ByteString -> ByteString) -> ByteString -> FastSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
mkTable (ByteString -> FastSet) -> ByteString -> FastSet
forall a b. (a -> b) -> a -> b
$ ByteString
s

fromList :: [Word8] -> FastSet
fromList :: [Word8] -> FastSet
fromList = ByteString -> FastSet
set (ByteString -> FastSet)
-> ([Word8] -> ByteString) -> [Word8] -> FastSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Word8] -> ByteString
B.pack

data I = I {-# UNPACK #-} !Int {-# UNPACK #-} !Word8

shiftR :: Int -> Int -> Int
shiftR :: Int -> Int -> Int
shiftR (I# Int#
x#) (I# Int#
i#) = Int# -> Int
I# (Int#
x# Int# -> Int# -> Int#
`iShiftRA#` Int#
i#)

shiftL :: Word8 -> Int -> Word8
shiftL :: Word8 -> Int -> Word8
shiftL (W8# Word#
x#) (I# Int#
i#) = Word# -> Word8
W8# (Word# -> Word#
narrow8Word# (Word#
x# Word# -> Int# -> Word#
`shiftL#` Int#
i#))

index :: Int -> I
index :: Int -> I
index Int
i = Int -> Word8 -> I
I (Int
i Int -> Int -> Int
`shiftR` Int
3) (Word8
1 Word8 -> Int -> Word8
`shiftL` (Int
i Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
7))
{-# INLINE index #-}

-- | Check the set for membership.
memberWord8 :: Word8 -> FastSet -> Bool
memberWord8 :: Word8 -> FastSet -> Bool
memberWord8 Word8
w (Table ByteString
t)  =
    let I Int
byte Word8
bit = Int -> I
index (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w)
    in  ByteString -> Int -> Word8
U.unsafeIndex ByteString
t Int
byte Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
bit Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0
memberWord8 Word8
w (Sorted ByteString
s) = Int -> Int -> Bool
search Int
0 (ByteString -> Int
B.length ByteString
s Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
    where search :: Int -> Int -> Bool
search Int
lo Int
hi
              | Int
hi Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
lo = Bool
False
              | Bool
otherwise =
                  let mid :: Int
mid = (Int
lo Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
hi) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`quot` Int
2
                  in case Word8 -> Word8 -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Word8
w (ByteString -> Int -> Word8
U.unsafeIndex ByteString
s Int
mid) of
                       Ordering
GT -> Int -> Int -> Bool
search (Int
mid Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Int
hi
                       Ordering
LT -> Int -> Int -> Bool
search Int
lo (Int
mid Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
                       Ordering
_ -> Bool
True

-- | Check the set for membership.  Only works with 8-bit characters:
-- characters above code point 255 will give wrong answers.
memberChar :: Char -> FastSet -> Bool
memberChar :: Char -> FastSet -> Bool
memberChar Char
c = Word8 -> FastSet -> Bool
memberWord8 (Char -> Word8
I.c2w Char
c)
{-# INLINE memberChar #-}

mkTable :: B.ByteString -> B.ByteString
mkTable :: ByteString -> ByteString
mkTable ByteString
s = Int -> (Ptr Word8 -> IO ()) -> ByteString
I.unsafeCreate Int
32 ((Ptr Word8 -> IO ()) -> ByteString)
-> (Ptr Word8 -> IO ()) -> ByteString
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
t -> do
            Ptr Word8
_ <- Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
I.memset Ptr Word8
t Word8
0 CSize
32
            ByteString -> (CStringLen -> IO ()) -> IO ()
forall a. ByteString -> (CStringLen -> IO a) -> IO a
U.unsafeUseAsCStringLen ByteString
s ((CStringLen -> IO ()) -> IO ()) -> (CStringLen -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \(Ptr CChar
p, Int
l) ->
              let loop :: Int -> IO ()
loop Int
n | Int
n Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
l = () -> IO ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
                         | Bool
otherwise = do
                    Word8
c <- Ptr CChar -> Int -> IO Word8
forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr CChar
p Int
n :: IO Word8
                    let I Int
byte Word8
bit = Int -> I
index (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
c)
                    Word8
prev <- Ptr Word8 -> Int -> IO Word8
forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr Word8
t Int
byte :: IO Word8
                    Ptr Word8 -> Int -> Word8 -> IO ()
forall a b. Storable a => Ptr b -> Int -> a -> IO ()
pokeByteOff Ptr Word8
t Int
byte (Word8
prev Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.|. Word8
bit)
                    Int -> IO ()
loop (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
              in Int -> IO ()
loop Int
0

charClass :: String -> FastSet
charClass :: String -> FastSet
charClass = ByteString -> FastSet
set (ByteString -> FastSet)
-> (String -> ByteString) -> String -> FastSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ByteString
B8.pack (String -> ByteString) -> ShowS -> String -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
go
    where go :: ShowS
go (Char
a:Char
'-':Char
b:String
xs) = [Char
a..Char
b] String -> ShowS
forall a. [a] -> [a] -> [a]
++ ShowS
go String
xs
          go (Char
x:String
xs) = Char
x Char -> ShowS
forall a. a -> [a] -> [a]
: ShowS
go String
xs
          go String
_ = String
""