{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE CPP #-}
module Xeno.DOM
( parse
, Node
, Content(..)
, name
, attributes
, contents
, children
) where
import Control.Monad.ST
import Control.Spork
import Data.ByteString (ByteString)
#if MIN_VERSION_bytestring(0,11,0)
import Data.ByteString.Internal (ByteString(BS))
#else
import Data.ByteString.Internal (ByteString(PS))
#endif
import qualified Data.ByteString as S
import Data.Mutable
import Data.STRef
import qualified Data.Vector.Unboxed as UV
import qualified Data.Vector.Unboxed.Mutable as UMV
#if MIN_VERSION_bytestring(0,11,0)
import Foreign.Ptr (minusPtr)
import Foreign.ForeignPtr (ForeignPtr, withForeignPtr)
import System.IO.Unsafe (unsafeDupablePerformIO)
#endif
import Xeno.SAX
import Xeno.Types
import Xeno.DOM.Internal
parse :: ByteString -> Either XenoException Node
parse :: ByteString -> Either XenoException Node
parse ByteString
str =
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall a b. a -> Either a b
Left XenoException
XenoExpectRootNode) forall a b. b -> Either a b
Right forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector Int -> Maybe Node
findRootNode forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< forall e a. Exception e => a -> Either e a
spork Vector Int
node
where
findRootNode :: Vector Int -> Maybe Node
findRootNode Vector Int
r = Int -> Maybe Node
go Int
0
where
go :: Int -> Maybe Node
go Int
n = case Vector Int
r forall a. Unbox a => Vector a -> Int -> Maybe a
UV.!? Int
n of
Just Int
0x0 -> forall a. a -> Maybe a
Just (ByteString -> Int -> Vector Int -> Node
Node ByteString
str Int
n Vector Int
r)
Just Int
0x1 -> Int -> Maybe Node
go (Int
nforall a. Num a => a -> a -> a
+Int
3)
Maybe Int
_ -> forall a. Maybe a
Nothing
#if MIN_VERSION_bytestring(0,11,0)
BS ForeignPtr Word8
offset0 Int
_ = ByteString
str
#else
PS _ offset0 _ = str
#endif
node :: Vector Int
node =
let !initialSize :: Int
initialSize = forall a. Ord a => a -> a -> a
max Int
1000 (ByteString -> Int
S.length ByteString
str forall a. Integral a => a -> a -> a
`div` Int
8) in
forall a. (forall s. ST s a) -> a
runST
(do MVector s Int
nil <- forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> m (MVector (PrimState m) a)
UMV.unsafeNew Int
initialSize
STRef s (MVector s Int)
vecRef <- forall a s. a -> ST s (STRef s a)
newSTRef MVector s Int
nil
URef s Int
sizeRef <- forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall s a. URef s a -> URef s a
asURef (forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
RefElement c -> m c
newRef Int
0)
URef s Int
parentRef <- forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall s a. URef s a -> URef s a
asURef (forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
RefElement c -> m c
newRef Int
0)
forall (m :: * -> *) str.
(Monad m, StringLike str) =>
Process (m ()) -> str -> m ()
process Process {
#if MIN_VERSION_bytestring(0,11,0)
openF :: ByteString -> ST s ()
openF = \(BS ForeignPtr Word8
name_start Int
name_len) -> do
#else
openF = \(PS _ name_start name_len) -> do
#endif
let tag :: Int
tag = Int
0x00
tag_end :: Int
tag_end = -Int
1
Int
index <- forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> m (RefElement c)
readRef URef s Int
sizeRef
MVector s Int
v' <-
do MVector s Int
v <- forall s a. STRef s a -> ST s a
readSTRef STRef s (MVector s Int)
vecRef
if Int
index forall a. Num a => a -> a -> a
+ Int
5 forall a. Ord a => a -> a -> Bool
< forall a s. Unbox a => MVector s a -> Int
UMV.length MVector s Int
v
then forall (f :: * -> *) a. Applicative f => a -> f a
pure MVector s Int
v
else do
MVector s Int
v' <- forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m (MVector (PrimState m) a)
UMV.unsafeGrow MVector s Int
v (forall {b} {p} {a}.
(Integral b, Integral p) =>
ForeignPtr a -> Int -> p -> b -> b
predictGrowSize ForeignPtr Word8
name_start Int
name_len (Int
index forall a. Num a => a -> a -> a
+ Int
5) (forall a s. Unbox a => MVector s a -> Int
UMV.length MVector s Int
v))
forall s a. STRef s a -> a -> ST s ()
writeSTRef STRef s (MVector s Int)
vecRef MVector s Int
v'
forall (m :: * -> *) a. Monad m => a -> m a
return MVector s Int
v'
Int
tag_parent <- forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> m (RefElement c)
readRef URef s Int
parentRef
do forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> RefElement c -> m ()
writeRef URef s Int
parentRef Int
index
forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> RefElement c -> m ()
writeRef URef s Int
sizeRef (Int
index forall a. Num a => a -> a -> a
+ Int
5)
do forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.unsafeWrite MVector s Int
v' Int
index Int
tag
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.unsafeWrite MVector s Int
v' (Int
index forall a. Num a => a -> a -> a
+ Int
1) Int
tag_parent
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.unsafeWrite MVector s Int
v' (Int
index forall a. Num a => a -> a -> a
+ Int
2) (forall a b. ForeignPtr a -> ForeignPtr b -> Int
distance ForeignPtr Word8
name_start ForeignPtr Word8
offset0)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.unsafeWrite MVector s Int
v' (Int
index forall a. Num a => a -> a -> a
+ Int
3) Int
name_len
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.unsafeWrite MVector s Int
v' (Int
index forall a. Num a => a -> a -> a
+ Int
4) Int
tag_end
#if MIN_VERSION_bytestring(0,11,0)
, attrF :: ByteString -> ByteString -> ST s ()
attrF = \(BS ForeignPtr Word8
key_start Int
key_len) (BS ForeignPtr Word8
value_start Int
value_len) -> do
#else
, attrF = \(PS _ key_start key_len) (PS _ value_start value_len) -> do
#endif
Int
index <- forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> m (RefElement c)
readRef URef s Int
sizeRef
MVector s Int
v' <-
do MVector s Int
v <- forall s a. STRef s a -> ST s a
readSTRef STRef s (MVector s Int)
vecRef
if Int
index forall a. Num a => a -> a -> a
+ Int
5 forall a. Ord a => a -> a -> Bool
< forall a s. Unbox a => MVector s a -> Int
UMV.length MVector s Int
v
then forall (f :: * -> *) a. Applicative f => a -> f a
pure MVector s Int
v
else do
MVector s Int
v' <- forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m (MVector (PrimState m) a)
UMV.unsafeGrow MVector s Int
v (forall {b} {p} {a}.
(Integral b, Integral p) =>
ForeignPtr a -> Int -> p -> b -> b
predictGrowSize ForeignPtr Word8
value_start Int
value_len (Int
index forall a. Num a => a -> a -> a
+ Int
5) (forall a s. Unbox a => MVector s a -> Int
UMV.length MVector s Int
v))
forall s a. STRef s a -> a -> ST s ()
writeSTRef STRef s (MVector s Int)
vecRef MVector s Int
v'
forall (m :: * -> *) a. Monad m => a -> m a
return MVector s Int
v'
let tag :: Int
tag = Int
0x02
do forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> RefElement c -> m ()
writeRef URef s Int
sizeRef (Int
index forall a. Num a => a -> a -> a
+ Int
5)
do forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.unsafeWrite MVector s Int
v' Int
index Int
tag
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.unsafeWrite MVector s Int
v' (Int
index forall a. Num a => a -> a -> a
+ Int
1) (forall a b. ForeignPtr a -> ForeignPtr b -> Int
distance ForeignPtr Word8
key_start ForeignPtr Word8
offset0)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.unsafeWrite MVector s Int
v' (Int
index forall a. Num a => a -> a -> a
+ Int
2) Int
key_len
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.unsafeWrite MVector s Int
v' (Int
index forall a. Num a => a -> a -> a
+ Int
3) (forall a b. ForeignPtr a -> ForeignPtr b -> Int
distance ForeignPtr Word8
value_start ForeignPtr Word8
offset0)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.unsafeWrite MVector s Int
v' (Int
index forall a. Num a => a -> a -> a
+ Int
4) Int
value_len
, endOpenF :: ByteString -> ST s ()
endOpenF = \ByteString
_ -> forall (m :: * -> *) a. Monad m => a -> m a
return ()
#if MIN_VERSION_bytestring(0,11,0)
, textF :: ByteString -> ST s ()
textF = \(BS ForeignPtr Word8
text_start Int
text_len) -> do
#else
, textF = \(PS _ text_start text_len) -> do
#endif
let tag :: Int
tag = Int
0x01
Int
index <- forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> m (RefElement c)
readRef URef s Int
sizeRef
MVector s Int
v' <-
do MVector s Int
v <- forall s a. STRef s a -> ST s a
readSTRef STRef s (MVector s Int)
vecRef
if Int
index forall a. Num a => a -> a -> a
+ Int
3 forall a. Ord a => a -> a -> Bool
< forall a s. Unbox a => MVector s a -> Int
UMV.length MVector s Int
v
then forall (f :: * -> *) a. Applicative f => a -> f a
pure MVector s Int
v
else do
MVector s Int
v' <- forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m (MVector (PrimState m) a)
UMV.unsafeGrow MVector s Int
v (forall {b} {p} {a}.
(Integral b, Integral p) =>
ForeignPtr a -> Int -> p -> b -> b
predictGrowSize ForeignPtr Word8
text_start Int
text_len (Int
index forall a. Num a => a -> a -> a
+ Int
3) (forall a s. Unbox a => MVector s a -> Int
UMV.length MVector s Int
v))
forall s a. STRef s a -> a -> ST s ()
writeSTRef STRef s (MVector s Int)
vecRef MVector s Int
v'
forall (m :: * -> *) a. Monad m => a -> m a
return MVector s Int
v'
do forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> RefElement c -> m ()
writeRef URef s Int
sizeRef (Int
index forall a. Num a => a -> a -> a
+ Int
3)
do forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.unsafeWrite MVector s Int
v' Int
index Int
tag
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.unsafeWrite MVector s Int
v' (Int
index forall a. Num a => a -> a -> a
+ Int
1) (forall a b. ForeignPtr a -> ForeignPtr b -> Int
distance ForeignPtr Word8
text_start ForeignPtr Word8
offset0)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.unsafeWrite MVector s Int
v' (Int
index forall a. Num a => a -> a -> a
+ Int
2) Int
text_len
, closeF :: ByteString -> ST s ()
closeF = \ByteString
_ -> do
MVector s Int
v <- forall s a. STRef s a -> ST s a
readSTRef STRef s (MVector s Int)
vecRef
Int
parent <- forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> m (RefElement c)
readRef URef s Int
parentRef
Int
index <- forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> m (RefElement c)
readRef URef s Int
sizeRef
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.unsafeWrite MVector s Int
v (Int
parent forall a. Num a => a -> a -> a
+ Int
4) Int
index
Int
previousParent <- forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
UMV.unsafeRead MVector s Int
v (Int
parent forall a. Num a => a -> a -> a
+ Int
1)
forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> RefElement c -> m ()
writeRef URef s Int
parentRef Int
previousParent
#if MIN_VERSION_bytestring(0,11,0)
, cdataF :: ByteString -> ST s ()
cdataF = \(BS ForeignPtr Word8
cdata_start Int
cdata_len) -> do
#else
, cdataF = \(PS _ cdata_start cdata_len) -> do
#endif
let tag :: Int
tag = Int
0x03
Int
index <- forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> m (RefElement c)
readRef URef s Int
sizeRef
MVector s Int
v' <- do
MVector s Int
v <- forall s a. STRef s a -> ST s a
readSTRef STRef s (MVector s Int)
vecRef
if Int
index forall a. Num a => a -> a -> a
+ Int
3 forall a. Ord a => a -> a -> Bool
< forall a s. Unbox a => MVector s a -> Int
UMV.length MVector s Int
v
then forall (f :: * -> *) a. Applicative f => a -> f a
pure MVector s Int
v
else do
MVector s Int
v' <- forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m (MVector (PrimState m) a)
UMV.unsafeGrow MVector s Int
v (forall {b} {p} {a}.
(Integral b, Integral p) =>
ForeignPtr a -> Int -> p -> b -> b
predictGrowSize ForeignPtr Word8
cdata_start Int
cdata_len (Int
index forall a. Num a => a -> a -> a
+ Int
3) (forall a s. Unbox a => MVector s a -> Int
UMV.length MVector s Int
v))
forall s a. STRef s a -> a -> ST s ()
writeSTRef STRef s (MVector s Int)
vecRef MVector s Int
v'
forall (m :: * -> *) a. Monad m => a -> m a
return MVector s Int
v'
forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> RefElement c -> m ()
writeRef URef s Int
sizeRef (Int
index forall a. Num a => a -> a -> a
+ Int
3)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.unsafeWrite MVector s Int
v' Int
index Int
tag
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.unsafeWrite MVector s Int
v' (Int
index forall a. Num a => a -> a -> a
+ Int
1) (forall a b. ForeignPtr a -> ForeignPtr b -> Int
distance ForeignPtr Word8
cdata_start ForeignPtr Word8
offset0)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UMV.unsafeWrite MVector s Int
v' (Int
index forall a. Num a => a -> a -> a
+ Int
2) Int
cdata_len
} ByteString
str
MVector s Int
wet <- forall s a. STRef s a -> ST s a
readSTRef STRef s (MVector s Int)
vecRef
Vector Int
arr <- forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
UV.unsafeFreeze MVector s Int
wet
Int
size <- forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> m (RefElement c)
readRef URef s Int
sizeRef
forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. Unbox a => Int -> Int -> Vector a -> Vector a
UV.unsafeSlice Int
0 Int
size Vector Int
arr))
where
predictGrowSize :: ForeignPtr a -> Int -> p -> b -> b
predictGrowSize ForeignPtr a
bsStart Int
bsLen p
index b
vecLen =
let
processedLen :: Int
processedLen = forall a. Ord a => a -> a -> a
max Int
1 (forall a b. ForeignPtr a -> ForeignPtr b -> Int
distance ForeignPtr a
bsStart ForeignPtr Word8
offset0 forall a. Num a => a -> a -> a
+ Int
bsLen)
k :: Double
k = (Double
1.25 :: Double) forall a. Num a => a -> a -> a
* forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int
S.length ByteString
str) forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
processedLen
predictedTotalSize :: b
predictedTotalSize = forall a b. (RealFrac a, Integral b) => a -> b
round forall a b. (a -> b) -> a -> b
$ forall a b. (Integral a, Num b) => a -> b
fromIntegral p
index forall a. Num a => a -> a -> a
* Double
k
growSize :: b
growSize = b
predictedTotalSize forall a. Num a => a -> a -> a
- b
vecLen
in b
growSize
#if MIN_VERSION_bytestring(0,11,0)
minusForeignPtr :: ForeignPtr a -> ForeignPtr b -> Int
minusForeignPtr :: forall a b. ForeignPtr a -> ForeignPtr b -> Int
minusForeignPtr ForeignPtr a
fpA ForeignPtr b
fpB = forall a. IO a -> a
unsafeDupablePerformIO forall a b. (a -> b) -> a -> b
$
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr a
fpA forall a b. (a -> b) -> a -> b
$ \Ptr a
ptrA -> forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr b
fpB forall a b. (a -> b) -> a -> b
$ \Ptr b
ptrB ->
forall (f :: * -> *) a. Applicative f => a -> f a
pure (forall a b. Ptr a -> Ptr b -> Int
minusPtr Ptr a
ptrA Ptr b
ptrB)
distance :: ForeignPtr a -> ForeignPtr b -> Int
distance :: forall a b. ForeignPtr a -> ForeignPtr b -> Int
distance = forall a b. ForeignPtr a -> ForeignPtr b -> Int
minusForeignPtr
#else
distance :: Int -> Int -> Int
distance a b = a - b
#endif