{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
module Xeno.DOM.Robust
( parse
, Node
, Content(..)
, name
, attributes
, contents
, children
) where
import Control.Monad.ST
import Control.Spork
import Data.ByteString.Internal(ByteString(..))
import Data.STRef
import qualified Data.Vector.Unboxed as UV
import qualified Data.Vector.Unboxed.Mutable as UMV
import Data.Mutable(asURef, newRef, readRef, writeRef)
import Xeno.SAX
import Xeno.Types
import Xeno.DOM.Internal(Node(..), Content(..), name, attributes, contents, children)
parse :: ByteString -> Either XenoException Node
parse inp =
case spork node of
Left e -> Left e
Right r ->
case findRootNode r of
Just n -> Right n
Nothing -> Left XenoExpectRootNode
where
findRootNode r = go 0
where
go n = case r UV.!? n of
Just 0x0 -> Just (Node str n r)
Just 0x1 -> go (n+3)
_ -> Nothing
PS _ offset0 _ = str
str = skipDoctype inp
node =
runST
(do nil <- UMV.new 1000
vecRef <- newSTRef nil
sizeRef <- fmap asURef $ newRef 0
parentRef <- fmap asURef $ newRef 0
process Process {
openF = \(PS _ name_start name_len) -> do
let tag = 0x00
tag_end = -1
index <- readRef sizeRef
v' <-
do v <- readSTRef vecRef
if index + 5 < UMV.length v
then pure v
else do
v' <- UMV.grow v (UMV.length v)
writeSTRef vecRef v'
return v'
tag_parent <- readRef parentRef
do writeRef parentRef index
writeRef sizeRef (index + 5)
UMV.write v' index tag
UMV.write v' (index + 1) tag_parent
UMV.write v' (index + 2) (name_start - offset0)
UMV.write v' (index + 3) name_len
UMV.write v' (index + 4) tag_end
, attrF = \(PS _ key_start key_len) (PS _ value_start value_len) -> do
index <- readRef sizeRef
v' <-
do v <- readSTRef vecRef
if index + 5 < UMV.length v
then pure v
else do
v' <- UMV.grow v (UMV.length v)
writeSTRef vecRef v'
return v'
let tag = 0x02
do writeRef sizeRef (index + 5)
do UMV.write v' index tag
UMV.write v' (index + 1) (key_start - offset0)
UMV.write v' (index + 2) key_len
UMV.write v' (index + 3) (value_start - offset0)
UMV.write v' (index + 4) value_len
, endOpenF = \_ -> return ()
, textF = \(PS _ text_start text_len) -> do
let tag = 0x01
index <- readRef sizeRef
v' <-
do v <- readSTRef vecRef
if index + 3 < UMV.length v
then pure v
else do
v' <- UMV.grow v (UMV.length v)
writeSTRef vecRef v'
return v'
do writeRef sizeRef (index + 3)
do UMV.write v' index tag
UMV.write v' (index + 1) (text_start - offset0)
UMV.write v' (index + 2) text_len
, closeF = \closeTag@(PS s _ _) -> do
v <- readSTRef vecRef
index <- readRef sizeRef
untilM $ do
parent <- readRef parentRef
correctTag <- if parent == 0
then return True
else do
parent_name <- UMV.read v (parent + 2)
parent_len <- UMV.read v (parent + 3)
let openTag = PS s (parent_name+offset0) parent_len
return $ openTag == closeTag
UMV.write v (parent + 4) index
previousParent <- UMV.read v (parent + 1)
writeRef parentRef previousParent
return correctTag
, cdataF = \(PS _ cdata_start cdata_len) -> do
let tag = 0x03
index <- readRef sizeRef
v' <-
do v <- readSTRef vecRef
if index + 3 < UMV.length v
then pure v
else do
v' <- UMV.grow v (UMV.length v)
writeSTRef vecRef v'
return v'
do writeRef sizeRef (index + 3)
do UMV.write v' index tag
UMV.write v' (index + 1) (cdata_start - offset0)
UMV.write v' (index + 2) cdata_len
} str
wet <- readSTRef vecRef
arr <- UV.unsafeFreeze wet
size <- readRef sizeRef
return (UV.unsafeSlice 0 size arr))
untilM :: Monad m => m Bool -> m ()
untilM loop = do
cond <- loop
case cond of
True -> return ()
False -> untilM loop