module Text.XML.Unresolved
(
writeFile
, readFile
, renderLBS
, parseLBS
, parseLBS_
, sinkDoc
, toEvents
, fromEvents
, renderBuilder
, renderBytes
, renderText
, InvalidEventStream (InvalidEventStream)
, P.def
, P.ParseSettings
, P.psDecodeEntities
, R.RenderSettings
, R.rsPretty
) where
import Prelude hiding (writeFile, readFile, FilePath)
import Filesystem.Path.CurrentOS (FilePath, encodeString)
import Data.XML.Types
import Control.Exception (Exception, SomeException)
import Data.Typeable (Typeable)
import Blaze.ByteString.Builder (Builder)
import qualified Text.XML.Stream.Render as R
import qualified Text.XML.Stream.Parse as P
import Data.ByteString (ByteString)
import Data.Text (Text)
import Control.Applicative ((<$>), (<*>))
import Control.Monad (when)
import qualified Data.Text as T
import Data.Char (isSpace)
import qualified Data.ByteString.Lazy as L
import System.IO.Unsafe (unsafePerformIO)
import qualified Data.Conduit as C
import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Binary as CB
import Control.Exception (throw)
import Control.Monad.Trans.Class (lift)
import Control.Monad.Trans.Resource (ResourceUnsafeIO, runExceptionT)
import Control.Monad.ST (runST)
import Text.XML.Stream.Token (lazyConsumeNoResource)
readFile :: P.ParseSettings -> FilePath -> IO Document
readFile ps fp = C.runResourceT $ P.parseFile ps fp C.$$ fromEvents
sinkDoc :: C.ResourceThrow m
=> P.ParseSettings -> C.Sink ByteString m Document
sinkDoc ps = P.parseBytes ps C.=$ fromEvents
writeFile :: R.RenderSettings -> FilePath -> Document -> IO ()
writeFile rs fp doc =
C.runResourceT $ renderBytes rs doc C.$$ CB.sinkFile (encodeString fp)
renderLBS :: R.RenderSettings -> Document -> L.ByteString
renderLBS rs doc =
L.fromChunks $ unsafePerformIO
$ C.runResourceT
$ lazyConsumeNoResource
$ renderBytes rs doc
parseLBS :: P.ParseSettings -> L.ByteString -> Either SomeException Document
parseLBS ps lbs =
runST $ runExceptionT
$ C.runResourceT
$ CL.sourceList (L.toChunks lbs) C.$$ sinkDoc ps
parseLBS_ :: P.ParseSettings -> L.ByteString -> Document
parseLBS_ ps lbs = either throw id $ parseLBS ps lbs
data InvalidEventStream = InvalidEventStream String
deriving (Show, Typeable)
instance Exception InvalidEventStream
renderBuilder :: C.Resource m => R.RenderSettings -> Document -> C.Source m Builder
renderBuilder rs doc = CL.sourceList (toEvents doc) C.$= R.renderBuilder rs
renderBytes :: ResourceUnsafeIO m => R.RenderSettings -> Document -> C.Source m ByteString
renderBytes rs doc = CL.sourceList (toEvents doc) C.$= R.renderBytes rs
renderText :: (C.ResourceThrow m, ResourceUnsafeIO m) => R.RenderSettings -> Document -> C.Source m Text
renderText rs doc = CL.sourceList (toEvents doc) C.$= R.renderText rs
fromEvents :: C.ResourceThrow m => C.Sink Event m Document
fromEvents = do
skip EventBeginDocument
d <- Document <$> goP <*> require goE <*> goM
skip EventEndDocument
y <- CL.head
if y == Nothing
then return d
else lift $ C.resourceThrow $ InvalidEventStream $ "Trailing matter after epilogue: " ++ show y
where
skip e = do
x <- CL.peek
when (x == Just e) (CL.drop 1)
many f =
go id
where
go front = do
x <- f
case x of
Nothing -> return $ front []
Just y -> go (front . (:) y)
dropReturn x = CL.drop 1 >> return x
require f = do
x <- f
case x of
Just y -> return y
Nothing -> do
y <- CL.head
lift $ C.resourceThrow $ InvalidEventStream $ "Document must have a single root element, got: " ++ show y
goP = Prologue <$> goM <*> goD <*> goM
goM = many goM'
goM' = do
x <- CL.peek
case x of
Just (EventInstruction i) -> dropReturn $ Just $ MiscInstruction i
Just (EventComment t) -> dropReturn $ Just $ MiscComment t
Just (EventContent (ContentText t))
| T.all isSpace t -> CL.drop 1 >> goM'
_ -> return Nothing
goD = do
x <- CL.peek
case x of
Just (EventBeginDoctype name meid) -> do
CL.drop 1
dropTillDoctype
return (Just $ Doctype name meid)
_ -> return Nothing
dropTillDoctype = do
x <- CL.head
case x of
Just EventEndDoctype -> return ()
_ -> lift $ C.resourceThrow $ InvalidEventStream $ "Invalid event during doctype, got: " ++ show x
goE = do
x <- CL.peek
case x of
Just (EventBeginElement n as) -> Just <$> goE' n as
_ -> return Nothing
goE' n as = do
CL.drop 1
ns <- many goN
y <- CL.head
if y == Just (EventEndElement n)
then return $ Element n as $ compressNodes ns
else lift $ C.resourceThrow $ InvalidEventStream $ "Missing end element for " ++ show n ++ ", got: " ++ show y
goN = do
x <- CL.peek
case x of
Just (EventBeginElement n as) -> (Just . NodeElement) <$> goE' n as
Just (EventInstruction i) -> dropReturn $ Just $ NodeInstruction i
Just (EventContent c) -> dropReturn $ Just $ NodeContent c
Just (EventComment t) -> dropReturn $ Just $ NodeComment t
Just (EventCDATA t) -> dropReturn $ Just $ NodeContent $ ContentText t
_ -> return Nothing
toEvents :: Document -> [Event]
toEvents (Document prol root epi) =
(EventBeginDocument :)
. goP prol . goE root . goM epi $ [EventEndDocument]
where
goP (Prologue before doctype after) =
goM before . maybe id goD doctype . goM after
goM [] = id
goM [x] = (goM' x :)
goM (x:xs) = (goM' x :) . goM xs
goM' (MiscInstruction i) = EventInstruction i
goM' (MiscComment t) = EventComment t
goD (Doctype name meid) =
(:) (EventBeginDoctype name meid)
. (:) EventEndDoctype
goE (Element name as ns) =
(EventBeginElement name as :)
. goN ns
. (EventEndElement name :)
goN [] = id
goN [x] = goN' x
goN (x:xs) = goN' x . goN xs
goN' (NodeElement e) = goE e
goN' (NodeInstruction i) = (EventInstruction i :)
goN' (NodeContent c) = (EventContent c :)
goN' (NodeComment t) = (EventComment t :)
compressNodes :: [Node] -> [Node]
compressNodes [] = []
compressNodes [x] = [x]
compressNodes (NodeContent (ContentText x) : NodeContent (ContentText y) : z) =
compressNodes $ NodeContent (ContentText $ x `T.append` y) : z
compressNodes (x:xs) = x : compressNodes xs