-- ------------------------------------------------------------
{- |
Module : Text.XML.HXT.Arrow.WriteDocument
Copyright : Copyright (C) 2005-9 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : experimental
Portability: portable
Compound arrow for writing XML documents
-}
-- ------------------------------------------------------------
module Text.XML.HXT.Arrow.WriteDocument
( writeDocument
, writeDocument'
, writeDocumentToString
, prepareContents
)
where
import Control.Arrow -- arrow classes
import Control.Arrow.ArrowList
import Control.Arrow.ArrowIf
import Control.Arrow.ArrowTree
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.Arrow.XmlArrow
import Text.XML.HXT.Arrow.XmlState
import Text.XML.HXT.Arrow.XmlState.TypeDefs
import Text.XML.HXT.Arrow.XmlState.RunIOStateArrow
( initialSysState
)
import Text.XML.HXT.Arrow.Edit ( haskellRepOfXmlDoc
, indentDoc
, addDefaultDTDecl
, preventEmptyElements
, removeDocWhiteSpace
, treeRepOfXmlDoc
)
import Text.XML.HXT.Arrow.DocumentOutput ( putXmlDocument
, encodeDocument
, encodeDocument'
)
-- ------------------------------------------------------------
--
{- |
the main filter for writing documents
this filter can be configured by an option list like 'Text.XML.HXT.Arrow.ReadDocument.readDocument'
usage: @ writeDocument optionList destination @
if @ destination @ is the empty string or \"-\", stdout is used as output device
for available options see 'Text.XML.HXT.Arrow.XmlState.SystemConfig'
- @withOutputXML@ :
(default) issue XML: quote special XML chars \>,\<,\",\',& where neccessary
add XML processing instruction
and encode document with respect to output encoding,
- @withOutputHTML@ :
issue HTML: translate all special XML chars and all HTML chars with a corresponding entity reference
into entity references. Do not generate empty elements, e.g. @@ for HTML elements, that are allowed
to contain a none empty body. Result is for the example is @@.
The short form introduces trouble in various browsers.
- @withOutputXHTML@ :
same as @withOutputHTML@, but all none ASCII chars are substituted by char references.
- @withOutputPLAIN@ :
Do not substitute any chars. This is useful when generating something else than XML/HTML, e.g. Haskell source code.
- @withXmlPi yes/no@ :
Add a @@ processing instruction to the beginning of the document.
Default is yes.
- @withAddDefaultDTD@ :
if the document to be written was build by reading another document containing a Document Type Declaration,
this DTD is inserted into the output document (default: no insert)
- @withShowTree yes/no@ :
show DOM tree representation of document (for debugging)
- @withShowHaskell yes/no@ :
show Haskell representaion of document (for debugging)
a minimal main program for copying a document
has the following structure:
> module Main
> where
>
> import Text.XML.HXT.Core
>
> main :: IO ()
> main
> = do
> runX ( readDocument [] "hello.xml"
> >>>
> writeDocument [] "bye.xml"
> )
> return ()
an example for copying a document from the web to standard output with global trace level 1, input trace level 2,
output encoding isoLatin1,
and evaluation of
error code is:
> module Main
> where
>
> import Text.XML.HXT.Core
> import Text.XML.HXT.Curl
> -- or
> -- import Text.XML.HXT.HTTP
> import System.Exit
>
> main :: IO ()
> main
> = do
> [rc] <- runX
> ( configSysVars [ withTrace 1 -- set the defaults for all read-,
> , withCurl [] -- write- and other operations
> -- or withHTTP []
> ]
> >>>
> readDocument [ withTrace 2 -- use these additional
> , withParseHTML yes -- options only for this read
> ]
> "http://www.haskell.org/"
> >>>
> writeDocument [ withOutputEncoding isoLatin1
> ]
> "" -- output to stdout
> >>>
> getErrStatus
> )
> exitWith ( if rc >= c_err
> then ExitFailure 1
> else ExitSuccess
> )
-}
writeDocument :: SysConfigList -> String -> IOStateArrow s XmlTree XmlTree
writeDocument config dst
= localSysEnv
$
configSysVars config
>>>
perform ( (flip writeDocument') dst $< getSysVar theTextMode )
writeDocument' :: Bool -> String -> IOStateArrow s XmlTree XmlTree
writeDocument' textMode dst
= ( traceMsg 1 ("writeDocument: destination is " ++ show dst)
>>>
( (flip prepareContents) encodeDocument $< getSysVar idS )
>>>
traceDoc "document after encoding"
>>>
putXmlDocument textMode dst
>>>
traceMsg 1 "writeDocument: finished"
)
`when`
documentStatusOk
-- ------------------------------------------------------------
-- |
-- Convert a document into a string. Formating is done the same way
-- and with the same options as in 'writeDocument'. Default output encoding is
-- no encoding, that means the result is a normal unicode encode haskell string.
-- The default may be overwritten with the 'Text.XML.HXT.Arrow.XmlState.SystemConfig.withOutputEncoding' option.
-- The XML PI can be suppressed by the 'Text.XML.HXT.XmlKeywords.a_no_xml_pi' option.
--
-- This arrow fails, when the encoding scheme is not supported.
-- The arrow is pure, it does not run in the IO monad.
-- The XML PI is suppressed, if not explicitly turned on with an
-- option @ (a_no_xml_pi, v_0) @
writeDocumentToString :: ArrowXml a => SysConfigList -> a XmlTree String
writeDocumentToString config
= prepareContents ( foldr (>>>) id (withOutputEncoding unicodeString :
withXmlPi no :
config
)
$ initialSysState
) encodeDocument'
>>>
xshow getChildren
-- ------------------------------------------------------------
-- |
-- indent and format output
prepareContents :: ArrowXml a => XIOSysState -> (Bool -> Bool -> String -> a XmlTree XmlTree) -> a XmlTree XmlTree
prepareContents config encodeDoc
= indent
>>>
addDtd
>>>
format
where
indent' = getS theIndent config
removeWS' = getS theRemoveWS config
showTree' = getS theShowTree config
showHaskell' = getS theShowHaskell config
outHtml' = getS theOutputFmt config == HTMLoutput
outXhtml' = getS theOutputFmt config == XHTMLoutput
outXml' = getS theOutputFmt config == XMLoutput
noPi' = not $ getS theXmlPi config
noEEsFor' = getS theNoEmptyElemFor config
addDDTD' = getS theAddDefaultDTD config
outEnc' = getS theOutputEncoding config
addDtd
| addDDTD' = addDefaultDTDecl
| otherwise = this
indent
| indent' = indentDoc -- document indentation
| removeWS' = removeDocWhiteSpace -- remove all whitespace between tags
| otherwise = this
format
| showTree' = treeRepOfXmlDoc
| showHaskell' = haskellRepOfXmlDoc
| outHtml' = preventEmptyElements noEEsFor' True
>>>
encodeDoc -- convert doc into text with respect to output encoding with ASCII as default
False noPi' ( if null outEnc' then usAscii else outEnc' )
| outXhtml' = preventEmptyElements noEEsFor' True
>>>
encodeDoc -- convert doc into text with respect to output encoding
True noPi' outEnc'
| outXml' = ( if null noEEsFor'
then this
else preventEmptyElements noEEsFor' False
)
>>>
encodeDoc -- convert doc into text with respect to output encoding
True noPi' outEnc'
| otherwise = this
-- ------------------------------------------------------------