Copyright | Copyright (C) 2010 Uwe Schmidt |
---|---|
License | MIT |
Maintainer | Uwe Schmidt (uwe@fh-wedel.de) |
Stability | stable |
Portability | portable |
Safe Haskell | None |
Language | Haskell98 |
the interface for the basic state manipulation functions
- data XIOState us
- data XIOSysState
- type IOStateArrow s b c = IOSLA (XIOState s) b c
- type IOSArrow b c = IOStateArrow () b c
- type SysConfig = XIOSysState -> XIOSysState
- type SysConfigList = [SysConfig]
- getUserState :: IOStateArrow s b s
- setUserState :: IOStateArrow s s s
- changeUserState :: (b -> s -> s) -> IOStateArrow s b b
- withExtendedUserState :: s1 -> IOStateArrow (s1, s0) b c -> IOStateArrow s0 b c
- withOtherUserState :: s1 -> IOStateArrow s1 b c -> IOStateArrow s0 b c
- withoutUserState :: IOSArrow b c -> IOStateArrow s0 b c
- runX :: IOSArrow XmlTree c -> IO [c]
- configSysVars :: SysConfigList -> IOStateArrow s c c
- setSysAttr :: String -> IOStateArrow s String String
- unsetSysAttr :: String -> IOStateArrow s b b
- getSysAttr :: String -> IOStateArrow s b String
- getAllSysAttrs :: IOStateArrow s b Attributes
- setSysAttrString :: String -> String -> IOStateArrow s b b
- setSysAttrInt :: String -> Int -> IOStateArrow s b b
- getSysAttrInt :: Int -> String -> IOStateArrow s b Int
- getConfigAttr :: String -> SysConfigList -> String
- clearErrStatus :: IOStateArrow s b b
- setErrStatus :: IOStateArrow s Int Int
- getErrStatus :: IOStateArrow s XmlTree Int
- setErrMsgStatus :: IOStateArrow s XmlTree XmlTree
- setErrorMsgHandler :: Bool -> (String -> IO ()) -> IOStateArrow s b b
- errorMsgStderr :: IOStateArrow s b b
- errorMsgCollect :: IOStateArrow s b b
- errorMsgStderrAndCollect :: IOStateArrow s b b
- errorMsgIgnore :: IOStateArrow s b b
- getErrorMessages :: IOStateArrow s b XmlTree
- filterErrorMsg :: IOStateArrow s XmlTree XmlTree
- issueWarn :: String -> IOStateArrow s b b
- issueErr :: String -> IOStateArrow s b b
- issueFatal :: String -> IOStateArrow s b b
- issueExc :: String -> IOStateArrow s SomeException b
- setDocumentStatus :: Int -> String -> IOStateArrow s XmlTree XmlTree
- setDocumentStatusFromSystemState :: String -> IOStateArrow s XmlTree XmlTree
- documentStatusOk :: ArrowXml a => a XmlTree XmlTree
- setTraceLevel :: Int -> IOStateArrow s b b
- getTraceLevel :: IOStateArrow s b Int
- withTraceLevel :: Int -> IOStateArrow s b c -> IOStateArrow s b c
- setTraceCmd :: (Int -> String -> IO ()) -> IOStateArrow s b b
- getTraceCmd :: IOStateArrow a b (Int -> String -> IO ())
- trace :: Int -> IOStateArrow s b String -> IOStateArrow s b b
- traceMsg :: Int -> String -> IOStateArrow s b b
- traceValue :: Int -> (b -> String) -> IOStateArrow s b b
- traceString :: Int -> (b -> String) -> IOStateArrow s b b
- traceSource :: IOStateArrow s XmlTree XmlTree
- traceTree :: IOStateArrow s XmlTree XmlTree
- traceDoc :: String -> IOStateArrow s XmlTree XmlTree
- setBaseURI :: IOStateArrow s String String
- getBaseURI :: IOStateArrow s b String
- changeBaseURI :: IOStateArrow s String String
- setDefaultBaseURI :: String -> IOStateArrow s b String
- getDefaultBaseURI :: IOStateArrow s b String
- runInLocalURIContext :: IOStateArrow s b c -> IOStateArrow s b c
- expandURIString :: String -> String -> Maybe String
- expandURI :: ArrowXml a => a (String, String) String
- mkAbsURI :: IOStateArrow s String String
- getFragmentFromURI :: ArrowList a => a String String
- getPathFromURI :: ArrowList a => a String String
- getPortFromURI :: ArrowList a => a String String
- getQueryFromURI :: ArrowList a => a String String
- getRegNameFromURI :: ArrowList a => a String String
- getSchemeFromURI :: ArrowList a => a String String
- getUserInfoFromURI :: ArrowList a => a String String
- getMimeTypeTable :: IOStateArrow s b MimeTypeTable
- setMimeTypeTable :: MimeTypeTable -> IOStateArrow s b b
- setMimeTypeTableFromFile :: FilePath -> IOStateArrow s b b
- yes :: Bool
- no :: Bool
- withAcceptedMimeTypes :: [String] -> SysConfig
- withAddDefaultDTD :: Bool -> SysConfig
- withSysAttr :: String -> String -> SysConfig
- withCanonicalize :: Bool -> SysConfig
- withCompression :: (CompressionFct, DeCompressionFct) -> SysConfig
- withCheckNamespaces :: Bool -> SysConfig
- withDefaultBaseURI :: String -> SysConfig
- withStrictDeserialize :: Bool -> SysConfig
- withEncodingErrors :: Bool -> SysConfig
- withErrors :: Bool -> SysConfig
- withFileMimeType :: String -> SysConfig
- withIgnoreNoneXmlContents :: Bool -> SysConfig
- withIndent :: Bool -> SysConfig
- withInputEncoding :: String -> SysConfig
- withInputOption :: String -> String -> SysConfig
- withInputOptions :: Attributes -> SysConfig
- withMimeTypeFile :: String -> SysConfig
- withMimeTypeHandler :: String -> IOSArrow XmlTree XmlTree -> SysConfig
- withNoEmptyElemFor :: [String] -> SysConfig
- withXmlPi :: Bool -> SysConfig
- withOutputEncoding :: String -> SysConfig
- withOutputXML :: SysConfig
- withOutputHTML :: SysConfig
- withOutputXHTML :: SysConfig
- withOutputPLAIN :: SysConfig
- withParseByMimeType :: Bool -> SysConfig
- withParseHTML :: Bool -> SysConfig
- withPreserveComment :: Bool -> SysConfig
- withProxy :: String -> SysConfig
- withRedirect :: Bool -> SysConfig
- withRemoveWS :: Bool -> SysConfig
- withShowHaskell :: Bool -> SysConfig
- withShowTree :: Bool -> SysConfig
- withStrictInput :: Bool -> SysConfig
- withSubstDTDEntities :: Bool -> SysConfig
- withSubstHTMLEntities :: Bool -> SysConfig
- withTextMode :: Bool -> SysConfig
- withTrace :: Int -> SysConfig
- withValidate :: Bool -> SysConfig
- withWarnings :: Bool -> SysConfig
Data Types
state datatype consists of a system state and a user state; the user state is not fixed
data XIOSysState Source
predefined system state data type with all components for the system functions, like trace, error handling, ...
type IOStateArrow s b c = IOSLA (XIOState s) b c Source
The arrow type for stateful arrows
type IOSArrow b c = IOStateArrow () b c Source
The arrow for stateful arrows with no user defined state
type SysConfig = XIOSysState -> XIOSysState Source
type SysConfigList = [SysConfig] Source
User State Manipulation
getUserState :: IOStateArrow s b s Source
read the user defined part of the state
setUserState :: IOStateArrow s s s Source
set the user defined part of the state
changeUserState :: (b -> s -> s) -> IOStateArrow s b b Source
change the user defined part of the state
withExtendedUserState :: s1 -> IOStateArrow (s1, s0) b c -> IOStateArrow s0 b c Source
extend user state
Run an arrow with an extended user state component. The old component is stored together with a new one in a pair, the arrow is executed with this extended state, and the augmented state component is removed from the state when the arrow has finished its execution
withOtherUserState :: s1 -> IOStateArrow s1 b c -> IOStateArrow s0 b c Source
change the type of user state
This conversion is useful, when running a state arrow with another structure of the user state, e.g. with () when executing some IO arrows
withoutUserState :: IOSArrow b c -> IOStateArrow s0 b c Source
Run IO State arrows
runX :: IOSArrow XmlTree c -> IO [c] Source
apply an IOSArrow
to an empty root node with initialState
() as initial state
the main entry point for running a state arrow with IO
when running runX f
an empty XML root node is applied to f
.
usually f
will start with a constant arrow (ignoring the input), e.g. a readDocument
arrow.
for usage see examples with writeDocument
if input has to be fed into the arrow use runIOSLA
like in runIOSLA f emptyX inputDoc
Global System State Configuration and Access
configSysVars :: SysConfigList -> IOStateArrow s c c Source
setSysAttr :: String -> IOStateArrow s String String Source
store a string in global state under a given attribute name
unsetSysAttr :: String -> IOStateArrow s b b Source
remove an entry in global state, arrow input remains unchanged
getSysAttr :: String -> IOStateArrow s b String Source
read an attribute value from global state
getAllSysAttrs :: IOStateArrow s b Attributes Source
read all attributes from global state
setSysAttrString :: String -> String -> IOStateArrow s b b Source
setSysAttrInt :: String -> Int -> IOStateArrow s b b Source
store an int value in global state
getSysAttrInt :: Int -> String -> IOStateArrow s b Int Source
read an int value from global state
getSysAttrInt 0 myIntAttr
getConfigAttr :: String -> SysConfigList -> String Source
Error Handling
clearErrStatus :: IOStateArrow s b b Source
reset global error variable
setErrStatus :: IOStateArrow s Int Int Source
set global error variable
getErrStatus :: IOStateArrow s XmlTree Int Source
read current global error status
setErrMsgStatus :: IOStateArrow s XmlTree XmlTree Source
raise the global error status level to that of the input tree
setErrorMsgHandler :: Bool -> (String -> IO ()) -> IOStateArrow s b b Source
set the error message handler and the flag for collecting the errors
errorMsgStderr :: IOStateArrow s b b Source
the default error message handler: error output to stderr
errorMsgCollect :: IOStateArrow s b b Source
error message handler for collecting errors
errorMsgStderrAndCollect :: IOStateArrow s b b Source
error message handler for output to stderr and collecting
errorMsgIgnore :: IOStateArrow s b b Source
error message handler for ignoring errors
getErrorMessages :: IOStateArrow s b XmlTree Source
if error messages are collected by the error handler for processing these messages by the calling application, this arrow reads the stored messages and clears the error message store
filterErrorMsg :: IOStateArrow s XmlTree XmlTree Source
filter error messages from input trees and issue errors
issueWarn :: String -> IOStateArrow s b b Source
generate a warning message
issueErr :: String -> IOStateArrow s b b Source
generate an error message
issueFatal :: String -> IOStateArrow s b b Source
generate a fatal error message, e.g. document not found
issueExc :: String -> IOStateArrow s SomeException b Source
Default exception handler: issue a fatal error message and fail.
The parameter can be used to specify where the error occurred
setDocumentStatus :: Int -> String -> IOStateArrow s XmlTree XmlTree Source
add the error level and the module where the error occurred
to the attributes of a document root node and remove the children when level is greater or equal to c_err
.
called by setDocumentStatusFromSystemState
when the system state indicates an error
setDocumentStatusFromSystemState :: String -> IOStateArrow s XmlTree XmlTree Source
check whether the error level attribute in the system state
is set to error, in this case the children of the document root are
removed and the module name where the error occurred and the error level are added as attributes with setDocumentStatus
else nothing is changed
documentStatusOk :: ArrowXml a => a XmlTree XmlTree Source
check whether tree is a document root and the status attribute has a value less than c_err
Tracing
setTraceLevel :: Int -> IOStateArrow s b b Source
set the global trace level
getTraceLevel :: IOStateArrow s b Int Source
read the global trace level
withTraceLevel :: Int -> IOStateArrow s b c -> IOStateArrow s b c Source
run an arrow with a given trace level, the old trace level is restored after the arrow execution
setTraceCmd :: (Int -> String -> IO ()) -> IOStateArrow s b b Source
set the global trace command. This command does the trace output
getTraceCmd :: IOStateArrow a b (Int -> String -> IO ()) Source
access the command for trace output
trace :: Int -> IOStateArrow s b String -> IOStateArrow s b b Source
apply a trace arrow and issue message to stderr
traceMsg :: Int -> String -> IOStateArrow s b b Source
issue a string message as trace
traceValue :: Int -> (b -> String) -> IOStateArrow s b b Source
trace the current value transferred in a sequence of arrows.
The value is formatted by a string conversion function. This is a substitute for the old and less general traceString function
traceString :: Int -> (b -> String) -> IOStateArrow s b b Source
an old alias for traceValue
traceSource :: IOStateArrow s XmlTree XmlTree Source
issue the source representation of a document if trace level >= 3
for better readability the source is formatted with indentDoc
traceTree :: IOStateArrow s XmlTree XmlTree Source
issue the tree representation of a document if trace level >= 4
traceDoc :: String -> IOStateArrow s XmlTree XmlTree Source
trace a main computation step issue a message when trace level >= 1, issue document source if level >= 3, issue tree when level is >= 4
Document Base
setBaseURI :: IOStateArrow s String String Source
set the base URI of a document, used e.g. for reading includes, e.g. external entities, the input must be an absolute URI
getBaseURI :: IOStateArrow s b String Source
read the base URI from the global state
changeBaseURI :: IOStateArrow s String String Source
change the base URI with a possibly relative URI, can be used for evaluating the xml:base attribute. Returns the new absolute base URI. Fails, if input is not parsable with parseURIReference
see also: setBaseURI
, mkAbsURI
setDefaultBaseURI :: String -> IOStateArrow s b String Source
set the default base URI, if parameter is null, the system base ( file:///<cwd>/
) is used,
else the parameter, must be called before any document is read
getDefaultBaseURI :: IOStateArrow s b String Source
get the default base URI
runInLocalURIContext :: IOStateArrow s b c -> IOStateArrow s b c Source
remember base uri, run an arrow and restore the base URI, used with external entity substitution
URI Manipulation
expandURIString :: String -> String -> Maybe String Source
compute the absolute URI for a given URI and a base URI
expandURI :: ArrowXml a => a (String, String) String Source
arrow variant of expandURIString
, fails if expandURIString
returns Nothing
mkAbsURI :: IOStateArrow s String String Source
arrow for expanding an input URI into an absolute URI using global base URI, fails if input is not a legal URI
getFragmentFromURI :: ArrowList a => a String String Source
arrow for computing the fragment component of an URI, fails if input is not a legal URI
getPathFromURI :: ArrowList a => a String String Source
arrow for computing the path component of an URI, fails if input is not a legal URI
getPortFromURI :: ArrowList a => a String String Source
arrow for selecting the port number of the URI without leading ':', fails if input is not a legal URI
getQueryFromURI :: ArrowList a => a String String Source
arrow for computing the query component of an URI, fails if input is not a legal URI
getRegNameFromURI :: ArrowList a => a String String Source
arrow for selecting the registered name (host) of the URI, fails if input is not a legal URI
getSchemeFromURI :: ArrowList a => a String String Source
arrow for selecting the scheme (protocol) of the URI, fails if input is not a legal URI.
See Network.URI for URI components
getUserInfoFromURI :: ArrowList a => a String String Source
arrow for selecting the user info of the URI without trailing '@', fails if input is not a legal URI
Mime Type Handling
getMimeTypeTable :: IOStateArrow s b MimeTypeTable Source
read the system mimetype table
setMimeTypeTable :: MimeTypeTable -> IOStateArrow s b b Source
set the table mapping of file extensions to mime types in the system state
Default table is defined in MimeTypeDefaults
.
This table is used when reading local files (file: protocol) to determine the mime type
setMimeTypeTableFromFile :: FilePath -> IOStateArrow s b b Source
set the table mapping of file extensions to mime types by an external config file
The config file must follow the conventions of /etc/mime.types on a debian linux system, that means all empty lines and all lines starting with a # are ignored. The other lines must consist of a mime type followed by a possibly empty list of extensions. The list of extensions and mime types overwrites the default list in the system state of the IOStateArrow
System Configuration and Options
withAcceptedMimeTypes :: [String] -> SysConfig Source
Specify the set of accepted mime types.
All contents of documents for which the mime type is not found in this list are discarded.
withAddDefaultDTD :: Bool -> SysConfig Source
withSysAttr :: String -> String -> SysConfig Source
withSysAttr key value
: store an arbitrary key value pair in system state
withCanonicalize :: Bool -> SysConfig Source
withCanonicalize yes/no
: read option, canonicalize document, default is yes
withCompression :: (CompressionFct, DeCompressionFct) -> SysConfig Source
Configure compression and decompression for binary serialization/deserialization. First component is the compression function applied after serialization, second the decompression applied before deserialization.
withCheckNamespaces :: Bool -> SysConfig Source
withCheckNamespaces yes/no
: read option, check namespaces, default is no
withDefaultBaseURI :: String -> SysConfig Source
withDefaultBaseURI URI
, input option, set the default base URI
This option can be useful when parsing documents from stdin or contained in a string, and interpreting relative URIs within the document
withStrictDeserialize :: Bool -> SysConfig Source
Strict input for deserialization of binary data
withEncodingErrors :: Bool -> SysConfig Source
withEncodingErrors yes/no
: input option, ignore all encoding errors, default is no
withErrors :: Bool -> SysConfig Source
withErrors yes/no
: system option for suppressing error messages, default is no
withFileMimeType :: String -> SysConfig Source
Force a given mime type for all file contents.
The mime type for file access will then not be computed by looking into a mime.types file
withIgnoreNoneXmlContents :: Bool -> SysConfig Source
withIgnoreNoneXmlContents yes/no
: input option, ignore document contents of non-XML/HTML documents.
This option can be useful for implementing crawler like applications, e.g. an URL checker. In those cases net traffic can be reduced.
withIndent :: Bool -> SysConfig Source
withIndent yes/no
: output option, indent document before output, default is no
withInputEncoding :: String -> SysConfig Source
withInputEncoding encodingName
: input option
Set default document encoding (utf8
, isoLatin1
, usAscii
, iso8859_2
, ... , iso8859_16
, ...).
Only XML, HTML and text documents are decoded,
default decoding for XML/HTML is utf8, for text iso latin1 (no decoding).
withInputOption :: String -> String -> SysConfig Source
withMimeTypeFile :: String -> SysConfig Source
withMimeTypeFile filename
: input option,
set the mime type table for file:
documents by given file.
The format of this config file must be in the syntax of a debian linux "mime.types" config file
withMimeTypeHandler :: String -> IOSArrow XmlTree XmlTree -> SysConfig Source
Specify a content handler for documents of a given mime type
withNoEmptyElemFor :: [String] -> SysConfig Source
withOutputEncoding :: String -> SysConfig Source
withOutputEncoding encoding
, output option,
default is the default input encoding or utf8, if input encoding is not set
withOutputXML :: SysConfig Source
withOutputXML
: output option, default writing
Default is writing XML: quote special XML chars >,<,",',& where necessary,
add XML processing instruction
and encode document with respect to withOutputEncoding
withOutputHTML :: SysConfig Source
Write XHTML: quote all special XML chars, use HTML entity refs or char refs for non-ASCII chars
withOutputPLAIN :: SysConfig Source
suppresses all char and entity substitution
withParseByMimeType :: Bool -> SysConfig Source
withParseByMimeType yes/no
: read option, select the parser by the mime type of the document
(pulled out of the HTTP header).
When the mime type is set to "text/html" the configured HTML parser is taken, when it's set to "text/xml" or "text/xhtml" the configured XML parser is taken. If the mime type is something else, no further processing is performed, the contents are given back to the application in form of a single text node. If the default document encoding is set to isoLatin1, this even enables processing of arbitrary binary data.
withParseHTML :: Bool -> SysConfig Source
withParseHTML yes/no
: read option, use HTML parser, default is no
(use XML parser)
withPreserveComment :: Bool -> SysConfig Source
withPreserveComment yes/no
: read option, preserve comments during canonicalization, default is no
withProxy :: String -> SysConfig Source
withProxy "host:port"
: input option, configure a proxy for HTTP access, e.g. www-cache:3128
withRedirect :: Bool -> SysConfig Source
withRedirect yes/no
: input option, automatically follow redirected URIs, default is yes
withRemoveWS :: Bool -> SysConfig Source
withRemoveWS yes/no
: read and write option, remove all whitespace, used for document indentation, default is no
withShowHaskell :: Bool -> SysConfig Source
withShowTree :: Bool -> SysConfig Source
withStrictInput :: Bool -> SysConfig Source
withStrictInput yes/no
: input option, input of file and HTTP contents is read eagerly, default is no
withSubstDTDEntities :: Bool -> SysConfig Source
withSubstDTDEntities yes/no
: read option, substitute general entities defined in DTD, default is yes
.
switching this option and the validate option off can lead to faster parsing, because then
there is no need to access the DTD
withSubstHTMLEntities :: Bool -> SysConfig Source
withSubstHTMLEntities yes/no
: read option, substitute general entities defined in HTML DTD, default is no
.
switching this option on and the substDTDEntities and validate options off can lead to faster parsing
because there is no need to access a DTD, but still the HTML general entities are substituted
withTextMode :: Bool -> SysConfig Source
withValidate :: Bool -> SysConfig Source
withValidate yes/no
: read option, validate document against DTD, default is yes
withWarnings :: Bool -> SysConfig Source
withWarnings yes/no
: system option, issue warnings during reading, HTML parsing and processing,
default is yes