-- | A streaming XML parser, using a method known as SAX. SAX isn't really a
--   standard, but an implementation, so it's just an \"SAX-like\" parser.
--   This module allows you parse an XML document without having to evaluate
--   it as a whole. This is needed for protocols like jabber, which use xml
--   streams for communication.

module Text.XML.HaXml.SAX
        ( SaxElement(..)
        , saxParse
        ) where

import Text.XML.HaXml.Types
import Text.XML.HaXml.Parse
import Text.XML.HaXml.Lex
import Text.ParserCombinators.Poly.State

data SaxElement
        = SaxDocTypeDecl DocTypeDecl
                -- ^ A doctype declaration occured(\<!DOCTYPE\>)
        | SaxProcessingInstruction ProcessingInstruction
                -- ^ A processing instruction occured (\<??\>)
        | SaxComment String             -- ^ A comment occured (\<!-- --\>)
        | SaxElementOpen Name [Attribute] -- ^ An element was opened (\<\>)
        | SaxElementClose Name          -- ^ An element was closed (\<\/\>)
        | SaxElementTag Name [Attribute]
                -- ^ An element without content occured (\<\/\>)
        | SaxCharData CharData          -- ^ Some string data occured
        | SaxReference Reference        -- ^ A reference occured

-- | @saxParse file content@ takes a filename and the string content of that
--   file and generates a stream of @SaxElement@s. If an error occurs, the
--   parsing stops and a string is returned using the @Maybe@ type.
saxParse :: String -- ^ The filename
         -> String -- ^ The content of the file
         -> ([SaxElement],Maybe String)
                -- ^ A tuple of the parsed elements and @Nothing@, if no
                --   error occured, or @Just@ @String@ if an error occured.
saxParse :: String -> String -> ([SaxElement], Maybe String)
saxParse String
file String
cntnt = Parser SymTabs (Posn, TokenT) SaxElement
-> SymTabs -> [(Posn, TokenT)] -> ([SaxElement], Maybe String)
forall s t a. Parser s t a -> s -> [t] -> ([a], Maybe String)
parseStream Parser SymTabs (Posn, TokenT) SaxElement
sax SymTabs
emptySTs
                                    (String -> String -> [(Posn, TokenT)]
xmlLex String
file String
cntnt)

parseStream :: Parser s t a -> s -> [t] -> ([a], Maybe String)
parseStream :: Parser s t a -> s -> [t] -> ([a], Maybe String)
parseStream Parser s t a
_ s
_ [] = ([],Maybe String
forall a. Maybe a
Nothing)
parseStream Parser s t a
p s
state [t]
toks = case Parser s t a -> s -> [t] -> (Either String a, s, [t])
forall s t a. Parser s t a -> s -> [t] -> (Either String a, s, [t])
runParser Parser s t a
p s
state [t]
toks of
        (Left String
err, s
_, [t]
_) -> ([],String -> Maybe String
forall a. a -> Maybe a
Just String
err)
        (Right a
res, s
nstate, [t]
rest) -> (a
resa -> [a] -> [a]
forall a. a -> [a] -> [a]
:[a]
moreres, Maybe String
err)
            where ([a]
moreres,Maybe String
err) = Parser s t a -> s -> [t] -> ([a], Maybe String)
forall s t a. Parser s t a -> s -> [t] -> ([a], Maybe String)
parseStream Parser s t a
p s
nstate [t]
rest

sax :: XParser SaxElement
sax :: Parser SymTabs (Posn, TokenT) SaxElement
sax = [Parser SymTabs (Posn, TokenT) SaxElement]
-> Parser SymTabs (Posn, TokenT) SaxElement
forall (p :: * -> *) a. PolyParse p => [p a] -> p a
oneOf [ Parser SymTabs (Posn, TokenT) SaxElement
saxelementopen
            , Parser SymTabs (Posn, TokenT) SaxElement
saxelementclose
            , Parser SymTabs (Posn, TokenT) SaxElement
saxprocessinginstruction
            , Parser SymTabs (Posn, TokenT) SaxElement
saxcomment
            , Parser SymTabs (Posn, TokenT) SaxElement
saxdoctypedecl
            , Parser SymTabs (Posn, TokenT) SaxElement
saxreference
            , Parser SymTabs (Posn, TokenT) SaxElement
saxchardata
            ]
        Parser SymTabs (Posn, TokenT) SaxElement
-> (String -> String) -> Parser SymTabs (Posn, TokenT) SaxElement
forall (p :: * -> *) a.
Commitment p =>
p a -> (String -> String) -> p a
`adjustErr` (String -> String -> String
forall a. [a] -> [a] -> [a]
++(String
"\nLooking for a SAX event:\n"
               String -> String -> String
forall a. [a] -> [a] -> [a]
++String
"  elem-open, elem-close, PI, comment, DTD, ref, or chardata"))

saxelementopen :: XParser SaxElement
saxelementopen :: Parser SymTabs (Posn, TokenT) SaxElement
saxelementopen = do
        TokenT -> XParser TokenT
tok TokenT
TokAnyOpen
        (ElemTag (N String
n) [Attribute]
as) <- XParser ElemTag
elemtag  -- no QN ever generated during parsing
        ( do TokenT -> XParser TokenT
tok TokenT
TokEndClose
             SaxElement -> Parser SymTabs (Posn, TokenT) SaxElement
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> [Attribute] -> SaxElement
SaxElementTag String
n [Attribute]
as)) Parser SymTabs (Posn, TokenT) SaxElement
-> Parser SymTabs (Posn, TokenT) SaxElement
-> Parser SymTabs (Posn, TokenT) SaxElement
forall s t a. Parser s t a -> Parser s t a -> Parser s t a
`onFail`
          ( do TokenT -> XParser TokenT
tok TokenT
TokAnyClose
               SaxElement -> Parser SymTabs (Posn, TokenT) SaxElement
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> [Attribute] -> SaxElement
SaxElementOpen String
n [Attribute]
as))
          Parser SymTabs (Posn, TokenT) SaxElement
-> Parser SymTabs (Posn, TokenT) SaxElement
-> Parser SymTabs (Posn, TokenT) SaxElement
forall s t a. Parser s t a -> Parser s t a -> Parser s t a
`onFail` String -> Parser SymTabs (Posn, TokenT) SaxElement
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"missing > or /> in element tag"

saxelementclose :: XParser SaxElement
saxelementclose :: Parser SymTabs (Posn, TokenT) SaxElement
saxelementclose = do
        TokenT -> XParser TokenT
tok TokenT
TokEndOpen
        String
n <- XParser String
name
        TokenT -> XParser TokenT
tok TokenT
TokAnyClose
        SaxElement -> Parser SymTabs (Posn, TokenT) SaxElement
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> SaxElement
SaxElementClose String
n)

saxcomment :: XParser SaxElement
saxcomment :: Parser SymTabs (Posn, TokenT) SaxElement
saxcomment = String -> SaxElement
SaxComment (String -> SaxElement)
-> XParser String -> Parser SymTabs (Posn, TokenT) SaxElement
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> XParser String
comment

saxchardata :: XParser SaxElement
saxchardata :: Parser SymTabs (Posn, TokenT) SaxElement
saxchardata =
  (String -> SaxElement
SaxCharData (String -> SaxElement)
-> XParser String -> Parser SymTabs (Posn, TokenT) SaxElement
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>XParser String
cdsect)
  Parser SymTabs (Posn, TokenT) SaxElement
-> Parser SymTabs (Posn, TokenT) SaxElement
-> Parser SymTabs (Posn, TokenT) SaxElement
forall s t a. Parser s t a -> Parser s t a -> Parser s t a
`onFail`
  (String -> SaxElement
SaxCharData (String -> SaxElement)
-> XParser String -> Parser SymTabs (Posn, TokenT) SaxElement
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>XParser String
chardata)

saxreference :: XParser SaxElement
saxreference :: Parser SymTabs (Posn, TokenT) SaxElement
saxreference = Reference -> SaxElement
SaxReference (Reference -> SaxElement)
-> Parser SymTabs (Posn, TokenT) Reference
-> Parser SymTabs (Posn, TokenT) SaxElement
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser SymTabs (Posn, TokenT) Reference
reference

saxdoctypedecl :: XParser SaxElement
saxdoctypedecl :: Parser SymTabs (Posn, TokenT) SaxElement
saxdoctypedecl = DocTypeDecl -> SaxElement
SaxDocTypeDecl (DocTypeDecl -> SaxElement)
-> Parser SymTabs (Posn, TokenT) DocTypeDecl
-> Parser SymTabs (Posn, TokenT) SaxElement
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser SymTabs (Posn, TokenT) DocTypeDecl
doctypedecl

saxprocessinginstruction :: XParser SaxElement
saxprocessinginstruction :: Parser SymTabs (Posn, TokenT) SaxElement
saxprocessinginstruction = (ProcessingInstruction -> SaxElement)
-> Parser SymTabs (Posn, TokenT) ProcessingInstruction
-> Parser SymTabs (Posn, TokenT) SaxElement
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ProcessingInstruction -> SaxElement
SaxProcessingInstruction Parser SymTabs (Posn, TokenT) ProcessingInstruction
processinginstruction