Copyright | (c) Kristof Bastiaensen 2020 |
---|---|
License | BSD-3 |
Maintainer | kristof@resonata.be |
Stability | unstable |
Portability | ghc |
Safe Haskell | None |
Language | Haskell2010 |
This module implements a streaming parser built on top of hexpat. It has an interface similar to parsec and other parser libraries. Note that backtracking is not supported. Only the current tag name and attributes can be looked at without backtracking. After a tag test and attribute parser have succeeded, attempting to backtrack will generate an error.
This library can be used with a streaming library (conduit, pipes,
etc...) by providing an instance for `List`.
Synopsis
- type EventListParser e a = EventParser [] e Identity a
- data EventParser l e m a
- type EventLoc = (SAXEvent Text Text, XMLParseLocation)
- data EventParseError e
- mapParser :: (Monad m, Monad n) => (forall b. m b -> n b) -> EventParser l e m a -> EventParser l e n a
- runEventParser :: List l => EventParser l e (ItemM l) a -> l EventLoc -> ItemM l (Either (EventParseError e, Maybe XMLParseLocation) a)
- customError :: Monad m => e -> EventParser l e m a
- parseXMLByteString :: EventListParser e a -> ParseOptions Text Text -> ByteString -> Either (EventParseError e, Maybe XMLParseLocation) a
- parseXMLFile :: ParseOptions Text Text -> IOMode -> FilePath -> EventListParser e a -> IO (Either (EventParseError e, Maybe XMLParseLocation) a)
- data AttrParser e a
- class ParseAttr e a
- getAttr :: ParseAttr e a => Text -> AttrParser e a
- peekAttr :: AttrParser e a -> AttrParser e a
- findAttr :: ParseAttr e a => Text -> AttrParser e (Maybe a)
- skipAttrs :: AttrParser e ()
- noAttrs :: AttrParser e ()
- someTag :: (Monad (ItemM l), List l) => (Text -> Bool) -> AttrParser e b -> (b -> EventParser l e (ItemM l) a) -> EventParser l e (ItemM l) a
- skipTag :: (Monad (ItemM l), List l) => EventParser l e (ItemM l) ()
- skipTags :: (Monad (ItemM l), List l) => EventParser l e (ItemM l) ()
- skipTagsTill :: (Monad (ItemM l), List l) => EventParser l e (ItemM l) a -> EventParser l e (ItemM l) a
- tag :: (Monad (ItemM l), List l) => Text -> AttrParser e b -> (b -> EventParser l e (ItemM l) a) -> EventParser l e (ItemM l) a
- someEmptyTag :: (Monad (ItemM l), List l) => (Text -> Bool) -> AttrParser e b -> EventParser l e (ItemM l) b
- emptyTag :: (Monad (ItemM l), List l) => Text -> AttrParser e b -> EventParser l e (ItemM l) b
- text :: (Monad (ItemM l), List l) => EventParser l e (ItemM l) Text
- (<|>) :: Alternative f => f a -> f a -> f a
- optional :: Alternative f => f a -> f (Maybe a)
- empty :: Alternative f => f a
- between :: Applicative m => m open -> m close -> m a -> m a
- choice :: (Foldable f, Alternative m) => f (m a) -> m a
- count :: Monad m => Int -> m a -> m [a]
- count' :: MonadPlus m => Int -> Int -> m a -> m [a]
- eitherP :: Alternative m => m a -> m b -> m (Either a b)
- endBy :: MonadPlus m => m a -> m sep -> m [a]
- endBy1 :: MonadPlus m => m a -> m sep -> m [a]
- many :: MonadPlus m => m a -> m [a]
- manyTill :: MonadPlus m => m a -> m end -> m [a]
- manyTill_ :: MonadPlus m => m a -> m end -> m ([a], end)
- some :: MonadPlus m => m a -> m [a]
- someTill :: MonadPlus m => m a -> m end -> m [a]
- someTill_ :: MonadPlus m => m a -> m end -> m ([a], end)
- option :: Alternative m => a -> m a -> m a
- sepBy :: MonadPlus m => m a -> m sep -> m [a]
- sepBy1 :: MonadPlus m => m a -> m sep -> m [a]
- sepEndBy :: MonadPlus m => m a -> m sep -> m [a]
- sepEndBy1 :: MonadPlus m => m a -> m sep -> m [a]
- skipMany :: MonadPlus m => m a -> m ()
- skipSome :: MonadPlus m => m a -> m ()
- skipCount :: Monad m => Int -> m a -> m ()
- skipManyTill :: MonadPlus m => m a -> m end -> m end
- skipSomeTill :: MonadPlus m => m a -> m end -> m end
Event parser datatype
type EventListParser e a = EventParser [] e Identity a Source #
A parser that parses a lazy list of SAX events into a value of
type `a`, or an error of type `EventParseError e`, where `e` is a
custom error type.
data EventParser l e m a Source #
A parser that parses a stream of SAX events of type `l EventLoc`
into a value of type `a` using `m` as the underlying
monad. `l` should be an instance of `List`, and `m` should be equal to
the type instance (`ItemM l`). Custom error messages are
possible using the type `e`.
Instances
data EventParseError e Source #
EndOfSaxStream | |
Empty | |
ExpectedTag | |
UnMatchedTag | |
ExpectedCloseTag | |
XmlError XMLParseError | |
AttributeNotFound Text Text | |
UnknownAttributes [Text] | |
Expected [Text] | |
CustomError e |
Instances
mapParser :: (Monad m, Monad n) => (forall b. m b -> n b) -> EventParser l e m a -> EventParser l e n a Source #
Change the base monad of a parser
:: List l | |
=> EventParser l e (ItemM l) a | parser to run |
-> l EventLoc | list of SAX event |
-> ItemM l (Either (EventParseError e, Maybe XMLParseLocation) a) |
customError :: Monad m => e -> EventParser l e m a Source #
Running parsers
parseXMLByteString :: EventListParser e a -> ParseOptions Text Text -> ByteString -> Either (EventParseError e, Maybe XMLParseLocation) a Source #
Parse a lazy bytestring with the given parser. Evaluating the
result to WHNF will consume the bytestring (as much as needed).
However this function does not close resources, for example a file
handle when using `readFile`. Make sure to always explicitly close
a resource, after evaluating to WHNF, or use the streaming
version of this library (hexpat-conduit). For reading
from a file use the `parseXMLFile` function.
parseXMLFile :: ParseOptions Text Text -> IOMode -> FilePath -> EventListParser e a -> IO (Either (EventParseError e, Maybe XMLParseLocation) a) Source #
Lazily parse an xml file into a value. This function ensures the input is consumed and the file handle closed, before returning the value.
Attribute parsers
data AttrParser e a Source #
A parser for the attributes of a single tag, that returns a value of type a. Custom error messages are possible of type e.
Instances
A parser for the value of an attribute
parseAttr
:: ParseAttr e a | |
=> Text | attribute name |
-> AttrParser e a |
Returns the value for the given attribute. Fails if the attribute is not found.
peekAttr :: AttrParser e a -> AttrParser e a Source #
run an attribute parser without consuming any attributes.
:: ParseAttr e a | |
=> Text | attribute name |
-> AttrParser e (Maybe a) |
Return the value for the attribute if it exists, otherwise
`Nothing`.
skipAttrs :: AttrParser e () Source #
consume all remaining attributes
noAttrs :: AttrParser e () Source #
expect no attributes. This is the same as `pure ()`
Event parsers
:: (Monad (ItemM l), List l) | |
=> (Text -> Bool) | tagname test |
-> AttrParser e b | parser for attributes |
-> (b -> EventParser l e (ItemM l) a) | parser for tag children |
-> EventParser l e (ItemM l) a |
Parse a tag that succeeds on the given test function. Parses the children in the order of the inner parser.
skipTags :: (Monad (ItemM l), List l) => EventParser l e (ItemM l) () Source #
Skip remaining tags and text, if any.
skipTagsTill :: (Monad (ItemM l), List l) => EventParser l e (ItemM l) a -> EventParser l e (ItemM l) a Source #
Skip zero or more tags until the given parser succeeds
:: (Monad (ItemM l), List l) | |
=> Text | tag name |
-> AttrParser e b | attribute parser |
-> (b -> EventParser l e (ItemM l) a) | tag children parser |
-> EventParser l e (ItemM l) a |
Parse a tag with the given name, using the inner parser for the children tags.
:: (Monad (ItemM l), List l) | |
=> (Text -> Bool) | tag name test |
-> AttrParser e b | attribute parser |
-> EventParser l e (ItemM l) b |
Parse a tag which should have no children.
:: (Monad (ItemM l), List l) | |
=> Text | tag name |
-> AttrParser e b | attribute parser |
-> EventParser l e (ItemM l) b |
Parse a tag with the given name which should have no children. If the tag has children, an error is raised.
text :: (Monad (ItemM l), List l) => EventParser l e (ItemM l) Text Source #
Parse text. Note that parsing a tag will skip white space, so if whitespace is significant, run this parser first.
Re-exports from Control.Applicative.Combinators
(<|>) :: Alternative f => f a -> f a -> f a infixl 3 #
An associative binary operation
optional :: Alternative f => f a -> f (Maybe a) #
One or none.
empty :: Alternative f => f a #
The identity of <|>
Re-exports from Control.Monad.Combinators
between :: Applicative m => m open -> m close -> m a -> m a #
`between open close p` parses `open`, followed by `p` and `close`.
Returns the value returned by `p`.
braces = between (symbol "{") (symbol "}")
choice :: (Foldable f, Alternative m) => f (m a) -> m a #
`choice ps` tries to apply the parsers in the list `ps` in order,
until one of them succeeds. Returns the value of the succeeding parser.
choice = asum
eitherP :: Alternative m => m a -> m b -> m (Either a b) #
Combine two alternatives.
eitherP a b = (Left <$> a) <|> (Right <$> b)
endBy :: MonadPlus m => m a -> m sep -> m [a] #
`endBy p sep` parses zero or more occurrences of `p`, separated and
ended by `sep`. Returns a list of values returned by `p`.
cStatements = cStatement `endBy` semicolon
endBy1 :: MonadPlus m => m a -> m sep -> m [a] #
`endBy1 p sep` parses one or more occurrences of `p`, separated and
ended by `sep`. Returns a list of values returned by `p`.
many :: MonadPlus m => m a -> m [a] #
`many p` applies the parser `p` zero or more times and returns a
list of the values returned by `p`.
identifier = (:) <$> letter <*> many (alphaNumChar <|> char '_')
manyTill :: MonadPlus m => m a -> m end -> m [a] #
`manyTill p end` applies parser `p` zero or more times until parser
`end` succeeds. Returns the list of values returned by `p`. Note that
the `end` result is consumed and lost. Use `manyTill_` if you wish to keep
it.
See also: `skipMany`, `skipManyTill`.
manyTill_ :: MonadPlus m => m a -> m end -> m ([a], end) #
`manyTill_ p end` applies parser `p` zero or more times until
parser `end` succeeds. Returns the list of values returned by `p` and the
`end` result. Use `manyTill` if you have no need for the result of the
`end`.
See also: `skipMany`, `skipManyTill`.
Since: parser-combinators-1.2.0
some :: MonadPlus m => m a -> m [a] #
`some p` applies the parser `p` one or more times and returns a
list of the values returned by `p`.
word = some letter
someTill :: MonadPlus m => m a -> m end -> m [a] #
`someTill p end` works similarly to `manyTill p end`, but `p`
should succeed at least once. Note that the `end` result is consumed and
lost. Use `someTill_` if you wish to keep it.
someTill p end = liftM2 (:) p (manyTill p end)
See also: `skipSome`, `skipSomeTill`.
someTill_ :: MonadPlus m => m a -> m end -> m ([a], end) #
`someTill_ p end` works similarly to `manyTill_ p end`, but `p`
should succeed at least once. Use `someTill` if you have no need for the
result of the `end`.
See also: `skipSome`, `skipSomeTill`.
Since: parser-combinators-1.2.0
option :: Alternative m => a -> m a -> m a #
sepBy :: MonadPlus m => m a -> m sep -> m [a] #
`sepBy p sep` parses zero or more occurrences of `p`, separated by
`sep`. Returns a list of values returned by `p`.
commaSep p = p `sepBy` comma
sepBy1 :: MonadPlus m => m a -> m sep -> m [a] #
`sepBy1 p sep` parses one or more occurrences of `p`, separated by
`sep`. Returns a list of values returned by `p`.
sepEndBy :: MonadPlus m => m a -> m sep -> m [a] #
`sepEndBy p sep` parses zero or more occurrences of `p`, separated
and optionally ended by `sep`. Returns a list of values returned by `p`.
sepEndBy1 :: MonadPlus m => m a -> m sep -> m [a] #
`sepEndBy1 p sep` parses one or more occurrences of `p`, separated
and optionally ended by `sep`. Returns a list of values returned by `p`.
skipMany :: MonadPlus m => m a -> m () #
`skipMany p` applies the parser `p` zero or more times, skipping
its result.
See also: `manyTill`, `skipManyTill`.
skipSome :: MonadPlus m => m a -> m () #
`skipSome p` applies the parser `p` one or more times, skipping its
result.
See also: `someTill`, `skipSomeTill`.
skipManyTill :: MonadPlus m => m a -> m end -> m end #
`skipManyTill p end` applies the parser `p` zero or more times
skipping results until parser `end` succeeds. The result parsed by `end` is
then returned.
skipSomeTill :: MonadPlus m => m a -> m end -> m end #
`skipSomeTill p end` applies the parser `p` one or more times
skipping results until parser `end` succeeds. The result parsed by `end` is
then returned.