onama-0.2.3.0: HTML-parsing primitives for Parsec.
Copyright(c) William Yao 2017-2024
LicenseBSD-3
Maintainerwilliamyaoh@gmail.com
Stabilityexperimental
Safe HaskellSafe-Inferred
LanguageHaskell2010

Text.HTML.Onama

Description

Some extra primitives to parse HTMl with Parsec.

You'll still need to import Text.Parsec along with this library. These primitives will work with all the combinators from Parsec. Note that you'll need to override Parsec's satisfies, since that one only works on character streams (for some reason).

testParser = dp
  tagOpen "b"
  bolded <- text
  tagClose "b"
testParser2 = do
  tagClose "div"
  tagOpen "p"
  inner <- text
  tagClose "p"
Synopsis

Documentation

data Tag str Source #

Constructors

TagOpen str [Attribute str] Position 
TagClose str Position 
TagText str Position 

Instances

Instances details
Show str => Show (Tag str) Source # 
Instance details

Defined in Text.HTML.Onama

Methods

showsPrec :: Int -> Tag str -> ShowS #

show :: Tag str -> String #

showList :: [Tag str] -> ShowS #

Eq str => Eq (Tag str) Source # 
Instance details

Defined in Text.HTML.Onama

Methods

(==) :: Tag str -> Tag str -> Bool #

(/=) :: Tag str -> Tag str -> Bool #

parseTags :: StringLike str => str -> [Tag str] Source #

Return a list of tags parsed from some sort of string. This list should then get fed into an Onama parser.

tag :: (Monad m, Show str) => ParsecT [Tag str] u m (Tag str) Source #

Primitive. Return the next input tag. All other primitive parsers should be implemented in terms of this.

satisfy :: (Monad m, Show str) => (Tag str -> Bool) -> ParsecT [Tag str] u m (Tag str) Source #

Create a parser which parses a single HTML tag if it passes the given predicate. Return the parsed tag.

data TagOpenSelector Source #

Instances

Instances details
IsString TagOpenSelector Source # 
Instance details

Defined in Text.HTML.Onama

data TagCloseSelector Source #

Instances

Instances details
IsString TagCloseSelector Source # 
Instance details

Defined in Text.HTML.Onama

newtype AttrName Source #

Constructors

AttrName String 

Instances

Instances details
IsString AttrName Source # 
Instance details

Defined in Text.HTML.Onama

data AttrValue Source #

Constructors

AnyAttr 
AttrValue String 

Instances

Instances details
IsString AttrValue Source # 
Instance details

Defined in Text.HTML.Onama

data AttrSelector Source #

Instances

Instances details
IsString AttrSelector Source # 
Instance details

Defined in Text.HTML.Onama

tagOpen_ :: (Monad m, StringLike str, Show str) => TagOpenSelector -> ParsecT [Tag str] u m (Tag str) Source #

tagOpen :: (Monad m, StringLike str, Show str) => TagOpenSelector -> ParsecT [Tag str] u m (Tag str) Source #

tagClose_ :: (Monad m, StringLike str, Show str) => TagCloseSelector -> ParsecT [Tag str] u m (Tag str) Source #

tagClose :: (Monad m, StringLike str, Show str) => TagCloseSelector -> ParsecT [Tag str] u m (Tag str) Source #

tagText :: (Monad m, Show str) => ParsecT [Tag str] u m str Source #

voidElement :: (Monad m, StringLike str, Show str) => ParsecT [Tag str] u m (Tag str) Source #

Certain HTML elements are self closing. In addition, they can show up without their closing slash. For these, we just want to go over their opening tag. These elements are void, according to the W3C spec: https://www.w3.org/TR/2012/WD-html-markup-20121025/syntax.html#syntax-elements

  • area
  • base
  • br
  • col
  • command
  • embed
  • hr
  • img
  • input
  • keygen
  • link
  • meta
  • param
  • source
  • track
  • wbr

balancedTags :: (Monad m, StringLike str, Show str) => TagOpenSelector -> ParsecT [Tag str] u m [Tag str] Source #

innerText :: StringLike str => [Tag str] -> str Source #

skip :: Stream s m t => ParsecT s u m a -> ParsecT s u m () Source #

skip p produces a parser which will ignore the output of p.