{-# LANGUAGE LambdaCase            #-}
{-# LANGUAGE OverloadedStrings     #-}
{- |
   Module      : Text.Pandoc.Readers.HTML.Parsing
   Copyright   : Copyright (C) 2006-2021 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Parsing functions and utilities.
-}
module Text.Pandoc.Readers.HTML.Parsing
  ( TagOmission (..)
  , pInTags
  , pInTags'
  , pInTag
  , pInTagWithAttribs
  , pAny
  , pCloses
  , pSatisfy
  , pBlank
  , matchTagClose
  , matchTagOpen
  , isSpace
  , maybeFromAttrib
  , toAttr
  , toStringAttr
  )
where

import Control.Monad (guard, void, mzero)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import Text.HTML.TagSoup
  ( Attribute, Tag (..), isTagText, isTagPosition, isTagOpen, isTagClose, (~==) )
import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
import Text.Pandoc.Definition (Attr)
import Text.Pandoc.Parsing
  ( (<|>), eof, getPosition, lookAhead, manyTill, newPos, option, optional
  , skipMany, setPosition, token, try)
import Text.Pandoc.Readers.HTML.TagCategories
import Text.Pandoc.Readers.HTML.Types
import Text.Pandoc.Shared (tshow)
import Text.Pandoc.XML (html5Attributes, html4Attributes, rdfaAttributes)
import qualified Data.Set as Set
import qualified Data.Text as T

-- | Whether no tag, the closing tag, or both tags can be omitted.
data TagOmission
  = TagsRequired       -- ^ Opening and closing tags are both required
  | ClosingTagOptional -- ^ The closing tag can be omitted
  | TagsOmittable      -- ^ Both tags, opening and closing, can be omitted.
  deriving (TagOmission -> TagOmission -> Bool
(TagOmission -> TagOmission -> Bool)
-> (TagOmission -> TagOmission -> Bool) -> Eq TagOmission
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: TagOmission -> TagOmission -> Bool
$c/= :: TagOmission -> TagOmission -> Bool
== :: TagOmission -> TagOmission -> Bool
$c== :: TagOmission -> TagOmission -> Bool
Eq)

pInTags :: (PandocMonad m, Monoid a) => Text -> TagParser m a -> TagParser m a
pInTags :: Text -> TagParser m a -> TagParser m a
pInTags Text
tagtype TagParser m a
parser = Text -> (Tag Text -> Bool) -> TagParser m a -> TagParser m a
forall (m :: * -> *) a.
(PandocMonad m, Monoid a) =>
Text -> (Tag Text -> Bool) -> TagParser m a -> TagParser m a
pInTags' Text
tagtype (Bool -> Tag Text -> Bool
forall a b. a -> b -> a
const Bool
True) TagParser m a
parser

pInTags' :: (PandocMonad m, Monoid a)
         => Text
         -> (Tag Text -> Bool)
         -> TagParser m a
         -> TagParser m a
pInTags' :: Text -> (Tag Text -> Bool) -> TagParser m a -> TagParser m a
pInTags' Text
tagtype Tag Text -> Bool
tagtest TagParser m a
parser = TagParser m a -> TagParser m a
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (TagParser m a -> TagParser m a) -> TagParser m a -> TagParser m a
forall a b. (a -> b) -> a -> b
$ do
  (Tag Text -> Bool) -> TagParser m (Tag Text)
forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy ((Tag Text -> Bool) -> TagParser m (Tag Text))
-> (Tag Text -> Bool) -> TagParser m (Tag Text)
forall a b. (a -> b) -> a -> b
$ \Tag Text
t -> Text -> [(Text, Text)] -> Tag Text -> Bool
matchTagOpen Text
tagtype [] Tag Text
t Bool -> Bool -> Bool
&& Tag Text -> Bool
tagtest Tag Text
t
  [a] -> a
forall a. Monoid a => [a] -> a
mconcat ([a] -> a)
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) [a]
-> TagParser m a
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> TagParser m a
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) [a]
forall s (m :: * -> *) t u a end.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a]
manyTill TagParser m a
parser (Text -> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall (m :: * -> *). PandocMonad m => Text -> TagParser m ()
pCloses Text
tagtype ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m ()
eof)

pInTag :: PandocMonad m
       => TagOmission    -- ^ Whether some tags can be omitted
       -> Text           -- ^ @tagtype@ Tag name
       -> TagParser m a  -- ^ @p@ Content parser
       -> TagParser m a
pInTag :: TagOmission -> Text -> TagParser m a -> TagParser m a
pInTag TagOmission
tagOmission Text
tagtype = (([(Text, Text)], a) -> a)
-> ParsecT
     [Tag Text] HTMLState (ReaderT HTMLLocal m) ([(Text, Text)], a)
-> TagParser m a
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ([(Text, Text)], a) -> a
forall a b. (a, b) -> b
snd (ParsecT
   [Tag Text] HTMLState (ReaderT HTMLLocal m) ([(Text, Text)], a)
 -> TagParser m a)
-> (TagParser m a
    -> ParsecT
         [Tag Text] HTMLState (ReaderT HTMLLocal m) ([(Text, Text)], a))
-> TagParser m a
-> TagParser m a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TagOmission
-> Text
-> TagParser m a
-> ParsecT
     [Tag Text] HTMLState (ReaderT HTMLLocal m) ([(Text, Text)], a)
forall (m :: * -> *) a.
PandocMonad m =>
TagOmission
-> Text -> TagParser m a -> TagParser m ([(Text, Text)], a)
pInTagWithAttribs TagOmission
tagOmission Text
tagtype

-- | Returns the contents of a tag together with its attributes; parses
-- @p@, preceded by an opening tag (optional if TagsOmittable) and
-- followed by a closing tag (optional unless TagsRequired).
pInTagWithAttribs :: PandocMonad m
                  => TagOmission    -- ^ Whether some tags can be omitted
                  -> Text           -- ^ @tagtype@ Tag name
                  -> TagParser m a  -- ^ @p@ Content parser
                  -> TagParser m ([Attribute Text], a)
pInTagWithAttribs :: TagOmission
-> Text -> TagParser m a -> TagParser m ([(Text, Text)], a)
pInTagWithAttribs TagOmission
tagOmission Text
tagtype TagParser m a
p = TagParser m ([(Text, Text)], a) -> TagParser m ([(Text, Text)], a)
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (TagParser m ([(Text, Text)], a)
 -> TagParser m ([(Text, Text)], a))
-> TagParser m ([(Text, Text)], a)
-> TagParser m ([(Text, Text)], a)
forall a b. (a -> b) -> a -> b
$ do
  let openingOptional :: Bool
openingOptional = TagOmission
tagOmission TagOmission -> TagOmission -> Bool
forall a. Eq a => a -> a -> Bool
== TagOmission
TagsOmittable
  let closingOptional :: Bool
closingOptional = TagOmission
tagOmission TagOmission -> TagOmission -> Bool
forall a. Eq a => a -> a -> Bool
/= TagOmission
TagsRequired
  ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  [(Text, Text)]
attribs <- (if Bool
openingOptional then [(Text, Text)]
-> ParsecT
     [Tag Text] HTMLState (ReaderT HTMLLocal m) [(Text, Text)]
-> ParsecT
     [Tag Text] HTMLState (ReaderT HTMLLocal m) [(Text, Text)]
forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option [] else ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) [(Text, Text)]
-> ParsecT
     [Tag Text] HTMLState (ReaderT HTMLLocal m) [(Text, Text)]
forall a. a -> a
id)
             (Tag Text -> [(Text, Text)]
forall str. Tag str -> [Attribute str]
getAttribs (Tag Text -> [(Text, Text)])
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
-> ParsecT
     [Tag Text] HTMLState (ReaderT HTMLLocal m) [(Text, Text)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Tag Text -> Bool)
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy (Text -> [(Text, Text)] -> Tag Text -> Bool
matchTagOpen Text
tagtype []))
  ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  a
x <- TagParser m a
p
  ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  (if Bool
closingOptional then ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m ()
optional else ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void) (ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
 -> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ())
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall a b. (a -> b) -> a -> b
$
    (Tag Text -> Bool)
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy (Text -> Tag Text -> Bool
matchTagClose Text
tagtype)
  ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  ([(Text, Text)], a) -> TagParser m ([(Text, Text)], a)
forall (m :: * -> *) a. Monad m => a -> m a
return ([(Text, Text)]
attribs, a
x)
  where
    getAttribs :: Tag str -> [Attribute str]
getAttribs = \case
      TagOpen str
_ [Attribute str]
attribs -> [Attribute str]
attribs
      Tag str
_                 -> []

pCloses :: PandocMonad m => Text -> TagParser m ()
pCloses :: Text -> TagParser m ()
pCloses Text
tagtype = TagParser m () -> TagParser m ()
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (TagParser m () -> TagParser m ())
-> TagParser m () -> TagParser m ()
forall a b. (a -> b) -> a -> b
$ do
  Tag Text
t <- ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m a
lookAhead (ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
 -> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text))
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
forall a b. (a -> b) -> a -> b
$ (Tag Text -> Bool)
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy ((Tag Text -> Bool)
 -> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text))
-> (Tag Text -> Bool)
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
forall a b. (a -> b) -> a -> b
$ \Tag Text
tag -> Tag Text -> Bool
forall str. Tag str -> Bool
isTagClose Tag Text
tag Bool -> Bool -> Bool
|| Tag Text -> Bool
forall str. Tag str -> Bool
isTagOpen Tag Text
tag
  case Tag Text
t of
       (TagClose Text
t') | Text
t' Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
tagtype -> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
-> TagParser m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) (Tag Text)
forall (m :: * -> *). PandocMonad m => TagParser m (Tag Text)
pAny
       (TagOpen Text
t' [(Text, Text)]
_) | Text
t' Text -> Text -> Bool
`closes` Text
tagtype -> () -> TagParser m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
       (TagClose Text
"ul") | Text
tagtype Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"li" -> () -> TagParser m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
       (TagClose Text
"ol") | Text
tagtype Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"li" -> () -> TagParser m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
       (TagClose Text
"dl") | Text
tagtype Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"dd" -> () -> TagParser m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
       (TagClose Text
"table") | Text
tagtype Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"td" -> () -> TagParser m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
       (TagClose Text
"table") | Text
tagtype Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"th" -> () -> TagParser m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
       (TagClose Text
"table") | Text
tagtype Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"tr" -> () -> TagParser m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
       (TagClose Text
"td") | Text
tagtype Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.member` Set Text
blockHtmlTags -> () -> TagParser m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
       (TagClose Text
"th") | Text
tagtype Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.member` Set Text
blockHtmlTags -> () -> TagParser m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
       (TagClose Text
t') | Text
tagtype Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"p" Bool -> Bool -> Bool
&& Text
t' Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.member` Set Text
blockHtmlTags
                                            -> () -> TagParser m ()
forall (m :: * -> *) a. Monad m => a -> m a
return () -- see #3794
       Tag Text
_ -> TagParser m ()
forall (m :: * -> *) a. MonadPlus m => m a
mzero

pBlank :: PandocMonad m => TagParser m ()
pBlank :: TagParser m ()
pBlank = TagParser m () -> TagParser m ()
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (TagParser m () -> TagParser m ())
-> TagParser m () -> TagParser m ()
forall a b. (a -> b) -> a -> b
$ do
  (TagText Text
str) <- (Tag Text -> Bool) -> TagParser m (Tag Text)
forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy Tag Text -> Bool
forall str. Tag str -> Bool
isTagText
  Bool -> TagParser m ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> TagParser m ()) -> Bool -> TagParser m ()
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isSpace Text
str

pLocation :: PandocMonad m => TagParser m ()
pLocation :: TagParser m ()
pLocation = do
  (TagPosition Row
r Row
c) <- (Tag Text -> Bool) -> TagParser m (Tag Text)
forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSat Tag Text -> Bool
forall str. Tag str -> Bool
isTagPosition
  SourcePos -> TagParser m ()
forall (m :: * -> *) s u. Monad m => SourcePos -> ParsecT s u m ()
setPosition (SourcePos -> TagParser m ()) -> SourcePos -> TagParser m ()
forall a b. (a -> b) -> a -> b
$ SourceName -> Row -> Row -> SourcePos
newPos SourceName
"input" Row
r Row
c

pSat :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text)
pSat :: (Tag Text -> Bool) -> TagParser m (Tag Text)
pSat Tag Text -> Bool
f = do
  SourcePos
pos <- ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) SourcePos
forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
  (Tag Text -> Text)
-> (Tag Text -> SourcePos)
-> (Tag Text -> Maybe (Tag Text))
-> TagParser m (Tag Text)
forall s (m :: * -> *) t a st.
Stream s m t =>
(t -> Text)
-> (t -> SourcePos) -> (t -> Maybe a) -> ParsecT s st m a
token Tag Text -> Text
forall a. Show a => a -> Text
tshow (SourcePos -> Tag Text -> SourcePos
forall a b. a -> b -> a
const SourcePos
pos) (\Tag Text
x -> if Tag Text -> Bool
f Tag Text
x then Tag Text -> Maybe (Tag Text)
forall a. a -> Maybe a
Just Tag Text
x else Maybe (Tag Text)
forall a. Maybe a
Nothing)

pSatisfy :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy :: (Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy Tag Text -> Bool
f = TagParser m (Tag Text) -> TagParser m (Tag Text)
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (TagParser m (Tag Text) -> TagParser m (Tag Text))
-> TagParser m (Tag Text) -> TagParser m (Tag Text)
forall a b. (a -> b) -> a -> b
$ ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
-> ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m ()
optional ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
forall (m :: * -> *). PandocMonad m => TagParser m ()
pLocation ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) ()
-> TagParser m (Tag Text) -> TagParser m (Tag Text)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Tag Text -> Bool) -> TagParser m (Tag Text)
forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSat Tag Text -> Bool
f

matchTagClose :: Text -> (Tag Text -> Bool)
matchTagClose :: Text -> Tag Text -> Bool
matchTagClose Text
t = (Tag Text -> Tag Text -> Bool
forall str t. (StringLike str, TagRep t) => Tag str -> t -> Bool
~== Text -> Tag Text
forall str. str -> Tag str
TagClose Text
t)

matchTagOpen :: Text -> [(Text, Text)] -> (Tag Text -> Bool)
matchTagOpen :: Text -> [(Text, Text)] -> Tag Text -> Bool
matchTagOpen Text
t [(Text, Text)]
as = (Tag Text -> Tag Text -> Bool
forall str t. (StringLike str, TagRep t) => Tag str -> t -> Bool
~== Text -> [(Text, Text)] -> Tag Text
forall str. str -> [Attribute str] -> Tag str
TagOpen Text
t [(Text, Text)]
as)

pAny :: PandocMonad m => TagParser m (Tag Text)
pAny :: TagParser m (Tag Text)
pAny = (Tag Text -> Bool) -> TagParser m (Tag Text)
forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy (Bool -> Tag Text -> Bool
forall a b. a -> b -> a
const Bool
True)

isSpace :: Char -> Bool
isSpace :: Char -> Bool
isSpace Char
' '  = Bool
True
isSpace Char
'\t' = Bool
True
isSpace Char
'\n' = Bool
True
isSpace Char
'\r' = Bool
True
isSpace Char
_    = Bool
False

-- taken from HXT and extended
-- See http://www.w3.org/TR/html5/syntax.html sec 8.1.2.4 optional tags
closes :: Text -> Text -> Bool
Text
_ closes :: Text -> Text -> Bool
`closes` Text
"body" = Bool
False
Text
_ `closes` Text
"html" = Bool
False
Text
"body" `closes` Text
"head" = Bool
True
Text
"a" `closes` Text
"a" = Bool
True
Text
"li" `closes` Text
"li" = Bool
True
Text
"th" `closes` Text
t | Text
t Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Text
"th",Text
"td"] = Bool
True
Text
"td" `closes` Text
t | Text
t Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Text
"th",Text
"td"] = Bool
True
Text
"tr" `closes` Text
t | Text
t Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Text
"th",Text
"td",Text
"tr",Text
"colgroup"] = Bool
True
Text
"dd" `closes` Text
t | Text
t Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Text
"dt", Text
"dd"] = Bool
True
Text
"dt" `closes` Text
t | Text
t Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Text
"dt",Text
"dd"] = Bool
True
Text
"rt" `closes` Text
t | Text
t Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Text
"rb", Text
"rt", Text
"rtc"] = Bool
True
Text
"col" `closes` Text
"col" = Bool
True
Text
"colgroup" `closes` Text
"col" = Bool
True
Text
"optgroup" `closes` Text
"optgroup" = Bool
True
Text
"optgroup" `closes` Text
"option" = Bool
True
Text
"option" `closes` Text
"option" = Bool
True
-- https://html.spec.whatwg.org/multipage/syntax.html#optional-tags
Text
x `closes` Text
"p" | Text
x Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Text
"address", Text
"article", Text
"aside", Text
"blockquote",
   Text
"dir", Text
"div", Text
"dl", Text
"fieldset", Text
"footer", Text
"form", Text
"h1", Text
"h2", Text
"h3", Text
"h4",
   Text
"h5", Text
"h6", Text
"header", Text
"hr", Text
"main", Text
"menu", Text
"nav", Text
"ol", Text
"p", Text
"pre", Text
"section",
   Text
"table", Text
"ul"] = Bool
True
Text
_ `closes` Text
"meta" = Bool
True
Text
"form" `closes` Text
"form" = Bool
True
Text
"label" `closes` Text
"label" = Bool
True
Text
"map" `closes` Text
"map" = Bool
True
Text
"object" `closes` Text
"object" = Bool
True
Text
_ `closes` Text
t | Text
t Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Text
"option",Text
"style",Text
"script",Text
"textarea",Text
"title"] = Bool
True
Text
t `closes` Text
"select" | Text
t Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
/= Text
"option" = Bool
True
Text
"thead" `closes` Text
"colgroup" = Bool
True
Text
"tfoot" `closes` Text
t | Text
t Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Text
"thead",Text
"colgroup"] = Bool
True
Text
"tbody" `closes` Text
t | Text
t Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Text
"tbody",Text
"tfoot",Text
"thead",Text
"colgroup"] = Bool
True
Text
t `closes` Text
t2 |
   Text
t Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Text
"h1",Text
"h2",Text
"h3",Text
"h4",Text
"h5",Text
"h6",Text
"dl",Text
"ol",Text
"ul",Text
"table",Text
"div",Text
"main",Text
"p"] Bool -> Bool -> Bool
&&
   Text
t2 Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Text
"h1",Text
"h2",Text
"h3",Text
"h4",Text
"h5",Text
"h6",Text
"p" ] = Bool
True -- not "div" or "main"
Text
t1 `closes` Text
t2 |
   Text
t1 Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.member` Set Text
blockTags Bool -> Bool -> Bool
&&
   Text
t2 Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.notMember` Set Text
blockTags Bool -> Bool -> Bool
&&
   Text
t2 Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.notMember` Set Text
eitherBlockOrInline = Bool
True
Text
_ `closes` Text
_ = Bool
False

toStringAttr :: [(Text, Text)] -> [(Text, Text)]
toStringAttr :: [(Text, Text)] -> [(Text, Text)]
toStringAttr = ((Text, Text) -> [(Text, Text)] -> [(Text, Text)])
-> [(Text, Text)] -> [(Text, Text)] -> [(Text, Text)]
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (Text, Text) -> [(Text, Text)] -> [(Text, Text)]
go []
  where
   go :: (Text, Text) -> [(Text, Text)] -> [(Text, Text)]
   -- treat xml:lang as lang
   go :: (Text, Text) -> [(Text, Text)] -> [(Text, Text)]
go (Text
"xml:lang",Text
y) [(Text, Text)]
ats = (Text, Text) -> [(Text, Text)] -> [(Text, Text)]
go (Text
"lang",Text
y) [(Text, Text)]
ats
   -- prevent duplicate attributes
   go (Text
x,Text
y) [(Text, Text)]
ats
     | ((Text, Text) -> Bool) -> [(Text, Text)] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (\(Text
x',Text
_) -> Text
x Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
x') [(Text, Text)]
ats = [(Text, Text)]
ats
     | Bool
otherwise      =
        case Text -> Text -> Maybe Text
T.stripPrefix Text
"data-" Text
x of
          Just Text
x' | Text
x' Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.notMember` (Set Text
html5Attributes Set Text -> Set Text -> Set Text
forall a. Semigroup a => a -> a -> a
<>
                                        Set Text
html4Attributes Set Text -> Set Text -> Set Text
forall a. Semigroup a => a -> a -> a
<> Set Text
rdfaAttributes)
            -> (Text, Text) -> [(Text, Text)] -> [(Text, Text)]
go (Text
x',Text
y) [(Text, Text)]
ats
          Maybe Text
_ -> (Text
x,Text
y)(Text, Text) -> [(Text, Text)] -> [(Text, Text)]
forall a. a -> [a] -> [a]
:[(Text, Text)]
ats

-- Unlike fromAttrib from tagsoup, this distinguishes
-- between a missing attribute and an attribute with empty content.
maybeFromAttrib :: Text -> Tag Text -> Maybe Text
maybeFromAttrib :: Text -> Tag Text -> Maybe Text
maybeFromAttrib Text
name (TagOpen Text
_ [(Text, Text)]
attrs) = Text -> [(Text, Text)] -> Maybe Text
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
name [(Text, Text)]
attrs
maybeFromAttrib Text
_ Tag Text
_ = Maybe Text
forall a. Maybe a
Nothing

mkAttr :: [(Text, Text)] -> Attr
mkAttr :: [(Text, Text)] -> Attr
mkAttr [(Text, Text)]
attr = (Text
attribsId, [Text]
attribsClasses, [(Text, Text)]
attribsKV)
  where attribsId :: Text
attribsId = Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe Text
"" (Maybe Text -> Text) -> Maybe Text -> Text
forall a b. (a -> b) -> a -> b
$ Text -> [(Text, Text)] -> Maybe Text
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
"id" [(Text, Text)]
attr
        attribsClasses :: [Text]
attribsClasses = Text -> [Text]
T.words (Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe Text
"" (Maybe Text -> Text) -> Maybe Text -> Text
forall a b. (a -> b) -> a -> b
$ Text -> [(Text, Text)] -> Maybe Text
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
"class" [(Text, Text)]
attr) [Text] -> [Text] -> [Text]
forall a. Semigroup a => a -> a -> a
<> [Text]
epubTypes
        attribsKV :: [(Text, Text)]
attribsKV = ((Text, Text) -> Bool) -> [(Text, Text)] -> [(Text, Text)]
forall a. (a -> Bool) -> [a] -> [a]
filter (\(Text
k,Text
_) -> Text
k Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
/= Text
"class" Bool -> Bool -> Bool
&& Text
k Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
/= Text
"id") [(Text, Text)]
attr
        epubTypes :: [Text]
epubTypes = Text -> [Text]
T.words (Text -> [Text]) -> Text -> [Text]
forall a b. (a -> b) -> a -> b
$ Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe Text
"" (Maybe Text -> Text) -> Maybe Text -> Text
forall a b. (a -> b) -> a -> b
$ Text -> [(Text, Text)] -> Maybe Text
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
"epub:type" [(Text, Text)]
attr

toAttr :: [(Text, Text)] -> Attr
toAttr :: [(Text, Text)] -> Attr
toAttr = [(Text, Text)] -> Attr
mkAttr ([(Text, Text)] -> Attr)
-> ([(Text, Text)] -> [(Text, Text)]) -> [(Text, Text)] -> Attr
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(Text, Text)] -> [(Text, Text)]
toStringAttr