{-# LANGUAGE FlexibleInstances     #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{- |
   Module      : Text.Pandoc.Readers.HTML.Types
   Copyright   : Copyright (C) 2006-2024 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Types for pandoc's HTML reader.
-}
module Text.Pandoc.Readers.HTML.Types
  ( TagParser
  , HTMLParser
  , HTMLState (..)
  , HTMLLocal (..)
  )
where

import Control.Monad.Reader (ReaderT, asks, local)
import Data.Default (Default (def))
import Data.Map (Map)
import Data.Set (Set)
import Data.Text (Text)
import Network.URI (URI)
import Text.HTML.TagSoup
import Text.Pandoc.Builder (Blocks, HasMeta (..))
import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
import Text.Pandoc.Logging (LogMessage)
import Text.Pandoc.Options (ReaderOptions)
import Text.Pandoc.Parsing
  ( HasIdentifierList (..), HasLastStrPosition (..), HasLogMessages (..)
  , HasMacros (..), HasQuoteContext (..), HasReaderOptions (..)
  , ParsecT, ParserState, QuoteContext (NoQuote)
  )
import Text.Pandoc.TeX (Macro)

-- | HTML parser type
type HTMLParser m s = ParsecT s HTMLState (ReaderT HTMLLocal m)

-- | HTML parser, expecting @Tag Text@ as tokens.
type TagParser m = HTMLParser m [Tag Text]

-- | Global HTML parser state
data HTMLState = HTMLState
  { HTMLState -> ParserState
parserState :: ParserState
  , HTMLState -> [(Text, Blocks)]
noteTable   :: [(Text, Blocks)]
  , HTMLState -> Maybe URI
baseHref    :: Maybe URI
  , HTMLState -> Set Text
identifiers :: Set Text
  , HTMLState -> [LogMessage]
logMessages :: [LogMessage]
  , HTMLState -> Map Text Macro
macros      :: Map Text Macro
  , HTMLState -> ReaderOptions
readerOpts  :: ReaderOptions
  , HTMLState -> Bool
inFootnotes :: Bool
  }

-- | Local HTML parser state
data HTMLLocal = HTMLLocal
  { HTMLLocal -> QuoteContext
quoteContext :: QuoteContext
  , HTMLLocal -> Bool
inChapter    :: Bool -- ^ Set if in chapter section
  , HTMLLocal -> Bool
inPlain      :: Bool -- ^ Set if in pPlain
  , HTMLLocal -> Bool
inListItem   :: Bool -- ^ Set if in <li> tag
  }


-- Instances

instance HasMacros HTMLState where
  extractMacros :: HTMLState -> Map Text Macro
extractMacros        = HTMLState -> Map Text Macro
macros
  updateMacros :: (Map Text Macro -> Map Text Macro) -> HTMLState -> HTMLState
updateMacros Map Text Macro -> Map Text Macro
f HTMLState
st    = HTMLState
st{ macros = f $ macros st }

instance HasIdentifierList HTMLState where
  extractIdentifierList :: HTMLState -> Set Text
extractIdentifierList = HTMLState -> Set Text
identifiers
  updateIdentifierList :: (Set Text -> Set Text) -> HTMLState -> HTMLState
updateIdentifierList Set Text -> Set Text
f HTMLState
s = HTMLState
s{ identifiers = f (identifiers s) }

instance HasLogMessages HTMLState where
  addLogMessage :: LogMessage -> HTMLState -> HTMLState
addLogMessage LogMessage
m HTMLState
s = HTMLState
s{ logMessages = m : logMessages s }
  getLogMessages :: HTMLState -> [LogMessage]
getLogMessages = [LogMessage] -> [LogMessage]
forall a. [a] -> [a]
reverse ([LogMessage] -> [LogMessage])
-> (HTMLState -> [LogMessage]) -> HTMLState -> [LogMessage]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. HTMLState -> [LogMessage]
logMessages

-- This signature should be more general
-- MonadReader HTMLLocal m => HasQuoteContext st m
instance PandocMonad m => HasQuoteContext HTMLState (ReaderT HTMLLocal m) where
  getQuoteContext :: forall s t.
Stream s (ReaderT HTMLLocal m) t =>
ParsecT s HTMLState (ReaderT HTMLLocal m) QuoteContext
getQuoteContext = (HTMLLocal -> QuoteContext)
-> ParsecT s HTMLState (ReaderT HTMLLocal m) QuoteContext
forall r (m :: * -> *) a. MonadReader r m => (r -> a) -> m a
asks HTMLLocal -> QuoteContext
quoteContext
  withQuoteContext :: forall s a.
QuoteContext
-> ParsecT s HTMLState (ReaderT HTMLLocal m) a
-> ParsecT s HTMLState (ReaderT HTMLLocal m) a
withQuoteContext QuoteContext
q = (HTMLLocal -> HTMLLocal)
-> ParsecT s HTMLState (ReaderT HTMLLocal m) a
-> ParsecT s HTMLState (ReaderT HTMLLocal m) a
forall a.
(HTMLLocal -> HTMLLocal)
-> ParsecT s HTMLState (ReaderT HTMLLocal m) a
-> ParsecT s HTMLState (ReaderT HTMLLocal m) a
forall r (m :: * -> *) a. MonadReader r m => (r -> r) -> m a -> m a
local (\HTMLLocal
s -> HTMLLocal
s{quoteContext = q})

instance HasReaderOptions HTMLState where
    extractReaderOptions :: HTMLState -> ReaderOptions
extractReaderOptions = ParserState -> ReaderOptions
forall st. HasReaderOptions st => st -> ReaderOptions
extractReaderOptions (ParserState -> ReaderOptions)
-> (HTMLState -> ParserState) -> HTMLState -> ReaderOptions
forall b c a. (b -> c) -> (a -> b) -> a -> c
. HTMLState -> ParserState
parserState

instance HasMeta HTMLState where
  setMeta :: forall b. ToMetaValue b => Text -> b -> HTMLState -> HTMLState
setMeta Text
s b
b HTMLState
st = HTMLState
st {parserState = setMeta s b $ parserState st}
  deleteMeta :: Text -> HTMLState -> HTMLState
deleteMeta Text
s HTMLState
st = HTMLState
st {parserState = deleteMeta s $ parserState st}

instance Default HTMLLocal where
  def :: HTMLLocal
def = QuoteContext -> Bool -> Bool -> Bool -> HTMLLocal
HTMLLocal QuoteContext
NoQuote Bool
False Bool
False Bool
False

instance HasLastStrPosition HTMLState where
  setLastStrPos :: Maybe SourcePos -> HTMLState -> HTMLState
setLastStrPos Maybe SourcePos
s HTMLState
st = HTMLState
st {parserState = setLastStrPos s (parserState st)}
  getLastStrPos :: HTMLState -> Maybe SourcePos
getLastStrPos = ParserState -> Maybe SourcePos
forall st. HasLastStrPosition st => st -> Maybe SourcePos
getLastStrPos (ParserState -> Maybe SourcePos)
-> (HTMLState -> ParserState) -> HTMLState -> Maybe SourcePos
forall b c a. (b -> c) -> (a -> b) -> a -> c
. HTMLState -> ParserState
parserState