{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Zenacy.HTML.Internal.HTML
( HTMLOptions(..)
, HTMLResult(..)
, HTMLError(..)
, HTMLNode(..)
, HTMLAttr(..)
, HTMLNamespace(..)
, HTMLAttrNamespace(..)
, htmlParse
, htmlParseEasy
, htmlFragment
, htmlDefaultDocument
, htmlDefaultDoctype
, htmlDefaultFragment
, htmlDefaultElement
, htmlDefaultTemplate
, htmlDefaultText
, htmlDefaultComment
, htmlAttr
, htmlElem
, htmlText
) where
import Zenacy.HTML.Internal.BS
import Zenacy.HTML.Internal.Core
import Zenacy.HTML.Internal.DOM
import Zenacy.HTML.Internal.Parser
import Zenacy.HTML.Internal.Types
import Data.Default
( Default(..)
)
import Data.Either
( either
)
import Data.Foldable
( toList
)
import Data.Maybe
( fromJust
)
import Data.Text
( Text
)
import qualified Data.Text as T
( empty
)
import qualified Data.Text.Encoding as T
( encodeUtf8
, decodeUtf8
)
data HTMLOptions = HTMLOptions
{ htmlOptionLogErrors :: !Bool
, htmlOptionIgnoreEntities :: !Bool
} deriving (Eq, Ord, Show)
data HTMLResult = HTMLResult
{ htmlResultDocument :: !HTMLNode
, htmlResultErrors :: ![HTMLError]
} deriving (Eq, Ord, Show)
data HTMLError = HTMLError
{ htmlErrorText :: !Text
} deriving (Show, Eq, Ord)
data HTMLNode
= HTMLDocument
{ htmlDocumentName :: !Text
, htmlDocumentChildren :: ![HTMLNode]
}
| HTMLDoctype
{ htmlDoctypeName :: !Text
, htmlDoctypePublicID :: !(Maybe Text)
, htmlDoctypeSystemID :: !(Maybe Text)
}
| HTMLFragment
{ htmlFragmentName :: !Text
, htmlFragmentChildren :: ![HTMLNode]
}
| HTMLElement
{ htmlElementName :: !Text
, htmlElementNamespace :: !HTMLNamespace
, htmlElementAttributes :: ![HTMLAttr]
, htmlElementChildren :: ![HTMLNode]
}
| HTMLTemplate
{ htmlTemplateNamespace :: !HTMLNamespace
, htmlTemplateAttributes :: ![HTMLAttr]
, htmlTemplateContents :: !HTMLNode
}
| HTMLText
{ htmlTextData :: !Text
}
| HTMLComment
{ htmlCommentData :: !Text
}
deriving (Eq, Ord, Show)
data HTMLAttr = HTMLAttr
{ htmlAttrName :: Text
, htmlAttrVal :: Text
, htmlAttrNamespace :: HTMLAttrNamespace
} deriving (Eq, Ord, Show)
instance Default HTMLOptions where
def = HTMLOptions
{ htmlOptionLogErrors = False
, htmlOptionIgnoreEntities = False
}
instance Default HTMLResult where
def = HTMLResult
{ htmlResultDocument = htmlDefaultDocument
, htmlResultErrors = []
}
instance Default HTMLError where
def = HTMLError
{ htmlErrorText = T.empty
}
instance Default HTMLAttr where
def = HTMLAttr
{ htmlAttrName = T.empty
, htmlAttrVal = T.empty
, htmlAttrNamespace = HTMLAttrNamespaceNone
}
htmlParse :: HTMLOptions -> Text -> Either HTMLError HTMLResult
htmlParse HTMLOptions {..} x =
case d of
Right ParserResult {..} ->
Right def
{ htmlResultDocument = domToHTML parserResultDOM
, htmlResultErrors = map f parserResultErrors
}
Left e ->
Left (f e)
where
d = parseDocument def
{ parserOptionInput = T.encodeUtf8 x
, parserOptionLogErrors = htmlOptionLogErrors
, parserOptionIgnoreEntities = htmlOptionIgnoreEntities
}
f x = def { htmlErrorText = T.decodeUtf8 x }
htmlParseEasy :: Text -> HTMLNode
htmlParseEasy =
either (const htmlDefaultDocument) htmlResultDocument . htmlParse def
htmlFragment :: HTMLOptions -> Text -> Either HTMLError HTMLResult
htmlFragment HTMLOptions {..} x = Left def
{ htmlErrorText = "fragment support not currently implemented" }
htmlDefaultDocument :: HTMLNode
htmlDefaultDocument = HTMLDocument
{ htmlDocumentName = T.empty
, htmlDocumentChildren = []
}
htmlDefaultDoctype :: HTMLNode
htmlDefaultDoctype = HTMLDoctype
{ htmlDoctypeName = T.empty
, htmlDoctypePublicID = Nothing
, htmlDoctypeSystemID = Nothing
}
htmlDefaultFragment :: HTMLNode
htmlDefaultFragment = HTMLFragment
{ htmlFragmentName = T.empty
, htmlFragmentChildren = []
}
htmlDefaultElement :: HTMLNode
htmlDefaultElement = HTMLElement
{ htmlElementName = T.empty
, htmlElementNamespace = HTMLNamespaceHTML
, htmlElementAttributes = []
, htmlElementChildren = []
}
htmlDefaultTemplate :: HTMLNode
htmlDefaultTemplate = HTMLTemplate
{ htmlTemplateNamespace = HTMLNamespaceHTML
, htmlTemplateAttributes = []
, htmlTemplateContents = htmlDefaultFragment
}
htmlDefaultText :: HTMLNode
htmlDefaultText = HTMLText
{ htmlTextData = T.empty
}
htmlDefaultComment :: HTMLNode
htmlDefaultComment = HTMLComment
{ htmlCommentData = T.empty
}
htmlAttr :: Text -> Text -> HTMLAttr
htmlAttr n v = HTMLAttr n v HTMLAttrNamespaceNone
htmlElem :: Text -> [HTMLAttr] -> [HTMLNode] -> HTMLNode
htmlElem n a c = HTMLElement n HTMLNamespaceHTML a c
htmlText :: Text -> HTMLNode
htmlText = HTMLText
domToHTML :: DOM -> HTMLNode
domToHTML d = nodeToHTML d $ domDocument d
nodeToHTML :: DOM -> DOMNode -> HTMLNode
nodeToHTML d = go where
go DOMDocument {..} = HTMLDocument
{ htmlDocumentName = t domDocumentName
, htmlDocumentChildren = f domDocumentChildren
}
go DOMDoctype {..} = HTMLDoctype
{ htmlDoctypeName = t domDoctypeName
, htmlDoctypePublicID = t <$> domDoctypePublicID
, htmlDoctypeSystemID = t <$> domDoctypeSystemID
}
go DOMFragment {..} = HTMLFragment
{ htmlFragmentName = t domFragmentName
, htmlFragmentChildren = f domFragmentChildren
}
go DOMElement {..} = HTMLElement
{ htmlElementName = t domElementName
, htmlElementNamespace = domElementNamespace
, htmlElementAttributes = h domElementAttributes
, htmlElementChildren = f domElementChildren
}
go DOMTemplate {..} = HTMLTemplate
{ htmlTemplateNamespace = domTemplateNamespace
, htmlTemplateAttributes = h domTemplateAttributes
, htmlTemplateContents = g domTemplateContents
}
go DOMText {..} = HTMLText
{ htmlTextData = t domTextData
}
go DOMComment {..} = HTMLComment
{ htmlCommentData = t domCommentData
}
f = map go . domMapID d . toList
g = go . fromJust . domGetNode d
h = map attr . toList
t = T.decodeUtf8
attr (DOMAttr n v s) = HTMLAttr (t n) (t v) s