--------------------------------------------------------------------------------
-- | Provides utilities to manipulate HTML pages
module Hakyll.Web.Html
    ( -- * Generic
      withTags
    , withTagList

      -- * Headers
    , demoteHeaders

      -- * Url manipulation
    , getUrls
    , withUrls
    , toUrl
    , toSiteRoot
    , isExternal

      -- * Stripping/escaping
    , stripTags
    , escapeHtml
    ) where


--------------------------------------------------------------------------------
import           Data.Char                       (digitToInt, intToDigit,
                                                  isDigit, toLower)
import           Data.List                       (isPrefixOf)
import qualified Data.Set                        as S
import           System.FilePath                 (joinPath, splitPath,
                                                  takeDirectory)
import           Text.Blaze.Html                 (toHtml)
import           Text.Blaze.Html.Renderer.String (renderHtml)
import qualified Text.HTML.TagSoup               as TS
import           Network.URI                     (isUnreserved, escapeURIString)


--------------------------------------------------------------------------------
import           Hakyll.Core.Util.String         (removeWinPathSeparator)


--------------------------------------------------------------------------------
-- | Map over all tags in the document.
--
-- The supplied function is applied to each tag individually; parsing the
-- input and rendering the result are delegated to 'withTagList'.
withTags :: (TS.Tag String -> TS.Tag String) -> String -> String
withTags f = withTagList (map f)

-- | Map over all tags (as list) in the document.
--
-- The input string is parsed with @parseTags'@, the resulting tag list is
-- handed to the supplied function, and whatever it returns is rendered back
-- to a string with @renderTags'@.
withTagList :: ([TS.Tag String] -> [TS.Tag String]) -> String -> String
withTagList f = renderTags' . f . parseTags'

--------------------------------------------------------------------------------
-- | Map every @h1@ to an @h2@, @h2@ to @h3@, etc.
--
-- Both opening and closing tags are renamed; every other kind of tag is
-- passed through unchanged. The resulting level is capped at 6, so an @h6@
-- stays an @h6@.
demoteHeaders :: String -> String
demoteHeaders = withTags $ \tag -> case tag of
    TS.TagOpen t a -> TS.TagOpen (demote t) a
    TS.TagClose t  -> TS.TagClose (demote t)
    t              -> t
  where
    -- Only two-character names of the form @h<digit>@ are rewritten; any
    -- other name (for instance @hr@) is returned as-is.
    demote :: String -> String
    demote t@['h', n]
        | isDigit n = ['h', intToDigit (min 6 $ digitToInt n + 1)]
        | otherwise = t
    demote t        = t


--------------------------------------------------------------------------------
-- | Whether an attribute name is one of those this module treats as holding
-- a URL: @src@, @href@, @data@, @poster@ or @srcset@.
--
-- The comparison is an exact (case-sensitive) match on the attribute name.
isUrlAttribute :: String -> Bool
isUrlAttribute = (`elem` ["src", "href", "data", "poster", "srcset"])


--------------------------------------------------------------------------------
-- | Collect the values of all URL-carrying attributes in a list of tags.
--
-- Only opening tags are inspected. An attribute value is included when its
-- name satisfies 'isUrlAttribute'; values are returned in the order in which
-- they occur in the input list.
getUrls :: [TS.Tag String] -> [String]
getUrls tags =
    [ v
    | TS.TagOpen _ attrs <- tags
    , (k, v)             <- attrs
    , isUrlAttribute k
    ]


--------------------------------------------------------------------------------
-- | Apply a function to each URL on a webpage.
--
-- Every attribute of every opening tag whose name satisfies 'isUrlAttribute'
-- has its value replaced by the result of the supplied function. The whole
-- attribute value is passed to the function as a single string. All other
-- attributes and all non-opening tags are left unchanged.
withUrls :: (String -> String) -> String -> String
withUrls f = withTags tag
  where
    -- Rewrite the attributes of opening tags only.
    tag :: TS.Tag String -> TS.Tag String
    tag (TS.TagOpen s a) = TS.TagOpen s $ map attr a
    tag x                = x

    -- Transform the value when the key names a URL attribute.
    attr :: (String, String) -> (String, String)
    attr (k, v)          = (k, if isUrlAttribute k then f v else v)


--------------------------------------------------------------------------------
-- | Customized TagSoup renderer. The default TagSoup renderer escapes CSS
-- within style tags, and doesn't properly minimize.
--
-- Tag names are lower-cased before being checked against the raw-tag list
-- (@script@, @style@) and the set of elements to minimize. No escaping is
-- applied on output ('TS.optEscape' is 'id').
renderTags' :: [TS.Tag String] -> String
renderTags' = TS.renderTagsOptions TS.RenderOptions
    { TS.optRawTag   = (`elem` ["script", "style"]) . map toLower
    , TS.optMinimize = (`S.member` minimize) . map toLower
    , TS.optEscape   = id
    }
  where
    -- A list of elements which must be minimized
    minimize :: S.Set String
    minimize = S.fromList
        [ "area", "br", "col", "embed", "hr", "img", "input", "meta", "link"
        , "param"
        ]


--------------------------------------------------------------------------------
-- | Customized TagSoup parser: do not decode any entities.
--
-- Both entity handlers re-emit the entity verbatim: an ampersand, the entity
-- name, and a trailing semicolon only when the accompanying flag is 'True'.
-- In text this becomes a 'TS.TagText'; in attribute values it is spliced back
-- into the string with no extra tags produced.
parseTags' :: String -> [TS.Tag String]
parseTags' = TS.parseTagsOptions (TS.parseOptions :: TS.ParseOptions String)
    { TS.optEntityData   = \(str, b) -> [TS.TagText $ "&" ++ str ++ [';' | b]]
    , TS.optEntityAttrib = \(str, b) -> ("&" ++ str ++ [';' | b], [])
    }


--------------------------------------------------------------------------------
-- | Convert a filepath to an URL starting from the site root
--
-- Example:
--
-- > toUrl "foo/bar.html"
--
-- Result:
--
-- > "/foo/bar.html"
--
-- This also sanitizes the URL, e.g. converting spaces into @%20@. The result
-- always starts with exactly one added @/@: a single leading slash on the
-- input (after 'removeWinPathSeparator') is dropped before it is prepended.
toUrl :: FilePath -> String
toUrl url = case removeWinPathSeparator url of
    ('/' : xs) -> '/' : sanitize xs
    xs         -> '/' : sanitize xs
  where
    -- Everything except unreserved characters is escaped: we are sanitising
    -- a path, so reserved characters that carry a meaning in a URI must not
    -- appear literally. @/@ is special-cased because it has a special
    -- meaning in FilePath as well as in URI.
    sanitize :: String -> String
    sanitize = escapeURIString (\c -> c == '/' || isUnreserved c)


--------------------------------------------------------------------------------
-- | Get the relative url to the site root, for a given (absolute) url
--
-- The directory part of the url is split into path components, components
-- that do not represent a real directory level (@.@, @/@ and @./@) are
-- discarded, and every remaining component is replaced by @..@. When no
-- component remains the result is @.@. The joined path is finally passed
-- through 'removeWinPathSeparator'.
toSiteRoot :: String -> String
toSiteRoot = removeWinPathSeparator . emptyException . joinPath
           . map parent . filter relevant . splitPath . takeDirectory
  where
    -- Each directory level is climbed with one @..@.
    parent :: b -> String
    parent            = const ".."

    -- An empty path means "already at the root": use the current directory.
    emptyException :: String -> String
    emptyException [] = "."
    emptyException x  = x

    -- Components that do not add a directory level.
    relevant :: String -> Bool
    relevant "."      = False
    relevant "/"      = False
    relevant "./"     = False
    relevant _        = True


--------------------------------------------------------------------------------
-- | Check if an URL links to an external HTTP(S) source
--
-- An URL counts as external when it starts with @http:\/\/@, @https:\/\/@ or
-- the protocol-relative prefix @\/\/@. The prefix match is case-sensitive.
isExternal :: String -> Bool
isExternal url = any (`isPrefixOf` url) ["http://", "https://", "//"]


--------------------------------------------------------------------------------
-- | Strip all HTML tags from a string
--
-- Example:
--
-- > stripTags "<p>foo</p>"
--
-- Result:
--
-- > "foo"
--
-- This also works for incomplete tags
--
-- Example:
--
-- > stripTags "<p>foo</p"
--
-- Result:
--
-- > "foo"
--
-- Everything from a @<@ up to and including the next @>@ is removed; when no
-- closing @>@ follows, the rest of the string is dropped.
stripTags :: String -> String
stripTags []         = []
-- Skip to the closing '>' and drop it too; 'drop' is safe on an empty rest.
stripTags ('<' : xs) = stripTags $ drop 1 $ dropWhile (/= '>') xs
stripTags (x : xs)   = x : stripTags xs


--------------------------------------------------------------------------------
-- | HTML-escape a string
--
-- Example:
--
-- > escapeHtml "Me & Dean"
--
-- Result:
--
-- > "Me &amp; Dean"
--
-- The string is converted to Blaze markup with 'toHtml' and rendered back
-- to a 'String' with 'renderHtml'.
escapeHtml :: String -> String
escapeHtml = renderHtml . toHtml