{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PolyKinds #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
module Servant.Seo.Sitemap where
import Control.Lens
import Control.Monad.IO.Class (MonadIO)
import qualified Data.Binary.Builder as Builder
import Data.ByteString.Builder (Builder)
import qualified Data.ByteString.Lazy as BSL
import Data.Coerce (coerce)
import qualified Data.Foldable as F
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import Data.Maybe (catMaybes)
import Data.String (IsString)
import Data.Text (Text)
import qualified Data.Text as Text
import qualified Data.Text.Encoding as Text
import GHC.Generics (Generic)
import GHC.TypeLits (KnownNat, KnownSymbol, natVal,
symbolVal)
import Numeric (showFFloat)
import Servant
import Servant.HTML.Blaze (HTML)
import Text.Blaze (ToMarkup)
import Text.XML
import Servant.Seo.Combinators
data SitemapInfo = SitemapInfo
{ _sitemapInfoEntries :: [SitemapEntry]
, _sitemapInfoPresent :: Maybe ()
}
deriving (Show, Eq, Generic, Ord)
instance Monoid SitemapInfo where
mempty = SitemapInfo [] Nothing
instance Semigroup SitemapInfo where
SitemapInfo e1 p1 <> SitemapInfo e2 p2
= SitemapInfo (e1 <> e2) (p1 <> p2)
data SitemapEntry = SitemapEntry
{ _sitemapPathPieces :: [PathPiece]
, _sitemapQueryParts :: [(ParamName, [ParamValue])]
, _sitemapFrequency :: Maybe Period
, _sitemapPriority :: Maybe Text
}
deriving (Show, Eq, Generic, Ord)
instance Monoid SitemapEntry where
mempty = SitemapEntry [] [] Nothing Nothing
instance Semigroup SitemapEntry where
SitemapEntry p1 q1 f1 r1 <> SitemapEntry p2 q2 f2 r2 =
SitemapEntry (p1 <> p2) (q1 <> q2) (maximum [f1, f2]) (maximum [r1, r2])
newtype ParamName = ParamName Text
deriving stock (Show, Eq, Generic)
deriving newtype (Ord, ToMarkup, ToHttpApiData)
newtype ParamValue = ParamValue Text
deriving stock (Show, Eq, Generic)
deriving newtype (Ord, ToMarkup, ToHttpApiData)
data PathPiece = UrlPathPiece Text
| CaptureValues [Text]
deriving (Show, Eq, Generic, Ord)
isEmpty :: PathPiece -> Bool
isEmpty (UrlPathPiece "") = True
isEmpty (CaptureValues []) = True
isEmpty _ = False
class ToHttpApiData a => ToSitemapParamPart a where
getParamsForIndexing :: MonadIO m => Proxy a -> app -> m [a]
getParamsForIndexing _ _ = pure mempty
toParamPart :: MonadIO m => Proxy a -> env -> m [ParamValue]
toParamPart proxy env = do
values <- getParamsForIndexing proxy env
pure $ ParamValue . toUrlPiece <$> values
class ToHttpApiData a => ToSitemapPathPiece a where
getPathPiecesForIndexing :: MonadIO m => Proxy a -> app -> m [a]
getPathPiecesForIndexing _ _ = pure mempty
toPathPiece :: MonadIO m => Proxy a -> env -> m PathPiece
toPathPiece proxy env = do
values <- getPathPiecesForIndexing proxy env
pure $ CaptureValues (toUrlPiece <$> values)
makeLenses ''SitemapInfo
makeLenses ''SitemapEntry
class HasSitemap a where
toSitemapInfo :: MonadIO m => Proxy a -> m SitemapInfo
toSitemapInfo proxy = toSitemapInfoWith () proxy
toSitemapInfoWith :: MonadIO m => env -> Proxy a -> m SitemapInfo
toSitemapInfoWith _ = toSitemapInfo
{-# MINIMAL toSitemapInfo | toSitemapInfoWith #-}
instance HasSitemap Raw where
toSitemapInfo _ = pure mempty
instance HasSitemap EmptyAPI where
toSitemapInfo _ = pure mempty
instance (HasSitemap a, HasSitemap b) => HasSitemap (a :<|> b) where
toSitemapInfo _ = do
sitemapA <- toSitemapInfo (Proxy :: Proxy a)
sitemapB <- toSitemapInfo (Proxy :: Proxy b)
pure (sitemapA <> sitemapB)
instance HasSitemap sub => HasSitemap (WithNamedContext x c sub) where
toSitemapInfo _ = toSitemapInfo (Proxy :: Proxy sub)
instance HasSitemap sub => HasSitemap (HttpVersion :> sub) where
toSitemapInfo _ = toSitemapInfo (Proxy :: Proxy sub)
instance HasSitemap sub => HasSitemap (StreamBody' mods fr ct a :> sub) where
toSitemapInfo _ = toSitemapInfo (Proxy :: Proxy sub)
instance HasSitemap sub => HasSitemap (ReqBody' mods cs a :> sub) where
toSitemapInfo _ = toSitemapInfo (Proxy :: Proxy sub)
instance HasSitemap sub => HasSitemap (RemoteHost :> sub) where
toSitemapInfo _ = toSitemapInfo (Proxy :: Proxy sub)
instance (ToSitemapParamPart a, HasSitemap sub, KnownSymbol sym) =>
HasSitemap (QueryParam' mods sym a :> sub) where
toSitemapInfoWith env _ = do
sitemap <- toSitemapInfo (Proxy :: Proxy sub)
vals <- toParamPart (Proxy :: Proxy a) env
let newSitemap
| sitemap == mempty = mempty
| null vals = sitemap
| otherwise
= sitemap & sitemapInfoEntries .~ newEntries sitemap vals
pure newSitemap
where
param = (ParamName . Text.pack . symbolVal) (Proxy :: Proxy sym)
addQueryParts paramName paramValues x = x & sitemapQueryParts .~ new
where
new = x ^. sitemapQueryParts . to ((paramName, paramValues) :)
newEntries x vals = if x ^. sitemapInfoEntries . to null
then [addQueryParts param vals mempty]
else x ^. sitemapInfoEntries . to (fmap (addQueryParts param vals))
instance HasSitemap sub => HasSitemap (QueryFlag sym :> sub) where
toSitemapInfo _ = toSitemapInfo (Proxy :: Proxy sub)
instance HasSitemap sub => HasSitemap (Header' mods sym a :> sub) where
toSitemapInfo _ = toSitemapInfo (Proxy :: Proxy sub)
instance HasSitemap sub => HasSitemap (IsSecure :> sub) where
toSitemapInfo _ = toSitemapInfo (Proxy :: Proxy sub)
instance HasSitemap api => HasSitemap (Summary desc :> api) where
toSitemapInfo _ = toSitemapInfo (Proxy :: Proxy api)
instance HasSitemap api => HasSitemap (Description desc :> api) where
toSitemapInfo _ = toSitemapInfo (Proxy :: Proxy api)
instance (HasSitemap sub, ToSitemapPathPiece a) =>
HasSitemap (Capture' mods sym a :> sub) where
toSitemapInfoWith env _ = do
sitemap <- toSitemapInfo (Proxy :: Proxy sub)
value <- toPathPiece (Proxy @a) env
pure $ if sitemap == mempty
then mempty
else
if isEmpty value
then sitemap
else sitemap & sitemapInfoEntries .~ newEntries sitemap value
where
addPathPieces capturedValue x = x & sitemapPathPieces .~ new
where
new = x ^. sitemapPathPieces . to (capturedValue :)
newEntries x capturedValue = if x ^. sitemapInfoEntries . to null
then [addPathPieces capturedValue mempty]
else x ^. sitemapInfoEntries . to (fmap (addPathPieces capturedValue))
instance HasSitemap sub => HasSitemap (CaptureAll sym a :> sub) where
toSitemapInfo _ = toSitemapInfo (Proxy :: Proxy sub)
instance HasSitemap sub => HasSitemap (Vault :> sub) where
toSitemapInfo _ = toSitemapInfo (Proxy :: Proxy sub)
instance (HasSitemap sub, KnownSymbol sym) => HasSitemap (sym :> sub) where
toSitemapInfo _ = do
sitemap <- toSitemapInfo (Proxy :: Proxy sub)
pure $ if sitemap == mempty
then mempty
else sitemap & sitemapInfoEntries .~ newEntries sitemap
where
newEntries x = if x ^. sitemapInfoEntries . to null
then [addPathPiece piece mempty]
else x ^. sitemapInfoEntries . to (fmap (addPathPiece piece))
piece = (UrlPathPiece . Text.pack . symbolVal) (Proxy :: Proxy sym)
addPathPiece p x = x & sitemapPathPieces .~ new
where
new = x ^. sitemapPathPieces . to (p :)
instance (HasSitemap sub, KnownSymbol sym) => HasSitemap (Disallow sym :> sub) where
toSitemapInfo _ = pure mempty
instance {-# OVERLAPPABLE #-} (KnownNat status) =>
HasSitemap (Verb method status cs a) where
toSitemapInfo _ = pure mempty
instance {-# OVERLAPPING #-} (ToMarkup a) => HasSitemap (Get '[HTML] a) where
toSitemapInfo _ = pure (SitemapInfo [mempty] (Just ()))
instance (HasPeriod period, HasSitemap api) => HasSitemap (Frequency period :> api) where
toSitemapInfo _ = do
sitemap <- toSitemapInfo (Proxy :: Proxy api)
pure $ sitemap & sitemapInfoEntries . each %~ (sitemapFrequency . _Just .~ period')
where
period' = getPeriod (Proxy :: Proxy period)
instance (KnownNat n, KnownNat m, HasSitemap api) => HasSitemap (Priority '(n, m) :> api) where
toSitemapInfo _ = do
sitemap <- toSitemapInfo (Proxy :: Proxy api)
pure $ sitemap & sitemapInfoEntries . each %~ (sitemapPriority . _Just .~ priority')
where
n' = natVal (Proxy :: Proxy n) & fromInteger @Float
m' = natVal (Proxy :: Proxy m) & fromInteger @Float
priority' = (n' * 10 + m') / 10
& min 1.0 & showFloat & Text.pack
showFloat x = showFFloat (Just 1) x ""
data SitemapUrl = SitemapUrl
{ _sitemapUrlLoc :: [SitemapLoc]
, _sitemapUrlFrequency :: Maybe Period
, _sitemapUrlPriority :: Maybe Text
}
deriving (Show, Eq, Generic, Ord)
newtype SitemapLoc = SitemapLoc Text
deriving stock (Show, Eq, Generic)
deriving newtype (Ord, ToMarkup)
newtype ServerUrl = ServerUrl Text
deriving stock (Show, Eq, Generic)
deriving newtype (Ord, ToMarkup, IsString)
type SitemapIndex = (Int, [SitemapUrl])
newtype SitemapIx = SitemapIx Int
deriving stock (Generic)
deriving newtype (Eq, Ord, Show, FromHttpApiData, ToHttpApiData)
instance ToSitemapPathPiece SitemapIx
makeLenses ''SitemapUrl
sitemapEntryToUrlList :: ServerUrl -> SitemapEntry -> SitemapUrl
sitemapEntryToUrlList server SitemapEntry {..} = SitemapUrl
{ _sitemapUrlLoc = locs
, _sitemapUrlFrequency = _sitemapFrequency
, _sitemapUrlPriority = _sitemapPriority
}
where
locs = case (null paths, null queries) of
(False, False) -> [ SitemapLoc (coerce server <> path <> query) | path <- paths, query <- queries ]
(True, False) -> [ SitemapLoc (coerce server <> "/" <> query) | query <- queries ]
(False, True) -> [ SitemapLoc (coerce server <> path) | path <- paths ]
_ -> [ SitemapLoc (coerce server) ]
paths = _sitemapPathPieces
& F.foldr combinePaths []
& fmap (escapeXmlEntities . toText)
where
combinePaths :: PathPiece -> [Builder] -> [Builder]
combinePaths p [] = case p of
UrlPathPiece piece -> [prepare piece]
CaptureValues pieces -> prepare <$> pieces
combinePaths p xs = case p of
UrlPathPiece piece -> prepend piece <$> xs
CaptureValues pieces -> concatMap (\piece -> prepend piece <$> xs) pieces
prepare = Builder.append (Builder.fromByteString "/") . toEncodedUrlPiece
prepend x = Builder.append (prepare x)
queries = _sitemapQueryParts
& F.foldl' combineParams []
& fmap (escapeXmlEntities . fixQuery . toText)
where
combineParams :: [Builder] -> (ParamName, [ParamValue]) -> [Builder]
combineParams [] (param, values) = values
& fmap toEncodedUrlPiece
& fmap (combine param)
combineParams xs paramWithValues = combineParams [] paramWithValues
& concatMap (\piece -> Builder.append piece <$> xs)
combine p v = Builder.fromByteString "&"
<> toEncodedUrlPiece p
<> Builder.fromByteString "="
<> v
fixQuery q = if Text.null q then q else Text.cons '?' (Text.tail q)
toText :: Builder -> Text
toText = Text.decodeUtf8 . BSL.toStrict . Builder.toLazyByteString
escapeXmlEntities txt = txt
& Text.replace "&" "&"
& Text.replace "'" "'"
& Text.replace "\"" """
& Text.replace ">" ">"
& Text.replace "<" "<"
sitemapUrlToNodes :: SitemapUrl -> [Node]
sitemapUrlToNodes SitemapUrl{..} = mkUrlNode "url" extra <$> _sitemapUrlLoc
where
extra = [ frequencyNode, priorityNode ] & catMaybes
frequencyNode = fromFrequency <$> _sitemapUrlFrequency
priorityNode = fromPriority <$> _sitemapUrlPriority
fromFrequency = mkNodeWithContent "changefreq" . Text.toLower . Text.pack . show
fromPriority = mkNodeWithContent "priority" . coerce
mkNode :: Name -> [Node] -> Node
mkNode name childNodes = NodeElement (Element name Map.empty childNodes)
mkNodeWithContent :: Name -> Text -> Node
mkNodeWithContent name content = mkNode name [NodeContent content]
mkUrlNode :: Name -> [Node] -> SitemapLoc -> Node
mkUrlNode name extra loc = mkNode name (locNode : extra)
where
locNode = mkNodeWithContent "loc" (coerce loc)
sitemapIndexUrlToNodes :: SitemapUrl -> [Node]
sitemapIndexUrlToNodes SitemapUrl{..} = mkUrlNode "sitemap" [] <$> _sitemapUrlLoc
sitemapUrlsToDocument :: ServerUrl -> [SitemapUrl] -> Document
sitemapUrlsToDocument server urlparts = Document
{ documentPrologue = Prologue [] Nothing []
, documentRoot = Element "urlset" (Map.fromList [namespace]) childNodes
, documentEpilogue = []
}
where
namespace = ("xmlns", "http://www.sitemaps.org/schemas/sitemap/0.9")
childNodes = concatMap sitemapUrlToNodes urlparts
sitemapIndexToDocument :: ServerUrl -> [SitemapUrl] -> Document
sitemapIndexToDocument server urlparts = Document
{ documentPrologue = Prologue [] Nothing []
, documentRoot = Element "sitemapindex" (Map.fromList [namespace]) childNodes
, documentEpilogue = []
}
where
namespace = ("xmlns", "http://www.sitemaps.org/schemas/sitemap/0.9")
childNodes = concatMap sitemapIndexUrlToNodes urlparts
sitemapUrlsToRootLBS :: ServerUrl -> [SitemapUrl] -> BSL.ByteString
sitemapUrlsToRootLBS serverUrl urls = if urls & concatMap _sitemapUrlLoc & length & (<= 50000)
then render sitemapUrlsToDocument urls
else render sitemapIndexToDocument indeces
where
render x ys = renderSitemapWith x serverUrl ys
indeces = urls & concatMap countLocGroups & length & pred & mkIndexList & fmap mkIndexUrl
countLocGroups :: SitemapUrl -> [Int]
countLocGroups x = x
& _sitemapUrlLoc
& length
& (realToFrac @_ @Double)
& (/ 50000)
& truncate
& mkIndexList
mkIndexList x = [0 .. x]
mkIndexUrl x = SitemapUrl
{ _sitemapUrlLoc = pure $ SitemapLoc
$ coerce serverUrl <> "/" <> Text.pack (show x) <> "/sitemap.xml"
, _sitemapUrlFrequency = Nothing
, _sitemapUrlPriority = Nothing
}
sitemapUrlsToSitemapMap :: ServerUrl -> [SitemapUrl] -> Map Int BSL.ByteString
sitemapUrlsToSitemapMap serverUrl urls = urls
& concatMap (splitUrlTo50KLocs [])
& zip [0..]
& Map.fromList
where
splitUrlTo50KLocs xs s = if s ^. sitemapUrlLoc . to length . to (<= 50000)
then mkLBS [s] : xs
else splitUrlTo50KLocs (currentLocsSitemap : xs) restLocsSitemap
where
currentLocsSitemap = mkLBS [s & sitemapUrlLoc %~ take 50000]
restLocsSitemap = s & sitemapUrlLoc %~ drop 50000
mkLBS = renderSitemapWith sitemapUrlsToDocument serverUrl
renderSitemapWith
:: (ServerUrl -> [SitemapUrl] -> Document) -> ServerUrl -> [SitemapUrl] -> BSL.ByteString
renderSitemapWith renderer serverUrl urls = renderer serverUrl urls & renderLBS def