{-# LANGUAGE OverloadedStrings #-}

-- | Public entry points for extracting an 'Article' from HTML supplied as a
-- parsed document, a lazy 'ByteString', a lazy 'Text', or a file on disk.
module Readability
  ( -- * Data types
    Article (..),

    -- * Construction
    fromByteString,
    fromDocument,
    fromFile,
    fromText,
  )
where

import Data.ByteString.Lazy (ByteString)
import Data.Text.Lazy (Text)
import Readability.Internal as I
import Readability.Types
import qualified Text.HTML.DOM as DOM
import Text.XML (Document)

-- | Builds an 'Article' from an already parsed HTML 'Document', using
-- 'strictSettings'.
--
-- The result is 'Nothing' whenever the underlying summary step produces
-- 'Nothing'.
fromDocument :: Document -> Maybe Article
fromDocument doc = fromDocument' strictSettings doc

-- | Same as 'fromDocument', but with the 'Settings' supplied by the caller.
--
-- The summary returned by @I.summary@ becomes the first field of the
-- 'Article'; the remaining two fields are @I.title@ and @I.shortTitle@ of the
-- same document.
fromDocument' :: Settings -> Document -> Maybe Article
fromDocument' settings doc = fmap toArticle (I.summary settings doc)
  where
    -- Wraps a successfully computed summary together with both title variants.
    toArticle body = Article body (I.title doc) (I.shortTitle doc)

-- | Parses a lazy 'ByteString' as HTML and extracts the article from it.
fromByteString :: ByteString -> Maybe Article
fromByteString bytes = fromDocument (DOM.parseLBS bytes)

-- | Reads the file at the given path as HTML and extracts the article from it.
fromFile :: FilePath -> IO (Maybe Article)
fromFile path = fmap fromDocument (DOM.readFile path)

-- | Parses a lazy 'Text' as HTML and extracts the article from it.
fromText :: Text -> Maybe Article
fromText txt = fromDocument (DOM.parseLT txt)

-- | Default settings used by every exported constructor: the
-- 'reRemoveAttributes' predicate holds only for @"class"@.
strictSettings :: Settings
strictSettings =
  Settings
    { reRemoveAttributes = \attr -> attr `elem` ["class"]
    }

{-
looseSettings :: Settings
looseSettings =
  Settings
    { reRemoveAttributes = const False
    }
-}