{-# LANGUAGE ApplicativeDo #-} {-# LANGUAGE BlockArguments #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} module Main where import Data.Aeson import Data.Aeson.Text (encodeToLazyText) import qualified Data.Text.IO as TIO import qualified Data.Text.Lazy.IO as TL import Network.HTTP.Simple import Options.Applicative import qualified Readability as R import Text.XML data Source = StdIn | File FilePath | Web Request deriving (Show) data Extract = Body | Title | ShortTitle | All deriving (Eq, Show) data Opts = Opts {optExtract :: Extract, optInput :: Source} deriving (Show) main :: IO () main = do (Opts ext src) <- execParser opts article <- case src of File f -> R.fromFile f Web req -> R.fromByteString . getResponseBody <$> httpLBS req StdIn -> R.fromText <$> TL.getContents case article of Just R.Article {..} -> do case ext of Body -> TL.putStrLn $ render summary Title -> maybe (return ()) TIO.putStrLn title ShortTitle -> maybe (return ()) TIO.putStrLn shortTitle All -> TL.putStrLn (encodeToLazyText $ object ["article" .= render summary, "title" .= title, "shortTitle" .= shortTitle]) Nothing -> return () where render = renderText def {rsPretty = True, rsXMLDeclaration = True} opts :: ParserInfo Opts opts = info (options <**> helper) ( fullDesc <> progDesc "Provide FILE or URL as source or nothing for reading from stdin" <> header "readability - extract article from HTML" ) options :: Parser Opts options = do ext <- option extract ( value Body <> short 'e' <> long "extract" <> help "Extract 'article' (default), 'title', 'shortTitle', 'all' (as JSON)" ) inf <- argument source (value StdIn <> metavar "SOURCE") pure $ Opts ext inf source :: ReadM Source source = maybeReader \s -> Web <$> parseRequest s <|> Just (File s) extract :: ReadM Extract extract = eitherReader \case "article" -> Right Body "title" -> Right Title "shortTitle" -> Right ShortTitle "all" -> Right All _ -> Left "Only 'article', 'title', 'shortTitle' or 'all' can be extracted"