module Data.Text.WordCount.Exec where
import Control.Arrow ((&&&))
import Control.Lens (over, _2)
import Data.Binary (decode, encode)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BSL
import qualified Data.IntMap as IM
import Data.Maybe
import Data.Monoid
import qualified Data.Text.IO as TIO
import qualified Data.Text.Lazy as TL
import Data.Text.WordCount
import Data.Text.WordCount.FileRead
import Data.Version
import Options.Applicative
import Paths_wordchoice
import System.Directory (doesFileExist)
-- | Options collected from the command line.
data Program = Program
    { file         :: FilePath
      -- ^ Positional @FILEPATH@ argument: the file to analyze.
    , num          :: Maybe Int
      -- ^ Value of @-n@ / @--number@, if supplied.
    , output       :: Maybe FilePath
      -- ^ Value of @-o@ / @--output@, if supplied: where to write the graph.
    , filterOutput :: Bool
      -- ^ Set by the @-f@ / @--filter@ switch.
    , cacheIndex   :: Bool
      -- ^ Set by the @-d@ / @--dump@ switch.
    }
-- | Command-line parser producing a 'Program'.
--
-- Accepts one positional @FILEPATH@ argument plus the optional @-n@,
-- @-o@, @-f@ and @-d@ flags.
program :: Parser Program
program = Program
    <$> argument str
            ( metavar "FILEPATH"
           <> completer (bashCompleter "file -o plusdirs")
           <> help "File to analyze"
            )
    -- 'option auto' validates NUM while parsing, so a non-numeric value is
    -- reported as a usage error rather than blowing up later inside 'read'.
    <*> optional
            ( option auto
                ( short 'n'
               <> long "number"
               <> metavar "NUM"
               <> help "Top NUM words will be listed"
                )
            )
    <*> optional
            ( strOption
                ( short 'o'
               <> long "output"
               <> metavar "OUTPUT"
               <> help "Filepath for output graph"
                )
            )
    <*> switch
            ( short 'f'
           <> long "filter"
           <> help "Filter common English words from output."
            )
    <*> switch
            ( short 'd'
           <> long "dump"
           <> help "Cache word frequency indices"
            )
-- | @-v@ / @--version@ option that shows the package version string.
versionInfo :: Parser (a -> a)
versionInfo =
    infoOption banner (mconcat [short 'v', long "version", help "Show version"])
  where
    banner = "wordchoice version: " <> showVersion version
-- | The option parser combined with the help and version flags and the
-- descriptive text used in the generated help output.
wrapper :: ParserInfo Program
wrapper = info (helper <*> versionInfo <*> program) description
  where
    description = mconcat
        [ fullDesc
        , progDesc "Word choice is a command-line meant to help you improve your writing. Simply point it to a file containing text and it will list your most frequently used words and their frequencies."
        , header "Word choice command-line utility"
        ]
-- | Program entry point: parse the command line with 'wrapper' and pass the
-- resulting options to 'pick'.
exec :: IO ()
exec = do
    options <- execParser wrapper
    pick options
-- | Analyze the file named in the options and print the selected words.
--
-- Word selection:
--
-- * with the filter switch set, @filterTop n small@ is applied to the text,
--   where @n@ is the requested number, or 25 when none was given;
-- * otherwise, with an explicit number @x@, @topN x@ is applied;
-- * otherwise every entry of the index is shown: the index is decoded from
--   @index.bin@ when that file exists, and built with 'indexed' when not.
--
-- With the dump switch set, the index of the current text is written to
-- @index.bin@ before the words are printed. When an output path was
-- supplied, @topN n@ of the text is passed to 'makeFile' with that path.
pick :: Program -> IO ()
pick rec = do
    contents <- TL.fromStrict <$> globFile (file rec)
    pickContents <- selectWords contents
    if cacheIndex rec
        then do
            BS.writeFile cachePath (BSL.toStrict (encode (indexed contents)))
            putStrLn "...finished indexing"
            TIO.putStrLn (displayWords pickContents)
        else TIO.putStrLn (displayWords pickContents)
    case output rec of
        Just out -> makeFile (topN n contents) out
        Nothing  -> pure ()
  where
    -- Number of words to use when @-n@ was not supplied: 25.
    n :: Int
    n = fromMaybe 25 (num rec)

    -- Location of the serialized index, relative to the working directory.
    cachePath :: FilePath
    cachePath = "index.bin"

    -- Choose the words to print according to the filter and number options.
    selectWords :: TL.Text -> IO [(Int, TL.Text)]
    selectWords contents
        | filterOutput rec = pure (filterTop n small contents)
        | otherwise =
            case num rec of
                Just x  -> pure (topN x contents)
                Nothing -> do
                    cacheExists <- doesFileExist cachePath
                    if cacheExists
                        then flatten . decodeIndex <$> BS.readFile cachePath
                        else pure (flatten (indexed contents))

    -- Deserialize an index from the strict bytes read off disk.
    decodeIndex :: BS.ByteString -> IM.IntMap [TL.Text]
    decodeIndex bytes = decode (BSL.fromStrict bytes)

    -- Pair each key of the index with every word stored under it.
    flatten :: IM.IntMap [TL.Text] -> [(Int, TL.Text)]
    flatten index = [ (k, w) | (k, ws) <- IM.toList index, w <- ws ]