-- | Word-frequency counting over 'T.Text' and rendering of the resulting
-- frequencies as a bar chart.
--
-- 'buildFreq' and 'topN' compute frequencies, 'displayWords' renders them as
-- text, and 'makeDistribution' / 'makeFile' turn them into a chart.
--
-- NOTE(review): 'processFile' and 'globFile' are exported but not defined in
-- the visible part of this file; presumably they are re-exported from
-- "Data.Text.WordCount.FileRead" -- confirm.
module Data.Text.WordCount
( topN
, displayWords
, makeFile
, makeDistribution
, processFile
, globFile
, buildFreq
) where
import qualified Data.Map.Lazy as M
import Data.Map.Lens
import Control.Lens hiding (argument)
import qualified Data.Text as T
import Data.Tuple
import Data.Monoid
import Data.List
import Data.Ord
import Graphics.Rendering.Chart.Easy hiding (argument)
import Graphics.Rendering.Chart.Backend.Diagrams
import Data.Char
import Data.Text.WordCount.FileRead
import Graphics.Rendering.Chart.State
-- | The @n@ most frequent words of a text, paired with their counts.
--
-- The text is tallied with 'buildFreq' and sorted with 'order' (highest
-- count first); at most @n@ entries are returned.
topN :: Int -> T.Text -> [(Int,T.Text)]
topN n text = take n ranked
  where
    ranked = order (buildFreq text)
-- | Render frequency pairs as text, one @count: word@ entry per line.
--
-- Every entry, including the last one, is terminated by a newline; an empty
-- list yields the empty text.
displayWords :: [(Int,T.Text)] -> T.Text
displayWords = T.unlines . fmap display
  where
    -- 'T.unlines' assembles the result in a single pass, instead of
    -- re-copying the already-built tail for every entry via nested '<>'.
    display (n, str) = T.pack (show n) <> ": " <> str
-- | Build a word -> occurrence-count map for a text.
--
-- The text is lower-cased character by character ('toLower' from
-- "Data.Char"), split with 'T.words' (i.e. on whitespace only, so attached
-- punctuation stays part of the word), and the pieces are tallied by 'count'.
buildFreq :: T.Text -> M.Map T.Text Int
buildFreq text = count tokens
  where
    lowered = T.map toLower text
    tokens  = T.words lowered
-- | Flatten a frequency map into @(count, word)@ pairs, highest count first.
--
-- The sort is stable and the pairs start out in ascending key order, so words
-- with equal counts stay in ascending word order.
order :: M.Map T.Text Int -> [(Int, T.Text)]
order freqs = sortBy byCountDescending flipped
  where
    flipped = [(n, w) | (w, n) <- M.toList freqs]
    -- Arguments are compared in reverse to get a descending sort.
    byCountDescending (a, _) (b, _) = compare b a
-- | Tally how many times each word occurs in the list.
--
-- Words absent from the input are absent from the map; an empty list gives
-- an empty map.
count :: [T.Text] -> M.Map T.Text Int
count = foldl' tally M.empty
  where
    -- One map traversal per word: insert 1 for a new word, otherwise bump
    -- the existing count. '$!' forces the new count before it is stored, so
    -- the (lazy) map never accumulates chains of unevaluated (+ 1) thunks.
    tally acc word = M.alter (\seen -> Just $! maybe 1 (+ 1) seen) word acc
-- | Write the bar chart for the given @(value, word)@ pairs to a file.
--
-- The chart is the one described by 'makeDistribution'; it is rendered with
-- 'toFile' using the default options ('def') to the path @out@.
makeFile :: BarsPlotValue a => [(a, T.Text)] -> FilePath -> IO ()
makeFile points out = toFile def out chart
  where
    chart = makeDistribution points
-- | Describe a bar chart of word frequencies.
--
-- Each @(value, word)@ pair becomes one bar: the words label the index
-- (x) axis in input order and the values give the bar heights. All bars share
-- a single opaque light-blue fill with no outline style.
makeDistribution :: BarsPlotValue a => [(a, T.Text)] -> EC (Layout PlotIndex a) ()
makeDistribution points = do
    layout_title .= "Word Frequencies"
    layout_x_axis . laxis_generate .= autoIndexAxis wordLabels
    layout_y_axis . laxis_override .= axisGridHide
    layout_left_axis_visibility . axis_show_ticks .= False
    plot (plotBars <$> liftEC barSpec)
  where
    -- Axis labels, in the same order as the bars.
    wordLabels = [T.unpack word | (_, word) <- points]
    -- Bar heights paired with their position on the index axis.
    indexedValues = addIndexes [amount | (amount, _) <- points]
    barFill = solidFillStyle (opaque lightblue)
    -- Every index carries exactly one bar, hence the singleton value lists
    -- and the single item style.
    barSpec = do
        plot_bars_values .= [(idx, [amount]) | (idx, amount) <- indexedValues]
        plot_bars_item_styles .= [(barFill, Nothing)]