module Text.Hastily.Report (
generate,
findCandidateDialog
) where
import Data.List (intersect)
import Data.List
import qualified Data.Map as Map
import Data.String.Conversions
import qualified Data.Text as DT
import System.Directory
import System.IO
import Text.Printf
import Text.Hastily.Types
generate :: [DT.Text] -> [Subtitle] -> IO ()
generate skip_words subs = do
let dialog_frequency_pair = findCandidateDialog skip_words subs
printReport (filter isEmpty subs) $ fst dialog_frequency_pair
where
isEmpty :: Subtitle -> Bool
isEmpty s = (length $ subtitle_dialogs s) > 0
printReport :: [Subtitle] -> SubtitleDialog -> IO ()
printReport subs candidate_dialog = if (length subs==0)
then putStrLn "No subtitles found"
else do
putStrLn $ printf "Computing candidate dialog..."
maybe_handle <- let report_filename = "hastily-report.txt" in
doesFileExist report_filename >>= (\x -> if not x
then fmap Just $ openFile report_filename WriteMode
else do
putStrLn "WARNING: Existing report file found. Will not overwrite!"
return Nothing)
printHeaders candidate_dialog maybe_handle
mapM_ (findAndPrintReport candidate_dialog maybe_handle) subs
case maybe_handle of
Just handle -> hClose handle
_ -> return ()
where
outPut maybe_handle str = do
putStrLn str
case maybe_handle of
Just handle -> hPutStrLn handle str
_ -> return ()
printHeaders candidate_dialog maybe_handle = do
outPut maybe_handle $ printf "\nDialogue\n--------\n%s\n--------" (cs $ dialog candidate_dialog::String)
outPut maybe_handle $ printf "%-29s | %s" ("Time"::String) ("File"::String)
outPut maybe_handle $ printf "%-29s | %s" ("----"::String) ("----"::String)
findAndPrintReport :: SubtitleDialog -> Maybe Handle -> Subtitle -> IO ()
findAndPrintReport c_dialog maybe_handle sub = do
outPut maybe_handle $ printf "%-29s | %s" (findTimeFor c_dialog (subtitle_dialogs sub)) (subtitle_file sub)
where
findTimeFor :: SubtitleDialog -> [SubtitleDialog] -> String
findTimeFor c_dialog [] = "Dialog not found!"
findTimeFor c_dialog (d:ds) = if d == c_dialog
then printf "%s --> %s" (cs $ start_time d::String) (cs $ end_time d::String)
else findTimeFor c_dialog ds
findCandidateDialog :: [DT.Text] -> [Subtitle] -> (SubtitleDialog, Int)
findCandidateDialog skip_words subs = head $ sortBy compareFunction $ getCombinedHistogram non_repeating_dialog_sets
where
compareFunction (SubtitleDialog st1 _ _ _, a) (SubtitleDialog st2 _ _ _, b) =
let ordering = compare b a in
if ordering == EQ then compare st1 st2 else ordering
non_repeating_dialog_sets = fmap (getNonRepeatingDialogList skip_words) subs
getNonRepeatingDialogList :: [DT.Text] -> Subtitle -> [SubtitleDialog]
getNonRepeatingDialogList skip_words (Subtitle _ _ dialogs) = fmap fst $
filter (\(dialog, count) -> (count == 1) && (dialog `doesNotContain` skip_words) ) $ getHistogram dialogs
where
doesNotContain sub_dialog skip_words = all (\word -> not $ word `DT.isInfixOf` (digest sub_dialog)) $ fmap DT.toLower skip_words
getCombinedHistogram :: [[SubtitleDialog]] -> [(SubtitleDialog, Int)]
getCombinedHistogram dlgxs = getHistogram $ flatenList dlgxs
where
flatenList dlgs = foldl (++) [] dlgxs
getHistogram :: [SubtitleDialog] -> [(SubtitleDialog, Int)]
getHistogram subdx = Map.toList $ foldl addDialogToMap (makeMap subdx) subdx
where
addDialogToMap :: Map.Map SubtitleDialog Int -> SubtitleDialog -> Map.Map SubtitleDialog Int
addDialogToMap map dg = Map.update (\f -> Just (f+1)) dg map
makeMap :: [SubtitleDialog] -> Map.Map SubtitleDialog Int
makeMap subdx = Map.fromList $ zip (nub subdx) $ repeat 0