{-# LANGUAGE OverloadedStrings #-}

module Text.Hastily.Report (
        generate,
        findCandidateDialog
        ) where


import           Data.List               (intersect)
import           Data.List
import qualified Data.Map                as Map
import           Data.String.Conversions
import qualified Data.Text               as DT
import           System.Directory
import           System.IO
import           Text.Printf

import           Text.Hastily.Types

generate :: [DT.Text] -> [Subtitle] -> IO ()
generate skip_words subs = do
        let dialog_frequency_pair = findCandidateDialog skip_words subs
        printReport (filter isEmpty subs) $ fst dialog_frequency_pair
    where
    isEmpty :: Subtitle -> Bool
    isEmpty s = (length $ subtitle_dialogs s) > 0

printReport :: [Subtitle] -> SubtitleDialog -> IO ()
printReport subs candidate_dialog = if (length subs==0)
    then putStrLn "No subtitles found"
    else do
        putStrLn $ printf "Computing candidate dialog..."
        maybe_handle <- let report_filename = "hastily-report.txt" in
            doesFileExist report_filename >>= (\x -> if not x
                then fmap Just $ openFile report_filename WriteMode
                else do
                    putStrLn "WARNING: Existing report file found. Will not overwrite!"
                    return Nothing)
        printHeaders candidate_dialog maybe_handle
        mapM_ (findAndPrintReport candidate_dialog maybe_handle) subs
        case maybe_handle of
            Just handle -> hClose handle
            _ -> return ()
    where
        outPut maybe_handle str = do
            putStrLn str
            case maybe_handle of
                Just handle -> hPutStrLn handle str
                _ -> return ()
        printHeaders candidate_dialog maybe_handle = do
            outPut maybe_handle $ printf "\nDialogue\n--------\n%s\n--------" (cs $ dialog candidate_dialog::String)
            outPut maybe_handle $ printf "%-29s | %s" ("Time"::String) ("File"::String)
            outPut maybe_handle $ printf "%-29s | %s" ("----"::String) ("----"::String)
        findAndPrintReport :: SubtitleDialog -> Maybe Handle -> Subtitle -> IO ()
        findAndPrintReport c_dialog maybe_handle sub = do
            outPut maybe_handle $ printf "%-29s | %s" (findTimeFor c_dialog (subtitle_dialogs sub)) (subtitle_file sub)
            where
                findTimeFor :: SubtitleDialog -> [SubtitleDialog] -> String
                findTimeFor c_dialog [] = "Dialog not found!"
                findTimeFor c_dialog (d:ds) = if d == c_dialog
                    then printf "%s --> %s" (cs $ start_time d::String) (cs $ end_time d::String)
                    else findTimeFor c_dialog ds

-- Find a dialog whose time is used to compare
-- subtitles. The following process is used to find this dialog.
-- 1. For each subtitle file, prepare a list of dialogues that appear only once in the movie.
-- 2. Among those list, select dialogues that appear in the most of them.
-- 3. Among those list of dialogues, pick one that appears closest to the start.
findCandidateDialog :: [DT.Text] -> [Subtitle] -> (SubtitleDialog, Int)
findCandidateDialog skip_words subs = head $ sortBy compareFunction $ getCombinedHistogram non_repeating_dialog_sets
    where
        compareFunction (SubtitleDialog st1 _ _ _, a) (SubtitleDialog st2 _ _ _, b) =
            -- Sort dialogs by decreasing order of their frequency.
            -- if two dialogs appear in same number of files, then sort them
            -- by the time they first appear, so that earlier ones are prefered.
            let ordering = compare b a in
                if ordering == EQ then compare st1 st2 else ordering
        non_repeating_dialog_sets = fmap (getNonRepeatingDialogList skip_words) subs

-- given a subtitle, return a list of subtitle dialogs that does not
-- appear more than once or contain the list of skip words
getNonRepeatingDialogList :: [DT.Text] -> Subtitle -> [SubtitleDialog]
getNonRepeatingDialogList skip_words (Subtitle _ _ dialogs) = fmap fst $
    filter (\(dialog, count) -> (count == 1) && (dialog `doesNotContain` skip_words) ) $ getHistogram dialogs
    where
        doesNotContain sub_dialog skip_words = all (\word -> not $ word `DT.isInfixOf` (digest sub_dialog)) $ fmap DT.toLower skip_words

-- Make a list of tuples. Each tuple in this list will
-- hold a subtitle dialog and the number of times it appears in
-- all the avaliable subs
getCombinedHistogram :: [[SubtitleDialog]] -> [(SubtitleDialog, Int)]
getCombinedHistogram dlgxs = getHistogram $ flatenList dlgxs
    where
        flatenList dlgs = foldl (++) [] dlgxs

getHistogram :: [SubtitleDialog] -> [(SubtitleDialog, Int)]
getHistogram subdx = Map.toList $ foldl addDialogToMap (makeMap subdx) subdx
    where
        addDialogToMap :: Map.Map SubtitleDialog Int -> SubtitleDialog -> Map.Map SubtitleDialog Int
        addDialogToMap map dg = Map.update (\f -> Just (f+1)) dg map
        makeMap :: [SubtitleDialog] -> Map.Map SubtitleDialog Int
        makeMap subdx = Map.fromList $ zip (nub subdx) $ repeat 0