-- |
-- Module      :  Main
-- Copyright   :  (c) OleksandrZhabenko 2020
-- License     :  MIT
-- Stability   :  Experimental
-- Maintainer  :  olexandr543@yahoo.com
--
-- Analyzes a poetic text in Ukrainian, for every line prints statistic data and 
-- then for the whole poem prints the hypothesis evaluation information. Since the 0.4.0.0 version 
-- the program tries to be more accurate in cases of the lines consisting entirely of the words 
-- which are unique in phonetic meaning alongside the line. Another hypothesis is that the distribution 
-- of the placement of the actual poetic text in Ukrainian is not one of the standard distributions. 
-- It can probably have approximately a form of and is different for different authors:
-- 
-- >    --   --   --
-- >   /  \_/  \_/  \
-- 
-- To enable parallel computations (potentially, they can speed up the work), please, run the @uniqVec03@ executable with
-- @+RTS -threaded -RTS@ command line options.
-- 

{-# OPTIONS_GHC -threaded -rtsopts #-}

{-# LANGUAGE CPP, BangPatterns #-}

module Main where

#ifdef __GLASGOW_HASKELL__
#if __GLASGOW_HASKELL__>=710
/* code that applies only to GHC 7.10.* and higher versions */
import GHC.Base (mconcat)
#endif
#endif
import Control.Parallel.Strategies
import Data.Maybe (fromMaybe)
import Text.Read (readMaybe)
import qualified Data.Vector as V
import String.Languages.UniquenessPeriods.Vector
import Languages.UniquenessPeriods.Vector.General.Debug
import Languages.UniquenessPeriods.Vector.Properties
import Melodics.Ukrainian
import System.Environment
import Languages.Phonetic.Ukrainian.PrepareText
import Languages.UniquenessPeriods.Vector.Data
import Languages.UniquenessPeriods.Vector.Auxiliary
import Languages.UniquenessPeriods.Vector.StrictV
import Numeric (showFFloat)
import Languages.UniquenessPeriods.Vector.Filters
import GHC.Float (int2Float)
import Data.List (sort)
import Numeric.Stats

#ifdef __GLASGOW_HASKELL__
#if __GLASGOW_HASKELL__==708
/* code that applies only to GHC 7.8.* */
mconcat = concat
#endif
#endif

main :: IO ()
main = do
  args <- getArgs
  let file = concat . take 1 $ args
      gz = fromMaybe 9 (readMaybe (concat . take 1 . drop 1 $ args)::(Maybe Int))
  contents <- readFile file
  let flines = filter (not . null . filter isUkrainian) . prepareText $ contents
  data3 <- mapM (\ts -> do
   let (maxE,minE,data2) = runEval ((parTuple3 rpar rseq rseq) ((\k -> if k == 0 then 1 else k) . (\rs -> if null rs then 0 else head rs) . firstFrom3 . maximumElBy 1 (V.singleton (oneProperty)) $ UL2 ([],uniquenessVariants2GN " 01-" (V.singleton (oneProperty)) (uniquenessPeriodsVector3 " 01-" . 
          aux0 . convertToProperUkrainian) (justOneValue2Property . diverse2) $ ts),
          (\k -> if k == 0 then 1 else k) . abs . (\rs -> if null rs then 0 else head rs) . firstFrom3 . maximumElBy 1 (V.singleton (oneProperty)) $ UL2 ([],uniquenessVariants2GN " 01-" (V.singleton (oneProperty)) (uniquenessPeriodsVector3 " 01-" . 
          aux0 . convertToProperUkrainian) (justOneValue2Property . negate . diverse2) $ ts),
          (\k -> if k == 0 then 1 else k) . diverse2 . uniquenessPeriodsVector3 " 01-" . aux0 . convertToProperUkrainian $ ts))
       ratio = if maxE == 1 then 0.0 else 2.0 * fromIntegral data2 / fromIntegral (minE + maxE)
       [intervalN,wordsN] = runEval ((parList rpar) [if maxE == 1 then 0 else intervalNRealFrac (int2Float minE) (int2Float maxE) gz (int2Float data2),
         if maxE == 1 then 0 else length . words $ ts])
   putStrLn $ mconcat [show (minE::Int), "\t", show (data2::Int), "\t", show (maxE::Int), "\t", 
     showFFloat (Just 4) (fromIntegral data2 / fromIntegral minE) "\t", showFFloat (Just 4) (fromIntegral maxE / fromIntegral data2) "\t", 
       showFFloat (Just 4) ratio "\t", show (wordsN::Int), "\t", show (intervalN::Int)]
   return (ratio,(wordsN,intervalN))) $ flines
  let (data31,wordsCnt0_data32) = unzip data3 
      data4 = filter (/= 0) data31
  if null data4 then putStrLn (replicate 102 '-') >> putStrLn "1.000+-0.000\tALL!" >> putStrLn (replicate 102 '=') -- Well, this means that all the text consists of the unique (in phonetic meaning) words alongside every line. A rather rare occurrence.
  else do 
    let (!mean1,!disp) = meanWithDisp data4
        pairs = sort . filter ((/= 0) . snd) $ wordsCnt0_data32
        g m n = (length . takeWhile (\(_,v) -> v == n) . dropWhile (\(_,v) -> v /= n) . takeWhile (\(u,_) -> u == m) . dropWhile (\(u,_) -> u /= m) $ pairs) `using` rdeepseq
    putStrLn (replicate 102 '-')
    mapM_ (\r -> putStr $ show r ++ "\t") [1..gz] >> putStrLn ""
    mapM_ (\r ->  putStr $ (show . length . takeWhile (== r) . dropWhile (/= r) . sort . map snd $ pairs) ++ "\t") [1..gz] >> putStrLn ""
    putStrLn $ showFFloat (Just 4) mean1 "+-" ++ showFFloat (Just 4) (sqrt disp) "\t" ++ show (length . filter (== 0) $ data31) ++ "\t" ++ show (length data3)
    mapM_ (\r -> putStr $ show r ++ "\t") [2..8] >> putStrLn ""
    mapM_ (\r ->  putStr $ (show . length . takeWhile (== r) . dropWhile (/= r) . map fst $ pairs) ++ "\t") [2..8] >> putStrLn ""
    putStrLn (replicate 102 '*')
    mapM_ (\m1 -> mapM_ (\n1 ->  putStr $  (if g m1 n1 == 0 then "." else show (g m1 n1)) ++ "\t") [1..gz] >> putStrLn "") [2..8]
    putStrLn (replicate 102 '~')
    mapM_ (\m1 -> mapM_ (\n1 ->  putStr $  show (g m1 n1) ++ "\t") [1..gz] >> putStrLn "") [2..8]
    putStrLn (replicate 102 '=')