-- | -- Module : Main -- Copyright : (c) OleksandrZhabenko 2020 -- License : MIT -- Stability : Experimental -- Maintainer : olexandr543@yahoo.com -- -- Analyzes a poetic text in Ukrainian, for every line prints statistic data and -- then for the whole poem prints the hypothesis evaluation information. Since the 0.4.0.0 version -- the program tries to be more accurate in cases of the lines consisting entirely of the words -- which are unique in phonetic meaning alongside the line. Another hypothesis is that the distribution -- of the placement of the actual poetic text in Ukrainian is not one of the standard distributions. -- It can probably have approximately a form of and is different for different authors: -- -- > -- -- -- -- > / \_/ \_/ \ -- -- To enable parallel computations (potentially, they can speed up the work), please, run the @uniqVec03@ executable with -- @+RTS -threaded -RTS@ command line options. -- {-# OPTIONS_GHC -threaded -rtsopts #-} {-# LANGUAGE CPP, BangPatterns #-} module Main where #ifdef __GLASGOW_HASKELL__ #if __GLASGOW_HASKELL__>=710 /* code that applies only to GHC 7.10.* and higher versions */ import GHC.Base (mconcat) #endif #endif import Control.Parallel.Strategies import Data.Maybe (fromMaybe) import Text.Read (readMaybe) import qualified Data.Vector as V import String.Languages.UniquenessPeriods.Vector import Languages.UniquenessPeriods.Vector.General.Debug import Languages.UniquenessPeriods.Vector.Properties import Melodics.Ukrainian import System.Environment import Languages.Phonetic.Ukrainian.PrepareText import Languages.UniquenessPeriods.Vector.Data import Languages.UniquenessPeriods.Vector.Auxiliary import Languages.UniquenessPeriods.Vector.StrictV import Numeric (showFFloat) import Languages.UniquenessPeriods.Vector.Filters import GHC.Float (int2Float) import Data.List (sort) import Numeric.Stats #ifdef __GLASGOW_HASKELL__ #if __GLASGOW_HASKELL__==708 /* code that applies only to GHC 7.8.* */ mconcat = concat #endif #endif main :: IO () main = do args <- getArgs let file = concat . take 1 $ args gz = fromMaybe 9 (readMaybe (concat . take 1 . drop 1 $ args)::(Maybe Int)) contents <- readFile file let flines = filter (not . null . filter isUkrainian) . prepareText $ contents data3 <- mapM (\ts -> do let (maxE,minE,data2) = runEval ((parTuple3 rpar rseq rseq) ((\k -> if k == 0 then 1 else k) . (\rs -> if null rs then 0 else head rs) . firstFrom3 . maximumElBy 1 (V.singleton (oneProperty)) $ UL2 ([],uniquenessVariants2GN " 01-" (V.singleton (oneProperty)) (uniquenessPeriodsVector3 " 01-" . aux0 . convertToProperUkrainian) (justOneValue2Property . diverse2) $ ts), (\k -> if k == 0 then 1 else k) . abs . (\rs -> if null rs then 0 else head rs) . firstFrom3 . maximumElBy 1 (V.singleton (oneProperty)) $ UL2 ([],uniquenessVariants2GN " 01-" (V.singleton (oneProperty)) (uniquenessPeriodsVector3 " 01-" . aux0 . convertToProperUkrainian) (justOneValue2Property . negate . diverse2) $ ts), (\k -> if k == 0 then 1 else k) . diverse2 . uniquenessPeriodsVector3 " 01-" . aux0 . convertToProperUkrainian $ ts)) ratio = if maxE == 1 then 0.0 else 2.0 * fromIntegral data2 / fromIntegral (minE + maxE) [intervalN,wordsN] = runEval ((parList rpar) [if maxE == 1 then 0 else intervalNRealFrac (int2Float minE) (int2Float maxE) gz (int2Float data2), if maxE == 1 then 0 else length . words $ ts]) putStrLn $ mconcat [show (minE::Int), "\t", show (data2::Int), "\t", show (maxE::Int), "\t", showFFloat (Just 4) (fromIntegral data2 / fromIntegral minE) "\t", showFFloat (Just 4) (fromIntegral maxE / fromIntegral data2) "\t", showFFloat (Just 4) ratio "\t", show (wordsN::Int), "\t", show (intervalN::Int)] return (ratio,(wordsN,intervalN))) $ flines let (data31,wordsCnt0_data32) = unzip data3 data4 = filter (/= 0) data31 if null data4 then putStrLn (replicate 102 '-') >> putStrLn "1.000+-0.000\tALL!" >> putStrLn (replicate 102 '=') -- Well, this means that all the text consists of the unique (in phonetic meaning) words alongside every line. A rather rare occurrence. else do let (!mean1,!disp) = meanWithDisp data4 pairs = sort . filter ((/= 0) . snd) $ wordsCnt0_data32 g m n = (length . takeWhile (\(_,v) -> v == n) . dropWhile (\(_,v) -> v /= n) . takeWhile (\(u,_) -> u == m) . dropWhile (\(u,_) -> u /= m) $ pairs) `using` rdeepseq putStrLn (replicate 102 '-') mapM_ (\r -> putStr $ show r ++ "\t") [1..gz] >> putStrLn "" mapM_ (\r -> putStr $ (show . length . takeWhile (== r) . dropWhile (/= r) . sort . map snd $ pairs) ++ "\t") [1..gz] >> putStrLn "" putStrLn $ showFFloat (Just 4) mean1 "+-" ++ showFFloat (Just 4) (sqrt disp) "\t" ++ show (length . filter (== 0) $ data31) ++ "\t" ++ show (length data3) mapM_ (\r -> putStr $ show r ++ "\t") [2..8] >> putStrLn "" mapM_ (\r -> putStr $ (show . length . takeWhile (== r) . dropWhile (/= r) . map fst $ pairs) ++ "\t") [2..8] >> putStrLn "" putStrLn (replicate 102 '*') mapM_ (\m1 -> mapM_ (\n1 -> putStr $ (if g m1 n1 == 0 then "." else show (g m1 n1)) ++ "\t") [1..gz] >> putStrLn "") [2..8] putStrLn (replicate 102 '~') mapM_ (\m1 -> mapM_ (\n1 -> putStr $ show (g m1 n1) ++ "\t") [1..gz] >> putStrLn "") [2..8] putStrLn (replicate 102 '=')