-- | -- Module : Main -- Copyright : (c) OleksandrZhabenko 2020 -- License : MIT -- Stability : Experimental -- Maintainer : olexandr543@yahoo.com -- -- Analyzes a poetic text in Ukrainian, for every line prints statistic data and -- then for the whole poem prints the hypothesis evaluation information. Since the 0.4.0.0 version -- the program tries to be more accurate in cases of the lines consisting entirely of the words -- which are unique in phonetic meaning alongside the line. Another hypothesis is that the distribution -- of the placement of the actual poetic text in Ukrainian is not one of the standard distributions. -- It can probably have approximately a form of and is different for different authors: -- -- > -- -- -- -- > / \_/ \_/ \ -- -- {-# LANGUAGE CPP, BangPatterns #-} module Main where #ifdef __GLASGOW_HASKELL__ #if __GLASGOW_HASKELL__>=710 /* code that applies only to GHC 7.10.* and higher versions */ import GHC.Base (mconcat) #endif #endif import Control.Exception (onException) import qualified Data.Vector as V import String.Languages.UniquenessPeriods.Vector import Languages.UniquenessPeriods.Vector.General.Debug import Languages.UniquenessPeriods.Vector.Properties import Melodics.Ukrainian import System.Environment import Languages.Phonetic.Ukrainian.PrepareText import Languages.UniquenessPeriods.Vector.Data import Languages.UniquenessPeriods.Vector.Auxiliary import Languages.UniquenessPeriods.Vector.StrictV import Numeric (showFFloat) import Languages.UniquenessPeriods.Vector.Filters import GHC.Float (int2Float) import Data.List (sort) import Numeric.Stats #ifdef __GLASGOW_HASKELL__ #if __GLASGOW_HASKELL__==708 /* code that applies only to GHC 7.8.* */ mconcat = concat #endif #endif main :: IO () main = do args <- getArgs let file = concat . take 1 $ args gz <- onException (do {return (read (concat . take 1 . drop 1 $ args)::Int)}) (return 9) contents <- readFile file let flines = filter (not . null . filter isUkrainian) . prepareText $ contents data3 <- mapM (\ts -> do let maxE = (\k -> if k == 0 then 1 else k) . (\rs -> if null rs then 0 else head rs) . firstFrom3 . maximumElBy 1 (V.singleton (oneProperty)) $ UL2 ([],uniquenessVariants2GN " 01-" (V.singleton (oneProperty)) (uniquenessPeriodsVector3 " 01-" . aux0 . convertToProperUkrainian) (justOneValue2Property . diverse2) $ ts) minE = (\k -> if k == 0 then 1 else k) . abs . (\rs -> if null rs then 0 else head rs) . firstFrom3 . maximumElBy 1 (V.singleton (oneProperty)) $ UL2 ([],uniquenessVariants2GN " 01-" (V.singleton (oneProperty)) (uniquenessPeriodsVector3 " 01-" . aux0 . convertToProperUkrainian) (justOneValue2Property . negate . diverse2) $ ts) data2 = (\k -> if k == 0 then 1 else k) . diverse2 . uniquenessPeriodsVector3 " 01-" . aux0 . convertToProperUkrainian $ ts let ratio = if maxE == 1 then 0 else 2.0 * fromIntegral data2 / fromIntegral (minE + maxE) intervalN = if maxE == 1 then 0 else intervalNRealFrac (int2Float minE) (int2Float maxE) gz (int2Float data2) wordsN = if maxE == 1 then 0 else length . words $ ts putStrLn $ mconcat [show (minE::Int), "\t", show (data2::Int), "\t", show (maxE::Int), "\t", showFFloat (Just 4) (fromIntegral data2 / fromIntegral minE) "\t", showFFloat (Just 4) (fromIntegral maxE / fromIntegral data2) "\t", showFFloat (Just 4) ratio "\t", show wordsN, "\t", show intervalN] return (ratio,(wordsN,intervalN))) $ flines let (data31,wordsCnt0_data32) = unzip data3 data4 = filter (/= 0) data31 if null data4 then putStrLn (replicate 102 '-') >> putStrLn "1.000+-0.000\tALL!" >> putStrLn (replicate 102 '=') -- Well, this means that all the text consists of the unique (in phonetic meaning) words alongside every line. A rather rare occurrence. else do let (mean1,disp) = meanWithDisp data4 pairs = sort . filter ((/= 0) . snd) $ wordsCnt0_data32 f xs n = show . length . takeWhile (== n) . dropWhile (/= n) $ xs g m n = length . takeWhile (\(_,v) -> v == n) . dropWhile (\(_,v) -> v /= n) . takeWhile (\(u,_) -> u == m) . dropWhile (\(u,_) -> u /= m) $ pairs putStrLn (replicate 102 '-') mapM_ (\r -> putStr $ show r ++ "\t") [1..gz] >> putStrLn "" mapM_ (\r -> putStr $ f (sort . map snd $ pairs) r ++ "\t") [1..gz] >> putStrLn "" putStrLn $ showFFloat (Just 4) mean1 "+-" ++ showFFloat (Just 4) (sqrt disp) "\t" ++ show (length . filter (== 0) $ data31) ++ "\t" ++ show (length data3) mapM_ (\r -> putStr $ show r ++ "\t") [2..8] >> putStrLn "" mapM_ (\r -> putStr $ f (map fst $ pairs) r ++ "\t") [2..8] >> putStrLn "" putStrLn (replicate 102 '*') mapM_ (\m1 -> mapM_ (\n1 -> putStr $ (if g m1 n1 == 0 then "." else show (g m1 n1)) ++ "\t") [1..gz] >> putStrLn "") [2..8] putStrLn (replicate 102 '~') mapM_ (\m1 -> mapM_ (\n1 -> putStr $ show (g m1 n1) ++ "\t") [1..gz] >> putStrLn "") [2..8] putStrLn (replicate 102 '=') isUkrainian :: Char -> Bool isUkrainian x | x == '\x0404' || (x >= '\x0406' && x <= '\x0407') || (x >= '\x0410' && x <= '\x0429') || x == '\x042C' || (x >= '\x042E' && x <= '\x0449') || x == '\x044C' || (x >= '\x044E' && x <= '\x044F') || x == '\x0454' || (x >= '\x0456' && x <= '\x0457') = True | otherwise = False