-- | -- Module : Distribution.Processment -- Copyright : (c) OleksandrZhabenko 2020-2022 -- License : MIT -- Stability : Experimental -- Maintainer : olexandr543@yahoo.com -- -- Analyzes a poetic text in Ukrainian, for every line prints statistic data and -- then for the whole poem prints the hypothesis evaluation information. -- Is used in pair with some other programs, e. g. with propertiesTextG3 from phonetic-languages-simplified-examples-array package -- or with a new phonetic-languages-ukrainian series. -- The module contains library functions for the program. -- -- -- To enable parallel computations (potentially, they can speed up the work), please, run the @distributionText@ executable with -- @+RTS -threaded -RTS@ command line options with possibly @-N@ option inside. -- {-# OPTIONS_GHC -threaded -rtsopts #-} {-# LANGUAGE CPP, BangPatterns #-} module Distribution.Processment where #ifdef __GLASGOW_HASKELL__ #if __GLASGOW_HASKELL__>=710 /* code that applies only to GHC 7.10.* and higher versions */ import GHC.Base (mconcat) #endif #endif import Control.Parallel.Strategies import Data.Maybe (fromMaybe,isJust,fromJust) import Text.Read (readMaybe) import Numeric (showFFloat) import Data.List (sort) import Numeric.Stats import Data.Char (isDigit) import qualified Data.ByteString.Char8 as B import Data.Lists.FLines hiding (mconcat) import Data.Statistics.RulesIntervals import Data.Statistics.RulesIntervalsPlus #ifdef __GLASGOW_HASKELL__ #if __GLASGOW_HASKELL__==708 /* code that applies only to GHC 7.8.* */ mconcat = concat #endif #endif -- | Sum data type to control whether the functions work with multiple properties or with just one. data ControlStatsIntervals = U ([Double],[(Int,Int)]) | M [(Int,Int)] deriving (Eq,Show) isU :: ControlStatsIntervals -> Bool isU (U _) = True isU _ = False isM :: ControlStatsIntervals -> Bool isM (M _) = True isM _ = False data31F (U (x,y)) = Just x data31F (M _) = Nothing wordsCnt0_data32F (U (x,y)) = y wordsCnt0_data32F (M y) = y maybeDII :: (Int -> Bool) -> ControlStatsIntervals -> Maybe [(Double,(Int,Int))] maybeDII p (U (xs,ys)) = Just . filter (\(_,(y,_)) -> p y) . zip xs $ ys maybeDII _ _ = Nothing numberProps :: B.ByteString -> Int numberProps contents = maximum . map (subtract 1 . length . filter (B.all isDigit) . B.words) . B.lines $ contents innerProcG :: Bool -> Bool -> String -> Bool -> B.ByteString -> IO () innerProcG pairwisePermutations whitelines gzS multiprop contents | multiprop = mapM_ (\i -> processContentsMultiprop i contents >>= \csi -> innerProc pairwisePermutations whitelines gzS csi contents) [1..numberProps contents] | otherwise = processContents whitelines contents >>= \csi -> innerProc pairwisePermutations whitelines gzS csi contents innerProc :: Bool -> Bool -> String -> ControlStatsIntervals -> B.ByteString -> IO () innerProc pairwisePermutations whitelines gzS csi contents = do if all ((< 2) . fst) . wordsCnt0_data32F $ csi then putStrLn (replicate 102 '-') >> putStrLn "1.000+-0.000\tALL!" >> putStrLn (replicate 102 '=') -- Well, this means that all the text consists of the lines that have no variativity from the program perspective and, therefore, they cannot be analyzed effectively by it. Nevertheless, you can accurately exclude them from the consideration. A rather rare occurrence. else do let !gz | isU csi = getIntervalsN gzS (fromJust . data31F $ csi) -- Obtained from the first command line argument except those ones that are for RTS | otherwise = getIntervalsN gzS . wordsCnt0_data32F $ csi !mndsp | isU csi = Just . meanWithDispD2 . map fst . fromJust . maybeDII (>1) $ csi -- Since the 0.6.0.0 version switched to the sample unbiased dispersion with (n - 1) in the denominator. | otherwise = Nothing !pairs = sort . filter ((/= 0) . snd) . wordsCnt0_data32F $ csi g !m !n = (length . takeWhile (\(_,v) -> v == n) . dropWhile (\(_,v) -> v /= n) . takeWhile (\(u,_) -> u == m) . dropWhile (\(u,_) -> u /= m) $ pairs) `using` rdeepseq h !y !x = mconcat [mconcat . map (\m1 -> mconcat [mconcat . map (\n1 -> (if y then show (g m1 n1) else if g m1 n1 == 0 then "." else show (g m1 n1)) ++ "\t") $ [1..gz],newLineEnding]) $ [2..(if pairwisePermutations then 10 else 7)],replicate 102 x] putStrLn . generalInfo1 pairwisePermutations gz pairs mndsp . length . wordsCnt0_data32F $ csi putStrLn (h False '~') putStrLn (h True '=') processContents :: Bool -> B.ByteString -> IO ControlStatsIntervals processContents whitelines contents = do let !anlines = B.lines contents !anStrs | whitelines = filter (not . null) . map (drop 6 . take 9 . B.words) $ anlines | otherwise = map (drop 6 . take 9 . B.words) anlines !ratioStrs = map (B.unpack . head) anStrs !wordsNStrs = map (B.unpack . (!! 1)) anStrs !intervalNStrs = map (B.unpack . last) anStrs !ratios = map (\xs -> fromMaybe 1.0 (readMaybe xs::Maybe Double)) ratioStrs !wordsNs = map (\xs -> fromMaybe 0 (readMaybe xs::Maybe Int)) wordsNStrs !intervalNs = map (\xs -> fromMaybe 0 (readMaybe xs::Maybe Int)) intervalNStrs return . U $ (ratios,zip wordsNs intervalNs) processContentsMultiprop :: Int -> B.ByteString -> IO ControlStatsIntervals processContentsMultiprop propN contents = do let !anwords = map B.words . B.lines $ contents !wordsNStrs = map (B.unpack . head) anwords !intervalNStrs = map (B.unpack . B.concat . drop propN . take (propN + 1)) anwords !wordsNs = drop 1 . filter (> 0) . map (\xs -> fromMaybe 0 (readMaybe xs::Maybe Int)) $ wordsNStrs !intervalNs = filter (> 0) . map (\xs -> fromMaybe 0 (readMaybe xs::Maybe Int)) $ intervalNStrs return . M . zip wordsNs $ intervalNs generalInfo1 :: Bool -> Int -> [(Int,Int)] -> Maybe (Double,Double) -> Int -> String generalInfo1 pairwisePermutations gz pairs mndsp ll = let !ks = map (\r -> length . takeWhile (== r) . dropWhile (/= r) . sort . map snd $ pairs) [1..gz] !s = sum ks stringMD | isJust mndsp = let (mean1,disp) = fromJust mndsp in mconcat [showFFloat (Just 4) mean1 "+-", showFFloat (Just 4) (sqrt disp) "\t"] | otherwise = "" in mconcat [replicate 102 '-', newLineEnding, mconcat . map (\r -> show r ++ "\t") $ [1..gz], newLineEnding, mconcat . map (\r -> show r ++ "\t") $ ks, newLineEnding, mconcat . map (\r -> showFFloat (Just 2) (fromIntegral (r * 100) / fromIntegral s) "%\t") $ ks, newLineEnding, stringMD, show (length . filter ((<= 1) . fst) $ pairs), '\t':show ll, newLineEnding, mconcat . map (\r -> show r ++ "\t") $ [2..(if pairwisePermutations then 10 else 7)], newLineEnding, mconcat . map (\r -> (show . length . takeWhile (== r) . dropWhile (/= r) . map fst $ pairs) ++ "\t") $ [2..(if pairwisePermutations then 10 else 7)], newLineEnding, replicate 102 '*'] {-# INLINE generalInfo1 #-}