{- |RandProc.hs - a Haskell library for working with random processes in a mathematically rigorous way (Concepts taken from /Random Processes - a Mathematical Approach for Engineers/ by: - Robert M. Gray - Lee D. Davisson Prentice-Hall Information and System Sciences Series, Thomas Kailath, Series Editor) $Id: RandProc.hs 31 2011-06-22 13:49:48Z dbanas $ David Banas 12 March 2011 Copyright (c) 2011 by David Banas; All rights reserved World wide. /Revision History:/ [@Date SVN #@] Description [@2011-03-13 3@] Data structures stabilized. 'isSigma' working under minimal, discrete sample testing. [@2011-03-18 4@] Added 'isProbMeas', as well as monadic debugging versions of both it and 'isSigma'. Added an example probability space representing a fair die. [@2011-03-29 7@] Custom intersection functions added and briefly tested. [@2011-04-02 8@] Custom union functions added and briefly tested. Solution is crude: it is O(N^2), and requires 2 passes over the sample list every time a join is successful. Perhaps, a pre-sort? [@2011-06-06 9@] Attempted fix of 'getCompEvent' Added 'smplComp' function, as helper to revised 'getCompEvent'. Changed 'Point' to accept Double. Moved all sample spaces to new file, 'Main.hs'. Added input sorting to 'range'. Changed Ranges to be open intervals, in order to allow for complementing out a Point from them. [@2011-06-11 10@] Major re-write. 'getCompEvent' fixed. All 5 test spaces checking out ok. [@2011-06-18 21@] Removed sample set order dependency from 'checkSigma'. All 7 test spaces checking out ok. [@2011-06-19 22@] Added 'union of events is an event' test to 'checkSigma'. [@2011-06-20 23@] Changed 'Event' from data constructor to type alias, in order to eliminate many instances of 'Event . f . getSamps' code. [@2011-06-20 25@] Modified 'smpsSetInt' to use a fold. [@2011-06-20 26@] Defined public interface. [@2011-06-21 27@] Modified comments for Haddock, and generated HTML docs. [@2011-06-22 31@] Moved into 'Data' directory. /To Do:/ - Make 'smplSetUnion' more efficient. - Tune performance bottlenecks. - submission to Hackage -} {-# LANGUAGE FlexibleInstances, TypeSynonymInstances, OverlappingInstances, NoMonomorphismRestriction #-} module Data.RandProc ( ProbSpace(ProbSpace) ,Measure(Measure) ,Sample(Empty) ,TestResult(..) ,ErrType(..) ,checkProbMeas ,point ,range ,rangeBegin ,rangeEnd ,getProb ,getEvent ,getCompEvent ,sortSamps ,eventInt ,smplComp ,isElem ,noDupEvents ,smplInt ,smplSetInt ,smplUnion ,smplSetUnion ,checkSigma ,getRsltStr ) where eps :: Double eps = 0.000001 -- Used in floating point "equality" tests. {- |We take a probability space to consist of the following: - an 'abstract space' composed of either discrete or continuous (or a mix) samples - an 'event space', which must be a Sigma field defined over the abstract space - a 'probability measure' defined over the event space [Note:] For the sake of efficient coding, the /event space/ and the /probability measure/ are combined in the Haskell data structure, below. This is permissable, because there has to be a 1:1 correspondance between them anyway. And it is preferable, because it: - keeps the probabilities more closely associated w/ the events, and - avoids duplication of code (i.e. - the list of events). -} data ProbSpace = ProbSpace { space :: [Sample] ,measure :: [Measure] } {- |This is our abstract data type, which represents a sample in the abstract space. It has a constructor representing every possible element in the abstract space we're modeling. (Currently, just points and ranges of /Double/s.) Normally, none of the constructors of this type will be called directly. Instead, helper functions are provided, such as 'point' and 'range', which hide the implementation details from the user, and present a stable interface. Currently, the sole exception to the above is the /Empty/ constructor, which is really just a hack intended to put off the job of making the functions in this library more intelligent, with regard to their handling of empty lists. -} data Sample = Point Double | Range (Double, Double) | Empty | Full deriving (Eq, Show) -- [Note:] When constructing a range, it is the user's responsibility to ensure -- that the first element of the couple is less than the second. instance Ord Sample where -- (<) _ < Empty = False Empty < _ = True Full < _ = False _ < Full = True (Point p) < (Point p') = p < p' (Point p) < (Range (r1, _)) = p <= r1 (Range (r1, r2)) < (Point p) = not ((Point p) < (Range (r1, r2))) (Range (r1, r2)) < (Range (r3, r4)) | r1 == r3 = r2 < r4 | otherwise = r1 < r3 -- (>) Empty > _ = False _ > Empty = True _ > Full = False Full > _ = True (Point p) > (Point p') = p > p' (Point p) > (Range (r1, r2)) = not ((Point p) < (Range (r1, r2))) (Range (r1, r2)) > (Point p) = (Point p) < (Range (r1, r2)) (Range (r1, r2)) > (Range (r3, r4)) | r1 == r3 = r2 > r4 | otherwise = r1 > r3 -- (<=), (>=) s1 <= s2 = not (s1 > s2) s1 >= s2 = not (s1 < s2) -- custom data type and helper function for communicating sample types: -- (Helpful in places where pattern matching can't be used.) data SampleType = STPoint | STRange | STEmpty | STFull deriving (Eq, Show) sampleType :: Sample -> SampleType sampleType (Point _) = STPoint sampleType (Range _) = STRange sampleType Empty = STEmpty sampleType Full = STFull -- |The custom type /Event/ is just an alias for a list of Samples. type Event = [Sample] -- |/Measure/ has 2 fields: -- -- * /event/ - a list of samples from the space, and -- -- * /prob/ - a number between 0 and 1 giving the events probability of -- occurence. data Measure = Measure { event :: Event ,prob :: Double } deriving (Eq, Ord, Show) -- |This is the helper function intended to be used for constructing a point sample. point :: Double -> Sample point a = Point a -- |This is the helper function intended to be used for constructing a range sample. -- The range is considered /open/. That is, its end points are not included. range :: (Double, Double) -> Sample range (a, b) | a == b = point a | a < b = Range (a, b) | otherwise = Range (b, a) -- Helpful info getters: -- |Gets the beginning point of a range, which is /not/ included in the range, -- since ranges are considered to be open. rangeBegin :: Sample -> Double rangeBegin (Point _) = undefined rangeBegin (Range (a,_)) = a rangeBegin Empty = undefined rangeBegin Full = undefined -- |Gets the ending point of a range, which is /not/ included in the range, rangeEnd :: Sample -> Double rangeEnd (Point _) = undefined rangeEnd (Range (_,b)) = b rangeEnd Empty = undefined rangeEnd Full = undefined -- |Extracts the probability from a Measure. getProb :: Measure -> Double getProb m = prob m -- |Extracts the Event from a Measure. getEvent :: Measure -> Event getEvent m = event m -- |Get the complement of an event from the sample space. getCompEvent :: [Sample] -> Event -> Event getCompEvent [] _ = [Empty] getCompEvent (s:ss) e = sortSamps $ smplSetUnion $ (foldl eventInt [s] (map (smplComp s) ( e))) ++ ( (getCompEvent ss e)) -- |Sorts a list of samples (i.e. - an event). sortSamps :: [Sample] -> [Sample] sortSamps [] = [] sortSamps (s:ss) = sortSamps([s' | s' <- ss, s' < s]) ++ [s] ++ sortSamps([s'' | s'' <- ss, s'' >= s]) -- |Calculates the intersection of 2 events (i.e. - list of samples). eventInt :: Event -> Event -> Event eventInt s1 s2 | length (filter (/= Empty) s1) == 0 = [Empty] | length (filter (/= Empty) s2) == 0 = [Empty] | otherwise = smplSetUnion $ concat $ map (\s -> (map (smplInt s) s1)) s2 -- |Returns that portion of the first sample that is disjoint from the second. smplComp :: Sample -> Sample -> [Sample] smplComp Empty _ = [Empty] smplComp s Empty = [s] smplComp (Point n) (Point m) | m == n = [Empty] | otherwise = [Point n] smplComp (Point n) (Range (a, b)) | (n > a) && (n < b) = [Empty] | otherwise = [Point n] smplComp (Range (a, b)) (Point n) | (n > a) && (n < b) = [Range (a, n), Range (n, b)] | otherwise = [Range (a, b)] smplComp (Range (a, b)) (Range (c, d)) | (c >= b) || (a >= d) = [Range (a, b)] | (c <= a) && (d >= b) = [Empty] | (c > a) && (d < b) = [Range (a, c), Range (d, b), Point c, Point d] | (a < c) = [Range (a, c), Point c] | otherwise = [Range (d, b), Point d] smplComp _ Full = [Empty] smplComp Full _ = undefined -- |Determine if a sample is an element of a space. -- -- (Need this, as opposed to just using `elem`, in order to accomodate ranges.) isElem :: [Sample] -> Sample -> Bool isElem [] _ = False isElem _ Empty = True isElem (s:ss) s' = (testElem s s') || (isElem ss s') -- testElem : Test whether the second sample is an element of the first. testElem :: Sample -> Sample -> Bool testElem Empty _ = False testElem _ Empty = True testElem (Point x) (Point y) = x == y testElem (Point _) (Range _) = False testElem (Range (x, y)) (Point z) = (z > x) && (z < y) testElem (Range (x, y)) (Range (w, z)) = (w >= x) && (z <= y) testElem Full _ = True testElem _ Full = False -- |Checks a list of measures against duplicate events. noDupEvents :: [Measure] -> Bool noDupEvents [] = True noDupEvents (m:ms) = not ((event m) `elem` es) && noDupEvents ms where es = map event ms -- custom union and intersection functions for our 'Sample' data type -- |Returns the intersection between 2 samples. smplInt :: Sample -> Sample -> Sample smplInt Empty _ = Empty smplInt _ Empty = Empty smplInt Full s = s smplInt s Full = s smplInt (Point n) (Point m) | m == n = Point n | otherwise = Empty smplInt (Point n) (Range (a, b)) | (n > a) && (n < b) = Point n | otherwise = Empty smplInt (Range (a, b)) (Point n) = smplInt (Point n) (Range (a, b)) smplInt (Range (a, b)) (Range (c, d)) | (c >= b) || (a >= d) = Empty | otherwise = Range (max a c, min b d) -- |Reduces a list of samples to a single sample representing their intersection. smplSetInt :: [Sample] -> Sample smplSetInt = foldl smplInt Full -- |Returns the union of 2 samples. -- -- Unlike 'smplInt', /smplUnion/ must return a list since, if the 2 input -- samples aren't adjacent or overlapping, the union of them is a list -- containing both. smplUnion :: Sample -> Sample -> [Sample] smplUnion Empty s = [s] smplUnion s Empty = [s] smplUnion (Point n) (Point m) | m == n = [Point n] | otherwise = [Point n, Point m] smplUnion (Point n) (Range (a, b)) -- Ranges are considered open, in order to allow complementing a point from them. | (n > a) && (n < b) = [Range (a, b)] | otherwise = [Point n, Range (a, b)] -- The union operation is commutative. smplUnion (Range (a, b)) (Point n) = smplUnion (Point n) (Range (a, b)) smplUnion (Range (a, b)) (Range (c, d)) | (c >= b) || (a >= d) = [Range (a, b), Range (c, d)] | otherwise = [Range (min a c, max b d)] smplUnion Full _ = [Full] smplUnion _ Full = [Full] -- |Collapses a list of samples down to the maximally reduced set, which still -- composes a proper union of the input. smplSetUnion :: [Sample] -> [Sample] -- The general approach follows this recipe: -- > Check for any adjacency or overlap of the first element with the rest. -- > If there was at least one successful joining, toss out the first sample -- and recurse on a modified version of the rest. Namely, use the -- 'selector' helper function to perturb only those samples in the original -- list, which are adjacent/overlapping w/ the current token sample, leaving -- the rest unaltered. -- > Otherwise, keep the first sample and recurse on the remainder, -- unmodified. -- -- Note that this algorithm is crude for, at leat, 2 reasons: -- 1) It requires O(N^2) operations to complete. -- 2) It requires a second pass over the list whenever a successful joining -- occurs. smplSetUnion [] = [] smplSetUnion ss = consolidateRPR $ smplSetUnionDraft ss -- smplSetUnionDraft : This function gets us almost all the way to a maximally -- reduced union. However, it leaves the very special -- pattern: Range (_, a), Point (a), Range (a, _) -- unresolved, due to the open nature of ranges, here. -- So, we require a final pass through the cleanup function, -- 'consolidateRPR', as noted above. smplSetUnionDraft :: [Sample] -> [Sample] smplSetUnionDraft [] = [] smplSetUnionDraft (x:xs) | any (\y -> length y == 1) xs' = smplSetUnionDraft xs'' | otherwise = x : smplSetUnionDraft xs where xs' = map (smplUnion x) xs xs'' = map (selector x) xs selector :: Sample -> Sample -> Sample selector s1 s2 | length (smplUnion s1 s2) == 1 = head $ smplUnion s1 s2 | otherwise = s2 -- consolidateRPR : Reduces all occurences of: -- Range (a,b), Point (b), Range (b,c) into: -- Range (a,c). consolidateRPR :: [Sample] -> [Sample] consolidateRPR [] = [] consolidateRPR ss = scanRPR $ sortSamps ss -- scanRPR : Scans through a list of samples looking for the pattern: -- Range (a,b), Point (b), Range (b,c) and consolidates those into: -- Range (a,c). -- -- Note) This function depends upon receiving a SORTED sample list! scanRPR :: [Sample] -> [Sample] scanRPR [] = [] scanRPR (s:ss) | sampleType s == STRange = case (headIsPoint (rangeEnd (s)) ss) of True -> case (headIsRange (tail ss) && (rangeBegin (head (tail ss))) == (rangeEnd (s))) of True -> scanRPR $ Range ((rangeBegin (s)),rangeEnd(head (tail ss))) : (tail (tail ss)) _ -> s:(scanRPR ss) _ -> s:(scanRPR ss) | otherwise = s:(scanRPR ss) -- headIsPoint : Tests whether the head of the incoming list is a particular -- Point value. headIsPoint :: Double -> [Sample] -> Bool headIsPoint _ [] = False headIsPoint a (s:_) = s == Point a -- headIsRange : Tests whether the head of the incoming list is a -- Range. headIsRange :: [Sample] -> Bool headIsRange [] = False headIsRange (s:_) = sampleType s == STRange -- |Custom data type used for test results and error reporting. data TestResult = Fail {err :: ErrType} | Pass deriving (Show) instance Eq TestResult where Pass == Pass = True Fail e == Fail e' = e == e' _ == _ = False -- |Custom data type for reporting different errors data ErrType = UnknownErr | EmptySampleSpace | EmptyEventSpace | MissingNullEvent | MissingCertainEvent | BadEventSamples | MissingCompEvent | MissingUnionEvent | EventMeasLenMismatch | DupEventsInMeas | MissingEventsInMeas | NullEventNonZeroProb | CertainEventNonUnityProb | EventAndCompNoSumOne deriving (Eq, Show) -- |getErrStr : Turns a value of type /ErrType/ into a more readable string. getErrStr :: ErrType -> String getErrStr UnknownErr = "Unknown error" getErrStr EmptySampleSpace = "Empty sample space" getErrStr EmptyEventSpace = "Empty event space" getErrStr MissingNullEvent = "The null event is missing from the event space." getErrStr MissingCertainEvent = "The certain event is missing from the event space." getErrStr BadEventSamples = "At least one event contains samples not in the sample space." getErrStr MissingCompEvent = "At least one event's compliment is missing from the event space." getErrStr MissingUnionEvent = "At least one union of events is missing from the event space." getErrStr EventMeasLenMismatch = "Lengths of event and measure lists don't match." getErrStr DupEventsInMeas = "There are duplicate events in the measure list." getErrStr MissingEventsInMeas = "Some events aren't covered in the measure list." getErrStr NullEventNonZeroProb = "The null event has been assigned a non-zero probability." getErrStr CertainEventNonUnityProb = "The certain event has been assigned a probability other than 1." getErrStr EventAndCompNoSumOne = "At least one pair of event and compliment have probabillities that don't add to 1." -- |Turns a value of type /TestResult/ into a human readable string. getRsltStr :: TestResult -> String getRsltStr Pass = "Ok" getRsltStr tr = getErrStr $ err tr -- |Checks whether event space is actually a Sigma field over the sample space. checkSigma :: ProbSpace -> TestResult checkSigma ps | length (filter (/= Empty) (space ps)) == 0 = Fail EmptySampleSpace | length (es) == 0 = Fail EmptyEventSpace | not (([Empty]) `elem` (es)) = Fail MissingNullEvent | not (((space ps)) `elem` (es)) = Fail MissingCertainEvent | not (and (map (and . map (\s -> (isElem (space ps) s) || (s == Empty))) (es))) = Fail BadEventSamples | not (and (map (\e -> ((getCompEvent (space ps) e) `elem` (es))) (es))) = Fail MissingCompEvent | not $ and $ map (`elem` es) $ removeDups $ map (sortSamps . smplSetUnion . concat) (filter (\s -> length s > 0) (subs ss)) = Fail MissingUnionEvent | otherwise = Pass where es = map (sortSamps . event) (measure ps) ss = filter (\s -> length s > 0) $ map (filter (/= Empty) . event) (measure ps) -- subs : power set generator (Stolen from Hutton.) subs :: [a] -> [[a]] subs [] = [[]] subs (x:xs) = yss ++ map (x:) yss where yss = subs xs -- removeDups : remove duplicates from a list. removeDups :: (Eq a) => [a] -> [a] removeDups [] = [] removeDups (x:xs) = x:(removeDups ys) where ys = [y | y <- xs, y /= x] -- |Checks a value of type 'ProbSpace' for correctness, and returns a value of -- type 'TestResult'. checkProbMeas :: ProbSpace -> TestResult checkProbMeas ps | checkSigma ps /= Pass = checkSigma ps | not (length (es) == length (measure ps)) = Fail EventMeasLenMismatch | not (noDupEvents (measure ps)) = Fail DupEventsInMeas | not (length (filter (\m -> getEvent m `elem` (es)) (measure ps)) == length (es)) = Fail MissingEventsInMeas | not (and $ map (\m -> getProb m == 0.0) (filter (\m -> getEvent m == []) (measure ps))) = Fail NullEventNonZeroProb | not (and $ map (\m -> getProb m == 1.0) (filter (\m -> getEvent m == (space ps)) (measure ps))) = Fail CertainEventNonUnityProb | not (and $ map (\m -> 1.0 - (getProb m) - ( (getProb (head (filter (\m' -> (sortSamps $ getEvent m') == (getCompEvent (space ps) (getEvent m))) (measure ps))))) < eps) (measure ps)) = Fail EventAndCompNoSumOne | otherwise = Pass where es = map event (measure ps)