> {-# OPTIONS_HADDOCK show-extensions #-} > {-# Language > CPP, > FlexibleContexts, > FlexibleInstances, > MultiParamTypeClasses, > Trustworthy > #-} #if !defined(MIN_VERSION_base) # define MIN_VERSION_base(a,b,c) 0 #endif > {-| > Module : LTK.FSA > Copyright : (c) 2014-2023 Dakotah Lambert > License : MIT > The purpose of this module is to define an interface to a generic, > reusable impementation of finite-state automata (FSAs). The primary > motivation for this is to allow for efficient analysis of stringsets > in a linguistic context, although the nature of the project should > allow more general use. > -} > module LTK.FSA > ( FSA(..) > , states > , isNull > , follow > , accepts > -- * Constructing simple automata > , totalWithAlphabet > , emptyWithAlphabet > , emptyLanguage > , singletonWithAlphabet > , singletonLanguage > -- * Derived automata > , brzozowskiDerivative > , loopify > , tierify > , neutralize > , quotLeft > , quotMid > , quotRight > , kleeneClosure > , powersetGraph > , syntacticMonoid > , residue > , coresidue > -- * Primitive ideals > , primitiveIdeal2 > , primitiveIdealL > , primitiveIdealR > -- * Transformations > , flatIntersection > , flatUnion > , flatInfiltration > , flatShuffle > , LTK.FSA.reverse > , autDifference > , autInfiltration > , autShuffle > , complement > , complementDeterministic > , determinize > -- ** Minimization > , minimize > , minimizeDeterministic > , normalize > , trimUnreachables > -- *** Equivalence Classes > , minimizeOver > , nerode > , hEquivalence > , jEquivalence > , trivialUnder > -- ** Alphabetic Transformations > , extendAlphabetTo > , semanticallyExtendAlphabetTo > , contractAlphabetTo > , forceAlphabetTo > , desemantify > , renameSymbolsBy > -- ** Transformations of 'State' labels > , renameStatesBy > , renameStates > -- * Miscellaneous > , State(..) > , Symbol(..) > , unsymbols > , Transition(..) > , module LTK.Containers > ) where > import Control.DeepSeq (NFData, rnf) #if !MIN_VERSION_base(4,8,0) > import Control.Applicative (Applicative, pure, (<*>)) > import Data.Functor ((<$>)) > import Data.Monoid (Monoid, mappend, mempty) #endif #if MIN_VERSION_base(4,9,0) #if !MIN_VERSION_base(4,11,0) > import safe Data.Semigroup (Semigroup, (<>)) #endif #endif > import Data.Maybe (fromMaybe) > import Data.Set (Set) > import qualified Data.Set as Set > import qualified Data.Map.Lazy as Map > import Control.Parallel (par, pseq) > import LTK.Containers Data Structures =============== An FSA has four main parts: * a set of symbols representing its alphabet * a set of edges that describe transitions from state to state * a set of initial states, from which computations begin * a set of final states, which determine computational success Further, given an FSA F, if for every symbol a in the alphabet of F and for every state q in the set of states in F, there exists exactly one edge exiting q labelled with a, and if F has exactly one initial state, then F can be described as a deterministic finite-state automaton, or DFA. Determinism allows for useful optimizations in some operations, but can be slow to verify. This module sacrifices space for speed, treating determinism as a property of the datatype itself. > -- |A finite-state automaton (FSA) is represented by a directed > -- graph, the edges of which are labelled by formal symbols. > data FSA n e > = FSA > { -- |@since 0.3 > sigma :: Set e > , transitions :: Set (Transition n e) > , initials :: Set (State n) > , finals :: Set (State n) > , isDeterministic :: Bool > } > deriving (Show, Read) > -- |The collection of all states in an FSA. > states :: (Ord e, Ord n) => FSA n e -> Set (State n) > states f = unionAll [initials f, finals f, others] > where others = collapse extractStates empty ts > extractStates t = insert (source t) . insert (destination t) > ts = transitions f > -- |An automaton accepting every string over a given alphabet. > totalWithAlphabet :: (Ord e, Enum n, Ord n) => Set e -> FSA n e > totalWithAlphabet as = FSA as trans (singleton q) (singleton q) True > where trans = Set.mapMonotonic > (flip (`Transition` q) q . Symbol) > as > q = State $ toEnum 0 > -- |An automaton accepting no strings over a given alphabet. > emptyWithAlphabet :: (Ord e, Enum n, Ord n) => Set e -> FSA n e > emptyWithAlphabet as = (totalWithAlphabet as) {finals = empty} > -- |A specialization of 'emptyWithAlphabet' where the alphabet > -- is itself empty. > emptyLanguage :: (Ord e, Ord n, Enum n) => FSA n e > emptyLanguage = emptyWithAlphabet empty A singleton FSA is one that accepts exactly one (possibly-empty) string. The number of states in such an FSA is equal to the length of the string plus two. > -- |An automaton that accepts only the given string, > -- over a given alphabet. > singletonWithAlphabet :: (Ord e, Enum n, Ord n) => > Set e -> [e] -> FSA n e > singletonWithAlphabet as str > = FSA > { sigma = as > , transitions = trans str > , initials = begins > , finals = fins > , isDeterministic = True > } > where trans xs = trans' xs (toEnum 1) `union` failTransitions > trans' [] n > = tmap (\a -> Transition (Symbol a) (State n) qfail) as > trans' (x:xs) n > = let m = succ n > in union (trans' xs m) $ > Set.mapMonotonic > (\y -> > Transition (Symbol y) (State n) $ > if x == y then State m else qfail > ) as > qfail = State $ toEnum 0 > failTransitions > = Set.mapMonotonic > (\a -> Transition (Symbol a) qfail qfail) > as > begins = singleton (State $ toEnum 1) > qlast = (+ 1) $ size str > fins = singleton (State $ toEnum qlast) > -- |An automaton that accepts only the given string, > -- over the minimal alphabet required to represent this string. > singletonLanguage :: (Ord e, Enum n, Ord n) => [e] -> FSA n e > singletonLanguage s = singletonWithAlphabet (Set.fromList s) s Formal Symbols -------------- The edges of an FSA are labelled by either a formal symbol or the pseudo-symbol Epsilon. Specifically, an edge labelled by Epsilon represents a transition that may occur without consuming any further input. > -- |The label of a 'Transition'. > data Symbol e = Epsilon -- ^The edge may be taken without consuming input. > | Symbol e -- ^The edge requires consuming this symbol. > deriving (Eq, Ord, Read, Show) The Symbol type is used to adjoin Epsilon to an alphabet. We often want only the real portion of a string, where instances of Epsilon are not important. The 'unsymbols' function does this transformation: unsymbols [Symbol 'a', Epsilon, Symbol 'b', Epsilon] :: [Char] becomes simply "ab". This transformed a [Symbol Char] to a [Char]. The container type need not be the same though: unsymbols [Symbol 'a', Epsilon, Symbol 'b', Epsilon] :: Set Char becomes fromList ['a', 'b']. > -- |Remove 'Epsilon' from a 'Collapsible' of 'Symbol' > -- and present the unwrapped results as a new 'Container'. > unsymbols :: (Collapsible s, Container c e, Monoid c) => s (Symbol e) -> c > unsymbols = collapse (mappend . f) mempty > where f (Symbol x) = singleton x > f _ = empty States ------ > -- |A vertex of the graph representation of an 'FSA' is a 'State', > -- which can be labelled with any arbitrary value, so long as every > -- vertex of a single automaton is labelled with a distinct value > -- of the same type. > newtype State n = State {nodeLabel :: n} deriving (Eq, Ord, Read, Show) Transitions ----------- If a state is the vertex of a graph, then a transition is its edge. Since an FSA is represented by a directed graph, there are three components to a transition: an edge label, and two states. If a computation in the first state encounters a symbol matching the transition's edge label, then it moves to the second state. > -- |The edges of an 'FSA'. > data Transition n e > = Transition > { edgeLabel :: Symbol e > , source :: State n > , destination :: State n > } > deriving (Eq, Ord, Show, Read) Class Instances =============== Here we define class instances for FSAs and their component types. Symbol ------ > instance Functor Symbol > where fmap _ Epsilon = Epsilon > fmap f (Symbol e) = Symbol (f e) > instance (NFData e) => NFData (Symbol e) > where rnf Epsilon = () > rnf (Symbol e) = rnf e State ----- > instance Functor State > where fmap f = State . f . nodeLabel > instance Applicative State > where pure = State > (<*>) = fmap . nodeLabel > instance Monad State > where a >>= f = f $ nodeLabel a #if !MIN_VERSION_base(4,8,0) > return = pure #endif #if MIN_VERSION_base(4,9,0) Semigroup instance to satisfy base-4.11 > instance (Semigroup n) => Semigroup (State n) > where (<>) = fmap . nodeLabel . fmap (<>) #endif > instance (Monoid n) => Monoid (State n) > where mempty = State mempty #if MIN_VERSION_base(4,11,0) > -- mappend will eventually be removed #elif MIN_VERSION_base(4,9,0) > mappend = (<>) #else > mappend = fmap . nodeLabel . fmap mappend #endif > instance (NFData n) => NFData (State n) > where rnf (State n) = rnf n Transition ---------- > instance (NFData n, NFData e) => NFData (Transition n e) > where rnf t = rnf (edgeLabel t) `seq` > rnf (source t) `seq` > rnf (destination t) > newtype Noitisnart e n = Noitisnart { transition :: Transition n e } > instance Functor (Transition n) > where fmap f t = t { edgeLabel = fmap f (edgeLabel t) } > instance Functor (Noitisnart e) > where fmap f = Noitisnart . fmap' . transition > where fmap' t > = t { source = fmap f (source t) > , destination = fmap f (destination t) > } FSA --- FSAs represent languages, so it makes sense to use equivalence of the represented languages as the criterion for equivalence of the FSAs themselves. First, an FSA represents the empty language if it has no reachable accepting states: > -- |True iff the input accepts no strings. > isNull :: (Ord e, Ord n) => FSA n e -> Bool > isNull = (== empty) . finals . trimUnreachables Two FSAs are considered equal iff they are isomorphic. > instance (Ord e, Ord n) => Eq (FSA n e) > where a == b = isomorphic (normalize a) (normalize b) Calls to `isomorphic` should work for NFAs as well as DFAs, but in the current implementation, in general, will not. This is because multiple start states are combined with the cartesian product to create c, rather than mapped from a to their counterparts in b, a much harder problem. > isomorphic :: (Ord e, Ord n1, Ord n2) => FSA n1 e -> FSA n2 e -> Bool > isomorphic a b = (alphabet a == alphabet b) && > (isize (initials a) == isize (initials b)) && > (isize (finals a) == isize (finals b)) && > (isize (states a) == isize (states b)) && > (isize (initials a) == isize (initials c)) && > (isize (finals a) == isize (finals c)) && > (isize (states a) == s) > where c = autUnion a b > s = isize . keep (State (Nothing, Nothing) /=) $ states c A Set of FSAs could be useful, and an Ord instance is needed for that. If two automata are equal, they should certainly compare EQ. If A is a subset of B, compare A B ought be LT and the reverse GT. When neither is a subset of the other, they are incomparable by this measure, so instead they are compared by a standard Haskell comparison of tuples consisting of their alphabets, transitions, initial states, and final states. > instance (Ord e, Ord n) => Ord (FSA n e) > where compare a b > | a == b = EQ > | isSubsetOf (f b) (f a) = LT > | isSubsetOf (f a) (f b) = GT > | otherwise = compare (g a) (g b) > where f :: (Ord e, Ord n) => FSA n e -> FSA Integer e > f = renameStates > g x = (alphabet x, transitions x, initials x, finals x) > instance (Enum n, Ord n, Ord e) => Container (FSA n e) [e] > where isEmpty = isNull > isIn = accepts > union = apply autUnion > intersection = apply autIntersection > difference = apply autDifference > empty = emptyLanguage > singleton = singletonLanguage > symmetricDifference > = apply autSymmetricDifference Here we consider FSAs to be Semigroups (and Monoids) under concatenation #if MIN_VERSION_base(4,9,0) Semigroup instance to satisfy base-4.9 > instance (Enum n, Ord n, Ord e) => Semigroup (FSA n e) > where (<>) = apply autConcatenation #endif > instance (Enum n, Ord n, Ord e) => Monoid (FSA n e) > where mempty = singletonLanguage empty #if MIN_VERSION_base(4,11,0) -- mappend will eventually be removed #elif MIN_VERSION_base(4,9,0) > mappend = (<>) #else > mappend = apply autConcatenation #endif > apply :: (Ord e, Ord n1, Ord n2, Enum n2) => > (a -> b -> FSA n1 e) -> a -> b -> FSA n2 e > apply f = curry (renameStates . uncurry f) > pfold :: (a -> a -> a) -> [a] -> a > pfold = fmap (. treeFromList) treeFold It is better to use flatIntersection and flatUnion than the appropriate fold, because the flat- functions take advantage of parallelism if possible. > -- |Intersect all given automata, in parallel if possible. > -- An empty intersection is undefined. > -- In theory it should be the total language over the > -- total alphabet, but the latter cannot be defined. > -- Intermediate results are evaluated to normal form. > flatIntersection :: (Enum n, Ord n, NFData n, Ord e, NFData e) => > [FSA n e] -> FSA n e > flatIntersection [] = error "Cannot take a nullary intersection" > flatIntersection xs = pfold i xs > where i a b = let x = renameStates . minimize $ autIntersection a b > in rnf x `seq` x > -- |Union all given automata, in parallel if possible. > -- An empty union is defined as the empty language over > -- an empty alphabet. > -- Intermediate results are evaluated to normal form. > flatUnion :: (Enum n, Ord n, NFData n, Ord e, NFData e) => > [FSA n e] -> FSA n e > flatUnion [] = emptyLanguage > flatUnion xs = pfold u xs > where u a b = let x = renameStates . minimize $ autUnion a b > in rnf x `seq` x > -- |Shuffle all given automata, in parallel if possible. > -- An empty shuffle is defined as the singleton language over > -- an empty alphabet containing only the empty string. > -- Intermediate results are evaluated to normal form. > -- > -- @since 1.1 > flatShuffle :: (Enum n, Ord n, NFData n, Ord e, NFData e) => > [FSA n e] -> FSA n e > flatShuffle [] = singletonLanguage [] > flatShuffle xs = pfold s xs > where s a b = let x = renameStates . minimize $ autShuffle a b > in rnf x `seq` x > -- |Infiltrate all given automata, in parallel if possible. > -- An empty infiltration is defined as the singleton language over > -- an empty alphabet containing only the empty string. > -- Intermediate results are evaluated to normal form. > -- > -- @since 1.1 > flatInfiltration :: (Enum n, Ord n, NFData n, Ord e, NFData e) => > [FSA n e] -> FSA n e > flatInfiltration [] = singletonLanguage [] > flatInfiltration xs = pfold s xs > where s a b = let x = renameStates . minimize $ autInfiltration a b > in rnf x `seq` x > instance (NFData n, NFData e) => NFData (FSA n e) > where rnf (FSA a t i f d) > = rnf a `seq` rnf t `seq` rnf i `seq` rnf f `seq` rnf d > instance HasAlphabet (FSA n) > where alphabet = sigma Acceptance and the Transition Function ====================================== To determine whether an FSA accepts a string of Symbols, there must exist a mechanism to determine which State to enter upon consuming a Symbol. The set of Transitions describes the map, and we will use that to define the transition function. > data ID n e = ID (State n) [Symbol e] deriving (Eq, Ord, Read, Show) > state :: ID n e -> State n > state (ID a _) = a > string :: ID n e -> [Symbol e] > string (ID _ xs) = xs > epsilonClosure :: (Ord e, Ord n) => > FSA n e -> Set (State n) -> Set (State n) > epsilonClosure fsa qs > | isDeterministic fsa = qs > | otherwise = epsilonClosure' qs qs > where epsilons = extractMonotonic edgeLabel Epsilon (transitions fsa) > epsilonClosure' seen new > | isEmpty new = seen > | otherwise = epsilonClosure' > (seen `union` closure) > (difference closure seen) > where seens = keep (isIn new . source) epsilons > closure = tmap destination seens > step :: (Ord e, Ord n) => FSA n e -> Set (ID n e) -> Set (ID n e) > step fsa ids > | filteredIDs == empty = empty > | otherwise = collapse (union . next) empty filteredIDs > where ts = transitions fsa > filterID i = ID (state i) (keep (/= Epsilon) (string i)) > filteredIDs = tmap filterID ids > next i > | null s = tmap (`ID` []) closure > | otherwise = tmap (`ID` tail s) outStates > where s = string i > closure = epsilonClosure fsa (singleton (state i)) > outStates = epsilonClosure fsa > . tmap destination > . keep (isIn closure . source) > $ extractMonotonic edgeLabel > (head s) ts We should not have to produce IDs ourselves. We can define the transition function `delta` from an FSA, a symbol, and a state to a set of states: > delta :: (Ord e, Ord n) => > FSA n e -> Symbol e -> Set (State n) -> Set (State n) > delta f x = tmap state . step f . Set.mapMonotonic (`ID` [x]) > compute :: (Ord e, Ord n) => FSA n e -> [Symbol e] -> Set (ID n e) > compute fsa str = until (allS (null . string)) (step fsa) initialIDs > where initialIDs = Set.mapMonotonic (`ID` str) expandedInitials > expandedInitials = epsilonClosure fsa $ initials fsa > -- |Returns whether the given 'FSA' lands in a final state > -- after processing the given sequence. > -- > -- @since 1.1 > accepts :: (Ord e, Ord n) => FSA n e -> [e] -> Bool > accepts fsa = anyS (isIn (finals fsa)) . tmap state > . compute fsa . tmap Symbol The Brzozowski derivative of an FSA with respect to some string is an FSA representing the valid continuations from that string. > -- |Return an FSA representing possible continuations from a given > -- sequence of symbols. If the input automaton is not complete > -- then the output may have no states when given incompatible input. > -- > -- @since 1.0 > brzozowskiDerivative :: (Ord e, Ord n) => [e] -> FSA n e -> FSA n e > brzozowskiDerivative xs f = trimUnreachables > $ f { initials = tmap state . compute f > $ tmap Symbol xs} A generalization of the Brzozowski derivative is the left quotient of a language by another language. In fact, the following operations, quotLeft, quotRight, and quotMid, offer a start toward computing in the free group rather than the free monoid generated by the alphabet. > -- |Return an FSA representing possible continuations in the second > -- language from strings in the first language. > -- In other words, @quotLeft a b@ returns \(\{w : x\in a, xw\in b\}\). > -- > -- @since 1.0 > quotLeft :: (Ord e, Ord n1, Ord n2) => > FSA n1 e -> FSA n2 e > -> FSA (Maybe (Either n1 ()), Maybe n2) e > quotLeft a b = p { initials = i > , isDeterministic = d } > where a' = x (autConcatenation (trimUnreachables a) t) > x m = m {finals = finals m `Set.union` tmap State f} > t = totalWithAlphabet (sigma a `Set.union` sigma b) > p = autIntersection a' (trimUnreachables b) > f = tmap (Left . nodeLabel) $ finals a > i = keep ( maybe False (`Set.member` f) > . fst . nodeLabel) $ states p > d = isDeterministic p && Set.size i == 1 Doing quotRight similarly should be fairly simple, but it's easier to just do left-quotient on reversals. > -- |Return an FSA representing possible strings in the first language > -- which end with a string in the second language. > -- In other words, @quotRight a b@ is \(\{w : wx\in a, x\in b\}\). > -- > -- @since 1.0 > quotRight :: (Ord e, Ord n1, Ord n2) => > FSA n1 e -> FSA n2 e -> FSA Integer e > quotRight a b = normalize . LTK.FSA.reverse > $ quotLeft (LTK.FSA.reverse b) (LTK.FSA.reverse a) > -- |@quotMid a b c@ is \(\{wz : wx\in a, yx\in b, yz\in c\}\). > -- This lifts strings to a group, placing b-inverse between a and c. > -- The time complexity of this function is abysmal, > -- performing a left and a right quotient for each state in @b@. > -- > -- @since 1.0 > quotMid :: (Ord e, Ord n1, Ord n2, Ord n3) => > FSA n1 e -> FSA n2 e -> FSA n3 e -> FSA Integer e > quotMid a b c = unionAll . map bridge . Set.toList $ states b' > where a' = normalize a > b' = normalize b > c' = normalize c > bridge n = let b1 = b' {initials = Set.singleton n} > b2 = b' {finals = Set.singleton n} > in renameStates > (quotRight a' b1 > `autConcatenation` quotLeft b2 c') Logical Operators ================= > combine :: State a -> State b -> State (a, b) > combine q1 q2 = (,) <$> q1 <*> q2 > makePairs :: (Ord a, Ord b) => Set a -> Set b -> Set (a, b) > makePairs xs ys = collapse (union . f) empty xs > where f x = Set.mapMonotonic ((,) x) ys > makeJustPairs :: (Ord a, Ord b) => > Set (State a) -> Set (State b) -> > Set (State (Maybe a), State (Maybe b)) > makeJustPairs xs ys = makePairs (justify xs) (justify ys) > where justify = Set.mapMonotonic (fmap Just) The Cartesian construction for automata is closely related to the tensor product of graphs. Given two automata, M1 and M2, we construct a new automata M3 such that: * states(M3) is a subset of the Cartesian product of (states(M1) or Nothing) with (states(M2) or Nothing) * Any lack of explicit transition in either M1 or M2 is considered a transition to Nothing in that automaton. This effectively makes each input total. * If (q1, q2) and (q1', q2') are states of M3, then there is a transition from (q1, q2) to (q1', q2') iff there exists both a transition from q1 to q1' in M1 and a transition from q2 to q2' in M2. This construction results in an automaton that tracks a string through both of its input automata. States may be tagged as accepting to obtain either an intersection or a union: * For a intersection, a state (q1, q2) in states(M3) is accepting iff q1 is accepting in M1 and q2 is accepting in M2. * For a union, a state (q1, q2) in states(M3) is accepting iff q1 is accepting in M1 or q2 is accepting in M2. In either case, the set of initial states in the new automaton is equal to the Cartesian product of the initial states of M1 with the initial states of M2. The Cartesian construction preserves determinism and guarantees totality of the result. We will slightly generalize this construction, with options for whether to trace the machines in sync, out of sync, or both > pairTrace :: (Ord e, Ord n1, Ord n2) => > Bool -> Bool > -> (Bool -> Bool -> Bool) -> FSA n1 e -> FSA n2 e > -> FSA (Maybe n1, Maybe n2) e > pairTrace sync unsync isFinal' f1 f2 > = FSA { sigma = alpha > , transitions = ts > , initials = qi > , finals = qf > , isDeterministic = isDet > } > where alpha = alphabet f1 `union` alphabet f2 > isDet = not unsync > && isDeterministic f1 && isDeterministic f2 > && Set.size qi == 1 > qi = Set.mapMonotonic (uncurry combine) > $ makeJustPairs > (epsilonClosure f1 $ initials f1) > (epsilonClosure f2 $ initials f2) > isFinal q > = let ~(a,b) = nodeLabel q > f m = maybe False (isIn (finals m) . State) > in isFinal' (f f1 a) (f f2 b) > (_,_,ts,qf) > = until > (\(new, _, _, _) -> isEmpty new) > (\(new, prev, partial, fins) -> > let exts = collapse (union . extensions) > empty new > seen = new `union` prev > dests = tmap destination exts > fins' = keep isFinal dests > in ( difference dests seen > , seen > , exts `union` partial > , fins `union` fins' > ) > ) > (qi, empty, empty, keep isFinal qi) > extensions q = collapse (union . exts' q) empty > $ Set.mapMonotonic Symbol alpha > exts' q x = Set.mapMonotonic (Transition x q) $ nexts x q > nexts x q = let n1 = nexts' x f1 $ fmap fst q > n2 = nexts' x f2 $ fmap snd q > f a = Set.mapMonotonic (combine a) n2 > in union > (if unsync > then Set.mapMonotonic > (`combine` fmap snd q) n1 > `union` > Set.mapMonotonic > (combine (fmap fst q)) n2 > else empty) > (if sync > then collapse (union . f) empty n1 > else empty) > nexts' x f = maybe > (singleton $ State Nothing) > (mDests x f . State) . nodeLabel > mDests x f q > | isEmpty exts = singleton $ State Nothing > | otherwise = Set.mapMonotonic (fmap Just) exts > where exts = delta f x $ singleton q > cartesianConstruction :: (Ord e, Ord n1, Ord n2) => > (Bool -> Bool -> Bool) -> FSA n1 e -> FSA n2 e > -> FSA (Maybe n1, Maybe n2) e > cartesianConstruction = pairTrace True False > autIntersection :: (Ord e, Ord n1, Ord n2) => FSA n1 e -> FSA n2 e -> > FSA (Maybe n1, Maybe n2) e > autIntersection = cartesianConstruction (&&) > autUnion :: (Ord e, Ord n1, Ord n2) => FSA n1 e -> FSA n2 e -> > FSA (Maybe n1, Maybe n2) e > autUnion = cartesianConstruction (||) For the difference A - B, the final states are those that are accepting in A and non-accepting in B. Note that the relative complement requires functionality. Consider the case of (A - B) where B is nondeterministic in such a way that there exists a string w for which a computation leads to both an accepting state qa and a nonaccepting state qn. Suppose that w leads to an accepting state in A, qf. Then the cartesian construction will contain both (qf, qa) and (qf, qn). When selecting states to be accepting, (qf, qn) will be included since qn is nonaccepting in B, and (qf, qn) will be excluded since qa is accepting in B. This is not what we want, as it means that w is still accepted. Thus we cannot use the cartesian construction to gain an advantage over the naive implementation (A & not B). > -- |Returns an 'FSA' accepting all and only those strings > -- accepted by the first input but rejected by the second. > -- > -- @since 1.1 > autDifference :: (Ord e, Ord n1, Ord n2) => FSA n1 e -> FSA n2 e -> > FSA (Maybe n1, Maybe (Set n2)) e > autDifference = fmap (. complement) autIntersection Much like the one-sided difference, the symmetric difference of two automata relies on determinism. > autSymmetricDifference :: (Ord e, Ord n1, Ord n2) => FSA n1 e -> FSA n2 e -> > FSA (Maybe (Maybe n1, Maybe n2), > Maybe (Set (Maybe n1, Maybe n2))) e > autSymmetricDifference f1 f2 > = autDifference (autUnion f1 f2) $ autIntersection f1 f2 For a total functional FSA, the complement can be obtained by simply inverting the notion of accepting states. Totality is necessary, as any sink-states in the original will be accepting in the complement. Functionality is necessary, as: -> (0) -a-> ((1)) -a) (x) is a state, ((x)) is accepting | -a-> represents a transition on a +----a-> (2) -a) -a) represents a self-edge on a becomes under this construction: ->((0)) -a-> (1) -a) | +-----a-> ((2)) -a) and the string "a" is accepted in both. > -- |Returns an 'FSA' accepting all and only those strings not > -- accepted by the input. > complement :: (Ord e, Ord n) => FSA n e -> FSA (Set n) e > complement = complementDeterministic . determinize > -- |Returns the 'complement' of a deterministic 'FSA'. > -- The precondition that the input is deterministic > -- is not checked. > complementDeterministic :: (Ord e, Ord n) => FSA n e -> FSA n e > complementDeterministic f = f { finals = difference (states f) (finals f) } > -- |@(residue a b)@ is equivalent to @(difference a b)@. > -- In the context of an approximation and its source, > -- represents the strings accepted by the approximation > -- that should not be. > residue :: (Ord n, Ord e, Enum n) => FSA n e -> FSA n e -> FSA n e > residue = curry (renameStates . minimize . uncurry difference) > -- |@(coresidue a b)@ is equivalent to @(complement (residue a b))@. > -- In the context of an approximation and its source, > -- represents unmet constraints of the source. > coresidue :: (Ord n, Ord e, Enum n) => FSA n e -> FSA n e -> FSA n e > coresidue a = renameStates . minimize . > union (renameStates $ complement a) The shuffle product of two languages can be constructed similarly to their intersection. The difference is that in the standard Cartesian construction, an edge follows its labeling symbol in both source automata, while in the shuffle product it follows in only one. Thus rather than one out-edge per symbol per state, there are two. > -- |Returns the shuffle product of its two input autamata. > -- > -- @since 1.1 > autShuffle :: (Ord e, Ord n1, Ord n2) => FSA n1 e -> FSA n2 e > -> FSA (Maybe n1, Maybe n2) e > autShuffle = pairTrace False True (&&) Closely related is the infiltration product, in which an edge may follow its labeling symbol in one source, the other, or both simultaneously. > -- |Returns the infiltration product of its two input autamata. > -- > -- @since 1.1 > autInfiltration :: (Ord e, Ord n1, Ord n2) => FSA n1 e -> FSA n2 e > -> FSA (Maybe n1, Maybe n2) e > autInfiltration = pairTrace True True (&&) Other Combinations ================== > autConcatenation :: (Ord n1, Ord n2, Ord e) => > FSA n1 e -> FSA n2 e > -> FSA (Either n1 n2) e > autConcatenation f1 f2 > = FSA > { sigma = alphabet f1' `union` alphabet f2' > , transitions > = unionAll > [ transitions f1' > , transitions f2' > , combiningTransitions > ] > , initials = initials f1' > , finals = finals f2' > , isDeterministic = False > } > where f1' = renameStatesByMonotonic Left f1 > f2' = renameStatesByMonotonic Right f2 > combiningTransitions = collapse (union . cts) empty > (finals f1') > cts f = Set.mapMonotonic > (\i -> > Transition > { edgeLabel = Epsilon > , source = f > , destination = i > } > ) > (initials f2') > -- |The Kleene Closure of an automaton is > -- the free monoid under concatenation generated by all strings > -- in the automaton's represented stringset. > -- The resulting automaton is nondeterministic. > kleeneClosure :: (Ord n, Ord e) => FSA n e -> FSA (Either n Bool) e > kleeneClosure f > = FSA > { sigma = alphabet f' > , transitions > = unionAll [ transitions f' > , toOldInitials > , toNewFinal > ] > , initials = singleton ni > , finals = singleton nf > , isDeterministic = False > } > where f' = renameStatesByMonotonic Left f > ni = State (Right False) > nf = State (Right True) > toOldInitials = collapse (union . cts) empty > (insert ni (finals f')) > cts q = tmap > (\i -> > Transition > { edgeLabel = Epsilon > , source = q > , destination = i > } > ) > (initials f') > toNewFinal = tmap > (\q -> > Transition > { edgeLabel = Epsilon > , source = q > , destination = nf > } > ) > (insert ni (finals f')) Minimization ============ In general, operations on FSAs have run time proportional to some (increasing) function of how many states the FSA has. With this in mind, we provide a function to make an FSA as small as possible without loss of information. We begin by constructing the set of Myhill-Nerode equivalence classes for the states of the input FSA, then simply replace each state by its equivalence class. > -- |Returns a deterministic 'FSA' recognizing the same stringset > -- as the input, with a minimal number of states. > minimize :: (Ord e, Ord n) => FSA n e -> FSA (Set (Set n)) e > minimize = minimizeDeterministic . determinize > -- |Returns a deterministic 'FSA' recognizing the same stringset > -- as the input, with a minimal number of states. > -- The precondition that the input is deterministic > -- is not checked. > minimizeDeterministic :: (Ord e, Ord n) => FSA n e -> FSA (Set n) e > minimizeDeterministic = setD . minimizeOver nerode > where setD f = f {isDeterministic = True} > -- |Returns a non-necessarily deterministic 'FSA' > -- minimized over a given relation. > -- Some, but not all, relations do guarantee deterministic output. > -- The precondition that the input is deterministic > -- is not checked. > minimizeOver :: (Ord e, Ord n) => > (FSA n e -> Set (Set (State n))) -> FSA n e -> FSA (Set n) e > minimizeOver r fsa = FSA > { sigma = alphabet fsa > , transitions = trans > , initials = qi > , finals = fin > , isDeterministic = False > } > where classes = r fsa > classOf x = State . tmap nodeLabel . unionAll $ > keep (contains x) classes > qi = tmap classOf $ initials fsa > fin = tmap classOf $ finals fsa > trans = tmap > (\t -> > t > { source = classOf (source t) > , destination = classOf (destination t) > } > ) $ transitions fsa > -- |Two strings \(u\) and \(v\) are equivalent iff > -- for all strings \(w\), \(uw\) and \(vw\) lead to > -- states in the same equivalence class. > nerode :: (Ord e, Ord n) => FSA n e -> Set (Set (State n)) > nerode fsa = tmap eqClass sts > where sts = states fsa > i = union i' $ Set.mapMonotonic (\x -> (x, x)) sts > i' = difference (pairs sts) $ distinguishedPairs fsa > eqClass x = unionAll > [ singleton x > , tmap snd $ extractMonotonic fst x i > , Set.mapMonotonic fst $ keep ((== x) . snd) i > ] The easiest way to construct the equivalence classes is to iteratively build a set of known-distinct pairs. In the beginning we know that any accepting state is distinct from any non-accepting state. At each further iteration, two states p and q are distinct if there exists some symbol x such that deltax(p) is distinct from deltax(q). When an iteration completes without updating the set of known-distinct pairs, the algorithm is finished; all possible distinctions have been discovered. The Myhill-Nerode equivalence class of a state p, then, is the set of states not distinct from p. > distinguishedPairs :: (Ord e, Ord n) => FSA n e -> Set (State n, State n) > distinguishedPairs fsa = fst result > where allPairs = pairs (states fsa) > initiallyDistinguished > = collapse (union . pairs' (finals fsa)) empty . > difference (states fsa) $ finals fsa > f d (a, b) = areDistinguishedByOneStep fsa d a b > result = until > (\(x, y) -> isize x == isize y) > (\(x, _) -> > ( union x $ > keep (f x) (difference allPairs x) > , x > ) > ) > (initiallyDistinguished, empty) > areDistinguishedByOneStep :: (Ord e, Ord n) => > FSA n e -> > Set (State n, State n) -> > State n -> > State n -> > Bool > areDistinguishedByOneStep fsa knownDistinct p q > | isIn knownDistinct (makePair p q) = True > | otherwise = anyS (isIn knownDistinct) newPairs > where destinations s x = delta fsa (Symbol x) (singleton s) > newPairs' a = collapse (union . pairs' (destinations q a)) > empty > (destinations p a) > newPairs = collapse (union . newPairs') empty (alphabet fsa) We only need to check each pair of states once: (1, 2) and (2, 1) are equivalent in this sense. Since they are not equivalent in Haskell, we define a function to ensure that each pair is only built in one direction. > makePair :: (Ord a) => a -> a -> (a, a) > makePair a b = (min a b, max a b) > pairs :: (Ord a) => Set a -> Set (a, a) > pairs xs = collapse (union . f) empty xs > where f x = Set.mapMonotonic ((,) x) . snd $ Set.split x xs > pairs' :: (Ord a) => Set a -> a -> Set (a, a) > pairs' xs x = Set.mapMonotonic (makePair x) xs An FSA is certainly not minimal if there are states that cannot be reached by any path from the initial state. We can trim those. > -- |The input automaton with unreachable states removed. > -- > -- @since 1.0 > trimUnreachables :: (Ord e, Ord n) => FSA n e -> FSA n e > trimUnreachables fsa = FSA alpha trans qi fin (isDeterministic fsa) > where alpha = alphabet fsa > qi = initials fsa > fin = intersection reachables $ finals fsa > trans = keep (isIn reachables . source) $ transitions fsa > reachables = reachables' qi > reachables' qs > | newqs == qs = qs > | otherwise = reachables' newqs > where initialIDs a = Set.mapMonotonic (`ID` [a]) qs > next = collapse > (union . tmap state . step fsa . > initialIDs . Symbol > ) > empty > alpha > newqs = next `union` qs An FSA will often contain states from which no path at all leads to an accepting state. These represent failure to match a pattern, which can be represented equally well by explicit lack of a transition. Thus we can safely remove them. Given that we already have a function to remove states that cannot be reached, the simplest way to remove these fail-states is to trim the unreachable states in the reversal of the FSA. > -- |The reversal of an automaton accepts the reversals of all > -- strings accepted by the original. > reverse :: (Ord e, Ord n) => FSA n e -> FSA n e > reverse f = f { isDeterministic = False > , transitions = reverseTransitions f > , initials = finals f > , finals = initials f > } > where reverseTransition t = t { source = destination t > , destination = source t > } > reverseTransitions = tmap reverseTransition . transitions > trimFailStates :: (Ord e, Ord n) => FSA n e -> FSA n e > trimFailStates = LTK.FSA.reverse . trimUnreachables . LTK.FSA.reverse > -- |Returns a normal form of the input. > -- An FSA is in normal form if it is minimal and deterministic, > -- and contains neither unreachable states nor nonaccepting sinks. > -- Node labels are irrelevant, so 'Int' is used as a small > -- representation. > normalize :: (Ord e, Ord n) => FSA n e -> FSA Integer e > normalize = f . trimFailStates . minimize . trimUnreachables > where f fsa > | isEmpty (states fsa) = complementDeterministic $ > totalWithAlphabet (alphabet fsa) > | otherwise = renameStates fsa J-Minimization ============== Note that a state in an FSA is a representation of a (set of) string(s). The standard minimization algorithm considers two strings w and v equivalent iff for all u, wu and vu are the same state or otherwise equivalent by a recursive application of this definition. A different equivalence relation exists, though. Consider a syntactic monoid M. Then two elements w and v are J-equivalent iff the two-sides ideals MwM and MvM are equal. This is not equivalent to the statement that wM and vM are equivalent as well as Mw and Mv. There are stringsets for which two or more elements are considered distinct when looking at each one-sided ideal but are actually equivalent in terms of their two-sided ideals. > -- |Given an automaton whose syntactic monoid is \(M\), > -- two strings \(u\) and \(v\) are equivalent iff > -- \(MuM=MvM\) > jEquivalence :: (Ord e, Ord n) => > FSA ([Maybe n], [Symbol e]) e -> > Set (Set (State ([Maybe n], [Symbol e]))) > jEquivalence f = partitionBy (primitiveIdeal2 f) (states f) The primitive left-ideal of an element x of the syntactic monoid is the set of elements {ax} for all elements a: > -- |The primitive left ideal. > -- > -- @since 0.2 > primitiveIdealL :: (Ord n, Ord e) => FSA (n, [Symbol e]) e -> > State (n, [Symbol e]) -> Set (State (n, [Symbol e])) > primitiveIdealL f x = collapse > (union . follow f (snd $ nodeLabel x)) > empty $ > states f > -- |The generalized \(\delta\) function, > -- follow each symbol in a string in order. > -- > -- @since 0.2 > follow :: (Ord n, Ord e) => FSA n e -> > [Symbol e] -> State n -> Set (State n) > follow f xs q = collapse (flip (.) . delta f) id xs $ singleton q The primitive right-ideal is {xa} for all a, i.e. the reachability relation. We already have a function that computes this: @epsilonClosure@. In order to make use of that, we just replace every edge by Epsilon. Ideally we would use an uninhabited type for the alphabet, but since Haskell does not have such a type out of the box, we use the unit type @()@ instead. > ignoreSymbols :: (Ord n, Ord e) => FSA n e -> FSA n () > ignoreSymbols f = f { sigma = empty > , transitions = Set.map x (transitions f) > , isDeterministic = False > } > where x t = t {edgeLabel = Epsilon} > -- |The primitive right ideal. > -- > -- @since 0.2 > primitiveIdealR :: (Ord n, Ord e) => FSA n e -> State n -> Set (State n) > primitiveIdealR f x = epsilonClosure (ignoreSymbols f) $ singleton x Then the two-sided ideal is {axb} for all a and b, i.e. the right-ideals of every left-ideal (or v.v.). > -- |The primitive two-sided ideal. > -- > -- @since 0.2 > primitiveIdeal2 :: (Ord n, Ord e) => FSA (n, [Symbol e]) e -> > State (n, [Symbol e]) -> Set (State (n, [Symbol e])) > primitiveIdeal2 f = collapse (union . primitiveIdealR f) empty . > primitiveIdealL f > -- |An automaton is considered trivial under some equivalence relation > -- if each of its equivalence classes is singleton. > -- > -- @since 0.2 > trivialUnder :: (FSA n e -> Set (Set (State n))) -> FSA n e -> Bool > trivialUnder f = all ((== 1) . isize) . f H-Minimization ============== Where two strings are J-equivalent iff their two-sided ideals are equal, they are H-equivalent if their corresponding one-sided ideals are equal. That is, w is equivalent to v iff wM == vM and Mw == Mv. > -- |Given an automaton whose syntactic monoid is \(M\), > -- two strings \(u\) and \(v\) are equivalent if > -- \(Mu=Mv\) and \(uM=vM\). > -- > -- @since 0.2 > hEquivalence :: (Ord n, Ord e) => > FSA (n, [Symbol e]) e -> Set (Set (State (n, [Symbol e]))) > hEquivalence f = refinePartitionBy (primitiveIdealR f) . > partitionBy (primitiveIdealL f) $ states f Determinization ================ Converting a non-deterministic FSA to a deterministic one (DFA) can improve the speed of determining whether the language represented by the FSA contains a string. Further, both complexity-classification and minimization require DFAs as input. > metaFlip :: Ord n => Set (State n) -> State (Set n) > metaFlip = State . Set.mapMonotonic nodeLabel > powersetConstruction :: (Ord e, Ord n) => > FSA n e -> > Set (State n) -> > (Set (State n) -> Bool) -> > FSA (Set n) e > powersetConstruction f start isFinal = FSA (alphabet f) trans qi fin True > where qi = singleton (metaFlip start) > buildTransition a q = (a, q, delta f (Symbol a) q) > buildTransitions' q > = Set.mapMonotonic (`buildTransition` q) $ alphabet f > buildTransitions = collapse (union . buildTransitions') empty > (trans',qs,_) > = until > (\(_, b, c) -> isize b == isize c) > (\(a, b, c) -> > let d = buildTransitions (difference c b) > in ( a `union` d > , c > , union c $ tmap (\(_, _, z) -> z) d > ) > ) > (empty, empty, singleton start) > makeRealTransition (a, b, c) > = Transition > { edgeLabel = Symbol a > , source = metaFlip b > , destination = metaFlip c > } > trans = Set.mapMonotonic makeRealTransition trans' > fin = Set.mapMonotonic metaFlip $ keep isFinal qs > -- |Returns a deterministic automaton representing the same > -- stringset as the potentially nondeterministic input. > determinize :: (Ord e, Ord n) => FSA n e -> FSA (Set n) e > determinize f > | isDeterministic f = renameStatesByMonotonic singleton f > | otherwise = powersetConstruction f (initials f) isFinal > where isFinal = anyS (isIn (finals f)) . epsilonClosure f The Powerset Graph ================== When determinizing an FSA, we use a powerset construction building out from the set of initial states. We can use the same construction but begin instead with the set of all states to obtain a different powerset graph. Though there are many possible initial conditions, including the one used for determinization, we call this particular instance *the* powerset graph. If our source FSA happens to be normalized, we can gather a lot of information from this graph. We will tag any states not disjoint from the set of final states in the source as accepting. > -- |Given an automaton \(M\) with stateset \(Q\), > -- the powerset graph of \(M\) is an automaton with > -- stateset in the powerset of \(Q\). > -- From a node \(\{q_1,q_2,\ldots,q_n\}\), > -- there is an edge labelled \(\sigma\) that leads to > -- \(\{\delta(q_1,\sigma), \delta(q_2,\sigma), \ldots, \delta(q_n, \sigma)\}\), > -- where \(\delta\) is the transition function of the input. > -- The initial state is \(Q\), and the result is complete. > powersetGraph :: (Ord e, Ord n) => FSA n e -> FSA (Set n) e > powersetGraph f = powersetConstruction f (states f) hasAccept > where hasAccept qs = intersection (finals f) qs /= empty The Syntactic Monoid ==================== In the powerset graph (PSG), states are labelled by sets of states. For all states Q and symbols x, there is an edge labelled by x from Q to the state labelled by Q' such that for all q' in Q', there exists some q in Q such that q goes to q' on x. The syntactic monoid differs in that the states are effectively labelled by functions. Here we will use lists of the form [q_0, q_1, ..., q_n]. The syntactic monoid a DFA whose states are labelled [0, 1, ..., n] will always contain the state [0, 1, ..., n]. This is the initial state. There exist edges between states are found by mapping over the list. That is, if delta is the transition function from QxSigma->Q: delta' [q_0, ..., q_n] x = [delta q_0 x, ..., delta q_n x] Any state labelled by a function mapping an initial state to a final state is considered accepting in the syntactic monoid. > -- |Given an automaton \(M\) with stateset \(Q\), > -- the syntactic monoid of \(M\) is an automaton with > -- stateset in \((Q\rightarrow Q)\). > -- Here these functions are represented by lists, > -- where \(q_i\) maps to the \(i^\text{th}\) element of the list. > -- From a node \(\langle q_1,q_2,\ldots,q_n\rangle\), > -- there is an edge labelled \(\sigma\) that leads to > -- \(\langle\delta(q_1,\sigma), \delta(q_2,\sigma), \ldots, \delta(q_n, \sigma)\rangle\), > -- where \(\delta\) is the transition function of the input. > -- The initial state is the identity function, and the result is complete. > syntacticMonoid :: (Ord e, Ord n) => > FSA n e -> FSA ([Maybe n], [Symbol e]) e > syntacticMonoid m = FSA { sigma = alphabet m > , transitions = t > , initials = i > , finals = f > , isDeterministic = True > } > where i = singleton . fmap (flip (,) []) . mapM (fmap Just) $ s > s = Set.toList (initials m) ++ > Set.toList (difference (states m) $ initials m) > n = size (initials m) > i' = if intersection (finals m) (initials m) /= empty > then i > else empty > (t,_,f,_) > = syntacticMonoid' m n (empty, i, i', i) > syntacticMonoid' :: (Ord e, Ord n) => FSA n e -> Int -> > ( Set (Transition ([Maybe n], [Symbol e]) e) > , Set (State ([Maybe n], [Symbol e])) > , Set (State ([Maybe n], [Symbol e])) > , Set (State ([Maybe n], [Symbol e])) > ) -> > ( Set (Transition ([Maybe n], [Symbol e]) e) > , Set (State ([Maybe n], [Symbol e])) > , Set (State ([Maybe n], [Symbol e])) > , Set (State ([Maybe n], [Symbol e])) > ) > syntacticMonoid' f n former@(ot, os, ofi, s) > | isEmpty s = former > | otherwise = syntacticMonoid' f n next > where next = ( nt `union` ot > , ns `union` os > , nf `union` ofi > , ns > ) > alpha = alphabet f > move a q = replaceDestinationFromMap (s `union` os) $ > Transition > { edgeLabel = Symbol a > , source = q > , destination = move' (Symbol a) q > } > move' a > = fmap > (bimap > (tmap (maybe Nothing > $ pull . delta f a . singleton . State)) > (++ [a])) > pull xs = if xs == empty > then Nothing > else nodeLabel $ fmap Just (chooseOne xs) > nt = mergeByDestFst $ > collapse (union . nt') empty alpha > nt' a = tmap (move a) s > ns = keep (isNotIn os' . fmap fst) > $ tmap destination nt > nf = keep hasFinal ns > os' = tmap (fmap fst) os > fins = nodeLabel . mapM (fmap Just) > . Set.toList $ finals f > hasFinal = not . isEmpty . intersection fins > . take n . fst . nodeLabel > replaceDestinationFromMap :: > (Container (c (State (a, b))) (State (a, b)), Collapsible c, Eq a) => > c (State (a, b)) -> Transition (a, b) e -> Transition (a, b) e > replaceDestinationFromMap m t > | isEmpty m' = t > | otherwise = t {destination = chooseOne m'} > where m' = keep ((==) (fn $ destination t) . fn) m > fn = fst . nodeLabel > mergeByDestFst :: > ( Container (c (Transition (n, n') e)) (Transition (n, n') e) > , Collapsible c, Ord n, Ord n', Ord e > ) => c (Transition (n, n') e) -> c (Transition (n, n') e) > mergeByDestFst = mergeByDestFst' empty > mergeByDestFst' :: > ( Container (c (Transition (n, n') e)) (Transition (n, n') e) > , Collapsible c, Ord n, Ord n', Ord e > ) => > c (Transition (n, n') e) -> c (Transition (n, n') e) -> > c (Transition (n, n') e) > mergeByDestFst' p l > | isEmpty l = p > | otherwise > = mergeByDestFst' > (union p . > insert x $ > tmap (set_dest (destination x)) sds > ) $ difference xs sds > where (x, xs) = choose l > fnd = fst . nodeLabel . destination > sds = keep ((==) (fnd x) . fnd) xs > set_dest d t = t {destination = d} Alphabet Manipulation ===================== > -- |Add missing symbols to the alphabet of an automaton. > -- The result is an automaton with at least the provided alphabet > -- that licenses exactly the same set of strings as the input. > extendAlphabetTo :: (Ord a, Ord b) => Set b -> FSA a b -> > FSA (Maybe Integer, Maybe a) b > extendAlphabetTo syms = autUnion $ emptyWithAlphabet syms A "semantic automaton" is one in which a constraint is realized for a universal alphabet. This is achieved by using edges labelled by 'Nothing' to represent symbols not already included in the alphabet and an extend function that takes these edges into account. For example, consider the local and piecewise constraints: * No A immediately follows another A, and * No A follows another A. As automata with alphabet {A} these constraints appear identical, each licensing only the empty string and "A" itself. But if the alphabet were instead {A,B}, then they would instead license: * B*A?(BA?)*, and * B*A?B*, respectively. Since the source automata for these constraints are identical, no algorithm can know which variant to extend the alphabet to. Encoding the universal alphabet in the transition graph with semantic automata can prevent this issue by explicitly stating which alternative is correct. One caveat with the use of semantic automata is that before any operation combines two or more automata, the inputs must have their alphabets unified. > -- |Add missing symbols to the alphabet of an automaton. > -- As the symbol 'Nothing' is taken to represent > -- any symbol not currently in the alphabet, > -- new edges are added in parallel to existing edges labelled by 'Nothing'. > semanticallyExtendAlphabetTo :: > (Ord a, Ord b) => Set b -> FSA a (Maybe b) -> FSA a (Maybe b) > semanticallyExtendAlphabetTo syms fsa > = fsa { sigma = as `union` new > , transitions = ts `union` ts' > } > where as = alphabet fsa > new = difference (Set.mapMonotonic Just syms) as > ts = transitions fsa > f e = union (Set.mapMonotonic > (\x -> e {edgeLabel = Symbol x}) new) > ts' = collapse f empty $ > extractMonotonic edgeLabel (Symbol Nothing) ts > -- |Remove the semantic 'Nothing' edges from an automaton and reflect this > -- change in the type. > desemantify :: (Ord a, Ord b) => FSA a (Maybe b) -> FSA a b > desemantify fsa = renameSymbolsByMonotonic (fromMaybe undefined) > $ contractAlphabetTo > (Set.delete Nothing (alphabet fsa)) > fsa > -- |Add self-loops on all symbols to all edges to compute > -- an upward closure. > -- > -- @since 1.1 > loopify :: (Ord a, Ord b) => FSA a b -> FSA a b > loopify fsa = fsa { transitions = Set.union (transitions fsa) trs > , isDeterministic = False > } > where as = Set.toList $ alphabet fsa > qs = Set.toList $ states fsa > trs = Set.fromList $ concatMap sigmatr as > sigmatr x = map (\q -> Transition > { edgeLabel = Symbol x > , source = q > , destination = q > }) qs Tierify: * Ensure that all of T is accounted for in the input * Remove symbols from the input that are not in T * Insert self-loops on all symbols not in T, including: * the other symbols from the input's alphabet * the Nothing placeholder > -- |Convert a semantic automaton that represents a Local constraint > -- into a new one that represents the same constraint in the associated > -- Tier-Local class. > tierify :: (Ord a, Ord b) => Set b -> FSA a (Maybe b) -> FSA a (Maybe b) > tierify t fsa = semanticallyExtendAlphabetTo as f'' > where f' = contractAlphabetTo (tmap Just t) $ > semanticallyExtendAlphabetTo t fsa > f'' = f' > { sigma = insert Nothing $ alphabet f' > , transitions = union (transitions f') . > Set.mapMonotonic l $ states f' > } > l q = Transition > { edgeLabel = Symbol Nothing > , source = q > , destination = q > } > as = collapse (maybe id insert) empty $ alphabet fsa > -- |Allow a given set of symbols to be freely inserted or deleted. > -- In other words, make those symbols neutral. > -- > -- @since 1.1 > neutralize :: (Ord a, Ord b) => Set b -> FSA a b -> FSA a b > neutralize t fsa = fsa > { sigma = Set.union t $ alphabet fsa > , transitions = transitions fsa > `union` loops > `union` omissions > , isDeterministic = False > } > where tsym = map Symbol $ Set.toList t > x p = p { edgeLabel = Epsilon } > c s = Set.mapMonotonic (m s) (states fsa) > m s q = Transition > { edgeLabel = s > , source = q > , destination = q > } > loops = unionAll $ map c tsym > omissions = tmap x . keep (isIn tsym . edgeLabel) > $ transitions fsa > -- |Remove symbols from the alphabet of an automaton. > contractAlphabetTo :: (Ord a, Ord b) => Set b -> FSA a b -> FSA a b > contractAlphabetTo syms fsa = trimUnreachables $ > fsa > { sigma = syms > , transitions = trans > } > where trans = keep > (isIn > (insert Epsilon $ Set.mapMonotonic Symbol syms) . > edgeLabel > ) $ transitions fsa > -- |Ignore the alphabet of an automaton and use a given alphabet instead. > forceAlphabetTo :: (Ord a, Ord b) => > Set b -> FSA a b -> FSA (Maybe Integer, Maybe a) b > forceAlphabetTo syms = contractAlphabetTo syms . extendAlphabetTo syms Miscellaneous Functions ======================= After several operations, the nodeLabel type of an FSA becomes a deep mixture of pairs, maybes, and sets. We can smash these into a smaller type to improve memory usage and processing speed. > -- |Equivalent to 'renameStatesBy' \(f\), > -- where \(f\) is an arbitrary injective function. > renameStates :: (Ord e, Ord n, Ord n1, Enum n1) => FSA n e -> FSA n1 e > renameStates fsa = renameStatesByMonotonic > (flip (Map.findWithDefault (toEnum 0)) m) > fsa > where m = Map.fromDistinctAscList . flip zip (enumFrom $ toEnum 1) . > map nodeLabel . Set.toAscList $ states fsa > {-# INLINE[1] renameStates #-} > {-# RULES > "renameStates/identity" renameStates = id > #-} > -- |Transform the node labels of an automaton using a given function. > -- If this function is not injective, the resulting FSA may not be > -- deterministic even if the original was. > renameStatesBy :: (Ord e, Ord n, Ord n1) => > (n -> n1) -> FSA n e -> FSA n1 e > renameStatesBy f a > = a { transitions = tmap (transition . fmap f . Noitisnart) > (transitions a) > , initials = tmap (fmap f) (initials a) > , finals = tmap (fmap f) (finals a) > , isDeterministic = isDeterministic a && > isize ns == isize (states a) > } > where ns = tmap (fmap f) (states a) > -- |Transform the node labels of an automaton using a given function. > -- The precondition (that for all states x < y, f x < f y) is not checked. > renameStatesByMonotonic :: (Ord e, Ord n, Ord n1) => > (n -> n1) -> FSA n e -> FSA n1 e > renameStatesByMonotonic f a > = a { transitions = Set.mapMonotonic > (transition . fmap f . Noitisnart) $ > transitions a > , initials = Set.mapMonotonic (fmap f) $ initials a > , finals = Set.mapMonotonic (fmap f) $ finals a > } > -- |Transform the edge labels of an automaton using a given function. > -- If this function is not injective, the resulting FSA may not be > -- deterministic even if the original was. > renameSymbolsBy :: (Ord e, Ord e1, Ord n) => > (e -> e1) -> FSA n e -> FSA n e1 > renameSymbolsBy f a = a { sigma = alpha > , transitions = tmap (fmap f) $ transitions a > , isDeterministic = isDeterministic a && samea > } > where alpha = tmap f (alphabet a) > samea = isize alpha == isize (alphabet a) > -- |Transform the edge labels of an automaton using a given function. > -- The precondition (that for all symbols x < y, f x < f y) is not checked. > renameSymbolsByMonotonic :: (Ord e, Ord e1, Ord n) => > (e -> e1) -> FSA n e -> FSA n e1 > renameSymbolsByMonotonic f a > = a { sigma = alpha > , transitions = Set.mapMonotonic (fmap f) (transitions a) > } > where alpha = Set.mapMonotonic f (alphabet a) Mapping on tuples: > bimap :: (a -> c) -> (b -> d) -> (a, b) -> (c, d) > bimap f g (a, b) = (f a, g b) A parallel fold implementation based on a tree. The accumulating function must be both associative and commutative, as the tree is built in such a way that order of elements is not preserved. > data Tree a = Leaf a | Tree (Tree a) (Tree a) > deriving (Eq, Ord, Read, Show) > treeFromList :: [a] -> Tree a > treeFromList [] = error "No elements" > treeFromList [x] = Leaf x > treeFromList xs = ls' `par` rs' `pseq` Tree ls' rs' > where (ls, rs) = evenOdds xs > (ls', rs') = (treeFromList ls, treeFromList rs) > instance NFData a => NFData (Tree a) > where rnf (Leaf a) = rnf a > rnf (Tree l r) = rnf l `seq` rnf r > treeFold :: (a -> a -> a) -> Tree a -> a > treeFold _ (Leaf x) = x > treeFold op (Tree l r) = l' `par` r' `pseq` (l' `op` r') > where l' = treeFold op l > r' = treeFold op r Split a linked list into two smaller lists by taking the even and odd elements. This does not require computing the list's length, thus it can be more efficient than splitting at the middle element. The implementation of evenOdds given here will even work on an infinite stream because it guarantees that elements are output as soon as they are obtained. > evenOdds :: [a] -> ([a],[a]) > evenOdds [] = ([], []) > evenOdds [a] = ([a], []) > evenOdds (a:b:xs) = let (e, o) = evenOdds xs in (a:e, b:o)