{-# OPTIONS_GHC -fno-warn-name-shadowing #-} {-# LANGUAGE PatternGuards #-} module DFAMin (minimizeDFA) where import AbsSyn import Data.Map (Map) import qualified Data.Map as Map import Data.IntSet (IntSet) import qualified Data.IntSet as IS import Data.IntMap (IntMap) import qualified Data.IntMap as IM import Data.List as List -- Hopcroft's Algorithm for DFA minimization (cut/pasted from Wikipedia): -- P := {{all accepting states}, {all nonaccepting states}}; -- Q := {{all accepting states}}; -- while (Q is not empty) do -- choose and remove a set A from Q -- for each c in ∑ do -- let X be the set of states for which a transition on c leads to a state in A -- for each set Y in P for which X ∩ Y is nonempty do -- replace Y in P by the two sets X ∩ Y and Y \ X -- if Y is in Q -- replace Y in Q by the same two sets -- else -- add the smaller of the two sets to Q -- end; -- end; -- end; minimizeDFA :: Ord a => DFA Int a -> DFA Int a minimizeDFA dfa@ DFA { dfa_start_states = starts, dfa_states = statemap } = DFA { dfa_start_states = starts, dfa_states = Map.fromList states } where equiv_classes = groupEquivStates dfa numbered_states = number (length starts) equiv_classes -- assign each state in the minimized DFA a number, making -- sure that we assign the numbers [0..] to the start states. number _ [] = [] number n (ss:sss) = case filter (`IS.member` ss) starts of [] -> (n,ss) : number (n+1) sss starts' -> zip starts' (repeat ss) ++ number n sss -- if one of the states of the minimized DFA corresponds -- to multiple starts states, we just have to duplicate -- that state. states = [ let old_states = map (lookup statemap) (IS.toList equiv) accs = map fix_acc (state_acc (head old_states)) -- accepts should all be the same out = IM.fromList [ (b, get_new old) | State _ out <- old_states, (b,old) <- IM.toList out ] in (n, State accs out) | (n, equiv) <- numbered_states ] fix_acc acc = acc { accRightCtx = fix_rctxt (accRightCtx acc) } fix_rctxt (RightContextRExp s) = RightContextRExp (get_new s) fix_rctxt other = other lookup m k = Map.findWithDefault (error "minimizeDFA") k m get_new = lookup old_to_new old_to_new :: Map Int Int old_to_new = Map.fromList [ (s,n) | (n,ss) <- numbered_states, s <- IS.toList ss ] groupEquivStates :: (Ord a) => DFA Int a -> [IntSet] groupEquivStates DFA { dfa_states = statemap } = go init_p init_q where (accepting, nonaccepting) = Map.partition acc statemap where acc (State as _) = not (List.null as) nonaccepting_states = IS.fromList (Map.keys nonaccepting) -- group the accepting states into equivalence classes accept_map = {-# SCC "accept_map" #-} foldl' (\m (n,s) -> Map.insertWith (++) (state_acc s) [n] m) Map.empty (Map.toList accepting) -- accept_groups :: Ord s => [Set s] accept_groups = map IS.fromList (Map.elems accept_map) init_p = nonaccepting_states : accept_groups init_q = accept_groups -- map token T to -- a map from state S to the list of states that transition to -- S on token T -- This is a cache of the information needed to compute x below bigmap :: IntMap (IntMap [SNum]) bigmap = IM.fromListWith (IM.unionWith (++)) [ (i, IM.singleton to [from]) | (from, state) <- Map.toList statemap, (i,to) <- IM.toList (state_out state) ] -- incoming I A = the set of states that transition to a state in -- A on token I. incoming :: Int -> IntSet -> IntSet incoming i a = IS.fromList (concat ss) where map1 = IM.findWithDefault IM.empty i bigmap ss = [ IM.findWithDefault [] s map1 | s <- IS.toList a ] -- The outer loop: recurse on each set in Q go p [] = p go p (a:q) = go1 0 p q where -- recurse on each token (0..255) go1 256 p q = go p q go1 i p q = go1 (i+1) p' q' where (p',q') = go2 p [] q x = incoming i a -- recurse on each set in P go2 [] p' q = (p',q) go2 (y:p) p' q | IS.null i || IS.null d = go2 p (y:p') q | otherwise = go2 p (i:d:p') q1 where i = IS.intersection x y d = IS.difference y x q1 = replaceyin q where replaceyin [] = if IS.size i < IS.size d then [i] else [d] replaceyin (z:zs) | z == y = i : d : zs | otherwise = z : replaceyin zs