----------------------------------------------------------------------
-- |
-- Module      : GF.Speech.CFGToFA
--
-- Approximates CFGs with finite state networks.
----------------------------------------------------------------------
module GF.Speech.CFGToFA (cfgToFA, makeSimpleRegular,
                          MFA(..), cfgToMFA, cfgToFA') where

import Data.List
import Data.Maybe
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Set (Set)
import qualified Data.Set as Set

import PGF.Internal
import GF.Data.Utilities
import GF.Grammar.CFG
--import GF.Speech.PGFToCFG
--import GF.Infra.Ident (Ident)

import GF.Data.Graph
--import GF.Data.Relation
import GF.Speech.FiniteState
--import GF.Speech.CFG

data Recursivity = RightR | LeftR | NotR

data MutRecSet = MutRecSet {
                            MutRecSet -> Set Cat
mrCats :: Set Cat,
                            MutRecSet -> [CFRule]
mrNonRecRules :: [CFRule],
                            MutRecSet -> [CFRule]
mrRecRules :: [CFRule],
                            MutRecSet -> Recursivity
mrRec :: Recursivity
                           }


type MutRecSets = Map Cat MutRecSet

--
-- * Multiple DFA type
--

data MFA = MFA Cat [(Cat,DFA CFSymbol)]



cfgToFA :: CFG -> DFA Token
cfgToFA :: CFG -> DFA Cat
cfgToFA = NFA Cat -> DFA Cat
forall a. Ord a => NFA a -> DFA a
minimize (NFA Cat -> DFA Cat) -> (CFG -> NFA Cat) -> CFG -> DFA Cat
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CFG -> NFA Cat
compileAutomaton (CFG -> NFA Cat) -> (CFG -> CFG) -> CFG -> NFA Cat
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CFG -> CFG
makeSimpleRegular


--
-- * Compile strongly regular grammars to NFAs
--

-- Convert a strongly regular grammar to a finite automaton.
compileAutomaton :: CFG -> NFA Token
compileAutomaton :: CFG -> NFA Cat
compileAutomaton CFG
g = (CFG, MutRecSets)
-> State -> [CFSymbol] -> State -> NFA Cat -> NFA Cat
make_fa (CFG
g,MutRecSets
ns) State
s [Cat -> CFSymbol
forall c t. c -> Symbol c t
NonTerminal (CFG -> Cat
forall c t. Grammar c t -> c
cfgStartCat CFG
g)] State
f NFA Cat
forall b. FA State () b
fa
  where 
  (FA State () b
fa,State
s,State
f) = (FA State () b, State, State)
forall n b. Enum n => (FA n () b, n, n)
newFA_
  ns :: MutRecSets
ns = CFG -> [Set Cat] -> MutRecSets
mutRecSets CFG
g ([Set Cat] -> MutRecSets) -> [Set Cat] -> MutRecSets
forall a b. (a -> b) -> a -> b
$ Bool -> CFG -> [Set Cat]
forall c t. Ord c => Bool -> Grammar c t -> [Set c]
mutRecCats Bool
False CFG
g

-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
--   Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000.
make_fa :: (CFG,MutRecSets) -> State -> [CFSymbol] -> State 
          -> NFA Token -> NFA Token
make_fa :: (CFG, MutRecSets)
-> State -> [CFSymbol] -> State -> NFA Cat -> NFA Cat
make_fa c :: (CFG, MutRecSets)
c@(CFG
g,MutRecSets
ns) State
q0 [CFSymbol]
alpha State
q1 NFA Cat
fa = 
   case [CFSymbol]
alpha of
        []              -> State -> State -> Maybe Cat -> NFA Cat -> NFA Cat
forall n b a. n -> n -> b -> FA n a b -> FA n a b
newTransition State
q0 State
q1 Maybe Cat
forall a. Maybe a
Nothing NFA Cat
fa
        [Terminal Cat
t]    -> State -> State -> Maybe Cat -> NFA Cat -> NFA Cat
forall n b a. n -> n -> b -> FA n a b -> FA n a b
newTransition State
q0 State
q1 (Cat -> Maybe Cat
forall a. a -> Maybe a
Just Cat
t) NFA Cat
fa
        [NonTerminal Cat
a] -> 
            case Cat -> MutRecSets -> Maybe MutRecSet
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Cat
a MutRecSets
ns of
              -- a is recursive
              Just n :: MutRecSet
n@(MutRecSet { mrCats :: MutRecSet -> Set Cat
mrCats = Set Cat
ni, mrNonRecRules :: MutRecSet -> [CFRule]
mrNonRecRules = [CFRule]
nrs, mrRecRules :: MutRecSet -> [CFRule]
mrRecRules = [CFRule]
rs} ) -> 
                  case MutRecSet -> Recursivity
mrRec MutRecSet
n of
                    -- the set Ni is right-recursive or cyclic
                    Recursivity
RightR ->
                        let new :: [(State, [CFSymbol], State)]
new = [(Cat -> State
getState Cat
c, [CFSymbol]
xs, State
q1) | Rule Cat
c [CFSymbol]
xs CFTerm
_ <- [CFRule]
nrs]
                                  [(State, [CFSymbol], State)]
-> [(State, [CFSymbol], State)] -> [(State, [CFSymbol], State)]
forall a. [a] -> [a] -> [a]
++ [(Cat -> State
getState Cat
c, [CFSymbol]
xs, Cat -> State
getState Cat
d) | Rule Cat
c [CFSymbol]
ss CFTerm
_ <- [CFRule]
rs, 
                                       let ([CFSymbol]
xs,NonTerminal Cat
d) = ([CFSymbol] -> [CFSymbol]
forall a. [a] -> [a]
init [CFSymbol]
ss,[CFSymbol] -> CFSymbol
forall a. [a] -> a
last [CFSymbol]
ss)]
                         in [(State, [CFSymbol], State)] -> NFA Cat -> NFA Cat
forall (t :: * -> *).
Foldable t =>
t (State, [CFSymbol], State) -> NFA Cat -> NFA Cat
make_fas [(State, [CFSymbol], State)]
new (NFA Cat -> NFA Cat) -> NFA Cat -> NFA Cat
forall a b. (a -> b) -> a -> b
$ State -> State -> Maybe Cat -> NFA Cat -> NFA Cat
forall n b a. n -> n -> b -> FA n a b -> FA n a b
newTransition State
q0 (Cat -> State
getState Cat
a) Maybe Cat
forall a. Maybe a
Nothing NFA Cat
fa'
                    -- the set Ni is left-recursive                         
                    Recursivity
LeftR ->
                        let new :: [(State, [CFSymbol], State)]
new = [(State
q0, [CFSymbol]
xs, Cat -> State
getState Cat
c) | Rule Cat
c [CFSymbol]
xs CFTerm
_ <- [CFRule]
nrs]
                                  [(State, [CFSymbol], State)]
-> [(State, [CFSymbol], State)] -> [(State, [CFSymbol], State)]
forall a. [a] -> [a] -> [a]
++ [(Cat -> State
getState Cat
d, [CFSymbol]
xs, Cat -> State
getState Cat
c) | Rule Cat
c (NonTerminal Cat
d:[CFSymbol]
xs) CFTerm
_ <- [CFRule]
rs]
                         in [(State, [CFSymbol], State)] -> NFA Cat -> NFA Cat
forall (t :: * -> *).
Foldable t =>
t (State, [CFSymbol], State) -> NFA Cat -> NFA Cat
make_fas [(State, [CFSymbol], State)]
new (NFA Cat -> NFA Cat) -> NFA Cat -> NFA Cat
forall a b. (a -> b) -> a -> b
$ State -> State -> Maybe Cat -> NFA Cat -> NFA Cat
forall n b a. n -> n -> b -> FA n a b -> FA n a b
newTransition (Cat -> State
getState Cat
a) State
q1 Maybe Cat
forall a. Maybe a
Nothing NFA Cat
fa'
                where
                  (NFA Cat
fa',Map Cat State
stateMap) = Set Cat -> NFA Cat -> (NFA Cat, Map Cat State)
forall t. Set Cat -> NFA t -> (NFA t, Map Cat State)
addStatesForCats Set Cat
ni NFA Cat
fa
                  getState :: Cat -> State
getState Cat
x = State -> Cat -> Map Cat State -> State
forall k a. Ord k => a -> k -> Map k a -> a
Map.findWithDefault 
                               (Cat -> State
forall a. HasCallStack => Cat -> a
error (Cat -> State) -> Cat -> State
forall a b. (a -> b) -> a -> b
$ Cat
"CFGToFiniteState: No state for " Cat -> Cat -> Cat
forall a. [a] -> [a] -> [a]
++ Cat
x) 
                               Cat
x Map Cat State
stateMap
              -- a is not recursive
              Maybe MutRecSet
Nothing -> let rs :: [CFRule]
rs = CFG -> Cat -> [CFRule]
forall c t. Ord c => Grammar c t -> c -> [Rule c t]
catRules CFG
g Cat
a
                          in (NFA Cat -> CFRule -> NFA Cat) -> NFA Cat -> [CFRule] -> NFA Cat
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\NFA Cat
f (Rule Cat
_ [CFSymbol]
b CFTerm
_) -> State -> [CFSymbol] -> State -> NFA Cat -> NFA Cat
make_fa_ State
q0 [CFSymbol]
b State
q1 NFA Cat
f) NFA Cat
fa [CFRule]
rs
        (CFSymbol
x:[CFSymbol]
beta) -> let (NFA Cat
fa',State
q) = () -> NFA Cat -> (NFA Cat, State)
forall a n b. a -> FA n a b -> (FA n a b, n)
newState () NFA Cat
fa
                     in State -> [CFSymbol] -> State -> NFA Cat -> NFA Cat
make_fa_ State
q [CFSymbol]
beta State
q1 (NFA Cat -> NFA Cat) -> NFA Cat -> NFA Cat
forall a b. (a -> b) -> a -> b
$ State -> [CFSymbol] -> State -> NFA Cat -> NFA Cat
make_fa_ State
q0 [CFSymbol
x] State
q NFA Cat
fa'
  where
  make_fa_ :: State -> [CFSymbol] -> State -> NFA Cat -> NFA Cat
make_fa_ = (CFG, MutRecSets)
-> State -> [CFSymbol] -> State -> NFA Cat -> NFA Cat
make_fa (CFG, MutRecSets)
c
  make_fas :: t (State, [CFSymbol], State) -> NFA Cat -> NFA Cat
make_fas t (State, [CFSymbol], State)
xs NFA Cat
fa = (NFA Cat -> (State, [CFSymbol], State) -> NFA Cat)
-> NFA Cat -> t (State, [CFSymbol], State) -> NFA Cat
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\NFA Cat
f' (State
s1,[CFSymbol]
xs,State
s2) -> State -> [CFSymbol] -> State -> NFA Cat -> NFA Cat
make_fa_ State
s1 [CFSymbol]
xs State
s2 NFA Cat
f') NFA Cat
fa t (State, [CFSymbol], State)
xs

--
-- * Compile a strongly regular grammar to a DFA with sub-automata
--

cfgToMFA :: CFG -> MFA
cfgToMFA :: CFG -> MFA
cfgToMFA  = CFG -> MFA
buildMFA (CFG -> MFA) -> (CFG -> CFG) -> CFG -> MFA
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CFG -> CFG
makeSimpleRegular

-- | Build a DFA by building and expanding an MFA
cfgToFA' :: CFG -> DFA Token
cfgToFA' :: CFG -> DFA Cat
cfgToFA' = MFA -> DFA Cat
mfaToDFA (MFA -> DFA Cat) -> (CFG -> MFA) -> CFG -> DFA Cat
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CFG -> MFA
cfgToMFA

buildMFA :: CFG -> MFA
buildMFA :: CFG -> MFA
buildMFA CFG
g = MFA -> MFA
sortSubLats (MFA -> MFA) -> MFA -> MFA
forall a b. (a -> b) -> a -> b
$ MFA -> MFA
removeUnusedSubLats MFA
mfa
  where fas :: [(Cat, NFA CFSymbol)]
fas = CFG -> [(Cat, NFA CFSymbol)]
compileAutomata CFG
g
        mfa :: MFA
mfa = Cat -> [(Cat, DFA CFSymbol)] -> MFA
MFA (CFG -> Cat
forall c t. Grammar c t -> c
cfgStartCat CFG
g) [(Cat
c, NFA CFSymbol -> DFA CFSymbol
forall a. Ord a => NFA a -> DFA a
minimize NFA CFSymbol
fa) | (Cat
c,NFA CFSymbol
fa) <- [(Cat, NFA CFSymbol)]
fas]

mfaStartDFA :: MFA -> DFA CFSymbol
mfaStartDFA :: MFA -> DFA CFSymbol
mfaStartDFA (MFA Cat
start [(Cat, DFA CFSymbol)]
subs) = 
    DFA CFSymbol -> Maybe (DFA CFSymbol) -> DFA CFSymbol
forall a. a -> Maybe a -> a
fromMaybe (Cat -> DFA CFSymbol
forall a. HasCallStack => Cat -> a
error (Cat -> DFA CFSymbol) -> Cat -> DFA CFSymbol
forall a b. (a -> b) -> a -> b
$ Cat
"Bad start MFA: " Cat -> Cat -> Cat
forall a. [a] -> [a] -> [a]
++ Cat
start) (Maybe (DFA CFSymbol) -> DFA CFSymbol)
-> Maybe (DFA CFSymbol) -> DFA CFSymbol
forall a b. (a -> b) -> a -> b
$ Cat -> [(Cat, DFA CFSymbol)] -> Maybe (DFA CFSymbol)
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Cat
start [(Cat, DFA CFSymbol)]
subs

mfaToDFA :: MFA -> DFA Token
mfaToDFA :: MFA -> DFA Cat
mfaToDFA mfa :: MFA
mfa@(MFA Cat
_ [(Cat, DFA CFSymbol)]
subs) = NFA Cat -> DFA Cat
forall a. Ord a => NFA a -> DFA a
minimize (NFA Cat -> DFA Cat) -> NFA Cat -> DFA Cat
forall a b. (a -> b) -> a -> b
$ NFA CFSymbol -> NFA Cat
expand (NFA CFSymbol -> NFA Cat) -> NFA CFSymbol -> NFA Cat
forall a b. (a -> b) -> a -> b
$ DFA CFSymbol -> NFA CFSymbol
forall a. DFA a -> NFA a
dfa2nfa (DFA CFSymbol -> NFA CFSymbol) -> DFA CFSymbol -> NFA CFSymbol
forall a b. (a -> b) -> a -> b
$ MFA -> DFA CFSymbol
mfaStartDFA MFA
mfa
  where
  subs' :: Map Cat (NFA CFSymbol)
subs' = [(Cat, NFA CFSymbol)] -> Map Cat (NFA CFSymbol)
forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList [(Cat
c, DFA CFSymbol -> NFA CFSymbol
forall a. DFA a -> NFA a
dfa2nfa DFA CFSymbol
n) | (Cat
c,DFA CFSymbol
n) <- [(Cat, DFA CFSymbol)]
subs]
  getSub :: Cat -> NFA CFSymbol
getSub Cat
l = Maybe (NFA CFSymbol) -> NFA CFSymbol
forall a. HasCallStack => Maybe a -> a
fromJust (Maybe (NFA CFSymbol) -> NFA CFSymbol)
-> Maybe (NFA CFSymbol) -> NFA CFSymbol
forall a b. (a -> b) -> a -> b
$ Cat -> Map Cat (NFA CFSymbol) -> Maybe (NFA CFSymbol)
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Cat
l Map Cat (NFA CFSymbol)
subs'
  expand :: NFA CFSymbol -> NFA Cat
expand (FA (Graph [State]
c [Node State ()]
ns [Edge State (Maybe CFSymbol)]
es) State
s [State]
f) 
      = (NFA Cat -> Edge State (Maybe CFSymbol) -> NFA Cat)
-> NFA Cat -> [Edge State (Maybe CFSymbol)] -> NFA Cat
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' NFA Cat -> Edge State (Maybe CFSymbol) -> NFA Cat
expandEdge (Graph State () (Maybe Cat) -> State -> [State] -> NFA Cat
forall n a b. Graph n a b -> n -> [n] -> FA n a b
FA ([State]
-> [Node State ()]
-> [Edge State (Maybe Cat)]
-> Graph State () (Maybe Cat)
forall n a b. [n] -> [Node n a] -> [Edge n b] -> Graph n a b
Graph [State]
c [Node State ()]
ns []) State
s [State]
f) [Edge State (Maybe CFSymbol)]
es
  expandEdge :: NFA Cat -> Edge State (Maybe CFSymbol) -> NFA Cat
expandEdge NFA Cat
fa (State
f,State
t,Maybe CFSymbol
x) = 
      case Maybe CFSymbol
x of
        Maybe CFSymbol
Nothing              -> State -> State -> Maybe Cat -> NFA Cat -> NFA Cat
forall n b a. n -> n -> b -> FA n a b -> FA n a b
newTransition State
f State
t Maybe Cat
forall a. Maybe a
Nothing  NFA Cat
fa
        Just (Terminal Cat
s)    -> State -> State -> Maybe Cat -> NFA Cat -> NFA Cat
forall n b a. n -> n -> b -> FA n a b -> FA n a b
newTransition State
f State
t (Cat -> Maybe Cat
forall a. a -> Maybe a
Just Cat
s) NFA Cat
fa
        Just (NonTerminal Cat
l) -> NFA Cat -> (State, State) -> NFA Cat -> NFA Cat
forall a. NFA a -> (State, State) -> NFA a -> NFA a
insertNFA NFA Cat
fa (State
f,State
t) (NFA CFSymbol -> NFA Cat
expand (NFA CFSymbol -> NFA Cat) -> NFA CFSymbol -> NFA Cat
forall a b. (a -> b) -> a -> b
$ Cat -> NFA CFSymbol
getSub Cat
l)

removeUnusedSubLats :: MFA -> MFA
removeUnusedSubLats :: MFA -> MFA
removeUnusedSubLats mfa :: MFA
mfa@(MFA Cat
start [(Cat, DFA CFSymbol)]
subs) = Cat -> [(Cat, DFA CFSymbol)] -> MFA
MFA Cat
start [(Cat
c,DFA CFSymbol
s) | (Cat
c,DFA CFSymbol
s) <- [(Cat, DFA CFSymbol)]
subs, Cat -> Bool
isUsed Cat
c]
  where
  usedMap :: Map Cat (Set Cat)
usedMap = MFA -> Map Cat (Set Cat)
subLatUseMap MFA
mfa
  used :: Set Cat
used = Set Cat -> Set Cat
growUsedSet (Cat -> Set Cat
forall a. a -> Set a
Set.singleton Cat
start)
  isUsed :: Cat -> Bool
isUsed Cat
c = Cat
c Cat -> Set Cat -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.member` Set Cat
used
  growUsedSet :: Set Cat -> Set Cat
growUsedSet = (Set Cat -> Set Cat) -> Set Cat -> Set Cat
forall a. Eq a => (a -> a) -> a -> a
fix (\Set Cat
s -> (Set Cat -> Set Cat -> Set Cat) -> Set Cat -> [Set Cat] -> Set Cat
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl Set Cat -> Set Cat -> Set Cat
forall a. Ord a => Set a -> Set a -> Set a
Set.union Set Cat
s ([Set Cat] -> Set Cat) -> [Set Cat] -> Set Cat
forall a b. (a -> b) -> a -> b
$ (Cat -> Maybe (Set Cat)) -> [Cat] -> [Set Cat]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe ((Cat -> Map Cat (Set Cat) -> Maybe (Set Cat))
-> Map Cat (Set Cat) -> Cat -> Maybe (Set Cat)
forall a b c. (a -> b -> c) -> b -> a -> c
flip Cat -> Map Cat (Set Cat) -> Maybe (Set Cat)
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Map Cat (Set Cat)
usedMap) ([Cat] -> [Set Cat]) -> [Cat] -> [Set Cat]
forall a b. (a -> b) -> a -> b
$ Set Cat -> [Cat]
forall a. Set a -> [a]
Set.toList Set Cat
s)

subLatUseMap :: MFA -> Map Cat (Set Cat)
subLatUseMap :: MFA -> Map Cat (Set Cat)
subLatUseMap (MFA Cat
_ [(Cat, DFA CFSymbol)]
subs) = [(Cat, Set Cat)] -> Map Cat (Set Cat)
forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList [(Cat
c,DFA CFSymbol -> Set Cat
usedSubLats DFA CFSymbol
n) | (Cat
c,DFA CFSymbol
n) <- [(Cat, DFA CFSymbol)]
subs]

usedSubLats :: DFA CFSymbol -> Set Cat
usedSubLats :: DFA CFSymbol -> Set Cat
usedSubLats DFA CFSymbol
fa = [Cat] -> Set Cat
forall a. Ord a => [a] -> Set a
Set.fromList [Cat
s | (State
_,State
_,NonTerminal Cat
s) <- DFA CFSymbol -> [(State, State, CFSymbol)]
forall n a b. FA n a b -> [(n, n, b)]
transitions DFA CFSymbol
fa]

-- | Sort sub-networks topologically.
sortSubLats :: MFA -> MFA
sortSubLats :: MFA -> MFA
sortSubLats mfa :: MFA
mfa@(MFA Cat
main [(Cat, DFA CFSymbol)]
subs) = Cat -> [(Cat, DFA CFSymbol)] -> MFA
MFA Cat
main ([(Cat, DFA CFSymbol)] -> [(Cat, DFA CFSymbol)]
forall a. [a] -> [a]
reverse ([(Cat, DFA CFSymbol)] -> [(Cat, DFA CFSymbol)])
-> [(Cat, DFA CFSymbol)] -> [(Cat, DFA CFSymbol)]
forall a b. (a -> b) -> a -> b
$ Map Cat (Set Cat) -> [(Cat, DFA CFSymbol)] -> [(Cat, DFA CFSymbol)]
forall k b. Ord k => Map k (Set k) -> [(k, b)] -> [(k, b)]
sortLats Map Cat (Set Cat)
usedByMap [(Cat, DFA CFSymbol)]
subs)
  where
  usedByMap :: Map Cat (Set Cat)
usedByMap = Map Cat (Set Cat) -> Map Cat (Set Cat)
forall a b. (Ord a, Ord b) => Map a (Set b) -> Map b (Set a)
revMultiMap (MFA -> Map Cat (Set Cat)
subLatUseMap MFA
mfa)
  sortLats :: Map k (Set k) -> [(k, b)] -> [(k, b)]
sortLats Map k (Set k)
_ [] = []
  sortLats Map k (Set k)
ub [(k, b)]
ls = [(k, b)]
xs [(k, b)] -> [(k, b)] -> [(k, b)]
forall a. [a] -> [a] -> [a]
++ Map k (Set k) -> [(k, b)] -> [(k, b)]
sortLats Map k (Set k)
ub' [(k, b)]
ys
      where ([(k, b)]
xs,[(k, b)]
ys) = ((k, b) -> Bool) -> [(k, b)] -> ([(k, b)], [(k, b)])
forall a. (a -> Bool) -> [a] -> ([a], [a])
partition ((State -> State -> Bool
forall a. Eq a => a -> a -> Bool
==State
0) (State -> Bool) -> ((k, b) -> State) -> (k, b) -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (k, b) -> State
forall b. (k, b) -> State
indeg) [(k, b)]
ls
            ub' :: Map k (Set k)
ub' = (Set k -> Set k) -> Map k (Set k) -> Map k (Set k)
forall a b k. (a -> b) -> Map k a -> Map k b
Map.map (Set k -> Set k -> Set k
forall a. Ord a => Set a -> Set a -> Set a
Set.\\ [k] -> Set k
forall a. Ord a => [a] -> Set a
Set.fromList (((k, b) -> k) -> [(k, b)] -> [k]
forall a b. (a -> b) -> [a] -> [b]
map (k, b) -> k
forall a b. (a, b) -> a
fst [(k, b)]
xs)) Map k (Set k)
ub
            indeg :: (k, b) -> State
indeg (k
c,b
_) = State -> (Set k -> State) -> Maybe (Set k) -> State
forall b a. b -> (a -> b) -> Maybe a -> b
maybe State
0 Set k -> State
forall a. Set a -> State
Set.size (Maybe (Set k) -> State) -> Maybe (Set k) -> State
forall a b. (a -> b) -> a -> b
$ k -> Map k (Set k) -> Maybe (Set k)
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup k
c Map k (Set k)
ub

-- | Convert a strongly regular grammar to a number of finite automata,
--   one for each non-terminal.
--   The edges in the automata accept tokens, or name another automaton to use.
compileAutomata :: CFG
                 -> [(Cat,NFA CFSymbol)]
                    -- ^ A map of non-terminals and their automata.
compileAutomata :: CFG -> [(Cat, NFA CFSymbol)]
compileAutomata CFG
g = [(Cat
c, Cat -> NFA CFSymbol
makeOneFA Cat
c) | Cat
c <- CFG -> [Cat]
forall c t. Grammar c t -> [c]
allCats CFG
g]
  where
  mrs :: MutRecSets
mrs = CFG -> [Set Cat] -> MutRecSets
mutRecSets CFG
g ([Set Cat] -> MutRecSets) -> [Set Cat] -> MutRecSets
forall a b. (a -> b) -> a -> b
$ Bool -> CFG -> [Set Cat]
forall c t. Ord c => Bool -> Grammar c t -> [Set c]
mutRecCats Bool
True CFG
g
  makeOneFA :: Cat -> NFA CFSymbol
makeOneFA Cat
c = MutRecSet
-> State -> [CFSymbol] -> State -> NFA CFSymbol -> NFA CFSymbol
make_fa1 MutRecSet
mr State
s [Cat -> CFSymbol
forall c t. c -> Symbol c t
NonTerminal Cat
c] State
f NFA CFSymbol
forall b. FA State () b
fa 
    where (FA State () b
fa,State
s,State
f) = (FA State () b, State, State)
forall n b. Enum n => (FA n () b, n, n)
newFA_
          mr :: MutRecSet
mr = Maybe MutRecSet -> MutRecSet
forall a. HasCallStack => Maybe a -> a
fromJust (Cat -> MutRecSets -> Maybe MutRecSet
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Cat
c MutRecSets
mrs)


-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
--   Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000,
--   adapted to build a finite automaton for a single (mutually recursive) set only.
--   Categories not in the set will result in category-labelled edges.
make_fa1 :: MutRecSet -- ^ The set of (mutually recursive) categories for which
                      --   we are building the automaton.
         -> State     -- ^ State to come from
         -> [CFSymbol] -- ^ Symbols to accept
         -> State     -- ^ State to end up in
         -> NFA CFSymbol -- ^ FA to add to.
         -> NFA CFSymbol
make_fa1 :: MutRecSet
-> State -> [CFSymbol] -> State -> NFA CFSymbol -> NFA CFSymbol
make_fa1 MutRecSet
mr State
q0 [CFSymbol]
alpha State
q1 NFA CFSymbol
fa = 
   case [CFSymbol]
alpha of
        []        -> State -> State -> Maybe CFSymbol -> NFA CFSymbol -> NFA CFSymbol
forall n b a. n -> n -> b -> FA n a b -> FA n a b
newTransition State
q0 State
q1 Maybe CFSymbol
forall a. Maybe a
Nothing NFA CFSymbol
fa
        [t :: CFSymbol
t@(Terminal Cat
_)] -> State -> State -> Maybe CFSymbol -> NFA CFSymbol -> NFA CFSymbol
forall n b a. n -> n -> b -> FA n a b -> FA n a b
newTransition State
q0 State
q1 (CFSymbol -> Maybe CFSymbol
forall a. a -> Maybe a
Just CFSymbol
t) NFA CFSymbol
fa
        [c :: CFSymbol
c@(NonTerminal Cat
a)] | Bool -> Bool
not (Cat
a Cat -> Set Cat -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.member` MutRecSet -> Set Cat
mrCats MutRecSet
mr) -> State -> State -> Maybe CFSymbol -> NFA CFSymbol -> NFA CFSymbol
forall n b a. n -> n -> b -> FA n a b -> FA n a b
newTransition State
q0 State
q1 (CFSymbol -> Maybe CFSymbol
forall a. a -> Maybe a
Just CFSymbol
c) NFA CFSymbol
fa
        [NonTerminal Cat
a] ->
            case MutRecSet -> Recursivity
mrRec MutRecSet
mr of
                Recursivity
NotR -> -- the set is a non-recursive (always singleton) set of categories
                        -- so the set of category rules is the set of rules for the whole set
                    [(State, [CFSymbol], State)] -> NFA CFSymbol -> NFA CFSymbol
forall (t :: * -> *).
Foldable t =>
t (State, [CFSymbol], State) -> NFA CFSymbol -> NFA CFSymbol
make_fas [(State
q0, [CFSymbol]
b, State
q1) | Rule Cat
_ [CFSymbol]
b CFTerm
_ <- MutRecSet -> [CFRule]
mrNonRecRules MutRecSet
mr] NFA CFSymbol
fa
                Recursivity
RightR -> -- the set is right-recursive or cyclic
                    let new :: [(State, [CFSymbol], State)]
new = [(Cat -> State
getState Cat
c, [CFSymbol]
xs, State
q1) | Rule Cat
c [CFSymbol]
xs CFTerm
_ <- MutRecSet -> [CFRule]
mrNonRecRules MutRecSet
mr]
                              [(State, [CFSymbol], State)]
-> [(State, [CFSymbol], State)] -> [(State, [CFSymbol], State)]
forall a. [a] -> [a] -> [a]
++ [(Cat -> State
getState Cat
c, [CFSymbol]
xs, Cat -> State
getState Cat
d) | Rule Cat
c [CFSymbol]
ss CFTerm
_ <- MutRecSet -> [CFRule]
mrRecRules MutRecSet
mr, 
                                                                 let ([CFSymbol]
xs,NonTerminal Cat
d) = ([CFSymbol] -> [CFSymbol]
forall a. [a] -> [a]
init [CFSymbol]
ss,[CFSymbol] -> CFSymbol
forall a. [a] -> a
last [CFSymbol]
ss)]
                     in [(State, [CFSymbol], State)] -> NFA CFSymbol -> NFA CFSymbol
forall (t :: * -> *).
Foldable t =>
t (State, [CFSymbol], State) -> NFA CFSymbol -> NFA CFSymbol
make_fas [(State, [CFSymbol], State)]
new (NFA CFSymbol -> NFA CFSymbol) -> NFA CFSymbol -> NFA CFSymbol
forall a b. (a -> b) -> a -> b
$ State -> State -> Maybe CFSymbol -> NFA CFSymbol -> NFA CFSymbol
forall n b a. n -> n -> b -> FA n a b -> FA n a b
newTransition State
q0 (Cat -> State
getState Cat
a) Maybe CFSymbol
forall a. Maybe a
Nothing NFA CFSymbol
fa'
                Recursivity
LeftR -> -- the set is left-recursive
                    let new :: [(State, [CFSymbol], State)]
new = [(State
q0, [CFSymbol]
xs, Cat -> State
getState Cat
c) | Rule Cat
c [CFSymbol]
xs CFTerm
_ <- MutRecSet -> [CFRule]
mrNonRecRules MutRecSet
mr]
                              [(State, [CFSymbol], State)]
-> [(State, [CFSymbol], State)] -> [(State, [CFSymbol], State)]
forall a. [a] -> [a] -> [a]
++ [(Cat -> State
getState Cat
d, [CFSymbol]
xs, Cat -> State
getState Cat
c) | Rule Cat
c (NonTerminal Cat
d:[CFSymbol]
xs) CFTerm
_ <- MutRecSet -> [CFRule]
mrRecRules MutRecSet
mr]
                     in [(State, [CFSymbol], State)] -> NFA CFSymbol -> NFA CFSymbol
forall (t :: * -> *).
Foldable t =>
t (State, [CFSymbol], State) -> NFA CFSymbol -> NFA CFSymbol
make_fas [(State, [CFSymbol], State)]
new (NFA CFSymbol -> NFA CFSymbol) -> NFA CFSymbol -> NFA CFSymbol
forall a b. (a -> b) -> a -> b
$ State -> State -> Maybe CFSymbol -> NFA CFSymbol -> NFA CFSymbol
forall n b a. n -> n -> b -> FA n a b -> FA n a b
newTransition (Cat -> State
getState Cat
a) State
q1 Maybe CFSymbol
forall a. Maybe a
Nothing NFA CFSymbol
fa'
             where
             (NFA CFSymbol
fa',Map Cat State
stateMap) = Set Cat -> NFA CFSymbol -> (NFA CFSymbol, Map Cat State)
forall t. Set Cat -> NFA t -> (NFA t, Map Cat State)
addStatesForCats (MutRecSet -> Set Cat
mrCats MutRecSet
mr) NFA CFSymbol
fa
             getState :: Cat -> State
getState Cat
x = State -> Cat -> Map Cat State -> State
forall k a. Ord k => a -> k -> Map k a -> a
Map.findWithDefault 
                          (Cat -> State
forall a. HasCallStack => Cat -> a
error (Cat -> State) -> Cat -> State
forall a b. (a -> b) -> a -> b
$ Cat
"CFGToFiniteState: No state for " Cat -> Cat -> Cat
forall a. [a] -> [a] -> [a]
++ Cat
x) 
                          Cat
x Map Cat State
stateMap
        (CFSymbol
x:[CFSymbol]
beta) -> let (NFA CFSymbol
fa',State
q) = () -> NFA CFSymbol -> (NFA CFSymbol, State)
forall a n b. a -> FA n a b -> (FA n a b, n)
newState () NFA CFSymbol
fa
                     in [(State, [CFSymbol], State)] -> NFA CFSymbol -> NFA CFSymbol
forall (t :: * -> *).
Foldable t =>
t (State, [CFSymbol], State) -> NFA CFSymbol -> NFA CFSymbol
make_fas [(State
q0,[CFSymbol
x],State
q),(State
q,[CFSymbol]
beta,State
q1)] NFA CFSymbol
fa'
  where
  make_fas :: t (State, [CFSymbol], State) -> NFA CFSymbol -> NFA CFSymbol
make_fas t (State, [CFSymbol], State)
xs NFA CFSymbol
fa = (NFA CFSymbol -> (State, [CFSymbol], State) -> NFA CFSymbol)
-> NFA CFSymbol -> t (State, [CFSymbol], State) -> NFA CFSymbol
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\NFA CFSymbol
f' (State
s1,[CFSymbol]
xs,State
s2) -> MutRecSet
-> State -> [CFSymbol] -> State -> NFA CFSymbol -> NFA CFSymbol
make_fa1 MutRecSet
mr State
s1 [CFSymbol]
xs State
s2 NFA CFSymbol
f') NFA CFSymbol
fa t (State, [CFSymbol], State)
xs

mutRecSets :: CFG -> [Set Cat] -> MutRecSets
mutRecSets :: CFG -> [Set Cat] -> MutRecSets
mutRecSets CFG
g = [(Cat, MutRecSet)] -> MutRecSets
forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList ([(Cat, MutRecSet)] -> MutRecSets)
-> ([Set Cat] -> [(Cat, MutRecSet)]) -> [Set Cat] -> MutRecSets
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Set Cat -> [(Cat, MutRecSet)]) -> [Set Cat] -> [(Cat, MutRecSet)]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Set Cat -> [(Cat, MutRecSet)]
mkMutRecSet
  where 
  mkMutRecSet :: Set Cat -> [(Cat, MutRecSet)]
mkMutRecSet Set Cat
cs = [ (Cat
c,MutRecSet
ms) | Cat
c <- [Cat]
csl ]
   where csl :: [Cat]
csl = Set Cat -> [Cat]
forall a. Set a -> [a]
Set.toList Set Cat
cs
         rs :: [CFRule]
rs = CFG -> Set Cat -> [CFRule]
catSetRules CFG
g Set Cat
cs
         ([CFRule]
nrs,[CFRule]
rrs) = (CFRule -> Bool) -> [CFRule] -> ([CFRule], [CFRule])
forall a. (a -> Bool) -> [a] -> ([a], [a])
partition (Set Cat -> CFRule -> Bool
forall c t. Ord c => Set c -> Rule c t -> Bool
ruleIsNonRecursive Set Cat
cs) [CFRule]
rs
         ms :: MutRecSet
ms = MutRecSet :: Set Cat -> [CFRule] -> [CFRule] -> Recursivity -> MutRecSet
MutRecSet {
                         mrCats :: Set Cat
mrCats = Set Cat
cs,
                         mrNonRecRules :: [CFRule]
mrNonRecRules = [CFRule]
nrs,
                         mrRecRules :: [CFRule]
mrRecRules = [CFRule]
rrs,
                         mrRec :: Recursivity
mrRec = Recursivity
rec
                        }
         rec :: Recursivity
rec | [CFRule] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [CFRule]
rrs = Recursivity
NotR
             | (CFRule -> Bool) -> [CFRule] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (Set Cat -> CFRule -> Bool
forall c t. Ord c => Set c -> Rule c t -> Bool
isRightLinear Set Cat
cs) [CFRule]
rrs = Recursivity
RightR
             | Bool
otherwise = Recursivity
LeftR

--
-- * Utilities
--

-- | Add a state for the given NFA for each of the categories
--   in the given set. Returns a map of categories to their
--   corresponding states.
addStatesForCats :: Set Cat -> NFA t -> (NFA t, Map Cat State)
addStatesForCats :: Set Cat -> NFA t -> (NFA t, Map Cat State)
addStatesForCats Set Cat
cs NFA t
fa = (NFA t
fa', Map Cat State
m)
  where (NFA t
fa', [Node State ()]
ns) = [()] -> NFA t -> (NFA t, [Node State ()])
forall a n b. [a] -> FA n a b -> (FA n a b, [(n, a)])
newStates (State -> () -> [()]
forall a. State -> a -> [a]
replicate (Set Cat -> State
forall a. Set a -> State
Set.size Set Cat
cs) ()) NFA t
fa
        m :: Map Cat State
m = [(Cat, State)] -> Map Cat State
forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList ([Cat] -> [State] -> [(Cat, State)]
forall a b. [a] -> [b] -> [(a, b)]
zip (Set Cat -> [Cat]
forall a. Set a -> [a]
Set.toList Set Cat
cs) ((Node State () -> State) -> [Node State ()] -> [State]
forall a b. (a -> b) -> [a] -> [b]
map Node State () -> State
forall a b. (a, b) -> a
fst [Node State ()]
ns))

revMultiMap :: (Ord a, Ord b) => Map a (Set b) -> Map b (Set a)
revMultiMap :: Map a (Set b) -> Map b (Set a)
revMultiMap Map a (Set b)
m = (Set a -> Set a -> Set a) -> [(b, Set a)] -> Map b (Set a)
forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
Map.fromListWith Set a -> Set a -> Set a
forall a. Ord a => Set a -> Set a -> Set a
Set.union [ (b
y,a -> Set a
forall a. a -> Set a
Set.singleton a
x) | (a
x,Set b
s) <- Map a (Set b) -> [(a, Set b)]
forall k a. Map k a -> [(k, a)]
Map.toList Map a (Set b)
m, b
y <- Set b -> [b]
forall a. Set a -> [a]
Set.toList Set b
s]