module Text.Regex.TDFA.TDFA(patternToRegex,DFA(..),DT(..)
,examineDFA,nfaToDFA,dfaMap) where
import Data.Monoid(Monoid(..))
import Control.Monad.State(State,MonadState(..),execState)
import Data.Array.IArray(Array,(!),bounds,)
import Data.IntMap(IntMap)
import qualified Data.IntMap as IMap(empty,keys,delete,null,lookup,fromDistinctAscList
,member,unionWith,singleton,union
,toAscList,Key,elems,toList,insert
,insertWith,insertWithKey)
import Data.IntMap.CharMap2(CharMap(..))
import qualified Data.IntMap.CharMap2 as Map(empty)
import qualified Data.IntSet as ISet(empty,singleton,null)
import Data.List(foldl')
import qualified Data.Map (Map,empty,member,insert,elems)
import Data.Sequence as S((|>),)
import Text.Regex.TDFA.Common
import Text.Regex.TDFA.IntArrTrieSet(TrieSet)
import qualified Text.Regex.TDFA.IntArrTrieSet as Trie(lookupAsc,fromSinglesMerge)
import Text.Regex.TDFA.Pattern(Pattern)
import Text.Regex.TDFA.TNFA(patternToNFA)
err :: String -> a
err s = common_error "Text.Regex.TDFA.TDFA" s
dlose :: DFA
dlose = DFA { d_id = ISet.empty
, d_dt = Simple' { dt_win = IMap.empty
, dt_trans = Map.empty
, dt_other = Transition dlose dlose mempty } }
makeDFA :: SetIndex -> DT -> DFA
makeDFA i dt = DFA i dt
nfaToDFA :: ((Index,Array Index QNFA),Array Tag OP,Array GroupIndex [GroupInfo])
-> CompOption -> ExecOption
-> Regex
nfaToDFA ((startIndex,aQNFA),aTagOp,aGroupInfo) co eo = Regex dfa startIndex indexBounds tagBounds trie aTagOp aGroupInfo ifa co eo where
dfa = indexesToDFA [startIndex]
indexBounds = bounds aQNFA
tagBounds = bounds aTagOp
ifa = (not (multiline co)) && isDFAFrontAnchored dfa
indexesToDFA = Trie.lookupAsc trie
trie :: TrieSet DFA
trie = Trie.fromSinglesMerge dlose mergeDFA (bounds aQNFA) indexToDFA
newTransition :: DTrans -> Transition
newTransition dtrans = Transition { trans_many = indexesToDFA (IMap.keys dtransWithSpawn)
, trans_single = indexesToDFA (IMap.keys dtrans)
, trans_how = dtransWithSpawn }
where dtransWithSpawn = addSpawn dtrans
makeTransition :: DTrans -> Transition
makeTransition dtrans | hasSpawn = Transition { trans_many = indexesToDFA (IMap.keys dtrans)
, trans_single = indexesToDFA (IMap.keys (IMap.delete startIndex dtrans))
, trans_how = dtrans }
| otherwise = Transition { trans_many = indexesToDFA (IMap.keys dtrans)
, trans_single = indexesToDFA (IMap.keys dtrans)
, trans_how = dtrans }
where hasSpawn = maybe False IMap.null (IMap.lookup startIndex dtrans)
addSpawn :: DTrans -> DTrans
addSpawn dtrans | IMap.member startIndex dtrans = dtrans
| otherwise = IMap.insert startIndex mempty dtrans
indexToDFA :: Index -> DFA
indexToDFA i = makeDFA (ISet.singleton source) (qtToDT qtIn)
where
(QNFA {q_id = source,q_qt = qtIn}) = aQNFA!i
qtToDT :: QT -> DT
qtToDT (Testing {qt_test=wt, qt_dopas=dopas, qt_a=a, qt_b=b}) =
Testing' { dt_test = wt
, dt_dopas = dopas
, dt_a = qtToDT a
, dt_b = qtToDT b }
qtToDT (Simple {qt_win=w, qt_trans=t, qt_other=o}) =
Simple' { dt_win = makeWinner
, dt_trans = fmap qtransToDFA t
, dt_other = qtransToDFA o}
where
makeWinner :: IntMap Instructions
makeWinner | noWin w = IMap.empty
| otherwise = IMap.singleton source (cleanWin w)
qtransToDFA :: QTrans -> Transition
qtransToDFA qtrans =
newTransition dtrans
where
dtrans :: DTrans
dtrans =IMap.fromDistinctAscList . mapSnd (IMap.singleton source) $ best
best :: [(Index ,(DoPa,Instructions))]
best = pickQTrans aTagOp $ qtrans
mergeDFA :: DFA -> DFA -> DFA
mergeDFA d1 d2 = makeDFA i dt
where
i = d_id d1 `mappend` d_id d2
dt = d_dt d1 `mergeDT` d_dt d2
mergeDT,nestDT :: DT -> DT -> DT
mergeDT (Simple' w1 t1 o1) (Simple' w2 t2 o2) = Simple' w t o
where
w = w1 `mappend` w2
t = fuseDTrans
o = mergeDTrans o1 o2
mergeDTrans :: Transition -> Transition -> Transition
mergeDTrans (Transition {trans_how=dt1}) (Transition {trans_how=dt2}) = makeTransition dtrans
where dtrans = IMap.unionWith IMap.union dt1 dt2
fuseDTrans :: CharMap Transition
fuseDTrans = CharMap (IMap.fromDistinctAscList (fuse l1 l2))
where
l1 = IMap.toAscList (unCharMap t1)
l2 = IMap.toAscList (unCharMap t2)
fuse :: [(IMap.Key, Transition)]
-> [(IMap.Key, Transition)]
-> [(IMap.Key, Transition)]
fuse [] y = fmap (fmap (mergeDTrans o1)) y
fuse x [] = fmap (fmap (mergeDTrans o2)) x
fuse x@((xc,xa):xs) y@((yc,ya):ys) =
case compare xc yc of
LT -> (xc,mergeDTrans o2 xa) : fuse xs y
EQ -> (xc,mergeDTrans xa ya) : fuse xs ys
GT -> (yc,mergeDTrans o1 ya) : fuse x ys
mergeDT dt1@(Testing' wt1 dopas1 a1 b1) dt2@(Testing' wt2 dopas2 a2 b2) =
case compare wt1 wt2 of
LT -> nestDT dt1 dt2
EQ -> Testing' { dt_test = wt1
, dt_dopas = dopas1 `mappend` dopas2
, dt_a = mergeDT a1 a2
, dt_b = mergeDT b1 b2 }
GT -> nestDT dt2 dt1
mergeDT dt1@(Testing' {}) dt2 = nestDT dt1 dt2
mergeDT dt1 dt2@(Testing' {}) = nestDT dt2 dt1
nestDT dt1@(Testing' {dt_a=a,dt_b=b}) dt2 = dt1 { dt_a = mergeDT a dt2, dt_b = mergeDT b dt2 }
nestDT _ _ = err "nestDT called on Simple -- cannot happen"
patternToRegex :: (Pattern,(GroupIndex, DoPa)) -> CompOption -> ExecOption -> Regex
patternToRegex pattern compOpt execOpt = nfaToDFA (patternToNFA compOpt pattern) compOpt execOpt
dfaMap :: DFA -> Data.Map.Map SetIndex DFA
dfaMap = seen (Data.Map.empty) where
seen old d@(DFA {d_id=i,d_dt=dt}) =
if i `Data.Map.member` old
then old
else let new = Data.Map.insert i d old
in foldl' seen new (flattenDT dt)
flattenDT :: DT -> [DFA]
flattenDT (Simple' {dt_trans=(CharMap mt),dt_other=o}) = concatMap (\d -> [trans_many d ]) . (:) o . IMap.elems $ mt
flattenDT (Testing' {dt_a=a,dt_b=b}) = flattenDT a ++ flattenDT b
examineDFA :: Regex -> String
examineDFA (Regex {regex_dfa=dfa}) = unlines . (:) ("Number of reachable DFA states: "++show (length dfas)) . map show $ dfas
where dfas = Data.Map.elems $ dfaMap dfa
pickQTrans :: Array Tag OP -> QTrans -> [(Index,(DoPa,Instructions))]
pickQTrans op tr = mapSnd (bestTrans op) . IMap.toList $ tr
cleanWin :: WinTags -> Instructions
cleanWin = toInstructions
bestTrans :: Array Tag OP -> [TagCommand] -> (DoPa,Instructions)
bestTrans _ [] = err "bestTrans : There were no transition choose from!"
bestTrans aTagOP (f:fs) | null fs = canonical f
| otherwise = answer
where
answer = foldl' pick (canonical f) fs
canonical :: TagCommand -> (DoPa,Instructions)
canonical (dopa,spec) = (dopa, toInstructions spec)
pick :: (DoPa,Instructions) -> TagCommand -> (DoPa,Instructions)
pick win@(dopa1,winI) (dopa2,spec) =
let nextI = toInstructions spec
in case compareWith choose (toListing winI) (toListing nextI) of
GT -> win
LT -> (dopa2,nextI)
EQ -> if dopa1 >= dopa2 then win else (dopa2,nextI)
toListing :: Instructions -> [(Tag,Action)]
toListing (Instructions {newPos = nextPos}) = filter notReset nextPos
where notReset (_,SetVal (1)) = False
notReset _ = True
choose :: Maybe (Tag,Action) -> Maybe (Tag,Action) -> Ordering
choose Nothing Nothing = EQ
choose Nothing x = flipOrder (choose x Nothing)
choose (Just (tag,_post)) Nothing =
case aTagOP!tag of
Maximize -> GT
Minimize -> LT
Ignore -> GT
Orbit -> LT
choose (Just (tag,post1)) (Just (_,post2)) =
case aTagOP!tag of
Maximize -> order
Minimize -> flipOrder order
Ignore -> EQ
Orbit -> EQ
where order = case (post1,post2) of
(SetPre,SetPre) -> EQ
(SetPost,SetPost) -> EQ
(SetPre,SetPost) -> LT
(SetPost,SetPre) -> GT
(SetVal v1,SetVal v2) -> compare v1 v2
_ -> err $ "bestTrans.compareWith.choose sees incomparable "++show (tag,post1,post2)
compareWith :: (Ord x,Monoid a) => (Maybe (x,b) -> Maybe (x,c) -> a) -> [(x,b)] -> [(x,c)] -> a
compareWith comp = cw where
cw [] [] = comp Nothing Nothing
cw xx@(x:xs) yy@(y:ys) =
case compare (fst x) (fst y) of
GT -> comp Nothing (Just y) `mappend` cw xx ys
EQ -> comp (Just x) (Just y) `mappend` cw xs ys
LT -> comp (Just x) Nothing `mappend` cw xs yy
cw xx [] = foldr (\x rest -> comp (Just x) Nothing `mappend` rest) mempty xx
cw [] yy = foldr (\y rest -> comp Nothing (Just y) `mappend` rest) mempty yy
isDFAFrontAnchored :: DFA -> Bool
isDFAFrontAnchored = isDTFrontAnchored . d_dt
where
isDTFrontAnchored :: DT -> Bool
isDTFrontAnchored (Simple' {}) = False
isDTFrontAnchored (Testing' {dt_test=wt,dt_a=a,dt_b=b}) | wt == Test_BOL = isDTLosing b
| otherwise = isDTFrontAnchored a && isDTFrontAnchored b
where
isDTLosing :: DT -> Bool
isDTLosing (Testing' {dt_a=a',dt_b=b'}) = isDTLosing a' && isDTLosing b'
isDTLosing (Simple' {dt_win=w}) | not (IMap.null w) = False
isDTLosing (Simple' {dt_trans=CharMap mt,dt_other=o}) =
let ts = o : IMap.elems mt
in all transLoses ts
where
transLoses :: Transition -> Bool
transLoses (Transition {trans_single=dfa,trans_how=dtrans}) = isDTLose dfa || onlySpawns dtrans
where
isDTLose :: DFA -> Bool
isDTLose dfa' = ISet.null (d_id dfa')
onlySpawns :: DTrans -> Bool
onlySpawns t = case IMap.elems t of
[m] -> IMap.null m
_ -> False
toInstructions :: TagList -> Instructions
toInstructions spec =
let (p,o) = execState (assemble spec) (mempty,mempty)
in Instructions { newPos = IMap.toList p
, newOrbits = if IMap.null o then Nothing
else Just $ alterOrbits (IMap.toList o)
}
type CompileInstructions a = State
( IntMap Action
, IntMap AlterOrbit
) a
data AlterOrbit = AlterReset
| AlterLeave
| AlterModify { newInOrbit :: Bool
, freshOrbit :: Bool}
deriving (Show)
assemble :: TagList -> CompileInstructions ()
assemble = mapM_ oneInstruction where
oneInstruction (tag,command) =
case command of
PreUpdate TagTask -> setPreTag tag
PreUpdate ResetGroupStopTask -> resetGroupTag tag
PreUpdate SetGroupStopTask -> setGroupTag tag
PreUpdate ResetOrbitTask -> resetOrbit tag
PreUpdate EnterOrbitTask -> enterOrbit tag
PreUpdate LeaveOrbitTask -> leaveOrbit tag
PostUpdate TagTask -> setPostTag tag
PostUpdate ResetGroupStopTask -> resetGroupTag tag
PostUpdate SetGroupStopTask -> setGroupTag tag
_ -> err ("assemble : Weird orbit command: "++show (tag,command))
setPreTag :: Tag -> CompileInstructions ()
setPreTag = modifyPos SetPre
setPostTag :: Tag -> CompileInstructions ()
setPostTag = modifyPos SetPost
resetGroupTag :: Tag -> CompileInstructions ()
resetGroupTag = modifyPos (SetVal (1))
setGroupTag :: Tag -> CompileInstructions ()
setGroupTag = modifyPos (SetVal 0)
resetOrbit :: Tag -> CompileInstructions ()
resetOrbit tag = modifyPos (SetVal (1)) tag >> modifyOrbit (IMap.insert tag AlterReset)
enterOrbit :: Tag -> CompileInstructions ()
enterOrbit tag = modifyPos (SetVal 0) tag >> modifyOrbit changeOrbit where
changeOrbit = IMap.insertWith overwriteOrbit tag appendNewOrbit
appendNewOrbit = AlterModify {newInOrbit = True, freshOrbit = False}
startNewOrbit = AlterModify {newInOrbit = True, freshOrbit = True}
overwriteOrbit _ AlterReset = startNewOrbit
overwriteOrbit _ AlterLeave = startNewOrbit
overwriteOrbit _ (AlterModify {newInOrbit = False}) = startNewOrbit
overwriteOrbit _ (AlterModify {newInOrbit = True}) =
err $ "enterOrbit: Cannot enterOrbit twice in a row: " ++ show tag
leaveOrbit :: Tag -> CompileInstructions ()
leaveOrbit tag = modifyOrbit escapeOrbit where
escapeOrbit = IMap.insertWith setInOrbitFalse tag AlterLeave where
setInOrbitFalse _ x@(AlterModify {}) = x {newInOrbit = False}
setInOrbitFalse _ x = x
modifyPos :: Action -> Tag -> CompileInstructions ()
modifyPos todo tag = do
(a,c) <- get
let a' = IMap.insert tag todo a
seq a' $ put (a',c)
modifyOrbit :: (IntMap AlterOrbit -> IntMap AlterOrbit) -> CompileInstructions ()
modifyOrbit f = do
(a,c) <- get
let c' = f c
seq c' $ put (a,c')
alterOrbits :: [(Tag,AlterOrbit)] -> (Position -> OrbitTransformer)
alterOrbits x = let items = map alterOrbit x
in (\ pos m -> foldl (flip ($)) m (map ($ pos) items))
alterOrbit :: (Tag,AlterOrbit) -> (Position -> OrbitTransformer)
alterOrbit (tag,AlterModify {newInOrbit = inOrbit',freshOrbit = True}) =
(\ pos m -> IMap.insert tag (Orbits { inOrbit = inOrbit'
, basePos = pos
, ordinal = Nothing
, getOrbits = mempty}) m)
alterOrbit (tag,AlterModify {newInOrbit = inOrbit',freshOrbit = False}) =
(\ pos m -> IMap.insertWithKey (updateOrbit pos) tag (newOrbit pos) m) where
newOrbit pos = Orbits { inOrbit = inOrbit'
, basePos = pos
, ordinal = Nothing
, getOrbits = mempty}
updateOrbit pos _tag new old | inOrbit old = old { inOrbit = inOrbit'
, getOrbits = getOrbits old |> pos }
| otherwise = new
alterOrbit (tag,AlterReset) = (\ _ m -> IMap.delete tag m)
alterOrbit (tag,AlterLeave) = (\ _ m -> case IMap.lookup tag m of
Nothing -> m
Just x -> IMap.insert tag (x {inOrbit=False}) m)