{-# language BangPatterns #-}
{-# language DeriveFunctor #-}
{-# language DerivingStrategies #-}
{-# language MagicHash #-}
{-# language RankNTypes #-}
{-# language ScopedTypeVariables #-}
{-# language UnboxedTuples #-}
module Automata.Dfst
(
Dfst
, evaluate
, evaluateAscii
, union
, map
, rejection
, Builder
, State
, build
, state
, transition
, accept
) where
import Prelude hiding (map)
import Automata.Internal (State(..),Dfsa(..),composeMapping)
import Automata.Internal.Transducer (Dfst(..),MotionDfst(..),Edge(..),EdgeDest(..))
import Control.Monad.ST (runST)
import Data.Foldable (foldl',for_)
import Data.Map.Strict (Map)
import Data.Maybe (fromMaybe)
import Data.Primitive (Array,indexArray)
import Data.Semigroup (Last(..))
import Data.Set (Set)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Char8 as BC
import qualified Data.List as L
import qualified Data.Map.Interval.DBTSLL as DM
import qualified Data.Map.Strict as M
import qualified Data.Primitive.Contiguous as C
import qualified Data.Set as S
import qualified Data.Set.Unboxed as SU
import qualified GHC.Exts as E
map :: Eq n => (m -> n) -> Dfst t m -> Dfst t n
map f (Dfst t m) =
Dfst (fmap (DM.map (\(MotionDfst s x) -> MotionDfst s (f x))) t) m
rejection :: (Bounded t, Monoid m) => Dfst t m
rejection = Dfst (C.singleton (DM.pure (MotionDfst 0 mempty))) SU.empty
union :: forall t m. (Ord t, Bounded t, Enum t, Monoid m) => Dfst t m -> Dfst t m -> Dfst t m
union a@(Dfst ax _) b@(Dfst bx _) =
let (mapping, Dfsa t0 f) = composeMapping (||) (unsafeToDfsa a) (unsafeToDfsa b)
revMapping :: Map Int (Set (Int,Int))
revMapping = M.foldlWithKey' (\acc k v -> M.insertWith (<>) v (S.singleton k) acc) M.empty mapping
t1 :: Array (DM.Map t (MotionDfst m))
t1 = C.imap
(\source m -> DM.mapBijection
(\dest ->
let oldSources = fromMaybe (error "Automata.Nfst.toDfst: missing old source") (M.lookup source revMapping)
oldDests = fromMaybe (error "Automata.Nfst.toDfst: missing old dest") (M.lookup dest revMapping)
newOutput = foldMap
(\(oldSourceA,oldSourceB) -> mconcat $ E.toList $ do
MotionDfst oldDestA outA <- DM.elems (indexArray ax oldSourceA)
MotionDfst oldDestB outB <- DM.elems (indexArray bx oldSourceB)
if S.member (oldDestA,oldDestB) oldDests then pure (outA <> outB) else mempty
) oldSources
in MotionDfst dest newOutput
) m
) t0
in Dfst t1 f
evaluate :: (Foldable f, Ord t) => Dfst t m -> f t -> Maybe (Array m)
evaluate (Dfst transitions finals) tokens =
let !(!finalState,!totalSize,!allOutput) = foldl'
(\(!active,!sz,!output) token ->
let MotionDfst nextState outputToken = DM.lookup token (indexArray transitions active)
in (nextState,sz + 1,outputToken : output)
) (0,0,[]) tokens
in if SU.member finalState finals
then Just (C.unsafeFromListReverseN totalSize allOutput)
else Nothing
evaluateAscii :: forall m. Ord m => Dfst Char m -> ByteString -> Maybe (Array m)
evaluateAscii (Dfst transitions finals) !tokens =
let !(!finalState,!allOutput) = BC.foldl'
(\(!active,!output) token ->
let MotionDfst nextState outputToken = DM.lookup token (indexArray transitions active)
in (nextState,outputToken : output)
) (0,[]) tokens
in if SU.member finalState finals
then Just (C.unsafeFromListReverseN (BC.length tokens) allOutput)
else Nothing
newtype Builder t m s a = Builder (Int -> [Edge t m] -> [Int] -> Result t m a)
deriving stock (Functor)
data Result t m a = Result !Int ![Edge t m] ![Int] a
deriving stock (Functor)
instance Applicative (Builder t m s) where
pure a = Builder (\i es fs -> Result i es fs a)
Builder f <*> Builder g = Builder $ \i es fs -> case f i es fs of
Result i' es' fs' x -> case g i' es' fs' of
Result i'' es'' fs'' y -> Result i'' es'' fs'' (x y)
instance Monad (Builder t m s) where
Builder f >>= g = Builder $ \i es fs -> case f i es fs of
Result i' es' fs' a -> case g a of
Builder g' -> g' i' es' fs'
state :: Builder t m s (State s)
state = Builder $ \i edges final ->
Result (i + 1) edges final (State i)
accept :: State s -> Builder t m s ()
accept (State n) = Builder $ \i edges final -> Result i edges (n : final) ()
transition ::
t
-> t
-> m
-> State s
-> State s
-> Builder t m s ()
transition lo hi output (State source) (State dest) =
Builder $ \i edges final -> Result i (Edge source dest lo hi output : edges) final ()
build :: forall t m a. (Bounded t, Ord t, Enum t, Monoid m, Ord m) => (forall s. State s -> Builder t m s a) -> Dfst t m
build fromStartState =
case state >>= fromStartState of
Builder f -> case f 0 [] [] of
Result totalStates edges final _ ->
let ts0 = runST $ do
transitions <- C.replicateM totalStates (DM.pure Nothing)
outbounds <- C.replicateM totalStates []
for_ edges $ \(Edge source destination lo hi output) -> do
edgeDests0 <- C.read outbounds source
let !edgeDests1 = EdgeDest destination lo hi output : edgeDests0
C.write outbounds source edgeDests1
(outbounds' :: Array [EdgeDest t m]) <- C.unsafeFreeze outbounds
flip C.imapMutable' transitions $ \i _ ->
let dests = C.index outbounds' i
in mconcat
( L.map
(\(EdgeDest dest lo hi output) ->
DM.singleton mempty lo hi (Just (Last (MotionDfst dest output)))
)
dests
)
C.unsafeFreeze transitions
in Dfst (fmap (DM.map (maybe (MotionDfst 0 mempty) getLast)) ts0) (SU.fromList final)
unsafeToDfsa :: Dfst t m -> Dfsa t
unsafeToDfsa (Dfst t f) = Dfsa (fmap (DM.map motionDfstState) t) f