{-# language BangPatterns #-}
{-# language LambdaCase #-}
{-# language MagicHash #-}
{-# language UnboxedTuples #-}
{-# language ScopedTypeVariables #-}

module Automata.Internal
  ( -- * Types
    Dfsa(..)
  , Nfsa(..)
  , TransitionNfsa(..)
    -- * Builder Types
  , State(..)
  , Epsilon(..)
    -- * NFA Functions 
  , toDfsa
  , toDfsaMapping
  , append
  , empty
  , rejectionNfsa
  , unionNfsa
  , epsilonClosure
    -- * DFA Functions
  , union
  , intersection
  , acceptance
  , rejection
  , minimize
  , minimizeMapping
  , composeMapping
  ) where

import Control.Applicative (liftA2)
import Control.Monad (forM_,(<=<))
import Control.Monad.ST (runST)
import Data.Foldable (foldl',toList)
import Data.Map (Map)
import Data.Maybe (fromMaybe,isNothing,mapMaybe)
import Data.Primitive (Array,indexArray)
import Data.Semigroup (First(..))
import Data.Semiring (Semiring)
import Data.Set (Set)

import Debug.Trace

import qualified Data.List as L
import qualified Data.Set as S
import qualified Data.Set.Unboxed as SU
import qualified Data.Map.Strict as M
import qualified Control.Monad.Trans.State.Strict as State
import qualified Data.Map.Interval.DBTSLL as DM
import qualified Data.Map.Unboxed.Lifted as MUL
import qualified Data.Primitive.Contiguous as C
import qualified Data.Primitive as PM
import qualified Data.Map.Lifted.Lifted as MLL
import qualified GHC.Exts as E
import qualified Data.Semiring

-- | Deterministic Finite State Automaton.
--
-- The start state is always zero.
data Dfsa t = Dfsa
  { dfaTransition :: !(Array (DM.Map t Int))
    -- ^ Given a state and transition, this field tells you what
    --   state to go to next. The length of this array must match
    --   the total number of states.
  , dfaFinal :: !(SU.Set Int)
    -- ^ A string that ends in any of these set of states is
    --   considered to have been accepted by the grammar.
  } deriving (Eq,Show)

-- | Non-Deterministic Finite State Automaton.
--
-- Some notes on the implementation and design:
--
-- * You can transition to any non-negative number of states (including 0).
-- * There is only one start state.
-- * We use the Thompson encoding. This means that there is an epsilon
--   transition that consumes no input.
-- * We store the full epsilon closure for every state. This means that,
--   when evaluating the NFA, we do not ever need to compute the closure.
-- * There is no Eq instance for NFA. In general, this can take exponential
--   time. If you really need to do this, convert the NFA to a DFA.
--
-- Invariants:
-- 
-- * The start state is always the state at position 0.
-- * The length of nfaTransition is given by nfaStates.
data Nfsa t = Nfsa
  { nfaTransition :: !(Array (TransitionNfsa t))
    -- ^ Given a state and transition, this field tells you what
    --   state to go to next. The length of this array must match
    --   the total number of states. The data structure inside is
    --   a diet map. This is capable of collapsing adjacent key-value
    --   pairs into ranges.
  , nfaFinal :: !(SU.Set Int)
    -- ^ A string that ends in any of these set of states is
    --   considered to have been accepted by the grammar.
  } deriving (Show)

data TransitionNfsa t = TransitionNfsa
  { transitionNfsaEpsilon :: {-# UNPACK #-} !(SU.Set Int)
  , transitionNfsaConsume :: {-# UNPACK #-} !(DM.Map t (SU.Set Int))
  } deriving (Eq,Show)

data Conversion = Conversion
  { conversionLabel :: !Int
    -- The state identifier to be assigned to the next state.
  , conversionResolutions :: !(Map (SU.Set Int) Int)
    -- The map from subsets of states to new state identifiers.
    -- This is a bidirectional map.
  , conversionTraversed :: !(Set Int)
    -- The new state identifiers that have already been dealt with.
    -- This must be a subset of the keys of resolutions.
  , conversionPending :: !(Map Int (SU.Set Int))
    -- Newly created states that we need to consider transitions for.
    -- The keys in this should all be less than the label.
  }

data Pairing = Pairing
  { pairingMap :: !(Map (Int,Int) Int)
  , pairingReversedOld :: ![(Int,Int)]
  , pairingState :: !Int
  }

append :: Nfsa t -> Nfsa t -> Nfsa t
append (Nfsa t1 f1) (Nfsa t2 f2) = 
  let n1 = C.size t1
      n2 = C.size t2
      n3 = n1 + n2
      f3 = SU.mapMonotonic (+n1) f2
      t3 = fmap (\(TransitionNfsa eps consume) -> TransitionNfsa (SU.mapMonotonic (+n1) eps) (DM.mapBijection (SU.mapMonotonic (+n1)) consume)) t2
      t4 = fmap (\(TransitionNfsa eps consume) -> TransitionNfsa eps (DM.mapBijection (\states -> if SU.null (SU.intersection states f1) then states else states <> transitionNfsaEpsilon (C.index t3 0)) consume)) t1
      !(# placeholder #) = C.index# t1 0
      t5 = runST $ do
        m <- C.replicateM n3 placeholder
        C.copy m 0 t4 0 n1
        C.copy m n1 t3 0 n2
        flip SU.traverse_ f1 $ \ix -> do
          TransitionNfsa epsilon consume <- C.read m ix
          let transition = TransitionNfsa (epsilon <> transitionNfsaEpsilon (C.index t3 0)) consume
          C.write m ix transition
        C.unsafeFreeze m
   in Nfsa t5 f3

nextIdentifier :: State.State Conversion Int
nextIdentifier = do
  Conversion n a b c <- State.get 
  State.put (Conversion (n + 1) a b c)
  return n

-- Mark a new state as having been completed.
complete :: Int -> State.State Conversion ()
complete s = do
  c <- State.get
  State.put c
    { conversionTraversed = S.insert s (conversionTraversed c)
    , conversionPending = M.delete s (conversionPending c)
    }

-- Convert the subset of NFA states to a single DFA state.
resolveSubset :: Array (TransitionNfsa t) -> SU.Set Int -> State.State Conversion Int
resolveSubset transitions s0 = do
  let s = epsilonClosure transitions s0
  Conversion _ resolutions0 _ _ <- State.get
  case M.lookup s resolutions0 of
    Nothing -> do
      ident <- nextIdentifier
      c <- State.get
      State.put c
        { conversionResolutions = M.insert s ident (conversionResolutions c)
        , conversionPending = M.insert ident s (conversionPending c)
        }
      return ident
    Just ident -> return ident
  
epsilonClosure :: Array (TransitionNfsa t) -> SU.Set Int -> SU.Set Int
epsilonClosure s states = go states SU.empty where
  go new old = if new == old
    then new
    else
      let together = old <> new
       in go (mconcat (map (\ident -> transitionNfsaEpsilon (indexArray s ident)) (SU.toList together)) <> together) together

data Node t = Node
  !Int -- identifier
  !(DM.Map t Int) -- transitions

-- | Convert an NFSA to a DFSA. For certain inputs, this causes
--   the number of states to blow up expontentially, so do not
--   call this on untrusted input.
toDfsa :: (Ord t, Bounded t, Enum t) => Nfsa t -> Dfsa t
toDfsa = snd . toDfsaMapping

toDfsaMapping :: forall t. (Ord t, Bounded t, Enum t) => Nfsa t -> (Map (SU.Set Int) Int, Dfsa t)
toDfsaMapping (Nfsa t0 f0) = runST $ do
  let ((len,nodes),c) = State.runState
        (go 0 [])
        (Conversion 1 (M.singleton startClosure 0) S.empty (M.singleton 0 startClosure))
      resolutions = conversionResolutions c
  marr <- C.new len
  forM_ nodes $ \(Node ident transitions) -> C.write marr ident transitions
  arr <- C.unsafeFreeze marr
  let f1 = SU.fromList (M.foldrWithKey (\k v xs -> if SU.null (SU.intersection k f0) then xs else v : xs) [] resolutions)
  let (canonB,r) = minimizeMapping arr f1
      canon = fmap (fromMaybe (error "toDfsaMapping: missing canon value") . flip M.lookup canonB) resolutions
  return (canon,r)
  where
  startClosure :: SU.Set Int
  startClosure = epsilonClosure t0 (SU.singleton 0)
  go :: Int -> [Node t] -> State.State Conversion (Int, [Node t])
  go !n !edges0 = do
    Conversion _ _ _ pending <- State.get
    case M.foldMapWithKey (\k v -> Just (First (k,v))) pending of
      Nothing -> return (n, edges0)
      Just (First (m,states)) -> do
        t <- DM.traverseBijection (resolveSubset t0) (mconcat (map (transitionNfsaConsume . indexArray t0) (SU.toList states)))
        complete m
        go (n + 1) (Node m t : edges0)

-- | This uses Hopcroft's Algorithm. It is like a smart constructor for Dfsa.
minimize :: (Ord t, Bounded t, Enum t) => Array (DM.Map t Int) -> SU.Set Int -> Dfsa t
minimize t0 f0 = snd (minimizeMapping t0 f0)

-- | This uses Hopcroft's Algorithm. It also provides the mapping from old
--   state number to new state number. We need this mapping for a special
--   NFST to DFST minimizer.
minimizeMapping :: forall t. (Ord t, Bounded t, Enum t) => Array (DM.Map t Int) -> SU.Set Int -> (Map Int Int, Dfsa t)
minimizeMapping t0 f0 =
  let partitions0 = go (S.fromList [f1,S.difference q0 f1]) (S.singleton f1)
      -- We move the partition containing the start state to the front.
      partitions1 = case L.find (S.member 0) partitions0 of
        Just startStates -> startStates : deletePredicate (\s -> S.member 0 s || S.null s) (S.toList partitions0)
        Nothing -> error "Automata.Nfsa.minimize: incorrect"
      -- Creates a map from old state to new state. This is not a bijection
      -- since two old states may map to the same new state. However, we
      -- may treat it as a bijection since at most one of the old states
      -- is preserved.
      assign :: Int -> Map Int Int -> [Set Int] -> Map Int Int
      assign !_ !m [] = m
      assign !ix !m (s : ss) = assign (ix + 1) (M.union (M.fromSet (const ix) s) m) ss
      assignments = assign 0 M.empty partitions1
      newTransitions0 = E.fromList (map (\s -> DM.map (\oldState -> fromMaybe (error "Automata.Nfsa.minimize: missing state") (M.lookup oldState assignments)) (PM.indexArray t1 (S.findMin s))) partitions1)
      canonization = establishOrder newTransitions0
      description = "[canonization=" ++ show canonization ++ "][assignments=" ++ show assignments ++ "]"
      newTransitions1 :: Array (DM.Map t Int) = C.map' (DM.mapBijection (\s -> fromMaybe (error ("Automata.Nfsa.minimize: canonization missing state [state=" ++ show s ++ "]" ++ description)) (M.lookup s canonization))) newTransitions0
      newTransitions2 = runST $ do
        marr <- C.replicateM (M.size canonization) (error ("Automata.Nfsa.minimize: uninitialized element " ++ description))
        flip C.itraverse_ newTransitions1 $ \ix dm -> C.write marr (fromMaybe (error ("Automata.Nfsa.minimize: missing state while rearranging [state=" ++ show ix ++ "]" ++ description)) (M.lookup ix canonization)) dm
        C.unsafeFreeze marr
      newAcceptingStates = foldMap (maybe SU.empty SU.singleton . (flip M.lookup canonization <=< flip M.lookup assignments)) f1
      finalCanonization = fmap (fromMaybe (error ("minimizeMapping: failed to connect the canons.\npartitions:\n" ++ show partitions1 ++ "\ninitial canon:\n" ++ show initialCanonization ++ "\nsecond canon:\n" ++ show canonization)) . (flip M.lookup canonization <=< flip M.lookup assignments)) initialCanonization
   in (finalCanonization,Dfsa newTransitions2 newAcceptingStates)
  where
  q0 = S.fromList (enumFromTo 0 (C.size t1 - 1))
  f1 = S.fromList (mapMaybe (\x -> M.lookup x initialCanonization) (SU.toList f0))
  -- Do we actually need to canonize the states twice? Yes, we do.
  t1' :: Array (DM.Map t Int)
  t1' = C.map' (DM.mapBijection (\s -> fromMaybe (error "Automata.Nfsa.minimize: t1 prime") (M.lookup s initialCanonization))) t0
  t1 = runST $ do
    marr <- C.replicateM (M.size initialCanonization) (error "Automata.Nfsa.minimize: t1 uninitialized element")
    flip C.itraverse_ t1' $ \ix dm -> case M.lookup ix initialCanonization of
      Nothing -> return ()
      Just newIx -> C.write marr newIx dm
    C.unsafeFreeze marr
  initialCanonization = establishOrder t0
  -- The inverted transitions has the destination state as well as
  -- all source states that lead to it when the token is consumed. 
  invertedTransitions :: DM.Map t (MLL.Map Int (Set Int))
  invertedTransitions = mconcat (toList (C.imap (\ix m -> DM.mapBijection (\dest -> MLL.singleton dest (S.singleton ix)) m) t1 :: Array (DM.Map t (MLL.Map Int (Set Int)))))
  -- The result of go is set of disjoint sets. It represents the equivalence classes
  -- that have been established. All references to any state in an equivalence class
  -- can be replaced with any of the other states in the same equivalence class.
  go :: Set (Set Int) -> Set (Set Int) -> Set (Set Int)
  go p1 w1 = case S.minView w1 of
    Nothing -> p1
    Just (a,w2) ->
      let (p2,w3) = DM.foldl'
            (\(p3,w4) m ->
              let x = foldMap (\s -> fromMaybe S.empty (MLL.lookup s m)) a
               in foldl'
                    (\(p4, w5) y ->
                      let diffYX = S.difference y x
                          intersectYX = S.intersection y x
                       in if not (S.disjoint x y) && not (S.null diffYX)
                            then
                              ( S.insert diffYX (S.insert intersectYX (S.delete y p4))
                              , if S.member y w5
                                  then S.insert diffYX (S.insert intersectYX (S.delete y w5))
                                  else if S.size intersectYX <= S.size diffYX
                                    then S.insert intersectYX w5
                                    else S.insert diffYX w5
                              )
                            else (S.insert y p4, w5)
                    ) (S.empty, w4) p3
            ) (p1,w2) invertedTransitions
       in go p2 w3

-- This gives a canonical order to the states. Any state missing from
-- the resulting map was not reachable. The map goes from old state
-- identifiers to new state identifiers. It is a bijection.
establishOrder :: Array (DM.Map t Int) -> Map Int Int
establishOrder t = go 0 [0] M.empty where
  go :: Int -> [Int] -> Map Int Int -> Map Int Int
  go !ident !unvisited0 !assignments = case unvisited0 of
    [] -> assignments
    state : unvisited1 -> if isNothing (M.lookup state assignments)
      then
        let unvisited2 = DM.foldl'
             (\unvisited s -> if isNothing (M.lookup s assignments) then s : unvisited else unvisited)
             unvisited1
             (PM.indexArray t state)
         in go (ident + 1) unvisited2 (M.insert state ident assignments)
      else go ident unvisited1 assignments

-- removeUnreachable :: Array (DM.Map t Int) -> SU.Set Int -> (Array (DM.Map t Int), SU.Set Int)

deletePredicate :: (a -> Bool) -> [a] -> [a]
deletePredicate _ [] = []
deletePredicate p (y:ys) = if p y then deletePredicate p ys else y : deletePredicate p ys

-- | Accepts input that is accepted by both of the two argument DFAs. This is also known
--   as completely synchronous composition in the literature.
intersection :: (Ord t, Bounded t, Enum t) => Dfsa t -> Dfsa t -> Dfsa t
intersection = compose (&&)

-- Adjusts all the values in the first interval map by multiplying
-- them by the number of states in the second automaton. Then,
-- adds these to the states numbers from the second automaton
scoot :: Ord t => Int -> DM.Map t Int -> DM.Map t Int -> DM.Map t Int
scoot n2 d1 d2 = DM.unionWith (\s1 s2 -> n2 * s1 + s2) d1 d2

{-# NOINLINE errorThunkUnion #-}
errorThunkUnion :: a
errorThunkUnion = error "Automata.Dfsa.union: slot not filled"

-- | Accepts input that is accepted by either of the two argument DFAs. This is also known
--   as synchronous composition in the literature.
union :: (Ord t, Bounded t, Enum t) => Dfsa t -> Dfsa t -> Dfsa t
union = compose (||)

composeMapping :: (Ord t, Bounded t, Enum t) => (Bool -> Bool -> Bool) -> Dfsa t -> Dfsa t -> (Map (Int,Int) Int, Dfsa t)
composeMapping combineFinalMembership (Dfsa t1 f1) (Dfsa t2 f2) = runST $ do
  let Pairing oldToNew reversedOld n3 = compositionReachable t1 t2
  m <- PM.newArray n3 errorThunkUnion
  let go !_ [] = return ()
      go !ix (statePair@(stateA,stateB) : xs) = do
        PM.writeArray m ix (DM.unionWith (\x y -> fromMaybe (error "Automata.Dfsa.union: could not find pair in oldToNew") (M.lookup (x,y) oldToNew)) (PM.indexArray t1 stateA) (PM.indexArray t2 stateB))
        go (ix - 1) xs
  go (n3 - 1) reversedOld
  frozen <- PM.unsafeFreezeArray m
  let finals = SU.fromList (M.foldrWithKey (\(stateA,stateB) stateNew xs -> if combineFinalMembership (SU.member stateA f1) (SU.member stateB f2) then stateNew : xs else xs) [] oldToNew)
  let (secondMapping, r) = minimizeMapping frozen finals
  return (M.map (\x -> fromMaybe (error "composeMapping: bad lookup") (M.lookup x secondMapping)) oldToNew, r)

compose :: (Ord t, Bounded t, Enum t) => (Bool -> Bool -> Bool) -> Dfsa t -> Dfsa t -> Dfsa t
compose combineFinalMembership a b = snd (composeMapping combineFinalMembership a b)

compositionReachable :: Ord t => Array (DM.Map t Int) -> Array (DM.Map t Int) -> Pairing
compositionReachable a b = State.execState (go 0 0) (Pairing M.empty [] 0) where
  !szA = PM.sizeofArray a
  !szB = PM.sizeofArray b
  go :: Int -> Int -> State.State Pairing ()
  go !stateA !stateB = do
    Pairing m xs s <- State.get
    case M.lookup (stateA,stateB) m of
      Just _ -> return ()
      Nothing -> do
        State.put (Pairing (M.insert (stateA,stateB) s m) ((stateA,stateB) : xs) (s + 1))
        DM.traverse_ (uncurry go) (DM.unionWith (,) (PM.indexArray a stateA) (PM.indexArray b stateB))
    

-- | Docs for this are at @Automata.Nfsa.union@.
unionNfsa :: (Bounded t) => Nfsa t -> Nfsa t -> Nfsa t
unionNfsa (Nfsa t1 f1) (Nfsa t2 f2) = Nfsa
  ( runST $ do
      m <- C.replicateM (n1 + n2 + 1)
        ( TransitionNfsa
          (mconcat
            [ SU.mapMonotonic (+1) (transitionNfsaEpsilon (C.index t1 0))
            , SU.mapMonotonic (\x -> 1 + n1) (transitionNfsaEpsilon (C.index t2 0))
            , SU.tripleton 0 1 (1 + n1)
            ]
          )
          (DM.pure SU.empty)
        )
      C.copy m 1 (fmap (translateTransitionNfsa 1) t1) 0 n1
      C.copy m (1 + n1) (fmap (translateTransitionNfsa (1 + n1)) t2) 0 n2
      C.unsafeFreeze m
  )
  (SU.mapMonotonic (+1) f1 <> SU.mapMonotonic (\x -> 1 + n1 + x) f2)
  where
  !n1 = PM.sizeofArray t1
  !n2 = PM.sizeofArray t2

translateTransitionNfsa :: Int -> TransitionNfsa t -> TransitionNfsa t
translateTransitionNfsa n (TransitionNfsa eps m) = TransitionNfsa
  (SU.mapMonotonic (+n) eps)
  (DM.mapBijection (SU.mapMonotonic (+n)) m)

-- | Automaton that accepts all input. This is the identity
-- for 'intersection'.
acceptance :: Bounded t => Dfsa t
acceptance = Dfsa (C.singleton (DM.pure 0)) (SU.singleton 0)

-- | Automaton that rejects all input. This is the identity
-- for 'union'.
rejection :: Bounded t => Dfsa t
rejection = Dfsa (C.singleton (DM.pure 0)) SU.empty

-- | Automaton that accepts the empty string and rejects all
-- other strings. This is the identity for 'append'.
empty :: Bounded t => Nfsa t
empty = Nfsa
  ( C.doubleton
    (TransitionNfsa (SU.singleton 0) (DM.pure (SU.singleton 1)))
    (TransitionNfsa (SU.singleton 1) (DM.pure SU.empty))
  )
  (SU.singleton 0)

-- | Docs for this are at @Automata.Nfsa.rejection@.
rejectionNfsa :: Bounded t => Nfsa t
rejectionNfsa = Nfsa
  (C.singleton (TransitionNfsa (SU.singleton 0) (DM.pure SU.empty)))
  SU.empty

-- | This uses 'union' for @plus@ and 'intersection' for @times@.
instance (Ord t, Enum t, Bounded t) => Semiring (Dfsa t) where
  plus = union
  times = intersection
  zero = rejection
  one = acceptance

-- | This uses @union@ for @plus@ and @append@ for @times@.
instance (Bounded t) => Semiring (Nfsa t) where
  plus = unionNfsa
  times = append
  zero = rejectionNfsa
  one = empty
  
data Epsilon = Epsilon !Int !Int

newtype State s = State Int