{-# LANGUAGE BangPatterns           #-}
{-# LANGUAGE FlexibleInstances      #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE GADTs                  #-}
{-# LANGUAGE Safe                   #-}
{-# LANGUAGE ScopedTypeVariables    #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Kleene.DFA (
    DFA (..),
    -- * Conversions
    fromRE,
    toRE,
    fromERE,
    toERE,
    fromTM,
    fromTMEquiv,
    toKleene,
    toDot,
    toDot',
    ) where

import Prelude ()
import Prelude.Compat

import Algebra.Lattice
       (BoundedJoinSemiLattice (..), BoundedMeetSemiLattice (..), Lattice (..))
import Data.IntMap       (IntMap)
import Data.IntSet       (IntSet)
import Data.List         (intercalate)
import Data.Map          (Map)
import Data.Maybe        (fromMaybe)
import Data.RangeSet.Map (RSet)

import qualified Data.ByteString                    as BS
import qualified Data.Function.Step.Discrete.Closed as SF
import qualified Data.IntMap                        as IntMap
import qualified Data.IntSet                        as IntSet
import qualified Data.Map                           as Map
import qualified Data.MemoTrie                      as MT
import qualified Data.RangeSet.Map                  as RSet

import           Kleene.Classes
import qualified Kleene.ERE             as ERE
import           Kleene.Internal.Pretty
import qualified Kleene.Internal.RE     as RE

-- | Deterministic finite automaton.
--
-- A deterministic finite automaton (DFA) over an alphabet \(\Sigma\) (type
-- variable @c@) is 4-tuple \(Q\), \(q_0\) , \(F\), \(\delta\), where
--
-- * \(Q\) is a finite set of states (subset of 's'),
-- * \(q_0 \in Q\) is the distinguised start state ('dfaInitial'),
-- * \(F \subset Q\) is a set of final (or  accepting) states ('dfaAcceptable'), and
-- * \(\delta : Q \times \Sigma \to Q\) is a function called the state
-- transition function ('dfaTransition').
--
data DFA c = DFA
    { dfaTransition   :: !(IntMap (SF.SF c Int))
      -- ^ transition function
    , dfaInitial      :: !Int
      -- ^ initial state
    , dfaAcceptable   :: !IntSet
      -- ^ accept states
    , dfaBlackholes   :: !IntSet
      -- ^ states we cannot escape
    }
  deriving Show

-------------------------------------------------------------------------------
-- Construction
-------------------------------------------------------------------------------

-- | Convert 'RE.RE' to 'DFA'.
--
-- >>> putPretty $ fromRE $ RE.star "abc"
-- 0+ -> \x -> if
--     | x <= '`'  -> 3
--     | x <= 'a'  -> 2
--     | otherwise -> 3
-- 1 -> \x -> if
--     | x <= 'b'  -> 3
--     | x <= 'c'  -> 0
--     | otherwise -> 3
-- 2 -> \x -> if
--     | x <= 'a'  -> 3
--     | x <= 'b'  -> 1
--     | otherwise -> 3
-- 3 -> \_ -> 3 -- black hole
--
-- Everything and nothing result in blackholes:
--
-- >>> traverse_ (putPretty . fromRE) [RE.empty, RE.star RE.anyChar]
-- 0 -> \_ -> 0 -- black hole
-- 0+ -> \_ -> 0 -- black hole
--
-- Character ranges are effecient:
--
-- >>> putPretty $ fromRE $ RE.charRange 'a' 'z'
-- 0 -> \x -> if
--     | x <= '`'  -> 2
--     | x <= 'z'  -> 1
--     | otherwise -> 2
-- 1+ -> \_ -> 2
-- 2 -> \_ -> 2 -- black hole
--
-- An example with two blackholes:
--
-- >>> putPretty $ fromRE $ "c" <> RE.star RE.anyChar
-- 0 -> \x -> if
--     | x <= 'b'  -> 2
--     | x <= 'c'  -> 1
--     | otherwise -> 2
-- 1+ -> \_ -> 1 -- black hole
-- 2 -> \_ -> 2 -- black hole
--
fromRE :: forall c. (Ord c, Enum c, Bounded c) => RE.RE c -> DFA c
fromRE = fromTM

-- | Convert 'ERE.ERE' to 'DFA'.
--
-- We don't always generate a minimal automata:
--
-- >>> putPretty $ fromERE $ "a" /\ "b"
-- 0 -> \_ -> 1
-- 1 -> \_ -> 1 -- black hole
--
-- Compare this to a 'complement' example
--
-- Using 'fromTMEquiv', we can get a minimal automaton, for the cost of higher
-- complexity (slow!).
--
-- >>> putPretty $ fromTMEquiv $ ("a" /\ "b" :: ERE.ERE Char)
-- 0 -> \_ -> 0 -- black hole
--
-- >>> putPretty $ fromERE $ complement $ star "abc"
-- 0 -> \x -> if
--     | x <= '`'  -> 3
--     | x <= 'a'  -> 2
--     | otherwise -> 3
-- 1+ -> \x -> if
--     | x <= 'b'  -> 3
--     | x <= 'c'  -> 0
--     | otherwise -> 3
-- 2+ -> \x -> if
--     | x <= 'a'  -> 3
--     | x <= 'b'  -> 1
--     | otherwise -> 3
-- 3+ -> \_ -> 3 -- black hole
--
fromERE :: forall c. (Ord c, Enum c, Bounded c) => ERE.ERE c -> DFA c
fromERE = fromTM

-- | Create from 'TransitionMap'.
--
-- See 'fromRE' for a specific example.
fromTM :: forall k c. (Ord k, Ord c, TransitionMap c k) => k -> DFA c
fromTM = fromTMImpl Nothing

-- | Create from 'TransitonMap' minimising states with 'Equivalent'.
--
-- See 'fromERE' for an example.
--
fromTMEquiv :: forall k c. (Ord k, Ord c, TransitionMap c k, Equivalent c k) => k -> DFA c
fromTMEquiv = fromTMImpl (Just equivalent)

fromTMImpl :: forall k c. (Ord k, Ord c, TransitionMap c k)
    => Maybe (k ->  k -> Bool)
    -> k
    -> DFA c
fromTMImpl mequiv re = DFA
    { dfaTransition = transition
    , dfaInitial    = 0
    , dfaAcceptable = IntSet.fromList
        [ i
        | (re', i) <- Map.toList lookupMap
        , nullable re'
        ]
    , dfaBlackholes = blackholes
    }
  where
    transition = IntMap.fromList
        [ (i, js)
        | (re', pm) <- Map.toList tm
        , let i  = fromMaybe 0 $ Map.lookup re' lookupMap
        , let js = SF.normalise $ fmap (\re'' -> fromMaybe 0 $ Map.lookup re'' lookupMap) pm
        ]

    blackholes = IntSet.fromList
        [ i
        | (i, sf) <- IntMap.toList transition
        , sf == pure i
        ]

    tm = transitionMap re

    -- reversing makes error state go last, usually
    lookupMap :: Map k Int
    lookupMap = makeLookup 1 lookupMap' (reverse $ Map.toList $ Map.delete re tm)

    lookupMap' :: Map k Int
    lookupMap' = case Map.lookup re tm of
        Nothing -> Map.empty
        Just _  -> Map.singleton re 0

    makeLookup :: Int -> Map k Int -> [(k, b)] -> Map k Int
    makeLookup = maybe makeLookupEq makeLookupEquiv mequiv

    makeLookupEq :: Int -> Map k Int -> [(k, b)] -> Map k Int
    makeLookupEq !_ !acc []            = acc
    makeLookupEq !n acc ((x, _) : xs) = makeLookup (n + 1) (Map.insert x n acc) xs

    -- this differs from makeLookupEq. We don't insert new states right away,
    -- but check whether equivalent state is already in the map.
    --
    -- This causes n^2 of exp m operations, where n = number of states and
    -- m size of @k@.
    makeLookupEquiv :: (k -> k -> Bool) ->  Int -> Map k Int -> [(k, b)] -> Map k Int
    makeLookupEquiv _  !_ !acc []           = acc
    makeLookupEquiv eq !n acc ((x, _) : xs) = case ys of
        []           -> makeLookup (n + 1) (Map.insert x n acc) xs
        ((_, i) : _) -> makeLookup n       (Map.insert x i acc) xs
      where
        ys = [ p | p@(y, _) <- Map.toList acc, eq x y ]

-------------------------------------------------------------------------------
-- Destruction
-------------------------------------------------------------------------------

-- | Convert 'DFA' to 'RE.RE'.
--
-- >>> putPretty $ toRE $ fromRE "foobar"
-- ^foobar$
--
-- For 'RE.string' regular expressions, @'toRE' . 'fromRE' = 'id'@:
--
-- prop> let s = take 5 s' in RE.string (s :: String) === toRE (fromRE (RE.string s))
--
-- But in general it isn't:
--
-- >>> let aToZ = RE.star $ RE.charRange 'a' 'z'
-- >>> traverse_ putPretty [aToZ, toRE $ fromRE aToZ]
-- ^[a-z]*$
-- ^([a-z]|[a-z]?[a-z]*[a-z]?)?$
--
-- @
-- not-prop> (re :: RE.RE Char) === toRE (fromRE re)
-- @
--
-- However, they are 'RE.equivalent':
--
-- >>> RE.equivalent aToZ (toRE (fromRE aToZ))
-- True
--
-- And so are others
--
-- >>> all (\re -> RE.equivalent re (toRE (fromRE re))) [RE.star "a", RE.star "ab"]
-- True
--
-- @
-- expensive-prop> RE.equivalent re (toRE (fromRE (re :: RE.RE Char)))
-- @
--
-- Note, that @'toRE' . 'fromRE'@ can, and usually makes regexp unrecognisable:
--
-- >>> putPretty $ toRE $ fromRE $ RE.star "ab"
-- ^(a(ba)*b)?$
--
-- We can 'complement' DFA, therefore we can complement 'RE.RE'.
-- For example. regular expression matching string containing an @a@:
--
-- >>> let withA = RE.star RE.anyChar <> "a" <> RE.star RE.anyChar
-- >>> let withoutA = toRE $ complement $ fromRE withA
-- >>> putPretty withoutA
-- ^([^a]|[^a]?[^a]*[^a]?)?$
--
-- >>> let withoutA' = RE.star $ RE.REChars $ RSet.complement $ RSet.singleton 'a'
-- >>> putPretty withoutA'
-- ^[^a]*$
--
-- >>> RE.equivalent withoutA withoutA'
-- True
--
-- Quite small, for example 2 state DFAs can result in big regular expressions:
--
-- >>> putPretty $ toRE $ complement $ fromRE $ star "ab"
-- ^([^]|a(ba)*(ba)?|a(ba)*([^b]|b[^a])|([^a]|a(ba)*([^b]|b[^a]))[^]*[^]?)$
--
-- We can use @'toRE' . 'fromERE'@ to convert 'ERE.ERE' to 'RE.RE':
--
-- >>> putPretty $ toRE $ fromERE $ complement $ star "ab"
-- ^([^]|a(ba)*(ba)?|a(ba)*([^b]|b[^a])|([^a]|a(ba)*([^b]|b[^a]))[^]*[^]?)$
--
-- >>> putPretty $ toRE $ fromERE $ "a" /\ "b"
-- ^[]$
--
-- See <https://mathoverflow.net/questions/45149/can-regular-expressions-be-made-unambiguous>
-- for the description of the algorithm used.
--
toRE :: (Ord c, Enum c, Bounded c) => DFA c -> RE.RE c
toRE = toKleene

-- | Convert 'DFA' to 'ERE.ERE'.
toERE :: (Ord c, Enum c, Bounded c) => DFA c -> ERE.ERE c
toERE = toKleene

-- | Convert to any 'Kleene'.
--
-- See 'toRE' for a specific example.
--
toKleene :: forall k c. (Ord c, Enum c, Bounded c, FiniteKleene c k) => DFA c -> k
toKleene (DFA tr ini acc _) = unions
    [ re ini j maxN
    | j <- IntSet.toList acc
    ]
  where
    maxN | IntMap.null tr = 1
         | otherwise      = succ $ fst $ IntMap.findMax tr

    {-
    -- this is useful for debug
    table =
      [ show i ++ " " ++ show j ++ " " ++ show k ++ " = " ++ pretty (re i j k)
      | k <- [0..pred maxN]
      , i <- [0..pred maxN]
      , j <- [0..pred maxN]
      ]
    -}

    re i j k = MT.memo re' (i, j, k)
    re' (i, j, k)
        | k <= 0    = if i == j then unions [eps, r] else r
        | otherwise = unions [re i j k', appends [re i k' k', star (re k' k' k'), re k' j k']]
      where
        r = maybe empty fromRSet $ Map.lookup (i, j) re0map
        k' = k - 1

    re0map :: Map (Int, Int) (RSet c)
    re0map = Map.fromListWith RSet.union
        [ ((i, j), RSet.singletonRange (lo, hi))
        | (i, tr') <- IntMap.toList tr
        , (lo, hi, j) <- toPieces tr'
        ]

toPieces :: (Enum a, Bounded a, Ord a) => SF.SF a b -> [(a, a, b)]
toPieces (SF.SF m v)
    | maxBound `Map.member` m = toPieces' m
    | otherwise               = toPieces' (Map.insert maxBound v m)

toPieces' :: (Enum a, Bounded a) => Map a b -> [(a, a, b)]
toPieces' = go minBound . Map.toList where
    go _lo []            = []
    go  lo ((k, v) : kv) = (lo, k, v) : go (succ k) kv

-------------------------------------------------------------------------------
-- Operations
-------------------------------------------------------------------------------

-- | Run 'DFA' on the input.
--
-- Because we have analysed a language, in some cases we can determine a result
-- without traversing all of the input.
-- That's not the cases with 'RE.RE' 'match'.
--
-- >>> let dfa = fromRE $ RE.star "abc"
-- >>> map (match dfa) ["", "abc", "abcabc", "aa", 'a' : 'a' : undefined]
-- [True,True,True,False,False]
--
-- Holds:
--
-- @
-- 'match' ('fromRE' re) xs == 'match' re xs
-- @
--
-- prop> all (match (fromRE r)) $ take 10 $ RE.generate (curry QC.choose) 42 (r :: RE.RE Char)
--
instance Ord c => Match c (DFA c) where
    match (DFA tr i acc bh) = go i where
        go !s _ | IntSet.member s bh = IntSet.member s acc
        go !s []                 = IntSet.member s acc
        go !s (c : cs)           = case IntMap.lookup s tr of
            Nothing -> False
            Just sf -> go (sf SF.! c) cs

    match8 (DFA tr i acc bh) = go i where
        go !s !_ | IntSet.member s bh = IntSet.member s acc
        go !s bs = case BS.uncons bs of
            Nothing      -> IntSet.member s acc
            Just (c, cs) -> case IntMap.lookup s tr of
                Nothing -> False
                Just sf -> go (sf SF.! c) cs

-- | Complement DFA.
--
-- Complement of 'DFA' is way easier than of 'RE.RE': complement accept states.
--
-- >>> let dfa = complement $ fromRE $ RE.star "abc"
-- >>> putPretty dfa
-- 0 -> \x -> if
--     | x <= '`'  -> 3
--     | x <= 'a'  -> 2
--     | otherwise -> 3
-- 1+ -> \x -> if
--     | x <= 'b'  -> 3
--     | x <= 'c'  -> 0
--     | otherwise -> 3
-- 2+ -> \x -> if
--     | x <= 'a'  -> 3
--     | x <= 'b'  -> 1
--     | otherwise -> 3
-- 3+ -> \_ -> 3 -- black hole
--
-- >>> map (match dfa) ["", "abc", "abcabc", "aa","abca", 'a' : 'a' : undefined]
-- [False,False,False,True,True,True]
--
instance Complement c (DFA c) where
    complement (DFA tr ini acc bh) = DFA tr ini acc' bh where
        acc' = IntSet.difference (IntMap.keysSet tr) acc

instance Ord c => Derivate c (DFA c) where
    nullable (DFA _tr ini acc _bh) = IntSet.member ini acc

    derivate c (DFA tr ini acc bh) = DFA tr ini' acc bh where
        ini' = case IntMap.lookup ini tr of
            Nothing -> ini -- in error case let's just stay in the same state.
            Just sf -> sf SF.! c

-------------------------------------------------------------------------------
-- toDot
-------------------------------------------------------------------------------

-- | Get Graphviz dot-code of DFA.
--
-- >>> let dfa = fromRE $ RE.star "abc"
-- >>> putStr $ toDot dfa
-- digraph dfa {
-- rankdir=LR;
-- // states
-- "0" [shape=doublecircle];
-- "1" [shape=circle];
-- "2" [shape=circle];
-- // initial state
-- "" [shape=none];
-- "" -> "0";
-- // transitions
-- "0" -> "2"[label="a"]
-- "1" -> "0"[label="c"]
-- "2" -> "1"[label="b"]
-- }
--
toDot :: DFA Char -> String
toDot = toDot' show pure

-- | More flexible version of 'toDot'.
toDot' :: (Ord c, Enum c, Bounded c) => (Int -> String) -> (c -> String) -> DFA c -> String
toDot' showS showC (DFA tr ini acc bh)
    = showString "digraph dfa {\n"
    . showString "rankdir=LR;\n"
    . showString "// states\n"
    . showStates
    . showString "// initial state\n"
    . showInitial
    . showString "// transitions\n"
    . showTransitions
    . showString "}\n"
    $ ""
  where
    showStates  = foldr (.) id
        [ showState i
        | i <- IntMap.keys tr
        , IntSet.member i acc || IntSet.notMember i bh
        ]
    showState s = showS' s . shape where
        shape
            | IntSet.member s acc = showString " [shape=doublecircle];\n"
            | otherwise        = showString " [shape=circle];\n"

    showInitial
        = showString "\"\" [shape=none];\n"
        . showString "\"\" -> "
        . showS' ini
        . showString ";\n"

    showTransitions = foldr (.) id
        [ showS' i
        . showString " -> "
        . showS' j
        . showString "[label="
        . label
                . showString "]\n"
        | (i, sf) <- IntMap.toList tr
        , (lo, hi, j) <- toPieces sf
        , IntSet.member j acc || IntSet.notMember j bh
        , let label
                | lo == hi
                    = shows (showC lo)
                | lo == minBound && hi == maxBound
                    = shows ("-any" :: String)
                | otherwise
                    = shows (showC lo ++ "-" ++ showC hi)
        ]

    showS' = shows . showS

-------------------------------------------------------------------------------
-- Orphans
-------------------------------------------------------------------------------

-- | __WARNING__: The '/\' is inefficient, it actually computes the conjunction:
--
-- >>> putPretty $ asREChar $ "a" /\ "b"
-- ^[]$
--
-- >>> putPretty $ asREChar $ "a" /\ star "a"
-- ^a$
--
-- >>> putPretty $ asREChar $ star "aa" /\ star "aaa"
-- ^(a(aaaaaa)*aaaaa)?$
--
instance (Ord c, Enum c, Bounded c) => Lattice (RE.RE c) where
    r /\ r' = toRE $ fromERE $ ERE.fromRE r /\ ERE.fromRE r'
    r \/ r' = unions [r, r']

instance (Ord c, Enum c, Bounded c) => BoundedJoinSemiLattice (RE.RE c) where
    bottom = empty

instance (Ord c, Enum c, Bounded c) => BoundedMeetSemiLattice (RE.RE c) where
    top = RE.REStar (RE.REChars RSet.full)

instance (Ord c, Enum c, Bounded c) => Complement c (RE.RE c) where
    complement = toRE . complement . fromRE

-------------------------------------------------------------------------------
-- Debug
-------------------------------------------------------------------------------

instance Show c => Pretty (DFA c) where
    pretty dfa = intercalate "\n"
        [ show i ++ acc ++ " -> " ++ SF.showSF sf ++ bh
        | (i, sf) <- IntMap.toList (dfaTransition dfa)
        , let acc = if IntSet.member i (dfaAcceptable dfa) then "+" else ""
        , let bh = if IntSet.member i $ dfaBlackholes dfa then " -- black hole" else ""
        ]

-- $setup
-- >>> :set -XOverloadedStrings
-- >>> import Data.Foldable (traverse_)
-- >>> import Algebra.Lattice ((/\))
--
-- >>> import Test.QuickCheck ((===))
-- >>> import qualified Test.QuickCheck as QC
--
-- >>> newtype Smaller a = Smaller a deriving (Show)
-- >>> let intLog2 = (`div` 10)
-- >>> instance QC.Arbitrary a => QC.Arbitrary (Smaller a) where arbitrary = QC.scale intLog2 QC.arbitrary; shrink (Smaller a) = map Smaller (QC.shrink a)
--
-- >>> let asREChar :: RE.RE Char -> RE.RE Char; asREChar = id