-----------------------------------------------------------------------------
-- |
-- Module      :  Language.HaLex.RegExp2Fa
-- Copyright   :  (c) João Saraiva 2001,2002,2003,2004,2005,2016
-- License     :  LGPL
--
-- Maintainer  :  jas@di.uminho.pt
-- Stability   :  provisional
-- Portability :  portable
--
-- From Regular Expressions into Non-Deterministic and Deterministic Finite Automata
--
-- Code Included in the Lecture Notes on
--         Language Processing (with a functional flavour).
--
-----------------------------------------------------------------------------

module Language.HaLex.RegExp2Fa (
                   regExp2Ndfa
                 , regExp2Dfa
                 , regExp2Ndfa'
                 ) where

import Data.List
import Language.HaLex.RegExp
import Language.HaLex.Dfa
import Language.HaLex.Ndfa
import Language.HaLex.FaOperations


-- |  Compute a 'Ndfa' from a 'RegExp'.

regExp2Ndfa :: Eq sy
            => RegExp sy     -- ^ Regular expression
            -> Ndfa Int sy   -- ^ Automaton
regExp2Ndfa er = fst (regExp2Ndfa' er 1)



-- |  Compute a 'Ndfa' from a 'RegExp'.
--    Auxiliar function threading the context: the first available int to
--    name the states

regExp2Ndfa' :: Eq sy => RegExp sy -> Int -> (Ndfa Int sy , Int)


regExp2Ndfa' Empty n = ( Ndfa [] [sa,za] [sa] [za] delta , n+2 )
  where sa = n
        za = n+1
        delta _ _ = []


regExp2Ndfa' Epsilon n = ( Ndfa [] [sa] [sa] [sa] delta , n+1 )
  where sa = n
        delta _ _ = []

regExp2Ndfa' (Literal a) n = ( Ndfa [a] [sa,za] [sa] [za] delta , n+2)
  where sa = n
        za = n+1
        delta q (Just v) | q == sa && (v == a) = [za]
        delta _ _ = []

regExp2Ndfa' (Then p q) n = ( Ndfa v' q' s' z' delta' , nq)
  where (Ndfa vp qp sp zp dp , np) = regExp2Ndfa' p n
        (Ndfa vq qq sq zq dq , nq) = regExp2Ndfa' q np
        v' = vp `union` vq
        q' = qp `union` qq
        s' = sp
        z' = zq
        delta' q | q `elem` qp = if q `elem` zp then dp' q
                                                else dp  q
                 | otherwise   = dq q
         where dp' q Nothing = (dp q Nothing) `union` sq
               dp' q (Just aa) = dp q (Just aa)

regExp2Ndfa' (Or p q) n = ( Ndfa v' q' s' z' delta' , nq+1 )
  where (Ndfa vp qp sp zp dp , np) = regExp2Ndfa' p (n + 1)
        (Ndfa vq qq sq zq dq , nq) = regExp2Ndfa' q np
        v' = vp `union` vq
        s' = [n]
        z' = [nq]
        q' = s' `union` qp `union` qq `union` z'
        delta' q | q `elem` s' = dd q
                 | q `elem` zp = ddp' q
                 | q `elem` zq = ddq' q
                 | q `elem` qp = dp q
                 | q `elem` qq = dq q
                 | otherwise   = dd'' q
         where dd q  Nothing = sp `union` sq
               dd _ _        = []

               ddp' q Nothing = z' `union` (dp q Nothing)
               ddp' _ _       = []

               ddq' q Nothing = z' `union` (dq q Nothing)
               ddq' _ _       = []

               dd'' _ _ = []

regExp2Ndfa' (Star p) n = ( Ndfa v' q' s' z' delta' , np+1 )
  where (Ndfa vp qp sp zp dp , np) = regExp2Ndfa' p (n + 1)
        v' = vp
        s' = [n]
        z' = [np]
        q' = s'  `union` qp `union` z'
        delta' q | q `elem` s' = dd q
                 | q `elem` zp = dd' q
                 | otherwise   = dp q
          where dd q Nothing = sp `union` z'
                dd _ _ = []

                dd' q Nothing = sp `union` z'
                dd' _ _ = []

regExp2Ndfa' (RESet set) n = (Ndfa set [ss,zs] [ss] [zs] delta , n+2)
  where ss = n
        zs = n+1 
        delta q (Just v) | q == ss && (v `elem` set) = [zs]
        delta _ _ = []


regExp2Ndfa' (OneOrMore re) n = regExp2Ndfa' re' n
  where re' = re ` Then` (Star re)

regExp2Ndfa' (Optional re) n = regExp2Ndfa' re' n
  where re' = Epsilon `Or` re



-- | Compute a 'Dfa' from a 'RegExp'.
--   (via the intermediate 'Ndfa')
regExp2Dfa :: Eq sy
           => RegExp sy       -- ^ Regular expression
           -> Dfa [Int] sy    -- ^ Automaton
regExp2Dfa = ndfa2dfa . regExp2Ndfa