{-|
Copyright  :  (C) 2013-2016, University of Twente,
                  2017     , Google Inc.
License    :  BSD2 (see the file LICENSE)
Maintainer :  Christiaan Baaij <christiaan.baaij@gmail.com>

Self-synchronising circuits based on data-flow principles.
-}

{-# LANGUAGE DataKinds             #-}
{-# LANGUAGE FlexibleInstances     #-}
{-# LANGUAGE MagicHash             #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ScopedTypeVariables   #-}
{-# LANGUAGE TypeFamilies          #-}
{-# LANGUAGE TypeOperators         #-}

{-# LANGUAGE Trustworthy #-}

{-# OPTIONS_GHC -fplugin GHC.TypeLits.KnownNat.Solver #-}
{-# OPTIONS_GHC -fplugin GHC.TypeLits.Normalise       #-}
{-# OPTIONS_HADDOCK show-extensions #-}

module Clash.Prelude.DataFlow
  ( -- * Data types
    DataFlow (..)
    -- * Creating DataFlow circuits
  , liftDF
  , pureDF
  , mealyDF
  , mooreDF
  , fifoDF
    -- * Composition combinators
  , idDF
  , seqDF
  , firstDF
  , swapDF
  , secondDF
  , parDF
  , parNDF
  , loopDF
  , loopDF_nobuf
    -- * Lock-Step operation
  , LockStep (..)
  )
where

import GHC.TypeLits           (KnownNat, type (+), type (^))
import Prelude                hiding ((++), (!!), length, map, repeat, tail, unzip3, zip3
                              , zipWith)

import Clash.Class.BitPack    (boolToBV)
import Clash.Class.Resize     (truncateB)
import Clash.Prelude.BitIndex (msb)
import Clash.Explicit.Mealy   (mealyB)
import Clash.Promoted.Nat     (SNat)
import Clash.Signal           ((.&&.), unbundle)
import Clash.Signal.Bundle    (Bundle (..))
import Clash.Signal.Internal  (clockGate, register#)
import Clash.Explicit.Signal  (Clock, Reset, Signal)
import Clash.Sized.BitVector  (BitVector)
import Clash.Sized.Vector

{- | Dataflow circuit with bidirectional synchronisation channels.

In the /forward/ direction we assert /validity/ of the data. In the /backward/
direction we assert that the circuit is /ready/ to receive new data. A circuit
adhering to the 'DataFlow' type should:

 * Not consume data when validity is deasserted.
 * Only update its output when readiness is asserted.

The 'DataFlow'' type is defined as:

@
newtype DataFlow' dom iEn oEn i o
  = DF
  { df :: 'Signal'' dom i     -- Incoming data
       -> 'Signal'' dom iEn   -- Flagged with /valid/ bits @iEn@.
       -> 'Signal'' dom oEn   -- Incoming back-pressure, /ready/ edge.
       -> ( 'Signal'' dom o   -- Outgoing data.
          , 'Signal'' dom oEn -- Flagged with /valid/ bits @oEn@.
          , 'Signal'' dom iEn -- Outgoing back-pressure, /ready/ edge.
          )
  }
@

where:

 * @dom@ is the clock to which the circuit is synchronised.
 * @iEn@ is the type of the bidirectional incoming synchronisation channel.
 * @oEn@ is the type of the bidirectional outgoing synchronisation channel.
 * @i@ is the incoming data type.
 * @o@ is the outgoing data type.

We define several composition operators for our 'DataFlow' circuits:

 * 'seqDF' sequential composition.
 * 'parDF' parallel composition.
 * 'loopDF' add a feedback arc.
 * 'lockStep' proceed in lock-step.

When you look at the types of the above operators it becomes clear why we
parametrise in the types of the synchronisation channels.
-}
newtype DataFlow domain iEn oEn i o
  = DF
  { -- | Create an ordinary circuit from a 'DataFlow' circuit
    df :: Signal domain i     -- Incoming data
       -> Signal domain iEn   -- Flagged with /valid/ bits @iEn@.
       -> Signal domain oEn   -- Incoming back-pressure, /ready/ edge.
       -> ( Signal domain o   -- Outgoing data.
          , Signal domain oEn -- Flagged with /valid/ bits @oEn@.
          , Signal domain iEn -- Outgoing back-pressure, /ready/ edge.
          )
  }

-- | Dataflow circuit synchronised to the 'systemClockGen'.
-- type DataFlow iEn oEn i o = DataFlow' systemClockGen iEn oEn i o

-- | Create a 'DataFlow' circuit from a circuit description with the appropriate
-- type:
--
-- @
-- 'Signal'' dom i        -- Incoming data.
-- -> 'Signal'' dom Bool  -- Flagged with a single /valid/ bit.
-- -> 'Signal'' dom Bool  -- Incoming back-pressure, /ready/ bit.
-- -> ( 'Signal'' dom o   -- Outgoing data.
--    , 'Signal'' dom oEn -- Flagged with a single /valid/ bit.
--    , 'Signal'' dom iEn -- Outgoing back-pressure, /ready/ bit.
--    )
-- @
--
-- A circuit adhering to the 'DataFlow' type should:
--
--  * Not consume data when validity is deasserted.
--  * Only update its output when readiness is asserted.
liftDF :: (Signal dom i -> Signal dom Bool -> Signal dom Bool
                        -> (Signal dom o, Signal dom Bool, Signal dom Bool))
       -> DataFlow dom Bool Bool i o
liftDF = DF

-- | Create a 'DataFlow' circuit where the given function @f@ operates on the
-- data, and the synchronisation channels are passed unaltered.
pureDF :: (i -> o)
       -> DataFlow dom Bool Bool i o
pureDF f = DF (\i iV oR -> (fmap f i,iV,oR))

-- | Create a 'DataFlow' circuit from a Mealy machine description as those of
-- "Clash.Prelude.Mealy"
mealyDF :: Clock domain gated
        -> Reset domain synchronous
        -> (s -> i -> (s,o))
        -> s
        -> DataFlow domain Bool Bool i o
mealyDF clk rst f iS =
  DF (\i iV oR -> let en     = iV .&&. oR
                      (s',o) = unbundle (f <$> s <*> i)
                      s      = register# (clockGate clk en) rst iS s'
                  in  (o,iV,oR))

-- | Create a 'DataFlow' circuit from a Moore machine description as those of
-- "Clash.Prelude.Moore"
mooreDF :: Clock domain gated
        -> Reset domain synchronous
        -> (s -> i -> s)
        -> (s -> o)
        -> s
        -> DataFlow domain Bool Bool i o
mooreDF clk rst ft fo iS =
  DF (\i iV oR -> let en  = iV .&&. oR
                      s'  = ft <$> s <*> i
                      s   = register# (clockGate clk en) rst iS s'
                      o   = fo <$> s
                  in  (o,iV,oR))

fifoDF_mealy :: forall addrSize a . KnownNat addrSize
  => (Vec (2^addrSize) a, BitVector (addrSize + 1), BitVector (addrSize + 1))
  -> (a, Bool, Bool)
  -> ((Vec (2^addrSize) a, BitVector (addrSize + 1), BitVector (addrSize + 1))
     ,(a, Bool, Bool))
fifoDF_mealy (mem,rptr,wptr) (wdata,winc,rinc) =
  let raddr = truncateB rptr :: BitVector addrSize
      waddr = truncateB wptr :: BitVector addrSize

      mem' | winc && not full = replace waddr wdata mem
           | otherwise        = mem

      rdata = mem !! raddr

      rptr' = rptr + boolToBV (rinc && not empty)
      wptr' = wptr + boolToBV (winc && not full)
      empty = rptr == wptr
      full  = msb rptr /= msb wptr && raddr == waddr
  in  ((mem',rptr',wptr'), (rdata,empty,full))

-- | Create a FIFO buffer adhering to the 'DataFlow' protocol. Can be filled
-- with initial content.
--
-- To create a FIFO of size 4, with two initial values 2 and 3 you would write:
--
-- @
-- fifo4 = 'fifoDF' d4 (2 :> 3 :> Nil)
-- @
fifoDF :: forall addrSize m n a domain gated synchronous .
     (KnownNat addrSize, KnownNat n, KnownNat m, (m + n) ~ (2 ^ addrSize))
  => Clock domain gated
  -> Reset domain synchronous
  -> SNat (m + n) -- ^ Depth of the FIFO buffer. Must be a power of two.
  -> Vec m a      -- ^ Initial content. Can be smaller than the size of the
                  -- FIFO. Empty spaces are initialised with 'undefined'.
  -> DataFlow domain Bool Bool a a
fifoDF clk rst _ iS = DF $ \i iV oR ->
  let initRdPtr      = 0
      initWrPtr      = fromIntegral (length iS)
      initMem        = iS ++ repeat undefined :: Vec (m + n) a
      initS          = (initMem,initRdPtr,initWrPtr)
      (o,empty,full) = mealyB clk rst fifoDF_mealy initS (i,iV,oR)
  in  (o,not <$> empty, not <$> full)

-- | Identity circuit
--
-- <<doc/idDF.svg>>
idDF :: DataFlow dom en en a a
idDF = DF (\a val rdy -> (a,val,rdy))

-- | Sequential composition of two 'DataFlow' circuits.
--
-- <<doc/seqDF.svg>>
seqDF :: DataFlow dom aEn bEn a b
      -> DataFlow dom bEn cEn b c
      -> DataFlow dom aEn cEn a c
(DF f) `seqDF` (DF g) = DF (\a aVal cRdy -> let (b,bVal,aRdy) = f a aVal bRdy
                                                (c,cVal,bRdy) = g b bVal cRdy
                                            in  (c,cVal,aRdy))

-- | Apply the circuit to the first halve of the communication channels, leave
-- the second halve unchanged.
--
-- <<doc/firstDF.svg>>
firstDF :: DataFlow dom aEn bEn a b
        -> DataFlow dom (aEn,cEn) (bEn,cEn) (a,c) (b,c)
firstDF (DF f) = DF (\ac acV bcR -> let (a,c)     = unbundle ac
                                        (aV,cV)   = unbundle acV
                                        (bR,cR)   = unbundle bcR
                                        (b,bV,aR) = f a aV bR
                                        bc        = bundle (b,c)
                                        bcV       = bundle (bV,cV)
                                        acR       = bundle (aR,cR)
                                    in  (bc,bcV,acR)
                    )

-- | Swap the two communication channels.
--
-- <<doc/swapDF.svg>>
swapDF :: DataFlow dom (aEn,bEn) (bEn,aEn) (a,b) (b,a)
swapDF = DF (\ab abV baR -> (swap <$> ab, swap <$> abV, swap <$> baR))
  where
    swap ~(a,b) = (b,a)

-- | Apply the circuit to the second halve of the communication channels, leave
-- the first halve unchanged.
--
-- <<doc/secondDF.svg>>
secondDF :: DataFlow dom aEn bEn a b
         -> DataFlow dom (cEn,aEn) (cEn,bEn) (c,a) (c,b)
secondDF f = swapDF `seqDF` firstDF f `seqDF` swapDF

-- | Compose two 'DataFlow' circuits in parallel.
--
-- <<doc/parDF.svg>>
parDF :: DataFlow dom aEn bEn a b
      -> DataFlow dom cEn dEn c d
      -> DataFlow dom (aEn,cEn) (bEn,dEn) (a,c) (b,d)
f `parDF` g = firstDF f `seqDF` secondDF g

-- | Compose /n/ 'DataFlow' circuits in parallel.
parNDF :: KnownNat n
       => Vec n (DataFlow dom aEn bEn a b)
       -> DataFlow dom
                    (Vec n aEn)
                    (Vec n bEn)
                    (Vec n a)
                    (Vec n b)
parNDF fs =
  DF (\as aVs bRs ->
        let as'  = unbundle as
            aVs' = unbundle aVs
            bRs' = unbundle bRs
            (bs,bVs,aRs) = unzip3 (zipWith (\k (a,b,r) -> df k a b r) fs
                                  (zip3 (lazyV as') (lazyV aVs') bRs'))
        in  (bundle bs,bundle bVs, bundle aRs)
     )

-- | Feed back the second halve of the communication channel. The feedback loop
-- is buffered by a 'fifoDF' circuit.
--
-- So given a circuit /h/ with two synchronisation channels:
--
-- @
-- __h__ :: 'DataFlow' (Bool,Bool) (Bool,Bool) (a,d) (b,d)
-- @
--
-- Feeding back the /d/ part (including its synchronisation channels) results
-- in:
--
-- @
-- 'loopDF' d4 Nil h
-- @
--
-- <<doc/loopDF.svg>>
--
-- When you have a circuit @h'@, with only a single synchronisation channel:
--
-- @
-- __h'__ :: 'DataFlow' Bool Bool (a,d) (b,d)
-- @
--
-- and you want to compose /h'/ in a feedback loop, the following will not work:
--
-- @
-- f \`@'seqDF'@\` ('loopDF' d4 Nil h') \`@'seqDF'@\` g
-- @
--
-- The circuits @f@, @h@, and @g@, must operate in /lock-step/ because the /h'/
-- circuit only has a single synchronisation channel. Consequently, there
-- should only be progress when all three circuits are producing /valid/ data
-- and all three circuits are /ready/ to receive new data. We need to compose
-- /h'/ with the 'lockStep' and 'stepLock' functions to achieve the /lock-step/
-- operation.
--
-- @
-- f \`@'seqDF'@\` ('lockStep' \`@'seqDF'@\` 'loopDF' d4 Nil h' \`@'seqDF'@\` 'stepLock') \`@'seqDF'@\` g
-- @
--
-- <<doc/loopDF_sync.svg>>
loopDF :: (KnownNat m, KnownNat n, KnownNat addrSize, (m+n) ~ (2^addrSize))
       => Clock dom gated
       -> Reset dom synchronous
       -> SNat (m + n) -- ^ Depth of the FIFO buffer. Must be a power of two
       -> Vec m d -- ^ Initial content of the FIFO buffer. Can be smaller than
                  -- the size of the FIFO. Empty spaces are initialised with
                  -- 'undefined'.
       -> DataFlow dom (Bool,Bool) (Bool,Bool) (a,d) (b,d)
       -> DataFlow dom Bool Bool   a           b
loopDF clk rst sz is (DF f) =
  DF (\a aV bR -> let (bd,bdV,adR) = f ad adV bdR
                      (b,d)        = unbundle bd
                      (bV,dV)      = unbundle bdV
                      (aR,dR)      = unbundle adR
                      (d_buf,dV_buf,dR_buf) = df (fifoDF clk rst sz is) d dV dR

                      ad  = bundle (a,d_buf)
                      adV = bundle (aV,dV_buf)
                      bdR = bundle (bR,dR_buf)
                  in  (b,bV,aR)
     )

-- | Feed back the second halve of the communication channel. Unlike 'loopDF',
-- the feedback loop is /not/ buffered.
loopDF_nobuf :: DataFlow dom (Bool,Bool) (Bool,Bool) (a,d) (b,d)
             -> DataFlow dom Bool Bool   a           b
loopDF_nobuf (DF f) = DF (\a aV bR -> let (bd,bdV,adR) = f ad adV bdR
                                          (b,d)        = unbundle bd
                                          (bV,dV)      = unbundle bdV
                                          (aR,dR)      = unbundle adR
                                          ad           = bundle (a,d)
                                          adV          = bundle (aV,dV)
                                          bdR          = bundle (bR,dR)
                                      in  (b,bV,aR)
                         )

-- | Reduce or extend the synchronisation granularity of parallel compositions.
class LockStep a b where
  -- | Reduce the synchronisation granularity to a single 'Bool'ean value.
  --
  -- Given:
  --
  -- @
  -- __f__ :: 'DataFlow' Bool Bool a b
  -- __g__ :: 'DataFlow' Bool Bool c d
  -- __h__ :: 'DataFlow' Bool Bool (b,d) (p,q)
  -- @
  --
  -- We /cannot/ simply write:
  --
  -- @
  -- (f \`@'parDF'@\` g) \`@'seqDF'@\` h
  -- @
  --
  -- because, @f \`parDF\` g@, has type, @'DataFlow' (Bool,Bool) (Bool,Bool) (a,c) (b,d)@,
  -- which does not match the expected synchronisation granularity of @h@. We
  -- need a circuit in between that has the type:
  --
  -- @
  -- 'DataFlow' (Bool,Bool) Bool (b,d) (b,d)
  -- @
  --
  -- Simply '&&'-ing the /valid/ signals in the forward direction, and
  -- duplicating the /ready/ signal in the backward direction is however not
  -- enough. We also need to make sure that @f@ does not update its output when
  -- @g@'s output is invalid and visa versa, as @h@ can only consume its input
  -- when both @f@ and @g@ are producing valid data. @g@'s /ready/ port is hence
  -- only asserted when @h@ is ready and @f@ is producing /valid/ data. And @f@'s
  -- ready port is only asserted when @h@ is ready and @g@ is producing valid
  -- data. @f@ and @g@ will hence be proceeding in /lock-step/.
  --
  -- The 'lockStep' function ensures that all synchronisation signals are
  -- properly connected:
  --
  -- @
  -- (f \`@'parDF'@\` g) \`@'seqDF'@\` 'lockStep' \`@'seqDF'@\` h
  -- @
  --
  -- <<doc/lockStep.svg>>
  --
  -- __Note 1__: ensure that the components that you are synchronising have
  -- buffered/delayed @ready@ and @valid@ signals, or 'lockStep' has the
  -- potential to introduce combinational loops. You can do this by placing
  -- 'fifoDF's on the parallel channels. Extending the above example, you would
  -- write:
  --
  -- @
  -- ((f \`@'seqDF'@\` 'fifoDF' d4 Nil) \`@'parDF'@\` (g \`@'seqDF'@\` 'fifoDF' d4 Nil)) \`@'seqDF'@\` 'lockStep' \`@'seqDF'@\` h
  -- @
  --
  -- __Note 2__: 'lockStep' works for arbitrarily nested tuples. That is:
  --
  -- @
  -- p :: 'DataFlow' Bool Bool ((b,d),d) z
  --
  -- q :: 'DataFlow' ((Bool,Bool),Bool) ((Bool,Bool),Bool) ((a,c),c) ((b,d),d)
  -- q = f \`@'parDF'@\` g \`@'parDF'@\` g
  --
  -- r = q \`@'seqDF'@\` 'lockStep' \`@'seqDF'@\` p
  -- @
  --
  -- Does the right thing.
  lockStep :: DataFlow dom a Bool b b

  -- | Extend the synchronisation granularity from a single 'Bool'ean value.
  --
  -- Given:
  --
  -- @
  -- __f__ :: 'DataFlow' Bool Bool a b
  -- __g__ :: 'DataFlow' Bool Bool c d
  -- __h__ :: 'DataFlow' Bool Bool (p,q) (a,c)
  -- @
  --
  -- We /cannot/ simply write:
  --
  -- @
  -- h \`@'seqDF'@\` (f \`@'parDF'@\` g)
  -- @
  --
  -- because, @f \`parDF\` g@, has type, @'DataFlow' (Bool,Bool) (Bool,Bool) (a,c) (b,d)@,
  -- which does not match the expected synchronisation granularity of @h@. We
  -- need a circuit in between that has the type:
  --
  -- @
  -- 'DataFlow' Bool (Bool,Bool) (a,c) (a,c)
  -- @
  --
  -- Simply '&&'-ing the /ready/ signals in the backward direction, and
  -- duplicating the /valid/ signal in the forward direction is however not
  -- enough. We need to make sure that @f@ does not consume values when @g@ is
  -- not /ready/ and visa versa, because @h@ cannot update the values of its
  -- output tuple independently. @f@'s /valid/ port is hence only asserted when
  -- @h@ is valid and @g@ is ready to receive new values. @g@'s /valid/ port is
  -- only asserted when @h@ is valid and @f@ is ready to receive new values.
  -- @f@ and @g@ will hence be proceeding in /lock-step/.
  --
  -- The 'stepLock' function ensures that all synchronisation signals are
  -- properly connected:
  --
  -- @
  -- h \`@'seqDF'@\` 'stepLock' \`@'seqDF'@\` (f \`@'parDF'@\` g)
  -- @
  --
  -- <<doc/stepLock.svg>>
  --
  -- __Note 1__: ensure that the components that you are synchronising have
  -- buffered/delayed @ready@ and @valid@ signals, or 'stepLock' has the
  -- potential to introduce combinational loops. You can do this by placing
  -- 'fifoDF's on the parallel channels. Extending the above example, you would
  -- write:
  --
  -- @
  -- h \`@'seqDF'@\` 'stepLock' \`@'seqDF'@\` ((`fifoDF` d4 Nil \`@'seqDF'@\` f) \`@'parDF'@\` (`fifoDF` d4 Nil \`@'seqDF'@\` g))
  -- @
  --
  -- __Note 2__: 'stepLock' works for arbitrarily nested tuples. That is:
  --
  -- @
  -- p :: 'DataFlow' Bool Bool z ((a,c),c)
  --
  -- q :: 'DataFlow' ((Bool,Bool),Bool) ((Bool,Bool),Bool) ((a,c),c) ((b,d),d)
  -- q = f \`@'parDF'@\` g \`@'parDF'@\` g
  --
  -- r = p \`@'seqDF'@\` 'stepLock' \`@'seqDF'@\` q
  -- @
  --
  -- Does the right thing.
  stepLock :: DataFlow dom Bool a b b

instance LockStep Bool c where
  lockStep = idDF
  stepLock = idDF

instance (LockStep a x, LockStep b y) => LockStep (a,b) (x,y) where
  lockStep = (lockStep `parDF` lockStep) `seqDF`
                (DF (\xy xyV rdy -> let (xV,yV)   = unbundle xyV
                                        val       = xV .&&. yV
                                        xR        = yV .&&. rdy
                                        yR        = xV .&&. rdy
                                        xyR       = bundle (xR,yR)
                                    in  (xy,val,xyR)))

  stepLock = (DF (\xy val xyR -> let (xR,yR) = unbundle xyR
                                     rdy     = xR  .&&. yR
                                     xV      = val .&&. yR
                                     yV      = val .&&. xR
                                     xyV     = bundle (xV,yV)
                                 in  (xy,xyV,rdy))) `seqDF` (stepLock `parDF` stepLock)

instance (LockStep en a, KnownNat n) => LockStep (Vec n en) (Vec n a) where
  lockStep = parNDF (repeat lockStep) `seqDF`
    DF (\xs vals rdy ->
          let val  = (and . (True :>)) <$> vals
              rdys = allReady <$> rdy <*> (repeat . (:< True) <$> vals)
          in  (xs,val,rdys)
       )
  stepLock =
    DF (\xs val rdys ->
          let rdy  = (and . (True :>)) <$> rdys
              vals = allReady <$> val <*> (repeat . (:< True) <$> rdys)
          in  (xs,vals,rdy)
       ) `seqDF` parNDF (repeat stepLock)

allReady :: KnownNat n
         => Bool
         -> Vec n (Vec (n+1) Bool)
         -> Vec n Bool
allReady b vs = map (and . (b :>) . tail) (smap (flip rotateLeftS) vs)