{-# LANGUAGE MagicHash #-}
-- | Evaluate an array by dividing it into rectangular blocks and filling
--   each block in parallel.
module Data.Array.Repa.Eval.Cursored
        ( fillBlock2P
        , fillCursoredBlock2P
        , fillCursoredBlock2S )
where
import Data.Array.Repa.Index
import Data.Array.Repa.Shape
import Data.Array.Repa.Eval.Elt
import Data.Array.Repa.Eval.Gang
import GHC.Base


-- Non-cursored interface -----------------------------------------------------
-- | Fill a block in a rank-2 array in parallel.
--
--   * Blockwise filling can be more cache-efficient than linear filling for
--    rank-2 arrays.
--
--   * Coordinates given are of the filled edges of the block.
-- 
--   * We divide the block into columns, and give one column to each thread.
-- 
--   * Each column is filled in row major order from top to bottom.
--
fillBlock2P 
        :: Elt a
        => (Int -> a -> IO ())  -- ^ Update function to write into result buffer.
        -> (DIM2 -> a)          -- ^ Function to evaluate the element at an index.
        -> Int#                 -- ^ Width of the whole array.
        -> Int#                 -- ^ x0 lower left corner of block to fill
        -> Int#                 -- ^ y0 
        -> Int#                 -- ^ w0 width of block to fill.
        -> Int#                 -- ^ h0 height of block to fill.
        -> IO ()

{-# INLINE [0] fillBlock2P #-}
fillBlock2P :: (Int -> a -> IO ())
-> (DIM2 -> a) -> Int# -> Int# -> Int# -> Int# -> Int# -> IO ()
fillBlock2P Int -> a -> IO ()
write DIM2 -> a
getElem !Int#
imageWidth !Int#
x0 !Int#
y0 !Int#
w0 Int#
h0
 = (Int -> a -> IO ())
-> (DIM2 -> DIM2)
-> (DIM2 -> DIM2 -> DIM2)
-> (DIM2 -> a)
-> Int#
-> Int#
-> Int#
-> Int#
-> Int#
-> IO ()
forall a cursor.
Elt a =>
(Int -> a -> IO ())
-> (DIM2 -> cursor)
-> (DIM2 -> cursor -> cursor)
-> (cursor -> a)
-> Int#
-> Int#
-> Int#
-> Int#
-> Int#
-> IO ()
fillCursoredBlock2P 
        Int -> a -> IO ()
write DIM2 -> DIM2
forall a. a -> a
id DIM2 -> DIM2 -> DIM2
forall sh. Shape sh => sh -> sh -> sh
addDim DIM2 -> a
getElem 
        Int#
imageWidth Int#
x0 Int#
y0 Int#
w0 Int#
h0

{-
-- | Fill a block in a rank-2 array sequentially.
--
--   * Blockwise filling can be more cache-efficient than linear filling for
--    rank-2 arrays.
--
--   * Coordinates given are of the filled edges of the block.
-- 
--   * The block is filled in row major order from top to bottom.
--
fillBlock2S
        :: Elt a
        => (Int -> a -> IO ())  -- ^ Update function to write into result buffer.
        -> (DIM2 -> a)          -- ^ Function to evaluate the element at an index.
        -> Int#                 -- ^ Width of the whole array.
        -> Int#                 -- ^ x0 lower left corner of block to fill
        -> Int#                 -- ^ y0
        -> Int#                 -- ^ w0 width of block to fill
        -> Int#                 -- ^ h0 height of block to filll
        -> IO ()

{-# INLINE [0] fillBlock2S #-}
fillBlock2S write getElem !imageWidth !x0 !y0 !w0 !h0
 = fillCursoredBlock2S
        write id addDim getElem 
        imageWidth x0 y0 w0 h0
-}

-- Block filling ----------------------------------------------------------------------------------
-- | Fill a block in a rank-2 array in parallel.
-- 
--   * Blockwise filling can be more cache-efficient than linear filling for rank-2 arrays.
--
--   * Using cursor functions can help to expose inter-element indexing computations to
--     the GHC and LLVM optimisers.
--
--   * Coordinates given are of the filled edges of the block.
-- 
--   * We divide the block into columns, and give one column to each thread.
-- 
--   * Each column is filled in row major order from top to bottom.
--
fillCursoredBlock2P
        :: Elt a
        => (Int -> a -> IO ())          -- ^ Update function to write into result buffer.
        -> (DIM2   -> cursor)           -- ^ Make a cursor to a particular element.
        -> (DIM2   -> cursor -> cursor) -- ^ Shift the cursor by an offset.
        -> (cursor -> a)                -- ^ Function to evaluate the element at an index.
        -> Int#                         -- ^ Width of the whole array.
        -> Int#                         -- ^ x0 lower left corner of block to fill
        -> Int#                         -- ^ y0
        -> Int#                         -- ^ w0 width of block to fill
        -> Int#                         -- ^ h0 height of block to fill
        -> IO ()

{-# INLINE [0] fillCursoredBlock2P #-}
fillCursoredBlock2P :: (Int -> a -> IO ())
-> (DIM2 -> cursor)
-> (DIM2 -> cursor -> cursor)
-> (cursor -> a)
-> Int#
-> Int#
-> Int#
-> Int#
-> Int#
-> IO ()
fillCursoredBlock2P
        Int -> a -> IO ()
write
        DIM2 -> cursor
makeCursorFCB DIM2 -> cursor -> cursor
shiftCursorFCB cursor -> a
getElemFCB
        !Int#
imageWidth !Int#
x0 !Int#
y0 !Int#
w0 !Int#
h0
 =      Gang -> (Int -> IO ()) -> IO ()
gangIO Gang
theGang Int -> IO ()
fillBlock
 where  
        !(I# Int#
threads)  = Gang -> Int
gangSize Gang
theGang

        -- All columns have at least this many pixels.
        !colChunkLen :: Int#
colChunkLen   = Int#
w0 Int# -> Int# -> Int#
`quotInt#` Int#
threads

        -- Extra pixels that we have to divide between some of the threads.
        !colChunkSlack :: Int#
colChunkSlack = Int#
w0 Int# -> Int# -> Int#
`remInt#` Int#
threads

        -- Get the starting pixel of a column in the image.
        {-# INLINE colIx #-}
        colIx :: Int# -> Int#
colIx !Int#
ix
         | Int#
1# <- Int#
ix Int# -> Int# -> Int#
<# Int#
colChunkSlack = Int#
x0 Int# -> Int# -> Int#
+# (Int#
ix Int# -> Int# -> Int#
*# (Int#
colChunkLen Int# -> Int# -> Int#
+# Int#
1#))
         | Bool
otherwise                 = Int#
x0 Int# -> Int# -> Int#
+# (Int#
ix Int# -> Int# -> Int#
*# Int#
colChunkLen) Int# -> Int# -> Int#
+# Int#
colChunkSlack

        -- Give one column to each thread
        {-# INLINE fillBlock #-}
        fillBlock :: Int -> IO ()
        fillBlock :: Int -> IO ()
fillBlock !(I# Int#
ix)
         = let  !x0' :: Int#
x0'      = Int# -> Int#
colIx Int#
ix
                !w0' :: Int#
w0'      = Int# -> Int#
colIx (Int#
ix Int# -> Int# -> Int#
+# Int#
1#) Int# -> Int# -> Int#
-# Int#
x0'
                !y0' :: Int#
y0'      = Int#
y0
                !h0' :: Int#
h0'      = Int#
h0
           in   (Int -> a -> IO ())
-> (DIM2 -> cursor)
-> (DIM2 -> cursor -> cursor)
-> (cursor -> a)
-> Int#
-> Int#
-> Int#
-> Int#
-> Int#
-> IO ()
forall a cursor.
Elt a =>
(Int -> a -> IO ())
-> (DIM2 -> cursor)
-> (DIM2 -> cursor -> cursor)
-> (cursor -> a)
-> Int#
-> Int#
-> Int#
-> Int#
-> Int#
-> IO ()
fillCursoredBlock2S
                        Int -> a -> IO ()
write
                        DIM2 -> cursor
makeCursorFCB DIM2 -> cursor -> cursor
shiftCursorFCB cursor -> a
getElemFCB
                        Int#
imageWidth Int#
x0' Int#
y0' Int#
w0' Int#
h0'


-- | Fill a block in a rank-2 array, sequentially.
--
--   * Blockwise filling can be more cache-efficient than linear filling for rank-2 arrays.
--
--   * Using cursor functions can help to expose inter-element indexing computations to
--     the GHC and LLVM optimisers.
--
--   * Coordinates given are of the filled edges of the block.
--
--   * The block is filled in row major order from top to bottom.
--
fillCursoredBlock2S
        :: Elt a
        => (Int -> a -> IO ())          -- ^ Update function to write into result buffer.
        -> (DIM2   -> cursor)           -- ^ Make a cursor to a particular element.
        -> (DIM2   -> cursor -> cursor) -- ^ Shift the cursor by an offset.
        -> (cursor -> a)                -- ^ Function to evaluate an element at the given index.
        -> Int#                         -- ^ Width of the whole array.
        -> Int#                         -- ^ x0 lower left corner of block to fill.
        -> Int#                         -- ^ y0
        -> Int#                         -- ^ w0 width of block to fill
        -> Int#                         -- ^ h0 height of block to fill
        -> IO ()

{-# INLINE [0] fillCursoredBlock2S #-}
fillCursoredBlock2S :: (Int -> a -> IO ())
-> (DIM2 -> cursor)
-> (DIM2 -> cursor -> cursor)
-> (cursor -> a)
-> Int#
-> Int#
-> Int#
-> Int#
-> Int#
-> IO ()
fillCursoredBlock2S
        Int -> a -> IO ()
write
        DIM2 -> cursor
makeCursor DIM2 -> cursor -> cursor
shiftCursor cursor -> a
getElem
        !Int#
imageWidth !Int#
x0 !Int#
y0 !Int#
w0 Int#
h0

 = do   Int# -> IO ()
fillBlock Int#
y0
 where  !x1 :: Int#
x1     = Int#
x0 Int# -> Int# -> Int#
+# Int#
w0
        !y1 :: Int#
y1     = Int#
y0 Int# -> Int# -> Int#
+# Int#
h0

        {-# INLINE fillBlock #-}
        fillBlock :: Int# -> IO ()
fillBlock !Int#
y
         | Int#
1# <- Int#
y Int# -> Int# -> Int#
>=# Int#
y1      = () -> IO ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
         | Bool
otherwise
         = do   Int# -> IO ()
fillLine4 Int#
x0
                Int# -> IO ()
fillBlock (Int#
y Int# -> Int# -> Int#
+# Int#
1#)

         where  {-# INLINE fillLine4 #-}
                fillLine4 :: Int# -> IO ()
fillLine4 !Int#
x
                 | Int#
1# <- Int#
x Int# -> Int# -> Int#
+# Int#
4# Int# -> Int# -> Int#
>=# Int#
x1  = Int# -> IO ()
fillLine1 Int#
x
                 | Bool
otherwise
                 = do   -- Compute each source cursor based on the previous one so that
                        -- the variable live ranges in the generated code are shorter.
                        let srcCur0 :: cursor
srcCur0     = DIM2 -> cursor
makeCursor  (Z
Z Z -> Int -> Z :. Int
forall tail head. tail -> head -> tail :. head
:. (Int# -> Int
I# Int#
y) (Z :. Int) -> Int -> DIM2
forall tail head. tail -> head -> tail :. head
:. (Int# -> Int
I# Int#
x))
                        let srcCur1 :: cursor
srcCur1     = DIM2 -> cursor -> cursor
shiftCursor (Z
Z Z -> Int -> Z :. Int
forall tail head. tail -> head -> tail :. head
:. Int
0 (Z :. Int) -> Int -> DIM2
forall tail head. tail -> head -> tail :. head
:. Int
1) cursor
srcCur0
                        let srcCur2 :: cursor
srcCur2     = DIM2 -> cursor -> cursor
shiftCursor (Z
Z Z -> Int -> Z :. Int
forall tail head. tail -> head -> tail :. head
:. Int
0 (Z :. Int) -> Int -> DIM2
forall tail head. tail -> head -> tail :. head
:. Int
1) cursor
srcCur1
                        let srcCur3 :: cursor
srcCur3     = DIM2 -> cursor -> cursor
shiftCursor (Z
Z Z -> Int -> Z :. Int
forall tail head. tail -> head -> tail :. head
:. Int
0 (Z :. Int) -> Int -> DIM2
forall tail head. tail -> head -> tail :. head
:. Int
1) cursor
srcCur2

                        -- Get the result value for each cursor.
                        let val0 :: a
val0        = cursor -> a
getElem cursor
srcCur0
                        let val1 :: a
val1        = cursor -> a
getElem cursor
srcCur1
                        let val2 :: a
val2        = cursor -> a
getElem cursor
srcCur2
                        let val3 :: a
val3        = cursor -> a
getElem cursor
srcCur3

                        -- Ensure that we've computed each of the result values before we
                        -- write into the array. If the backend code generator can't tell
                        -- our destination array doesn't alias with the source then writing
                        -- to it can prevent the sharing of intermediate computations.
                        a -> IO ()
forall a. Elt a => a -> IO ()
touch a
val0
                        a -> IO ()
forall a. Elt a => a -> IO ()
touch a
val1
                        a -> IO ()
forall a. Elt a => a -> IO ()
touch a
val2
                        a -> IO ()
forall a. Elt a => a -> IO ()
touch a
val3

                        -- Compute cursor into destination array.
                        let !dstCur0 :: Int#
dstCur0    = Int#
x Int# -> Int# -> Int#
+# (Int#
y Int# -> Int# -> Int#
*# Int#
imageWidth)
                        Int -> a -> IO ()
write (Int# -> Int
I# Int#
dstCur0)         a
val0
                        Int -> a -> IO ()
write (Int# -> Int
I# (Int#
dstCur0 Int# -> Int# -> Int#
+# Int#
1#)) a
val1
                        Int -> a -> IO ()
write (Int# -> Int
I# (Int#
dstCur0 Int# -> Int# -> Int#
+# Int#
2#)) a
val2
                        Int -> a -> IO ()
write (Int# -> Int
I# (Int#
dstCur0 Int# -> Int# -> Int#
+# Int#
3#)) a
val3
                        Int# -> IO ()
fillLine4 (Int#
x Int# -> Int# -> Int#
+# Int#
4#)

                {-# INLINE fillLine1 #-}
                fillLine1 :: Int# -> IO ()
fillLine1 !Int#
x
                 | Int#
1# <- Int#
x Int# -> Int# -> Int#
>=# Int#
x1 = () -> IO ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
                 | Bool
otherwise
                 = do   let val0 :: a
val0  = (cursor -> a
getElem (cursor -> a) -> cursor -> a
forall a b. (a -> b) -> a -> b
$ DIM2 -> cursor
makeCursor (Z
Z Z -> Int -> Z :. Int
forall tail head. tail -> head -> tail :. head
:. (Int# -> Int
I# Int#
y) (Z :. Int) -> Int -> DIM2
forall tail head. tail -> head -> tail :. head
:. (Int# -> Int
I# Int#
x)))
                        Int -> a -> IO ()
write (Int# -> Int
I# (Int#
x Int# -> Int# -> Int#
+# (Int#
y Int# -> Int# -> Int#
*# Int#
imageWidth))) a
val0
                        Int# -> IO ()
fillLine1 (Int#
x Int# -> Int# -> Int#
+# Int#
1#)