{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
module Numeric.LAPACK.Matrix.Plain (
   Full,
   General, Tall, Wide,
   ShapeInt, shapeInt,
   transpose, adjoint,
   Matrix.height, Matrix.width,
   Basic.caseTallWide,
   fromList,
   mapExtent, fromFull,
   tallFromGeneral, wideFromGeneral,
   generalizeTall, generalizeWide,
   mapHeight, mapWidth,
   identity,
   diagonal,
   fromRowsNonEmpty,    fromRowArray,    fromRows,
   fromRowsNonEmptyContainer,    fromRowContainer,
   fromColumnsNonEmpty, fromColumnArray, fromColumns,
   fromColumnsNonEmptyContainer, fromColumnContainer,
   Basic.singleRow,   Basic.singleColumn,
   Basic.flattenRow,  Basic.flattenColumn,
   Basic.liftRow,     Basic.liftColumn,
   Basic.unliftRow,   Basic.unliftColumn,
   toRows, toColumns,
   toRowArray, toColumnArray,
   toRowContainer, toColumnContainer,
   takeRow, takeColumn,
   Basic.takeRows, Basic.takeColumns, takeEqually,
   Basic.dropRows, Basic.dropColumns, dropEqually,
   Basic.takeTop, Basic.takeBottom,
   Basic.takeLeft, Basic.takeRight,
   takeRowArray, takeColumnArray,
   swapRows, swapColumns,
   reverseRows, reverseColumns,
   fromRowMajor, toRowMajor,
   forceOrder, adaptOrder,

   (|*-),
   tensorProduct,
   outer,
   kronecker,
   sumRank1,

   add, sub,
   rowSums, columnSums,
   rowArgAbsMaximums, columnArgAbsMaximums,
   Basic.scaleRows, Basic.scaleColumns,
   Basic.scaleRowsComplex, Basic.scaleColumnsComplex,
   Basic.scaleRowsReal, Basic.scaleColumnsReal,
   ) where

import qualified Numeric.LAPACK.Matrix.Shape.Private as MatrixShape
import qualified Numeric.LAPACK.Matrix.Square.Basic as Square
import qualified Numeric.LAPACK.Matrix.Extent.Private as Extent
import qualified Numeric.LAPACK.Matrix.Basic as Basic
import qualified Numeric.LAPACK.Matrix.RowMajor as RowMajor
import qualified Numeric.LAPACK.Matrix.Private as Matrix
import qualified Numeric.LAPACK.Vector.Private as VectorPriv
import qualified Numeric.LAPACK.Vector as Vector
import Numeric.LAPACK.Matrix.Shape.Private
         (Order(RowMajor, ColumnMajor), transposeFromOrder)
import Numeric.LAPACK.Matrix.Basic
         (transpose, adjoint, forceOrder, forceRowMajor)
import Numeric.LAPACK.Matrix.Private
         (Full, Tall, Wide, General, ShapeInt, shapeInt,
          mapExtent, fromFull, generalizeTall, generalizeWide)
import Numeric.LAPACK.Matrix.Modifier (Conjugation(NonConjugated,Conjugated))
import Numeric.LAPACK.Vector (Vector)
import Numeric.LAPACK.Scalar (zero, one)
import Numeric.LAPACK.Private (pointerSeq, fill, copySubMatrix, copyBlock)

import qualified Numeric.LAPACK.FFI.Generic as LapackGen
import qualified Numeric.BLAS.FFI.Generic as BlasGen
import qualified Numeric.Netlib.Utility as Call
import qualified Numeric.Netlib.Class as Class

import qualified Data.Array.Comfort.Boxed as BoxedArray
import qualified Data.Array.Comfort.Storable.Unchecked.Monadic as ArrayIO
import qualified Data.Array.Comfort.Storable.Unchecked as Array
import qualified Data.Array.Comfort.Storable as CheckedArray
import qualified Data.Array.Comfort.Container as Container
import qualified Data.Array.Comfort.Shape as Shape
import Data.Array.Comfort.Storable.Unchecked (Array(Array))

import Foreign.Marshal.Array (advancePtr, pokeArray)
import Foreign.ForeignPtr (withForeignPtr)
import Foreign.Storable (Storable, poke, peekElemOff)

import Control.Monad.Trans.Cont (ContT(ContT), evalContT)
import Control.Monad.IO.Class (liftIO)
import Control.Monad (when, mfilter)
import Control.Applicative ((<$>))

import qualified Data.NonEmpty.Mixed as NonEmptyM
import qualified Data.NonEmpty as NonEmpty
import qualified Data.Foldable as Fold
import Data.Foldable (forM_)
import Data.Maybe (listToMaybe)
import Data.Bool.HT (if')


fromList ::
   (Shape.C height, Shape.C width, Storable a) =>
   height -> width -> [a] -> General height width a
fromList height width =
   CheckedArray.fromList (MatrixShape.general RowMajor height width)

tallFromGeneral ::
   (Shape.C height, Shape.C width, Storable a) =>
   General height width a -> Tall height width a
tallFromGeneral =
   Array.mapShape $ \(MatrixShape.Full order extent) ->
      uncurry (MatrixShape.tall order) (Extent.dimensions extent)

wideFromGeneral ::
   (Shape.C height, Shape.C width, Storable a) =>
   General height width a -> Wide height width a
wideFromGeneral =
   Array.mapShape $ \(MatrixShape.Full order extent) ->
      uncurry (MatrixShape.wide order) (Extent.dimensions extent)


identity ::
   (Shape.C sh, Class.Floating a) =>
   sh -> General sh sh a
identity = Square.toFull . Square.identity

diagonal ::
   (Shape.C sh, Class.Floating a) =>
   Vector sh a -> General sh sh a
diagonal = Square.toFull . Square.diagonal


mapHeight ::
   (Shape.C heightA, Shape.C heightB,
    Extent.GeneralTallWide vert horiz,
    Extent.GeneralTallWide horiz vert) =>
   (heightA -> heightB) ->
   Full vert horiz heightA width a ->
   Full vert horiz heightB width a
mapHeight f = Basic.mapHeight $ MatrixShape.mapChecked "mapHeight" f

mapWidth ::
   (Shape.C widthA, Shape.C widthB,
    Extent.GeneralTallWide vert horiz,
    Extent.GeneralTallWide horiz vert) =>
   (widthA -> widthB) ->
   Full vert horiz height widthA a ->
   Full vert horiz height widthB a
mapWidth f = Basic.mapWidth $ MatrixShape.mapChecked "mapWidth" f


fromRowsNonEmpty ::
   (Shape.C width, Eq width, Storable a) =>
   NonEmpty.T [] (Vector width a) -> General ShapeInt width a
fromRowsNonEmpty (NonEmpty.Cons row rows) =
   fromRows (Array.shape row) (row:rows)

fromRowArray ::
   (Shape.C height, Shape.C width, Eq width, Storable a) =>
   width -> BoxedArray.Array height (Vector width a) -> General height width a
fromRowArray width rows =
   Basic.mapHeight (const $ BoxedArray.shape rows) $
   fromRows width $ BoxedArray.toList rows

fromRowsNonEmptyContainer ::
   (f ~ NonEmpty.T g, Container.C g,
    Shape.C width, Eq width, Storable a) =>
   f (Vector width a) -> General (Container.Shape f) width a
fromRowsNonEmptyContainer rows =
   fromRowContainer (Array.shape $ NonEmpty.head rows) rows

fromRowContainer ::
   (Container.C f, Shape.C width, Eq width, Storable a) =>
   width -> f (Vector width a) -> General (Container.Shape f) width a
fromRowContainer width rows =
   Basic.mapHeight (const $ Container.toShape rows) $
   fromRows width $ Fold.toList rows

fromRows ::
   (Shape.C width, Eq width, Storable a) =>
   width -> [Vector width a] -> General ShapeInt width a
fromRows width = fromRowMajor . RowMajor.fromRows width


fromColumnsNonEmpty ::
   (Shape.C height, Eq height, Storable a) =>
   NonEmpty.T [] (Vector height a) -> General height ShapeInt a
fromColumnsNonEmpty (NonEmpty.Cons column columns) =
   fromColumns (Array.shape column) (column:columns)

fromColumnArray ::
   (Shape.C height, Eq height, Shape.C width, Storable a) =>
   height -> BoxedArray.Array width (Vector height a) -> General height width a
fromColumnArray height columns =
   Basic.mapWidth (const $ BoxedArray.shape columns) $
   fromColumns height $ BoxedArray.toList columns

fromColumnsNonEmptyContainer ::
   (f ~ NonEmpty.T g, Container.C g,
    Shape.C height, Eq height, Storable a) =>
   f (Vector height a) -> General height (Container.Shape f) a
fromColumnsNonEmptyContainer columns =
   fromColumnContainer (Array.shape $ NonEmpty.head columns) columns

fromColumnContainer ::
   (Shape.C height, Eq height, Container.C f, Storable a) =>
   height -> f (Vector height a) -> General height (Container.Shape f) a
fromColumnContainer height columns =
   Basic.mapWidth (const $ Container.toShape columns) $
   fromColumns height $ Fold.toList columns

fromColumns ::
   (Shape.C height, Eq height, Storable a) =>
   height -> [Vector height a] -> General height ShapeInt a
fromColumns height = transpose . fromRowMajor . RowMajor.fromRows height


toRows ::
   (Extent.C vert, Extent.C horiz,
    Shape.C height, Shape.C width, Class.Floating a) =>
   Full vert horiz height width a -> [Vector width a]
toRows a =
   let ad = Basic.mapHeight Shape.Deferred $ fromFull a
   in map (takeRow ad) $ Shape.indices $ Matrix.height ad

toColumns ::
   (Extent.C vert, Extent.C horiz,
    Shape.C height, Shape.C width, Class.Floating a) =>
   Full vert horiz height width a -> [Vector height a]
toColumns a =
   let ad = Basic.mapWidth Shape.Deferred $ fromFull a
   in map (takeColumn ad) $ Shape.indices $ Matrix.width ad

toRowArray ::
   (Extent.C vert, Extent.C horiz,
    Shape.C height, Shape.C width, Class.Floating a) =>
   Full vert horiz height width a -> BoxedArray.Array height (Vector width a)
toRowArray a = BoxedArray.fromList (Matrix.height a) (toRows a)

toColumnArray ::
   (Extent.C vert, Extent.C horiz,
    Shape.C height, Shape.C width, Class.Floating a) =>
   Full vert horiz height width a -> BoxedArray.Array width (Vector height a)
toColumnArray a = BoxedArray.fromList (Matrix.width a) (toColumns a)

toRowContainer ::
   (Extent.C vert, Extent.C horiz,
    Container.C f, Shape.C width, Class.Floating a) =>
   Full vert horiz (Container.Shape f) width a -> f (Vector width a)
toRowContainer a = Container.fromList (Matrix.height a) (toRows a)

toColumnContainer ::
   (Extent.C vert, Extent.C horiz,
    Shape.C height, Container.C f, Class.Floating a) =>
   Full vert horiz height (Container.Shape f) a -> f (Vector height a)
toColumnContainer a = Container.fromList (Matrix.width a) (toColumns a)


takeRow ::
   (Extent.C vert, Extent.C horiz,
    Shape.Indexed height, Shape.C width, Shape.Index height ~ ix,
    Class.Floating a) =>
   Full vert horiz height width a -> ix -> Vector width a
takeRow x ix =
   either (RowMajor.takeRow ix) (RowMajor.takeColumn ix) $
   Matrix.revealOrder x

takeColumn ::
   (Extent.C vert, Extent.C horiz,
    Shape.C height, Shape.Indexed width, Shape.Index width ~ ix,
    Class.Floating a) =>
   Full vert horiz height width a -> ix -> Vector height a
takeColumn x ix =
   either (RowMajor.takeColumn ix) (RowMajor.takeRow ix) $
   Matrix.revealOrder x


takeEqually ::
   (Extent.C vert, Extent.C horiz, Class.Floating a) =>
   Int ->
   Full vert horiz ShapeInt ShapeInt a ->
   Full vert horiz ShapeInt ShapeInt a
takeEqually k (Array (MatrixShape.Full order extentA) a) =
   let (Shape.ZeroBased heightA, Shape.ZeroBased widthA) =
         Extent.dimensions extentA
       heightB = min k heightA
       widthB  = min k widthA
       extentB =
         Extent.reduceConsistent
            (Shape.ZeroBased heightB) (Shape.ZeroBased widthB) extentA
   in if' (k<0) (error "take: negative number") $
      Array.unsafeCreate (MatrixShape.Full order extentB) $ \bPtr ->
      withForeignPtr a $ \aPtr ->
      case order of
         RowMajor -> copySubMatrix widthB heightB widthA aPtr widthB bPtr
         ColumnMajor -> copySubMatrix heightB widthB heightA aPtr heightB bPtr

dropEqually ::
   (Extent.C vert, Extent.C horiz, Class.Floating a) =>
   Int ->
   Full vert horiz ShapeInt ShapeInt a ->
   Full vert horiz ShapeInt ShapeInt a
dropEqually k (Array (MatrixShape.Full order extentA) a) =
   let (Shape.ZeroBased heightA, Shape.ZeroBased widthA) =
         Extent.dimensions extentA
       heightB = heightA - top; top  = min k heightA
       widthB  = widthA - left; left = min k widthA
       extentB =
         Extent.reduceConsistent
            (Shape.ZeroBased heightB) (Shape.ZeroBased widthB) extentA
   in if' (k<0) (error "drop: negative number") $
      Array.unsafeCreate (MatrixShape.Full order extentB) $ \bPtr ->
      withForeignPtr a $ \aPtr ->
      case order of
         RowMajor ->
            copySubMatrix widthB heightB
               widthA (advancePtr aPtr (top*widthA+left)) widthB bPtr
         ColumnMajor ->
            copySubMatrix heightB widthB
               heightA (advancePtr aPtr (left*heightA+top)) heightB bPtr


swapRows ::
   (Extent.C vert, Extent.C horiz,
    Shape.Indexed height, Shape.C width, Class.Floating a) =>
   Shape.Index height -> Shape.Index height ->
   Full vert horiz height width a -> Full vert horiz height width a
swapRows i j (Array shape@(MatrixShape.Full order extent) a) =
   Array.unsafeCreateWithSize shape $ \blockSize bPtr -> evalContT $ do
      let (height,width) = Extent.dimensions extent
      let m = Shape.size height
      let n = Shape.size width
      nPtr <- Call.cint n
      aPtr <- ContT $ withForeignPtr a
      let offsetI = Shape.offset height i
      let offsetJ = Shape.offset height j
      let (incVert,incHoriz) =
            case order of
               RowMajor -> (n,1)
               ColumnMajor -> (1,m)
      incPtr <- Call.cint incHoriz
      liftIO $ do
         copyBlock blockSize aPtr bPtr
         when (offsetI/=offsetJ) $
            BlasGen.swap nPtr
               (advancePtr bPtr (incVert*offsetI)) incPtr
               (advancePtr bPtr (incVert*offsetJ)) incPtr

swapColumns ::
   (Extent.C vert, Extent.C horiz,
    Shape.C height, Shape.Indexed width, Class.Floating a) =>
   Shape.Index width -> Shape.Index width ->
   Full vert horiz height width a -> Full vert horiz height width a
swapColumns i j = transpose . swapRows i j . transpose


-- alternative: laswp
reverseRows ::
   (Extent.C vert, Extent.C horiz, Shape.C width, Class.Floating a) =>
   Full vert horiz ShapeInt width a -> Full vert horiz ShapeInt width a
reverseRows (Array shape@(MatrixShape.Full order extent) a) =
   Array.unsafeCreateWithSize shape $ \blockSize bPtr -> evalContT $ do
      let (height,width) = Extent.dimensions extent
      let n = Shape.size height
      let m = Shape.size width
      fwdPtr <- Call.bool True
      nPtr <- Call.cint n
      mPtr <- Call.cint m
      kPtr <- Call.allocaArray n
      aPtr <- ContT $ withForeignPtr a
      liftIO $ do
         copyBlock blockSize aPtr bPtr
         pokeArray kPtr $ take n $ iterate (subtract 1) $ fromIntegral n
         case order of
            RowMajor -> LapackGen.lapmt fwdPtr mPtr nPtr bPtr mPtr kPtr
            ColumnMajor -> LapackGen.lapmr fwdPtr nPtr mPtr bPtr nPtr kPtr

reverseColumns ::
   (Extent.C vert, Extent.C horiz, Shape.C height, Class.Floating a) =>
   Full vert horiz height ShapeInt a -> Full vert horiz height ShapeInt a
reverseColumns = transpose . reverseRows . transpose


{-
The function is optimized for blocks of consecutive rows.
For scattered rows in column major order
the function has quite ugly memory access patterns.
-}
takeRowArray ::
   (Shape.Indexed height, Shape.C width, Shape.C sh, Class.Floating a) =>
   BoxedArray.Array sh (Shape.Index height) ->
   General height width a -> General sh width a
takeRowArray ixs (Array (MatrixShape.Full order extent) a) =
   let (heightA, width) = Extent.dimensions extent
       heightB = BoxedArray.shape ixs
       offsets = map (Shape.offset heightA) $ BoxedArray.toList ixs
       startBlocks blocks = zip (scanl (+) 0 $ map fst blocks) blocks
       ma = Shape.size heightA
       mb = Shape.size heightB
       n = Shape.size width
   in Array.unsafeCreate (MatrixShape.general order heightB width) $ \bPtr ->
      withForeignPtr a $ \aPtr ->
      case order of
         RowMajor -> do
            forM_ (startBlocks $ chopRowBlocks offsets) $
               \(dest, (numRows, (start,step))) ->
                  copySubMatrix n numRows
                     (step*n) (advancePtr aPtr (start*n))
                     n (advancePtr bPtr (dest*n))
         ColumnMajor -> do
            forM_ (startBlocks $ chopColumnBlocks offsets) $
               \(dest, (numRows, start)) ->
                  copySubMatrix numRows n
                     ma (advancePtr aPtr start)
                     mb (advancePtr bPtr dest)

chopRowBlocks :: (Integral i) => [i] -> [(Int,(i,i))]
chopRowBlocks =
   let go [] = []
       go is@(i0:is0) =
         case mfilter (i0<) $ listToMaybe is0 of
            Nothing -> (1,(i0,0)) : go is0
            Just i1 ->
               let (consecutive,remainder) =
                     span (uncurry (==)) $ zip [i0,i1..] is
               in (length consecutive, (i0,i1-i0)) : go (map snd remainder)
   in go

chopColumnBlocks :: (Integral i) => [i] -> [(Int,i)]
chopColumnBlocks =
   map (\is -> (length $ NonEmpty.flatten is, NonEmpty.head is)) .
   NonEmptyM.groupBy (\i j -> i+1 == j)


takeColumnArray ::
   (Shape.C height, Shape.Indexed width, Shape.C sh, Class.Floating a) =>
   BoxedArray.Array sh (Shape.Index width) ->
   General height width a -> General height sh a
takeColumnArray ixs = transpose . takeRowArray ixs . transpose



fromRowMajor ::
   (Shape.C height, Shape.C width, Storable a) =>
   Array (height,width) a -> General height width a
fromRowMajor = Array.mapShape (uncurry $ MatrixShape.general RowMajor)

toRowMajor ::
   (Extent.C vert, Extent.C horiz,
    Shape.C height, Shape.C width, Class.Floating a) =>
   Full vert horiz height width a -> Array (height,width) a
toRowMajor =
   Array.mapShape
      (\shape -> (MatrixShape.fullHeight shape, MatrixShape.fullWidth shape)) .
   forceRowMajor

adaptOrder ::
   (Extent.C vert, Extent.C horiz, Shape.C height, Shape.C width,
    Class.Floating a) =>
   Full vert horiz height width a ->
   Full vert horiz height width a ->
   Full vert horiz height width a
adaptOrder x = forceOrder (MatrixShape.fullOrder $ Array.shape x)


add, sub ::
   (Extent.C vert, Extent.C horiz,
    Shape.C height, Shape.C width, Eq height, Eq width,
    Class.Floating a) =>
   Full vert horiz height width a ->
   Full vert horiz height width a ->
   Full vert horiz height width a
add x y = Vector.add (adaptOrder y x) y
sub x y = Vector.sub (adaptOrder y x) y


rowSums ::
   (Extent.C vert, Extent.C horiz,
    Shape.C height, Shape.C width, Class.Floating a) =>
   Full vert horiz height width a -> Vector height a
rowSums m = Basic.multiplyVectorUnchecked m $ Vector.one $ Matrix.width m

-- Does not work because incx must not be zero
_rowSums ::
   (Extent.C vert, Extent.C horiz,
    Shape.C height, Shape.C width, Class.Floating a) =>
   Full vert horiz height width a -> Vector height a
_rowSums (Array shape@(MatrixShape.Full order extent) a) =
      Array.unsafeCreate (Extent.height extent) $ \yPtr -> do
   let (m,n) = MatrixShape.dimensions shape
   let lda = m
   evalContT $ do
      transPtr <- Call.char $ transposeFromOrder order
      mPtr <- Call.cint m
      nPtr <- Call.cint n
      alphaPtr <- Call.number one
      aPtr <- ContT $ withForeignPtr a
      ldaPtr <- Call.leadingDim lda
      xPtr <- Call.number one
      incxPtr <- Call.cint 0
      betaPtr <- Call.number zero
      incyPtr <- Call.cint 1
      liftIO $
         BlasGen.gemv
            transPtr mPtr nPtr alphaPtr aPtr ldaPtr
            xPtr incxPtr betaPtr yPtr incyPtr

columnSums ::
   (Extent.C vert, Extent.C horiz,
    Shape.C height, Shape.C width, Class.Floating a) =>
   Full vert horiz height width a -> Vector width a
columnSums m =
   Basic.multiplyVectorUnchecked (transpose m) $ Vector.one $ Matrix.height m


rowArgAbsMaximums ::
   (Extent.C vert, Extent.C horiz,
    Shape.C height, Shape.InvIndexed width, Shape.Index width ~ ix, Storable ix,
    Class.Floating a) =>
   Full vert horiz height width a -> (Vector height ix, Vector height a)
rowArgAbsMaximums (Array (MatrixShape.Full order extent) a) =
   let (height,width) = Extent.dimensions extent
   in Array.unsafeCreateWithSizeAndResult height $ \m ixPtr ->
      ArrayIO.unsafeCreate height $ \bPtr -> evalContT $ do
      let n = Shape.size width
      let (inca,incx) =
            case order of
               {-
               It would be more CPU friendly to compute the maximum row by row.
               Unfortunately BLAS does not support this mode.
               -}
               ColumnMajor -> (1,m)
               RowMajor -> (n,1)
      nPtr <- Call.cint n
      aPtr <- ContT $ withForeignPtr a
      incxPtr <- Call.cint incx
      liftIO $ do
         Call.assert "rowArgAbsMaximums: no columns" (n>0)
         forM_ (take m $ zip (pointerSeq inca aPtr) $
                zip (pointerSeq 1 ixPtr) (pointerSeq 1 bPtr)) $
               \(aiPtr,(ixiPtr,biPtr)) -> do
            ix <- pred . fromIntegral <$> VectorPriv.absMax nPtr aiPtr incxPtr
            poke ixiPtr $ Shape.indexFromOffset width ix
            poke biPtr =<< peekElemOff aiPtr (ix*incx)

columnArgAbsMaximums ::
   (Extent.C vert, Extent.C horiz,
    Shape.InvIndexed height, Shape.C width,
    Shape.Index height ~ ix, Storable ix,
    Class.Floating a) =>
   Full vert horiz height width a -> (Vector width ix, Vector width a)
columnArgAbsMaximums = rowArgAbsMaximums . transpose


infixl 7 |*-

(|*-) ::
   (Shape.C height, Shape.C width, Class.Floating a) =>
   Vector height a -> Vector width a -> General height width a
(|*-) = tensorProduct RowMajor

tensorProduct ::
   (Shape.C height, Shape.C width, Class.Floating a) =>
   Order -> Vector height a -> Vector width a -> General height width a
tensorProduct = tensorProductAux NonConjugated

outer ::
   (Shape.C height, Shape.C width, Class.Floating a) =>
   Order -> Vector height a -> Vector width a -> General height width a
outer = tensorProductAux Conjugated

tensorProductAux ::
   (Shape.C height, Shape.C width, Class.Floating a) =>
   Conjugation -> Order ->
   Vector height a -> Vector width a -> General height width a
tensorProductAux conjugated order x y =
   case order of
      ColumnMajor ->
         transpose $
         fromRowMajor $ RowMajor.tensorProduct (Right conjugated) y x
      RowMajor -> fromRowMajor $ RowMajor.tensorProduct (Left conjugated) x y


kronecker ::
   (Extent.C vert, Extent.C horiz,
    Shape.C heightA, Shape.C widthA, Shape.C heightB, Shape.C widthB,
    Class.Floating a) =>
   Full vert horiz heightA widthA a ->
   Full vert horiz heightB widthB a ->
   Full vert horiz (heightA,heightB) (widthA,widthB) a
kronecker a b =
   let MatrixShape.Full orderB extentB = Array.shape b
       shc =
         MatrixShape.Full orderB $
         Extent.kronecker (MatrixShape.fullExtent $ Array.shape a) extentB
   in either
         (Array.reshape shc . RowMajor.kronecker a)
         (Array.reshape shc . RowMajor.kronecker (transpose a)) $
      Matrix.revealOrder b


sumRank1 ::
   (Shape.C height, Eq height, Shape.C width, Eq width, Class.Floating a) =>
   (height,width) ->
   [(a, (Vector height a, Vector width a))] -> General height width a
sumRank1 (height,width) xys =
   Array.unsafeCreateWithSize (MatrixShape.general ColumnMajor height width) $
      \size aPtr ->
   evalContT $ do
      let m = Shape.size height
      let n = Shape.size width
      mPtr <- Call.cint m
      nPtr <- Call.cint n
      alphaPtr <- Call.alloca
      incxPtr <- Call.cint 1
      incyPtr <- Call.cint 1
      ldaPtr <- Call.leadingDim m
      liftIO $ do
         fill zero size aPtr
         forM_ xys $ \(alpha, (Array shX x, Array shY y)) ->
            withForeignPtr x $ \xPtr ->
            withForeignPtr y $ \yPtr -> do
               Call.assert "Matrix.sumRank1: non-matching height" (height==shX)
               Call.assert "Matrix.sumRank1: non-matching width" (width==shY)
               poke alphaPtr alpha
               BlasGen.gerc mPtr nPtr
                  alphaPtr xPtr incxPtr yPtr incyPtr aPtr ldaPtr