// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2009 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H #define EIGEN_SELFADJOINT_MATRIX_MATRIX_H namespace Eigen { namespace internal { // pack a selfadjoint block diagonal for use with the gebp_kernel template struct symm_pack_lhs { template inline void pack(Scalar* blockA, const const_blas_data_mapper& lhs, Index cols, Index i, Index& count) { // normal copy for(Index k=0; k lhs(_lhs,lhsStride); Index count = 0; Index peeled_mc = (rows/Pack1)*Pack1; for(Index i=0; i(blockA, lhs, cols, i, count); } if(rows-peeled_mc>=Pack2) { pack(blockA, lhs, cols, peeled_mc, count); peeled_mc += Pack2; } // do the same with mr==1 for(Index i=peeled_mc; i struct symm_pack_rhs { enum { PacketSize = packet_traits::size }; void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2) { Index end_k = k2 + rows; Index count = 0; const_blas_data_mapper rhs(_rhs,rhsStride); Index packet_cols = (cols/nr)*nr; // first part: normal case for(Index j2=0; j2 the same with nr==1) for(Index j2=packet_cols; j2 struct product_selfadjoint_matrix; template struct product_selfadjoint_matrix { static EIGEN_STRONG_INLINE void run( Index rows, Index cols, const Scalar* lhs, Index lhsStride, const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, const Scalar& alpha) { product_selfadjoint_matrix::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs), EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor, LhsSelfAdjoint, NumTraits::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs), ColMajor> ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha); } }; template struct product_selfadjoint_matrix { static EIGEN_DONT_INLINE void run( Index rows, Index cols, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, const Scalar& alpha); }; template EIGEN_DONT_INLINE void product_selfadjoint_matrix::run( Index rows, Index cols, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, const Scalar& alpha) { Index size = rows; const_blas_data_mapper lhs(_lhs,lhsStride); const_blas_data_mapper rhs(_rhs,rhsStride); typedef gebp_traits Traits; Index kc = size; // cache block size along the K direction Index mc = rows; // cache block size along the M direction Index nc = cols; // cache block size along the N direction computeProductBlockingSizes(kc, mc, nc); // kc must smaller than mc kc = (std::min)(kc,mc); std::size_t sizeW = kc*Traits::WorkSpaceFactor; std::size_t sizeB = sizeW + kc*cols; ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0); ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0); Scalar* blockB = allocatedBlockB + sizeW; gebp_kernel gebp_kernel; symm_pack_lhs pack_lhs; gemm_pack_rhs pack_rhs; gemm_pack_lhs pack_lhs_transposed; for(Index k2=0; k2 transposed packed copy // 2 - the diagonal block => special packed copy // 3 - the panel below the diagonal block => generic packed copy for(Index i2=0; i2() (blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc); gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha); } } } // matrix * selfadjoint product template struct product_selfadjoint_matrix { static EIGEN_DONT_INLINE void run( Index rows, Index cols, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, const Scalar& alpha); }; template EIGEN_DONT_INLINE void product_selfadjoint_matrix::run( Index rows, Index cols, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, const Scalar& alpha) { Index size = cols; const_blas_data_mapper lhs(_lhs,lhsStride); typedef gebp_traits Traits; Index kc = size; // cache block size along the K direction Index mc = rows; // cache block size along the M direction Index nc = cols; // cache block size along the N direction computeProductBlockingSizes(kc, mc, nc); std::size_t sizeW = kc*Traits::WorkSpaceFactor; std::size_t sizeB = sizeW + kc*cols; ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0); ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0); Scalar* blockB = allocatedBlockB + sizeW; gebp_kernel gebp_kernel; gemm_pack_lhs pack_lhs; symm_pack_rhs pack_rhs; for(Index k2=0; k2 GEPP for(Index i2=0; i2 struct traits > : traits, Lhs, Rhs> > {}; } template struct SelfadjointProductMatrix : public ProductBase, Lhs, Rhs > { EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix) SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} enum { LhsIsUpper = (LhsMode&(Upper|Lower))==Upper, LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint, RhsIsUpper = (RhsMode&(Upper|Lower))==Upper, RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint }; template void scaleAndAddTo(Dest& dst, const Scalar& alpha) const { eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); typename internal::add_const_on_value_type::type lhs = LhsBlasTraits::extract(m_lhs); typename internal::add_const_on_value_type::type rhs = RhsBlasTraits::extract(m_rhs); Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) * RhsBlasTraits::extractScalarFactor(m_rhs); internal::product_selfadjoint_matrix::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint, NumTraits::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)), EIGEN_LOGICAL_XOR(RhsIsUpper, internal::traits::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint, NumTraits::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)), internal::traits::Flags&RowMajorBit ? RowMajor : ColMajor> ::run( lhs.rows(), rhs.cols(), // sizes &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info &dst.coeffRef(0,0), dst.outerStride(), // result info actualAlpha // alpha ); } }; } // end namespace Eigen #endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H