// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2008-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_SPARSEDENSEPRODUCT_H #define EIGEN_SPARSEDENSEPRODUCT_H namespace Eigen { namespace internal { template <> struct product_promote_storage_type { typedef Sparse ret; }; template <> struct product_promote_storage_type { typedef Sparse ret; }; template struct sparse_time_dense_product_impl; template struct sparse_time_dense_product_impl { typedef typename internal::remove_all::type Lhs; typedef typename internal::remove_all::type Rhs; typedef typename internal::remove_all::type Res; typedef typename evaluator::InnerIterator LhsInnerIterator; typedef evaluator LhsEval; static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha) { LhsEval lhsEval(lhs); Index n = lhs.outerSize(); #ifdef EIGEN_HAS_OPENMP Eigen::initParallel(); Index threads = Eigen::nbThreads(); #endif for(Index c=0; c1 && lhsEval.nonZerosEstimate() > 20000) { #pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads) for(Index i=0; i let's disable it for now as it is conflicting with generic scalar*matrix and matrix*scalar operators // template // struct ScalarBinaryOpTraits > // { // enum { // Defined = 1 // }; // typedef typename CwiseUnaryOp, T2>::PlainObject ReturnType; // }; template struct sparse_time_dense_product_impl { typedef typename internal::remove_all::type Lhs; typedef typename internal::remove_all::type Rhs; typedef typename internal::remove_all::type Res; typedef evaluator LhsEval; typedef typename LhsEval::InnerIterator LhsInnerIterator; static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha) { LhsEval lhsEval(lhs); for(Index c=0; c::ReturnType rhs_j(alpha * rhs.coeff(j,c)); for(LhsInnerIterator it(lhsEval,j); it ;++it) res.coeffRef(it.index(),c) += it.value() * rhs_j; } } } }; template struct sparse_time_dense_product_impl { typedef typename internal::remove_all::type Lhs; typedef typename internal::remove_all::type Rhs; typedef typename internal::remove_all::type Res; typedef evaluator LhsEval; typedef typename LhsEval::InnerIterator LhsInnerIterator; static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha) { Index n = lhs.rows(); LhsEval lhsEval(lhs); #ifdef EIGEN_HAS_OPENMP Eigen::initParallel(); Index threads = Eigen::nbThreads(); // This 20000 threshold has been found experimentally on 2D and 3D Poisson problems. // It basically represents the minimal amount of work to be done to be worth it. if(threads>1 && lhsEval.nonZerosEstimate()*rhs.cols() > 20000) { #pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads) for(Index i=0; i struct sparse_time_dense_product_impl { typedef typename internal::remove_all::type Lhs; typedef typename internal::remove_all::type Rhs; typedef typename internal::remove_all::type Res; typedef typename evaluator::InnerIterator LhsInnerIterator; static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha) { evaluator lhsEval(lhs); for(Index j=0; j inline void sparse_time_dense_product(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha) { sparse_time_dense_product_impl::run(lhs, rhs, res, alpha); } } // end namespace internal namespace internal { template struct generic_product_impl : generic_product_impl_base > { typedef typename Product::Scalar Scalar; template static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { typedef typename nested_eval::type LhsNested; typedef typename nested_eval::type RhsNested; LhsNested lhsNested(lhs); RhsNested rhsNested(rhs); internal::sparse_time_dense_product(lhsNested, rhsNested, dst, alpha); } }; template struct generic_product_impl : generic_product_impl {}; template struct generic_product_impl : generic_product_impl_base > { typedef typename Product::Scalar Scalar; template static void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { typedef typename nested_eval::type LhsNested; typedef typename nested_eval::type RhsNested; LhsNested lhsNested(lhs); RhsNested rhsNested(rhs); // transpose everything Transpose dstT(dst); internal::sparse_time_dense_product(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha); } }; template struct generic_product_impl : generic_product_impl {}; template struct sparse_dense_outer_product_evaluator { protected: typedef typename conditional::type Lhs1; typedef typename conditional::type ActualRhs; typedef Product ProdXprType; // if the actual left-hand side is a dense vector, // then build a sparse-view so that we can seamlessly iterate over it. typedef typename conditional::StorageKind,Sparse>::value, Lhs1, SparseView >::type ActualLhs; typedef typename conditional::StorageKind,Sparse>::value, Lhs1 const&, SparseView >::type LhsArg; typedef evaluator LhsEval; typedef evaluator RhsEval; typedef typename evaluator::InnerIterator LhsIterator; typedef typename ProdXprType::Scalar Scalar; public: enum { Flags = NeedToTranspose ? RowMajorBit : 0, CoeffReadCost = HugeCost }; class InnerIterator : public LhsIterator { public: InnerIterator(const sparse_dense_outer_product_evaluator &xprEval, Index outer) : LhsIterator(xprEval.m_lhsXprImpl, 0), m_outer(outer), m_empty(false), m_factor(get(xprEval.m_rhsXprImpl, outer, typename internal::traits::StorageKind() )) {} EIGEN_STRONG_INLINE Index outer() const { return m_outer; } EIGEN_STRONG_INLINE Index row() const { return NeedToTranspose ? m_outer : LhsIterator::index(); } EIGEN_STRONG_INLINE Index col() const { return NeedToTranspose ? LhsIterator::index() : m_outer; } EIGEN_STRONG_INLINE Scalar value() const { return LhsIterator::value() * m_factor; } EIGEN_STRONG_INLINE operator bool() const { return LhsIterator::operator bool() && (!m_empty); } protected: Scalar get(const RhsEval &rhs, Index outer, Dense = Dense()) const { return rhs.coeff(outer); } Scalar get(const RhsEval &rhs, Index outer, Sparse = Sparse()) { typename RhsEval::InnerIterator it(rhs, outer); if (it && it.index()==0 && it.value()!=Scalar(0)) return it.value(); m_empty = true; return Scalar(0); } Index m_outer; bool m_empty; Scalar m_factor; }; sparse_dense_outer_product_evaluator(const Lhs1 &lhs, const ActualRhs &rhs) : m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } // transpose case sparse_dense_outer_product_evaluator(const ActualRhs &rhs, const Lhs1 &lhs) : m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } protected: const LhsArg m_lhs; evaluator m_lhsXprImpl; evaluator m_rhsXprImpl; }; // sparse * dense outer product template struct product_evaluator, OuterProduct, SparseShape, DenseShape> : sparse_dense_outer_product_evaluator { typedef sparse_dense_outer_product_evaluator Base; typedef Product XprType; typedef typename XprType::PlainObject PlainObject; explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs()) {} }; template struct product_evaluator, OuterProduct, DenseShape, SparseShape> : sparse_dense_outer_product_evaluator { typedef sparse_dense_outer_product_evaluator Base; typedef Product XprType; typedef typename XprType::PlainObject PlainObject; explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs()) {} }; } // end namespace internal } // end namespace Eigen #endif // EIGEN_SPARSEDENSEPRODUCT_H