sl@0: // sl@0: // Copyright (c) 2000-2002 sl@0: // Joerg Walter, Mathias Koch sl@0: // sl@0: // Permission to use, copy, modify, distribute and sell this software sl@0: // and its documentation for any purpose is hereby granted without fee, sl@0: // provided that the above copyright notice appear in all copies and sl@0: // that both that copyright notice and this permission notice appear sl@0: // in supporting documentation. The authors make no representations sl@0: // about the suitability of this software for any purpose. sl@0: // It is provided "as is" without express or implied warranty. sl@0: // sl@0: // The authors gratefully acknowledge the support of sl@0: // GeNeSys mbH & Co. KG in producing this work. sl@0: // sl@0: sl@0: #ifndef _BOOST_UBLAS_OPERATION_ sl@0: #define _BOOST_UBLAS_OPERATION_ sl@0: sl@0: #include sl@0: sl@0: /** \file operation.hpp sl@0: * \brief This file contains some specialized products. sl@0: */ sl@0: sl@0: // axpy-based products sl@0: // Alexei Novakov had a lot of ideas to improve these. Thanks. sl@0: // Hendrik Kueck proposed some new kernel. Thanks again. sl@0: sl@0: namespace boost { namespace numeric { namespace ublas { sl@0: sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: V & sl@0: axpy_prod (const compressed_matrix &e1, sl@0: const vector_expression &e2, sl@0: V &v, row_major_tag) { sl@0: typedef typename V::size_type size_type; sl@0: typedef typename V::value_type value_type; sl@0: sl@0: for (size_type i = 0; i < e1.filled1 () -1; ++ i) { sl@0: size_type begin = e1.index1_data () [i]; sl@0: size_type end = e1.index1_data () [i + 1]; sl@0: value_type t (v (i)); sl@0: for (size_type j = begin; j < end; ++ j) sl@0: t += e1.value_data () [j] * e2 () (e1.index2_data () [j]); sl@0: v (i) = t; sl@0: } sl@0: return v; sl@0: } sl@0: sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: V & sl@0: axpy_prod (const compressed_matrix &e1, sl@0: const vector_expression &e2, sl@0: V &v, column_major_tag) { sl@0: typedef typename V::size_type size_type; sl@0: sl@0: for (size_type j = 0; j < e1.filled1 () -1; ++ j) { sl@0: size_type begin = e1.index1_data () [j]; sl@0: size_type end = e1.index1_data () [j + 1]; sl@0: for (size_type i = begin; i < end; ++ i) sl@0: v (e1.index2_data () [i]) += e1.value_data () [i] * e2 () (j); sl@0: } sl@0: return v; sl@0: } sl@0: sl@0: // Dispatcher sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: V & sl@0: axpy_prod (const compressed_matrix &e1, sl@0: const vector_expression &e2, sl@0: V &v, bool init = true) { sl@0: typedef typename V::value_type value_type; sl@0: typedef typename L1::orientation_category orientation_category; sl@0: sl@0: if (init) sl@0: v.assign (zero_vector (e1.size1 ())); sl@0: #if BOOST_UBLAS_TYPE_CHECK sl@0: vector cv (v); sl@0: typedef typename type_traits::real_type real_type; sl@0: real_type verrorbound (norm_1 (v) + norm_1 (e1) * norm_1 (e2)); sl@0: indexing_vector_assign (cv, prod (e1, e2)); sl@0: #endif sl@0: axpy_prod (e1, e2, v, orientation_category ()); sl@0: #if BOOST_UBLAS_TYPE_CHECK sl@0: BOOST_UBLAS_CHECK (norm_1 (v - cv) <= 2 * std::numeric_limits::epsilon () * verrorbound, internal_logic ()); sl@0: #endif sl@0: return v; sl@0: } sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: V sl@0: axpy_prod (const compressed_matrix &e1, sl@0: const vector_expression &e2) { sl@0: typedef V vector_type; sl@0: sl@0: vector_type v (e1.size1 ()); sl@0: return axpy_prod (e1, e2, v, true); sl@0: } sl@0: sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: V & sl@0: axpy_prod (const coordinate_matrix &e1, sl@0: const vector_expression &e2, sl@0: V &v, bool init = true) { sl@0: typedef typename V::size_type size_type; sl@0: typedef typename V::value_type value_type; sl@0: typedef L1 layout_type; sl@0: sl@0: size_type size1 = e1.size1(); sl@0: size_type size2 = e1.size2(); sl@0: sl@0: if (init) { sl@0: noalias(v) = zero_vector(size1); sl@0: } sl@0: sl@0: for (size_type i = 0; i < e1.nnz(); ++i) { sl@0: size_type row_index = layout_type::element1( e1.index1_data () [i], size1, e1.index2_data () [i], size2 ); sl@0: size_type col_index = layout_type::element2( e1.index1_data () [i], size1, e1.index2_data () [i], size2 ); sl@0: v( row_index ) += e1.value_data () [i] * e2 () (col_index); sl@0: } sl@0: return v; sl@0: } sl@0: sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: V & sl@0: axpy_prod (const matrix_expression &e1, sl@0: const vector_expression &e2, sl@0: V &v, packed_random_access_iterator_tag, row_major_tag) { sl@0: typedef const E1 expression1_type; sl@0: typedef const E2 expression2_type; sl@0: typedef typename V::size_type size_type; sl@0: sl@0: typename expression1_type::const_iterator1 it1 (e1 ().begin1 ()); sl@0: typename expression1_type::const_iterator1 it1_end (e1 ().end1 ()); sl@0: while (it1 != it1_end) { sl@0: size_type index1 (it1.index1 ()); sl@0: #ifndef BOOST_UBLAS_NO_NESTED_CLASS_RELATION sl@0: typename expression1_type::const_iterator2 it2 (it1.begin ()); sl@0: typename expression1_type::const_iterator2 it2_end (it1.end ()); sl@0: #else sl@0: typename expression1_type::const_iterator2 it2 (boost::numeric::ublas::begin (it1, iterator1_tag ())); sl@0: typename expression1_type::const_iterator2 it2_end (boost::numeric::ublas::end (it1, iterator1_tag ())); sl@0: #endif sl@0: while (it2 != it2_end) { sl@0: v (index1) += *it2 * e2 () (it2.index2 ()); sl@0: ++ it2; sl@0: } sl@0: ++ it1; sl@0: } sl@0: return v; sl@0: } sl@0: sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: V & sl@0: axpy_prod (const matrix_expression &e1, sl@0: const vector_expression &e2, sl@0: V &v, packed_random_access_iterator_tag, column_major_tag) { sl@0: typedef const E1 expression1_type; sl@0: typedef const E2 expression2_type; sl@0: typedef typename V::size_type size_type; sl@0: sl@0: typename expression1_type::const_iterator2 it2 (e1 ().begin2 ()); sl@0: typename expression1_type::const_iterator2 it2_end (e1 ().end2 ()); sl@0: while (it2 != it2_end) { sl@0: size_type index2 (it2.index2 ()); sl@0: #ifndef BOOST_UBLAS_NO_NESTED_CLASS_RELATION sl@0: typename expression1_type::const_iterator1 it1 (it2.begin ()); sl@0: typename expression1_type::const_iterator1 it1_end (it2.end ()); sl@0: #else sl@0: typename expression1_type::const_iterator1 it1 (boost::numeric::ublas::begin (it2, iterator2_tag ())); sl@0: typename expression1_type::const_iterator1 it1_end (boost::numeric::ublas::end (it2, iterator2_tag ())); sl@0: #endif sl@0: while (it1 != it1_end) { sl@0: v (it1.index1 ()) += *it1 * e2 () (index2); sl@0: ++ it1; sl@0: } sl@0: ++ it2; sl@0: } sl@0: return v; sl@0: } sl@0: sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: V & sl@0: axpy_prod (const matrix_expression &e1, sl@0: const vector_expression &e2, sl@0: V &v, sparse_bidirectional_iterator_tag) { sl@0: typedef const E1 expression1_type; sl@0: typedef const E2 expression2_type; sl@0: typedef typename V::size_type size_type; sl@0: sl@0: typename expression2_type::const_iterator it (e2 ().begin ()); sl@0: typename expression2_type::const_iterator it_end (e2 ().end ()); sl@0: while (it != it_end) { sl@0: v.plus_assign (column (e1 (), it.index ()) * *it); sl@0: ++ it; sl@0: } sl@0: return v; sl@0: } sl@0: sl@0: // Dispatcher sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: V & sl@0: axpy_prod (const matrix_expression &e1, sl@0: const vector_expression &e2, sl@0: V &v, packed_random_access_iterator_tag) { sl@0: typedef typename E1::orientation_category orientation_category; sl@0: return axpy_prod (e1, e2, v, packed_random_access_iterator_tag (), orientation_category ()); sl@0: } sl@0: sl@0: sl@0: /** \brief computes v += A x or v = A x in an sl@0: optimized fashion. sl@0: sl@0: \param e1 the matrix expression \c A sl@0: \param e2 the vector expression \c x sl@0: \param v the result vector \c v sl@0: \param init a boolean parameter sl@0: sl@0: axpy_prod(A, x, v, init) implements the well known sl@0: axpy-product. Setting \a init to \c true is equivalent to call sl@0: v.clear() before axpy_prod. Currently \a init sl@0: defaults to \c true, but this may change in the future. sl@0: sl@0: Up to now there are some specialisation for compressed sl@0: matrices that give a large speed up compared to prod. sl@0: sl@0: \ingroup blas2 sl@0: sl@0: \internal sl@0: sl@0: template parameters: sl@0: \param V type of the result vector \c v sl@0: \param E1 type of a matrix expression \c A sl@0: \param E2 type of a vector expression \c x sl@0: */ sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: V & sl@0: axpy_prod (const matrix_expression &e1, sl@0: const vector_expression &e2, sl@0: V &v, bool init = true) { sl@0: typedef typename V::value_type value_type; sl@0: typedef typename E2::const_iterator::iterator_category iterator_category; sl@0: sl@0: if (init) sl@0: v.assign (zero_vector (e1 ().size1 ())); sl@0: #if BOOST_UBLAS_TYPE_CHECK sl@0: vector cv (v); sl@0: typedef typename type_traits::real_type real_type; sl@0: real_type verrorbound (norm_1 (v) + norm_1 (e1) * norm_1 (e2)); sl@0: indexing_vector_assign (cv, prod (e1, e2)); sl@0: #endif sl@0: axpy_prod (e1, e2, v, iterator_category ()); sl@0: #if BOOST_UBLAS_TYPE_CHECK sl@0: BOOST_UBLAS_CHECK (norm_1 (v - cv) <= 2 * std::numeric_limits::epsilon () * verrorbound, internal_logic ()); sl@0: #endif sl@0: return v; sl@0: } sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: V sl@0: axpy_prod (const matrix_expression &e1, sl@0: const vector_expression &e2) { sl@0: typedef V vector_type; sl@0: sl@0: vector_type v (e1 ().size1 ()); sl@0: return axpy_prod (e1, e2, v, true); sl@0: } sl@0: sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: V & sl@0: axpy_prod (const vector_expression &e1, sl@0: const compressed_matrix &e2, sl@0: V &v, column_major_tag) { sl@0: typedef typename V::size_type size_type; sl@0: typedef typename V::value_type value_type; sl@0: sl@0: for (size_type j = 0; j < e2.filled1 () -1; ++ j) { sl@0: size_type begin = e2.index1_data () [j]; sl@0: size_type end = e2.index1_data () [j + 1]; sl@0: value_type t (v (j)); sl@0: for (size_type i = begin; i < end; ++ i) sl@0: t += e2.value_data () [i] * e1 () (e2.index2_data () [i]); sl@0: v (j) = t; sl@0: } sl@0: return v; sl@0: } sl@0: sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: V & sl@0: axpy_prod (const vector_expression &e1, sl@0: const compressed_matrix &e2, sl@0: V &v, row_major_tag) { sl@0: typedef typename V::size_type size_type; sl@0: sl@0: for (size_type i = 0; i < e2.filled1 () -1; ++ i) { sl@0: size_type begin = e2.index1_data () [i]; sl@0: size_type end = e2.index1_data () [i + 1]; sl@0: for (size_type j = begin; j < end; ++ j) sl@0: v (e2.index2_data () [j]) += e2.value_data () [j] * e1 () (i); sl@0: } sl@0: return v; sl@0: } sl@0: sl@0: // Dispatcher sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: V & sl@0: axpy_prod (const vector_expression &e1, sl@0: const compressed_matrix &e2, sl@0: V &v, bool init = true) { sl@0: typedef typename V::value_type value_type; sl@0: typedef typename L2::orientation_category orientation_category; sl@0: sl@0: if (init) sl@0: v.assign (zero_vector (e2.size2 ())); sl@0: #if BOOST_UBLAS_TYPE_CHECK sl@0: vector cv (v); sl@0: typedef typename type_traits::real_type real_type; sl@0: real_type verrorbound (norm_1 (v) + norm_1 (e1) * norm_1 (e2)); sl@0: indexing_vector_assign (cv, prod (e1, e2)); sl@0: #endif sl@0: axpy_prod (e1, e2, v, orientation_category ()); sl@0: #if BOOST_UBLAS_TYPE_CHECK sl@0: BOOST_UBLAS_CHECK (norm_1 (v - cv) <= 2 * std::numeric_limits::epsilon () * verrorbound, internal_logic ()); sl@0: #endif sl@0: return v; sl@0: } sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: V sl@0: axpy_prod (const vector_expression &e1, sl@0: const compressed_matrix &e2) { sl@0: typedef V vector_type; sl@0: sl@0: vector_type v (e2.size2 ()); sl@0: return axpy_prod (e1, e2, v, true); sl@0: } sl@0: sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: V & sl@0: axpy_prod (const vector_expression &e1, sl@0: const matrix_expression &e2, sl@0: V &v, packed_random_access_iterator_tag, column_major_tag) { sl@0: typedef const E1 expression1_type; sl@0: typedef const E2 expression2_type; sl@0: typedef typename V::size_type size_type; sl@0: sl@0: typename expression2_type::const_iterator2 it2 (e2 ().begin2 ()); sl@0: typename expression2_type::const_iterator2 it2_end (e2 ().end2 ()); sl@0: while (it2 != it2_end) { sl@0: size_type index2 (it2.index2 ()); sl@0: #ifndef BOOST_UBLAS_NO_NESTED_CLASS_RELATION sl@0: typename expression2_type::const_iterator1 it1 (it2.begin ()); sl@0: typename expression2_type::const_iterator1 it1_end (it2.end ()); sl@0: #else sl@0: typename expression2_type::const_iterator1 it1 (boost::numeric::ublas::begin (it2, iterator2_tag ())); sl@0: typename expression2_type::const_iterator1 it1_end (boost::numeric::ublas::end (it2, iterator2_tag ())); sl@0: #endif sl@0: while (it1 != it1_end) { sl@0: v (index2) += *it1 * e1 () (it1.index1 ()); sl@0: ++ it1; sl@0: } sl@0: ++ it2; sl@0: } sl@0: return v; sl@0: } sl@0: sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: V & sl@0: axpy_prod (const vector_expression &e1, sl@0: const matrix_expression &e2, sl@0: V &v, packed_random_access_iterator_tag, row_major_tag) { sl@0: typedef const E1 expression1_type; sl@0: typedef const E2 expression2_type; sl@0: typedef typename V::size_type size_type; sl@0: sl@0: typename expression2_type::const_iterator1 it1 (e2 ().begin1 ()); sl@0: typename expression2_type::const_iterator1 it1_end (e2 ().end1 ()); sl@0: while (it1 != it1_end) { sl@0: size_type index1 (it1.index1 ()); sl@0: #ifndef BOOST_UBLAS_NO_NESTED_CLASS_RELATION sl@0: typename expression2_type::const_iterator2 it2 (it1.begin ()); sl@0: typename expression2_type::const_iterator2 it2_end (it1.end ()); sl@0: #else sl@0: typename expression2_type::const_iterator2 it2 (boost::numeric::ublas::begin (it1, iterator1_tag ())); sl@0: typename expression2_type::const_iterator2 it2_end (boost::numeric::ublas::end (it1, iterator1_tag ())); sl@0: #endif sl@0: while (it2 != it2_end) { sl@0: v (it2.index2 ()) += *it2 * e1 () (index1); sl@0: ++ it2; sl@0: } sl@0: ++ it1; sl@0: } sl@0: return v; sl@0: } sl@0: sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: V & sl@0: axpy_prod (const vector_expression &e1, sl@0: const matrix_expression &e2, sl@0: V &v, sparse_bidirectional_iterator_tag) { sl@0: typedef const E1 expression1_type; sl@0: typedef const E2 expression2_type; sl@0: typedef typename V::size_type size_type; sl@0: sl@0: typename expression1_type::const_iterator it (e1 ().begin ()); sl@0: typename expression1_type::const_iterator it_end (e1 ().end ()); sl@0: while (it != it_end) { sl@0: v.plus_assign (*it * row (e2 (), it.index ())); sl@0: ++ it; sl@0: } sl@0: return v; sl@0: } sl@0: sl@0: // Dispatcher sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: V & sl@0: axpy_prod (const vector_expression &e1, sl@0: const matrix_expression &e2, sl@0: V &v, packed_random_access_iterator_tag) { sl@0: typedef typename E2::orientation_category orientation_category; sl@0: return axpy_prod (e1, e2, v, packed_random_access_iterator_tag (), orientation_category ()); sl@0: } sl@0: sl@0: sl@0: /** \brief computes v += AT x or v = AT x in an sl@0: optimized fashion. sl@0: sl@0: \param e1 the vector expression \c x sl@0: \param e2 the matrix expression \c A sl@0: \param v the result vector \c v sl@0: \param init a boolean parameter sl@0: sl@0: axpy_prod(x, A, v, init) implements the well known sl@0: axpy-product. Setting \a init to \c true is equivalent to call sl@0: v.clear() before axpy_prod. Currently \a init sl@0: defaults to \c true, but this may change in the future. sl@0: sl@0: Up to now there are some specialisation for compressed sl@0: matrices that give a large speed up compared to prod. sl@0: sl@0: \ingroup blas2 sl@0: sl@0: \internal sl@0: sl@0: template parameters: sl@0: \param V type of the result vector \c v sl@0: \param E1 type of a vector expression \c x sl@0: \param E2 type of a matrix expression \c A sl@0: */ sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: V & sl@0: axpy_prod (const vector_expression &e1, sl@0: const matrix_expression &e2, sl@0: V &v, bool init = true) { sl@0: typedef typename V::value_type value_type; sl@0: typedef typename E1::const_iterator::iterator_category iterator_category; sl@0: sl@0: if (init) sl@0: v.assign (zero_vector (e2 ().size2 ())); sl@0: #if BOOST_UBLAS_TYPE_CHECK sl@0: vector cv (v); sl@0: typedef typename type_traits::real_type real_type; sl@0: real_type verrorbound (norm_1 (v) + norm_1 (e1) * norm_1 (e2)); sl@0: indexing_vector_assign (cv, prod (e1, e2)); sl@0: #endif sl@0: axpy_prod (e1, e2, v, iterator_category ()); sl@0: #if BOOST_UBLAS_TYPE_CHECK sl@0: BOOST_UBLAS_CHECK (norm_1 (v - cv) <= 2 * std::numeric_limits::epsilon () * verrorbound, internal_logic ()); sl@0: #endif sl@0: return v; sl@0: } sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: V sl@0: axpy_prod (const vector_expression &e1, sl@0: const matrix_expression &e2) { sl@0: typedef V vector_type; sl@0: sl@0: vector_type v (e2 ().size2 ()); sl@0: return axpy_prod (e1, e2, v, true); sl@0: } sl@0: sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: M & sl@0: axpy_prod (const matrix_expression &e1, sl@0: const matrix_expression &e2, sl@0: M &m, TRI, sl@0: dense_proxy_tag, row_major_tag) { sl@0: typedef M matrix_type; sl@0: typedef const E1 expression1_type; sl@0: typedef const E2 expression2_type; sl@0: typedef typename M::size_type size_type; sl@0: typedef typename M::value_type value_type; sl@0: sl@0: #if BOOST_UBLAS_TYPE_CHECK sl@0: matrix cm (m); sl@0: typedef typename type_traits::real_type real_type; sl@0: real_type merrorbound (norm_1 (m) + norm_1 (e1) * norm_1 (e2)); sl@0: indexing_matrix_assign (cm, prod (e1, e2), row_major_tag ()); sl@0: #endif sl@0: size_type size1 (e1 ().size1 ()); sl@0: size_type size2 (e1 ().size2 ()); sl@0: for (size_type i = 0; i < size1; ++ i) sl@0: for (size_type j = 0; j < size2; ++ j) sl@0: row (m, i).plus_assign (e1 () (i, j) * row (e2 (), j)); sl@0: #if BOOST_UBLAS_TYPE_CHECK sl@0: BOOST_UBLAS_CHECK (norm_1 (m - cm) <= 2 * std::numeric_limits::epsilon () * merrorbound, internal_logic ()); sl@0: #endif sl@0: return m; sl@0: } sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: M & sl@0: axpy_prod (const matrix_expression &e1, sl@0: const matrix_expression &e2, sl@0: M &m, TRI, sl@0: sparse_proxy_tag, row_major_tag) { sl@0: typedef M matrix_type; sl@0: typedef TRI triangular_restriction; sl@0: typedef const E1 expression1_type; sl@0: typedef const E2 expression2_type; sl@0: typedef typename M::size_type size_type; sl@0: typedef typename M::value_type value_type; sl@0: sl@0: #if BOOST_UBLAS_TYPE_CHECK sl@0: matrix cm (m); sl@0: typedef typename type_traits::real_type real_type; sl@0: real_type merrorbound (norm_1 (m) + norm_1 (e1) * norm_1 (e2)); sl@0: indexing_matrix_assign (cm, prod (e1, e2), row_major_tag ()); sl@0: #endif sl@0: typename expression1_type::const_iterator1 it1 (e1 ().begin1 ()); sl@0: typename expression1_type::const_iterator1 it1_end (e1 ().end1 ()); sl@0: while (it1 != it1_end) { sl@0: #ifndef BOOST_UBLAS_NO_NESTED_CLASS_RELATION sl@0: typename expression1_type::const_iterator2 it2 (it1.begin ()); sl@0: typename expression1_type::const_iterator2 it2_end (it1.end ()); sl@0: #else sl@0: typename expression1_type::const_iterator2 it2 (boost::numeric::ublas::begin (it1, iterator1_tag ())); sl@0: typename expression1_type::const_iterator2 it2_end (boost::numeric::ublas::end (it1, iterator1_tag ())); sl@0: #endif sl@0: while (it2 != it2_end) { sl@0: // row (m, it1.index1 ()).plus_assign (*it2 * row (e2 (), it2.index2 ())); sl@0: matrix_row mr (e2 (), it2.index2 ()); sl@0: typename matrix_row::const_iterator itr (mr.begin ()); sl@0: typename matrix_row::const_iterator itr_end (mr.end ()); sl@0: while (itr != itr_end) { sl@0: if (triangular_restriction::other (it1.index1 (), itr.index ())) sl@0: m (it1.index1 (), itr.index ()) += *it2 * *itr; sl@0: ++ itr; sl@0: } sl@0: ++ it2; sl@0: } sl@0: ++ it1; sl@0: } sl@0: #if BOOST_UBLAS_TYPE_CHECK sl@0: BOOST_UBLAS_CHECK (norm_1 (m - cm) <= 2 * std::numeric_limits::epsilon () * merrorbound, internal_logic ()); sl@0: #endif sl@0: return m; sl@0: } sl@0: sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: M & sl@0: axpy_prod (const matrix_expression &e1, sl@0: const matrix_expression &e2, sl@0: M &m, TRI, sl@0: dense_proxy_tag, column_major_tag) { sl@0: typedef M matrix_type; sl@0: typedef const E1 expression1_type; sl@0: typedef const E2 expression2_type; sl@0: typedef typename M::size_type size_type; sl@0: typedef typename M::value_type value_type; sl@0: sl@0: #if BOOST_UBLAS_TYPE_CHECK sl@0: matrix cm (m); sl@0: typedef typename type_traits::real_type real_type; sl@0: real_type merrorbound (norm_1 (m) + norm_1 (e1) * norm_1 (e2)); sl@0: indexing_matrix_assign (cm, prod (e1, e2), column_major_tag ()); sl@0: #endif sl@0: size_type size1 (e2 ().size1 ()); sl@0: size_type size2 (e2 ().size2 ()); sl@0: for (size_type j = 0; j < size2; ++ j) sl@0: for (size_type i = 0; i < size1; ++ i) sl@0: column (m, j).plus_assign (e2 () (i, j) * column (e1 (), i)); sl@0: #if BOOST_UBLAS_TYPE_CHECK sl@0: BOOST_UBLAS_CHECK (norm_1 (m - cm) <= 2 * std::numeric_limits::epsilon () * merrorbound, internal_logic ()); sl@0: #endif sl@0: return m; sl@0: } sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: M & sl@0: axpy_prod (const matrix_expression &e1, sl@0: const matrix_expression &e2, sl@0: M &m, TRI, sl@0: sparse_proxy_tag, column_major_tag) { sl@0: typedef M matrix_type; sl@0: typedef TRI triangular_restriction; sl@0: typedef const E1 expression1_type; sl@0: typedef const E2 expression2_type; sl@0: typedef typename M::size_type size_type; sl@0: typedef typename M::value_type value_type; sl@0: sl@0: #if BOOST_UBLAS_TYPE_CHECK sl@0: matrix cm (m); sl@0: typedef typename type_traits::real_type real_type; sl@0: real_type merrorbound (norm_1 (m) + norm_1 (e1) * norm_1 (e2)); sl@0: indexing_matrix_assign (cm, prod (e1, e2), column_major_tag ()); sl@0: #endif sl@0: typename expression2_type::const_iterator2 it2 (e2 ().begin2 ()); sl@0: typename expression2_type::const_iterator2 it2_end (e2 ().end2 ()); sl@0: while (it2 != it2_end) { sl@0: #ifndef BOOST_UBLAS_NO_NESTED_CLASS_RELATION sl@0: typename expression2_type::const_iterator1 it1 (it2.begin ()); sl@0: typename expression2_type::const_iterator1 it1_end (it2.end ()); sl@0: #else sl@0: typename expression2_type::const_iterator1 it1 (boost::numeric::ublas::begin (it2, iterator2_tag ())); sl@0: typename expression2_type::const_iterator1 it1_end (boost::numeric::ublas::end (it2, iterator2_tag ())); sl@0: #endif sl@0: while (it1 != it1_end) { sl@0: // column (m, it2.index2 ()).plus_assign (*it1 * column (e1 (), it1.index1 ())); sl@0: matrix_column mc (e1 (), it1.index1 ()); sl@0: typename matrix_column::const_iterator itc (mc.begin ()); sl@0: typename matrix_column::const_iterator itc_end (mc.end ()); sl@0: while (itc != itc_end) { sl@0: if (triangular_restriction::functor_type ().other (itc.index (), it2.index2 ())) sl@0: m (itc.index (), it2.index2 ()) += *it1 * *itc; sl@0: ++ itc; sl@0: } sl@0: ++ it1; sl@0: } sl@0: ++ it2; sl@0: } sl@0: #if BOOST_UBLAS_TYPE_CHECK sl@0: BOOST_UBLAS_CHECK (norm_1 (m - cm) <= 2 * std::numeric_limits::epsilon () * merrorbound, internal_logic ()); sl@0: #endif sl@0: return m; sl@0: } sl@0: sl@0: // Dispatcher sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: M & sl@0: axpy_prod (const matrix_expression &e1, sl@0: const matrix_expression &e2, sl@0: M &m, TRI, bool init = true) { sl@0: typedef typename M::value_type value_type; sl@0: typedef typename M::storage_category storage_category; sl@0: typedef typename M::orientation_category orientation_category; sl@0: typedef TRI triangular_restriction; sl@0: sl@0: if (init) sl@0: m.assign (zero_matrix (e1 ().size1 (), e2 ().size2 ())); sl@0: return axpy_prod (e1, e2, m, triangular_restriction (), storage_category (), orientation_category ()); sl@0: } sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: M sl@0: axpy_prod (const matrix_expression &e1, sl@0: const matrix_expression &e2, sl@0: TRI) { sl@0: typedef M matrix_type; sl@0: typedef TRI triangular_restriction; sl@0: sl@0: matrix_type m (e1 ().size1 (), e2 ().size2 ()); sl@0: return axpy_prod (e1, e2, m, triangular_restriction (), true); sl@0: } sl@0: sl@0: /** \brief computes M += A X or M = A X in an sl@0: optimized fashion. sl@0: sl@0: \param e1 the matrix expression \c A sl@0: \param e2 the matrix expression \c X sl@0: \param m the result matrix \c M sl@0: \param init a boolean parameter sl@0: sl@0: axpy_prod(A, X, M, init) implements the well known sl@0: axpy-product. Setting \a init to \c true is equivalent to call sl@0: M.clear() before axpy_prod. Currently \a init sl@0: defaults to \c true, but this may change in the future. sl@0: sl@0: Up to now there are no specialisations. sl@0: sl@0: \ingroup blas3 sl@0: sl@0: \internal sl@0: sl@0: template parameters: sl@0: \param M type of the result matrix \c M sl@0: \param E1 type of a matrix expression \c A sl@0: \param E2 type of a matrix expression \c X sl@0: */ sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: M & sl@0: axpy_prod (const matrix_expression &e1, sl@0: const matrix_expression &e2, sl@0: M &m, bool init = true) { sl@0: typedef typename M::value_type value_type; sl@0: typedef typename M::storage_category storage_category; sl@0: typedef typename M::orientation_category orientation_category; sl@0: sl@0: if (init) sl@0: m.assign (zero_matrix (e1 ().size1 (), e2 ().size2 ())); sl@0: return axpy_prod (e1, e2, m, full (), storage_category (), orientation_category ()); sl@0: } sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: M sl@0: axpy_prod (const matrix_expression &e1, sl@0: const matrix_expression &e2) { sl@0: typedef M matrix_type; sl@0: sl@0: matrix_type m (e1 ().size1 (), e2 ().size2 ()); sl@0: return axpy_prod (e1, e2, m, full (), true); sl@0: } sl@0: sl@0: sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: M & sl@0: opb_prod (const matrix_expression &e1, sl@0: const matrix_expression &e2, sl@0: M &m, sl@0: dense_proxy_tag, row_major_tag) { sl@0: typedef M matrix_type; sl@0: typedef const E1 expression1_type; sl@0: typedef const E2 expression2_type; sl@0: typedef typename M::size_type size_type; sl@0: typedef typename M::value_type value_type; sl@0: sl@0: #if BOOST_UBLAS_TYPE_CHECK sl@0: matrix cm (m); sl@0: typedef typename type_traits::real_type real_type; sl@0: real_type merrorbound (norm_1 (m) + norm_1 (e1) * norm_1 (e2)); sl@0: indexing_matrix_assign (cm, prod (e1, e2), row_major_tag ()); sl@0: #endif sl@0: size_type size (BOOST_UBLAS_SAME (e1 ().size2 (), e2 ().size1 ())); sl@0: for (size_type k = 0; k < size; ++ k) { sl@0: vector ce1 (column (e1 (), k)); sl@0: vector re2 (row (e2 (), k)); sl@0: m.plus_assign (outer_prod (ce1, re2)); sl@0: } sl@0: #if BOOST_UBLAS_TYPE_CHECK sl@0: BOOST_UBLAS_CHECK (norm_1 (m - cm) <= 2 * std::numeric_limits::epsilon () * merrorbound, internal_logic ()); sl@0: #endif sl@0: return m; sl@0: } sl@0: sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: M & sl@0: opb_prod (const matrix_expression &e1, sl@0: const matrix_expression &e2, sl@0: M &m, sl@0: dense_proxy_tag, column_major_tag) { sl@0: typedef M matrix_type; sl@0: typedef const E1 expression1_type; sl@0: typedef const E2 expression2_type; sl@0: typedef typename M::size_type size_type; sl@0: typedef typename M::value_type value_type; sl@0: sl@0: #if BOOST_UBLAS_TYPE_CHECK sl@0: matrix cm (m); sl@0: typedef typename type_traits::real_type real_type; sl@0: real_type merrorbound (norm_1 (m) + norm_1 (e1) * norm_1 (e2)); sl@0: indexing_matrix_assign (cm, prod (e1, e2), column_major_tag ()); sl@0: #endif sl@0: size_type size (BOOST_UBLAS_SAME (e1 ().size2 (), e2 ().size1 ())); sl@0: for (size_type k = 0; k < size; ++ k) { sl@0: vector ce1 (column (e1 (), k)); sl@0: vector re2 (row (e2 (), k)); sl@0: m.plus_assign (outer_prod (ce1, re2)); sl@0: } sl@0: #if BOOST_UBLAS_TYPE_CHECK sl@0: BOOST_UBLAS_CHECK (norm_1 (m - cm) <= 2 * std::numeric_limits::epsilon () * merrorbound, internal_logic ()); sl@0: #endif sl@0: return m; sl@0: } sl@0: sl@0: // Dispatcher sl@0: sl@0: /** \brief computes M += A X or M = A X in an sl@0: optimized fashion. sl@0: sl@0: \param e1 the matrix expression \c A sl@0: \param e2 the matrix expression \c X sl@0: \param m the result matrix \c M sl@0: \param init a boolean parameter sl@0: sl@0: opb_prod(A, X, M, init) implements the well known sl@0: axpy-product. Setting \a init to \c true is equivalent to call sl@0: M.clear() before opb_prod. Currently \a init sl@0: defaults to \c true, but this may change in the future. sl@0: sl@0: This function may give a speedup if \c A has less columns than sl@0: rows, because the product is computed as a sum of outer sl@0: products. sl@0: sl@0: \ingroup blas3 sl@0: sl@0: \internal sl@0: sl@0: template parameters: sl@0: \param M type of the result matrix \c M sl@0: \param E1 type of a matrix expression \c A sl@0: \param E2 type of a matrix expression \c X sl@0: */ sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: M & sl@0: opb_prod (const matrix_expression &e1, sl@0: const matrix_expression &e2, sl@0: M &m, bool init = true) { sl@0: typedef typename M::value_type value_type; sl@0: typedef typename M::storage_category storage_category; sl@0: typedef typename M::orientation_category orientation_category; sl@0: sl@0: if (init) sl@0: m.assign (zero_matrix (e1 ().size1 (), e2 ().size2 ())); sl@0: return opb_prod (e1, e2, m, storage_category (), orientation_category ()); sl@0: } sl@0: template sl@0: BOOST_UBLAS_INLINE sl@0: M sl@0: opb_prod (const matrix_expression &e1, sl@0: const matrix_expression &e2) { sl@0: typedef M matrix_type; sl@0: sl@0: matrix_type m (e1 ().size1 (), e2 ().size2 ()); sl@0: return opb_prod (e1, e2, m, true); sl@0: } sl@0: sl@0: }}} sl@0: sl@0: #endif