* basic support for multicore CPU via a .evalOMP() which
internaly uses OpenMP if enabled at compile time. * added a bench/ folder with a couple benchmarks and benchmark tools.
This commit is contained in:
		
							parent
							
								
									f64311e07d
								
							
						
					
					
						commit
						9d9d81ad71
					
				| @ -40,6 +40,7 @@ namespace Eigen { | ||||
| #include "src/Core/IO.h" | ||||
| #include "src/Core/Swap.h" | ||||
| #include "src/Core/CommaInitializer.h" | ||||
| #include "src/Core/EvalOMP.h" | ||||
| 
 | ||||
| } // namespace Eigen | ||||
| 
 | ||||
|  | ||||
							
								
								
									
										118
									
								
								Eigen/src/Core/EvalOMP.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										118
									
								
								Eigen/src/Core/EvalOMP.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,118 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library
 | ||||
| // for linear algebra. Eigen itself is part of the KDE project.
 | ||||
| //
 | ||||
| // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
 | ||||
| // Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
 | ||||
| //
 | ||||
| // Eigen is free software; you can redistribute it and/or
 | ||||
| // modify it under the terms of the GNU Lesser General Public
 | ||||
| // License as published by the Free Software Foundation; either
 | ||||
| // version 3 of the License, or (at your option) any later version.
 | ||||
| //
 | ||||
| // Alternatively, you can redistribute it and/or
 | ||||
| // modify it under the terms of the GNU General Public License as
 | ||||
| // published by the Free Software Foundation; either version 2 of
 | ||||
| // the License, or (at your option) any later version.
 | ||||
| //
 | ||||
| // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
 | ||||
| // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 | ||||
| // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
 | ||||
| // GNU General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Lesser General Public
 | ||||
| // License and a copy of the GNU General Public License along with
 | ||||
| // Eigen. If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| #ifndef EIGEN_EVAL_OMP_H | ||||
| #define EIGEN_EVAL_OMP_H | ||||
| 
 | ||||
| /** \class EvalOMP
 | ||||
|   * | ||||
|   * \brief Parallel evaluation of an expression using OpenMP | ||||
|   * | ||||
|   * The template parameter Expression is the type of the expression that we are evaluating. | ||||
|   * | ||||
|   * This class is the return type of MatrixBase::evalOMP() and most of the time this is the | ||||
|   * only way it is used. | ||||
|   * | ||||
|   * Note that if OpenMP is not enabled, then this class is equivalent to Eval. | ||||
|   * | ||||
|   * \sa MatrixBase::evalOMP(), class Eval, MatrixBase::eval() | ||||
|   */ | ||||
| template<typename ExpressionType> class EvalOMP : NoOperatorEquals, | ||||
|   public Matrix< typename ExpressionType::Scalar, | ||||
|                  ExpressionType::Traits::RowsAtCompileTime, | ||||
|                  ExpressionType::Traits::ColsAtCompileTime, | ||||
|                  EIGEN_DEFAULT_MATRIX_STORAGE_ORDER, | ||||
|                  ExpressionType::Traits::MaxRowsAtCompileTime, | ||||
|                  ExpressionType::Traits::MaxColsAtCompileTime> | ||||
| { | ||||
|   public: | ||||
|     typedef typename ExpressionType::Scalar Scalar; | ||||
| 
 | ||||
|     /** The actual matrix type to evaluate to. This type can be used independently
 | ||||
|       * of the rest of this class to get the actual matrix type to evaluate and store | ||||
|       * the value of an expression. | ||||
|       */ | ||||
|     typedef Matrix<Scalar, | ||||
|                    ExpressionType::Traits::RowsAtCompileTime, | ||||
|                    ExpressionType::Traits::ColsAtCompileTime, | ||||
|                    EIGEN_DEFAULT_MATRIX_STORAGE_ORDER, | ||||
|                    ExpressionType::Traits::MaxRowsAtCompileTime, | ||||
|                    ExpressionType::Traits::MaxColsAtCompileTime> MatrixType; | ||||
| 
 | ||||
|     #ifdef _OPENMP | ||||
|     explicit EvalOMP(const ExpressionType& other) | ||||
|       : MatrixType(other.rows(), other.cols()) | ||||
|     { | ||||
|       #ifdef __INTEL_COMPILER | ||||
|       #pragma omp parallel default(none) shared(other) | ||||
|       #else | ||||
|       #pragma omp parallel default(none) | ||||
|       #endif | ||||
|       { | ||||
|         if (this->cols()>this->rows()) | ||||
|         { | ||||
|           #pragma omp for | ||||
|           for(int j = 0; j < this->cols(); j++) | ||||
|             for(int i = 0; i < this->rows(); i++) | ||||
|               this->coeffRef(i, j) = other.coeff(i, j); | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|           #pragma omp for | ||||
|           for(int i = 0; i < this->rows(); i++) | ||||
|             for(int j = 0; j < this->cols(); j++) | ||||
|               this->coeffRef(i, j) = other.coeff(i, j); | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|     #else | ||||
|     explicit EvalOMP(const ExpressionType& other) : MatrixType(other) {} | ||||
|     #endif | ||||
| }; | ||||
| 
 | ||||
| /** Evaluates *this in a parallel fashion using OpenMP and returns the obtained matrix.
 | ||||
|   * | ||||
|   * Of course, it only makes sense to call this function for complex expressions, and/or | ||||
|   * large matrices (>32x32), \b and if there is no outer loop which can be parallelized. | ||||
|   * | ||||
|   * It is the responsibility of the user manage the OpenMP parameters, for instance: | ||||
|   * \code | ||||
|   * #include <omp.h> | ||||
|   * // ...
 | ||||
|   * omp_set_num_threads(omp_get_num_procs()); | ||||
|   * \endcode | ||||
|   * You also need to enable OpenMP on your compiler (e.g., -fopenmp) during both compilation and linking. | ||||
|   * | ||||
|   * Note that if OpenMP is not enabled, then evalOMP() is equivalent to eval(). | ||||
|   * | ||||
|   * \sa class EvalOMP, eval() | ||||
|   */ | ||||
| template<typename Scalar, typename Derived> | ||||
| const EvalOMP<Derived> MatrixBase<Scalar, Derived>::evalOMP() const | ||||
| { | ||||
|   return EvalOMP<Derived>(*static_cast<const Derived*>(this)); | ||||
| } | ||||
| 
 | ||||
| #endif // EIGEN_EVAL_OMP_H
 | ||||
| @ -44,6 +44,7 @@ template<typename MatrixType> class DiagonalCoeffs; | ||||
| template<typename MatrixType> class Identity; | ||||
| template<typename MatrixType> class Map; | ||||
| template<typename Derived> class Eval; | ||||
| template<typename Derived> class EvalOMP; | ||||
| 
 | ||||
| struct ScalarProductOp; | ||||
| struct ScalarQuotientOp; | ||||
|  | ||||
| @ -359,6 +359,7 @@ template<typename Scalar, typename Derived> class MatrixBase | ||||
|     /// \name special functions
 | ||||
|     //@{
 | ||||
|     const Eval<Derived> eval() const EIGEN_ALWAYS_INLINE; | ||||
|     const EvalOMP<Derived> evalOMP() const EIGEN_ALWAYS_INLINE; | ||||
| 
 | ||||
|     template<typename CustomUnaryOp> | ||||
|     const CwiseUnaryOp<CustomUnaryOp, Derived> cwise(const CustomUnaryOp& func = CustomUnaryOp()) const; | ||||
|  | ||||
							
								
								
									
										75
									
								
								bench/BenchTimer.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								bench/BenchTimer.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,75 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library
 | ||||
| // for linear algebra. Eigen itself is part of the KDE project.
 | ||||
| //
 | ||||
| // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
 | ||||
| // Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
 | ||||
| //
 | ||||
| // Eigen is free software; you can redistribute it and/or
 | ||||
| // modify it under the terms of the GNU Lesser General Public
 | ||||
| // License as published by the Free Software Foundation; either
 | ||||
| // version 3 of the License, or (at your option) any later version.
 | ||||
| //
 | ||||
| // Alternatively, you can redistribute it and/or
 | ||||
| // modify it under the terms of the GNU General Public License as
 | ||||
| // published by the Free Software Foundation; either version 2 of
 | ||||
| // the License, or (at your option) any later version.
 | ||||
| //
 | ||||
| // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
 | ||||
| // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 | ||||
| // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
 | ||||
| // GNU General Public License for more details.
 | ||||
| //
 | ||||
| // You should have received a copy of the GNU Lesser General Public
 | ||||
| // License and a copy of the GNU General Public License along with
 | ||||
| // Eigen. If not, see <http://www.gnu.org/licenses/>.
 | ||||
| 
 | ||||
| #ifndef EIGEN_BENCH_TIMER_H | ||||
| #define EIGEN_BENCH_TIMER_H | ||||
| 
 | ||||
| #include <sys/time.h> | ||||
| #include <unistd.h> | ||||
| #include <cstdlib> | ||||
| 
 | ||||
| namespace Eigen | ||||
| { | ||||
| 
 | ||||
| /** Elapsed time timer keeping the best try.
 | ||||
|   */ | ||||
| class BenchTimer | ||||
| { | ||||
| public: | ||||
| 
 | ||||
|   BenchTimer() : m_best(1e99) {} | ||||
| 
 | ||||
|   ~BenchTimer() {} | ||||
| 
 | ||||
|   inline void start(void) {m_start = getTime();} | ||||
|   inline void stop(void) | ||||
|   { | ||||
|     m_best = std::min(m_best, getTime() - m_start); | ||||
|   } | ||||
| 
 | ||||
|   /** Return the best elapsed time.
 | ||||
|     */ | ||||
|   inline double value(void) | ||||
|   { | ||||
|       return m_best; | ||||
|   } | ||||
| 
 | ||||
|   static inline double getTime(void) | ||||
|   { | ||||
|       struct timeval tv; | ||||
|       struct timezone tz; | ||||
|       gettimeofday(&tv, &tz); | ||||
|       return (double)tv.tv_sec + 1.e-6 * (double)tv.tv_usec; | ||||
|   } | ||||
| 
 | ||||
| protected: | ||||
| 
 | ||||
|   double m_best, m_start; | ||||
| 
 | ||||
| }; | ||||
| 
 | ||||
| } | ||||
| 
 | ||||
| #endif // EIGEN_BENCH_TIMER_H
 | ||||
							
								
								
									
										28
									
								
								bench/BenchUtil.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								bench/BenchUtil.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,28 @@ | ||||
| 
 | ||||
| #include <Eigen/Core> | ||||
| #include "BenchTimer.h" | ||||
| 
 | ||||
| using namespace std; | ||||
| USING_PART_OF_NAMESPACE_EIGEN | ||||
| 
 | ||||
| #include <boost/preprocessor/repetition/enum_params.hpp> | ||||
| #include <boost/preprocessor/repetition.hpp> | ||||
| #include <boost/preprocessor/seq.hpp> | ||||
| #include <boost/preprocessor/array.hpp> | ||||
| #include <boost/preprocessor/arithmetic.hpp> | ||||
| #include <boost/preprocessor/comparison.hpp> | ||||
| #include <boost/preprocessor/punctuation.hpp> | ||||
| #include <boost/preprocessor/punctuation/comma.hpp> | ||||
| #include <boost/preprocessor/stringize.hpp> | ||||
| 
 | ||||
| template<typename MatrixType> void initMatrix_random(MatrixType& mat) __attribute__((noinline)); | ||||
| template<typename MatrixType> void initMatrix_random(MatrixType& mat) | ||||
| { | ||||
|   mat.setRandom();// = MatrixType::random(mat.rows(), mat.cols());
 | ||||
| } | ||||
| 
 | ||||
| template<typename MatrixType> void initMatrix_identity(MatrixType& mat) __attribute__((noinline)); | ||||
| template<typename MatrixType> void initMatrix_identity(MatrixType& mat) | ||||
| { | ||||
|   mat.setIdentity(); | ||||
| } | ||||
							
								
								
									
										55
									
								
								bench/README.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								bench/README.txt
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,55 @@ | ||||
| 
 | ||||
| This folder contains a couple of benchmark utities and Eigen benchmarks. | ||||
| 
 | ||||
| **************************** | ||||
| * bench_multi_compilers.sh * | ||||
| **************************** | ||||
| 
 | ||||
| This script allows to run a benchmark on a set of different compilers/compiler options. | ||||
| It takes two arguments: | ||||
|  - a file defining the list of the compilers with their options | ||||
|  - the .cpp file of the benchmark | ||||
| 
 | ||||
| Examples: | ||||
| 
 | ||||
| $ ./bench_multi_compilers.sh basicbench.cxxlist basicbenchmark.cpp | ||||
| 
 | ||||
|     g++-4.1 -O3 -DNDEBUG -finline-limit=10000 | ||||
|     3d-3x3   /   4d-4x4   /   Xd-4x4   /   Xd-20x20   / | ||||
|     0.271102   0.131416   0.422322   0.198633 | ||||
|     0.201658   0.102436   0.397566   0.207282 | ||||
| 
 | ||||
|     g++-4.2 -O3 -DNDEBUG -finline-limit=10000 | ||||
|     3d-3x3   /   4d-4x4   /   Xd-4x4   /   Xd-20x20   / | ||||
|     0.107805   0.0890579   0.30265   0.161843 | ||||
|     0.127157   0.0712581   0.278341   0.191029 | ||||
| 
 | ||||
|     g++-4.3 -O3 -DNDEBUG -finline-limit=10000 | ||||
|     3d-3x3   /   4d-4x4   /   Xd-4x4   /   Xd-20x20   / | ||||
|     0.134318   0.105291   0.3704   0.180966 | ||||
|     0.137703   0.0732472   0.31225   0.202204 | ||||
| 
 | ||||
|     icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size | ||||
|     3d-3x3   /   4d-4x4   /   Xd-4x4   /   Xd-20x20   / | ||||
|     0.226145   0.0941319   0.371873   0.159433 | ||||
|     0.109302   0.0837538   0.328102   0.173891 | ||||
| 
 | ||||
| 
 | ||||
| $ ./bench_multi_compilers.sh ompbench.cxxlist ompbenchmark.cpp | ||||
| 
 | ||||
|     g++-4.2 -O3 -DNDEBUG -finline-limit=10000 -fopenmp | ||||
|     double, fixed-size 4x4: 0.00165105s  0.0778739s | ||||
|     double, 32x32: 0.0654769s 0.075289s  => x0.869674 (2) | ||||
|     double, 128x128: 0.054148s 0.0419669s  => x1.29025 (2) | ||||
|     double, 512x512: 0.913799s 0.428533s  => x2.13239 (2) | ||||
|     double, 1024x1024: 14.5972s 9.3542s  => x1.5605 (2) | ||||
| 
 | ||||
|     icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -openmp | ||||
|     double, fixed-size 4x4: 0.000589848s  0.019949s | ||||
|     double, 32x32: 0.0682781s 0.0449722s  => x1.51823 (2) | ||||
|     double, 128x128: 0.0547509s 0.0435519s  => x1.25714 (2) | ||||
|     double, 512x512: 0.829436s 0.424438s  => x1.9542 (2) | ||||
|     double, 1024x1024: 14.5243s 10.7735s  => x1.34815 (2) | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
							
								
								
									
										12
									
								
								bench/basicbench.cxxlist
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								bench/basicbench.cxxlist
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,12 @@ | ||||
| #!/bin/bash | ||||
| 
 | ||||
| CLIST[((g++))]="g++-4.1 -O3 -DNDEBUG" | ||||
| CLIST[((g++))]="g++-4.1 -O3 -DNDEBUG -finline-limit=20000" | ||||
| 
 | ||||
| CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG" | ||||
| CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=20000" | ||||
| 
 | ||||
| CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG" | ||||
| CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=20000" | ||||
| 
 | ||||
| CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size" | ||||
							
								
								
									
										46
									
								
								bench/basicbenchmark.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								bench/basicbenchmark.cpp
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,46 @@ | ||||
| 
 | ||||
| #include "BenchUtil.h" | ||||
| #include "basicbenchmark.h" | ||||
| 
 | ||||
| int main(int argc, char *argv[]) | ||||
| { | ||||
|   // disbale floating point exceptions
 | ||||
|   // this leads to more stable bench results
 | ||||
|   // (this is done by default by ICC)
 | ||||
|   #ifndef __INTEL_COMPILER | ||||
|   { | ||||
|     int aux; | ||||
|     asm( | ||||
|     "stmxcsr   %[aux]           \n\t" | ||||
|     "orl       $32832, %[aux]   \n\t" | ||||
|     "ldmxcsr   %[aux]           \n\t" | ||||
|     : : [aux] "m" (aux)); | ||||
|   } | ||||
|   #endif | ||||
| 
 | ||||
|   // this is the list of matrix type and size we want to bench:
 | ||||
|   // ((suffix) (matrix size) (number of iterations))
 | ||||
|   #define MODES ((3d)(3)(4000000)) ((4d)(4)(1000000)) ((Xd)(4)(1000000)) ((Xd)(20)(10000)) | ||||
| //   #define MODES ((Xd)(20)(10000))
 | ||||
| 
 | ||||
|   #define _GENERATE_HEADER(R,ARG,EL) << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_HEAD(EL)) << "-" \ | ||||
|     << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << "x" \ | ||||
|     << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << "   /   " | ||||
| 
 | ||||
|   std::cout BOOST_PP_SEQ_FOR_EACH(_GENERATE_HEADER, ~, MODES ) << endl; | ||||
| 
 | ||||
|   const int tries = 10; | ||||
| 
 | ||||
|   #define _RUN_BENCH(R,ARG,EL) \ | ||||
|     std::cout << ARG( \ | ||||
|       BOOST_PP_CAT(Matrix, BOOST_PP_SEQ_HEAD(EL)) (\ | ||||
|          BOOST_PP_SEQ_ELEM(1,EL),BOOST_PP_SEQ_ELEM(1,EL)), BOOST_PP_SEQ_ELEM(2,EL), tries) \ | ||||
|     << "   "; | ||||
| 
 | ||||
|   BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic<LazyEval>, MODES ); | ||||
|   std::cout << endl; | ||||
|   BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic<EarlyEval>, MODES ); | ||||
|   std::cout << endl; | ||||
| 
 | ||||
|   return 0; | ||||
| } | ||||
							
								
								
									
										59
									
								
								bench/basicbenchmark.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								bench/basicbenchmark.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,59 @@ | ||||
| 
 | ||||
| enum {LazyEval, EarlyEval, OmpEval}; | ||||
| 
 | ||||
| template<int Mode, typename MatrixType> | ||||
| void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations) __attribute__((noinline)); | ||||
| 
 | ||||
| template<int Mode, typename MatrixType> | ||||
| void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations) | ||||
| { | ||||
|   for(int a = 0; a < iterations; a++) | ||||
|   { | ||||
|     if (Mode==LazyEval) | ||||
|     { | ||||
|       asm("#begin_bench_loop LazyEval"); | ||||
|       if (MatrixType::Traits::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize"); | ||||
|       m = (I + 0.00005 * (m + m.lazyProduct(m))).eval(); | ||||
|     } | ||||
|     else if (Mode==OmpEval) | ||||
|     { | ||||
|       asm("#begin_bench_loop OmpEval"); | ||||
|       if (MatrixType::Traits::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize"); | ||||
|       m = (I + 0.00005 * (m + m.lazyProduct(m))).evalOMP(); | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|       asm("#begin_bench_loop EarlyEval"); | ||||
|       if (MatrixType::Traits::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize"); | ||||
|       m = I + 0.00005 * (m + m * m); | ||||
|     } | ||||
|     asm("#end_bench_loop"); | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| template<int Mode, typename MatrixType> | ||||
| double benchBasic(const MatrixType& mat, int size, int tries) __attribute__((noinline)); | ||||
| 
 | ||||
| template<int Mode, typename MatrixType> | ||||
| double benchBasic(const MatrixType& mat, int iterations, int tries) | ||||
| { | ||||
|   const int rows = mat.rows(); | ||||
|   const int cols = mat.cols(); | ||||
| 
 | ||||
|   MatrixType I(rows,cols); | ||||
|   MatrixType m(rows,cols); | ||||
| 
 | ||||
|   initMatrix_identity(I); | ||||
| 
 | ||||
|   Eigen::BenchTimer timer; | ||||
|   for(uint t=0; t<tries; ++t) | ||||
|   { | ||||
|     initMatrix_random(m); | ||||
|     timer.start(); | ||||
|     benchBasic_loop<Mode>(I, m, iterations); | ||||
|     timer.stop(); | ||||
|     cerr << m; | ||||
|   } | ||||
|   return timer.value(); | ||||
| }; | ||||
| 
 | ||||
							
								
								
									
										28
									
								
								bench/bench_multi_compilers.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										28
									
								
								bench/bench_multi_compilers.sh
									
									
									
									
									
										Executable file
									
								
							| @ -0,0 +1,28 @@ | ||||
| #!/bin/bash | ||||
| 
 | ||||
| if (($# < 2)); then | ||||
|     echo "Usage: $0 compilerlist.txt benchfile.cpp" | ||||
| else | ||||
| 
 | ||||
| compilerlist=$1 | ||||
| benchfile=$2 | ||||
| 
 | ||||
| g=0 | ||||
| source $compilerlist | ||||
| 
 | ||||
| # for each compiler, compile benchfile and run the benchmark | ||||
| for (( i=0 ; i<g ; ++i )) ; do | ||||
|   # check the compiler exists | ||||
|   compiler=`echo ${CLIST[$i]} | cut -d " " -f 1` | ||||
|   if [ -e `which $compiler` ]; then | ||||
|     echo "${CLIST[$i]}" | ||||
| #     echo "${CLIST[$i]} $benchfile -I.. -o bench~" | ||||
|     if [ -e ./.bench ] ; then rm .bench; fi | ||||
|     ${CLIST[$i]} $benchfile -I.. -o .bench && ./.bench 2> /dev/null | ||||
|     echo "" | ||||
|   else | ||||
|     echo "compiler not found: $compiler" | ||||
|   fi | ||||
| done | ||||
| 
 | ||||
| fi | ||||
| @ -13,7 +13,7 @@ int main(int argc, char *argv[]) | ||||
| 	{ | ||||
| 		m(i,j) = 0.1 * (i+20*j); | ||||
| 	} | ||||
| 	for(int a = 0; a < 1000000; a++) | ||||
| 	for(int a = 0; a < 100000; a++) | ||||
| 	{ | ||||
| 		m = I + 0.00005 * (m + m*m); | ||||
| 	} | ||||
							
								
								
									
										7
									
								
								bench/ompbench.cxxlist
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								bench/ompbench.cxxlist
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,7 @@ | ||||
| #!/bin/bash | ||||
| 
 | ||||
| CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=10000 -fopenmp" | ||||
| 
 | ||||
| # CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=10000 -fopenmp" | ||||
| 
 | ||||
| CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -openmp" | ||||
							
								
								
									
										81
									
								
								bench/ompbenchmark.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										81
									
								
								bench/ompbenchmark.cpp
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,81 @@ | ||||
| // g++ -O3 -DNDEBUG -I.. -fopenmp benchOpenMP.cpp -o benchOpenMP && ./benchOpenMP 2> /dev/null
 | ||||
| // icpc -fast -fno-exceptions -DNDEBUG -I.. -openmp  benchOpenMP.cpp -o benchOpenMP && ./benchOpenMP 2> /dev/null
 | ||||
| 
 | ||||
| #include <omp.h> | ||||
| #include "BenchUtil.h" | ||||
| #include "basicbenchmark.h" | ||||
| 
 | ||||
| // #include <Eigen/Core>
 | ||||
| // #include "BenchTimer.h"
 | ||||
| //
 | ||||
| // using namespace std;
 | ||||
| // USING_PART_OF_NAMESPACE_EIGEN
 | ||||
| //
 | ||||
| // enum {LazyEval, EarlyEval, OmpEval};
 | ||||
| //
 | ||||
| // template<int Mode, typename MatrixType>
 | ||||
| // double benchSingleProc(const MatrixType& mat, int iterations, int tries)  __attribute__((noinline));
 | ||||
| //
 | ||||
| // template<int Mode, typename MatrixType>
 | ||||
| // double benchBasic(const MatrixType& mat, int iterations, int tries)
 | ||||
| // {
 | ||||
| //   const int rows = mat.rows();
 | ||||
| //   const int cols = mat.cols();
 | ||||
| //
 | ||||
| //   Eigen::BenchTimer timer;
 | ||||
| //   for(uint t=0; t<tries; ++t)
 | ||||
| //   {
 | ||||
| //     MatrixType I = MatrixType::identity(rows, cols);
 | ||||
| //     MatrixType m = MatrixType::random(rows, cols);
 | ||||
| //
 | ||||
| //     timer.start();
 | ||||
| //     for(int a = 0; a < iterations; a++)
 | ||||
| //     {
 | ||||
| //       if(Mode==LazyEval)
 | ||||
| //         m = (I + 0.00005 * (m + m.lazyProduct(m))).eval();
 | ||||
| //       else if(Mode==OmpEval)
 | ||||
| //         m = (I + 0.00005 * (m + m.lazyProduct(m))).evalOMP();
 | ||||
| //       else
 | ||||
| //         m = I + 0.00005 * (m + m * m);
 | ||||
| //     }
 | ||||
| //     timer.stop();
 | ||||
| //     cerr << m;
 | ||||
| //   }
 | ||||
| //   return timer.value();
 | ||||
| // };
 | ||||
| 
 | ||||
| int main(int argc, char *argv[]) | ||||
| { | ||||
|   // disbale floating point exceptions
 | ||||
|   // this leads to more stable bench results
 | ||||
|   { | ||||
|     int aux; | ||||
|     asm( | ||||
|     "stmxcsr   %[aux]           \n\t" | ||||
|     "orl       $32832, %[aux]   \n\t" | ||||
|     "ldmxcsr   %[aux]           \n\t" | ||||
|     : : [aux] "m" (aux)); | ||||
|   } | ||||
| 
 | ||||
|   // commented since the default setting is use as many threads as processors
 | ||||
|   //omp_set_num_threads(omp_get_num_procs());
 | ||||
| 
 | ||||
|   std::cout << "double, fixed-size 4x4: " | ||||
|     << benchBasic<LazyEval>(Matrix4d(), 10000, 10) << "s  " | ||||
|     << benchBasic<OmpEval>(Matrix4d(), 10000, 10) << "s  \n"; | ||||
| 
 | ||||
|   #define BENCH_MATRIX(TYPE, SIZE, ITERATIONS, TRIES) {\ | ||||
|       double single = benchBasic<LazyEval>(Matrix<TYPE,Eigen::Dynamic,Eigen::Dynamic>(SIZE,SIZE), ITERATIONS, TRIES); \ | ||||
|       double omp    = benchBasic<OmpEval> (Matrix<TYPE,Eigen::Dynamic,Eigen::Dynamic>(SIZE,SIZE), ITERATIONS, TRIES); \ | ||||
|       std::cout << #TYPE << ", " << #SIZE << "x" << #SIZE << ": " << single << "s " << omp << "s " \ | ||||
|         << " => x" << single/omp << " (" << omp_get_num_procs() << ")" << std::endl; \ | ||||
|     } | ||||
| 
 | ||||
|   BENCH_MATRIX(double,   32, 1000, 10); | ||||
|   BENCH_MATRIX(double,  128,   10, 10); | ||||
|   BENCH_MATRIX(double,  512,    1,  6); | ||||
|   BENCH_MATRIX(double, 1024,    1,  4); | ||||
| 
 | ||||
|   return 0; | ||||
| } | ||||
| 
 | ||||
| @ -75,7 +75,7 @@ Eigen is well tested with recent versions of GCC and ICC. Both GCC 4.2 and ICC g | ||||
| 
 | ||||
| For best performance, we recommend the following compilation flags: | ||||
| <ul> | ||||
|   <li>\b GCC: \c -O3 \c -DNDEBUG \c -finline-limit=10000 \c -falign-functions=64 </li> | ||||
|   <li>\b GCC: \c -O3 \c -DNDEBUG \c -finline-limit=10000  </li> | ||||
|   <li>\b ICC: \c -O3 \c -DNDEBUG \c -xT \c -ipo \c -no-prec-div \c -no-inline-max-size </li> | ||||
| </ul> | ||||
| 
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Gael Guennebaud
						Gael Guennebaud