| 
									
										
										
										
											2010-08-24 11:21:25 +08:00
										 |  |  | // g++ -O3 -DNDEBUG -I.. -L /usr/lib64/atlas/ benchBlasGemm.cpp -o benchBlasGemm -lrt -lcblas
 | 
					
						
							| 
									
										
										
										
											2008-07-03 00:05:33 +08:00
										 |  |  | // possible options:
 | 
					
						
							|  |  |  | //    -DEIGEN_DONT_VECTORIZE
 | 
					
						
							|  |  |  | //    -msse2
 | 
					
						
							| 
									
										
										
										
											2008-07-02 00:20:06 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | // #define EIGEN_DEFAULT_TO_ROW_MAJOR
 | 
					
						
							|  |  |  | #define _FLOAT
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-07-09 09:59:36 +08:00
										 |  |  | #include <iostream>
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-07-02 00:20:06 +08:00
										 |  |  | #include <Eigen/Core>
 | 
					
						
							|  |  |  | #include "BenchTimer.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // include the BLAS headers
 | 
					
						
							| 
									
										
										
										
											2010-08-24 11:21:25 +08:00
										 |  |  | extern "C" { | 
					
						
							| 
									
										
										
										
											2008-07-02 00:20:06 +08:00
										 |  |  | #include <cblas.h>
 | 
					
						
							| 
									
										
										
										
											2010-08-24 11:21:25 +08:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2008-07-02 00:20:06 +08:00
										 |  |  | #include <string>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef _FLOAT
 | 
					
						
							|  |  |  | typedef float Scalar; | 
					
						
							|  |  |  | #define CBLAS_GEMM cblas_sgemm
 | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  | typedef double Scalar; | 
					
						
							|  |  |  | #define CBLAS_GEMM cblas_dgemm
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> MyMatrix; | 
					
						
							|  |  |  | void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops); | 
					
						
							|  |  |  | void check_product(int M, int N, int K); | 
					
						
							|  |  |  | void check_product(void); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int main(int argc, char* argv[]) { | 
					
						
							| 
									
										
										
										
											2008-07-03 00:05:33 +08:00
										 |  |  | // disable SSE exceptions
 | 
					
						
							|  |  |  | #ifdef __GNUC__
 | 
					
						
							| 
									
										
										
										
											2008-07-02 00:20:06 +08:00
										 |  |  |   { | 
					
						
							|  |  |  |     int aux; | 
					
						
							|  |  |  |     asm("stmxcsr   %[aux]           \n\t" | 
					
						
							|  |  |  |         "orl       $32832, %[aux]   \n\t" | 
					
						
							|  |  |  |         "ldmxcsr   %[aux]           \n\t" | 
					
						
							|  |  |  |         : | 
					
						
							|  |  |  |         : [aux] "m"(aux)); | 
					
						
							|  |  |  |   } | 
					
						
							| 
									
										
										
										
											2008-07-03 00:05:33 +08:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2008-07-02 00:20:06 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |   int nbtries = 1, nbloops = 1, M, N, K; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if (argc == 2) { | 
					
						
							|  |  |  |     if (std::string(argv[1]) == "check") | 
					
						
							|  |  |  |       check_product(); | 
					
						
							|  |  |  |     else | 
					
						
							|  |  |  |       M = N = K = atoi(argv[1]); | 
					
						
							|  |  |  |   } else if ((argc == 3) && (std::string(argv[1]) == "auto")) { | 
					
						
							|  |  |  |     M = N = K = atoi(argv[2]); | 
					
						
							|  |  |  |     nbloops = 1000000000 / (M * M * M); | 
					
						
							|  |  |  |     if (nbloops < 1) nbloops = 1; | 
					
						
							|  |  |  |     nbtries = 6; | 
					
						
							|  |  |  |   } else if (argc == 4) { | 
					
						
							|  |  |  |     M = N = K = atoi(argv[1]); | 
					
						
							|  |  |  |     nbloops = atoi(argv[2]); | 
					
						
							|  |  |  |     nbtries = atoi(argv[3]); | 
					
						
							|  |  |  |   } else if (argc == 6) { | 
					
						
							|  |  |  |     M = atoi(argv[1]); | 
					
						
							|  |  |  |     N = atoi(argv[2]); | 
					
						
							|  |  |  |     K = atoi(argv[3]); | 
					
						
							|  |  |  |     nbloops = atoi(argv[4]); | 
					
						
							|  |  |  |     nbtries = atoi(argv[5]); | 
					
						
							|  |  |  |   } else { | 
					
						
							| 
									
										
										
										
											2008-07-03 00:05:33 +08:00
										 |  |  |     std::cout << "Usage: " << argv[0] << " size  \n"; | 
					
						
							|  |  |  |     std::cout << "Usage: " << argv[0] << " auto size\n"; | 
					
						
							| 
									
										
										
										
											2008-07-02 00:20:06 +08:00
										 |  |  |     std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n"; | 
					
						
							|  |  |  |     std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n"; | 
					
						
							| 
									
										
										
										
											2008-07-03 00:05:33 +08:00
										 |  |  |     std::cout << "Usage: " << argv[0] << " check\n"; | 
					
						
							| 
									
										
										
										
											2008-07-13 06:59:34 +08:00
										 |  |  |     std::cout << "Options:\n"; | 
					
						
							| 
									
										
										
										
											2008-07-03 00:05:33 +08:00
										 |  |  |     std::cout << "    size       unique size of the 2 matrices (integer)\n"; | 
					
						
							|  |  |  |     std::cout << "    auto       automatically set the number of repetitions and tries\n"; | 
					
						
							| 
									
										
										
										
											2008-07-13 06:59:34 +08:00
										 |  |  |     std::cout << "    nbloops    number of times the GEMM routines is executed\n"; | 
					
						
							|  |  |  |     std::cout << "    nbtries    number of times the loop is benched (return the best try)\n"; | 
					
						
							|  |  |  |     std::cout << "    M N K      sizes of the matrices: MxN  =  MxK * KxN (integers)\n"; | 
					
						
							| 
									
										
										
										
											2008-07-03 00:05:33 +08:00
										 |  |  |     std::cout << "    check      check eigen product using cblas as a reference\n"; | 
					
						
							| 
									
										
										
										
											2008-07-02 00:20:06 +08:00
										 |  |  |     exit(1); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   double nbmad = double(M) * double(N) * double(K) * double(nbloops); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if (!(std::string(argv[1]) == "auto")) std::cout << M << " x " << N << " x " << K << "\n"; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   Scalar alpha, beta; | 
					
						
							|  |  |  |   MyMatrix ma(M, K), mb(K, N), mc(M, N); | 
					
						
							| 
									
										
										
										
											2008-07-27 19:39:47 +08:00
										 |  |  |   ma = MyMatrix::Random(M, K); | 
					
						
							|  |  |  |   mb = MyMatrix::Random(K, N); | 
					
						
							|  |  |  |   mc = MyMatrix::Random(M, N); | 
					
						
							| 
									
										
										
										
											2008-07-02 00:20:06 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |   Eigen::BenchTimer timer; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   // we simply compute c += a*b, so:
 | 
					
						
							|  |  |  |   alpha = 1; | 
					
						
							|  |  |  |   beta = 1; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   // bench cblas
 | 
					
						
							|  |  |  |   // ROWS_A, COLS_B, COLS_A, 1.0,  A, COLS_A, B, COLS_B, 0.0, C, COLS_B);
 | 
					
						
							|  |  |  |   if (!(std::string(argv[1]) == "auto")) { | 
					
						
							|  |  |  |     timer.reset(); | 
					
						
							|  |  |  |     for (uint k = 0; k < nbtries; ++k) { | 
					
						
							|  |  |  |       timer.start(); | 
					
						
							|  |  |  |       for (uint j = 0; j < nbloops; ++j) | 
					
						
							|  |  |  | #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
 | 
					
						
							|  |  |  |         CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta, | 
					
						
							|  |  |  |                    mc.data(), N); | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  |         CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta, | 
					
						
							|  |  |  |                    mc.data(), M); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  |       timer.stop(); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (!(std::string(argv[1]) == "auto")) | 
					
						
							|  |  |  |       std::cout << "cblas: " << timer.value() << " (" << 1e-3 * floor(1e-6 * nbmad / timer.value()) << " GFlops/s)\n"; | 
					
						
							|  |  |  |     else | 
					
						
							|  |  |  |       std::cout << M << " : " << timer.value() << " ; " << 1e-3 * floor(1e-6 * nbmad / timer.value()) << "\n"; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   // clear
 | 
					
						
							| 
									
										
										
										
											2008-07-27 19:39:47 +08:00
										 |  |  |   ma = MyMatrix::Random(M, K); | 
					
						
							|  |  |  |   mb = MyMatrix::Random(K, N); | 
					
						
							|  |  |  |   mc = MyMatrix::Random(M, N); | 
					
						
							| 
									
										
										
										
											2008-07-02 00:20:06 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-07-03 00:05:33 +08:00
										 |  |  |   // eigen
 | 
					
						
							| 
									
										
										
										
											2008-07-02 00:20:06 +08:00
										 |  |  |   //   if (!(std::string(argv[1])=="auto"))
 | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     timer.reset(); | 
					
						
							|  |  |  |     for (uint k = 0; k < nbtries; ++k) { | 
					
						
							|  |  |  |       timer.start(); | 
					
						
							|  |  |  |       bench_eigengemm(mc, ma, mb, nbloops); | 
					
						
							|  |  |  |       timer.stop(); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (!(std::string(argv[1]) == "auto")) | 
					
						
							|  |  |  |       std::cout << "eigen : " << timer.value() << " (" << 1e-3 * floor(1e-6 * nbmad / timer.value()) << " GFlops/s)\n"; | 
					
						
							|  |  |  |     else | 
					
						
							|  |  |  |       std::cout << M << " : " << timer.value() << " ; " << 1e-3 * floor(1e-6 * nbmad / timer.value()) << "\n"; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-08-24 22:53:33 +08:00
										 |  |  |   std::cout << "l1: " << Eigen::l1CacheSize() << std::endl; | 
					
						
							|  |  |  |   std::cout << "l2: " << Eigen::l2CacheSize() << std::endl; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-07-02 00:20:06 +08:00
										 |  |  |   return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | using namespace Eigen; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops) { | 
					
						
							| 
									
										
										
										
											2009-11-06 18:33:18 +08:00
										 |  |  |   for (uint j = 0; j < nbloops; ++j) mc.noalias() += ma * mb; | 
					
						
							| 
									
										
										
										
											2008-07-02 00:20:06 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							// Prints "FAIL: <M>" to std::cout when condition A is false; does nothing otherwise.
							// M is inserted into the stream expression as-is, so it may itself be a chain such as
							// "label " << value. The do/while(0) wrapper makes the expansion a single statement
							// that requires a trailing semicolon, so it is safe inside unbraced if/else.
							#define MYVERIFY(A, M)                    \
							  do {                                    \
							    if (!(A)) {                           \
							      std::cout << "FAIL: " << M << "\n"; \
							    }                                     \
							  } while (0)
					
						
							|  |  |  | void check_product(int M, int N, int K) { | 
					
						
							|  |  |  |   MyMatrix ma(M, K), mb(K, N), mc(M, N), maT(K, M), mbT(N, K), meigen(M, N), mref(M, N); | 
					
						
							| 
									
										
										
										
											2008-07-27 19:39:47 +08:00
										 |  |  |   ma = MyMatrix::Random(M, K); | 
					
						
							|  |  |  |   mb = MyMatrix::Random(K, N); | 
					
						
							| 
									
										
										
										
											2008-07-02 00:20:06 +08:00
										 |  |  |   maT = ma.transpose(); | 
					
						
							|  |  |  |   mbT = mb.transpose(); | 
					
						
							| 
									
										
										
										
											2008-07-27 19:39:47 +08:00
										 |  |  |   mc = MyMatrix::Random(M, N); | 
					
						
							| 
									
										
										
										
											2008-07-02 00:20:06 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |   MyMatrix::Scalar eps = 1e-4; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   meigen = mref = mc; | 
					
						
							|  |  |  |   CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M); | 
					
						
							|  |  |  |   meigen += ma * mb; | 
					
						
							|  |  |  |   MYVERIFY(meigen.isApprox(mref, eps), ". * ."); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-07-03 00:05:33 +08:00
										 |  |  |   meigen = mref = mc; | 
					
						
							|  |  |  |   CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M); | 
					
						
							|  |  |  |   meigen += maT.transpose() * mb; | 
					
						
							|  |  |  |   MYVERIFY(meigen.isApprox(mref, eps), "T * ."); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   meigen = mref = mc; | 
					
						
							|  |  |  |   CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M); | 
					
						
							|  |  |  |   meigen += (maT.transpose()) * (mbT.transpose()); | 
					
						
							|  |  |  |   MYVERIFY(meigen.isApprox(mref, eps), "T * T"); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   meigen = mref = mc; | 
					
						
							|  |  |  |   CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M); | 
					
						
							|  |  |  |   meigen += ma * mbT.transpose(); | 
					
						
							|  |  |  |   MYVERIFY(meigen.isApprox(mref, eps), ". * T"); | 
					
						
							| 
									
										
										
										
											2008-07-02 00:20:06 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void check_product(void) { | 
					
						
							|  |  |  |   int M, N, K; | 
					
						
							|  |  |  |   for (uint i = 0; i < 1000; ++i) { | 
					
						
							| 
									
										
										
										
											2010-10-25 22:15:22 +08:00
										 |  |  |     M = internal::random<int>(1, 64); | 
					
						
							|  |  |  |     N = internal::random<int>(1, 768); | 
					
						
							|  |  |  |     K = internal::random<int>(1, 768); | 
					
						
							| 
									
										
										
										
											2008-07-02 00:20:06 +08:00
										 |  |  |     M = (0 + M) * 1; | 
					
						
							|  |  |  |     std::cout << M << " x " << N << " x " << K << "\n"; | 
					
						
							|  |  |  |     check_product(M, N, K); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } |