Squashed 'third_party/eigen/' content from commit 61d72f6

Change-Id: Iccc90fa0b55ab44037f018046d2fcffd90d9d025
git-subtree-dir: third_party/eigen
git-subtree-split: 61d72f6383cfa842868c53e30e087b0258177257
diff --git a/bench/benchBlasGemm.cpp b/bench/benchBlasGemm.cpp
new file mode 100644
index 0000000..cb086a5
--- /dev/null
+++ b/bench/benchBlasGemm.cpp
@@ -0,0 +1,219 @@
+// g++ -O3 -DNDEBUG -I.. -L /usr/lib64/atlas/ benchBlasGemm.cpp -o benchBlasGemm -lrt -lcblas
+// possible options:
+//    -DEIGEN_DONT_VECTORIZE
+//    -msse2
+
+// #define EIGEN_DEFAULT_TO_ROW_MAJOR
+#define _FLOAT
+
+#include <iostream>
+
+#include <Eigen/Core>
+#include "BenchTimer.h"
+
+// include the BLAS headers
+extern "C" {
+#include <cblas.h>
+}
+#include <string>
+
+#ifdef _FLOAT
+typedef float Scalar;
+#define CBLAS_GEMM cblas_sgemm
+#else
+typedef double Scalar;
+#define CBLAS_GEMM cblas_dgemm
+#endif
+
+
+typedef Eigen::Matrix<Scalar,Eigen::Dynamic,Eigen::Dynamic> MyMatrix;
+void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops);
+void check_product(int M, int N, int K);
+void check_product(void);
+
+int main(int argc, char *argv[])
+{
+  // disable SSE exceptions
+  #ifdef __GNUC__
+  {
+    int aux;
+    asm(
+    "stmxcsr   %[aux]           \n\t"
+    "orl       $32832, %[aux]   \n\t"
+    "ldmxcsr   %[aux]           \n\t"
+    : : [aux] "m" (aux));
+  }
+  #endif
+
+  int nbtries=1, nbloops=1, M, N, K;
+
+  if (argc==2)
+  {
+    if (std::string(argv[1])=="check")
+      check_product();
+    else
+      M = N = K = atoi(argv[1]);
+  }
+  else if ((argc==3) && (std::string(argv[1])=="auto"))
+  {
+    M = N = K = atoi(argv[2]);
+    nbloops = 1000000000/(M*M*M);
+    if (nbloops<1)
+      nbloops = 1;
+    nbtries = 6;
+  }
+  else if (argc==4)
+  {
+    M = N = K = atoi(argv[1]);
+    nbloops = atoi(argv[2]);
+    nbtries = atoi(argv[3]);
+  }
+  else if (argc==6)
+  {
+    M = atoi(argv[1]);
+    N = atoi(argv[2]);
+    K = atoi(argv[3]);
+    nbloops = atoi(argv[4]);
+    nbtries = atoi(argv[5]);
+  }
+  else
+  {
+    std::cout << "Usage: " << argv[0] << " size  \n";
+    std::cout << "Usage: " << argv[0] << " auto size\n";
+    std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
+    std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n";
+    std::cout << "Usage: " << argv[0] << " check\n";
+    std::cout << "Options:\n";
+    std::cout << "    size       unique size of the 2 matrices (integer)\n";
+    std::cout << "    auto       automatically set the number of repetitions and tries\n";
+    std::cout << "    nbloops    number of times the GEMM routines is executed\n";
+    std::cout << "    nbtries    number of times the loop is benched (return the best try)\n";
+    std::cout << "    M N K      sizes of the matrices: MxN  =  MxK * KxN (integers)\n";
+    std::cout << "    check      check eigen product using cblas as a reference\n";
+    exit(1);
+  }
+
+  double nbmad = double(M) * double(N) * double(K) * double(nbloops);
+
+  if (!(std::string(argv[1])=="auto"))
+    std::cout << M << " x " << N << " x " << K << "\n";
+
+  Scalar alpha, beta;
+  MyMatrix ma(M,K), mb(K,N), mc(M,N);
+  ma = MyMatrix::Random(M,K);
+  mb = MyMatrix::Random(K,N);
+  mc = MyMatrix::Random(M,N);
+
+  Eigen::BenchTimer timer;
+
+  // we simply compute c += a*b, so:
+  alpha = 1;
+  beta = 1;
+
+  // bench cblas
+  // ROWS_A, COLS_B, COLS_A, 1.0,  A, COLS_A, B, COLS_B, 0.0, C, COLS_B);
+  if (!(std::string(argv[1])=="auto"))
+  {
+    timer.reset();
+    for (uint k=0 ; k<nbtries ; ++k)
+    {
+        timer.start();
+        for (uint j=0 ; j<nbloops ; ++j)
+              #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
+              CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta, mc.data(), N);
+              #else
+              CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta, mc.data(), M);
+              #endif
+        timer.stop();
+    }
+    if (!(std::string(argv[1])=="auto"))
+      std::cout << "cblas: " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
+    else
+        std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
+  }
+
+  // clear
+  ma = MyMatrix::Random(M,K);
+  mb = MyMatrix::Random(K,N);
+  mc = MyMatrix::Random(M,N);
+
+  // eigen
+//   if (!(std::string(argv[1])=="auto"))
+  {
+      timer.reset();
+      for (uint k=0 ; k<nbtries ; ++k)
+      {
+          timer.start();
+          bench_eigengemm(mc, ma, mb, nbloops);
+          timer.stop();
+      }
+      if (!(std::string(argv[1])=="auto"))
+        std::cout << "eigen : " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
+      else
+        std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
+  }
+
+  std::cout << "l1: " << Eigen::l1CacheSize() << std::endl;
+  std::cout << "l2: " << Eigen::l2CacheSize() << std::endl;
+  
+
+  return 0;
+}
+
+using namespace Eigen;
+
+void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops)
+{
+  for (uint j=0 ; j<nbloops ; ++j)
+      mc.noalias() += ma * mb;
+}
+
+#define MYVERIFY(A,M) if (!(A)) { \
+    std::cout << "FAIL: " << M << "\n"; \
+  }
+void check_product(int M, int N, int K)
+{
+  MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N);
+  ma = MyMatrix::Random(M,K);
+  mb = MyMatrix::Random(K,N);
+  maT = ma.transpose();
+  mbT = mb.transpose();
+  mc = MyMatrix::Random(M,N);
+
+  MyMatrix::Scalar eps = 1e-4;
+
+  meigen = mref = mc;
+  CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M);
+  meigen += ma * mb;
+  MYVERIFY(meigen.isApprox(mref, eps),". * .");
+
+  meigen = mref = mc;
+  CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M);
+  meigen += maT.transpose() * mb;
+  MYVERIFY(meigen.isApprox(mref, eps),"T * .");
+
+  meigen = mref = mc;
+  CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M);
+  meigen += (maT.transpose()) * (mbT.transpose());
+  MYVERIFY(meigen.isApprox(mref, eps),"T * T");
+
+  meigen = mref = mc;
+  CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M);
+  meigen += ma * mbT.transpose();
+  MYVERIFY(meigen.isApprox(mref, eps),". * T");
+}
+
+void check_product(void)
+{
+  int M, N, K;
+  for (uint i=0; i<1000; ++i)
+  {
+    M = internal::random<int>(1,64);
+    N = internal::random<int>(1,768);
+    K = internal::random<int>(1,768);
+    M = (0 + M) * 1;
+    std::cout << M << " x " << N << " x " << K << "\n";
+    check_product(M, N, K);
+  }
+}
+