Squashed 'third_party/boostorg/ublas/' content from commit e8607b3

Change-Id: Ia06afd642157a24e17fa9ddea28fb8601810b78e
git-subtree-dir: third_party/boostorg/ublas
git-subtree-split: e8607b3eea238e590eca93bfe498c21f470155c1
diff --git a/benchmarks/bench1/Jamfile.v2 b/benchmarks/bench1/Jamfile.v2
new file mode 100644
index 0000000..77b11c7
--- /dev/null
+++ b/benchmarks/bench1/Jamfile.v2
@@ -0,0 +1,10 @@
+# Copyright (c) 2004 Michael Stevens
+# Use, modification and distribution are subject to the
+# Boost Software License, Version 1.0. (See accompanying file
+# LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+# bench1 - measure the abstraction penalty of dense matrix and vector operations.
+
+exe bench1
+    : bench1.cpp bench11.cpp bench12.cpp bench13.cpp
+    ;
diff --git a/benchmarks/bench1/bench1.cpp b/benchmarks/bench1/bench1.cpp
new file mode 100644
index 0000000..87478e1
--- /dev/null
+++ b/benchmarks/bench1/bench1.cpp
@@ -0,0 +1,122 @@
+//
+//  Copyright (c) 2000-2002
+//  Joerg Walter, Mathias Koch
+//
+//  Distributed under the Boost Software License, Version 1.0. (See
+//  accompanying file LICENSE_1_0.txt or copy at
+//  http://www.boost.org/LICENSE_1_0.txt)
+//
+//  The authors gratefully acknowledge the support of
+//  GeNeSys mbH & Co. KG in producing this work.
+//
+
+#include "bench1.hpp"
+
+void header (std::string text) {   // Writes 'text' plus a newline to std::cout and flushes.
+    std::cout << text << '\n' << std::flush;
+}
+
+template<class T>
+struct peak_c_plus {   // Times 'runs' scalar additions on a single static accumulator.
+    typedef T value_type;
+
+    void operator () (int runs) const {
+        try {
+            static T accum (0);   // static: keeps its value across iterations and calls
+            boost::timer stopwatch;
+            for (int run = 0; run < runs; ++ run) {
+                accum += T (0);
+//                sink_scalar (accum);
+            }
+            footer<value_type> () (0, 1, runs, stopwatch.elapsed ());   // 0 multiplies, 1 plus per run
+        }
+        catch (const std::exception &err) {
+            std::cout << err.what () << std::endl;
+        }
+    }
+};
+template<class T>
+struct peak_c_multiplies {   // Times 'runs' scalar multiplications on a single static accumulator.
+    typedef T value_type;
+
+    void operator () (int runs) const {
+        try {
+            static T accum (1);   // static: keeps its value across iterations and calls
+            boost::timer stopwatch;
+            for (int run = 0; run < runs; ++ run) {
+                accum *= T (1);
+//                sink_scalar (accum);
+            }
+            footer<value_type> () (0, 1, runs, stopwatch.elapsed ());   // reported with the same counts as peak_c_plus
+        }
+        catch (const std::exception &err) {
+            std::cout << err.what () << std::endl;
+        }
+    }
+};
+
+template<class T>
+void peak<T>::operator () (int runs) {   // Prints the "peak" section and runs both scalar timings.
+    const peak_c_plus<T> plus_bench = peak_c_plus<T> ();
+    const peak_c_multiplies<T> multiplies_bench = peak_c_multiplies<T> ();
+    header ("peak");
+    header ("plus");
+    plus_bench (runs);
+    header ("multiplies");
+    multiplies_bench (runs);
+}
+
+
+template <typename scalar>   // Runs peak and bench_1..bench_3 for 'scalar' at sizes 3, 10, 30 and 100.
+void do_bench (std::string type_string, int scale)
+{
+    header (type_string);
+    peak<scalar> () (1000000 * scale);   // NOTE(review): int product; a very large 'scale' would overflow
+
+    header (type_string + ", 3");
+    bench_1<scalar, 3> () (1000000 * scale);
+    bench_2<scalar, 3> () (300000 * scale);
+    bench_3<scalar, 3> () (100000 * scale);
+
+    header (type_string + ", 10");   // base run counts shrink as the problem size grows
+    bench_1<scalar, 10> () (300000 * scale);
+    bench_2<scalar, 10> () (30000 * scale);
+    bench_3<scalar, 10> () (3000 * scale);
+
+    header (type_string + ", 30");
+    bench_1<scalar, 30> () (100000 * scale);
+    bench_2<scalar, 30> () (3000 * scale);
+    bench_3<scalar, 30> () (100 * scale);
+
+    header (type_string + ", 100");
+    bench_1<scalar, 100> () (30000 * scale);
+    bench_2<scalar, 100> () (300 * scale);
+    bench_3<scalar, 100> () (3 * scale);
+}
+
+// Entry point. An optional first argument is an integer multiplier applied to every run count.
+int main (int argc, char *argv []) {
+    int scale = 1;
+    if (argc > 1)
+        scale = std::atoi (argv [1]);
+    if (scale < 1) {   // atoi yields 0 for non-numeric text; zero or negative run counts time nothing
+        std::cerr << "usage: " << argv [0] << " [scale >= 1]" << std::endl;
+        return 1;
+    }
+#ifdef USE_FLOAT
+    do_bench<float> ("FLOAT", scale);
+#endif
+#ifdef USE_DOUBLE
+    do_bench<double> ("DOUBLE", scale);
+#endif
+#ifdef USE_STD_COMPLEX
+#ifdef USE_FLOAT
+    do_bench<std::complex<float> > ("COMPLEX<FLOAT>", scale);
+#endif
+#ifdef USE_DOUBLE
+    do_bench<std::complex<double> > ("COMPLEX<DOUBLE>", scale);
+#endif
+#endif
+
+    return 0;
+}
diff --git a/benchmarks/bench1/bench1.hpp b/benchmarks/bench1/bench1.hpp
new file mode 100644
index 0000000..d799463
--- /dev/null
+++ b/benchmarks/bench1/bench1.hpp
@@ -0,0 +1,159 @@
+//
+//  Copyright (c) 2000-2002
+//  Joerg Walter, Mathias Koch
+//
+//  Distributed under the Boost Software License, Version 1.0. (See
+//  accompanying file LICENSE_1_0.txt or copy at
+//  http://www.boost.org/LICENSE_1_0.txt)
+//
+//  The authors gratefully acknowledge the support of
+//  GeNeSys mbH & Co. KG in producing this work.
+//
+
+#ifndef BENCH1_H
+#define BENCH1_H
+
+#include <complex>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <string>
+#include <valarray>
+
+#include <boost/numeric/ublas/vector.hpp>
+#include <boost/numeric/ublas/matrix.hpp>
+#include <boost/timer.hpp>
+#define BOOST_UBLAS_NOT_USED(x) (void)(x)   // evaluates x and discards it; presumably silences unused-variable warnings
+
+
+namespace ublas = boost::numeric::ublas;
+
+void header (std::string text);
+
+template<class T>
+struct footer {   // Prints the elapsed seconds and the resulting Mflops figure for one benchmark.
+    void operator () (int multiplies, int plus, int runs, double elapsed) {
+        // Count in double: the integer product with 'runs' can overflow for large run counts.
+        const double ops = (double (multiplies) * ublas::type_traits<T>::multiplies_complexity +
+                            double (plus) * ublas::type_traits<T>::plus_complexity) * runs;
+        std::cout << "elapsed: " << elapsed << " s, " << ops / (1024 * 1024 * elapsed) << " Mflops" << std::endl;
+    }
+};
+
+template<class T, int N>
+struct c_vector_traits {   // Names the built-in array type T [N] so it can be passed by reference.
+    typedef T type [N];
+};
+template<class T, int N, int M>
+struct c_matrix_traits {   // Names the built-in two-dimensional array type T [N] [M].
+    typedef T type [N] [M];
+};
+
+template<class T, int N>
+struct initialize_c_vector  {   // Fills all N elements of a C array from std::rand ().
+    void operator () (typename c_vector_traits<T, N>::type &v) {
+        for (int idx = 0; idx < N; ++ idx)
+            v [idx] = 1.f * std::rand ();   // float product, then converted to T on assignment
+//            v [idx] = 0.f;
+    }
+};
+template<class V>
+BOOST_UBLAS_INLINE
+void initialize_vector (V &v) {   // Fills every element of 'v' from std::rand ().
+    const int count = v.size ();   // size is read once, before the loop
+    for (int idx = 0; idx < count; ++ idx)
+        v [idx] = 1.f * std::rand ();
+//      v [idx] = 0.f;
+}
+
+template<class T, int N, int M>
+struct initialize_c_matrix  {   // Fills an N x M C array from std::rand (), row by row.
+    void operator () (typename c_matrix_traits<T, N, M>::type &m) {
+        for (int row = 0; row < N; ++ row)
+            for (int col = 0; col < M; ++ col)
+                m [row] [col] = 1.f * std::rand ();
+//                m [row] [col] = 0.f;
+    }
+};
+template<class M>
+BOOST_UBLAS_INLINE
+void initialize_matrix (M &m) {   // Fills every element m (i, j) from std::rand (), row by row.
+    const int rows = m.size1 ();
+    const int cols = m.size2 ();
+    for (int row = 0; row < rows; ++ row)
+        for (int col = 0; col < cols; ++ col)
+            m (row, col) = 1.f * std::rand ();
+//          m (row, col) = 0.f;
+}
+
+template<class T>
+BOOST_UBLAS_INLINE
+void sink_scalar (const T &s) {   // Copies 's' into a function-local static object.
+    static T g_s = s;   // initialized on the first call only; later calls leave g_s unchanged
+}
+
+template<class T, int N>
+struct sink_c_vector {   // Copies a C array into a function-local static array on every call.
+    void operator () (const typename c_vector_traits<T, N>::type &v) {
+        static typename c_vector_traits<T, N>::type stored;
+        for (int idx = 0; idx < N; ++ idx)
+            stored [idx] = v [idx];
+    }
+};
+template<class V>
+BOOST_UBLAS_INLINE
+void sink_vector (const V &v) {   // Copies 'v' into a function-local static object.
+    static V g_v (v);   // copy-constructed on the first call only; later calls leave g_v unchanged
+}
+
+template<class T, int N, int M>
+struct sink_c_matrix {   // Copies an N x M C array into a function-local static array on every call.
+    void operator () (const typename c_matrix_traits<T, N, M>::type &m) {
+        static typename c_matrix_traits<T, N, M>::type stored;
+        for (int row = 0; row < N; ++ row)
+            for (int col = 0; col < M; ++ col)
+                stored [row] [col] = m [row] [col];
+    }
+};
+template<class M>
+BOOST_UBLAS_INLINE
+void sink_matrix (const M &m) {   // Copies 'm' into a function-local static object.
+    static M g_m (m);   // copy-constructed on the first call only; later calls leave g_m unchanged
+}
+
+template<class T>
+struct peak {   // Scalar reference benchmark for element type T.
+    void operator () (int runs);   // only declared here; the definition lives in bench1.cpp
+};
+
+template<class T, int N>
+struct bench_1 {   // O (n) benchmarks for element type T and size N.
+    void operator () (int runs);   // only declared here; defined and explicitly instantiated in bench11.cpp
+};
+
+template<class T, int N>
+struct bench_2 {   // O (n ^ 2) benchmarks for element type T and size N.
+    void operator () (int runs);   // only declared here; defined and explicitly instantiated in bench12.cpp
+};
+
+template<class T, int N>
+struct bench_3 {   // Third benchmark group for element type T and size N.
+    void operator () (int runs);   // only declared here; presumably defined in bench13.cpp - TODO confirm
+};
+
+struct safe_tag {};   // selects the operator () overloads that use plain assignment (x = expr)
+struct fast_tag {};   // selects the operator () overloads that use x.assign (expr)
+
+//#define USE_FLOAT
+#define USE_DOUBLE   // scalar selection: enables the double runs and instantiations
+// #define USE_STD_COMPLEX
+// The storage switches below select which container variants are compiled into the benchmarks.
+#define USE_C_ARRAY
+// #define USE_BOUNDED_ARRAY
+#define USE_UNBOUNDED_ARRAY
+// #define USE_STD_VALARRAY
+//#define USE_STD_VECTOR
+
+#endif
+
+
diff --git a/benchmarks/bench1/bench11.cpp b/benchmarks/bench1/bench11.cpp
new file mode 100644
index 0000000..806a422
--- /dev/null
+++ b/benchmarks/bench1/bench11.cpp
@@ -0,0 +1,287 @@
+//
+//  Copyright (c) 2000-2002
+//  Joerg Walter, Mathias Koch
+//
+//  Distributed under the Boost Software License, Version 1.0. (See
+//  accompanying file LICENSE_1_0.txt or copy at
+//  http://www.boost.org/LICENSE_1_0.txt)
+//
+//  The authors gratefully acknowledge the support of
+//  GeNeSys mbH & Co. KG in producing this work.
+//
+
+#include "bench1.hpp"
+
+template<class T, int N>
+struct bench_c_inner_prod {   // Times a hand-written inner product over two C arrays of length N.
+    typedef T value_type;
+
+    void operator () (int runs) const {
+        try {
+            static typename c_vector_traits<T, N>::type lhs, rhs;
+            initialize_c_vector<T, N> () (lhs);
+            initialize_c_vector<T, N> () (rhs);
+            boost::timer stopwatch;
+            for (int run = 0; run < runs; ++ run) {
+                static value_type acc (0);   // static: not reset between runs, keeps accumulating
+                for (int idx = 0; idx < N; ++ idx) {
+                    acc += lhs [idx] * rhs [idx];
+                }
+//                sink_scalar (acc);
+            }
+            footer<value_type> () (N, N - 1, runs, stopwatch.elapsed ());
+        }
+        catch (const std::exception &err) {
+            std::cout << err.what () << std::endl;
+        }
+    }
+};
+template<class V, int N>
+struct bench_my_inner_prod {   // Times ublas::inner_prod on two vectors of type V and size N.
+    typedef typename V::value_type value_type;
+
+    void operator () (int runs) const {
+        try {
+            static V lhs (N), rhs (N);
+            initialize_vector (lhs);
+            initialize_vector (rhs);
+            boost::timer stopwatch;
+            for (int run = 0; run < runs; ++ run) {
+                static value_type result (0);
+                result = ublas::inner_prod (lhs, rhs);
+//                sink_scalar (result);
+                BOOST_UBLAS_NOT_USED(result);
+            }
+            footer<value_type> () (N, N - 1, runs, stopwatch.elapsed ());
+        }
+        catch (const std::exception &err) {
+            std::cout << err.what () << std::endl;
+        }
+    }
+};
+template<class V, int N>
+struct bench_cpp_inner_prod {   // Times an inner product written as (a * b).sum () on type V.
+    typedef typename V::value_type value_type;
+
+    void operator () (int runs) const {
+        try {
+            static V lhs (N), rhs (N);
+            initialize_vector (lhs);
+            initialize_vector (rhs);
+            boost::timer stopwatch;
+            for (int run = 0; run < runs; ++ run) {
+                static value_type result (0);
+                result = (lhs * rhs).sum ();
+//                sink_scalar (result);
+            }
+            footer<value_type> () (N, N - 1, runs, stopwatch.elapsed ());
+        }
+        catch (const std::exception &err) {
+            std::cout << err.what () << std::endl;
+        }
+    }
+};
+
+template<class T, int N>
+struct bench_c_vector_add {   // Times a hand-written negated vector sum over C arrays of length N.
+    typedef T value_type;
+
+    void operator () (int runs) const {
+        try {
+            static typename c_vector_traits<T, N>::type lhs, rhs, out;
+            initialize_c_vector<T, N> () (lhs);
+            initialize_c_vector<T, N> () (rhs);
+            boost::timer stopwatch;
+            for (int run = 0; run < runs; ++ run) {
+                for (int idx = 0; idx < N; ++ idx) {
+                    out [idx] = - (lhs [idx] + rhs [idx]);
+                }
+//                sink_c_vector<T, N> () (out);
+                BOOST_UBLAS_NOT_USED(out);
+            }
+            footer<value_type> () (0, 2 * N, runs, stopwatch.elapsed ());
+        }
+        catch (const std::exception &err) {
+            std::cout << err.what () << std::endl;
+        }
+    }
+};
+template<class V, int N>
+struct bench_my_vector_add {   // Times out = - (lhs + rhs) on vectors of type V, in two assignment forms.
+    typedef typename V::value_type value_type;
+
+    void operator () (int runs, safe_tag) const {   // variant using plain assignment
+        try {
+            static V lhs (N), rhs (N), out (N);
+            initialize_vector (lhs);
+            initialize_vector (rhs);
+            boost::timer stopwatch;
+            for (int run = 0; run < runs; ++ run) {
+                out = - (lhs + rhs);
+//                sink_vector (out);
+            }
+            footer<value_type> () (0, 2 * N, runs, stopwatch.elapsed ());
+        }
+        catch (const std::exception &err) {
+            std::cout << err.what () << std::endl;
+        }
+    }
+    void operator () (int runs, fast_tag) const {   // variant using assign ()
+        try {
+            static V lhs (N), rhs (N), out (N);
+            initialize_vector (lhs);
+            initialize_vector (rhs);
+            boost::timer stopwatch;
+            for (int run = 0; run < runs; ++ run) {
+                out.assign (- (lhs + rhs));
+//                sink_vector (out);
+            }
+            footer<value_type> () (0, 2 * N, runs, stopwatch.elapsed ());
+        }
+        catch (const std::exception &err) {
+            std::cout << err.what () << std::endl;
+        }
+    }
+};
+template<class V, int N>
+struct bench_cpp_vector_add {   // Times out = - (lhs + rhs) using V's own operators.
+    typedef typename V::value_type value_type;
+
+    void operator () (int runs) const {
+        try {
+            static V lhs (N), rhs (N), out (N);
+            initialize_vector (lhs);
+            initialize_vector (rhs);
+            boost::timer stopwatch;
+            for (int run = 0; run < runs; ++ run) {
+                out = - (lhs + rhs);
+//                sink_vector (out);
+            }
+            footer<value_type> () (0, 2 * N, runs, stopwatch.elapsed ());
+        }
+        catch (const std::exception &err) {
+            std::cout << err.what () << std::endl;
+        }
+    }
+};
+
+// Benchmark O (n): inner_prod and vector + vector for every enabled storage type.
+template<class T, int N>
+void bench_1<T, N>::operator () (int runs) {
+    header ("bench_1");
+
+    header ("inner_prod");
+
+    header ("C array");   // hand-written loop over plain arrays, always run
+    bench_c_inner_prod<T, N> () (runs);
+
+#ifdef USE_C_ARRAY
+    header ("c_vector");
+    bench_my_inner_prod<ublas::c_vector<T, N>, N> () (runs);
+#endif
+
+#ifdef USE_BOUNDED_ARRAY
+    header ("vector<bounded_array>");
+    bench_my_inner_prod<ublas::vector<T, ublas::bounded_array<T, N> >, N> () (runs);
+#endif
+
+#ifdef USE_UNBOUNDED_ARRAY
+    header ("vector<unbounded_array>");
+    bench_my_inner_prod<ublas::vector<T, ublas::unbounded_array<T> >, N> () (runs);
+#endif
+
+#ifdef USE_STD_VALARRAY
+    header ("vector<std::valarray>");
+    bench_my_inner_prod<ublas::vector<T, std::valarray<T> >, N> () (runs);   // operator () requires the run count
+#endif
+
+#ifdef USE_STD_VECTOR
+    header ("vector<std::vector>");
+    bench_my_inner_prod<ublas::vector<T, std::vector<T> >, N> () (runs);
+#endif
+
+#ifdef USE_STD_VALARRAY
+    header ("std::valarray");
+    bench_cpp_inner_prod<std::valarray<T>, N> () (runs);
+#endif
+
+    header ("vector + vector");
+
+    header ("C array");   // hand-written loop over plain arrays, always run
+    bench_c_vector_add<T, N> () (runs);
+
+#ifdef USE_C_ARRAY
+    header ("c_vector safe");
+    bench_my_vector_add<ublas::c_vector<T, N>, N> () (runs, safe_tag ());
+
+    header ("c_vector fast");
+    bench_my_vector_add<ublas::c_vector<T, N>, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_BOUNDED_ARRAY
+    header ("vector<bounded_array> safe");
+    bench_my_vector_add<ublas::vector<T, ublas::bounded_array<T, N> >, N> () (runs, safe_tag ());
+
+    header ("vector<bounded_array> fast");
+    bench_my_vector_add<ublas::vector<T, ublas::bounded_array<T, N> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_UNBOUNDED_ARRAY
+    header ("vector<unbounded_array> safe");
+    bench_my_vector_add<ublas::vector<T, ublas::unbounded_array<T> >, N> () (runs, safe_tag ());
+
+    header ("vector<unbounded_array> fast");
+    bench_my_vector_add<ublas::vector<T, ublas::unbounded_array<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VALARRAY
+    header ("vector<std::valarray> safe");
+    bench_my_vector_add<ublas::vector<T, std::valarray<T> >, N> () (runs, safe_tag ());
+
+    header ("vector<std::valarray> fast");
+    bench_my_vector_add<ublas::vector<T, std::valarray<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VECTOR
+    header ("vector<std::vector> safe");
+    bench_my_vector_add<ublas::vector<T, std::vector<T> >, N> () (runs, safe_tag ());
+
+    header ("vector<std::vector> fast");
+    bench_my_vector_add<ublas::vector<T, std::vector<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VALARRAY
+    header ("std::valarray");
+    bench_cpp_vector_add<std::valarray<T>, N> () (runs);
+#endif
+}
+
+#ifdef USE_FLOAT
+template struct bench_1<float, 3>;   // explicit instantiations: the member definition above is local to this file
+template struct bench_1<float, 10>;
+template struct bench_1<float, 30>;
+template struct bench_1<float, 100>;
+#endif
+
+#ifdef USE_DOUBLE
+template struct bench_1<double, 3>;   // sizes match the ones requested by do_bench
+template struct bench_1<double, 10>;
+template struct bench_1<double, 30>;
+template struct bench_1<double, 100>;
+#endif
+
+#ifdef USE_STD_COMPLEX
+#ifdef USE_FLOAT
+template struct bench_1<std::complex<float>, 3>;
+template struct bench_1<std::complex<float>, 10>;
+template struct bench_1<std::complex<float>, 30>;
+template struct bench_1<std::complex<float>, 100>;
+#endif
+
+#ifdef USE_DOUBLE
+template struct bench_1<std::complex<double>, 3>;
+template struct bench_1<std::complex<double>, 10>;
+template struct bench_1<std::complex<double>, 30>;
+template struct bench_1<std::complex<double>, 100>;
+#endif
+#endif
diff --git a/benchmarks/bench1/bench12.cpp b/benchmarks/bench1/bench12.cpp
new file mode 100644
index 0000000..439188f
--- /dev/null
+++ b/benchmarks/bench1/bench12.cpp
@@ -0,0 +1,491 @@
+//
+//  Copyright (c) 2000-2002
+//  Joerg Walter, Mathias Koch
+//
+//  Distributed under the Boost Software License, Version 1.0. (See
+//  accompanying file LICENSE_1_0.txt or copy at
+//  http://www.boost.org/LICENSE_1_0.txt)
+//
+//  The authors gratefully acknowledge the support of
+//  GeNeSys mbH & Co. KG in producing this work.
+//
+
+#include "bench1.hpp"
+
+template<class T, int N>
+struct bench_c_outer_prod {   // Times a hand-written negated outer product into an N x N C array.
+    typedef T value_type;
+
+    void operator () (int runs) const {
+        try {
+            static typename c_matrix_traits<T, N, N>::type product;
+            static typename c_vector_traits<T, N>::type lhs, rhs;
+            initialize_c_vector<T, N> () (lhs);
+            initialize_c_vector<T, N> () (rhs);
+            boost::timer stopwatch;
+            for (int run = 0; run < runs; ++ run) {
+                for (int row = 0; row < N; ++ row) {
+                    for (int col = 0; col < N; ++ col) {
+                        product [row] [col] = - lhs [row] * rhs [col];
+                    }
+                }
+//                sink_c_matrix<T, N, N> () (product);
+            }
+            BOOST_UBLAS_NOT_USED(product);
+
+            footer<value_type> () (N * N, N * N, runs, stopwatch.elapsed ());
+        }
+        catch (const std::exception &err) {
+            std::cout << err.what () << std::endl;
+        }
+    }
+};
+template<class M, class V, int N>
+struct bench_my_outer_prod {   // Times - ublas::outer_prod (lhs, rhs) stored into an N x N matrix of type M.
+    typedef typename M::value_type value_type;
+
+    void operator () (int runs, safe_tag) const {   // variant using plain assignment
+        try {
+            static M product (N, N);
+            static V lhs (N), rhs (N);
+            initialize_vector (lhs);
+            initialize_vector (rhs);
+            boost::timer stopwatch;
+            for (int run = 0; run < runs; ++ run) {
+                product = - ublas::outer_prod (lhs, rhs);
+//                sink_matrix (product);
+            }
+            footer<value_type> () (N * N, N * N, runs, stopwatch.elapsed ());
+        }
+        catch (const std::exception &err) {
+            std::cout << err.what () << std::endl;
+        }
+    }
+    void operator () (int runs, fast_tag) const {   // variant using assign ()
+        try {
+            static M product (N, N);
+            static V lhs (N), rhs (N);
+            initialize_vector (lhs);
+            initialize_vector (rhs);
+            boost::timer stopwatch;
+            for (int run = 0; run < runs; ++ run) {
+                product.assign (- ublas::outer_prod (lhs, rhs));
+//                sink_matrix (product);
+            }
+            footer<value_type> () (N * N, N * N, runs, stopwatch.elapsed ());
+        }
+        catch (const std::exception &err) {
+            std::cout << err.what () << std::endl;
+        }
+    }
+};
+template<class M, class V, int N>
+struct bench_cpp_outer_prod {   // Times a negated outer product written into a flat container of N * N elements.
+    typedef typename M::value_type value_type;
+
+    void operator () (int runs) const {
+        try {
+            static M flat (N * N);
+            static V lhs (N), rhs (N);
+            initialize_vector (lhs);
+            initialize_vector (rhs);
+            boost::timer stopwatch;
+            for (int run = 0; run < runs; ++ run) {
+                for (int row = 0; row < N; ++ row) {
+                    for (int col = 0; col < N; ++ col) {
+                        flat [N * row + col] = - lhs [row] * rhs [col];   // row-major index
+                    }
+                }
+//                sink_vector (flat);
+            }
+            footer<value_type> () (N * N, N * N, runs, stopwatch.elapsed ());
+        }
+        catch (const std::exception &err) {
+            std::cout << err.what () << std::endl;
+        }
+    }
+};
+
+template<class T, int N>
+struct bench_c_matrix_vector_prod {   // Times a hand-written matrix * vector product on C arrays.
+    typedef T value_type;
+
+    void operator () (int runs) const {
+        try {
+            static typename c_matrix_traits<T, N, N>::type mat;
+            static typename c_vector_traits<T, N>::type src, dst;
+            initialize_c_matrix<T, N, N> () (mat);
+            initialize_c_vector<T, N> () (src);
+            boost::timer stopwatch;
+            for (int run = 0; run < runs; ++ run) {
+                for (int row = 0; row < N; ++ row) {
+                    dst [row] = 0;   // reset before accumulating this row
+                    for (int col = 0; col < N; ++ col) {
+                        dst [row] += mat [row] [col] * src [col];
+                    }
+                }
+//                sink_c_vector<T, N> () (dst);
+            }
+            footer<value_type> () (N * N, N * (N - 1), runs, stopwatch.elapsed ());
+        }
+        catch (const std::exception &err) {
+            std::cout << err.what () << std::endl;
+        }
+    }
+};
+template<class M, class V, int N>
+struct bench_my_matrix_vector_prod {   // Times ublas::prod (mat, src) stored into a vector of type V.
+    typedef typename M::value_type value_type;
+
+    void operator () (int runs, safe_tag) const {   // variant using plain assignment
+        try {
+            static M mat (N, N);
+            static V src (N), dst (N);
+            initialize_matrix (mat);
+            initialize_vector (src);
+            boost::timer stopwatch;
+            for (int run = 0; run < runs; ++ run) {
+                dst = ublas::prod (mat, src);
+//                sink_vector (dst);
+            }
+            footer<value_type> () (N * N, N * (N - 1), runs, stopwatch.elapsed ());
+        }
+        catch (const std::exception &err) {
+            std::cout << err.what () << std::endl;
+        }
+    }
+    void operator () (int runs, fast_tag) const {   // variant using assign ()
+        try {
+            static M mat (N, N);
+            static V src (N), dst (N);
+            initialize_matrix (mat);
+            initialize_vector (src);
+            boost::timer stopwatch;
+            for (int run = 0; run < runs; ++ run) {
+                dst.assign (ublas::prod (mat, src));
+//                sink_vector (dst);
+            }
+            footer<value_type> () (N * N, N * (N - 1), runs, stopwatch.elapsed ());
+        }
+        catch (const std::exception &err) {
+            std::cout << err.what () << std::endl;
+        }
+    }
+};
+template<class M, class V, int N>
+struct bench_cpp_matrix_vector_prod {   // Times matrix * vector using std::slice rows of a flat container.
+    typedef typename M::value_type value_type;
+
+    void operator () (int runs) const {
+        try {
+            static M flat (N * N);
+            static V src (N), dst (N);
+            initialize_vector (flat);
+            initialize_vector (src);
+            boost::timer stopwatch;
+            for (int run = 0; run < runs; ++ run) {
+                for (int r = 0; r < N; ++ r) {
+                    std::valarray<value_type> row_vals (flat [std::slice (N * r, N, 1)]);   // N elements starting at N * r
+                    dst [r] = (row_vals * src).sum ();
+                }
+//                sink_vector (dst);
+            }
+            footer<value_type> () (N * N, N * (N - 1), runs, stopwatch.elapsed ());
+        }
+        catch (const std::exception &err) {
+            std::cout << err.what () << std::endl;
+        }
+    }
+};
+
+template<class T, int N>
+struct bench_c_matrix_add {   // Times a hand-written negated matrix sum over N x N C arrays.
+    typedef T value_type;
+
+    void operator () (int runs) const {
+        try {
+            static typename c_matrix_traits<T, N, N>::type lhs, rhs, out;
+            initialize_c_matrix<T, N, N> () (lhs);
+            initialize_c_matrix<T, N, N> () (rhs);
+            boost::timer stopwatch;
+            for (int run = 0; run < runs; ++ run) {
+                for (int row = 0; row < N; ++ row) {
+                    for (int col = 0; col < N; ++ col) {
+                        out [row] [col] = - (lhs [row] [col] + rhs [row] [col]);
+                    }
+                }
+//                sink_c_matrix<T, N, N> () (out);
+            }
+            BOOST_UBLAS_NOT_USED(out);
+
+            footer<value_type> () (0, 2 * N * N, runs, stopwatch.elapsed ());
+        }
+        catch (const std::exception &err) {
+            std::cout << err.what () << std::endl;
+        }
+    }
+};
+template<class M, int N>
+struct bench_my_matrix_add {   // Times out = - (lhs + rhs) on N x N matrices of type M, in two assignment forms.
+    typedef typename M::value_type value_type;
+
+    void operator () (int runs, safe_tag) const {   // variant using plain assignment
+        try {
+            static M lhs (N, N), rhs (N, N), out (N, N);
+            initialize_matrix (lhs);
+            initialize_matrix (rhs);
+            boost::timer stopwatch;
+            for (int run = 0; run < runs; ++ run) {
+                out = - (lhs + rhs);
+//                sink_matrix (out);
+            }
+            footer<value_type> () (0, 2 * N * N, runs, stopwatch.elapsed ());
+        }
+        catch (const std::exception &err) {
+            std::cout << err.what () << std::endl;
+        }
+    }
+    void operator () (int runs, fast_tag) const {   // variant using assign ()
+        try {
+            static M lhs (N, N), rhs (N, N), out (N, N);
+            initialize_matrix (lhs);
+            initialize_matrix (rhs);
+            boost::timer stopwatch;
+            for (int run = 0; run < runs; ++ run) {
+                out.assign (- (lhs + rhs));
+//                sink_matrix (out);
+            }
+            footer<value_type> () (0, 2 * N * N, runs, stopwatch.elapsed ());
+        }
+        catch (const std::exception &err) {
+            std::cout << err.what () << std::endl;
+        }
+    }
+};
+template<class M, int N>
+struct bench_cpp_matrix_add {   // Times out = - (lhs + rhs) on flat containers of N * N elements.
+    typedef typename M::value_type value_type;
+
+    void operator () (int runs) const {
+        try {
+            static M lhs (N * N), rhs (N * N), out (N * N);
+            initialize_vector (lhs);
+            initialize_vector (rhs);
+            boost::timer stopwatch;
+            for (int run = 0; run < runs; ++ run) {
+                out = - (lhs + rhs);
+//                sink_vector (out);
+            }
+            footer<value_type> () (0, 2 * N * N, runs, stopwatch.elapsed ());
+        }
+        catch (const std::exception &err) {
+            std::cout << err.what () << std::endl;
+        }
+    }
+};
+
+// Benchmark O (n ^ 2): outer_prod, prod (matrix, vector) and matrix + matrix for every enabled storage type.
+template<class T, int N>
+void bench_2<T, N>::operator () (int runs) {
+    header ("bench_2");
+
+    header ("outer_prod");
+
+    header ("C array");   // hand-written loops over plain arrays, always run
+    bench_c_outer_prod<T, N> () (runs);
+
+#ifdef USE_C_ARRAY
+    header ("c_matrix, c_vector safe");
+    bench_my_outer_prod<ublas::c_matrix<T, N, N>,
+                        ublas::c_vector<T, N>, N> () (runs, safe_tag ());
+
+    header ("c_matrix, c_vector fast");
+    bench_my_outer_prod<ublas::c_matrix<T, N, N>,
+                        ublas::c_vector<T, N>, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_BOUNDED_ARRAY
+    header ("matrix<bounded_array>, vector<bounded_array> safe");
+    bench_my_outer_prod<ublas::matrix<T, ublas::row_major, ublas::bounded_array<T, N * N> >,
+                        ublas::vector<T, ublas::bounded_array<T, N> >, N> () (runs, safe_tag ());
+
+    header ("matrix<bounded_array>, vector<bounded_array> fast");
+    bench_my_outer_prod<ublas::matrix<T, ublas::row_major, ublas::bounded_array<T, N * N> >,
+                        ublas::vector<T, ublas::bounded_array<T, N> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_UNBOUNDED_ARRAY
+    header ("matrix<unbounded_array>, vector<unbounded_array> safe");
+    bench_my_outer_prod<ublas::matrix<T, ublas::row_major, ublas::unbounded_array<T> >,
+                        ublas::vector<T, ublas::unbounded_array<T> >, N> () (runs, safe_tag ());
+
+    header ("matrix<unbounded_array>, vector<unbounded_array> fast");
+    bench_my_outer_prod<ublas::matrix<T, ublas::row_major, ublas::unbounded_array<T> >,
+                        ublas::vector<T, ublas::unbounded_array<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VALARRAY
+    header ("matrix<std::valarray>, vector<std::valarray> safe");
+    bench_my_outer_prod<ublas::matrix<T, ublas::row_major, std::valarray<T> >,
+                        ublas::vector<T, std::valarray<T> >, N> () (runs, safe_tag ());
+
+    header ("matrix<std::valarray>, vector<std::valarray> fast");
+    bench_my_outer_prod<ublas::matrix<T, ublas::row_major, std::valarray<T> >,
+                        ublas::vector<T, std::valarray<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VECTOR
+    header ("matrix<std::vector>, vector<std::vector> safe");
+    bench_my_outer_prod<ublas::matrix<T, ublas::row_major, std::vector<T> >,
+                        ublas::vector<T, std::vector<T> >, N> () (runs, safe_tag ());
+
+    header ("matrix<std::vector>, vector<std::vector> fast");
+    bench_my_outer_prod<ublas::matrix<T, ublas::row_major, std::vector<T> >,
+                        ublas::vector<T, std::vector<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VALARRAY
+    header ("std::valarray");   // plain std::valarray, without a uBLAS wrapper
+    bench_cpp_outer_prod<std::valarray<T>, std::valarray<T>, N> () (runs);
+#endif
+
+    header ("prod (matrix, vector)");
+
+    header ("C array");   // hand-written loops over plain arrays, always run
+    bench_c_matrix_vector_prod<T, N> () (runs);
+
+#ifdef USE_C_ARRAY
+    header ("c_matrix, c_vector safe");
+    bench_my_matrix_vector_prod<ublas::c_matrix<T, N, N>,
+                                ublas::c_vector<T, N>, N> () (runs, safe_tag ());
+
+    header ("c_matrix, c_vector fast");
+    bench_my_matrix_vector_prod<ublas::c_matrix<T, N, N>,
+                                ublas::c_vector<T, N>, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_BOUNDED_ARRAY
+    header ("matrix<bounded_array>, vector<bounded_array> safe");
+    bench_my_matrix_vector_prod<ublas::matrix<T, ublas::row_major, ublas::bounded_array<T, N * N> >,
+                                ublas::vector<T, ublas::bounded_array<T, N> >, N> () (runs, safe_tag ());
+
+    header ("matrix<bounded_array>, vector<bounded_array> fast");
+    bench_my_matrix_vector_prod<ublas::matrix<T, ublas::row_major, ublas::bounded_array<T, N * N> >,
+                                ublas::vector<T, ublas::bounded_array<T, N> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_UNBOUNDED_ARRAY
+    header ("matrix<unbounded_array>, vector<unbounded_array> safe");
+    bench_my_matrix_vector_prod<ublas::matrix<T, ublas::row_major, ublas::unbounded_array<T> >,
+                                ublas::vector<T, ublas::unbounded_array<T> >, N> () (runs, safe_tag ());
+
+    header ("matrix<unbounded_array>, vector<unbounded_array> fast");
+    bench_my_matrix_vector_prod<ublas::matrix<T, ublas::row_major, ublas::unbounded_array<T> >,
+                                ublas::vector<T, ublas::unbounded_array<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VALARRAY
+    header ("matrix<std::valarray>, vector<std::valarray> safe");
+    bench_my_matrix_vector_prod<ublas::matrix<T, ublas::row_major, std::valarray<T> >,
+                                ublas::vector<T, std::valarray<T> >, N> () (runs, safe_tag ());
+
+    header ("matrix<std::valarray>, vector<std::valarray> fast");
+    bench_my_matrix_vector_prod<ublas::matrix<T, ublas::row_major, std::valarray<T> >,
+                                ublas::vector<T, std::valarray<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VECTOR
+    header ("matrix<std::vector>, vector<std::vector> safe");
+    bench_my_matrix_vector_prod<ublas::matrix<T, ublas::row_major, std::vector<T> >,
+                                ublas::vector<T, std::vector<T> >, N> () (runs, safe_tag ());
+
+    header ("matrix<std::vector>, vector<std::vector> fast");
+    bench_my_matrix_vector_prod<ublas::matrix<T, ublas::row_major, std::vector<T> >,
+                                ublas::vector<T, std::vector<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VALARRAY
+    header ("std::valarray");   // plain std::valarray, without a uBLAS wrapper
+    bench_cpp_matrix_vector_prod<std::valarray<T>, std::valarray<T>, N> () (runs);
+#endif
+
+    header ("matrix + matrix");
+
+    header ("C array");   // hand-written loops over plain arrays, always run
+    bench_c_matrix_add<T, N> () (runs);
+
+#ifdef USE_C_ARRAY
+    header ("c_matrix safe");
+    bench_my_matrix_add<ublas::c_matrix<T, N, N>, N> () (runs, safe_tag ());
+
+    header ("c_matrix fast");
+    bench_my_matrix_add<ublas::c_matrix<T, N, N>, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_BOUNDED_ARRAY
+    header ("matrix<bounded_array> safe");
+    bench_my_matrix_add<ublas::matrix<T, ublas::row_major, ublas::bounded_array<T, N * N> >, N> () (runs, safe_tag ());
+
+    header ("matrix<bounded_array> fast");
+    bench_my_matrix_add<ublas::matrix<T, ublas::row_major, ublas::bounded_array<T, N * N> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_UNBOUNDED_ARRAY
+    header ("matrix<unbounded_array> safe");
+    bench_my_matrix_add<ublas::matrix<T, ublas::row_major, ublas::unbounded_array<T> >, N> () (runs, safe_tag ());
+
+    header ("matrix<unbounded_array> fast");
+    bench_my_matrix_add<ublas::matrix<T, ublas::row_major, ublas::unbounded_array<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VALARRAY
+    header ("matrix<std::valarray> safe");
+    bench_my_matrix_add<ublas::matrix<T, ublas::row_major, std::valarray<T> >, N> () (runs, safe_tag ());
+
+    header ("matrix<std::valarray> fast");
+    bench_my_matrix_add<ublas::matrix<T, ublas::row_major, std::valarray<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VECTOR
+    header ("matrix<std::vector> safe");
+    bench_my_matrix_add<ublas::matrix<T, ublas::row_major, std::vector<T> >, N> () (runs, safe_tag ());
+
+    header ("matrix<std::vector> fast");
+    bench_my_matrix_add<ublas::matrix<T, ublas::row_major, std::vector<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VALARRAY
+    header ("std::valarray");   // plain std::valarray, without a uBLAS wrapper
+    bench_cpp_matrix_add<std::valarray<T>, N> () (runs);
+#endif
+}
+
+#ifdef USE_FLOAT
+template struct bench_2<float, 3>;   // explicit instantiations: the member definition above is local to this file
+template struct bench_2<float, 10>;
+template struct bench_2<float, 30>;
+template struct bench_2<float, 100>;
+#endif
+
+#ifdef USE_DOUBLE
+template struct bench_2<double, 3>;   // sizes match the ones requested by do_bench
+template struct bench_2<double, 10>;
+template struct bench_2<double, 30>;
+template struct bench_2<double, 100>;
+#endif
+
+#ifdef USE_STD_COMPLEX
+#ifdef USE_FLOAT
+template struct bench_2<std::complex<float>, 3>;
+template struct bench_2<std::complex<float>, 10>;
+template struct bench_2<std::complex<float>, 30>;
+template struct bench_2<std::complex<float>, 100>;
+#endif
+
+#ifdef USE_DOUBLE
+template struct bench_2<std::complex<double>, 3>;
+template struct bench_2<std::complex<double>, 10>;
+template struct bench_2<std::complex<double>, 30>;
+template struct bench_2<std::complex<double>, 100>;
+#endif
+#endif
diff --git a/benchmarks/bench1/bench13.cpp b/benchmarks/bench1/bench13.cpp
new file mode 100644
index 0000000..fadb0b6
--- /dev/null
+++ b/benchmarks/bench1/bench13.cpp
@@ -0,0 +1,192 @@
+//
+//  Copyright (c) 2000-2002
+//  Joerg Walter, Mathias Koch
+//
+//  Distributed under the Boost Software License, Version 1.0. (See
+//  accompanying file LICENSE_1_0.txt or copy at
+//  http://www.boost.org/LICENSE_1_0.txt)
+//
+//  The authors gratefully acknowledge the support of
+//  GeNeSys mbH & Co. KG in producing this work.
+//
+
+#include "bench1.hpp"
+
+template<class T, int N>
+struct bench_c_matrix_prod {  // Baseline: times a hand-written triple loop computing m3 = m1 * m2 on N x N C arrays.
+    typedef T value_type;
+
+    void operator () (int runs) const {  // runs: number of repetitions of the full product inside the timed region
+        try {
+            static typename c_matrix_traits<T, N, N>::type m1, m2, m3;
+            initialize_c_matrix<T, N, N> () (m1);
+            initialize_c_matrix<T, N, N> () (m2);
+            boost::timer t;  // created only after the operands have been filled
+            for (int i = 0; i < runs; ++ i) {
+                for (int j = 0; j < N; ++ j) {
+                    for (int k = 0; k < N; ++ k) {
+                        m3 [j] [k] = 0;
+                        for (int l = 0; l < N; ++ l) {
+                            m3 [j] [k] += m1 [j] [l] * m2 [l] [k];
+                        }
+                    }
+                }
+//                sink_c_matrix<T, N, N> () (m3);
+            }
+            footer<value_type> () (N * N * N, N * N * (N - 1), runs, t.elapsed ());  // reported as N^3 multiplies and N^2 (N - 1) additions per run
+        }
+        catch (std::exception &e) {  // print the message and return normally
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+template<class M, int N>
+struct bench_my_matrix_prod {  // Times ublas::prod (m1, m2) on N x N matrices of type M; one overload per assignment style.
+    typedef typename M::value_type value_type;
+
+    void operator () (int runs, safe_tag) const {  // safe variant: the product is stored with operator =
+        try {
+            static M m1 (N, N), m2 (N, N), m3 (N, N);
+            initialize_matrix (m1);
+            initialize_matrix (m2);
+            boost::timer t;  // created only after the operands have been filled
+            for (int i = 0; i < runs; ++ i) {
+                m3 = ublas::prod (m1, m2);
+//                sink_matrix (m3);
+            }
+            footer<value_type> () (N * N * N, N * N * (N - 1), runs, t.elapsed ());  // same operation counts as the C array baseline
+        }
+        catch (std::exception &e) {  // print the message and return normally
+            std::cout << e.what () << std::endl;
+        }
+    }
+    void operator () (int runs, fast_tag) const {  // fast variant: the product is stored with m3.assign ()
+        try {
+            static M m1 (N, N), m2 (N, N), m3 (N, N);
+            initialize_matrix (m1);
+            initialize_matrix (m2);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                m3.assign (ublas::prod (m1, m2));  // NOTE(review): presumably skips the temporary used by operator = -- confirm in the uBLAS docs
+//                sink_matrix (m3);
+            }
+            footer<value_type> () (N * N * N, N * N * (N - 1), runs, t.elapsed ());
+        }
+        catch (std::exception &e) {
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+template<class M, int N>
+struct bench_cpp_matrix_prod {  // Times an N x N matrix product on flat arrays of N * N elements using std::valarray slices.
+    typedef typename M::value_type value_type;
+
+    void operator () (int runs) const {  // runs: number of repetitions of the full product inside the timed region
+        try {
+            static M m1 (N * N), m2 (N * N), m3 (N * N);
+            initialize_vector (m1);
+            initialize_vector (m2);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                for (int j = 0; j < N; ++ j) {
+                    std::valarray<value_type> row (m1 [std::slice (N * j, N, 1)]);  // N consecutive elements starting at N * j
+                    for (int k = 0; k < N; ++ k) {
+                        std::valarray<value_type> column (m2 [std::slice (k, N, N)]);  // N elements starting at k with stride N
+                        m3 [N * j + k] = (row * column).sum ();
+                    }
+                }
+//                sink_vector (m3);
+            }
+            footer<value_type> () (N * N * N, N * N * (N - 1), runs, t.elapsed ());
+        }
+        catch (std::exception &e) {  // print the message and return normally
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+
+// Benchmark O (n ^ 3): matrix * matrix product, run for the C array baseline and every storage type enabled by a USE_* macro.
+template<class T, int N>
+void bench_3<T, N>::operator () (int runs) {
+    header ("bench_3");
+
+    header ("prod (matrix, matrix)");
+
+    header ("C array");
+    bench_c_matrix_prod<T, N> () (runs);
+
+#ifdef USE_C_ARRAY
+    header ("c_matrix safe");
+    bench_my_matrix_prod<ublas::c_matrix<T, N, N>, N> () (runs, safe_tag ());
+
+    header ("c_matrix fast");
+    bench_my_matrix_prod<ublas::c_matrix<T, N, N>, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_BOUNDED_ARRAY
+    header ("matrix<bounded_array> safe");
+    bench_my_matrix_prod<ublas::matrix<T, ublas::row_major, ublas::bounded_array<T, N * N> >, N> () (runs, safe_tag ());
+
+    header ("matrix<bounded_array> fast");
+    bench_my_matrix_prod<ublas::matrix<T, ublas::row_major, ublas::bounded_array<T, N * N> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_UNBOUNDED_ARRAY
+    header ("matrix<unbounded_array> safe");
+    bench_my_matrix_prod<ublas::matrix<T, ublas::row_major, ublas::unbounded_array<T> >, N> () (runs, safe_tag ());
+
+    header ("matrix<unbounded_array> fast");
+    bench_my_matrix_prod<ublas::matrix<T, ublas::row_major, ublas::unbounded_array<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VALARRAY
+    header ("matrix<std::valarray> safe");
+    bench_my_matrix_prod<ublas::matrix<T, ublas::row_major, std::valarray<T> >, N> () (runs, safe_tag ());
+
+    header ("matrix<std::valarray> fast");
+    bench_my_matrix_prod<ublas::matrix<T, ublas::row_major, std::valarray<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VECTOR
+    header ("matrix<std::vector> safe");
+    bench_my_matrix_prod<ublas::matrix<T, ublas::row_major, std::vector<T> >, N> () (runs, safe_tag ());
+
+    header ("matrix<std::vector> fast");
+    bench_my_matrix_prod<ublas::matrix<T, ublas::row_major, std::vector<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VALARRAY
+    header ("std::valarray");
+    bench_cpp_matrix_prod<std::valarray<T>, N> () (runs);
+#endif
+}
+
+#ifdef USE_FLOAT  // Explicit instantiations of bench_3 for each enabled scalar type and N in {3, 10, 30, 100}.
+template struct bench_3<float, 3>;
+template struct bench_3<float, 10>;
+template struct bench_3<float, 30>;
+template struct bench_3<float, 100>;
+#endif
+
+#ifdef USE_DOUBLE
+template struct bench_3<double, 3>;
+template struct bench_3<double, 10>;
+template struct bench_3<double, 30>;
+template struct bench_3<double, 100>;
+#endif
+
+#ifdef USE_STD_COMPLEX  // complex variants additionally require the matching real type macro
+#ifdef USE_FLOAT
+template struct bench_3<std::complex<float>, 3>;
+template struct bench_3<std::complex<float>, 10>;
+template struct bench_3<std::complex<float>, 30>;
+template struct bench_3<std::complex<float>, 100>;
+#endif
+
+#ifdef USE_DOUBLE
+template struct bench_3<std::complex<double>, 3>;
+template struct bench_3<std::complex<double>, 10>;
+template struct bench_3<std::complex<double>, 30>;
+template struct bench_3<std::complex<double>, 100>;
+#endif
+#endif
diff --git a/benchmarks/bench2/Jamfile.v2 b/benchmarks/bench2/Jamfile.v2
new file mode 100644
index 0000000..4eb8015
--- /dev/null
+++ b/benchmarks/bench2/Jamfile.v2
@@ -0,0 +1,10 @@
+# Copyright (c) 2004 Michael Stevens
+# Use, modification and distribution are subject to the
+# Boost Software License, Version 1.0. (See accompanying file
+# LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+# bench2 - measures the performance of sparse matrix and vector operations.
+
+exe bench2
+    : bench2.cpp bench21.cpp bench22.cpp bench23.cpp
+    ;
diff --git a/benchmarks/bench2/bench2.cpp b/benchmarks/bench2/bench2.cpp
new file mode 100644
index 0000000..43ba152
--- /dev/null
+++ b/benchmarks/bench2/bench2.cpp
@@ -0,0 +1,122 @@
+//
+//  Copyright (c) 2000-2002
+//  Joerg Walter, Mathias Koch
+//
+//  Distributed under the Boost Software License, Version 1.0. (See
+//  accompanying file LICENSE_1_0.txt or copy at
+//  http://www.boost.org/LICENSE_1_0.txt)
+//
+//  The authors gratefully acknowledge the support of
+//  GeNeSys mbH & Co. KG in producing this work.
+//
+
+#include "bench2.hpp"
+
+void header (std::string text) {  // Prints a section label on its own line and flushes std::cout.
+    std::cout << text << std::endl;
+}
+
+template<class T>
+struct peak_c_plus {  // Times a loop of scalar += on T, reported as one addition per run.
+    typedef T value_type;
+
+    void operator () (int runs) const {  // runs: number of additions performed inside the timed region
+        try {
+            static T s (0);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                s += T (0);  // adds zero, so the value of s never changes
+//                sink_scalar (s);
+            }
+            footer<value_type> () (0, 1, runs, t.elapsed ());  // 0 multiplies, 1 addition per run
+        }
+        catch (std::exception &e) {  // print the message and return normally
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+template<class T>
+struct peak_c_multiplies {  // Times a loop of scalar *= on T.
+    typedef T value_type;
+
+    void operator () (int runs) const {  // runs: number of multiplications performed inside the timed region
+        try {
+            static T s (1);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                s *= T (1);  // multiplies by one, so the value of s never changes
+//                sink_scalar (s);
+            }
+            footer<value_type> () (0, 1, runs, t.elapsed ());  // NOTE(review): passed as (multiplies = 0, plus = 1), same as peak_c_plus -- confirm this is intended
+        }
+        catch (std::exception &e) {  // print the message and return normally
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+
+template<class T>
+void peak<T>::operator () (int runs) {  // Runs the scalar "plus" and "multiplies" loops for T, each preceded by its label.
+    header ("peak");
+
+    header ("plus");
+    peak_c_plus<T> () (runs);
+
+    header ("multiplies");
+    peak_c_multiplies<T> () (runs);
+}
+
+
+template <typename scalar>  // Runs peak plus bench_1/2/3 for sizes 3, 10, 30 and 100 using the given scalar type.
+void do_bench (std::string type_string, int scale)  // type_string: label printed before each group; scale: multiplier applied to every run count
+{
+    header (type_string);
+    peak<scalar> () (1000000 * scale);
+
+    header (type_string + ", 3");
+    bench_1<scalar, 3> () (1000000 * scale);
+    bench_2<scalar, 3> () (300000 * scale);
+    bench_3<scalar, 3> () (100000 * scale);
+
+    header (type_string + ", 10");  // run counts shrink as N grows and from bench_1 to bench_3
+    bench_1<scalar, 10> () (300000 * scale);
+    bench_2<scalar, 10> () (30000 * scale);
+    bench_3<scalar, 10> () (3000 * scale);
+
+    header (type_string + ", 30");
+    bench_1<scalar, 30> () (100000 * scale);
+    bench_2<scalar, 30> () (3000 * scale);
+    bench_3<scalar, 30> () (100 * scale);
+
+    header (type_string + ", 100");
+    bench_1<scalar, 100> () (30000 * scale);
+    bench_2<scalar, 100> () (300 * scale);
+    bench_3<scalar, 100> () (3 * scale);
+}
+
+int main (int argc, char *argv []) {  // Entry point: runs do_bench for every scalar type enabled by the USE_* macros.
+
+    int scale = 1;  // default multiplier when no argument is given
+    if (argc > 1)
+        scale = std::atoi (argv [1]);  // optional first argument overrides the run-count multiplier; the result is not validated
+
+#ifdef USE_FLOAT
+    do_bench<float> ("FLOAT", scale);
+#endif
+
+#ifdef USE_DOUBLE
+    do_bench<double> ("DOUBLE", scale);
+#endif
+
+#ifdef USE_STD_COMPLEX
+#ifdef USE_FLOAT
+    do_bench<std::complex<float> > ("COMPLEX<FLOAT>", scale);
+#endif
+
+#ifdef USE_DOUBLE
+    do_bench<std::complex<double> > ("COMPLEX<DOUBLE>", scale);
+#endif
+#endif
+
+    return 0;
+}
diff --git a/benchmarks/bench2/bench2.hpp b/benchmarks/bench2/bench2.hpp
new file mode 100644
index 0000000..efb20a2
--- /dev/null
+++ b/benchmarks/bench2/bench2.hpp
@@ -0,0 +1,182 @@
+//
+//  Copyright (c) 2000-2002
+//  Joerg Walter, Mathias Koch
+//
+//  Distributed under the Boost Software License, Version 1.0. (See
+//  accompanying file LICENSE_1_0.txt or copy at
+//  http://www.boost.org/LICENSE_1_0.txt)
+//
+//  The authors gratefully acknowledge the support of
+//  GeNeSys mbH & Co. KG in producing this work.
+//
+
+#ifndef BENCH2_H
+#define BENCH2_H
+
+#include <iostream>
+#include <string>
+#include <valarray>
+
+#include <boost/numeric/ublas/vector.hpp>
+#include <boost/numeric/ublas/vector_sparse.hpp>
+#include <boost/numeric/ublas/matrix.hpp>
+#include <boost/numeric/ublas/matrix_sparse.hpp>
+
+#include <boost/timer.hpp>
+
+
+#define BOOST_UBLAS_NOT_USED(x) (void)(x)
+
+
+namespace ublas = boost::numeric::ublas;
+
+void header (std::string text);
+
+template<class T>
+struct footer {  // Prints the elapsed seconds and the derived Mflops rate of one benchmark.
+    void operator () (int multiplies, int plus, int runs, double elapsed) {  // multiplies / plus: operation counts per run; runs: repetitions timed
+        std::cout << "elapsed: " << elapsed << " s, "
+                  << (double (multiplies) * ublas::type_traits<T>::multiplies_complexity +  // weighted in double: the integer product
+                      double (plus) * ublas::type_traits<T>::plus_complexity) * runs /      // with runs can overflow for large run counts
+                     (1024 * 1024 * elapsed) << " Mflops" << std::endl;  // "M" here is 1024 * 1024 operations
+    }
+};
+
+template<class T, int N>
+struct c_vector_traits {  // Names the C array type T [N] so it can be used in declarations and reference parameters.
+    typedef T type [N];
+};
+template<class T, int N, int M>
+struct c_matrix_traits {  // Names the two-dimensional C array type T [N] [M].
+    typedef T type [N] [M];
+};
+
+template<class T, int N>
+struct initialize_c_vector {
+    // Fills all N elements of a C array, front to back, with std::rand () values converted to float.
+    void operator () (typename c_vector_traits<T, N>::type &v) {
+        for (T *it = v, *last = v + N; it != last; ++ it)
+            *it = std::rand () * 1.f;
+    }
+};
+// Assigns a std::rand () value converted to float to every index of v, in ascending order.
+template<class V>
+BOOST_UBLAS_INLINE
+void initialize_vector (V &v) {
+    const int count = v.size ();
+    for (int idx = 0; idx < count; ++ idx)
+        v [idx] = std::rand () * 1.f;
+}
+
+template<class T, int N, int M>
+struct initialize_c_matrix {
+    // Fills an N x M C array row by row with std::rand () values converted to float.
+    void operator () (typename c_matrix_traits<T, N, M>::type &m) {
+        for (int row = 0; row < N; ++ row)
+            for (int col = 0; col < M; ++ col)
+                m [row] [col] = std::rand () * 1.f;
+    }
+};
+// Overload selected by row_major_tag: fills m with std::rand () values, iterating rows in the outer loop.
+template<class M>
+BOOST_UBLAS_INLINE
+void initialize_matrix (M &m, ublas::row_major_tag) {
+    const int rows = m.size1 ();
+    const int cols = m.size2 ();
+    for (int r = 0; r < rows; ++ r)
+        for (int c = 0; c < cols; ++ c)
+            m (r, c) = std::rand () * 1.f;
+}
+// Overload selected by column_major_tag: fills m with std::rand () values, iterating columns in the outer loop.
+template<class M>
+BOOST_UBLAS_INLINE
+void initialize_matrix (M &m, ublas::column_major_tag) {
+    const int rows = m.size1 ();
+    const int cols = m.size2 ();
+    for (int c = 0; c < cols; ++ c)
+        for (int r = 0; r < rows; ++ r)
+            m (r, c) = std::rand () * 1.f;
+}
+// Fills m by forwarding to the overload that matches M's orientation_category tag.
+template<class M>
+BOOST_UBLAS_INLINE
+void initialize_matrix (M &m) {
+    initialize_matrix (m, typename M::orientation_category ());
+}
+
+template<class T>
+BOOST_UBLAS_INLINE
+void sink_scalar (const T &s) {  // Copies s into a function-local static; all call sites visible here are commented out.
+    static T g_s = s;  // a local static is initialized on the first call only, so later calls do not store s
+}
+
+template<class T, int N>
+struct sink_c_vector {
+    // Copies the N elements of v into a function-local static array on every call.
+    void operator () (const typename c_vector_traits<T, N>::type &v) {
+        static typename c_vector_traits<T, N>::type keep;
+        for (int pos = 0; pos < N; ++ pos) keep [pos] = v [pos];
+    }
+};
+template<class V>
+BOOST_UBLAS_INLINE
+void sink_vector (const V &v) {  // Copy-constructs a function-local static from v; all call sites visible here are commented out.
+    static V g_v (v);  // a local static is constructed on the first call only
+}
+
+template<class T, int N, int M>
+struct sink_c_matrix {
+    // Copies the N x M elements of m into a function-local static array on every call.
+    void operator () (const typename c_matrix_traits<T, N, M>::type &m) {
+        static typename c_matrix_traits<T, N, M>::type keep;
+        for (int row = 0; row < N; ++ row)
+            for (int col = 0; col < M; ++ col) keep [row] [col] = m [row] [col];
+    }
+};
+template<class M>
+BOOST_UBLAS_INLINE
+void sink_matrix (const M &m) {  // Copy-constructs a function-local static from m; all call sites visible here are commented out.
+    static M g_m (m);  // a local static is constructed on the first call only
+}
+
+template<class T>
+struct peak {  // Scalar peak-rate benchmark for T; operator () is defined out of line in bench2.cpp.
+    void operator () (int runs);
+};
+
+template<class T, int N>
+struct bench_1 {  // O (n) benchmarks (inner_prod, vector + vector); operator () is defined in bench21.cpp.
+    void operator () (int runs);
+};
+
+template<class T, int N>
+struct bench_2 {  // O (n ^ 2) benchmarks; operator () is defined in bench22.cpp.
+    void operator () (int runs);
+};
+
+template<class T, int N>
+struct bench_3 {  // Third benchmark group; operator () is presumably defined in bench23.cpp (listed in the Jamfile) -- TODO confirm.
+    void operator () (int runs);
+};
+
+struct safe_tag {};  // selects the benchmark overload that stores results with operator =
+struct fast_tag {};  // selects the benchmark overload that stores results with assign ()
+
+// #define USE_FLOAT
+#define USE_DOUBLE  // scalar types: select which do_bench calls and explicit instantiations are compiled
+// #define USE_STD_COMPLEX
+
+#define USE_MAP_ARRAY  // storage backends for the mapped containers
+// #define USE_STD_MAP
+// #define USE_STD_VALARRAY
+
+#define USE_MAPPED_VECTOR  // sparse vector types benchmarked by bench_1
+#define USE_COMPRESSED_VECTOR
+#define USE_COORDINATE_VECTOR
+
+#define USE_MAPPED_MATRIX  // NOTE(review): bench22.cpp tests USE_SPARSE_MATRIX, which is not defined in this header -- confirm that section is meant to be disabled
+// #define USE_SPARSE_VECTOR_OF_SPARSE_VECTOR
+#define USE_COMPRESSED_MATRIX
+#define USE_COORDINATE_MATRIX
+
+#endif
diff --git a/benchmarks/bench2/bench21.cpp b/benchmarks/bench2/bench21.cpp
new file mode 100644
index 0000000..50d70a4
--- /dev/null
+++ b/benchmarks/bench2/bench21.cpp
@@ -0,0 +1,282 @@
+//
+//  Copyright (c) 2000-2002
+//  Joerg Walter, Mathias Koch
+//
+//  Distributed under the Boost Software License, Version 1.0. (See
+//  accompanying file LICENSE_1_0.txt or copy at
+//  http://www.boost.org/LICENSE_1_0.txt)
+//
+//  The authors gratefully acknowledge the support of
+//  GeNeSys mbH & Co. KG in producing this work.
+//
+
+#include "bench2.hpp"
+
+template<class T, int N>
+struct bench_c_inner_prod {  // Baseline: times a hand-written inner product of two C arrays of N elements.
+    typedef T value_type;
+
+    void operator () (int runs) const {  // runs: number of inner products computed inside the timed region
+        try {
+            static typename c_vector_traits<T, N>::type v1, v2;
+            initialize_c_vector<T, N> () (v1);
+            initialize_c_vector<T, N> () (v2);
+            boost::timer t;  // created only after the operands have been filled
+            for (int i = 0; i < runs; ++ i) {
+                static value_type s (0);  // static: never reset, so s keeps accumulating across runs and calls
+                for (int j = 0; j < N; ++ j) {
+                    s += v1 [j] * v2 [j];
+                }
+//                sink_scalar (s);
+            }
+            footer<value_type> () (N, N - 1, runs, t.elapsed ());  // reported as N multiplies and N - 1 additions per run
+        }
+        catch (std::exception &e) {  // print the message and return normally
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+template<class V, int N>
+struct bench_my_inner_prod {  // Times ublas::inner_prod on two vectors of type V.
+    typedef typename V::value_type value_type;
+
+    void operator () (int runs) const {  // runs: number of inner products computed inside the timed region
+        try {
+            static V v1 (N, N), v2 (N, N);  // NOTE(review): presumably (size, non-zero capacity) for the sparse vector types -- confirm
+            initialize_vector (v1);  // assigns every index, so the vectors end up fully populated
+            initialize_vector (v2);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                static value_type s (0);
+                s = ublas::inner_prod (v1, v2);
+//                sink_scalar (s);
+                BOOST_UBLAS_NOT_USED(s);  // expands to (void)(s)
+            }
+            footer<value_type> () (N, N - 1, runs, t.elapsed ());  // same operation counts as the C array baseline
+        }
+        catch (std::exception &e) {  // print the message and return normally
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+template<class V, int N>
+struct bench_cpp_inner_prod {  // Times an inner product written as (v1 * v2).sum (); instantiated with std::valarray in this file.
+    typedef typename V::value_type value_type;
+
+    void operator () (int runs) const {  // runs: number of inner products computed inside the timed region
+        try {
+            static V v1 (N), v2 (N);
+            initialize_vector (v1);
+            initialize_vector (v2);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                static value_type s (0);
+                s = (v1 * v2).sum ();  // element-wise product followed by a reduction
+//                sink_scalar (s);
+            }
+            footer<value_type> () (N, N - 1, runs, t.elapsed ());
+        }
+        catch (std::exception &e) {  // print the message and return normally
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+
+template<class T, int N>
+struct bench_c_vector_add {  // Baseline: times v3 = - (v1 + v2) written as a loop over C arrays of N elements.
+    typedef T value_type;
+
+    void operator () (int runs) const {  // runs: number of full vector updates inside the timed region
+        try {
+            static typename c_vector_traits<T, N>::type v1, v2, v3;
+            initialize_c_vector<T, N> () (v1);
+            initialize_c_vector<T, N> () (v2);
+            boost::timer t;  // created only after the operands have been filled
+            for (int i = 0; i < runs; ++ i) {
+                for (int j = 0; j < N; ++ j) {
+                    v3 [j] = - (v1 [j] + v2 [j]);
+                }
+//                sink_c_vector<T, N> () (v3);
+                BOOST_UBLAS_NOT_USED(v3);  // expands to (void)(v3)
+            }
+            footer<value_type> () (0, 2 * N, runs, t.elapsed ());  // reported as 2 N additions (sum and negation) and no multiplies
+        }
+        catch (std::exception &e) {  // print the message and return normally
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+template<class V, int N>
+struct bench_my_vector_add {  // Times v3 = - (v1 + v2) as a uBLAS expression on vectors of type V; one overload per assignment style.
+    typedef typename V::value_type value_type;
+
+    void operator () (int runs, safe_tag) const {  // safe variant: the result is stored with operator =
+        try {
+            static V v1 (N, N), v2 (N, N), v3 (N, N);  // NOTE(review): presumably (size, non-zero capacity) -- confirm
+            initialize_vector (v1);
+            initialize_vector (v2);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                v3 = - (v1 + v2);
+//                sink_vector (v3);
+            }
+            footer<value_type> () (0, 2 * N, runs, t.elapsed ());  // same operation counts as the C array baseline
+        }
+        catch (std::exception &e) {  // print the message and return normally
+            std::cout << e.what () << std::endl;
+        }
+    }
+    void operator () (int runs, fast_tag) const {  // fast variant: the result is stored with v3.assign ()
+        try {
+            static V v1 (N, N), v2 (N, N), v3 (N, N);
+            initialize_vector (v1);
+            initialize_vector (v2);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                v3.assign (- (v1 + v2));
+//                sink_vector (v3);
+            }
+            footer<value_type> () (0, 2 * N, runs, t.elapsed ());
+        }
+        catch (std::exception &e) {
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+template<class V, int N>
+struct bench_cpp_vector_add {  // Times v3 = - (v1 + v2) using V's own operators; instantiated with std::valarray in this file.
+    typedef typename V::value_type value_type;
+
+    void operator () (int runs) const {  // runs: number of full vector updates inside the timed region
+        try {
+            static V v1 (N), v2 (N), v3 (N);
+            initialize_vector (v1);
+            initialize_vector (v2);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                v3 = - (v1 + v2);
+//                sink_vector (v3);
+            }
+            footer<value_type> () (0, 2 * N, runs, t.elapsed ());
+        }
+        catch (std::exception &e) {  // print the message and return normally
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+
+// Benchmark O (n): inner_prod and vector + vector, run for the C array baseline and every sparse vector type enabled by a USE_* macro.
+template<class T, int N>
+void bench_1<T, N>::operator () (int runs) {
+    header ("bench_1");
+
+    header ("inner_prod");
+
+    header ("C array");
+    bench_c_inner_prod<T, N> () (runs);
+
+#ifdef USE_MAPPED_VECTOR
+#ifdef USE_MAP_ARRAY
+    header ("mapped_vector<map_array>");
+    bench_my_inner_prod<ublas::mapped_vector<T, ublas::map_array<std::size_t, T> >, N> () (runs);
+#endif
+
+#ifdef USE_STD_MAP
+    header ("mapped_vector<std::map>");
+    bench_my_inner_prod<ublas::mapped_vector<T, std::map<std::size_t, T> >, N> () (runs);
+#endif
+#endif
+
+#ifdef USE_COMPRESSED_VECTOR
+    header ("compressed_vector");
+    bench_my_inner_prod<ublas::compressed_vector<T>, N> () (runs);
+#endif
+
+#ifdef USE_COORDINATE_VECTOR
+    header ("coordinate_vector");
+    bench_my_inner_prod<ublas::coordinate_vector<T>, N> () (runs);
+#endif
+
+#ifdef USE_STD_VALARRAY
+    header ("std::valarray");
+    bench_cpp_inner_prod<std::valarray<T>, N> () (runs);
+#endif
+
+    header ("vector + vector");
+
+    header ("C array");
+    bench_c_vector_add<T, N> () (runs);
+
+#ifdef USE_MAPPED_VECTOR
+#ifdef USE_MAP_ARRAY
+    header ("mapped_vector<map_array> safe");
+    bench_my_vector_add<ublas::mapped_vector<T, ublas::map_array<std::size_t, T> >, N> () (runs, safe_tag ());
+
+    header ("mapped_vector<map_array> fast");  // label spelling now matches the "safe" line above
+    bench_my_vector_add<ublas::mapped_vector<T, ublas::map_array<std::size_t, T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_MAP
+    header ("mapped_vector<std::map> safe");
+    bench_my_vector_add<ublas::mapped_vector<T, std::map<std::size_t, T> >, N> () (runs, safe_tag ());
+
+    header ("mapped_vector<std::map> fast");
+    bench_my_vector_add<ublas::mapped_vector<T, std::map<std::size_t, T> >, N> () (runs, fast_tag ());
+#endif
+#endif
+
+#ifdef USE_COMPRESSED_VECTOR
+#ifdef USE_MAP_ARRAY  // NOTE(review): the inner_prod section above runs compressed_vector without this guard -- confirm it is intended here
+    header ("compressed_vector safe");
+    bench_my_vector_add<ublas::compressed_vector<T>, N> () (runs, safe_tag ());
+
+    header ("compressed_vector fast");
+    bench_my_vector_add<ublas::compressed_vector<T>, N> () (runs, fast_tag ());
+#endif
+#endif
+
+#ifdef USE_COORDINATE_VECTOR
+#ifdef USE_MAP_ARRAY  // NOTE(review): same extra guard as for compressed_vector above
+    header ("coordinate_vector safe");
+    bench_my_vector_add<ublas::coordinate_vector<T>, N> () (runs, safe_tag ());
+
+    header ("coordinate_vector fast");
+    bench_my_vector_add<ublas::coordinate_vector<T>, N> () (runs, fast_tag ());
+#endif
+#endif
+
+#ifdef USE_STD_VALARRAY
+    header ("std::valarray");
+    bench_cpp_vector_add<std::valarray<T>, N> () (runs);
+#endif
+}
+
+#ifdef USE_FLOAT  // Explicit instantiations of bench_1 for each enabled scalar type and N in {3, 10, 30, 100}.
+template struct bench_1<float, 3>;
+template struct bench_1<float, 10>;
+template struct bench_1<float, 30>;
+template struct bench_1<float, 100>;
+#endif
+
+#ifdef USE_DOUBLE
+template struct bench_1<double, 3>;
+template struct bench_1<double, 10>;
+template struct bench_1<double, 30>;
+template struct bench_1<double, 100>;
+#endif
+
+#ifdef USE_STD_COMPLEX  // complex variants additionally require the matching real type macro
+#ifdef USE_FLOAT
+template struct bench_1<std::complex<float>, 3>;
+template struct bench_1<std::complex<float>, 10>;
+template struct bench_1<std::complex<float>, 30>;
+template struct bench_1<std::complex<float>, 100>;
+#endif
+
+#ifdef USE_DOUBLE
+template struct bench_1<std::complex<double>, 3>;
+template struct bench_1<std::complex<double>, 10>;
+template struct bench_1<std::complex<double>, 30>;
+template struct bench_1<std::complex<double>, 100>;
+#endif
+#endif
diff --git a/benchmarks/bench2/bench22.cpp b/benchmarks/bench2/bench22.cpp
new file mode 100644
index 0000000..f40df79
--- /dev/null
+++ b/benchmarks/bench2/bench22.cpp
@@ -0,0 +1,469 @@
+//
+//  Copyright (c) 2000-2002
+//  Joerg Walter, Mathias Koch
+//
+//  Distributed under the Boost Software License, Version 1.0. (See
+//  accompanying file LICENSE_1_0.txt or copy at
+//  http://www.boost.org/LICENSE_1_0.txt)
+//
+//  The authors gratefully acknowledge the support of
+//  GeNeSys mbH & Co. KG in producing this work.
+//
+
+#include "bench2.hpp"
+
+template<class T, int N>
+struct bench_c_outer_prod {  // Baseline: times m = - v1 * v2^T written as a double loop over C arrays.
+    typedef T value_type;
+
+    void operator () (int runs) const {  // runs: number of full outer products inside the timed region
+        try {
+            static typename c_matrix_traits<T, N, N>::type m;
+            static typename c_vector_traits<T, N>::type v1, v2;
+            initialize_c_vector<T, N> () (v1);
+            initialize_c_vector<T, N> () (v2);
+            boost::timer t;  // created only after the operands have been filled
+            for (int i = 0; i < runs; ++ i) {
+                for (int j = 0; j < N; ++ j) {
+                    for (int k = 0; k < N; ++ k) {
+                        m [j] [k] = - v1 [j] * v2 [k];
+                    }
+                }
+//                sink_c_matrix<T, N, N> () (m);
+                BOOST_UBLAS_NOT_USED(m);  // expands to (void)(m)
+            }
+            footer<value_type> () (N * N, N * N, runs, t.elapsed ());  // reported as N^2 multiplies and N^2 additions per run
+        }
+        catch (std::exception &e) {  // print the message and return normally
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+template<class M, class V, int N>
+struct bench_my_outer_prod {  // Times m = - ublas::outer_prod (v1, v2) for matrix type M and vector type V; one overload per assignment style.
+    typedef typename M::value_type value_type;
+
+    void operator () (int runs, safe_tag) const {  // safe variant: the result is stored with operator =
+        try {
+            static M m (N, N, N * N);  // NOTE(review): third argument is presumably the non-zero capacity -- confirm
+            static V v1 (N, N), v2 (N, N);
+            initialize_vector (v1);
+            initialize_vector (v2);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                m = - ublas::outer_prod (v1, v2);
+//                sink_matrix (m);
+                BOOST_UBLAS_NOT_USED(m);  // expands to (void)(m)
+            }
+            footer<value_type> () (N * N, N * N, runs, t.elapsed ());  // same operation counts as the C array baseline
+        }
+        catch (std::exception &e) {  // print the message and return normally
+            std::cout << e.what () << std::endl;
+        }
+    }
+    void operator () (int runs, fast_tag) const {  // fast variant: the result is stored with m.assign ()
+        try {
+            static M m (N, N, N * N);
+            static V v1 (N, N), v2 (N, N);
+            initialize_vector (v1);
+            initialize_vector (v2);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                m.assign (- ublas::outer_prod (v1, v2));
+//                sink_matrix (m);
+                BOOST_UBLAS_NOT_USED(m);
+            }
+            footer<value_type> () (N * N, N * N, runs, t.elapsed ());
+        }
+        catch (std::exception &e) {
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+template<class M, class V, int N>
+struct bench_cpp_outer_prod {  // Times an outer product stored in a flat container of N * N elements indexed as N * j + k.
+    typedef typename M::value_type value_type;
+
+    void operator () (int runs) const {  // runs: number of full outer products inside the timed region
+        try {
+            static M m (N * N);
+            static V v1 (N), v2 (N);
+            initialize_vector (v1);
+            initialize_vector (v2);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                for (int j = 0; j < N; ++ j) {
+                    for (int k = 0; k < N; ++ k) {
+                        m [N * j + k] = - v1 [j] * v2 [k];
+                    }
+                }
+//                sink_vector (m);
+            }
+            footer<value_type> () (N * N, N * N, runs, t.elapsed ());
+        }
+        catch (std::exception &e) {  // print the message and return normally
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+
+template<class T, int N>
+struct bench_c_matrix_vector_prod {  // Baseline: times v2 = m * v1 written as a double loop over C arrays.
+    typedef T value_type;
+
+    void operator () (int runs) const {  // runs: number of full matrix-vector products inside the timed region
+        try {
+            static typename c_matrix_traits<T, N, N>::type m;
+            static typename c_vector_traits<T, N>::type v1, v2;
+            initialize_c_matrix<T, N, N> () (m);
+            initialize_c_vector<T, N> () (v1);
+            boost::timer t;  // created only after the operands have been filled
+            for (int i = 0; i < runs; ++ i) {
+                for (int j = 0; j < N; ++ j) {
+                    v2 [j] = 0;
+                    for (int k = 0; k < N; ++ k) {
+                        v2 [j] += m [j] [k] * v1 [k];
+                    }
+                }
+//                sink_c_vector<T, N> () (v2);
+            }
+            footer<value_type> () (N * N, N * (N - 1), runs, t.elapsed ());  // reported as N^2 multiplies and N (N - 1) additions per run
+        }
+        catch (std::exception &e) {  // print the message and return normally
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+template<class M, class V, int N>
+struct bench_my_matrix_vector_prod {  // Times v2 = ublas::prod (m, v1) for matrix type M and vector type V; one overload per assignment style.
+    typedef typename M::value_type value_type;
+
+    void operator () (int runs, safe_tag) const {  // safe variant: the result is stored with operator =
+        try {
+            static M m (N, N, N * N);  // NOTE(review): third argument is presumably the non-zero capacity -- confirm
+            static V v1 (N, N), v2 (N, N);
+            initialize_matrix (m);
+            initialize_vector (v1);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                v2 = ublas::prod (m, v1);
+//                sink_vector (v2);
+            }
+            footer<value_type> () (N * N, N * (N - 1), runs, t.elapsed ());  // same operation counts as the C array baseline
+        }
+        catch (std::exception &e) {  // print the message and return normally
+            std::cout << e.what () << std::endl;
+        }
+    }
+    void operator () (int runs, fast_tag) const {  // fast variant: the result is stored with v2.assign ()
+        try {
+            static M m (N, N, N * N);
+            static V v1 (N, N), v2 (N, N);
+            initialize_matrix (m);
+            initialize_vector (v1);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                v2.assign (ublas::prod (m, v1));
+//                sink_vector (v2);
+            }
+            footer<value_type> () (N * N, N * (N - 1), runs, t.elapsed ());
+        }
+        catch (std::exception &e) {
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+template<class M, class V, int N>
+struct bench_cpp_matrix_vector_prod {  // Times a matrix-vector product on a flat array of N * N elements using std::valarray slices.
+    typedef typename M::value_type value_type;
+
+    void operator () (int runs) const {  // runs: number of full matrix-vector products inside the timed region
+        try {
+            static M m (N * N);
+            static V v1 (N), v2 (N);
+            initialize_vector (m);
+            initialize_vector (v1);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                for (int j = 0; j < N; ++ j) {
+                    std::valarray<value_type> row (m [std::slice (N * j, N, 1)]);  // N consecutive elements starting at N * j
+                    v2 [j] = (row * v1).sum ();
+                }
+//                sink_vector (v2);
+            }
+            footer<value_type> () (N * N, N * (N - 1), runs, t.elapsed ());
+        }
+        catch (std::exception &e) {  // print the message and return normally
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+
+template<class T, int N>
+struct bench_c_matrix_add {  // Baseline: times m3 = - (m1 + m2) written as a double loop over N x N C arrays.
+    typedef T value_type;
+
+    void operator () (int runs) const {  // runs: number of full matrix updates inside the timed region
+        try {
+            static typename c_matrix_traits<T, N, N>::type m1, m2, m3;
+            initialize_c_matrix<T, N, N> () (m1);
+            initialize_c_matrix<T, N, N> () (m2);
+            boost::timer t;  // created only after the operands have been filled
+            for (int i = 0; i < runs; ++ i) {
+                for (int j = 0; j < N; ++ j) {
+                    for (int k = 0; k < N; ++ k) {
+                        m3 [j] [k] = - (m1 [j] [k] + m2 [j] [k]);
+                    }
+                }
+//                sink_c_matrix<T, N, N> () (m3);
+                BOOST_UBLAS_NOT_USED(m3);  // expands to (void)(m3)
+            }
+            footer<value_type> () (0, 2 * N * N, runs, t.elapsed ());  // reported as 2 N^2 additions (sum and negation) and no multiplies
+        }
+        catch (std::exception &e) {  // print the message and return normally
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+template<class M, int N>
+struct bench_my_matrix_add {  // Times m3 = - (m1 + m2) as a uBLAS expression on matrices of type M; one overload per assignment style.
+    typedef typename M::value_type value_type;
+
+    void operator () (int runs, safe_tag) const {  // safe variant: the result is stored with operator =
+        try {
+            static M m1 (N, N, N * N), m2 (N, N, N * N), m3 (N, N, N * N);  // NOTE(review): third argument is presumably the non-zero capacity -- confirm
+            initialize_matrix (m1);
+            initialize_matrix (m2);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                m3 = - (m1 + m2);
+//                sink_matrix (m3);
+            }
+            footer<value_type> () (0, 2 * N * N, runs, t.elapsed ());  // same operation counts as the C array baseline
+        }
+        catch (std::exception &e) {  // print the message and return normally
+            std::cout << e.what () << std::endl;
+        }
+    }
+    void operator () (int runs, fast_tag) const {  // fast variant: the result is stored with m3.assign ()
+        try {
+            static M m1 (N, N, N * N), m2 (N, N, N * N), m3 (N, N, N * N);
+            initialize_matrix (m1);
+            initialize_matrix (m2);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                m3.assign (- (m1 + m2));
+//                sink_matrix (m3);
+            }
+            footer<value_type> () (0, 2 * N * N, runs, t.elapsed ());
+        }
+        catch (std::exception &e) {
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+template<class M, int N>
+struct bench_cpp_matrix_add {   // Times out = - (a + b) on flat containers of N * N elements.
+    typedef typename M::value_type value_type;
+
+    void operator () (int runs) const {
+        try {
+            static M a (N * N), b (N * N), out (N * N);
+            initialize_vector (a);
+            initialize_vector (b);
+            boost::timer stopwatch;
+            for (int left = runs; left > 0; -- left) {
+                out = - (a + b);
+//                sink_vector (out);
+            }
+            footer<value_type> () (0, 2 * N * N, runs, stopwatch.elapsed ());
+        }
+        catch (std::exception &failure) {
+            std::cout << failure.what () << std::endl;
+        }
+    }
+};
+
+// Benchmark O (n ^ 2): outer_prod, prod (matrix, vector) and matrix + matrix. Each section runs the C array baseline, then every container enabled by a USE_* macro.
+template<class T, int N>
+void bench_2<T, N>::operator () (int runs) {
+    header ("bench_2");
+
+    header ("outer_prod");
+
+    header ("C array");
+    bench_c_outer_prod<T, N> () (runs);
+
+#ifdef USE_SPARSE_MATRIX
+#ifdef USE_MAP_ARRAY
+    header ("sparse_matrix<map_array>, sparse_vector<map_array> safe");
+    bench_my_outer_prod<ublas::sparse_matrix<T, ublas::row_major, ublas::map_array<std::size_t, T> >,
+                        ublas::sparse_vector<T, ublas::map_array<std::size_t, T> >, N> () (runs, safe_tag ());
+
+    header ("sparse_matrix<map_array>, sparse_vector<map_array> fast");
+    bench_my_outer_prod<ublas::sparse_matrix<T, ublas::row_major, ublas::map_array<std::size_t, T> >,
+                        ublas::sparse_vector<T, ublas::map_array<std::size_t, T> >, N> () (runs, fast_tag ());
+#endif // USE_MAP_ARRAY
+
+#ifdef USE_STD_MAP
+    header ("sparse_matrix<std::map>, sparse_vector<std::map> safe");
+    bench_my_outer_prod<ublas::sparse_matrix<T, ublas::row_major, std::map<std::size_t, T> >,
+                        ublas::sparse_vector<T, std::map<std::size_t, T> >, N> () (runs, safe_tag ());
+
+    header ("sparse_matrix<std::map>, sparse_vector<std::map> fast");
+    bench_my_outer_prod<ublas::sparse_matrix<T, ublas::row_major, std::map<std::size_t, T> >,
+                        ublas::sparse_vector<T, std::map<std::size_t, T> >, N> () (runs, fast_tag ());
+#endif // USE_STD_MAP
+#endif // USE_SPARSE_MATRIX
+
+#ifdef USE_COMPRESSED_MATRIX
+    header ("compressed_matrix, compressed_vector safe");
+    bench_my_outer_prod<ublas::compressed_matrix<T, ublas::row_major>,
+                        ublas::compressed_vector<T>, N> () (runs, safe_tag ());
+
+    header ("compressed_matrix, compressed_vector fast");
+    bench_my_outer_prod<ublas::compressed_matrix<T, ublas::row_major>,
+                        ublas::compressed_vector<T>, N> () (runs, fast_tag ());
+#endif // USE_COMPRESSED_MATRIX
+
+#ifdef USE_COORDINATE_MATRIX
+    header ("coordinate_matrix, coordinate_vector safe");
+    bench_my_outer_prod<ublas::coordinate_matrix<T, ublas::row_major>,
+                        ublas::coordinate_vector<T>, N> () (runs, safe_tag ());
+
+    header ("coordinate_matrix, coordinate_vector fast");
+    bench_my_outer_prod<ublas::coordinate_matrix<T, ublas::row_major>,
+                        ublas::coordinate_vector<T>, N> () (runs, fast_tag ());
+#endif // USE_COORDINATE_MATRIX
+
+#ifdef USE_STD_VALARRAY
+    header ("std::valarray");
+    bench_cpp_outer_prod<std::valarray<T>, std::valarray<T>, N> () (runs);
+#endif // USE_STD_VALARRAY
+
+    header ("prod (matrix, vector)");
+
+    header ("C array");
+    bench_c_matrix_vector_prod<T, N> () (runs);
+
+#ifdef USE_SPARSE_MATRIX
+#ifdef USE_MAP_ARRAY
+    header ("sparse_matrix<map_array>, sparse_vector<map_array> safe");
+    bench_my_matrix_vector_prod<ublas::sparse_matrix<T, ublas::row_major, ublas::map_array<std::size_t, T> >,
+                                ublas::sparse_vector<T, ublas::map_array<std::size_t, T> >, N> () (runs, safe_tag ());
+
+    header ("sparse_matrix<map_array>, sparse_vector<map_array> fast");
+    bench_my_matrix_vector_prod<ublas::sparse_matrix<T, ublas::row_major, ublas::map_array<std::size_t, T> >,
+                                ublas::sparse_vector<T, ublas::map_array<std::size_t, T> >, N> () (runs, fast_tag ());
+#endif // USE_MAP_ARRAY
+
+#ifdef USE_STD_MAP
+    header ("sparse_matrix<std::map>, sparse_vector<std::map> safe");
+    bench_my_matrix_vector_prod<ublas::sparse_matrix<T, ublas::row_major, std::map<std::size_t, T> >,
+                                ublas::sparse_vector<T, std::map<std::size_t, T> >, N> () (runs, safe_tag ());
+
+    header ("sparse_matrix<std::map>, sparse_vector<std::map> fast");
+    bench_my_matrix_vector_prod<ublas::sparse_matrix<T, ublas::row_major, std::map<std::size_t, T> >,
+                                ublas::sparse_vector<T, std::map<std::size_t, T> >, N> () (runs, fast_tag ());
+#endif // USE_STD_MAP
+#endif // USE_SPARSE_MATRIX
+
+#ifdef USE_COMPRESSED_MATRIX
+    header ("compressed_matrix, compressed_vector safe");
+    bench_my_matrix_vector_prod<ublas::compressed_matrix<T, ublas::row_major>,
+                                ublas::compressed_vector<T>, N> () (runs, safe_tag ());
+
+    header ("compressed_matrix, compressed_vector fast");
+    bench_my_matrix_vector_prod<ublas::compressed_matrix<T, ublas::row_major>,
+                                ublas::compressed_vector<T>, N> () (runs, fast_tag ());
+#endif // USE_COMPRESSED_MATRIX
+
+#ifdef USE_COORDINATE_MATRIX
+    header ("coordinate_matrix, coordinate_vector safe");
+    bench_my_matrix_vector_prod<ublas::coordinate_matrix<T, ublas::row_major>,
+                                ublas::coordinate_vector<T>, N> () (runs, safe_tag ());
+
+    header ("coordinate_matrix, coordinate_vector fast");
+    bench_my_matrix_vector_prod<ublas::coordinate_matrix<T, ublas::row_major>,
+                                ublas::coordinate_vector<T>, N> () (runs, fast_tag ());
+#endif // USE_COORDINATE_MATRIX
+
+#ifdef USE_STD_VALARRAY
+    header ("std::valarray");
+    bench_cpp_matrix_vector_prod<std::valarray<T>, std::valarray<T>, N> () (runs);
+#endif // USE_STD_VALARRAY
+
+    header ("matrix + matrix");
+
+    header ("C array");
+    bench_c_matrix_add<T, N> () (runs);
+
+#ifdef USE_SPARSE_MATRIX
+#ifdef USE_MAP_ARRAY
+    header ("sparse_matrix<map_array> safe");
+    bench_my_matrix_add<ublas::sparse_matrix<T, ublas::row_major, ublas::map_array<std::size_t, T> >, N> () (runs, safe_tag ());
+
+    header ("sparse_matrix<map_array> fast");
+    bench_my_matrix_add<ublas::sparse_matrix<T, ublas::row_major, ublas::map_array<std::size_t, T> >, N> () (runs, fast_tag ());
+#endif // USE_MAP_ARRAY
+
+#ifdef USE_STD_MAP
+    header ("sparse_matrix<std::map> safe");
+    bench_my_matrix_add<ublas::sparse_matrix<T, ublas::row_major, std::map<std::size_t, T> >, N> () (runs, safe_tag ());
+
+    header ("sparse_matrix<std::map> fast");
+    bench_my_matrix_add<ublas::sparse_matrix<T, ublas::row_major, std::map<std::size_t, T> >, N> () (runs, fast_tag ());
+#endif // USE_STD_MAP
+#endif // USE_SPARSE_MATRIX
+
+#ifdef USE_COMPRESSED_MATRIX
+    header ("compressed_matrix safe");
+    bench_my_matrix_add<ublas::compressed_matrix<T, ublas::row_major>, N> () (runs, safe_tag ());
+
+    header ("compressed_matrix fast");
+    bench_my_matrix_add<ublas::compressed_matrix<T, ublas::row_major>, N> () (runs, fast_tag ());
+#endif // USE_COMPRESSED_MATRIX
+
+#ifdef USE_COORDINATE_MATRIX
+    header ("coordinate_matrix safe");
+    bench_my_matrix_add<ublas::coordinate_matrix<T, ublas::row_major>, N> () (runs, safe_tag ());
+
+    header ("coordinate_matrix fast");
+    bench_my_matrix_add<ublas::coordinate_matrix<T, ublas::row_major>, N> () (runs, fast_tag ());
+#endif // USE_COORDINATE_MATRIX
+
+#ifdef USE_STD_VALARRAY
+    header ("std::valarray");
+    bench_cpp_matrix_add<std::valarray<T>, N> () (runs);
+#endif // USE_STD_VALARRAY
+}
+
+#ifdef USE_FLOAT   // Explicit instantiations of bench_2 for N = 3, 10, 30, 100 per enabled scalar type.
+template struct bench_2<float, 3>;
+template struct bench_2<float, 10>;
+template struct bench_2<float, 30>;
+template struct bench_2<float, 100>;
+#endif // USE_FLOAT
+
+#ifdef USE_DOUBLE
+template struct bench_2<double, 3>;
+template struct bench_2<double, 10>;
+template struct bench_2<double, 30>;
+template struct bench_2<double, 100>;
+#endif // USE_DOUBLE
+
+#ifdef USE_STD_COMPLEX
+#ifdef USE_FLOAT
+template struct bench_2<std::complex<float>, 3>;
+template struct bench_2<std::complex<float>, 10>;
+template struct bench_2<std::complex<float>, 30>;
+template struct bench_2<std::complex<float>, 100>;
+#endif // USE_FLOAT
+
+#ifdef USE_DOUBLE
+template struct bench_2<std::complex<double>, 3>;
+template struct bench_2<std::complex<double>, 10>;
+template struct bench_2<std::complex<double>, 30>;
+template struct bench_2<std::complex<double>, 100>;
+#endif // USE_DOUBLE
+#endif // USE_STD_COMPLEX
diff --git a/benchmarks/bench2/bench23.cpp b/benchmarks/bench2/bench23.cpp
new file mode 100644
index 0000000..bb363f5
--- /dev/null
+++ b/benchmarks/bench2/bench23.cpp
@@ -0,0 +1,196 @@
+//
+//  Copyright (c) 2000-2002
+//  Joerg Walter, Mathias Koch
+//
+//  Distributed under the Boost Software License, Version 1.0. (See
+//  accompanying file LICENSE_1_0.txt or copy at
+//  http://www.boost.org/LICENSE_1_0.txt)
+//
+//  The authors gratefully acknowledge the support of
+//  GeNeSys mbH & Co. KG in producing this work.
+//
+
+#include "bench2.hpp"
+
+template<class T, int N>
+struct bench_c_matrix_prod {   // Baseline for prod (matrix, matrix): triple loop over static N x N C arrays.
+    typedef T value_type;
+
+    void operator () (int runs) const {
+        try {
+            static typename c_matrix_traits<T, N, N>::type a, b, product;
+            initialize_c_matrix<T, N, N> () (a);
+            initialize_c_matrix<T, N, N> () (b);
+            boost::timer stopwatch;
+            for (int left = runs; left > 0; -- left) {
+                for (int row = 0; row < N; ++ row) {
+                    for (int col = 0; col < N; ++ col) {
+                        T &cell = product [row] [col];
+                        cell = 0;   // restart the accumulation on every pass
+                        for (int inner = 0; inner < N; ++ inner)
+                            cell += a [row] [inner] * b [inner] [col];
+                    }
+                }
+//                sink_c_matrix<T, N, N> () (product);
+            }
+            footer<value_type> () (N * N * N, N * N * (N - 1), runs, stopwatch.elapsed ());
+        }
+        catch (std::exception &failure) {
+            std::cout << failure.what () << std::endl;
+        }
+    }
+};
+template<class M1, class M2, int N>
+struct bench_my_matrix_prod {   // Times out = prod (a, b) with a, out of type M1 and b of type M2.
+    typedef typename M1::value_type value_type;
+
+    void operator () (int runs, safe_tag) const {   // safe variant: operator =
+        try {
+            static M1 a (N, N, N * N), out (N, N, N * N);
+            static M2 b (N, N, N * N);
+            initialize_matrix (a);
+            initialize_matrix (b);
+            boost::timer stopwatch;
+            for (int left = runs; left > 0; -- left) {
+                out = ublas::prod (a, b);
+//                sink_matrix (out);
+            }
+            footer<value_type> () (N * N * N, N * N * (N - 1), runs, stopwatch.elapsed ());
+        }
+        catch (std::exception &failure) {
+            std::cout << failure.what () << std::endl;
+        }
+    }
+    void operator () (int runs, fast_tag) const {   // fast variant: assign ()
+        try {
+            static M1 a (N, N, N * N), out (N, N, N * N);
+            static M2 b (N, N, N * N);
+            initialize_matrix (a);
+            initialize_matrix (b);
+            boost::timer stopwatch;
+            for (int left = runs; left > 0; -- left) {
+                out.assign (ublas::prod (a, b));
+//                sink_matrix (out);
+            }
+            footer<value_type> () (N * N * N, N * N * (N - 1), runs, stopwatch.elapsed ());
+        }
+        catch (std::exception &failure) {
+            std::cout << failure.what () << std::endl;
+        }
+    }
+};
+template<class M, int N>
+struct bench_cpp_matrix_prod {   // Matrix product on flat N * N containers via std::slice rows and columns.
+    typedef typename M::value_type value_type;
+
+    void operator () (int runs) const {
+        try {
+            static M a (N * N), b (N * N), product (N * N);
+            initialize_vector (a);
+            initialize_vector (b);
+            boost::timer stopwatch;
+            for (int left = runs; left > 0; -- left) {
+                for (int r = 0; r < N; ++ r) {
+                    std::valarray<value_type> lhs_row (a [std::slice (N * r, N, 1)]);
+                    for (int c = 0; c < N; ++ c) {
+                        std::valarray<value_type> rhs_col (b [std::slice (c, N, N)]);
+                        product [N * r + c] = (lhs_row * rhs_col).sum ();
+                    }
+                }
+//                sink_vector (product);
+            }
+            footer<value_type> () (N * N * N, N * N * (N - 1), runs, stopwatch.elapsed ());
+        }
+        catch (std::exception &failure) {
+            std::cout << failure.what () << std::endl;
+        }
+    }
+};
+
+// Benchmark O (n ^ 3): prod (matrix, matrix). Runs the C array baseline, then every container pair enabled by a USE_* macro.
+template<class T, int N>
+void bench_3<T, N>::operator () (int runs) {
+    header ("bench_3");
+
+    header ("prod (matrix, matrix)");
+
+    header ("C array");
+    bench_c_matrix_prod<T, N> () (runs);
+
+#ifdef USE_SPARSE_MATRIX
+#ifdef USE_MAP_ARRAY
+    header ("sparse_matrix<row_major, map_array>, sparse_matrix<column_major, map_array> safe");
+    bench_my_matrix_prod<ublas::sparse_matrix<T, ublas::row_major, ublas::map_array<std::size_t, T> >,
+                         ublas::sparse_matrix<T, ublas::column_major, ublas::map_array<std::size_t, T> >, N> () (runs, safe_tag ());
+
+    header ("sparse_matrix<row_major, map_array>, sparse_matrix<column_major, map_array> fast");
+    bench_my_matrix_prod<ublas::sparse_matrix<T, ublas::row_major, ublas::map_array<std::size_t, T> >,
+                         ublas::sparse_matrix<T, ublas::column_major, ublas::map_array<std::size_t, T> >, N> () (runs, fast_tag ());
+#endif // USE_MAP_ARRAY
+
+#ifdef USE_STD_MAP
+    header ("sparse_matrix<row_major, std::map>, sparse_matrix<column_major, std::map> safe");
+    bench_my_matrix_prod<ublas::sparse_matrix<T, ublas::row_major, std::map<std::size_t, T> >,
+                         ublas::sparse_matrix<T, ublas::column_major, std::map<std::size_t, T> >, N> () (runs, safe_tag ());
+
+    header ("sparse_matrix<row_major, std::map>, sparse_matrix<column_major, std::map> fast");
+    bench_my_matrix_prod<ublas::sparse_matrix<T, ublas::row_major, std::map<std::size_t, T> >,
+                         ublas::sparse_matrix<T, ublas::column_major, std::map<std::size_t, T> >, N> () (runs, fast_tag ());
+#endif // USE_STD_MAP
+#endif // USE_SPARSE_MATRIX
+
+#ifdef USE_COMPRESSED_MATRIX
+    header ("compressed_matrix<row_major>, compressed_matrix<column_major> safe");
+    bench_my_matrix_prod<ublas::compressed_matrix<T, ublas::row_major>,
+                         ublas::compressed_matrix<T, ublas::column_major>, N> () (runs, safe_tag ());
+
+    header ("compressed_matrix<row_major>, compressed_matrix<column_major> fast");
+    bench_my_matrix_prod<ublas::compressed_matrix<T, ublas::row_major>,
+                         ublas::compressed_matrix<T, ublas::column_major>, N> () (runs, fast_tag ());
+#endif // USE_COMPRESSED_MATRIX
+
+#ifdef USE_COORDINATE_MATRIX
+    header ("coordinate_matrix<row_major>, coordinate_matrix<column_major> safe");
+    bench_my_matrix_prod<ublas::coordinate_matrix<T, ublas::row_major>,
+                         ublas::coordinate_matrix<T, ublas::column_major>, N> () (runs, safe_tag ());
+
+    header ("coordinate_matrix<row_major>, coordinate_matrix<column_major> fast");
+    bench_my_matrix_prod<ublas::coordinate_matrix<T, ublas::row_major>,
+                         ublas::coordinate_matrix<T, ublas::column_major>, N> () (runs, fast_tag ());
+#endif // USE_COORDINATE_MATRIX
+
+#ifdef USE_STD_VALARRAY
+    header ("std::valarray");
+    bench_cpp_matrix_prod<std::valarray<T>, N> () (runs);
+#endif // USE_STD_VALARRAY
+}
+
+#ifdef USE_FLOAT   // Explicit instantiations of bench_3 for N = 3, 10, 30, 100 per enabled scalar type.
+template struct bench_3<float, 3>;
+template struct bench_3<float, 10>;
+template struct bench_3<float, 30>;
+template struct bench_3<float, 100>;
+#endif // USE_FLOAT
+
+#ifdef USE_DOUBLE
+template struct bench_3<double, 3>;
+template struct bench_3<double, 10>;
+template struct bench_3<double, 30>;
+template struct bench_3<double, 100>;
+#endif // USE_DOUBLE
+
+#ifdef USE_STD_COMPLEX
+#ifdef USE_FLOAT
+template struct bench_3<std::complex<float>, 3>;
+template struct bench_3<std::complex<float>, 10>;
+template struct bench_3<std::complex<float>, 30>;
+template struct bench_3<std::complex<float>, 100>;
+#endif // USE_FLOAT
+
+#ifdef USE_DOUBLE
+template struct bench_3<std::complex<double>, 3>;
+template struct bench_3<std::complex<double>, 10>;
+template struct bench_3<std::complex<double>, 30>;
+template struct bench_3<std::complex<double>, 100>;
+#endif // USE_DOUBLE
+#endif // USE_STD_COMPLEX
diff --git a/benchmarks/bench3/Jamfile.v2 b/benchmarks/bench3/Jamfile.v2
new file mode 100644
index 0000000..7ce9c9b
--- /dev/null
+++ b/benchmarks/bench3/Jamfile.v2
@@ -0,0 +1,10 @@
+# Copyright (c) 2004 Michael Stevens
+# Use, modification and distribution are subject to the
+# Boost Software License, Version 1.0. (See accompanying file
+# LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+# bench3 - measure the performance of operations on vector and matrix proxies.
+
+exe bench3
+    : bench3.cpp bench31.cpp bench32.cpp bench33.cpp
+    ;
diff --git a/benchmarks/bench3/bench3.cpp b/benchmarks/bench3/bench3.cpp
new file mode 100644
index 0000000..390d226
--- /dev/null
+++ b/benchmarks/bench3/bench3.cpp
@@ -0,0 +1,122 @@
+//
+//  Copyright (c) 2000-2002
+//  Joerg Walter, Mathias Koch
+//
+//  Distributed under the Boost Software License, Version 1.0. (See
+//  accompanying file LICENSE_1_0.txt or copy at
+//  http://www.boost.org/LICENSE_1_0.txt)
+//
+//  The authors gratefully acknowledge the support of
+//  GeNeSys mbH & Co. KG in producing this work.
+//
+
+#include "bench3.hpp"
+
+void header (std::string text) {   // Writes text as one line to std::cout; std::endl also flushes.
+    std::cout << text << std::endl;
+}
+
+template<class T>
+struct peak_c_plus {   // Times `runs` scalar additions acc += T (0).
+    typedef T value_type;
+
+    void operator () (int runs) const {
+        try {
+            static T acc (0);
+            boost::timer stopwatch;
+            for (int left = runs; left > 0; -- left) {
+                acc += T (0);
+//                sink_scalar (acc);
+            }
+            footer<value_type> () (0, 1, runs, stopwatch.elapsed ());
+        }
+        catch (std::exception &failure) {
+            std::cout << failure.what () << std::endl;
+        }
+    }
+};
+template<class T>
+struct peak_c_multiplies {   // Times `runs` scalar multiplications acc *= T (1).
+    typedef T value_type;
+
+    void operator () (int runs) const {
+        try {
+            static T acc (1);
+            boost::timer stopwatch;
+            for (int left = runs; left > 0; -- left) {
+                acc *= T (1);
+//                sink_scalar (acc);
+            }
+            footer<value_type> () (0, 1, runs, stopwatch.elapsed ());
+        }
+        catch (std::exception &failure) {
+            std::cout << failure.what () << std::endl;
+        }
+    }
+};
+
+template<class T>
+void peak<T>::operator () (int runs) {   // Runs both scalar timings, each under its own caption.
+    header ("peak");
+
+    header ("plus");
+    peak_c_plus<T> () (runs);
+
+    header ("multiplies");
+    peak_c_multiplies<T> () (runs);
+}
+
+
+template <typename scalar> // scalar: element type under test; type_string: caption prefix; scale: run-count multiplier
+void do_bench (std::string type_string, int scale)
+{
+    header (type_string);
+    peak<scalar> () (1000000 * scale);
+
+    header (type_string + ", 3");   // base run counts shrink as N and the benchmark order grow
+    bench_1<scalar, 3> () (1000000 * scale);
+    bench_2<scalar, 3> () (300000 * scale);
+    bench_3<scalar, 3> () (100000 * scale);
+
+    header (type_string + ", 10");
+    bench_1<scalar, 10> () (300000 * scale);
+    bench_2<scalar, 10> () (30000 * scale);
+    bench_3<scalar, 10> () (3000 * scale);
+
+    header (type_string + ", 30");
+    bench_1<scalar, 30> () (100000 * scale);
+    bench_2<scalar, 30> () (3000 * scale);
+    bench_3<scalar, 30> () (100 * scale);
+
+    header (type_string + ", 100");
+    bench_1<scalar, 100> () (30000 * scale);
+    bench_2<scalar, 100> () (300 * scale);
+    bench_3<scalar, 100> () (3 * scale);
+}
+
+int main (int argc, char *argv []) {
+    // Optional argv [1]: integer multiplier applied to every run count in do_bench.
+    int scale = 1;
+    if (argc > 1)
+        scale = std::atoi (argv [1]);
+    if (scale < 1) scale = 1;   // atoi gives 0 for non-numeric text; a scale below 1 would time zero iterations
+#ifdef USE_FLOAT
+    do_bench<float> ("FLOAT", scale);
+#endif
+
+#ifdef USE_DOUBLE
+    do_bench<double> ("DOUBLE", scale);
+#endif
+
+#ifdef USE_STD_COMPLEX
+#ifdef USE_FLOAT
+    do_bench<std::complex<float> > ("COMPLEX<FLOAT>", scale);
+#endif
+
+#ifdef USE_DOUBLE
+    do_bench<std::complex<double> > ("COMPLEX<DOUBLE>", scale);
+#endif
+#endif
+
+    return 0;
+}
diff --git a/benchmarks/bench3/bench3.hpp b/benchmarks/bench3/bench3.hpp
new file mode 100644
index 0000000..5b64b59
--- /dev/null
+++ b/benchmarks/bench3/bench3.hpp
@@ -0,0 +1,159 @@
+//
+//  Copyright (c) 2000-2002
+//  Joerg Walter, Mathias Koch
+//
+//  Distributed under the Boost Software License, Version 1.0. (See
+//  accompanying file LICENSE_1_0.txt or copy at
+//  http://www.boost.org/LICENSE_1_0.txt)
+//
+//  The authors gratefully acknowledge the support of
+//  GeNeSys mbH & Co. KG in producing this work.
+//
+
+#ifndef BENCH3_H
+#define BENCH3_H
+
+#include <iostream>
+#include <string>
+#include <valarray>
+
+#include <boost/numeric/ublas/vector.hpp>
+#include <boost/numeric/ublas/vector_proxy.hpp>
+#include <boost/numeric/ublas/matrix.hpp>
+#include <boost/numeric/ublas/matrix_proxy.hpp>
+
+#include <boost/timer.hpp>
+
+
+#define BOOST_UBLAS_NOT_USED(x) (void)(x)
+
+
+namespace ublas = boost::numeric::ublas;
+
+void header (std::string text);
+
+template<class T>
+struct footer {   // Prints the elapsed seconds and the Mflops rate derived from the per-run operation counts.
+    void operator () (int multiplies, int plus, int runs, double elapsed) {
+        std::cout << "elapsed: " << elapsed << " s, "   // counts are widened to double first so that "* runs" cannot overflow
+                  << (double (multiplies) * ublas::type_traits<T>::multiplies_complexity +
+                      double (plus) * ublas::type_traits<T>::plus_complexity) * runs /
+                     (1024 * 1024 * elapsed) << " Mflops" << std::endl;
+    }
+};
+
+template<class T, int N>
+struct c_vector_traits {   // Names the built-in array type T [N].
+    typedef T type [N];
+};
+template<class T, int N, int M>
+struct c_matrix_traits {   // Names the built-in two-dimensional array type T [N] [M].
+    typedef T type [N] [M];
+};
+
+template<class T, int N>
+struct initialize_c_vector  {   // Fills every element of a T [N] array from std::rand ().
+    void operator () (typename c_vector_traits<T, N>::type &v) {
+        for (T *cur = v; cur != v + N; ++ cur)
+            *cur = std::rand () * 1.f;
+//            *cur = 0.f;
+    }
+};
+template<class V>
+BOOST_UBLAS_INLINE
+void initialize_vector (V &v) {   // Overwrites v [0] .. v [v.size () - 1] with std::rand () values.
+    const int count = v.size ();
+    for (int idx = 0; idx < count; ++ idx)
+        v [idx] = std::rand () * 1.f;
+//      v [idx] = 0.f;
+}
+
+template<class T, int N, int M>
+struct initialize_c_matrix  {   // Fills a T [N] [M] array row by row from std::rand ().
+    void operator () (typename c_matrix_traits<T, N, M>::type &m) {
+        for (int row = 0; row < N; ++ row)
+            for (T *cur = m [row]; cur != m [row] + M; ++ cur)
+                *cur = std::rand () * 1.f;
+//                *cur = 0.f;
+    }
+};
+template<class M>
+BOOST_UBLAS_INLINE
+void initialize_matrix (M &m) {
+    // Assigns a std::rand () value to every m (r, c), first index outermost.
+    const int rows = m.size1 (), cols = m.size2 ();
+    for (int r = 0; r < rows; ++ r)
+        for (int c = 0; c < cols; ++ c)
+            m (r, c) = std::rand () * 1.f;
+//            m (r, c) = 0.f;
+}
+
+template<class T>
+BOOST_UBLAS_INLINE
+void sink_scalar (const T &s) {   // Copies s into a function-local static.
+    static T g_s = s;   // a local static is initialized on the first call only; later calls copy nothing
+}
+
+template<class T, int N>
+struct sink_c_vector {   // Copies all N elements of v into a function-local static array on every call.
+    void operator () (const typename c_vector_traits<T, N>::type &v) {
+        static typename c_vector_traits<T, N>::type keep;
+        for (int idx = 0; idx < N; ++ idx)
+            keep [idx] = v [idx];
+    }
+};
+template<class V>
+BOOST_UBLAS_INLINE
+void sink_vector (const V &v) {   // Copy-constructs a function-local static from v.
+    static V g_v (v);   // a local static is constructed on the first call only; later calls copy nothing
+}
+
+template<class T, int N, int M>
+struct sink_c_matrix {   // Copies all N x M elements of m into a function-local static array on every call.
+    void operator () (const typename c_matrix_traits<T, N, M>::type &m) {
+        static typename c_matrix_traits<T, N, M>::type keep;
+        for (int row = 0; row < N; ++ row)
+            for (int col = 0; col < M; ++ col)
+                keep [row] [col] = m [row] [col];
+    }
+};
+template<class M>
+BOOST_UBLAS_INLINE
+void sink_matrix (const M &m) {   // Copy-constructs a function-local static from m.
+    static M g_m (m);   // a local static is constructed on the first call only; later calls copy nothing
+}
+
+template<class T>
+struct peak {   // Scalar plus/multiplies timing for element type T; operator () is only declared here.
+    void operator () (int runs);
+};
+
+template<class T, int N>
+struct bench_1 {   // Benchmark driver for element type T and size N; operator () is only declared here.
+    void operator () (int runs);
+};
+
+template<class T, int N>
+struct bench_2 {   // Benchmark driver for element type T and size N; operator () is only declared here.
+    void operator () (int runs);
+};
+
+template<class T, int N>
+struct bench_3 {   // Benchmark driver for element type T and size N; operator () is only declared here.
+    void operator () (int runs);
+};
+
+struct safe_tag {};   // empty tag type: selects the operator () overloads that use plain assignment
+struct fast_tag {};   // empty tag type: selects the operator () overloads that call assign ()
+
+// #define USE_FLOAT
+#define USE_DOUBLE
+// #define USE_STD_COMPLEX
+
+#define USE_C_ARRAY
+// #define USE_BOUNDED_ARRAY
+#define USE_UNBOUNDED_ARRAY
+// #define USE_STD_VALARRAY
+#define USE_STD_VECTOR
+
+#endif
diff --git a/benchmarks/bench3/bench31.cpp b/benchmarks/bench3/bench31.cpp
new file mode 100644
index 0000000..9181eb1
--- /dev/null
+++ b/benchmarks/bench3/bench31.cpp
@@ -0,0 +1,296 @@
+//
+//  Copyright (c) 2000-2002
+//  Joerg Walter, Mathias Koch
+//
+//  Distributed under the Boost Software License, Version 1.0. (See
+//  accompanying file LICENSE_1_0.txt or copy at
+//  http://www.boost.org/LICENSE_1_0.txt)
+//
+//  The authors gratefully acknowledge the support of
+//  GeNeSys mbH & Co. KG in producing this work.
+//
+
+#include "bench3.hpp"
+
+template<class T, int N>
+struct bench_c_inner_prod {   // Baseline inner product on two static T [N] arrays.
+    typedef T value_type;
+
+    // Runs the dot product `runs` times and hands the timing to footer.
+    void operator () (int runs) const {
+        try {
+            static typename c_vector_traits<T, N>::type x, y;
+            initialize_c_vector<T, N> () (x);
+            initialize_c_vector<T, N> () (y);
+            boost::timer stopwatch;
+            for (int left = runs; left > 0; -- left) {
+                static value_type dot (0);   // static: keeps accumulating across passes
+                for (int idx = 0; idx < N; ++ idx)
+                    dot += x [idx] * y [idx];
+//                sink_scalar (dot);
+            }
+            footer<value_type> () (N, N - 1, runs, stopwatch.elapsed ());
+        }
+        catch (std::exception &failure) {
+            std::cout << failure.what () << std::endl;
+        }
+    }
+};
+template<class V, int N>
+struct bench_my_inner_prod {   // Times ublas::inner_prod on vector_range proxies over two V vectors.
+    typedef typename V::value_type value_type;
+
+    void operator () (int runs) const {
+        try {
+            static V x (N), y (N);
+            ublas::vector_range<V> xr (x, ublas::range (0, N));
+            ublas::vector_range<V> yr (y, ublas::range (0, N));
+            initialize_vector (xr);
+            initialize_vector (yr);
+            boost::timer stopwatch;
+            for (int left = runs; left > 0; -- left) {
+                static value_type dot (0);
+                dot = ublas::inner_prod (xr, yr);
+//                sink_scalar (dot);
+                BOOST_UBLAS_NOT_USED(dot);
+            }
+            footer<value_type> () (N, N - 1, runs, stopwatch.elapsed ());
+        }
+        catch (std::exception &failure) {
+            std::cout << failure.what () << std::endl;
+        }
+    }
+};
+template<class V, int N>
+struct bench_cpp_inner_prod {   // Times (x * y).sum () on two containers of N elements.
+    typedef typename V::value_type value_type;
+
+    void operator () (int runs) const {
+        try {
+            static V x (N), y (N);
+            initialize_vector (x);
+            initialize_vector (y);
+            boost::timer stopwatch;
+            for (int left = runs; left > 0; -- left) {
+                static value_type dot (0);
+                dot = (x * y).sum ();
+//                sink_scalar (dot);
+            }
+            footer<value_type> () (N, N - 1, runs, stopwatch.elapsed ());
+        }
+        catch (std::exception &failure) {
+            std::cout << failure.what () << std::endl;
+        }
+    }
+};
+
+template<class T, int N>
+struct bench_c_vector_add {   // Baseline for "vector + vector": negated element-wise sum on T [N] arrays.
+    typedef T value_type;
+
+    // Repeats the summation `runs` times and hands the timing to footer.
+    void operator () (int runs) const {
+        try {
+            static typename c_vector_traits<T, N>::type x, y, sum;
+            initialize_c_vector<T, N> () (x);
+            initialize_c_vector<T, N> () (y);
+            boost::timer stopwatch;
+            for (int left = runs; left > 0; -- left) {
+                for (int idx = 0; idx < N; ++ idx)
+                    sum [idx] = - (x [idx] + y [idx]);
+//                sink_c_vector<T, N> () (sum);
+                BOOST_UBLAS_NOT_USED(sum);
+            }
+            footer<value_type> () (0, 2 * N, runs, stopwatch.elapsed ());
+        }
+        catch (std::exception &failure) {
+            std::cout << failure.what () << std::endl;
+        }
+    }
+};
+template<class V, int N>
+struct bench_my_vector_add {   // Times vr3 = - (vr1 + vr2) through vector_range proxies over V vectors.
+    typedef typename V::value_type value_type;
+
+    void operator () (int runs, safe_tag) const {   // safe variant: operator =
+        try {
+            static V v1 (N), v2 (N), v3 (N);
+            ublas::vector_range<V> vr1 (v1, ublas::range (0, N)),
+                                   vr2 (v2, ublas::range (0, N)),
+                                   vr3 (v3, ublas::range (0, N));   // destination views v3, not an alias of operand v2
+            initialize_vector (vr1);
+            initialize_vector (vr2);
+            initialize_vector (vr3);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                vr3 = - (vr1 + vr2);
+//                sink_vector (vr3);
+            }
+            footer<value_type> () (0, 2 * N, runs, t.elapsed ());
+        }
+        catch (std::exception &e) {
+            std::cout << e.what () << std::endl;
+        }
+    }
+    void operator () (int runs, fast_tag) const {   // fast variant: assign ()
+        try {
+            static V v1 (N), v2 (N), v3 (N);
+            ublas::vector_range<V> vr1 (v1, ublas::range (0, N)),
+                                   vr2 (v2, ublas::range (0, N)),
+                                   vr3 (v3, ublas::range (0, N));   // destination views v3, not an alias of operand v2
+            initialize_vector (vr1);
+            initialize_vector (vr2);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                vr3.assign (- (vr1 + vr2));
+//                sink_vector (vr3);
+            }
+            footer<value_type> () (0, 2 * N, runs, t.elapsed ());
+        }
+        catch (std::exception &e) {
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+template<class V, int N>
+struct bench_cpp_vector_add {   // Times out = - (x + y) on containers of N elements.
+    typedef typename V::value_type value_type;
+
+    void operator () (int runs) const {
+        try {
+            static V x (N), y (N), out (N);
+            initialize_vector (x);
+            initialize_vector (y);
+            boost::timer stopwatch;
+            for (int left = runs; left > 0; -- left) {
+                out = - (x + y);
+//                sink_vector (out);
+            }
+            footer<value_type> () (0, 2 * N, runs, stopwatch.elapsed ());
+        }
+        catch (std::exception &failure) {
+            std::cout << failure.what () << std::endl;
+        }
+    }
+};
+
+// Benchmark O (n): inner_prod and vector + vector. Each section runs the C array baseline, then every storage enabled by a USE_* macro.
+template<class T, int N>
+void bench_1<T, N>::operator () (int runs) {
+    header ("bench_1");
+
+    header ("inner_prod");
+
+    header ("C array");
+    bench_c_inner_prod<T, N> () (runs);
+
+#ifdef USE_C_ARRAY
+    header ("c_vector");
+    bench_my_inner_prod<ublas::c_vector<T, N>, N> () (runs);
+#endif
+
+#ifdef USE_BOUNDED_ARRAY
+    header ("vector<bounded_array>");
+    bench_my_inner_prod<ublas::vector<T, ublas::bounded_array<T, N> >, N> () (runs);
+#endif
+
+#ifdef USE_UNBOUNDED_ARRAY
+    header ("vector<unbounded_array>");
+    bench_my_inner_prod<ublas::vector<T, ublas::unbounded_array<T> >, N> () (runs);
+#endif
+
+#ifdef USE_STD_VALARRAY
+    header ("vector<std::valarray>");
+    bench_my_inner_prod<ublas::vector<T, std::valarray<T> >, N> () (runs);   // operator () takes the run count
+#endif
+
+#ifdef USE_STD_VECTOR
+    header ("vector<std::vector>");
+    bench_my_inner_prod<ublas::vector<T, std::vector<T> >, N> () (runs);
+#endif
+
+#ifdef USE_STD_VALARRAY
+    header ("std::valarray");
+    bench_cpp_inner_prod<std::valarray<T>, N> () (runs);
+#endif
+
+    header ("vector + vector");
+
+    header ("C array");
+    bench_c_vector_add<T, N> () (runs);
+
+#ifdef USE_C_ARRAY
+    header ("c_vector safe");
+    bench_my_vector_add<ublas::c_vector<T, N>, N> () (runs, safe_tag ());
+
+    header ("c_vector fast");
+    bench_my_vector_add<ublas::c_vector<T, N>, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_BOUNDED_ARRAY
+    header ("vector<bounded_array> safe");
+    bench_my_vector_add<ublas::vector<T, ublas::bounded_array<T, N> >, N> () (runs, safe_tag ());
+
+    header ("vector<bounded_array> fast");
+    bench_my_vector_add<ublas::vector<T, ublas::bounded_array<T, N> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_UNBOUNDED_ARRAY
+    header ("vector<unbounded_array> safe");
+    bench_my_vector_add<ublas::vector<T, ublas::unbounded_array<T> >, N> () (runs, safe_tag ());
+
+    header ("vector<unbounded_array> fast");
+    bench_my_vector_add<ublas::vector<T, ublas::unbounded_array<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VALARRAY
+    header ("vector<std::valarray> safe");
+    bench_my_vector_add<ublas::vector<T, std::valarray<T> >, N> () (runs, safe_tag ());
+
+    header ("vector<std::valarray> fast");
+    bench_my_vector_add<ublas::vector<T, std::valarray<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VECTOR
+    header ("vector<std::vector> safe");
+    bench_my_vector_add<ublas::vector<T, std::vector<T> >, N> () (runs, safe_tag ());
+
+    header ("vector<std::vector> fast");
+    bench_my_vector_add<ublas::vector<T, std::vector<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VALARRAY
+    header ("std::valarray");
+    bench_cpp_vector_add<std::valarray<T>, N> () (runs);
+#endif
+}
+
+#ifdef USE_FLOAT   // Explicit instantiations of bench_1 for N = 3, 10, 30, 100 per enabled scalar type.
+template struct bench_1<float, 3>;
+template struct bench_1<float, 10>;
+template struct bench_1<float, 30>;
+template struct bench_1<float, 100>;
+#endif // USE_FLOAT
+
+#ifdef USE_DOUBLE
+template struct bench_1<double, 3>;
+template struct bench_1<double, 10>;
+template struct bench_1<double, 30>;
+template struct bench_1<double, 100>;
+#endif // USE_DOUBLE
+
+#ifdef USE_STD_COMPLEX
+#ifdef USE_FLOAT
+template struct bench_1<std::complex<float>, 3>;
+template struct bench_1<std::complex<float>, 10>;
+template struct bench_1<std::complex<float>, 30>;
+template struct bench_1<std::complex<float>, 100>;
+#endif // USE_FLOAT
+
+#ifdef USE_DOUBLE
+template struct bench_1<std::complex<double>, 3>;
+template struct bench_1<std::complex<double>, 10>;
+template struct bench_1<std::complex<double>, 30>;
+template struct bench_1<std::complex<double>, 100>;
+#endif // USE_DOUBLE
+#endif // USE_STD_COMPLEX
diff --git a/benchmarks/bench3/bench32.cpp b/benchmarks/bench3/bench32.cpp
new file mode 100644
index 0000000..3819090
--- /dev/null
+++ b/benchmarks/bench3/bench32.cpp
@@ -0,0 +1,501 @@
+//
+//  Copyright (c) 2000-2002
+//  Joerg Walter, Mathias Koch
+//
+//  Distributed under the Boost Software License, Version 1.0. (See
+//  accompanying file LICENSE_1_0.txt or copy at
+//  http://www.boost.org/LICENSE_1_0.txt)
+//
+//  The authors gratefully acknowledge the support of
+//  GeNeSys mbH & Co. KG in producing this work.
+//
+
+#include "bench3.hpp"
+
+template<class T, int N>
+struct bench_c_outer_prod {  // plain-loop reference for the negated outer product ("C array" in the report)
+    typedef T value_type;
+    // Times `runs` passes of m[j][k] = -v1[j] * v2[k] over all N x N entries; vectors are set up before the timer exists.
+    void operator () (int runs) const {
+        try {
+            static typename c_matrix_traits<T, N, N>::type m;
+            static typename c_vector_traits<T, N>::type v1, v2;
+            initialize_c_vector<T, N> () (v1);
+            initialize_c_vector<T, N> () (v2);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                for (int j = 0; j < N; ++ j) {
+                    for (int k = 0; k < N; ++ k) {
+                        m [j] [k] = - v1 [j] * v2 [k];
+                    }
+                }
+//                sink_c_matrix<T, N, N> () (m);
+                BOOST_UBLAS_NOT_USED(m);
+            }
+            footer<value_type> () (N * N, N * N, runs, t.elapsed ());  // presumably (multiplies, plus, runs, seconds) -- confirm in the bench header
+        }
+        catch (std::exception &e) {
+            std::cout << e.what () << std::endl;  // report the failure and let the remaining benchmarks run
+        }
+    }
+};
+template<class M, class V, int N>
+struct bench_my_outer_prod {  // uBLAS negated outer product, evaluated through matrix_range / vector_range views
+    typedef typename M::value_type value_type;
+    // safe_tag overload: the result is stored with operator= on the range (mr = - outer_prod (vr1, vr2)).
+    void operator () (int runs, safe_tag) const {
+        try {
+            static M m (N, N);
+            ublas::matrix_range<M> mr (m, ublas::range (0, N), ublas::range (0, N));  // view over the full N x N matrix
+            static V v1 (N), v2 (N);
+            ublas::vector_range<V> vr1 (v1, ublas::range (0, N)),
+                                   vr2 (v2, ublas::range (0, N));
+            initialize_vector (vr1);
+            initialize_vector (vr2);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                mr = - ublas::outer_prod (vr1, vr2);
+//                sink_matrix (mr);
+            }
+            footer<value_type> () (N * N, N * N, runs, t.elapsed ());
+        }
+        catch (std::exception &e) {
+            std::cout << e.what () << std::endl;
+        }
+    }
+    void operator () (int runs, fast_tag) const {  // fast_tag overload: identical setup, result stored with mr.assign (...)
+        try {
+            static M m (N, N);
+            ublas::matrix_range<M> mr (m, ublas::range (0, N), ublas::range (0, N));
+            static V v1 (N), v2 (N);
+            ublas::vector_range<V> vr1 (v1, ublas::range (0, N)),
+                                   vr2 (v2, ublas::range (0, N));
+            initialize_vector (vr1);
+            initialize_vector (vr2);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                mr.assign (- ublas::outer_prod (vr1, vr2));  // presumably skips the alias-guarding temporary of operator= -- confirm in uBLAS docs
+//                sink_matrix (mr);
+            }
+            footer<value_type> () (N * N, N * N, runs, t.elapsed ());
+        }
+        catch (std::exception &e) {
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+template<class M, class V, int N>
+struct bench_cpp_outer_prod {  // same kernel on flat containers indexed with operator[]; bench_2 instantiates it with std::valarray
+    typedef typename M::value_type value_type;
+    // m is a flat container of N * N elements; entry (j, k) lives at index N * j + k.
+    void operator () (int runs) const {
+        try {
+            static M m (N * N);
+            static V v1 (N), v2 (N);
+            initialize_vector (v1);
+            initialize_vector (v2);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                for (int j = 0; j < N; ++ j) {
+                    for (int k = 0; k < N; ++ k) {
+                        m [N * j + k] = - v1 [j] * v2 [k];
+                    }
+                }
+//                sink_vector (m);
+            }
+            footer<value_type> () (N * N, N * N, runs, t.elapsed ());
+        }
+        catch (std::exception &e) {
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+
+template<class T, int N>
+struct bench_c_matrix_vector_prod {  // plain-loop reference for v2 = m * v1 ("C array" in the report)
+    typedef T value_type;
+    // Each pass zeroes v2[j] and accumulates m[j][k] * v1[k]; footer receives N * N and N * (N - 1) as operation counts.
+    void operator () (int runs) const {
+        try {
+            static typename c_matrix_traits<T, N, N>::type m;
+            static typename c_vector_traits<T, N>::type v1, v2;
+            initialize_c_matrix<T, N, N> () (m);
+            initialize_c_vector<T, N> () (v1);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                for (int j = 0; j < N; ++ j) {
+                    v2 [j] = 0;
+                    for (int k = 0; k < N; ++ k) {
+                        v2 [j] += m [j] [k] * v1 [k];
+                    }
+                }
+//                sink_c_vector<T, N> () (v2);
+            }
+            footer<value_type> () (N * N, N * (N - 1), runs, t.elapsed ());
+        }
+        catch (std::exception &e) {
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+template<class M, class V, int N>
+struct bench_my_matrix_vector_prod {  // uBLAS prod (matrix, vector), evaluated through matrix_range / vector_range views
+    typedef typename M::value_type value_type;
+    // safe_tag overload: the result is stored with operator= on the range (vr2 = prod (mr, vr1)).
+    void operator () (int runs, safe_tag) const {
+        try {
+            static M m (N, N);
+            ublas::matrix_range<M> mr (m, ublas::range (0, N), ublas::range (0, N));
+            static V v1 (N), v2 (N);
+            ublas::vector_range<V> vr1 (v1, ublas::range (0, N)),
+                                   vr2 (v2, ublas::range (0, N));
+            initialize_matrix (mr);
+            initialize_vector (vr1);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                vr2 = ublas::prod (mr, vr1);
+//                sink_vector (vr2);
+            }
+            footer<value_type> () (N * N, N * (N - 1), runs, t.elapsed ());
+        }
+        catch (std::exception &e) {
+            std::cout << e.what () << std::endl;
+        }
+    }
+    void operator () (int runs, fast_tag) const {  // fast_tag overload: identical setup, result stored with vr2.assign (...)
+        try {
+            static M m (N, N);
+            ublas::matrix_range<M> mr (m, ublas::range (0, N), ublas::range (0, N));
+            static V v1 (N), v2 (N);
+            ublas::vector_range<V> vr1 (v1, ublas::range (0, N)),
+                                   vr2 (v2, ublas::range (0, N));
+            initialize_matrix (mr);
+            initialize_vector (vr1);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                vr2.assign (ublas::prod (mr, vr1));
+//                sink_vector (vr2);
+            }
+            footer<value_type> () (N * N, N * (N - 1), runs, t.elapsed ());
+        }
+        catch (std::exception &e) {
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+template<class M, class V, int N>
+struct bench_cpp_matrix_vector_prod {  // valarray formulation: slice out a row, multiply element-wise by v1, reduce with sum ()
+    typedef typename M::value_type value_type;
+    // m holds the N x N matrix flattened to N * N elements; row j is std::slice (N * j, N, 1).
+    void operator () (int runs) const {
+        try {
+            static M m (N * N);
+            static V v1 (N), v2 (N);
+            initialize_vector (m);
+            initialize_vector (v1);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                for (int j = 0; j < N; ++ j) {
+                    std::valarray<value_type> row (m [std::slice (N * j, N, 1)]);  // row copy is made inside the timed loop
+                    v2 [j] = (row * v1).sum ();
+                }
+//                sink_vector (v2);
+            }
+            footer<value_type> () (N * N, N * (N - 1), runs, t.elapsed ());
+        }
+        catch (std::exception &e) {
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+
+template<class T, int N>
+struct bench_c_matrix_add {  // plain-loop reference for m3 = -(m1 + m2) ("C array" in the report)
+    typedef T value_type;
+    // Times `runs` element-wise passes over all N x N entries; footer receives 0 and 2 * N * N as operation counts.
+    void operator () (int runs) const {
+        try {
+            static typename c_matrix_traits<T, N, N>::type m1, m2, m3;
+            initialize_c_matrix<T, N, N> () (m1);
+            initialize_c_matrix<T, N, N> () (m2);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                for (int j = 0; j < N; ++ j) {
+                    for (int k = 0; k < N; ++ k) {
+                        m3 [j] [k] = - (m1 [j] [k] + m2 [j] [k]);
+                    }
+                }
+//                sink_c_matrix<T, N, N> () (m3);
+                BOOST_UBLAS_NOT_USED(m3);
+            }
+            footer<value_type> () (0, 2 * N * N, runs, t.elapsed ());
+        }
+        catch (std::exception &e) {
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+template<class M, int N>
+struct bench_my_matrix_add {  // uBLAS expression -(m1 + m2) on whole matrices (no range views in this benchmark)
+    typedef typename M::value_type value_type;
+    // safe_tag overload: the result is stored with operator= (m3 = - (m1 + m2)).
+    void operator () (int runs, safe_tag) const {
+        try {
+            static M m1 (N, N), m2 (N, N), m3 (N, N);
+            initialize_matrix (m1);
+            initialize_matrix (m2);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                m3 = - (m1 + m2);
+//                sink_matrix (m3);
+            }
+            footer<value_type> () (0, 2 * N * N, runs, t.elapsed ());
+        }
+        catch (std::exception &e) {
+            std::cout << e.what () << std::endl;
+        }
+    }
+    void operator () (int runs, fast_tag) const {  // fast_tag overload: identical setup, result stored with m3.assign (...)
+        try {
+            static M m1 (N, N), m2 (N, N), m3 (N, N);
+            initialize_matrix (m1);
+            initialize_matrix (m2);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                m3.assign (- (m1 + m2));
+//                sink_matrix (m3);
+            }
+            footer<value_type> () (0, 2 * N * N, runs, t.elapsed ());
+        }
+        catch (std::exception &e) {
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+template<class M, int N>
+struct bench_cpp_matrix_add {  // same expression written directly on M; bench_2 instantiates it with std::valarray
+    typedef typename M::value_type value_type;
+    // m1 and m2 are flat N * N containers filled by initialize_vector; m3 receives the result of each pass.
+    void operator () (int runs) const {
+        try {
+            static M m1 (N * N), m2 (N * N), m3 (N * N);
+            initialize_vector (m1);
+            initialize_vector (m2);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                m3 = - (m1 + m2);
+//                sink_vector (m3);
+            }
+            footer<value_type> () (0, 2 * N * N, runs, t.elapsed ());
+        }
+        catch (std::exception &e) {
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+
+// Benchmark O (n ^ 2): runs outer_prod, prod (matrix, vector) and matrix + matrix, printing a header before each variant.
+template<class T, int N>
+void bench_2<T, N>::operator () (int runs) {
+    header ("bench_2");
+    // Section 1 of 3. The "C array" baseline always runs; every container variant is compiled in by its USE_* macro.
+    header ("outer_prod");
+
+    header ("C array");
+    bench_c_outer_prod<T, N> () (runs);
+
+#ifdef USE_C_ARRAY
+    header ("c_matrix, c_vector safe");
+    bench_my_outer_prod<ublas::c_matrix<T, N, N>,
+                        ublas::c_vector<T, N>, N> () (runs, safe_tag ());
+
+    header ("c_matrix, c_vector fast");
+    bench_my_outer_prod<ublas::c_matrix<T, N, N>,
+                        ublas::c_vector<T, N>, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_BOUNDED_ARRAY
+    header ("matrix<bounded_array>, vector<bounded_array> safe");
+    bench_my_outer_prod<ublas::matrix<T, ublas::row_major, ublas::bounded_array<T, N * N> >,
+                        ublas::vector<T, ublas::bounded_array<T, N> >, N> () (runs, safe_tag ());
+
+    header ("matrix<bounded_array>, vector<bounded_array> fast");
+    bench_my_outer_prod<ublas::matrix<T, ublas::row_major, ublas::bounded_array<T, N * N> >,
+                        ublas::vector<T, ublas::bounded_array<T, N> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_UNBOUNDED_ARRAY
+    header ("matrix<unbounded_array>, vector<unbounded_array> safe");
+    bench_my_outer_prod<ublas::matrix<T, ublas::row_major, ublas::unbounded_array<T> >,
+                        ublas::vector<T, ublas::unbounded_array<T> >, N> () (runs, safe_tag ());
+
+    header ("matrix<unbounded_array>, vector<unbounded_array> fast");
+    bench_my_outer_prod<ublas::matrix<T, ublas::row_major, ublas::unbounded_array<T> >,
+                        ublas::vector<T, ublas::unbounded_array<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VALARRAY
+    header ("matrix<std::valarray>, vector<std::valarray> safe");
+    bench_my_outer_prod<ublas::matrix<T, ublas::row_major, std::valarray<T> >,
+                        ublas::vector<T, std::valarray<T> >, N> () (runs, safe_tag ());
+
+    header ("matrix<std::valarray>, vector<std::valarray> fast");
+    bench_my_outer_prod<ublas::matrix<T, ublas::row_major, std::valarray<T> >,
+                        ublas::vector<T, std::valarray<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VECTOR
+    header ("matrix<std::vector>, vector<std::vector> safe");
+    bench_my_outer_prod<ublas::matrix<T, ublas::row_major, std::vector<T> >,
+                        ublas::vector<T, std::vector<T> >, N> () (runs, safe_tag ());
+
+    header ("matrix<std::vector>, vector<std::vector> fast");
+    bench_my_outer_prod<ublas::matrix<T, ublas::row_major, std::vector<T> >,
+                        ublas::vector<T, std::vector<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VALARRAY
+    header ("std::valarray");
+    bench_cpp_outer_prod<std::valarray<T>, std::valarray<T>, N> () (runs);
+#endif
+    // Section 2 of 3: matrix-vector product, same variant list as above.
+    header ("prod (matrix, vector)");
+
+    header ("C array");
+    bench_c_matrix_vector_prod<T, N> () (runs);
+
+#ifdef USE_C_ARRAY
+    header ("c_matrix, c_vector safe");
+    bench_my_matrix_vector_prod<ublas::c_matrix<T, N, N>,
+                                ublas::c_vector<T, N>, N> () (runs, safe_tag ());
+
+    header ("c_matrix, c_vector fast");
+    bench_my_matrix_vector_prod<ublas::c_matrix<T, N, N>,
+                                ublas::c_vector<T, N>, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_BOUNDED_ARRAY
+    header ("matrix<bounded_array>, vector<bounded_array> safe");
+    bench_my_matrix_vector_prod<ublas::matrix<T, ublas::row_major, ublas::bounded_array<T, N * N> >,
+                                ublas::vector<T, ublas::bounded_array<T, N> >, N> () (runs, safe_tag ());
+
+    header ("matrix<bounded_array>, vector<bounded_array> fast");
+    bench_my_matrix_vector_prod<ublas::matrix<T, ublas::row_major, ublas::bounded_array<T, N * N> >,
+                                ublas::vector<T, ublas::bounded_array<T, N> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_UNBOUNDED_ARRAY
+    header ("matrix<unbounded_array>, vector<unbounded_array> safe");
+    bench_my_matrix_vector_prod<ublas::matrix<T, ublas::row_major, ublas::unbounded_array<T> >,
+                                ublas::vector<T, ublas::unbounded_array<T> >, N> () (runs, safe_tag ());
+
+    header ("matrix<unbounded_array>, vector<unbounded_array> fast");
+    bench_my_matrix_vector_prod<ublas::matrix<T, ublas::row_major, ublas::unbounded_array<T> >,
+                                ublas::vector<T, ublas::unbounded_array<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VALARRAY
+    header ("matrix<std::valarray>, vector<std::valarray> safe");
+    bench_my_matrix_vector_prod<ublas::matrix<T, ublas::row_major, std::valarray<T> >,
+                                ublas::vector<T, std::valarray<T> >, N> () (runs, safe_tag ());
+
+    header ("matrix<std::valarray>, vector<std::valarray> fast");
+    bench_my_matrix_vector_prod<ublas::matrix<T, ublas::row_major, std::valarray<T> >,
+                                ublas::vector<T, std::valarray<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VECTOR
+    header ("matrix<std::vector>, vector<std::vector> safe");
+    bench_my_matrix_vector_prod<ublas::matrix<T, ublas::row_major, std::vector<T> >,
+                                ublas::vector<T, std::vector<T> >, N> () (runs, safe_tag ());
+
+    header ("matrix<std::vector>, vector<std::vector> fast");
+    bench_my_matrix_vector_prod<ublas::matrix<T, ublas::row_major, std::vector<T> >,
+                                ublas::vector<T, std::vector<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VALARRAY
+    header ("std::valarray");
+    bench_cpp_matrix_vector_prod<std::valarray<T>, std::valarray<T>, N> () (runs);
+#endif
+    // Section 3 of 3: matrix addition, same variant list as above.
+    header ("matrix + matrix");
+
+    header ("C array");
+    bench_c_matrix_add<T, N> () (runs);
+
+#ifdef USE_C_ARRAY
+    header ("c_matrix safe");
+    bench_my_matrix_add<ublas::c_matrix<T, N, N>, N> () (runs, safe_tag ());
+
+    header ("c_matrix fast");
+    bench_my_matrix_add<ublas::c_matrix<T, N, N>, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_BOUNDED_ARRAY
+    header ("matrix<bounded_array> safe");
+    bench_my_matrix_add<ublas::matrix<T, ublas::row_major, ublas::bounded_array<T, N * N> >, N> () (runs, safe_tag ());
+
+    header ("matrix<bounded_array> fast");
+    bench_my_matrix_add<ublas::matrix<T, ublas::row_major, ublas::bounded_array<T, N * N> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_UNBOUNDED_ARRAY
+    header ("matrix<unbounded_array> safe");
+    bench_my_matrix_add<ublas::matrix<T, ublas::row_major, ublas::unbounded_array<T> >, N> () (runs, safe_tag ());
+
+    header ("matrix<unbounded_array> fast");
+    bench_my_matrix_add<ublas::matrix<T, ublas::row_major, ublas::unbounded_array<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VALARRAY
+    header ("matrix<std::valarray> safe");
+    bench_my_matrix_add<ublas::matrix<T, ublas::row_major, std::valarray<T> >, N> () (runs, safe_tag ());
+
+    header ("matrix<std::valarray> fast");
+    bench_my_matrix_add<ublas::matrix<T, ublas::row_major, std::valarray<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VECTOR
+    header ("matrix<std::vector> safe");
+    bench_my_matrix_add<ublas::matrix<T, ublas::row_major, std::vector<T> >, N> () (runs, safe_tag ());
+
+    header ("matrix<std::vector> fast");
+    bench_my_matrix_add<ublas::matrix<T, ublas::row_major, std::vector<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VALARRAY
+    header ("std::valarray");
+    bench_cpp_matrix_add<std::valarray<T>, N> () (runs);
+#endif
+}
+
+#ifdef USE_FLOAT  // explicit instantiations of bench_2: sizes 3, 10, 30, 100 for each enabled scalar type
+template struct bench_2<float, 3>;
+template struct bench_2<float, 10>;
+template struct bench_2<float, 30>;
+template struct bench_2<float, 100>;
+#endif
+
+#ifdef USE_DOUBLE
+template struct bench_2<double, 3>;
+template struct bench_2<double, 10>;
+template struct bench_2<double, 30>;
+template struct bench_2<double, 100>;
+#endif
+
+#ifdef USE_STD_COMPLEX  // complex variants need this macro in addition to the per-precision one
+#ifdef USE_FLOAT
+template struct bench_2<std::complex<float>, 3>;
+template struct bench_2<std::complex<float>, 10>;
+template struct bench_2<std::complex<float>, 30>;
+template struct bench_2<std::complex<float>, 100>;
+#endif
+
+#ifdef USE_DOUBLE
+template struct bench_2<std::complex<double>, 3>;
+template struct bench_2<std::complex<double>, 10>;
+template struct bench_2<std::complex<double>, 30>;
+template struct bench_2<std::complex<double>, 100>;
+#endif
+#endif
diff --git a/benchmarks/bench3/bench33.cpp b/benchmarks/bench3/bench33.cpp
new file mode 100644
index 0000000..9b8e107
--- /dev/null
+++ b/benchmarks/bench3/bench33.cpp
@@ -0,0 +1,198 @@
+//
+//  Copyright (c) 2000-2002
+//  Joerg Walter, Mathias Koch
+//
+//  Distributed under the Boost Software License, Version 1.0. (See
+//  accompanying file LICENSE_1_0.txt or copy at
+//  http://www.boost.org/LICENSE_1_0.txt)
+//
+//  The authors gratefully acknowledge the support of
+//  GeNeSys mbH & Co. KG in producing this work.
+//
+
+#include "bench3.hpp"
+
+template<class T, int N>
+struct bench_c_matrix_prod {  // plain triple-loop reference for m3 = m1 * m2 ("C array" in the report)
+    typedef T value_type;
+    // Each pass zeroes m3[j][k] and accumulates m1[j][l] * m2[l][k]; both inputs are set up before the timer exists.
+    void operator () (int runs) const {
+        try {
+            static typename c_matrix_traits<T, N, N>::type m1, m2, m3;
+            initialize_c_matrix<T, N, N> () (m1);
+            initialize_c_matrix<T, N, N> () (m2);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                for (int j = 0; j < N; ++ j) {
+                    for (int k = 0; k < N; ++ k) {
+                        m3 [j] [k] = 0;
+                        for (int l = 0; l < N; ++ l) {
+                            m3 [j] [k] += m1 [j] [l] * m2 [l] [k];
+                        }
+                    }
+                }
+//                sink_c_matrix<T, N, N> () (m3);
+            }
+            footer<value_type> () (N * N * N, N * N * (N - 1), runs, t.elapsed ());  // presumably (multiplies, plus, runs, seconds) -- confirm in the bench header
+        }
+        catch (std::exception &e) {
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+template<class M, int N>
+struct bench_my_matrix_prod {  // uBLAS prod (matrix, matrix), evaluated through matrix_range views of m1, m2 and m3
+    typedef typename M::value_type value_type;
+    // safe_tag overload: the result is stored with operator= on the range (mr3 = prod (mr1, mr2)).
+    void operator () (int runs, safe_tag) const {
+        try {
+            static M m1 (N, N), m2 (N, N), m3 (N, N);
+            ublas::matrix_range<M> mr1 (m1, ublas::range (0, N), ublas::range (0, N)),
+                                   mr2 (m2, ublas::range (0, N), ublas::range (0, N)),
+                                   mr3 (m3, ublas::range (0, N), ublas::range (0, N));
+            initialize_matrix (mr1);
+            initialize_matrix (mr2);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                mr3 = ublas::prod (mr1, mr2);
+//                sink_matrix (mr3);
+            }
+            footer<value_type> () (N * N * N, N * N * (N - 1), runs, t.elapsed ());
+        }
+        catch (std::exception &e) {
+            std::cout << e.what () << std::endl;
+        }
+    }
+    void operator () (int runs, fast_tag) const {  // fast_tag overload: identical setup, result stored with mr3.assign (...)
+        try {
+            static M m1 (N, N), m2 (N, N), m3 (N, N);
+            ublas::matrix_range<M> mr1 (m1, ublas::range (0, N), ublas::range (0, N)),
+                                   mr2 (m2, ublas::range (0, N), ublas::range (0, N)),
+                                   mr3 (m3, ublas::range (0, N), ublas::range (0, N));
+            initialize_matrix (mr1);
+            initialize_matrix (mr2);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                mr3.assign (ublas::prod (mr1, mr2));
+//                sink_matrix (mr3);
+            }
+            footer<value_type> () (N * N * N, N * N * (N - 1), runs, t.elapsed ());
+        }
+        catch (std::exception &e) {
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+template<class M, int N>
+struct bench_cpp_matrix_prod {  // valarray formulation: row slice times column slice, reduced with sum ()
+    typedef typename M::value_type value_type;
+    // Matrices are flat N * N containers: row j is std::slice (N * j, N, 1), column k is std::slice (k, N, N).
+    void operator () (int runs) const {
+        try {
+            static M m1 (N * N), m2 (N * N), m3 (N * N);
+            initialize_vector (m1);
+            initialize_vector (m2);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                for (int j = 0; j < N; ++ j) {
+                    std::valarray<value_type> row (m1 [std::slice (N * j, N, 1)]);  // copied once per row, reused for every column
+                    for (int k = 0; k < N; ++ k) {
+                        std::valarray<value_type> column (m2 [std::slice (k, N, N)]);  // copied again for every (j, k) pair
+                        m3 [N * j + k] = (row * column).sum ();
+                    }
+                }
+//                sink_vector (m3);
+            }
+            footer<value_type> () (N * N * N, N * N * (N - 1), runs, t.elapsed ());
+        }
+        catch (std::exception &e) {
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+
+// Benchmark O (n ^ 3): matrix-matrix product, printing a header before each container variant.
+template<class T, int N>
+void bench_3<T, N>::operator () (int runs) {
+    header ("bench_3");
+
+    header ("prod (matrix, matrix)");
+    // The "C array" baseline always runs; every container variant below is compiled in by its USE_* macro.
+    header ("C array");
+    bench_c_matrix_prod<T, N> () (runs);
+
+#ifdef USE_C_ARRAY
+    header ("c_matrix safe");
+    bench_my_matrix_prod<ublas::c_matrix<T, N, N>, N> () (runs, safe_tag ());
+
+    header ("c_matrix fast");
+    bench_my_matrix_prod<ublas::c_matrix<T, N, N>, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_BOUNDED_ARRAY
+    header ("matrix<bounded_array> safe");
+    bench_my_matrix_prod<ublas::matrix<T, ublas::row_major, ublas::bounded_array<T, N * N> >, N> () (runs, safe_tag ());
+
+    header ("matrix<bounded_array> fast");
+    bench_my_matrix_prod<ublas::matrix<T, ublas::row_major, ublas::bounded_array<T, N * N> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_UNBOUNDED_ARRAY
+    header ("matrix<unbounded_array> safe");
+    bench_my_matrix_prod<ublas::matrix<T, ublas::row_major, ublas::unbounded_array<T> >, N> () (runs, safe_tag ());
+
+    header ("matrix<unbounded_array> fast");
+    bench_my_matrix_prod<ublas::matrix<T, ublas::row_major, ublas::unbounded_array<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VALARRAY
+    header ("matrix<std::valarray> safe");
+    bench_my_matrix_prod<ublas::matrix<T, ublas::row_major, std::valarray<T> >, N> () (runs, safe_tag ());
+
+    header ("matrix<std::valarray> fast");
+    bench_my_matrix_prod<ublas::matrix<T, ublas::row_major, std::valarray<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VECTOR
+    header ("matrix<std::vector> safe");
+    bench_my_matrix_prod<ublas::matrix<T, ublas::row_major, std::vector<T> >, N> () (runs, safe_tag ());
+
+    header ("matrix<std::vector> fast");
+    bench_my_matrix_prod<ublas::matrix<T, ublas::row_major, std::vector<T> >, N> () (runs, fast_tag ());
+#endif
+
+#ifdef USE_STD_VALARRAY
+    header ("std::valarray");
+    bench_cpp_matrix_prod<std::valarray<T>, N> () (runs);
+#endif
+}
+
+#ifdef USE_FLOAT  // explicit instantiations of bench_3: sizes 3, 10, 30, 100 for each enabled scalar type
+template struct bench_3<float, 3>;
+template struct bench_3<float, 10>;
+template struct bench_3<float, 30>;
+template struct bench_3<float, 100>;
+#endif
+
+#ifdef USE_DOUBLE
+template struct bench_3<double, 3>;
+template struct bench_3<double, 10>;
+template struct bench_3<double, 30>;
+template struct bench_3<double, 100>;
+#endif
+
+#ifdef USE_STD_COMPLEX  // complex variants need this macro in addition to the per-precision one
+#ifdef USE_FLOAT
+template struct bench_3<std::complex<float>, 3>;
+template struct bench_3<std::complex<float>, 10>;
+template struct bench_3<std::complex<float>, 30>;
+template struct bench_3<std::complex<float>, 100>;
+#endif
+
+#ifdef USE_DOUBLE
+template struct bench_3<std::complex<double>, 3>;
+template struct bench_3<std::complex<double>, 10>;
+template struct bench_3<std::complex<double>, 30>;
+template struct bench_3<std::complex<double>, 100>;
+#endif
+#endif
diff --git a/benchmarks/bench4/Jamfile.v2 b/benchmarks/bench4/Jamfile.v2
new file mode 100644
index 0000000..94a9f07
--- /dev/null
+++ b/benchmarks/bench4/Jamfile.v2
@@ -0,0 +1,12 @@
+# Copyright (c) 2004 Michael Stevens
+# Use, modification and distribution are subject to the
+# Boost Software License, Version 1.0. (See accompanying file
+# LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+# bench4 measures the abstraction penalty of dense matrix and vector
+#        operations with boost::numeric::interval(s).
+
+exe bench4
+    : bench4.cpp bench41.cpp bench42.cpp bench43.cpp
+    : <define>BOOST_UBLAS_USE_INTERVAL
+    ;
diff --git a/benchmarks/bench4/bench4.cpp b/benchmarks/bench4/bench4.cpp
new file mode 100644
index 0000000..6d460eb
--- /dev/null
+++ b/benchmarks/bench4/bench4.cpp
@@ -0,0 +1,135 @@
+//
+//  Copyright (c) 2000-2002
+//  Joerg Walter, Mathias Koch
+//
+//  Distributed under the Boost Software License, Version 1.0. (See
+//  accompanying file LICENSE_1_0.txt or copy at
+//  http://www.boost.org/LICENSE_1_0.txt)
+//
+//  The authors gratefully acknowledge the support of
+//  GeNeSys mbH & Co. KG in producing this work.
+//
+
+#include <boost/numeric/interval.hpp>
+#include <boost/numeric/interval/io.hpp>
+#include "../bench1/bench1.hpp"
+
+void header (std::string text) {  // writes one label line to stdout and flushes it
+    std::cout << text << '\n' << std::flush;
+}
+
+template<class T>
+struct peak_c_plus {  // "plus" entry of the peak report: a bare scalar += loop
+    typedef T value_type;
+    // Times `runs` executions of s += T (0) on a function-local static accumulator.
+    void operator () (int runs) const {
+        try {
+            static T s (0);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                s += T (0);
+//                sink_scalar (s);
+            }
+            footer<value_type> () (0, 1, runs, t.elapsed ());
+        }
+        catch (std::exception &e) {
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+template<class T>
+struct peak_c_multiplies {  // "multiplies" entry of the peak report: a bare scalar *= loop
+    typedef T value_type;
+    // Times `runs` executions of s *= T (1) on a function-local static accumulator.
+    void operator () (int runs) const {
+        try {
+            static T s (1);
+            boost::timer t;
+            for (int i = 0; i < runs; ++ i) {
+                s *= T (1);
+//                sink_scalar (s);
+            }
+            footer<value_type> () (0, 1, runs, t.elapsed ());  // same (0, 1) counts as peak_c_plus -- NOTE(review): confirm the argument order footer expects
+        }
+        catch (std::exception &e) {
+            std::cout << e.what () << std::endl;
+        }
+    }
+};
+
+template<class T>
+void peak<T>::operator () (int runs) {  // prints the "peak" section: scalar plus first, then scalar multiplies
+    header ("peak");
+
+    header ("plus");
+    peak_c_plus<T> () (runs);
+
+    header ("multiplies");
+    peak_c_multiplies<T> () (runs);
+}
+
+template struct peak<boost::numeric::interval<float> >;  // both interval precisions are instantiated unconditionally (no USE_FLOAT / USE_DOUBLE guard)
+template struct peak<boost::numeric::interval<double> >;
+
+#ifdef USE_BOOST_COMPLEX  // NOTE(review): main () in this file selects its complex runs with USE_STD_COMPLEX and std::complex -- confirm which is intended
+
+template struct peak<boost::complex<boost::numeric::interval<float> > >;
+template struct peak<boost::complex<boost::numeric::interval<double> > >;
+
+#endif
+
+
+
+template <typename scalar> 
+void do_bench (std::string type_string, int scale)
+{  // runs the full suite for one scalar type; `scale` multiplies every base run count
+    header (type_string);
+    peak<scalar> () (1000000 * scale);
+    // For each size N: bench_1, bench_2, bench_3. Base run counts drop as N grows and from bench_1 to bench_3.
+    header (type_string + ", 3");
+    bench_1<scalar, 3> () (1000000 * scale);
+    bench_2<scalar, 3> () (300000 * scale);
+    bench_3<scalar, 3> () (100000 * scale);
+
+    header (type_string + ", 10");
+    bench_1<scalar, 10> () (300000 * scale);
+    bench_2<scalar, 10> () (30000 * scale);
+    bench_3<scalar, 10> () (3000 * scale);
+
+    header (type_string + ", 30");
+    bench_1<scalar, 30> () (100000 * scale);
+    bench_2<scalar, 30> () (3000 * scale);
+    bench_3<scalar, 30> () (100 * scale);
+
+    header (type_string + ", 100");
+    bench_1<scalar, 100> () (30000 * scale);
+    bench_2<scalar, 100> () (300 * scale);
+    bench_3<scalar, 100> () (3 * scale);
+}
+
+int main (int argc, char *argv []) {
+    // Optional argv [1]: integer multiplier for every run count (default 1; std::atoi yields 0 for non-numeric text).
+    int scale = 1;
+    if (argc > 1)
+        scale = std::atoi (argv [1]);
+
+#ifdef USE_FLOAT
+    do_bench<boost::numeric::interval<float> > ("boost::numeric::interval<FLOAT>", scale);
+#endif
+
+#ifdef USE_DOUBLE
+    do_bench<boost::numeric::interval<double> > ("boost::numeric::interval<DOUBLE>", scale);
+#endif
+    // Complex-of-interval runs are opt-in. NOTE(review): the explicit instantiations are guarded by USE_BOOST_COMPLEX / boost::complex -- confirm.
+#ifdef USE_STD_COMPLEX
+#ifdef USE_FLOAT
+    do_bench<std::complex<boost::numeric::interval<float> > > ("boost::numeric::interval<COMPLEX<FLOAT>>", scale);
+#endif
+
+#ifdef USE_DOUBLE
+    do_bench<std::complex<boost::numeric::interval<double> > > ("boost::numeric::interval<COMPLEX<DOUBLE>>", scale);
+#endif
+#endif
+
+    return 0;
+}
diff --git a/benchmarks/bench4/bench41.cpp b/benchmarks/bench4/bench41.cpp
new file mode 100644
index 0000000..2ef2d69
--- /dev/null
+++ b/benchmarks/bench4/bench41.cpp
@@ -0,0 +1,46 @@
+//
+//  Copyright (c) 2000-2002
+//  Joerg Walter, Mathias Koch
+//
+//  Distributed under the Boost Software License, Version 1.0. (See
+//  accompanying file LICENSE_1_0.txt or copy at
+//  http://www.boost.org/LICENSE_1_0.txt)
+//
+//  The authors gratefully acknowledge the support of
+//  GeNeSys mbH & Co. KG in producing this work.
+//
+
+#include <boost/numeric/interval.hpp>
+#include <boost/numeric/interval/io.hpp>
+#include "../bench1/bench11.cpp"
+
+
+#ifdef USE_FLOAT  // explicit instantiations of bench_1 over boost::numeric::interval scalars, sizes 3, 10, 30, 100
+template struct bench_1<boost::numeric::interval<float>, 3>;
+template struct bench_1<boost::numeric::interval<float>, 10>;
+template struct bench_1<boost::numeric::interval<float>, 30>;
+template struct bench_1<boost::numeric::interval<float>, 100>;
+#endif
+
+#ifdef USE_DOUBLE
+template struct bench_1<boost::numeric::interval<double>, 3>;
+template struct bench_1<boost::numeric::interval<double>, 10>;
+template struct bench_1<boost::numeric::interval<double>, 30>;
+template struct bench_1<boost::numeric::interval<double>, 100>;
+#endif
+
+#ifdef USE_BOOST_COMPLEX  // NOTE(review): bench4.cpp's main () runs complex cases under USE_STD_COMPLEX with std::complex -- confirm the guards agree
+#ifdef USE_FLOAT
+template struct bench_1<boost::complex<boost::numeric::interval<float> >, 3>;
+template struct bench_1<boost::complex<boost::numeric::interval<float> >, 10>;
+template struct bench_1<boost::complex<boost::numeric::interval<float> >, 30>;
+template struct bench_1<boost::complex<boost::numeric::interval<float> >, 100>;
+#endif
+
+#ifdef USE_DOUBLE
+template struct bench_1<boost::complex<boost::numeric::interval<double> >, 3>;
+template struct bench_1<boost::complex<boost::numeric::interval<double> >, 10>;
+template struct bench_1<boost::complex<boost::numeric::interval<double> >, 30>;
+template struct bench_1<boost::complex<boost::numeric::interval<double> >, 100>;
+#endif
+#endif
diff --git a/benchmarks/bench4/bench42.cpp b/benchmarks/bench4/bench42.cpp
new file mode 100644
index 0000000..55f9060
--- /dev/null
+++ b/benchmarks/bench4/bench42.cpp
@@ -0,0 +1,46 @@
+//
+//  Copyright (c) 2000-2002
+//  Joerg Walter, Mathias Koch
+//
+//  Distributed under the Boost Software License, Version 1.0. (See
+//  accompanying file LICENSE_1_0.txt or copy at
+//  http://www.boost.org/LICENSE_1_0.txt)
+//
+//  The authors gratefully acknowledge the support of
+//  GeNeSys mbH & Co. KG in producing this work.
+//
+
+#include <boost/numeric/interval.hpp>
+#include <boost/numeric/interval/io.hpp>
+#include "../bench1/bench12.cpp"
+
+
+#ifdef USE_FLOAT  // explicit instantiations of bench_2 over interval<float>, second argument 3/10/30/100
+template struct bench_2<boost::numeric::interval<float>, 3>;
+template struct bench_2<boost::numeric::interval<float>, 10>;
+template struct bench_2<boost::numeric::interval<float>, 30>;
+template struct bench_2<boost::numeric::interval<float>, 100>;
+#endif  // USE_FLOAT
+
+#ifdef USE_DOUBLE  // same set of instantiations over interval<double>
+template struct bench_2<boost::numeric::interval<double>, 3>;
+template struct bench_2<boost::numeric::interval<double>, 10>;
+template struct bench_2<boost::numeric::interval<double>, 30>;
+template struct bench_2<boost::numeric::interval<double>, 100>;
+#endif  // USE_DOUBLE
+
+#ifdef USE_BOOST_COMPLEX  // complex-of-interval variants, compiled only when this macro is defined
+#ifdef USE_FLOAT
+template struct bench_2<boost::complex<boost::numeric::interval<float> >, 3>;
+template struct bench_2<boost::complex<boost::numeric::interval<float> >, 10>;
+template struct bench_2<boost::complex<boost::numeric::interval<float> >, 30>;
+template struct bench_2<boost::complex<boost::numeric::interval<float> >, 100>;
+#endif  // USE_FLOAT
+
+#ifdef USE_DOUBLE
+template struct bench_2<boost::complex<boost::numeric::interval<double> >, 3>;
+template struct bench_2<boost::complex<boost::numeric::interval<double> >, 10>;
+template struct bench_2<boost::complex<boost::numeric::interval<double> >, 30>;
+template struct bench_2<boost::complex<boost::numeric::interval<double> >, 100>;
+#endif  // USE_DOUBLE
+#endif  // USE_BOOST_COMPLEX
diff --git a/benchmarks/bench4/bench43.cpp b/benchmarks/bench4/bench43.cpp
new file mode 100644
index 0000000..c39655a
--- /dev/null
+++ b/benchmarks/bench4/bench43.cpp
@@ -0,0 +1,46 @@
+//
+//  Copyright (c) 2000-2002
+//  Joerg Walter, Mathias Koch
+//
+//  Distributed under the Boost Software License, Version 1.0. (See
+//  accompanying file LICENSE_1_0.txt or copy at
+//  http://www.boost.org/LICENSE_1_0.txt)
+//
+//  The authors gratefully acknowledge the support of
+//  GeNeSys mbH & Co. KG in producing this work.
+//
+
+#include <boost/numeric/interval.hpp>
+#include <boost/numeric/interval/io.hpp>
+#include "../bench1/bench13.cpp"
+
+
+#ifdef USE_FLOAT  // explicit instantiations of bench_3 over interval<float>, second argument 3/10/30/100
+template struct bench_3<boost::numeric::interval<float>, 3>;
+template struct bench_3<boost::numeric::interval<float>, 10>;
+template struct bench_3<boost::numeric::interval<float>, 30>;
+template struct bench_3<boost::numeric::interval<float>, 100>;
+#endif  // USE_FLOAT
+
+#ifdef USE_DOUBLE  // same set of instantiations over interval<double>
+template struct bench_3<boost::numeric::interval<double>, 3>;
+template struct bench_3<boost::numeric::interval<double>, 10>;
+template struct bench_3<boost::numeric::interval<double>, 30>;
+template struct bench_3<boost::numeric::interval<double>, 100>;
+#endif  // USE_DOUBLE
+
+#ifdef USE_BOOST_COMPLEX  // complex-of-interval variants, compiled only when this macro is defined
+#ifdef USE_FLOAT
+template struct bench_3<boost::complex<boost::numeric::interval<float> >, 3>;
+template struct bench_3<boost::complex<boost::numeric::interval<float> >, 10>;
+template struct bench_3<boost::complex<boost::numeric::interval<float> >, 30>;
+template struct bench_3<boost::complex<boost::numeric::interval<float> >, 100>;
+#endif  // USE_FLOAT
+
+#ifdef USE_DOUBLE
+template struct bench_3<boost::complex<boost::numeric::interval<double> >, 3>;
+template struct bench_3<boost::complex<boost::numeric::interval<double> >, 10>;
+template struct bench_3<boost::complex<boost::numeric::interval<double> >, 30>;
+template struct bench_3<boost::complex<boost::numeric::interval<double> >, 100>;
+#endif  // USE_DOUBLE
+#endif  // USE_BOOST_COMPLEX
diff --git a/benchmarks/bench5/Jamfile.v2 b/benchmarks/bench5/Jamfile.v2
new file mode 100644
index 0000000..d180f5c
--- /dev/null
+++ b/benchmarks/bench5/Jamfile.v2
@@ -0,0 +1,11 @@
+# Copyright (c) 2004-2010 Michael Stevens, David Bellot
+# Use, modification and distribution are subject to the
+# Boost Software License, Version 1.0. (See accompanying file
+# LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+# bench5 measures the performance of the uBLAS <<= assigner against explicit element assignment
+
+exe bench5
+    : assignment_bench.cpp
+    : <define>BOOST_UBLAS_USE_INTERVAL  # NOTE(review): assignment_bench.cpp never tests this macro itself - confirm the define is still needed
+    ;
diff --git a/benchmarks/bench5/assignment_bench.cpp b/benchmarks/bench5/assignment_bench.cpp
new file mode 100644
index 0000000..532f379
--- /dev/null
+++ b/benchmarks/bench5/assignment_bench.cpp
@@ -0,0 +1,141 @@
+//
+//  Copyright (c) 2010 Athanasios Iliopoulos
+//
+//  Distributed under the Boost Software License, Version 1.0. (See
+//  accompanying file LICENSE_1_0.txt or copy at
+//  http://www.boost.org/LICENSE_1_0.txt)
+//
+
+#include <boost/numeric/ublas/assignment.hpp>
+#include <boost/numeric/ublas/vector.hpp>
+#include <boost/numeric/ublas/matrix.hpp>
+#include <boost/numeric/ublas/io.hpp>
+#include <boost/timer.hpp>
+
+using namespace boost::numeric::ublas;
+
+int main() {
+    // For each container size: time explicit element assignment, time the <<= assigner, print both and their relative difference.
+    boost::timer timer;
+
+    unsigned int iterations = 1000000000;  // loop count used for the size-2 and size-3 vector cases
+    double elapsed_exp, elapsed_assigner;  // elapsed time of the explicit loop / of the <<= loop for the current case
+
+    std::cout << "Ublas vector<double> Benchmarks------------------------ " << "\n";
+
+    {
+    std::cout << "Size 2 vector: " << "\n";
+    vector<double> a(2);
+
+    timer.restart();  // reference measurement: one a(i)=... statement per element
+    for(unsigned int i=0; i!=iterations; i++) {
+        a(0)=0; a(1)=1;
+    }
+    elapsed_exp = timer.elapsed();
+    std::cout << "Explicit element assign time: " << elapsed_exp << " secs" << "\n";
+
+    timer.restart();  // same values written through the <<= assigner
+    for(unsigned int i=0; i!=iterations; i++)
+        a <<= 0, 1;
+    elapsed_assigner = timer.elapsed();
+    std::cout << "Assigner time: " << elapsed_assigner << " secs" << "\n";
+    std::cout << "Difference: " << (elapsed_assigner/elapsed_exp-1)*100 << "%" << std::endl;  // assigner time relative to explicit time, in percent; NOTE(review): not finite if elapsed_exp is 0
+    }
+
+    {
+    std::cout << "Size 3 vector: " << "\n";
+    vector<double> a(3);
+
+    timer.restart();  // reference measurement
+    for(unsigned int i=0; i!=iterations; i++) {
+        a(0)=0; a(1)=1; a(2)=2;
+    }
+    elapsed_exp = timer.elapsed();
+    std::cout << "Explicit element assign time: " << elapsed_exp << " secs" << "\n";
+
+    timer.restart();  // assigner measurement
+    for(unsigned int i=0; i!=iterations; i++)
+        a <<= 0, 1, 2;
+    elapsed_assigner = timer.elapsed();
+    std::cout << "Assigner time: " << elapsed_assigner << " secs" << "\n";
+    std::cout << "Difference: " << (elapsed_assigner/elapsed_exp-1)*100 << "%" << std::endl;
+    }
+
+    iterations = 100000000;  // lower loop count for the 8-element case
+
+    {
+    std::cout << "Size 8 vector: " << "\n";
+    vector<double> a(8);
+
+    timer.restart();  // reference measurement
+    for(unsigned int i=0; i!=iterations; i++) {
+        a(0)=0; a(1)=1; a(2)=2; a(3)=3; a(4)=4; a(5)=5; a(6)=6; a(7)=7;
+    }
+    elapsed_exp = timer.elapsed();
+    std::cout << "Explicit element assign time: " << elapsed_exp << " secs" << "\n";
+
+    timer.restart();  // assigner measurement
+    for(unsigned int i=0; i!=iterations; i++)
+        a <<= 0, 1, 2, 3, 4, 5, 6, 7;
+    elapsed_assigner = timer.elapsed();
+    std::cout << "Assigner time: " << elapsed_assigner << " secs" << "\n";
+    std::cout << "Difference: " << (elapsed_assigner/elapsed_exp-1)*100 << "%" << std::endl;
+    }
+
+
+    std::cout << "Ublas matrix<double> Benchmarks------------------------ " << "\n";
+
+    iterations = 200000000;  // loop count for the 3x3 matrix case
+    {
+    std::cout << "Size 3x3 matrix: " << "\n";
+    matrix<double> a(3,3);
+
+    timer.restart();  // reference measurement: all nine elements written one by one
+    for(unsigned int i=0; i!=iterations; i++) {
+        a(0,0)=0; a(0,1)=1; a(0,2)=2;
+        a(1,0)=3; a(1,1)=4; a(1,2)=5;
+        a(2,0)=6; a(2,1)=7; a(2,2)=8;
+    }
+    elapsed_exp = timer.elapsed();
+    std::cout << "Explicit element assign time: " << elapsed_exp << " secs" << "\n";
+
+    timer.restart();  // assigner measurement with a single flat value list
+    for(unsigned int i=0; i!=iterations; i++)
+        a <<= 0, 1, 2, 3, 4, 5, 6, 7, 8;
+    elapsed_assigner = timer.elapsed();
+    std::cout << "Assigner time: " << elapsed_assigner << " secs" << "\n";
+    std::cout << "Difference: " << (elapsed_assigner/elapsed_exp-1)*100 << "%" << std::endl;
+    }
+
+    std::cout << "Size 2x2 matrix: " << "\n";  // unlike the other cases, this header is printed before the scope opens
+    iterations = 500000000;  // loop count for the 2x2 matrix case
+    {
+    matrix<double> a(2,2);
+
+    timer.restart();  // reference measurement
+    for(unsigned int i=0; i!=iterations; i++) {
+        a(0,0)=0; a(0,1)=1;
+        a(1,0)=3; a(1,1)=4;
+    }
+    elapsed_exp = timer.elapsed();
+    std::cout << "Explicit element assign time: " << elapsed_exp << " secs" << "\n";
+
+    timer.restart();  // assigner measurement with a flat value list
+    for(unsigned int i=0; i!=iterations; i++)
+        a <<= 0, 1, 3, 4;
+    elapsed_assigner = timer.elapsed();
+    std::cout << "Assigner time: " << elapsed_assigner << " secs" << "\n";
+
+    std::cout << "Difference: " << (elapsed_assigner/elapsed_exp-1)*100 << "%" << std::endl;
+
+    timer.restart();  // second assigner form: by_row_no_wrap policy with an explicit next_row() between the rows
+    for(unsigned int i=0; i!=iterations; i++)
+        a <<= traverse_policy::by_row_no_wrap(), 0, 1, next_row(), 3, 4;
+    elapsed_assigner = timer.elapsed();
+    std::cout << "Assigner time no_wrap: " << elapsed_assigner << " secs" << "\n";
+    std::cout << "Difference: " << (elapsed_assigner/elapsed_exp-1)*100 << "%" << std::endl;  // still relative to the explicit 2x2 time measured above
+    }
+
+    return 0;
+}
+