Squashed 'third_party/boostorg/odeint/' content from commit 6ff2719

Change-Id: If4892e29c1a5e6cf3a7aa51486a2725c251b0c7d
git-subtree-dir: third_party/boostorg/odeint
git-subtree-split: 6ff2719b6907b86596c3d43e88c1bcfdf29df560
diff --git a/examples/thrust/Makefile b/examples/thrust/Makefile
new file mode 100644
index 0000000..5a33bdb
--- /dev/null
+++ b/examples/thrust/Makefile
@@ -0,0 +1,34 @@
+# Copyright 2011-2014 Mario Mulansky
+# Copyright 2011-2012 Karsten Ahnert
+#
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or
+# copy at http://www.boost.org/LICENSE_1_0.txt)
+
+# make sure BOOST_ROOT is pointing to your boost directory
+# otherwise, set it here:
+# BOOST_ROOT = /path/to/boost
+
+# path to the cuda installation
+CUDA_ROOT = /usr/local/cuda
+# target architecture
+ARCH = sm_13
+
+NVCC = $(CUDA_ROOT)/bin/nvcc
+
+INCLUDES += -I../../include/ -I$(BOOST_ROOT)
+
+NVCCFLAGS = -O3 $(INCLUDES) -arch $(ARCH)
+
+%.o : %.cu
+	$(NVCC) $(NVCCFLAGS) -c $< -o $@
+
+% : %.o
+	$(NVCC) $(NVCCFLAGS) -o $@ $<
+
+
+all : phase_oscillator_chain phase_oscillator_ensemble lorenz_parameters relaxation
+
+
+clean :
+	-rm *~ *.o phase_oscillator_chain phase_oscillator_ensemble lorenz_parameters relaxation 
diff --git a/examples/thrust/lorenz_parameters.cu b/examples/thrust/lorenz_parameters.cu
new file mode 100644
index 0000000..b332375
--- /dev/null
+++ b/examples/thrust/lorenz_parameters.cu
@@ -0,0 +1,296 @@
+/*
+ Copyright 2011-2012 Karsten Ahnert
+ Copyright 2011-2013 Mario Mulansky
+
+ Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or
+ copy at http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#include <iostream>
+#include <cmath>
+#include <utility>
+
+
+#include <thrust/device_vector.h>
+#include <thrust/reduce.h>
+#include <thrust/functional.h>
+
+#include <boost/numeric/odeint.hpp>
+
+#include <boost/numeric/odeint/external/thrust/thrust.hpp>
+
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real.hpp>
+#include <boost/random/variate_generator.hpp>
+
+
+using namespace std;
+using namespace boost::numeric::odeint;
+
+//change this to float if your device does not support double computation
+typedef double value_type;
+
+//change this to host_vector< ... > of you want to run on CPU
+typedef thrust::device_vector< value_type > state_type;
+typedef thrust::device_vector< size_t > index_vector_type;
+// typedef thrust::host_vector< value_type > state_type;
+// typedef thrust::host_vector< size_t > index_vector_type;
+
+
+const value_type sigma = 10.0;
+const value_type b = 8.0 / 3.0;
+
+
+//[ thrust_lorenz_parameters_define_simple_system
+struct lorenz_system
+{
+    struct lorenz_functor
+    {
+        template< class T >
+        __host__ __device__
+        void operator()( T t ) const
+        {
+            // unpack the parameter we want to vary and the Lorenz variables
+            value_type R = thrust::get< 3 >( t );
+            value_type x = thrust::get< 0 >( t );
+            value_type y = thrust::get< 1 >( t );
+            value_type z = thrust::get< 2 >( t );
+            thrust::get< 4 >( t ) = sigma * ( y - x );
+            thrust::get< 5 >( t ) = R * x - y - x * z;
+            thrust::get< 6 >( t ) = -b * z + x * y ;
+
+        }
+    };
+
+    lorenz_system( size_t N , const state_type &beta )
+    : m_N( N ) , m_beta( beta ) { }
+
+    template< class State , class Deriv >
+    void operator()(  const State &x , Deriv &dxdt , value_type t ) const
+    {
+        thrust::for_each(
+                thrust::make_zip_iterator( thrust::make_tuple(
+                        boost::begin( x ) ,
+                        boost::begin( x ) + m_N ,
+                        boost::begin( x ) + 2 * m_N ,
+                        m_beta.begin() ,
+                        boost::begin( dxdt ) ,
+                        boost::begin( dxdt ) + m_N ,
+                        boost::begin( dxdt ) + 2 * m_N  ) ) ,
+                thrust::make_zip_iterator( thrust::make_tuple(
+                        boost::begin( x ) + m_N ,
+                        boost::begin( x ) + 2 * m_N ,
+                        boost::begin( x ) + 3 * m_N ,
+                        m_beta.begin() ,
+                        boost::begin( dxdt ) + m_N ,
+                        boost::begin( dxdt ) + 2 * m_N ,
+                        boost::begin( dxdt ) + 3 * m_N  ) ) ,
+                lorenz_functor() );
+    }
+    size_t m_N;
+    const state_type &m_beta;
+};
+//]
+
+struct lorenz_perturbation_system
+{
+    struct lorenz_perturbation_functor
+    {
+        template< class T >
+        __host__ __device__
+        void operator()( T t ) const
+        {
+            value_type R = thrust::get< 1 >( t );
+            value_type x = thrust::get< 0 >( thrust::get< 0 >( t ) );
+            value_type y = thrust::get< 1 >( thrust::get< 0 >( t ) );
+            value_type z = thrust::get< 2 >( thrust::get< 0 >( t ) );
+            value_type dx = thrust::get< 3 >( thrust::get< 0 >( t ) );
+            value_type dy = thrust::get< 4 >( thrust::get< 0 >( t ) );
+            value_type dz = thrust::get< 5 >( thrust::get< 0 >( t ) );
+            thrust::get< 0 >( thrust::get< 2 >( t ) ) = sigma * ( y - x );
+            thrust::get< 1 >( thrust::get< 2 >( t ) ) = R * x - y - x * z;
+            thrust::get< 2 >( thrust::get< 2 >( t ) ) = -b * z + x * y ;
+            thrust::get< 3 >( thrust::get< 2 >( t ) ) = sigma * ( dy - dx );
+            thrust::get< 4 >( thrust::get< 2 >( t ) ) = ( R - z ) * dx - dy - x * dz;
+            thrust::get< 5 >( thrust::get< 2 >( t ) ) = y * dx + x * dy - b * dz;
+        }
+    };
+
+    lorenz_perturbation_system( size_t N , const state_type &beta )
+    : m_N( N ) , m_beta( beta ) { }
+
+    template< class State , class Deriv >
+    void operator()(  const State &x , Deriv &dxdt , value_type t ) const
+    {
+        thrust::for_each(
+                thrust::make_zip_iterator( thrust::make_tuple(
+                        thrust::make_zip_iterator( thrust::make_tuple(
+                                boost::begin( x ) ,
+                                boost::begin( x ) + m_N ,
+                                boost::begin( x ) + 2 * m_N ,
+                                boost::begin( x ) + 3 * m_N ,
+                                boost::begin( x ) + 4 * m_N ,
+                                boost::begin( x ) + 5 * m_N ) ) ,
+                        m_beta.begin() ,
+                        thrust::make_zip_iterator( thrust::make_tuple(
+                                boost::begin( dxdt ) ,
+                                boost::begin( dxdt ) + m_N ,
+                                boost::begin( dxdt ) + 2 * m_N ,
+                                boost::begin( dxdt ) + 3 * m_N ,
+                                boost::begin( dxdt ) + 4 * m_N ,
+                                boost::begin( dxdt ) + 5 * m_N ) )
+                ) ) ,
+                thrust::make_zip_iterator( thrust::make_tuple(
+                        thrust::make_zip_iterator( thrust::make_tuple(
+                                boost::begin( x ) + m_N ,
+                                boost::begin( x ) + 2 * m_N ,
+                                boost::begin( x ) + 3 * m_N ,
+                                boost::begin( x ) + 4 * m_N ,
+                                boost::begin( x ) + 5 * m_N ,
+                                boost::begin( x ) + 6 * m_N ) ) ,
+                        m_beta.begin() ,
+                        thrust::make_zip_iterator( thrust::make_tuple(
+                                boost::begin( dxdt ) + m_N ,
+                                boost::begin( dxdt ) + 2 * m_N ,
+                                boost::begin( dxdt ) + 3 * m_N ,
+                                boost::begin( dxdt ) + 4 * m_N ,
+                                boost::begin( dxdt ) + 5 * m_N ,
+                                boost::begin( dxdt ) + 6 * m_N  ) )
+                ) ) ,
+                lorenz_perturbation_functor() );
+    }
+
+    size_t m_N;
+    const state_type &m_beta;
+};
+
+struct lyap_observer
+{
+    //[thrust_lorenz_parameters_observer_functor
+    struct lyap_functor
+    {
+        template< class T >
+        __host__ __device__
+        void operator()( T t ) const
+        {
+            value_type &dx = thrust::get< 0 >( t );
+            value_type &dy = thrust::get< 1 >( t );
+            value_type &dz = thrust::get< 2 >( t );
+            value_type norm = sqrt( dx * dx + dy * dy + dz * dz );
+            dx /= norm;
+            dy /= norm;
+            dz /= norm;
+            thrust::get< 3 >( t ) += log( norm );
+        }
+    };
+    //]
+
+    lyap_observer( size_t N , size_t every = 100 )
+    : m_N( N ) , m_lyap( N ) , m_every( every ) , m_count( 0 )
+    {
+        thrust::fill( m_lyap.begin() , m_lyap.end() , 0.0 );
+    }
+
+    template< class Lyap >
+    void fill_lyap( Lyap &lyap )
+    {
+        thrust::copy( m_lyap.begin() , m_lyap.end() , lyap.begin() );
+        for( size_t i=0 ; i<lyap.size() ; ++i )
+            lyap[i] /= m_t_overall;
+    }
+
+
+    template< class State >
+    void operator()( State &x , value_type t )
+    {
+        if( ( m_count != 0 ) && ( ( m_count % m_every ) == 0 ) )
+        {
+            thrust::for_each(
+                    thrust::make_zip_iterator( thrust::make_tuple(
+                            boost::begin( x ) + 3 * m_N ,
+                            boost::begin( x ) + 4 * m_N ,
+                            boost::begin( x ) + 5 * m_N ,
+                            m_lyap.begin() ) ) ,
+                    thrust::make_zip_iterator( thrust::make_tuple(
+                            boost::begin( x ) + 4 * m_N ,
+                            boost::begin( x ) + 5 * m_N ,
+                            boost::begin( x ) + 6 * m_N ,
+                            m_lyap.end() ) ) ,
+                    lyap_functor() );
+            clog << t << "\n";
+        }
+        ++m_count;
+        m_t_overall = t;
+    }
+
+    size_t m_N;
+    state_type m_lyap;
+    size_t m_every;
+    size_t m_count;
+    value_type m_t_overall;
+};
+
+const size_t N = 1024*2;
+const value_type dt = 0.01;
+
+
+int main( int arc , char* argv[] )
+{
+    int driver_version , runtime_version;
+    cudaDriverGetVersion( &driver_version );
+    cudaRuntimeGetVersion ( &runtime_version );
+    cout << driver_version << "\t" << runtime_version << endl;
+
+
+    //[ thrust_lorenz_parameters_define_beta
+    vector< value_type > beta_host( N );
+    const value_type beta_min = 0.0 , beta_max = 56.0;
+    for( size_t i=0 ; i<N ; ++i )
+        beta_host[i] = beta_min + value_type( i ) * ( beta_max - beta_min ) / value_type( N - 1 );
+
+    state_type beta = beta_host;
+    //]
+
+    //[ thrust_lorenz_parameters_integration
+    state_type x( 6 * N );
+
+    // initialize x,y,z
+    thrust::fill( x.begin() , x.begin() + 3 * N , 10.0 );
+
+    // initial dx
+    thrust::fill( x.begin() + 3 * N , x.begin() + 4 * N , 1.0 );
+
+    // initialize dy,dz
+    thrust::fill( x.begin() + 4 * N , x.end() , 0.0 );
+
+
+    // create error stepper, can be used with make_controlled or make_dense_output
+    typedef runge_kutta_dopri5< state_type , value_type , state_type , value_type > stepper_type;
+
+
+    lorenz_system lorenz( N , beta );
+    lorenz_perturbation_system lorenz_perturbation( N , beta );
+    lyap_observer obs( N , 1 );
+
+    // calculate transients
+    integrate_adaptive( make_controlled( 1.0e-6 , 1.0e-6 , stepper_type() ) , lorenz , std::make_pair( x.begin() , x.begin() + 3 * N ) , 0.0 , 10.0 , dt );
+
+    // calculate the Lyapunov exponents -- the main loop
+    double t = 0.0;
+    while( t < 10000.0 )
+    {
+        integrate_adaptive( make_controlled( 1.0e-6 , 1.0e-6 , stepper_type() ) , lorenz_perturbation , x , t , t + 1.0 , 0.1 );
+        t += 1.0;
+        obs( x , t );
+    }
+
+    vector< value_type > lyap( N );
+    obs.fill_lyap( lyap );
+
+    for( size_t i=0 ; i<N ; ++i )
+        cout << beta_host[i] << "\t" << lyap[i] << "\n";
+    //]
+
+    return 0;
+}
diff --git a/examples/thrust/phase_oscillator_chain.cu b/examples/thrust/phase_oscillator_chain.cu
new file mode 100644
index 0000000..0409697
--- /dev/null
+++ b/examples/thrust/phase_oscillator_chain.cu
@@ -0,0 +1,156 @@
+/*
+ Copyright 2011-2013 Mario Mulansky
+ Copyright 2011 Karsten Ahnert
+
+ Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or
+ copy at http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * This example shows how to use odeint on CUDA devices with thrust.
+ * Note that we require at least Version 3.2 of the nVidia CUDA SDK
+ * and the thrust library should be installed in the CUDA include
+ * folder.
+ *
+ * As example we use a chain of phase oscillators with nearest neighbour
+ * coupling, as described in:
+ *
+ * Avis H. Cohen, Philip J. Holmes and Richard H. Rand:
+ * JOURNAL OF MATHEMATICAL BIOLOGY Volume 13, Number 3, 345-369,
+ *
+ */
+
+#include <iostream>
+#include <cmath>
+
+#include <thrust/device_vector.h>
+#include <thrust/iterator/permutation_iterator.h>
+#include <thrust/iterator/counting_iterator.h>
+
+#include <boost/numeric/odeint/stepper/runge_kutta4.hpp>
+#include <boost/numeric/odeint/integrate/integrate_const.hpp>
+#include <boost/numeric/odeint/external/thrust/thrust.hpp>
+
+using namespace std;
+
+using namespace boost::numeric::odeint;
+
+
+//change this to float if your device does not support double computation
+typedef double value_type;
+
+
+//[ thrust_phase_chain_system
+//change this to host_vector< ... > if you want to run on CPU
+typedef thrust::device_vector< value_type > state_type;
+typedef thrust::device_vector< size_t > index_vector_type;
+//typedef thrust::host_vector< value_type > state_type;
+//typedef thrust::host_vector< size_t > index_vector_type;
+
+//<-
+/*
+ * This implements the rhs of the dynamical equation:
+ * \phi'_0 = \omega_0 + sin( \phi_1 - \phi_0 )
+ * \phi'_i  = \omega_i + sin( \phi_i+1 - \phi_i ) + sin( \phi_i - \phi_i-1 )
+ * \phi'_N-1 = \omega_N-1 + sin( \phi_N-1 - \phi_N-2 )
+ */
+//->
+class phase_oscillators
+{
+
+public:
+
+    struct sys_functor
+    {
+        template< class Tuple >
+        __host__ __device__
+        void operator()( Tuple t )  // this functor works on tuples of values
+        {
+            // first, unpack the tuple into value, neighbors and omega
+            const value_type phi = thrust::get<0>(t);
+            const value_type phi_left = thrust::get<1>(t);  // left neighbor
+            const value_type phi_right = thrust::get<2>(t); // right neighbor
+            const value_type omega = thrust::get<3>(t);
+            // the dynamical equation
+            thrust::get<4>(t) = omega + sin( phi_right - phi ) + sin( phi - phi_left );
+        }
+    };
+
+    phase_oscillators( const state_type &omega )
+        : m_omega( omega ) , m_N( omega.size() ) , m_prev( omega.size() ) , m_next( omega.size() )
+    {
+        // build indices pointing to left and right neighbours
+        thrust::counting_iterator<size_t> c( 0 );
+        thrust::copy( c , c+m_N-1 , m_prev.begin()+1 );
+        m_prev[0] = 0; // m_prev = { 0 , 0 , 1 , 2 , 3 , ... , N-1 }
+
+        thrust::copy( c+1 , c+m_N , m_next.begin() );
+        m_next[m_N-1] = m_N-1; // m_next = { 1 , 2 , 3 , ... , N-1 , N-1 }
+    }
+
+    void operator() ( const state_type &x , state_type &dxdt , const value_type dt )
+    {
+        thrust::for_each(
+                thrust::make_zip_iterator(
+                        thrust::make_tuple(
+                                x.begin() ,
+                                thrust::make_permutation_iterator( x.begin() , m_prev.begin() ) ,
+                                thrust::make_permutation_iterator( x.begin() , m_next.begin() ) ,
+                                m_omega.begin() ,
+                                dxdt.begin()
+                                ) ),
+                thrust::make_zip_iterator(
+                        thrust::make_tuple(
+                                x.end() ,
+                                thrust::make_permutation_iterator( x.begin() , m_prev.end() ) ,
+                                thrust::make_permutation_iterator( x.begin() , m_next.end() ) ,
+                                m_omega.end() ,
+                                dxdt.end()) ) ,
+                sys_functor()
+                );
+    }
+
+private:
+
+    const state_type &m_omega;
+    const size_t m_N;
+    index_vector_type m_prev;
+    index_vector_type m_next;
+};
+//]
+
+const size_t N = 32768;
+const value_type pi = 3.1415926535897932384626433832795029;
+const value_type epsilon = 6.0 / ( N * N ); // should be < 8/N^2 to see phase locking
+const value_type dt = 0.1;
+
+int main( int arc , char* argv[] )
+{
+    //[ thrust_phase_chain_integration
+    // create initial conditions and omegas on host:
+    vector< value_type > x_host( N );
+    vector< value_type > omega_host( N );
+    for( size_t i=0 ; i<N ; ++i )
+    {
+        x_host[i] = 2.0 * pi * drand48();
+        omega_host[i] = ( N - i ) * epsilon; // decreasing frequencies
+    }
+
+    // copy to device
+    state_type x = x_host;
+    state_type omega = omega_host;
+
+    // create stepper
+    runge_kutta4< state_type , value_type , state_type , value_type > stepper;
+
+    // create phase oscillator system function
+    phase_oscillators sys( omega );
+
+    // integrate
+    integrate_const( stepper , sys , x , 0.0 , 10.0 , dt );
+
+    thrust::copy( x.begin() , x.end() , std::ostream_iterator< value_type >( std::cout , "\n" ) );
+    std::cout << std::endl;
+    //]
+}
diff --git a/examples/thrust/phase_oscillator_ensemble.cu b/examples/thrust/phase_oscillator_ensemble.cu
new file mode 100644
index 0000000..d678b8f
--- /dev/null
+++ b/examples/thrust/phase_oscillator_ensemble.cu
@@ -0,0 +1,280 @@
+/*
+ The example how the phase_oscillator ensemble can be implemented using CUDA and thrust
+
+ Copyright 2011-2013 Mario Mulansky
+ Copyright 2011 Karsten Ahnert
+
+ Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or
+ copy at http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#include <iostream>
+#include <fstream>
+#include <cmath>
+#include <utility>
+
+#include <thrust/device_vector.h>
+#include <thrust/reduce.h>
+#include <thrust/functional.h>
+
+#include <boost/numeric/odeint.hpp>
+#include <boost/numeric/odeint/external/thrust/thrust.hpp>
+
+#include <boost/timer.hpp>
+#include <boost/random/cauchy_distribution.hpp>
+
+using namespace std;
+
+using namespace boost::numeric::odeint;
+
+/*
+ * Sorry for that dirty hack, but nvcc has large problems with boost::random.
+ *
+ * Nevertheless we need the cauchy distribution from boost::random, and therefore
+ * we need a generator. Here it is:
+ */
+struct drand48_generator
+{
+    typedef double result_type;
+    result_type operator()( void ) const { return drand48(); }
+    result_type min( void ) const { return 0.0; }
+    result_type max( void ) const { return 1.0; }
+};
+
+//[ thrust_phase_ensemble_state_type
+//change this to float if your device does not support double computation
+typedef double value_type;
+
+//change this to host_vector< ... > of you want to run on CPU
+typedef thrust::device_vector< value_type > state_type;
+// typedef thrust::host_vector< value_type > state_type;
+//]
+
+
+//[ thrust_phase_ensemble_mean_field_calculator
+struct mean_field_calculator
+{
+    struct sin_functor : public thrust::unary_function< value_type , value_type >
+    {
+        __host__ __device__
+        value_type operator()( value_type x) const
+        {
+            return sin( x );
+        }
+    };
+
+    struct cos_functor : public thrust::unary_function< value_type , value_type >
+    {
+        __host__ __device__
+        value_type operator()( value_type x) const
+        {
+            return cos( x );
+        }
+    };
+
+    static std::pair< value_type , value_type > get_mean( const state_type &x )
+    {
+        //[ thrust_phase_ensemble_sin_sum
+        value_type sin_sum = thrust::reduce(
+                thrust::make_transform_iterator( x.begin() , sin_functor() ) ,
+                thrust::make_transform_iterator( x.end() , sin_functor() ) );
+        //]
+        value_type cos_sum = thrust::reduce(
+                thrust::make_transform_iterator( x.begin() , cos_functor() ) ,
+                thrust::make_transform_iterator( x.end() , cos_functor() ) );
+
+        cos_sum /= value_type( x.size() );
+        sin_sum /= value_type( x.size() );
+
+        value_type K = sqrt( cos_sum * cos_sum + sin_sum * sin_sum );
+        value_type Theta = atan2( sin_sum , cos_sum );
+
+        return std::make_pair( K , Theta );
+    }
+};
+//]
+
+
+
+//[ thrust_phase_ensemble_sys_function
+class phase_oscillator_ensemble
+{
+
+public:
+
+    struct sys_functor
+    {
+        value_type m_K , m_Theta , m_epsilon;
+
+        sys_functor( value_type K , value_type Theta , value_type epsilon )
+        : m_K( K ) , m_Theta( Theta ) , m_epsilon( epsilon ) { }
+
+        template< class Tuple >
+        __host__ __device__
+        void operator()( Tuple t )
+        {
+            thrust::get<2>(t) = thrust::get<1>(t) + m_epsilon * m_K * sin( m_Theta - thrust::get<0>(t) );
+        }
+    };
+
+    // ...
+    //<-
+    phase_oscillator_ensemble( size_t N , value_type g = 1.0 , value_type epsilon = 1.0 )
+        : m_omega() , m_N( N ) , m_epsilon( epsilon )
+    {
+        create_frequencies( g );
+    }
+
+    void create_frequencies( value_type g )
+    {
+        boost::cauchy_distribution< value_type > cauchy( 0.0 , g );
+//        boost::variate_generator< boost::mt19937&, boost::cauchy_distribution< value_type > > gen( rng , cauchy );
+        drand48_generator d48;
+        vector< value_type > omega( m_N );
+        for( size_t i=0 ; i<m_N ; ++i )
+            omega[i] = cauchy( d48 );
+//        generate( omega.begin() , omega.end() , gen );
+        m_omega = omega;
+    }
+
+    void set_epsilon( value_type epsilon ) { m_epsilon = epsilon; }
+
+    value_type get_epsilon( void ) const { return m_epsilon; }
+    //->
+
+    void operator() ( const state_type &x , state_type &dxdt , const value_type dt ) const
+    {
+        std::pair< value_type , value_type > mean_field = mean_field_calculator::get_mean( x );
+
+        thrust::for_each(
+                thrust::make_zip_iterator( thrust::make_tuple( x.begin() , m_omega.begin() , dxdt.begin() ) ),
+                thrust::make_zip_iterator( thrust::make_tuple( x.end() , m_omega.end() , dxdt.end()) ) ,
+                sys_functor( mean_field.first , mean_field.second , m_epsilon )
+                );
+    }
+
+    // ...
+    //<-
+private:
+
+    state_type m_omega;
+    const size_t m_N;
+    value_type m_epsilon;
+    //->
+};
+//]
+
+
+//[ thrust_phase_ensemble_observer
+struct statistics_observer
+{
+    value_type m_K_mean;
+    size_t m_count;
+
+    statistics_observer( void )
+    : m_K_mean( 0.0 ) , m_count( 0 ) { }
+
+    template< class State >
+    void operator()( const State &x , value_type t )
+    {
+        std::pair< value_type , value_type > mean = mean_field_calculator::get_mean( x );
+        m_K_mean += mean.first;
+        ++m_count;
+    }
+
+    value_type get_K_mean( void ) const { return ( m_count != 0 ) ? m_K_mean / value_type( m_count ) : 0.0 ; }
+
+    void reset( void ) { m_K_mean = 0.0; m_count = 0; }
+};
+//]
+
+
+
+// const size_t N = 16384 * 128;
+const size_t N = 16384;
+const value_type pi = 3.1415926535897932384626433832795029;
+const value_type dt = 0.1;
+const value_type d_epsilon = 0.1;
+const value_type epsilon_min = 0.0;
+const value_type epsilon_max = 5.0;
+const value_type t_transients = 10.0;
+const value_type t_max = 100.0;
+
+int main( int arc , char* argv[] )
+{
+    // initial conditions on host
+    vector< value_type > x_host( N );
+    for( size_t i=0 ; i<N ; ++i ) x_host[i] = 2.0 * pi * drand48();
+
+    //[ thrust_phase_ensemble_system_instance
+    phase_oscillator_ensemble ensemble( N , 1.0 );
+    //]
+
+
+
+    boost::timer timer;
+    boost::timer timer_local;
+    double dopri5_time = 0.0 , rk4_time = 0.0;
+    {
+        //[thrust_phase_ensemble_define_dopri5
+        typedef runge_kutta_dopri5< state_type , value_type , state_type , value_type > stepper_type;
+        //]
+
+        ofstream fout( "phase_ensemble_dopri5.dat" );
+        timer.restart();
+        for( value_type epsilon = epsilon_min ; epsilon < epsilon_max ; epsilon += d_epsilon )
+        {
+            ensemble.set_epsilon( epsilon );
+            statistics_observer obs;
+            state_type x = x_host;
+
+            timer_local.restart();
+
+            // calculate some transients steps
+            //[ thrust_phase_ensemble_integration
+            size_t steps1 = integrate_const( make_controlled( 1.0e-6 , 1.0e-6 , stepper_type() ) , boost::ref( ensemble ) , x , 0.0 , t_transients , dt );
+            //]
+
+            // integrate and compute the statistics
+            size_t steps2 = integrate_const( make_dense_output( 1.0e-6 , 1.0e-6 , stepper_type() ) , boost::ref( ensemble ) , x , 0.0 , t_max , dt , boost::ref( obs ) );
+
+            fout << epsilon << "\t" << obs.get_K_mean() << endl;
+            cout << "Dopri5 : " << epsilon << "\t" << obs.get_K_mean() << "\t" << timer_local.elapsed() << "\t" << steps1 << "\t" << steps2 << endl;
+        }
+        dopri5_time = timer.elapsed();
+    }
+
+
+
+    {
+        //[ thrust_phase_ensemble_define_rk4
+        typedef runge_kutta4< state_type , value_type , state_type , value_type > stepper_type;
+        //]
+
+        ofstream fout( "phase_ensemble_rk4.dat" );
+        timer.restart();
+        for( value_type epsilon = epsilon_min ; epsilon < epsilon_max ; epsilon += d_epsilon )
+        {
+            ensemble.set_epsilon( epsilon );
+            statistics_observer obs;
+            state_type x = x_host;
+
+            timer_local.restart();
+
+            // calculate some transients steps
+            size_t steps1 = integrate_const( stepper_type() , boost::ref( ensemble ) , x , 0.0 , t_transients , dt );
+
+            // integrate and compute the statistics
+            size_t steps2 = integrate_const( stepper_type() , boost::ref( ensemble ) , x , 0.0 , t_max , dt , boost::ref( obs ) );
+            fout << epsilon << "\t" << obs.get_K_mean() << endl;
+            cout << "RK4     : " << epsilon << "\t" << obs.get_K_mean() << "\t" << timer_local.elapsed() << "\t" << steps1 << "\t" << steps2 << endl;
+        }
+        rk4_time = timer.elapsed();
+    }
+
+    cout << "Dopri 5 : " << dopri5_time << " s\n";
+    cout << "RK4     : " << rk4_time << "\n";
+
+    return 0;
+}
diff --git a/examples/thrust/relaxation.cu b/examples/thrust/relaxation.cu
new file mode 100644
index 0000000..f1d9f3a
--- /dev/null
+++ b/examples/thrust/relaxation.cu
@@ -0,0 +1,81 @@
+/*
+ Copyright 2011-2012 Karsten Ahnert
+ Copyright 2013 Mario Mulansky
+
+ Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or
+ copy at http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+
+/*
+ * Solves many relaxation equations dxdt = - a * x in parallel and for different values of a.
+ * The relaxation equations are completely uncoupled.
+ */
+
+#include <thrust/device_vector.h>
+
+#include <boost/ref.hpp>
+
+#include <boost/numeric/odeint.hpp>
+#include <boost/numeric/odeint/external/thrust/thrust.hpp>
+
+
+using namespace std;
+using namespace boost::numeric::odeint;
+
+// change to float if your GPU does not support doubles
+typedef double value_type;
+typedef thrust::device_vector< value_type > state_type;
+typedef runge_kutta4< state_type , value_type , state_type , value_type > stepper_type;
+
+struct relaxation
+{
+    struct relaxation_functor
+    {
+        template< class T >
+        __host__ __device__
+        void operator()( T t ) const
+        {
+            // unpack the parameter we want to vary and the Lorenz variables
+            value_type a = thrust::get< 1 >( t );
+            value_type x = thrust::get< 0 >( t );
+            thrust::get< 2 >( t ) = -a * x;
+        }
+    };
+
+    relaxation( size_t N , const state_type &a )
+    : m_N( N ) , m_a( a ) { }
+
+    void operator()(  const state_type &x , state_type &dxdt , value_type t ) const
+    {
+        thrust::for_each(
+            thrust::make_zip_iterator( thrust::make_tuple( x.begin() , m_a.begin() , dxdt.begin() ) ) ,
+            thrust::make_zip_iterator( thrust::make_tuple( x.end() , m_a.end() , dxdt.end() ) ) ,
+            relaxation_functor() );
+    }
+
+    size_t m_N;
+    const state_type &m_a;
+};
+
+const size_t N = 1024 * 1024;
+const value_type dt = 0.01;
+
+int main( int arc , char* argv[] )
+{
+    // initialize the relaxation constants a
+    vector< value_type > a_host( N );
+    for( size_t i=0 ; i<N ; ++i ) a_host[i] = drand48();
+    state_type a = a_host;
+
+    // initialize the intial state x
+    state_type x( N );
+    thrust::fill( x.begin() , x.end() , 1.0 );
+
+    // integrate
+    relaxation relax( N , a );
+    integrate_const( stepper_type() , boost::ref( relax ) , x , 0.0 , 10.0 , dt );
+
+    return 0;
+}