Squashed 'third_party/hpipm/' content from commit c2c0261
Change-Id: I05e186a6e1e1f075aa629092e7ad86e6116ca711
git-subtree-dir: third_party/hpipm
git-subtree-split: c2c0261e8ded36f636d9c4390a2899bdc4c999cc
diff --git a/test_problems/CMakeLists.txt b/test_problems/CMakeLists.txt
new file mode 100644
index 0000000..4e98a5e
--- /dev/null
+++ b/test_problems/CMakeLists.txt
@@ -0,0 +1,44 @@
+###################################################################################################
+# #
+# This file is part of HPIPM. #
+# #
+# HPIPM -- High Performance Interior Point Method. #
+# Copyright (C) 2017 by Gianluca Frison. #
+# Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. #
+# All rights reserved. #
+# #
+# HPMPC is free software; you can redistribute it and/or #
+# modify it under the terms of the GNU Lesser General Public #
+# License as published by the Free Software Foundation; either #
+# version 2.1 of the License, or (at your option) any later version. #
+# #
+# HPMPC is distributed in the hope that it will be useful, #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. #
+# See the GNU Lesser General Public License for more details. #
+# #
+# You should have received a copy of the GNU Lesser General Public #
+# License along with HPMPC; if not, write to the Free Software #
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #
+# #
+# Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de #
+# #
+###################################################################################################
+
+add_executable(d_dense_qp test_d_dense.c)
+target_link_libraries(d_dense_qp hpipm blasfeo m)
+
+add_executable(s_dense_qp test_s_dense.c)
+target_link_libraries(s_dense_qp hpipm blasfeo m)
+
+add_executable(d_ocp_qp test_d_ocp.c d_tools.c)
+target_link_libraries(d_ocp_qp hpipm blasfeo m)
+
+add_executable(s_ocp_qp test_s_ocp.c s_tools.c)
+target_link_libraries(s_ocp_qp hpipm blasfeo m)
+
+add_executable(m_ocp_qp test_m_ocp.c d_tools.c)
+target_link_libraries(m_ocp_qp hpipm blasfeo m)
+
+add_executable(d_cond test_d_cond.c d_tools.c)
+target_link_libraries(d_cond hpipm blasfeo m)
diff --git a/test_problems/Makefile b/test_problems/Makefile
new file mode 100644
index 0000000..1c5113b
--- /dev/null
+++ b/test_problems/Makefile
@@ -0,0 +1,67 @@
+###################################################################################################
+# #
+# This file is part of HPIPM. #
+# #
+# HPIPM -- High Performance Interior Point Method. #
+# Copyright (C) 2017 by Gianluca Frison. #
+# Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. #
+# All rights reserved. #
+# #
+# HPMPC is free software; you can redistribute it and/or #
+# modify it under the terms of the GNU Lesser General Public #
+# License as published by the Free Software Foundation; either #
+# version 2.1 of the License, or (at your option) any later version. #
+# #
+# HPMPC is distributed in the hope that it will be useful, #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. #
+# See the GNU Lesser General Public License for more details. #
+# #
+# You should have received a copy of the GNU Lesser General Public #
+# License along with HPMPC; if not, write to the Free Software #
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #
+# #
+# Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de #
+# #
+###################################################################################################
+
+include ../Makefile.rule
+
+LIBS =
+
+LIBS += $(BLASFEO_PATH)/lib/libblasfeo.a
+
+ifeq ($(REF_BLAS), 0)
+LIBS += -lm
+endif
+ifeq ($(REF_BLAS), OPENBLAS)
+LIBS += /opt/openblas/lib/libopenblas.a -pthread -lgfortran -lm
+endif
+ifeq ($(REF_BLAS), BLIS)
+LIBS += /opt/netlib/liblapack.a /opt/blis/lib/libblis.a -lgfortran -lm -fopenmp
+endif
+ifeq ($(REF_BLAS), NETLIB)
+LIBS += /opt/netlib/liblapack.a /opt/netlib/libblas.a -lgfortran -lm
+endif
+ifeq ($(REF_BLAS), MKL)
+LIBS += -Wl,--start-group /opt/intel/mkl/lib/intel64/libmkl_gf_lp64.a /opt/intel/mkl/lib/intel64/libmkl_core.a /opt/intel/mkl/lib/intel64/libmkl_sequential.a -Wl,--end-group -ldl -lpthread -lm
+endif
+ifeq ($(REF_BLAS), ATLAS)
+LIBS += /opt/atlas/lib/liblapack.a /opt/atlas/lib/libcblas.a /opt/atlas/lib/libf77blas.a /opt/atlas/lib/libatlas.a -lgfortran -lm
+endif
+
+#OBJS_TEST = d_tools.o test_d_cond.o
+#OBJS_TEST = test_d_dense.o
+#OBJS_TEST = test_s_dense.o
+OBJS_TEST = d_tools.o test_d_ocp.o
+#OBJS_TEST = s_tools.o test_s_ocp.o
+#OBJS_TEST = d_tools.o test_m_ocp.o
+
+obj: $(OBJS_TEST)
+ $(CC) -o test.out $(OBJS_TEST) -L. libhpipm.a $(LIBS) #-pg
+
+clean:
+ rm -f *.o
+ rm -f test.out
+ rm -f libhpipm.a
+
diff --git a/test_problems/d_tools.c b/test_problems/d_tools.c
new file mode 100644
index 0000000..8aa5c90
--- /dev/null
+++ b/test_problems/d_tools.c
@@ -0,0 +1,715 @@
+/**************************************************************************************************
+* *
+* This file is part of HPMPC. *
+* *
+* HPMPC -- Library for High-Performance implementation of solvers for MPC. *
+* Copyright (C) 2014-2015 by Technical University of Denmark. All rights reserved. *
+* *
+* HPMPC is free software; you can redistribute it and/or *
+* modify it under the terms of the GNU Lesser General Public *
+* License as published by the Free Software Foundation; either *
+* version 2.1 of the License, or (at your option) any later version. *
+* *
+* HPMPC is distributed in the hope that it will be useful, *
+* but WITHOUT ANY WARRANTY; without even the implied warranty of *
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
+* See the GNU Lesser General Public License for more details. *
+* *
+* You should have received a copy of the GNU Lesser General Public *
+* License along with HPMPC; if not, write to the Free Software *
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA *
+* *
+* Author: Gianluca Frison, giaf (at) dtu.dk *
+* *
+**************************************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include <blasfeo_d_aux_ext_dep.h>
+
+
+
+// matrix-vector multiplication
+void dgemv_n_3l(int m, int n, double *A, int lda , double *x, double *z)
+ {
+
+ int ii, jj;
+
+ for(ii=0; ii<m; ii++)
+ {
+ z[ii] = 0.0;
+ for(jj=0; jj<n; jj++)
+ {
+ z[ii] += A[ii+lda*jj] * x[jj];
+ }
+ }
+
+ return;
+
+ }
+
+
+// matrix-matrix multiplication
+void dgemm_nn_3l(int m, int n, int k, double *A, int lda , double *B, int ldb, double *C, int ldc)
+ {
+
+ int ii, jj, kk;
+
+ for(jj=0; jj<n; jj++)
+ {
+ for(ii=0; ii<m; ii++)
+ {
+ C[ii+ldc*jj] = 0;
+ for(kk=0; kk<k; kk++)
+ {
+ C[ii+ldc*jj] += A[ii+lda*kk] * B[kk+ldb*jj];
+ }
+ }
+ }
+
+ return;
+
+ }
+
+
+void daxpy_3l(int n, double da, double *dx, double *dy)
+ {
+ int i;
+ for(i=0; i<n; i++)
+ {
+ dy[i] += da*dx[i];
+ }
+ }
+
+
+
+void dscal_3l(int n, double da, double *dx)
+ {
+ int i;
+ for(i=0; i<n; i++)
+ {
+ dx[i] *= da;
+ }
+ }
+
+
+
+/************************************************
+ Routine that copies a matrix
+************************************************/
+void dmcopy(int row, int col, double *A, int lda, double *B, int ldb)
+ {
+ int i, j;
+ for(j=0; j<col; j++)
+ {
+ for(i=0; i<row; i++)
+ {
+ B[i+j*ldb] = A[i+j*lda];
+ }
+ }
+ }
+
+
+
+int idamax_3l(int n, double *x)
+ {
+
+ if(n<=0)
+ return 0;
+ if(n==1)
+ return 0;
+
+ double dabs;
+ double dmax = (x[0]>0 ? x[0] : -x[0]);
+ int idmax = 0;
+ int jj;
+ for(jj=1; jj<n; jj++)
+ {
+ dabs = (x[jj]>0 ? x[jj] : -x[jj]);
+ if(dabs>dmax)
+ {
+ dmax = dabs;
+ idmax = jj;
+ }
+ }
+
+ return idmax;
+
+ }
+
+
+
+void dswap_3l(int n, double *x, int incx, double *y, int incy)
+ {
+
+ if(n<=0)
+ return;
+
+ double temp;
+ int jj;
+ for(jj=0; jj<n; jj++)
+ {
+ temp = x[0];
+ x[0] = y[0];
+ y[0] = temp;
+ x += incx;
+ y += incy;
+ }
+
+ }
+
+
+
+void dger_3l(int m, int n, double alpha, double *x, int incx, double *y, int incy, double *A, int lda)
+ {
+
+ if(m==0 || n==0 || alpha==0.0)
+ return;
+
+ int i, j;
+ double *px, *py, temp;
+
+ py = y;
+ for(j=0; j<n; j++)
+ {
+ temp = alpha * py[0];
+ px = x;
+ for(i=0; i<m; i++)
+ {
+ A[i+lda*j] += px[0] * temp;
+ px += incx;
+ }
+ py += incy;
+ }
+
+ return;
+
+ }
+
+
+
+void dgetf2_3l(int m, int n, double *A, int lda, int *ipiv, int *info)
+ {
+
+ if(m<=0 || n<=0)
+ return;
+
+ int i, j, jp;
+
+ double Ajj;
+
+ int size_min = ( m<n ? m : n );
+
+ for(j=0; j<size_min; j++)
+ // find the pivot and test for singularity
+ {
+ jp = j + idamax_3l(m-j, &A[j+lda*j]);
+ ipiv[j] = jp;
+ if( A[jp+lda*j]!=0)
+ {
+ // apply the interchange to columns 0:n-1
+ if(jp!=j)
+ {
+ dswap_3l(n, &A[j], lda, &A[jp], lda);
+ }
+ // compute elements j+1:m-1 of j-th column
+ if(j<m-1)
+ {
+ Ajj = A[j+lda*j];
+ if( ( Ajj>0 ? Ajj : -Ajj ) >= 2.22e-16 )
+ {
+ dscal_3l(m-j-1, 1.0/Ajj, &A[j+1+lda*j]);
+ }
+ else
+ {
+ for(i=j+1; i<m; i++)
+ {
+ A[i+lda*j] /= Ajj;
+ }
+ }
+ }
+ }
+ else if(*info==0)
+ {
+ *info = j+1;
+ }
+
+ if( j < size_min )
+ {
+ // update trailing submatrix
+ dger_3l(m-j-1, n-j-1, -1.0, &A[j+1+lda*j], 1, &A[j+lda*(j+1)], lda, &A[j+1+lda*(j+1)], lda);
+ }
+
+ }
+
+ return;
+
+ }
+
+
+
+void dlaswp_3l(int n, double *A, int lda, int k1, int k2, int *ipiv)
+ {
+
+ int i, j, k, ix, ix0, i1, i2, n32, ip;
+ double temp;
+
+ ix0 = k1;
+ i1 = k1;
+ i2 = k2;
+
+ n32 = (n/32)*32;
+ if(n32!=0)
+ {
+ for(j=0; j<n32; j+=32)
+ {
+ ix = ix0;
+ for(i=i1; i<i2; i++)
+ {
+ ip = ipiv[ix];
+ if(ip!=i)
+ {
+ for(k=j; k<j+32; k++)
+ {
+ temp = A[i+lda*k];
+ A[i+lda*k] = A[ip+lda*k];
+ A[ip+lda*k] = temp;
+ }
+ }
+ ix++;
+ }
+ }
+ }
+ if(n32!=n)
+ {
+ ix = ix0;
+ for(i=i1; i<i2; i++)
+ {
+ ip = ipiv[ix];
+ if(ip!=i)
+ {
+ for(k=n32; k<n; k++)
+ {
+ temp = A[i+lda*k];
+ A[i+lda*k] = A[ip+lda*k];
+ A[ip+lda*k] = temp;
+ }
+ }
+ ix++;
+ }
+ }
+
+ return;
+
+ }
+
+
+
+// left lower no-transp unit
+void dtrsm_l_l_n_u_3l(int m, int n, double *A, int lda, double *B, int ldb)
+ {
+
+ if(m==0 || n==0)
+ return;
+
+ int i, j, k;
+
+ for(j=0; j<n; j++)
+ {
+ for(k=0; k<m; k++)
+ {
+ for(i=k+1; i<m; i++)
+ {
+ B[i+ldb*j] -= B[k+ldb*j] * A[i+lda*k];
+ }
+ }
+ }
+
+ return;
+
+ }
+
+
+
+// left upper no-transp non-unit
+void dtrsm_l_u_n_n_3l(int m, int n, double *A, int lda, double *B, int ldb)
+ {
+
+ if(m==0 || n==0)
+ return;
+
+ int i, j, k;
+
+ for(j=0; j<n; j++)
+ {
+ for(k=m-1; k>=0; k--)
+ {
+ B[k+ldb*j] /= A[k+lda*k];
+ for(i=0; i<k; i++)
+ {
+ B[i+ldb*j] -= B[k+ldb*j] * A[i+lda*k];
+ }
+ }
+ }
+
+ return;
+
+ }
+
+
+
+void dgetrs_3l(int n, int nrhs, double *A, int lda, int *ipiv, double *B, int ldb, int *info)
+ {
+
+ if(n==0 || nrhs==0)
+ return;
+
+ // solve A * X = B
+
+ // apply row interchanges to the rhs
+ dlaswp_3l(nrhs, B, ldb, 0, n, ipiv);
+
+ // solve L*X = B, overwriting B with X
+ dtrsm_l_l_n_u_3l(n, nrhs, A, lda, B, ldb);
+
+ // solve U*X = B, overwriting B with X
+ dtrsm_l_u_n_n_3l(n, nrhs, A, lda, B, ldb);
+
+ return;
+
+ }
+
+
+
+void dgesv_3l(int n, int nrhs, double *A, int lda, int *ipiv, double *B, int ldb, int *info)
+ {
+
+ // compute the LU factorization of A
+ dgetf2_3l(n, n, A, lda, ipiv, info);
+
+ if(*info==0)
+ {
+ // solve the system A*X = B, overwriting B with X
+ dgetrs_3l(n, nrhs, A, lda, ipiv, B, ldb, info);
+ }
+
+ return;
+
+ }
+
+
+
+/* one norm of a matrix */
+double onenorm(int row, int col, double *ptrA)
+ {
+ double max, temp;
+ int i, j;
+ temp = 0;
+ for(j=0; j<col; j++)
+ {
+ temp = fabs(*(ptrA+j*row));
+ for(i=1; i<row; i++)
+ {
+ temp += fabs(*(ptrA+j*row+i));
+ }
+ if(j==0) max = temp;
+ else if(max>temp) temp = max;
+ }
+ return temp;
+ }
+
+
+
+/* computes the Pade approximation of degree m of the matrix A */
+void padeapprox(int m, int row, double *A)
+ {
+ int row2 = row*row;
+/* int i1 = 1;*/
+/* double d0 = 0;*/
+/* double d1 = 1;*/
+/* double dm1 = -1;*/
+
+ int ii;
+
+ double *U = malloc(row*row*sizeof(double));
+ double *V = malloc(row*row*sizeof(double));
+
+ if(m==3)
+ {
+ double c[] = {120, 60, 12, 1};
+ double *A0 = malloc(row*row*sizeof(double)); for(ii=0; ii<row2; ii++) A0[ii] = 0.0; for(ii=0; ii<row; ii++) A0[ii*(row+1)] = 1.0;
+ double *A2 = malloc(row*row*sizeof(double));
+// char ta = 'n'; double alpha = 1; double beta = 0;
+// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, &beta, A2, &row);
+ dgemm_nn_3l(row, row, row, A, row, A, row, A2, row);
+ double *temp = malloc(row*row*sizeof(double));
+// dscal_(&row2, &d0, temp, &i1);
+ dscal_3l(row2, 0, temp);
+// daxpy_(&row2, &c[3], A2, &i1, temp, &i1);
+ daxpy_3l(row2, c[3], A2, temp);
+// daxpy_(&row2, &c[1], A0, &i1, temp, &i1);
+ daxpy_3l(row2, c[1], A0, temp);
+// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, temp, &row, &beta, U, &row);
+ dgemm_nn_3l(row, row, row, A, row, temp, row, U, row);
+// dscal_(&row2, &d0, V, &i1);
+ dscal_3l(row2, 0, V);
+// daxpy_(&row2, &c[2], A2, &i1, V, &i1);
+ daxpy_3l(row2, c[2], A2, V);
+// daxpy_(&row2, &c[0], A0, &i1, V, &i1);
+ daxpy_3l(row2, c[0], A0, V);
+ free(A0);
+ free(A2);
+ free(temp);
+ }
+ else if(m==5)
+ {
+ double c[] = {30240, 15120, 3360, 420, 30, 1};
+ double *A0 = malloc(row*row*sizeof(double)); for(ii=0; ii<row2; ii++) A0[ii] = 0.0; for(ii=0; ii<row; ii++) A0[ii*(row+1)] = 1.0;
+ double *A2 = malloc(row*row*sizeof(double));
+ double *A4 = malloc(row*row*sizeof(double));
+// char ta = 'n'; double alpha = 1; double beta = 0;
+// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, &beta, A2, &row);
+ dgemm_nn_3l(row, row, row, A, row, A, row, A2, row);
+// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A2, &row, A2, &row, &beta, A4, &row);
+ dgemm_nn_3l(row, row, row, A2, row, A2, row, A4, row);
+ dmcopy(row, row, A4, row, V, row);
+ double *temp = malloc(row*row*sizeof(double));
+ dmcopy(row, row, A4, row, temp, row);
+// daxpy_(&row2, &c[3], A2, &i1, temp, &i1);
+ daxpy_3l(row2, c[3], A2, temp);
+// daxpy_(&row2, &c[1], A0, &i1, temp, &i1);
+ daxpy_3l(row2, c[1], A0, temp);
+// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, temp, &row, &beta, U, &row);
+ dgemm_nn_3l(row, row, row, A, row, temp, row, U, row);
+// dscal_(&row2, &c[4], V, &i1);
+ dscal_3l(row2, c[4], V);
+// daxpy_(&row2, &c[2], A2, &i1, V, &i1);
+ daxpy_3l(row2, c[2], A2, V);
+// daxpy_(&row2, &c[0], A0, &i1, V, &i1);
+ daxpy_3l(row2, c[0], A0, V);
+ free(A0);
+ free(A2);
+ free(A4);
+ free(temp);
+ }
+ else if(m==7)
+ {
+ double c[] = {17297280, 8648640, 1995840, 277200, 25200, 1512, 56, 1};
+ double *A0 = malloc(row*row*sizeof(double)); for(ii=0; ii<row2; ii++) A0[ii] = 0.0; for(ii=0; ii<row; ii++) A0[ii*(row+1)] = 1.0;
+ double *A2 = malloc(row*row*sizeof(double));
+ double *A4 = malloc(row*row*sizeof(double));
+ double *A6 = malloc(row*row*sizeof(double));
+// char ta = 'n'; double alpha = 1; double beta = 1;
+// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, &beta, A2, &row);
+ dgemm_nn_3l(row, row, row, A, row, A, row, A2, row);
+// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A2, &row, A2, &row, &beta, A4, &row);
+ dgemm_nn_3l(row, row, row, A2, row, A2, row, A4, row);
+// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A4, &row, A2, &row, &beta, A6, &row);
+ dgemm_nn_3l(row, row, row, A4, row, A2, row, A6, row);
+ double *temp = malloc(row*row*sizeof(double));
+// dscal_(&row2, &d0, temp, &i1);
+ dscal_3l(row2, 0, temp);
+// daxpy_(&row2, &c[3], A2, &i1, temp, &i1);
+ daxpy_3l(row2, c[3], A2, temp);
+// daxpy_(&row2, &c[1], A0, &i1, temp, &i1);
+ daxpy_3l(row2, c[1], A0, temp);
+// daxpy_(&row2, &c[5], A4, &i1, temp, &i1);
+ daxpy_3l(row2, c[5], A4, temp);
+// daxpy_(&row2, &c[7], A6, &i1, temp, &i1);
+ daxpy_3l(row2, c[7], A6, temp);
+// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, temp, &row, &beta, U, &row);
+ dgemm_nn_3l(row, row, row, A, row, temp, row, U, row);
+// dscal_(&row2, &d0, V, &i1);
+ dscal_3l(row2, 0, V);
+// daxpy_(&row2, &c[2], A2, &i1, V, &i1);
+ daxpy_3l(row2, c[2], A2, V);
+// daxpy_(&row2, &c[0], A0, &i1, V, &i1);
+ daxpy_3l(row2, c[0], A0, V);
+// daxpy_(&row2, &c[4], A4, &i1, V, &i1);
+ daxpy_3l(row2, c[4], A4, V);
+// daxpy_(&row2, &c[6], A6, &i1, V, &i1);
+ daxpy_3l(row2, c[6], A6, V);
+ free(A0);
+ free(A2);
+ free(A4);
+ free(A6);
+ free(temp);
+ }
+ else if(m==9)
+ {
+ double c[] = {17643225600, 8821612800, 2075673600, 302702400, 30270240, 2162160, 110880, 3960, 90, 1};
+ double *A0 = malloc(row*row*sizeof(double)); for(ii=0; ii<row2; ii++) A0[ii] = 0.0; for(ii=0; ii<row; ii++) A0[ii*(row+1)] = 1.0;
+ double *A2 = malloc(row*row*sizeof(double));
+ double *A4 = malloc(row*row*sizeof(double));
+ double *A6 = malloc(row*row*sizeof(double));
+ double *A8 = malloc(row*row*sizeof(double));
+// char ta = 'n'; double alpha = 1; double beta = 0;
+// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, &beta, A2, &row);
+ dgemm_nn_3l(row, row, row, A, row, A, row, A2, row);
+// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A2, &row, A2, &row, &beta, A4, &row);
+ dgemm_nn_3l(row, row, row, A2, row, A2, row, A4, row);
+// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A4, &row, A2, &row, &beta, A6, &row);
+ dgemm_nn_3l(row, row, row, A4, row, A2, row, A6, row);
+// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A6, &row, A2, &row, &beta, A8, &row);
+ dgemm_nn_3l(row, row, row, A6, row, A2, row, A8, row);
+ dmcopy(row, row, A8, row, V, row);
+ double *temp = malloc(row*row*sizeof(double));
+ dmcopy(row, row, A8, row, temp, row);
+// daxpy_(&row2, &c[3], A2, &i1, temp, &i1);
+ daxpy_3l(row2, c[3], A2, temp);
+// daxpy_(&row2, &c[1], A0, &i1, temp, &i1);
+ daxpy_3l(row2, c[1], A0, temp);
+// daxpy_(&row2, &c[5], A4, &i1, temp, &i1);
+ daxpy_3l(row2, c[5], A4, temp);
+// daxpy_(&row2, &c[7], A6, &i1, temp, &i1);
+ daxpy_3l(row2, c[7], A6, temp);
+// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, temp, &row, &beta, U, &row);
+ dgemm_nn_3l(row, row, row, A, row, temp, row, U, row);
+// dscal_(&row2, &c[8], V, &i1);
+ dscal_3l(row2, c[8], V);
+// daxpy_(&row2, &c[2], A2, &i1, V, &i1);
+ daxpy_3l(row2, c[2], A2, V);
+// daxpy_(&row2, &c[0], A0, &i1, V, &i1);
+ daxpy_3l(row2, c[0], A0, V);
+// daxpy_(&row2, &c[4], A4, &i1, V, &i1);
+ daxpy_3l(row2, c[4], A4, V);
+// daxpy_(&row2, &c[6], A6, &i1, V, &i1);
+ daxpy_3l(row2, c[6], A6, V);
+ free(A0);
+ free(A2);
+ free(A4);
+ free(A6);
+ free(A8);
+ free(temp);
+ }
+ else if(m==13) // tested
+ {
+ double c[] = {64764752532480000, 32382376266240000, 7771770303897600, 1187353796428800, 129060195264000, 10559470521600, 670442572800, 33522128640, 1323241920, 40840800, 960960, 16380, 182, 1};
+ double *A0 = malloc(row*row*sizeof(double)); for(ii=0; ii<row2; ii++) A0[ii] = 0.0; for(ii=0; ii<row; ii++) A0[ii*(row+1)] = 1.0;
+ double *A2 = malloc(row*row*sizeof(double));
+ double *A4 = malloc(row*row*sizeof(double));
+ double *A6 = malloc(row*row*sizeof(double));
+// char ta = 'n'; double alpha = 1; double beta = 0;
+// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, &beta, A2, &row);
+ dgemm_nn_3l(row, row, row, A, row, A, row, A2, row);
+// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A2, &row, A2, &row, &beta, A4, &row);
+ dgemm_nn_3l(row, row, row, A2, row, A2, row, A4, row);
+// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A4, &row, A2, &row, &beta, A6, &row);
+ dgemm_nn_3l(row, row, row, A4, row, A2, row, A6, row);
+ dmcopy(row, row, A2, row, U, row);
+ double *temp = malloc(row*row*sizeof(double));
+// dscal_(&row2, &c[9], U, &i1);
+ dscal_3l(row2, c[9], U);
+// daxpy_(&row2, &c[11], A4, &i1, U, &i1);
+ daxpy_3l(row2, c[11], A4, U);
+// daxpy_(&row2, &c[13], A6, &i1, U, &i1);
+ daxpy_3l(row2, c[13], A6, U);
+// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A6, &row, U, &row, &beta, temp, &row);
+ dgemm_nn_3l(row, row, row, A6, row, U, row, temp, row);
+// daxpy_(&row2, &c[7], A6, &i1, temp, &i1);
+ daxpy_3l(row2, c[7], A6, temp);
+// daxpy_(&row2, &c[5], A4, &i1, temp, &i1);
+ daxpy_3l(row2, c[5], A4, temp);
+// daxpy_(&row2, &c[3], A2, &i1, temp, &i1);
+ daxpy_3l(row2, c[3], A2, temp);
+// daxpy_(&row2, &c[1], A0, &i1, temp, &i1);
+ daxpy_3l(row2, c[1], A0, temp);
+// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, temp, &row, &beta, U, &row);
+ dgemm_nn_3l(row, row, row, A, row, temp, row, U, row);
+ dmcopy(row, row, A2, row, temp, row);
+// dscal_(&row2, &c[8], V, &i1);
+ dscal_3l(row2, c[8], V);
+// daxpy_(&row2, &c[12], A6, &i1, temp, &i1);
+ daxpy_3l(row2, c[12], A6, temp);
+// daxpy_(&row2, &c[10], A4, &i1, temp, &i1);
+ daxpy_3l(row2, c[10], A4, temp);
+// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A6, &row, temp, &row, &beta, V, &row);
+ dgemm_nn_3l(row, row, row, A6, row, temp, row, V, row);
+// daxpy_(&row2, &c[6], A6, &i1, V, &i1);
+ daxpy_3l(row2, c[6], A6, V);
+// daxpy_(&row2, &c[4], A4, &i1, V, &i1);
+ daxpy_3l(row2, c[4], A4, V);
+// daxpy_(&row2, &c[2], A2, &i1, V, &i1);
+ daxpy_3l(row2, c[2], A2, V);
+// daxpy_(&row2, &c[0], A0, &i1, V, &i1);
+ daxpy_3l(row2, c[0], A0, V);
+ free(A0);
+ free(A2);
+ free(A4);
+ free(A6);
+ free(temp);
+ }
+ else
+ {
+ printf("%s\n", "Wrong Pade approximatin degree");
+ exit(1);
+ }
+ double *D = malloc(row*row*sizeof(double));
+// dcopy_(&row2, V, &i1, A, &i1);
+ dmcopy(row, row, V, row, A, row);
+// daxpy_(&row2, &d1, U, &i1, A, &i1);
+ daxpy_3l(row2, 1.0, U, A);
+// dcopy_(&row2, V, &i1, D, &i1);
+ dmcopy(row, row, V, row, D, row);
+// daxpy_(&row2, &dm1, U, &i1, D, &i1);
+ daxpy_3l(row2, -1.0, U, D);
+ int *ipiv = (int *) malloc(row*sizeof(int));
+ int info = 0;
+// dgesv_(&row, &row, D, &row, ipiv, A, &row, &info);
+ dgesv_3l(row, row, D, row, ipiv, A, row, &info);
+ free(ipiv);
+ free(D);
+ free(U);
+ free(V);
+ }
+
+
+
+void expm(int row, double *A)
+ {
+
+ int i;
+
+ int m_vals[] = {3, 5, 7, 9, 13};
+ double theta[] = {0.01495585217958292, 0.2539398330063230, 0.9504178996162932, 2.097847961257068, 5.371920351148152};
+ int lentheta = 5;
+
+ double normA = onenorm(row, row, A);
+
+ if(normA<=theta[4])
+ {
+ for(i=0; i<lentheta; i++)
+ {
+ if(normA<=theta[i])
+ {
+ padeapprox(m_vals[i], row, A);
+ break;
+ }
+ }
+ }
+ else
+ {
+ int s;
+ double t = frexp(normA/(theta[4]), &s);
+ s = s - (t==0.5);
+ t = pow(2,-s);
+ int row2 = row*row;
+/* int i1 = 1;*/
+// dscal_(&row2, &t, A, &i1);
+ dscal_3l(row2, t, A);
+ padeapprox(m_vals[4], row, A);
+ double *temp = malloc(row*row*sizeof(double));
+// char ta = 'n'; double alpha = 1; double beta = 0;
+ for(i=0; i<s; i++)
+ {
+// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, &beta, temp, &row);
+ dgemm_nn_3l(row, row, row, A, row, A, row, temp, row);
+ dmcopy(row, row, temp, row, A, row);
+ }
+ free(temp);
+ }
+ }
+
+
diff --git a/test_problems/d_tools.h b/test_problems/d_tools.h
new file mode 100644
index 0000000..137b9ad
--- /dev/null
+++ b/test_problems/d_tools.h
@@ -0,0 +1,38 @@
+/**************************************************************************************************
+* *
+* This file is part of HPMPC. *
+* *
+* HPMPC -- Library for High-Performance implementation of solvers for MPC. *
+* Copyright (C) 2014-2015 by Technical University of Denmark. All rights reserved. *
+* *
+* HPMPC is free software; you can redistribute it and/or *
+* modify it under the terms of the GNU Lesser General Public *
+* License as published by the Free Software Foundation; either *
+* version 2.1 of the License, or (at your option) any later version. *
+* *
+* HPMPC is distributed in the hope that it will be useful, *
+* but WITHOUT ANY WARRANTY; without even the implied warranty of *
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
+* See the GNU Lesser General Public License for more details. *
+* *
+* You should have received a copy of the GNU Lesser General Public *
+* License along with HPMPC; if not, write to the Free Software *
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA *
+* *
+* Author: Gianluca Frison, giaf (at) dtu.dk *
+* *
+**************************************************************************************************/
+
+void dgemv_n_3l(int m, int n, double *A, int lda , double *x, double *z);
+void dgemm_nn_3l(int m, int n, int k, double *A, int lda , double *B, int ldb, double *C, int ldc);
+void daxpy_3l(int n, double da, double *dx, double *dy);
+void dscal_3l(int n, double da, double *dx);
+
+/* copies a matrix into another matrix */
+void dmcopy(int row, int col, double *ptrA, int lda, double *ptrB, int ldb);
+
+/* solution of a system of linear equations */
+void dgesv_3l(int n, int nrhs, double *A, int lda, int *ipiv, double *B, int ldb, int *info);
+
+/* matrix exponential */
+void expm(int row, double *A);
diff --git a/test_problems/s_tools.c b/test_problems/s_tools.c
new file mode 100644
index 0000000..36a655f
--- /dev/null
+++ b/test_problems/s_tools.c
@@ -0,0 +1,715 @@
+/**************************************************************************************************
+* *
+* This file is part of HPMPC. *
+* *
+* HPMPC -- Library for High-Performance implementation of solvers for MPC. *
+* Copyright (C) 2014-2015 by Technical University of Denmark. All rights reserved. *
+* *
+* HPMPC is free software; you can redistribute it and/or *
+* modify it under the terms of the GNU Lesser General Public *
+* License as published by the Free Software Foundation; either *
+* version 2.1 of the License, or (at your option) any later version. *
+* *
+* HPMPC is distributed in the hope that it will be useful, *
+* but WITHOUT ANY WARRANTY; without even the implied warranty of *
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
+* See the GNU Lesser General Public License for more details. *
+* *
+* You should have received a copy of the GNU Lesser General Public *
+* License along with HPMPC; if not, write to the Free Software *
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA *
+* *
+* Author: Gianluca Frison, giaf (at) dtu.dk *
+* *
+**************************************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include <blasfeo_s_aux_ext_dep.h>
+
+
+
+// matrix-vector multiplication
+void sgemv_n_3l(int m, int n, float *A, int lda , float *x, float *z)
+ {
+
+ int ii, jj;
+
+ for(ii=0; ii<m; ii++)
+ {
+ z[ii] = 0.0;
+ for(jj=0; jj<n; jj++)
+ {
+ z[ii] += A[ii+lda*jj] * x[jj];
+ }
+ }
+
+ return;
+
+ }
+
+
+// matrix-matrix multiplication
+void sgemm_nn_3l(int m, int n, int k, float *A, int lda , float *B, int ldb, float *C, int ldc)
+ {
+
+ int ii, jj, kk;
+
+ for(jj=0; jj<n; jj++)
+ {
+ for(ii=0; ii<m; ii++)
+ {
+ C[ii+ldc*jj] = 0;
+ for(kk=0; kk<k; kk++)
+ {
+ C[ii+ldc*jj] += A[ii+lda*kk] * B[kk+ldb*jj];
+ }
+ }
+ }
+
+ return;
+
+ }
+
+
+void saxpy_3l(int n, float da, float *dx, float *dy)
+ {
+ int i;
+ for(i=0; i<n; i++)
+ {
+ dy[i] += da*dx[i];
+ }
+ }
+
+
+
+void sscal_3l(int n, float da, float *dx)
+ {
+ int i;
+ for(i=0; i<n; i++)
+ {
+ dx[i] *= da;
+ }
+ }
+
+
+
+/************************************************
+ Routine that copies a matrix
+************************************************/
+void smcopy(int row, int col, float *A, int lda, float *B, int ldb)
+ {
+ int i, j;
+ for(j=0; j<col; j++)
+ {
+ for(i=0; i<row; i++)
+ {
+ B[i+j*ldb] = A[i+j*lda];
+ }
+ }
+ }
+
+
+
+int idamax_3l(int n, float *x)
+ {
+
+ if(n<=0)
+ return 0;
+ if(n==1)
+ return 0;
+
+ float dabs;
+ float dmax = (x[0]>0 ? x[0] : -x[0]);
+ int idmax = 0;
+ int jj;
+ for(jj=1; jj<n; jj++)
+ {
+ dabs = (x[jj]>0 ? x[jj] : -x[jj]);
+ if(dabs>dmax)
+ {
+ dmax = dabs;
+ idmax = jj;
+ }
+ }
+
+ return idmax;
+
+ }
+
+
+
+void sswap_3l(int n, float *x, int incx, float *y, int incy)
+ {
+
+ if(n<=0)
+ return;
+
+ float temp;
+ int jj;
+ for(jj=0; jj<n; jj++)
+ {
+ temp = x[0];
+ x[0] = y[0];
+ y[0] = temp;
+ x += incx;
+ y += incy;
+ }
+
+ }
+
+
+
+void sger_3l(int m, int n, float alpha, float *x, int incx, float *y, int incy, float *A, int lda)
+ {
+
+ if(m==0 || n==0 || alpha==0.0)
+ return;
+
+ int i, j;
+ float *px, *py, temp;
+
+ py = y;
+ for(j=0; j<n; j++)
+ {
+ temp = alpha * py[0];
+ px = x;
+ for(i=0; i<m; i++)
+ {
+ A[i+lda*j] += px[0] * temp;
+ px += incx;
+ }
+ py += incy;
+ }
+
+ return;
+
+ }
+
+
+
+void sgetf2_3l(int m, int n, float *A, int lda, int *ipiv, int *info)
+ {
+
+ if(m<=0 || n<=0)
+ return;
+
+ int i, j, jp;
+
+ float Ajj;
+
+ int size_min = ( m<n ? m : n );
+
+ for(j=0; j<size_min; j++)
+ // find the pivot and test for singularity
+ {
+ jp = j + idamax_3l(m-j, &A[j+lda*j]);
+ ipiv[j] = jp;
+ if( A[jp+lda*j]!=0)
+ {
+ // apply the interchange to columns 0:n-1
+ if(jp!=j)
+ {
+ sswap_3l(n, &A[j], lda, &A[jp], lda);
+ }
+ // compute elements j+1:m-1 of j-th column
+ if(j<m-1)
+ {
+ Ajj = A[j+lda*j];
+ if( ( Ajj>0 ? Ajj : -Ajj ) >= 2.22e-16 )
+ {
+ sscal_3l(m-j-1, 1.0/Ajj, &A[j+1+lda*j]);
+ }
+ else
+ {
+ for(i=j+1; i<m; i++)
+ {
+ A[i+lda*j] /= Ajj;
+ }
+ }
+ }
+ }
+ else if(*info==0)
+ {
+ *info = j+1;
+ }
+
+ if( j < size_min )
+ {
+ // update trailing submatrix
+ sger_3l(m-j-1, n-j-1, -1.0, &A[j+1+lda*j], 1, &A[j+lda*(j+1)], lda, &A[j+1+lda*(j+1)], lda);
+ }
+
+ }
+
+ return;
+
+ }
+
+
+
+void slaswp_3l(int n, float *A, int lda, int k1, int k2, int *ipiv)
+ {
+
+ int i, j, k, ix, ix0, i1, i2, n32, ip;
+ float temp;
+
+ ix0 = k1;
+ i1 = k1;
+ i2 = k2;
+
+ n32 = (n/32)*32;
+ if(n32!=0)
+ {
+ for(j=0; j<n32; j+=32)
+ {
+ ix = ix0;
+ for(i=i1; i<i2; i++)
+ {
+ ip = ipiv[ix];
+ if(ip!=i)
+ {
+ for(k=j; k<j+32; k++)
+ {
+ temp = A[i+lda*k];
+ A[i+lda*k] = A[ip+lda*k];
+ A[ip+lda*k] = temp;
+ }
+ }
+ ix++;
+ }
+ }
+ }
+ if(n32!=n)
+ {
+ ix = ix0;
+ for(i=i1; i<i2; i++)
+ {
+ ip = ipiv[ix];
+ if(ip!=i)
+ {
+ for(k=n32; k<n; k++)
+ {
+ temp = A[i+lda*k];
+ A[i+lda*k] = A[ip+lda*k];
+ A[ip+lda*k] = temp;
+ }
+ }
+ ix++;
+ }
+ }
+
+ return;
+
+ }
+
+
+
+// left lower no-transp unit
+void strsm_l_l_n_u_3l(int m, int n, float *A, int lda, float *B, int ldb)
+ {
+
+ if(m==0 || n==0)
+ return;
+
+ int i, j, k;
+
+ for(j=0; j<n; j++)
+ {
+ for(k=0; k<m; k++)
+ {
+ for(i=k+1; i<m; i++)
+ {
+ B[i+ldb*j] -= B[k+ldb*j] * A[i+lda*k];
+ }
+ }
+ }
+
+ return;
+
+ }
+
+
+
+// left upper no-transp non-unit
+void strsm_l_u_n_n_3l(int m, int n, float *A, int lda, float *B, int ldb)
+ {
+
+ if(m==0 || n==0)
+ return;
+
+ int i, j, k;
+
+ for(j=0; j<n; j++)
+ {
+ for(k=m-1; k>=0; k--)
+ {
+ B[k+ldb*j] /= A[k+lda*k];
+ for(i=0; i<k; i++)
+ {
+ B[i+ldb*j] -= B[k+ldb*j] * A[i+lda*k];
+ }
+ }
+ }
+
+ return;
+
+ }
+
+
+
+void sgetrs_3l(int n, int nrhs, float *A, int lda, int *ipiv, float *B, int ldb, int *info)
+ {
+
+ if(n==0 || nrhs==0)
+ return;
+
+ // solve A * X = B
+
+ // apply row interchanges to the rhs
+ slaswp_3l(nrhs, B, ldb, 0, n, ipiv);
+
+ // solve L*X = B, overwriting B with X
+ strsm_l_l_n_u_3l(n, nrhs, A, lda, B, ldb);
+
+ // solve U*X = B, overwriting B with X
+ strsm_l_u_n_n_3l(n, nrhs, A, lda, B, ldb);
+
+ return;
+
+ }
+
+
+
+void sgesv_3l(int n, int nrhs, float *A, int lda, int *ipiv, float *B, int ldb, int *info)
+ {
+
+ // compute the LU factorization of A
+ sgetf2_3l(n, n, A, lda, ipiv, info);
+
+ if(*info==0)
+ {
+ // solve the system A*X = B, overwriting B with X
+ sgetrs_3l(n, nrhs, A, lda, ipiv, B, ldb, info);
+ }
+
+ return;
+
+ }
+
+
+
+/* one norm of a matrix */
+float onenorm(int row, int col, float *ptrA)
+ {
+ float max, temp;
+ int i, j;
+ temp = 0;
+ for(j=0; j<col; j++)
+ {
+ temp = fabs(*(ptrA+j*row));
+ for(i=1; i<row; i++)
+ {
+ temp += fabs(*(ptrA+j*row+i));
+ }
+ if(j==0) max = temp;
+ else if(max>temp) temp = max;
+ }
+ return temp;
+ }
+
+
+
+/* computes the Pade approximation of degree m of the matrix A */
+void padeapprox(int m, int row, float *A)
+ {
+ int row2 = row*row;
+/* int i1 = 1;*/
+/* float d0 = 0;*/
+/* float d1 = 1;*/
+/* float dm1 = -1;*/
+
+ int ii;
+
+ float *U = malloc(row*row*sizeof(float));
+ float *V = malloc(row*row*sizeof(float));
+
+ if(m==3)
+ {
+ float c[] = {120, 60, 12, 1};
+ float *A0 = malloc(row*row*sizeof(float)); for(ii=0; ii<row2; ii++) A0[ii] = 0.0; for(ii=0; ii<row; ii++) A0[ii*(row+1)] = 1.0;
+ float *A2 = malloc(row*row*sizeof(float));
+// char ta = 'n'; float alpha = 1; float beta = 0;
+// sgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, &beta, A2, &row);
+ sgemm_nn_3l(row, row, row, A, row, A, row, A2, row);
+ float *temp = malloc(row*row*sizeof(float));
+// sscal_(&row2, &d0, temp, &i1);
+ sscal_3l(row2, 0, temp);
+// saxpy_(&row2, &c[3], A2, &i1, temp, &i1);
+ saxpy_3l(row2, c[3], A2, temp);
+// saxpy_(&row2, &c[1], A0, &i1, temp, &i1);
+ saxpy_3l(row2, c[1], A0, temp);
+// sgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, temp, &row, &beta, U, &row);
+ sgemm_nn_3l(row, row, row, A, row, temp, row, U, row);
+// sscal_(&row2, &d0, V, &i1);
+ sscal_3l(row2, 0, V);
+// saxpy_(&row2, &c[2], A2, &i1, V, &i1);
+ saxpy_3l(row2, c[2], A2, V);
+// saxpy_(&row2, &c[0], A0, &i1, V, &i1);
+ saxpy_3l(row2, c[0], A0, V);
+ free(A0);
+ free(A2);
+ free(temp);
+ }
+ else if(m==5)
+ {
+ float c[] = {30240, 15120, 3360, 420, 30, 1};
+ float *A0 = malloc(row*row*sizeof(float)); for(ii=0; ii<row2; ii++) A0[ii] = 0.0; for(ii=0; ii<row; ii++) A0[ii*(row+1)] = 1.0;
+ float *A2 = malloc(row*row*sizeof(float));
+ float *A4 = malloc(row*row*sizeof(float));
+// char ta = 'n'; float alpha = 1; float beta = 0;
+// sgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, &beta, A2, &row);
+ sgemm_nn_3l(row, row, row, A, row, A, row, A2, row);
+// sgemm_(&ta, &ta, &row, &row, &row, &alpha, A2, &row, A2, &row, &beta, A4, &row);
+ sgemm_nn_3l(row, row, row, A2, row, A2, row, A4, row);
+ smcopy(row, row, A4, row, V, row);
+ float *temp = malloc(row*row*sizeof(float));
+ smcopy(row, row, A4, row, temp, row);
+// saxpy_(&row2, &c[3], A2, &i1, temp, &i1);
+ saxpy_3l(row2, c[3], A2, temp);
+// saxpy_(&row2, &c[1], A0, &i1, temp, &i1);
+ saxpy_3l(row2, c[1], A0, temp);
+// sgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, temp, &row, &beta, U, &row);
+ sgemm_nn_3l(row, row, row, A, row, temp, row, U, row);
+// sscal_(&row2, &c[4], V, &i1);
+ sscal_3l(row2, c[4], V);
+// saxpy_(&row2, &c[2], A2, &i1, V, &i1);
+ saxpy_3l(row2, c[2], A2, V);
+// saxpy_(&row2, &c[0], A0, &i1, V, &i1);
+ saxpy_3l(row2, c[0], A0, V);
+ free(A0);
+ free(A2);
+ free(A4);
+ free(temp);
+ }
+ else if(m==7)
+ {
+ float c[] = {17297280, 8648640, 1995840, 277200, 25200, 1512, 56, 1};
+ float *A0 = malloc(row*row*sizeof(float)); for(ii=0; ii<row2; ii++) A0[ii] = 0.0; for(ii=0; ii<row; ii++) A0[ii*(row+1)] = 1.0;
+ float *A2 = malloc(row*row*sizeof(float));
+ float *A4 = malloc(row*row*sizeof(float));
+ float *A6 = malloc(row*row*sizeof(float));
+// char ta = 'n'; float alpha = 1; float beta = 1;
+// sgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, &beta, A2, &row);
+ sgemm_nn_3l(row, row, row, A, row, A, row, A2, row);
+// sgemm_(&ta, &ta, &row, &row, &row, &alpha, A2, &row, A2, &row, &beta, A4, &row);
+ sgemm_nn_3l(row, row, row, A2, row, A2, row, A4, row);
+// sgemm_(&ta, &ta, &row, &row, &row, &alpha, A4, &row, A2, &row, &beta, A6, &row);
+ sgemm_nn_3l(row, row, row, A4, row, A2, row, A6, row);
+ float *temp = malloc(row*row*sizeof(float));
+// sscal_(&row2, &d0, temp, &i1);
+ sscal_3l(row2, 0, temp);
+// saxpy_(&row2, &c[3], A2, &i1, temp, &i1);
+ saxpy_3l(row2, c[3], A2, temp);
+// saxpy_(&row2, &c[1], A0, &i1, temp, &i1);
+ saxpy_3l(row2, c[1], A0, temp);
+// saxpy_(&row2, &c[5], A4, &i1, temp, &i1);
+ saxpy_3l(row2, c[5], A4, temp);
+// saxpy_(&row2, &c[7], A6, &i1, temp, &i1);
+ saxpy_3l(row2, c[7], A6, temp);
+// sgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, temp, &row, &beta, U, &row);
+ sgemm_nn_3l(row, row, row, A, row, temp, row, U, row);
+// sscal_(&row2, &d0, V, &i1);
+ sscal_3l(row2, 0, V);
+// saxpy_(&row2, &c[2], A2, &i1, V, &i1);
+ saxpy_3l(row2, c[2], A2, V);
+// saxpy_(&row2, &c[0], A0, &i1, V, &i1);
+ saxpy_3l(row2, c[0], A0, V);
+// saxpy_(&row2, &c[4], A4, &i1, V, &i1);
+ saxpy_3l(row2, c[4], A4, V);
+// saxpy_(&row2, &c[6], A6, &i1, V, &i1);
+ saxpy_3l(row2, c[6], A6, V);
+ free(A0);
+ free(A2);
+ free(A4);
+ free(A6);
+ free(temp);
+ }
+ else if(m==9)
+ {
+ float c[] = {17643225600, 8821612800, 2075673600, 302702400, 30270240, 2162160, 110880, 3960, 90, 1};
+ float *A0 = malloc(row*row*sizeof(float)); for(ii=0; ii<row2; ii++) A0[ii] = 0.0; for(ii=0; ii<row; ii++) A0[ii*(row+1)] = 1.0;
+ float *A2 = malloc(row*row*sizeof(float));
+ float *A4 = malloc(row*row*sizeof(float));
+ float *A6 = malloc(row*row*sizeof(float));
+ float *A8 = malloc(row*row*sizeof(float));
+// char ta = 'n'; float alpha = 1; float beta = 0;
+// sgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, &beta, A2, &row);
+ sgemm_nn_3l(row, row, row, A, row, A, row, A2, row);
+// sgemm_(&ta, &ta, &row, &row, &row, &alpha, A2, &row, A2, &row, &beta, A4, &row);
+ sgemm_nn_3l(row, row, row, A2, row, A2, row, A4, row);
+// sgemm_(&ta, &ta, &row, &row, &row, &alpha, A4, &row, A2, &row, &beta, A6, &row);
+ sgemm_nn_3l(row, row, row, A4, row, A2, row, A6, row);
+// sgemm_(&ta, &ta, &row, &row, &row, &alpha, A6, &row, A2, &row, &beta, A8, &row);
+ sgemm_nn_3l(row, row, row, A6, row, A2, row, A8, row);
+ smcopy(row, row, A8, row, V, row);
+ float *temp = malloc(row*row*sizeof(float));
+ smcopy(row, row, A8, row, temp, row);
+// saxpy_(&row2, &c[3], A2, &i1, temp, &i1);
+ saxpy_3l(row2, c[3], A2, temp);
+// saxpy_(&row2, &c[1], A0, &i1, temp, &i1);
+ saxpy_3l(row2, c[1], A0, temp);
+// saxpy_(&row2, &c[5], A4, &i1, temp, &i1);
+ saxpy_3l(row2, c[5], A4, temp);
+// saxpy_(&row2, &c[7], A6, &i1, temp, &i1);
+ saxpy_3l(row2, c[7], A6, temp);
+// sgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, temp, &row, &beta, U, &row);
+ sgemm_nn_3l(row, row, row, A, row, temp, row, U, row);
+// sscal_(&row2, &c[8], V, &i1);
+ sscal_3l(row2, c[8], V);
+// saxpy_(&row2, &c[2], A2, &i1, V, &i1);
+ saxpy_3l(row2, c[2], A2, V);
+// saxpy_(&row2, &c[0], A0, &i1, V, &i1);
+ saxpy_3l(row2, c[0], A0, V);
+// saxpy_(&row2, &c[4], A4, &i1, V, &i1);
+ saxpy_3l(row2, c[4], A4, V);
+// saxpy_(&row2, &c[6], A6, &i1, V, &i1);
+ saxpy_3l(row2, c[6], A6, V);
+ free(A0);
+ free(A2);
+ free(A4);
+ free(A6);
+ free(A8);
+ free(temp);
+ }
+ else if(m==13) // tested
+ {
+ float c[] = {64764752532480000, 32382376266240000, 7771770303897600, 1187353796428800, 129060195264000, 10559470521600, 670442572800, 33522128640, 1323241920, 40840800, 960960, 16380, 182, 1};
+ float *A0 = malloc(row*row*sizeof(float)); for(ii=0; ii<row2; ii++) A0[ii] = 0.0; for(ii=0; ii<row; ii++) A0[ii*(row+1)] = 1.0;
+ float *A2 = malloc(row*row*sizeof(float));
+ float *A4 = malloc(row*row*sizeof(float));
+ float *A6 = malloc(row*row*sizeof(float));
+// char ta = 'n'; float alpha = 1; float beta = 0;
+// sgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, &beta, A2, &row);
+ sgemm_nn_3l(row, row, row, A, row, A, row, A2, row);
+// sgemm_(&ta, &ta, &row, &row, &row, &alpha, A2, &row, A2, &row, &beta, A4, &row);
+ sgemm_nn_3l(row, row, row, A2, row, A2, row, A4, row);
+// sgemm_(&ta, &ta, &row, &row, &row, &alpha, A4, &row, A2, &row, &beta, A6, &row);
+ sgemm_nn_3l(row, row, row, A4, row, A2, row, A6, row);
+ smcopy(row, row, A2, row, U, row);
+ float *temp = malloc(row*row*sizeof(float));
+// sscal_(&row2, &c[9], U, &i1);
+ sscal_3l(row2, c[9], U);
+// saxpy_(&row2, &c[11], A4, &i1, U, &i1);
+ saxpy_3l(row2, c[11], A4, U);
+// saxpy_(&row2, &c[13], A6, &i1, U, &i1);
+ saxpy_3l(row2, c[13], A6, U);
+// sgemm_(&ta, &ta, &row, &row, &row, &alpha, A6, &row, U, &row, &beta, temp, &row);
+ sgemm_nn_3l(row, row, row, A6, row, U, row, temp, row);
+// saxpy_(&row2, &c[7], A6, &i1, temp, &i1);
+ saxpy_3l(row2, c[7], A6, temp);
+// saxpy_(&row2, &c[5], A4, &i1, temp, &i1);
+ saxpy_3l(row2, c[5], A4, temp);
+// saxpy_(&row2, &c[3], A2, &i1, temp, &i1);
+ saxpy_3l(row2, c[3], A2, temp);
+// saxpy_(&row2, &c[1], A0, &i1, temp, &i1);
+ saxpy_3l(row2, c[1], A0, temp);
+// sgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, temp, &row, &beta, U, &row);
+ sgemm_nn_3l(row, row, row, A, row, temp, row, U, row);
+ smcopy(row, row, A2, row, temp, row);
+// sscal_(&row2, &c[8], V, &i1);
+ sscal_3l(row2, c[8], V);
+// saxpy_(&row2, &c[12], A6, &i1, temp, &i1);
+ saxpy_3l(row2, c[12], A6, temp);
+// saxpy_(&row2, &c[10], A4, &i1, temp, &i1);
+ saxpy_3l(row2, c[10], A4, temp);
+// sgemm_(&ta, &ta, &row, &row, &row, &alpha, A6, &row, temp, &row, &beta, V, &row);
+ sgemm_nn_3l(row, row, row, A6, row, temp, row, V, row);
+// saxpy_(&row2, &c[6], A6, &i1, V, &i1);
+ saxpy_3l(row2, c[6], A6, V);
+// saxpy_(&row2, &c[4], A4, &i1, V, &i1);
+ saxpy_3l(row2, c[4], A4, V);
+// saxpy_(&row2, &c[2], A2, &i1, V, &i1);
+ saxpy_3l(row2, c[2], A2, V);
+// saxpy_(&row2, &c[0], A0, &i1, V, &i1);
+ saxpy_3l(row2, c[0], A0, V);
+ free(A0);
+ free(A2);
+ free(A4);
+ free(A6);
+ free(temp);
+ }
+ else
+ {
+ printf("%s\n", "Wrong Pade approximatin degree");
+ exit(1);
+ }
+ float *D = malloc(row*row*sizeof(float));
+// dcopy_(&row2, V, &i1, A, &i1);
+ smcopy(row, row, V, row, A, row);
+// saxpy_(&row2, &d1, U, &i1, A, &i1);
+ saxpy_3l(row2, 1.0, U, A);
+// dcopy_(&row2, V, &i1, D, &i1);
+ smcopy(row, row, V, row, D, row);
+// saxpy_(&row2, &dm1, U, &i1, D, &i1);
+ saxpy_3l(row2, -1.0, U, D);
+ int *ipiv = (int *) malloc(row*sizeof(int));
+ int info = 0;
+// dgesv_(&row, &row, D, &row, ipiv, A, &row, &info);
+ sgesv_3l(row, row, D, row, ipiv, A, row, &info);
+ free(ipiv);
+ free(D);
+ free(U);
+ free(V);
+ }
+
+
+
+void expm(int row, float *A)
+ {
+
+ int i;
+
+ int m_vals[] = {3, 5, 7, 9, 13};
+ float theta[] = {0.01495585217958292, 0.2539398330063230, 0.9504178996162932, 2.097847961257068, 5.371920351148152};
+ int lentheta = 5;
+
+ float normA = onenorm(row, row, A);
+
+ if(normA<=theta[4])
+ {
+ for(i=0; i<lentheta; i++)
+ {
+ if(normA<=theta[i])
+ {
+ padeapprox(m_vals[i], row, A);
+ break;
+ }
+ }
+ }
+ else
+ {
+ int s;
+ float t = frexp(normA/(theta[4]), &s);
+ s = s - (t==0.5);
+ t = pow(2,-s);
+ int row2 = row*row;
+/* int i1 = 1;*/
+// sscal_(&row2, &t, A, &i1);
+ sscal_3l(row2, t, A);
+ padeapprox(m_vals[4], row, A);
+ float *temp = malloc(row*row*sizeof(float));
+// char ta = 'n'; float alpha = 1; float beta = 0;
+ for(i=0; i<s; i++)
+ {
+// sgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, &beta, temp, &row);
+ sgemm_nn_3l(row, row, row, A, row, A, row, temp, row);
+ smcopy(row, row, temp, row, A, row);
+ }
+ free(temp);
+ }
+ }
+
+
diff --git a/test_problems/s_tools.h b/test_problems/s_tools.h
new file mode 100644
index 0000000..8a8ee87
--- /dev/null
+++ b/test_problems/s_tools.h
@@ -0,0 +1,38 @@
+/**************************************************************************************************
+* *
+* This file is part of HPMPC. *
+* *
+* HPMPC -- Library for High-Performance implementation of solvers for MPC. *
+* Copyright (C) 2014-2015 by Technical University of Denmark. All rights reserved. *
+* *
+* HPMPC is free software; you can redistribute it and/or *
+* modify it under the terms of the GNU Lesser General Public *
+* License as published by the Free Software Foundation; either *
+* version 2.1 of the License, or (at your option) any later version. *
+* *
+* HPMPC is distributed in the hope that it will be useful, *
+* but WITHOUT ANY WARRANTY; without even the implied warranty of *
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
+* See the GNU Lesser General Public License for more details. *
+* *
+* You should have received a copy of the GNU Lesser General Public *
+* License along with HPMPC; if not, write to the Free Software *
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA *
+* *
+* Author: Gianluca Frison, giaf (at) dtu.dk *
+* *
+**************************************************************************************************/
+
+void sgemv_n_3l(int m, int n, float *A, int lda , float *x, float *z);
+void sgemm_nn_3l(int m, int n, int k, float *A, int lda , float *B, int ldb, float *C, int ldc);
+void saxpy_3l(int n, float da, float *dx, float *dy);
+void sscal_3l(int n, float da, float *dx);
+
+/* copies a matrix into another matrix */
+void smcopy(int row, int col, float *ptrA, int lda, float *ptrB, int ldb);
+
+/* solution of a system of linear equations */
+void sgesv_3l(int n, int nrhs, float *A, int lda, int *ipiv, float *B, int ldb, int *info);
+
+/* matrix exponential */
+void expm(int row, float *A);
diff --git a/test_problems/test_d_cond.c b/test_problems/test_d_cond.c
new file mode 100644
index 0000000..61f3264
--- /dev/null
+++ b/test_problems/test_d_cond.c
@@ -0,0 +1,778 @@
+/**************************************************************************************************
+* *
+* This file is part of HPIPM. *
+* *
+* HPIPM -- High Performance Interior Point Method. *
+* Copyright (C) 2017 by Gianluca Frison. *
+* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
+* All rights reserved. *
+* *
+* HPMPC is free software; you can redistribute it and/or *
+* modify it under the terms of the GNU Lesser General Public *
+* License as published by the Free Software Foundation; either *
+* version 2.1 of the License, or (at your option) any later version. *
+* *
+* HPMPC is distributed in the hope that it will be useful, *
+* but WITHOUT ANY WARRANTY; without even the implied warranty of *
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
+* See the GNU Lesser General Public License for more details. *
+* *
+* You should have received a copy of the GNU Lesser General Public *
+* License along with HPMPC; if not, write to the Free Software *
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA *
+* *
+* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
+* *
+**************************************************************************************************/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <sys/time.h>
+
+#include <blasfeo_target.h>
+#include <blasfeo_common.h>
+#include <blasfeo_v_aux_ext_dep.h>
+#include <blasfeo_d_aux_ext_dep.h>
+#include <blasfeo_i_aux_ext_dep.h>
+#include <blasfeo_d_aux.h>
+#include <blasfeo_d_blas.h>
+
+#include "../include/hpipm_d_ocp_qp.h"
+#include "../include/hpipm_d_ocp_qp_sol.h"
+#include "../include/hpipm_d_dense_qp.h"
+#include "../include/hpipm_d_dense_qp_sol.h"
+#include "../include/hpipm_d_dense_qp_ipm_hard.h"
+#include "../include/hpipm_d_cond.h"
+
+#include "d_tools.h"
+
+
+
+#define KEEP_X0 0
+
+#define PRINT 1
+
+
+
+/************************************************
+Mass-spring system: nx/2 masses connected each other with springs (in a row), and the first and the last one to walls. nu (<=nx) controls act on the first nu masses. The system is sampled with sampling time Ts.
+************************************************/
+void mass_spring_system(double Ts, int nx, int nu, int N, double *A, double *B, double *b, double *x0)
+ {
+
+ int nx2 = nx*nx;
+
+ int info = 0;
+
+ int pp = nx/2; // number of masses
+
+/************************************************
+* build the continuous time system
+************************************************/
+
+ double *T; d_zeros(&T, pp, pp);
+ int ii;
+ for(ii=0; ii<pp; ii++) T[ii*(pp+1)] = -2;
+ for(ii=0; ii<pp-1; ii++) T[ii*(pp+1)+1] = 1;
+ for(ii=1; ii<pp; ii++) T[ii*(pp+1)-1] = 1;
+
+ double *Z; d_zeros(&Z, pp, pp);
+ double *I; d_zeros(&I, pp, pp); for(ii=0; ii<pp; ii++) I[ii*(pp+1)]=1.0; // = eye(pp);
+ double *Ac; d_zeros(&Ac, nx, nx);
+ dmcopy(pp, pp, Z, pp, Ac, nx);
+ dmcopy(pp, pp, T, pp, Ac+pp, nx);
+ dmcopy(pp, pp, I, pp, Ac+pp*nx, nx);
+ dmcopy(pp, pp, Z, pp, Ac+pp*(nx+1), nx);
+ free(T);
+ free(Z);
+ free(I);
+
+ d_zeros(&I, nu, nu); for(ii=0; ii<nu; ii++) I[ii*(nu+1)]=1.0; //I = eye(nu);
+ double *Bc; d_zeros(&Bc, nx, nu);
+ dmcopy(nu, nu, I, nu, Bc+pp, nx);
+ free(I);
+
+/************************************************
+* compute the discrete time system
+************************************************/
+
+ double *bb; d_zeros(&bb, nx, 1);
+ dmcopy(nx, 1, bb, nx, b, nx);
+
+ dmcopy(nx, nx, Ac, nx, A, nx);
+ dscal_3l(nx2, Ts, A);
+ expm(nx, A);
+
+ d_zeros(&T, nx, nx);
+ d_zeros(&I, nx, nx); for(ii=0; ii<nx; ii++) I[ii*(nx+1)]=1.0; //I = eye(nx);
+ dmcopy(nx, nx, A, nx, T, nx);
+ daxpy_3l(nx2, -1.0, I, T);
+ dgemm_nn_3l(nx, nu, nx, T, nx, Bc, nx, B, nx);
+ free(T);
+ free(I);
+
+ int *ipiv = (int *) malloc(nx*sizeof(int));
+ dgesv_3l(nx, nu, Ac, nx, ipiv, B, nx, &info);
+ free(ipiv);
+
+ free(Ac);
+ free(Bc);
+ free(bb);
+
+
+/************************************************
+* initial state
+************************************************/
+
+ if(nx==4)
+ {
+ x0[0] = 5;
+ x0[1] = 10;
+ x0[2] = 15;
+ x0[3] = 20;
+ }
+ else
+ {
+ int jj;
+ for(jj=0; jj<nx; jj++)
+ x0[jj] = 1;
+ }
+
+ }
+
+
+
+int main()
+ {
+
+
+ // local variables
+
+ int ii, jj;
+
+ int rep, nrep=1000;
+
+ struct timeval tv0, tv1;
+
+
+
+ // problem size
+
+ int nx_ = 8; // number of states (it has to be even for the mass-spring system test problem)
+ int nu_ = 3; // number of inputs (controllers) (it has to be at least 1 and at most nx/2 for the mass-spring system test problem)
+ int N = 5; // horizon lenght
+
+
+
+ // stage-wise variant size
+
+ int nx[N+1];
+#if KEEP_X0
+ nx[0] = nx_;
+#else
+ nx[0] = 0;
+#endif
+ for(ii=1; ii<=N; ii++)
+ nx[ii] = nx_;
+// nx[N] = 0;
+
+ int nu[N+1];
+ for(ii=0; ii<N; ii++)
+ nu[ii] = nu_;
+ nu[N] = 0;
+
+#if 1
+ int nb[N+1];
+#if KEEP_X0
+ nb[0] = nu[0]+nx[0]/2;
+#else
+ nb[0] = nu[0];
+#endif
+ for(ii=1; ii<N; ii++)
+ nb[ii] = nu[1]+nx[1]/2;
+ nb[N] = nx[N]/2;
+
+ int ng[N+1];
+ ng[0] = 0;
+ for(ii=1; ii<N; ii++)
+ ng[ii] = 0;
+ ng[N] = 0;
+#elif 0
+ int nb[N+1];
+ nb[0] = 0;
+ for(ii=1; ii<N; ii++)
+ nb[ii] = 0;
+ nb[N] = 0;
+
+ int ng[N+1];
+#if KEEP_X0
+ ng[0] = nu[0]+nx[0]/2;
+#else
+ ng[0] = nu[0];
+#endif
+ for(ii=1; ii<N; ii++)
+ ng[ii] = nu[1]+nx[1]/2;
+ ng[N] = nx[N]/2;
+#else
+ int nb[N+1];
+ nb[0] = nu[0] + nx[0]/2;
+ for(ii=1; ii<N; ii++)
+ nb[ii] = nu[ii] + nx[ii]/2;
+ nb[N] = nu[N] + nx[N]/2;
+
+ int ng[N+1];
+#if KEEP_X0
+ ng[0] = nx[0]/2;
+#else
+ ng[0] = 0;
+#endif
+ for(ii=1; ii<N; ii++)
+ ng[ii] = nx[1]/2;
+ ng[N] = nx[N]/2;
+#endif
+
+/************************************************
+* dynamical system
+************************************************/
+
+ double *A; d_zeros(&A, nx_, nx_); // states update matrix
+
+ double *B; d_zeros(&B, nx_, nu_); // inputs matrix
+
+ double *b; d_zeros(&b, nx_, 1); // states offset
+ double *x0; d_zeros(&x0, nx_, 1); // initial state
+
+ double Ts = 0.5; // sampling time
+ mass_spring_system(Ts, nx_, nu_, N, A, B, b, x0);
+
+ for(jj=0; jj<nx_; jj++)
+ b[jj] = 0.1;
+
+ for(jj=0; jj<nx_; jj++)
+ x0[jj] = 0;
+ x0[0] = 2.5;
+ x0[1] = 2.5;
+
+ double *b0; d_zeros(&b0, nx_, 1);
+ dgemv_n_3l(nx_, nx_, A, nx_, x0, b0);
+ daxpy_3l(nx_, 1.0, b, b0);
+
+#if PRINT
+ d_print_mat(nx_, nx_, A, nx_);
+ d_print_mat(nx_, nu_, B, nu_);
+ d_print_mat(1, nx_, b, 1);
+ d_print_mat(1, nx_, x0, 1);
+ d_print_mat(1, nx_, b0, 1);
+#endif
+
+/************************************************
+* cost function
+************************************************/
+
+ double *Q; d_zeros(&Q, nx_, nx_);
+ for(ii=0; ii<nx_; ii++) Q[ii*(nx_+1)] = 1.0;
+
+ double *R; d_zeros(&R, nu_, nu_);
+ for(ii=0; ii<nu_; ii++) R[ii*(nu_+1)] = 2.0;
+
+ double *S; d_zeros(&S, nu_, nx_);
+
+ double *q; d_zeros(&q, nx_, 1);
+ for(ii=0; ii<nx_; ii++) q[ii] = 0.1;
+
+ double *r; d_zeros(&r, nu_, 1);
+ for(ii=0; ii<nu_; ii++) r[ii] = 0.2;
+
+ double *r0; d_zeros(&r0, nu_, 1);
+ dgemv_n_3l(nu_, nx_, S, nu_, x0, r0);
+ daxpy_3l(nu_, 1.0, r, r0);
+
+#if PRINT
+ d_print_mat(nx_, nx_, Q, nx_);
+ d_print_mat(nu_, nu_, R, nu_);
+ d_print_mat(nu_, nx_, S, nu_);
+ d_print_mat(1, nx_, q, 1);
+ d_print_mat(1, nu_, r, 1);
+ d_print_mat(1, nu_, r0, 1);
+#endif
+
+ // maximum element in cost functions
+ double mu0 = 2.0;
+
+/************************************************
+* box & general constraints
+************************************************/
+
+ int *idxb0; int_zeros(&idxb0, nb[0], 1);
+ double *d_lb0; d_zeros(&d_lb0, nb[0], 1);
+ double *d_ub0; d_zeros(&d_ub0, nb[0], 1);
+ double *d_lg0; d_zeros(&d_lg0, ng[0], 1);
+ double *d_ug0; d_zeros(&d_ug0, ng[0], 1);
+ for(ii=0; ii<nb[0]; ii++)
+ {
+ if(ii<nu[0]) // input
+ {
+ d_lb0[ii] = - 0.5; // umin
+ d_ub0[ii] = 0.5; // umax
+ }
+ else // state
+ {
+ d_lb0[ii] = - 4.0; // xmin
+ d_ub0[ii] = 4.0; // xmax
+ }
+ idxb0[ii] = ii;
+ }
+ for(ii=0; ii<ng[0]; ii++)
+ {
+ if(ii<nu[0]-nb[0]) // input
+ {
+ d_lg0[ii] = - 0.5; // umin
+ d_ug0[ii] = 0.5; // umax
+ }
+ else // state
+ {
+ d_lg0[ii] = - 4.0; // xmin
+ d_ug0[ii] = 4.0; // xmax
+ }
+ }
+
+ int *idxb1; int_zeros(&idxb1, nb[1], 1);
+ double *d_lb1; d_zeros(&d_lb1, nb[1], 1);
+ double *d_ub1; d_zeros(&d_ub1, nb[1], 1);
+ double *d_lg1; d_zeros(&d_lg1, ng[1], 1);
+ double *d_ug1; d_zeros(&d_ug1, ng[1], 1);
+ for(ii=0; ii<nb[1]; ii++)
+ {
+ if(ii<nu[1]) // input
+ {
+ d_lb1[ii] = - 0.5; // umin
+ d_ub1[ii] = 0.5; // umax
+ }
+ else // state
+ {
+ d_lb1[ii] = - 4.0; // xmin
+ d_ub1[ii] = 4.0; // xmax
+ }
+ idxb1[ii] = ii;
+ }
+ for(ii=0; ii<ng[1]; ii++)
+ {
+ if(ii<nu[1]-nb[1]) // input
+ {
+ d_lg1[ii] = - 0.5; // umin
+ d_ug1[ii] = 0.5; // umax
+ }
+ else // state
+ {
+ d_lg1[ii] = - 4.0; // xmin
+ d_ug1[ii] = 4.0; // xmax
+ }
+ }
+
+
+ int *idxbN; int_zeros(&idxbN, nb[N], 1);
+ double *d_lbN; d_zeros(&d_lbN, nb[N], 1);
+ double *d_ubN; d_zeros(&d_ubN, nb[N], 1);
+ double *d_lgN; d_zeros(&d_lgN, ng[N], 1);
+ double *d_ugN; d_zeros(&d_ugN, ng[N], 1);
+ for(ii=0; ii<nb[N]; ii++)
+ {
+ d_lbN[ii] = - 4.0; // xmin
+ d_ubN[ii] = 4.0; // xmax
+ idxbN[ii] = ii;
+ }
+ for(ii=0; ii<ng[N]; ii++)
+ {
+ d_lgN[ii] = - 4.0; // dmin
+ d_ugN[ii] = 4.0; // dmax
+ }
+
+ double *C0; d_zeros(&C0, ng[0], nx[0]);
+ double *D0; d_zeros(&D0, ng[0], nu[0]);
+ for(ii=0; ii<nu[0]-nb[0] & ii<ng[0]; ii++)
+ D0[ii+(nb[0]+ii)*ng[0]] = 1.0;
+ for(; ii<ng[0]; ii++)
+ C0[ii+(nb[0]+ii-nu[0])*ng[0]] = 1.0;
+
+ double *C1; d_zeros(&C1, ng[1], nx[1]);
+ double *D1; d_zeros(&D1, ng[1], nu[1]);
+ for(ii=0; ii<nu[1]-nb[1] & ii<ng[1]; ii++)
+ D1[ii+(nb[1]+ii)*ng[1]] = 1.0;
+ for(; ii<ng[1]; ii++)
+ C1[ii+(nb[1]+ii-nu[1])*ng[1]] = 1.0;
+
+ double *CN; d_zeros(&CN, ng[N], nx[N]);
+ double *DN; d_zeros(&DN, ng[N], nu[N]);
+ for(ii=0; ii<nu[N]-nb[N] & ii<ng[N]; ii++)
+ DN[ii+(nb[N]+ii)*ng[N]] = 1.0;
+ for(; ii<ng[N]; ii++)
+ CN[ii+(nb[N]+ii-nu[N])*ng[N]] = 1.0;
+
+#if PRINT
+ // box constraints
+ int_print_mat(1, nb[0], idxb0, 1);
+ d_print_mat(1, nb[0], d_lb0, 1);
+ d_print_mat(1, nb[0], d_ub0, 1);
+ int_print_mat(1, nb[1], idxb1, 1);
+ d_print_mat(1, nb[1], d_lb1, 1);
+ d_print_mat(1, nb[1], d_ub1, 1);
+ int_print_mat(1, nb[N], idxbN, 1);
+ d_print_mat(1, nb[N], d_lbN, 1);
+ d_print_mat(1, nb[N], d_ubN, 1);
+ // general constraints
+ d_print_mat(1, ng[0], d_lg0, 1);
+ d_print_mat(1, ng[0], d_ug0, 1);
+ d_print_mat(ng[0], nu[0], D0, ng[0]);
+ d_print_mat(ng[0], nx[0], C0, ng[0]);
+ d_print_mat(1, ng[1], d_lg1, 1);
+ d_print_mat(1, ng[1], d_ug1, 1);
+ d_print_mat(ng[1], nu[1], D1, ng[1]);
+ d_print_mat(ng[1], nx[1], C1, ng[1]);
+ d_print_mat(1, ng[N], d_lgN, 1);
+ d_print_mat(1, ng[N], d_ugN, 1);
+ d_print_mat(ng[N], nu[N], DN, ng[N]);
+ d_print_mat(ng[N], nx[N], CN, ng[N]);
+#endif
+
+/************************************************
+* array of matrices
+************************************************/
+
+ double *hA[N];
+ double *hB[N];
+ double *hb[N];
+ double *hQ[N+1];
+ double *hS[N+1];
+ double *hR[N+1];
+ double *hq[N+1];
+ double *hr[N+1];
+ double *hd_lb[N+1];
+ double *hd_ub[N+1];
+ double *hd_lg[N+1];
+ double *hd_ug[N+1];
+ double *hC[N+1];
+ double *hD[N+1];
+ int *hidxb[N+1];
+
+ hA[0] = A;
+ hB[0] = B;
+ hb[0] = b0;
+ hQ[0] = Q;
+ hS[0] = S;
+ hR[0] = R;
+ hq[0] = q;
+ hr[0] = r0;
+ hidxb[0] = idxb0;
+ hd_lb[0] = d_lb0;
+ hd_ub[0] = d_ub0;
+ hd_lg[0] = d_lg0;
+ hd_ug[0] = d_ug0;
+ hC[0] = C0;
+ hD[0] = D0;
+ for(ii=1; ii<N; ii++)
+ {
+ hA[ii] = A;
+ hB[ii] = B;
+ hb[ii] = b;
+ hQ[ii] = Q;
+ hS[ii] = S;
+ hR[ii] = R;
+ hq[ii] = q;
+ hr[ii] = r;
+ hidxb[ii] = idxb1;
+ hd_lb[ii] = d_lb1;
+ hd_ub[ii] = d_ub1;
+ hd_lg[ii] = d_lg1;
+ hd_ug[ii] = d_ug1;
+ hC[ii] = C1;
+ hD[ii] = D1;
+ }
+ hQ[N] = Q;
+ hS[N] = S;
+ hR[N] = R;
+ hq[N] = q;
+ hr[N] = r;
+ hidxb[N] = idxbN;
+ hd_lb[N] = d_lbN;
+ hd_ub[N] = d_ubN;
+ hd_lg[N] = d_lgN;
+ hd_ug[N] = d_ugN;
+ hC[N] = CN;
+ hD[N] = DN;
+
+/************************************************
+* ocp qp
+************************************************/
+
+ int ocp_qp_size = d_memsize_ocp_qp(N, nx, nu, nb, ng);
+ printf("\nqp size = %d\n", ocp_qp_size);
+ void *ocp_qp_mem = malloc(ocp_qp_size);
+
+ struct d_ocp_qp ocp_qp;
+ d_create_ocp_qp(N, nx, nu, nb, ng, &ocp_qp, ocp_qp_mem);
+ d_cvt_colmaj_to_ocp_qp(hA, hB, hb, hQ, hS, hR, hq, hr, hidxb, hd_lb, hd_ub, hC, hD, hd_lg, hd_ug, &ocp_qp);
+
+#if 1
+ printf("\nN = %d\n", ocp_qp.N);
+ for(ii=0; ii<N; ii++)
+ d_print_strmat(ocp_qp.nu[ii]+ocp_qp.nx[ii]+1, ocp_qp.nx[ii+1], ocp_qp.BAbt+ii, 0, 0);
+ for(ii=0; ii<N; ii++)
+ d_print_tran_strvec(ocp_qp.nx[ii+1], ocp_qp.b+ii, 0);
+ for(ii=0; ii<=N; ii++)
+ d_print_strmat(ocp_qp.nu[ii]+ocp_qp.nx[ii]+1, ocp_qp.nu[ii]+ocp_qp.nx[ii], ocp_qp.RSQrq+ii, 0, 0);
+ for(ii=0; ii<=N; ii++)
+ d_print_tran_strvec(ocp_qp.nu[ii]+ocp_qp.nx[ii], ocp_qp.rq+ii, 0);
+ for(ii=0; ii<=N; ii++)
+ int_print_mat(1, nb[ii], ocp_qp.idxb[ii], 1);
+ for(ii=0; ii<=N; ii++)
+ d_print_tran_strvec(ocp_qp.nb[ii], ocp_qp.d_lb+ii, 0);
+ for(ii=0; ii<=N; ii++)
+ d_print_tran_strvec(ocp_qp.nb[ii], ocp_qp.d_ub+ii, 0);
+ for(ii=0; ii<=N; ii++)
+ d_print_strmat(ocp_qp.nu[ii]+ocp_qp.nx[ii], ocp_qp.ng[ii], ocp_qp.DCt+ii, 0, 0);
+ for(ii=0; ii<=N; ii++)
+ d_print_tran_strvec(ocp_qp.ng[ii], ocp_qp.d_lg+ii, 0);
+ for(ii=0; ii<=N; ii++)
+ d_print_tran_strvec(ocp_qp.ng[ii], ocp_qp.d_ug+ii, 0);
+#endif
+
+/************************************************
+* dense qp
+************************************************/
+
+ int nvc = 0;
+ int nec = 0;
+ int nbc = 0;
+ int ngc = 0;
+
+ d_compute_qp_size_ocp2dense(N, nx, nu, nb, hidxb, ng, &nvc, &nec, &nbc, &ngc);
+ printf("\nnv = %d, ne = %d, nb = %d, ng = %d\n\n", nvc, nec, nbc, ngc);
+
+ int dense_qp_size = d_memsize_dense_qp(nvc, nec, nbc, ngc);
+ printf("\nqp size = %d\n", dense_qp_size);
+ void *dense_qp_mem = malloc(dense_qp_size);
+
+ struct d_dense_qp dense_qp;
+ d_create_dense_qp(nvc, nec, nbc, ngc, &dense_qp, dense_qp_mem);
+
+ int cond_size = d_memsize_cond_qp_ocp2dense(&ocp_qp, &dense_qp);
+ printf("\ncond size = %d\n", cond_size);
+ void *cond_mem = malloc(cond_size);
+
+ struct d_cond_qp_ocp2dense_workspace cond_ws;
+ d_create_cond_qp_ocp2dense(&ocp_qp, &dense_qp, &cond_ws, cond_mem);
+
+ gettimeofday(&tv0, NULL); // start
+
+ for(rep=0; rep<nrep; rep++)
+ {
+ d_cond_qp_ocp2dense(&ocp_qp, &dense_qp, &cond_ws);
+ }
+
+ gettimeofday(&tv1, NULL); // stop
+
+ double time_cond = (tv1.tv_sec-tv0.tv_sec)/(nrep+0.0)+(tv1.tv_usec-tv0.tv_usec)/(nrep*1e6);
+
+#if 1
+ d_print_strmat(nvc+1, nvc, dense_qp.Hg, 0, 0);
+ d_print_strmat(nec, nvc, dense_qp.A, 0, 0);
+ d_print_strmat(nvc, ngc, dense_qp.Ct, 0, 0);
+ d_print_tran_strvec(nvc, dense_qp.g, 0);
+ d_print_tran_strvec(nec, dense_qp.b, 0);
+ d_print_tran_strvec(2*nbc+2*ngc, dense_qp.d, 0);
+ d_print_tran_strvec(nbc, dense_qp.d_lb, 0);
+ d_print_tran_strvec(nbc, dense_qp.d_ub, 0);
+ d_print_tran_strvec(ngc, dense_qp.d_lg, 0);
+ d_print_tran_strvec(ngc, dense_qp.d_ug, 0);
+#endif
+
+#if 0
+ int nu_tmp = 0;
+ for(ii=0; ii<N; ii++)
+ {
+ nu_tmp += nu[ii];
+ d_print_strmat(nu_tmp+nx[0]+1, nx[ii+1], cond_ws.Gamma+ii, 0, 0);
+ }
+#endif
+
+/************************************************
+* dense qp sol
+************************************************/
+
+ int dense_qp_sol_size = d_memsize_dense_qp_sol(nvc, nec, nbc, ngc);
+ printf("\ndense qp sol size = %d\n", dense_qp_sol_size);
+ void *dense_qp_sol_mem = malloc(dense_qp_sol_size);
+
+ struct d_dense_qp_sol dense_qp_sol;
+ d_create_dense_qp_sol(nvc, nec, nbc, ngc, &dense_qp_sol, dense_qp_sol_mem);
+
+/************************************************
+* ipm
+************************************************/
+
+ struct d_ipm_hard_dense_qp_arg dense_arg;
+ dense_arg.alpha_min = 1e-8;
+ dense_arg.mu_max = 1e-12;
+ dense_arg.iter_max = 20;
+ dense_arg.mu0 = 1.0;
+
+ int dense_ipm_size = d_memsize_ipm_hard_dense_qp(&dense_qp, &dense_arg);
+ printf("\ndense ipm size = %d\n", dense_ipm_size);
+ void *dense_ipm_mem = malloc(dense_ipm_size);
+
+ struct d_ipm_hard_dense_qp_workspace dense_workspace;
+ d_create_ipm_hard_dense_qp(&dense_qp, &dense_arg, &dense_workspace, dense_ipm_mem);
+
+ gettimeofday(&tv0, NULL); // start
+
+ for(rep=0; rep<nrep; rep++)
+ {
+// d_solve_ipm_hard_dense_qp(&dense_qp, &dense_qp_sol, &dense_workspace);
+ d_solve_ipm2_hard_dense_qp(&dense_qp, &dense_qp_sol, &dense_workspace);
+ }
+
+ gettimeofday(&tv1, NULL); // stop
+
+ double time_dense_ipm = (tv1.tv_sec-tv0.tv_sec)/(nrep+0.0)+(tv1.tv_usec-tv0.tv_usec)/(nrep*1e6);
+
+
+ printf("\nsolution\n\n");
+ printf("\nv\n");
+ d_print_tran_strvec(nvc, dense_qp_sol.v, 0);
+ printf("\npi\n");
+ d_print_tran_strvec(nec, dense_qp_sol.pi, 0);
+ printf("\nlam_lb\n");
+ d_print_tran_strvec(nbc, dense_qp_sol.lam_lb, 0);
+ printf("\nlam_ub\n");
+ d_print_tran_strvec(nbc, dense_qp_sol.lam_ub, 0);
+ printf("\nlam_lg\n");
+ d_print_tran_strvec(ngc, dense_qp_sol.lam_lg, 0);
+ printf("\nlam_ug\n");
+ d_print_tran_strvec(ngc, dense_qp_sol.lam_ug, 0);
+ printf("\nt_lb\n");
+ d_print_tran_strvec(nbc, dense_qp_sol.t_lb, 0);
+ printf("\nt_ub\n");
+ d_print_tran_strvec(nbc, dense_qp_sol.t_ub, 0);
+ printf("\nt_lg\n");
+ d_print_tran_strvec(ngc, dense_qp_sol.t_lg, 0);
+ printf("\nt_ug\n");
+ d_print_tran_strvec(ngc, dense_qp_sol.t_ug, 0);
+
+ printf("\nresiduals\n\n");
+ printf("\nres_g\n");
+ d_print_e_tran_strvec(nvc, dense_workspace.res_g, 0);
+ printf("\nres_b\n");
+ d_print_e_tran_strvec(nec, dense_workspace.res_b, 0);
+ printf("\nres_d\n");
+ d_print_e_tran_strvec(2*nbc+2*ngc, dense_workspace.res_d, 0);
+ printf("\nres_m\n");
+ d_print_e_tran_strvec(2*nbc+2*ngc, dense_workspace.res_m, 0);
+ printf("\nres_mu\n");
+ printf("\n%e\n\n", dense_workspace.res_mu);
+
+ printf("\nipm iter = %d\n", dense_workspace.iter);
+ printf("\nalpha_aff\tmu_aff\t\tsigma\t\talpha\t\tmu\n");
+ d_print_e_tran_mat(5, dense_workspace.iter, dense_workspace.stat, 5);
+
+ printf("\ncond time = %e [s], dense ipm time = %e [s]\n\n", time_cond, time_dense_ipm);
+
+/************************************************
+* ocp qp sol
+************************************************/
+
+ int ocp_qp_sol_size = d_memsize_ocp_qp_sol(N, nx, nu, nb, ng);
+ printf("\nocp qp sol size = %d\n", ocp_qp_sol_size);
+ void *ocp_qp_sol_mem = malloc(ocp_qp_sol_size);
+
+ struct d_ocp_qp_sol ocp_qp_sol;
+ d_create_ocp_qp_sol(N, nx, nu, nb, ng, &ocp_qp_sol, ocp_qp_sol_mem);
+
+/************************************************
+* expand solution
+************************************************/
+
+ d_expand_sol_dense2ocp(&ocp_qp, &dense_qp_sol, &ocp_qp_sol, &cond_ws);
+
+ printf("\nfull space solution\n\n");
+ printf("\nux\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_tran_strvec(nu[ii]+nx[ii], ocp_qp_sol.ux+ii, 0);
+ printf("\npi\n");
+ for(ii=0; ii<N; ii++)
+ d_print_tran_strvec(nx[ii+1], ocp_qp_sol.pi+ii, 0);
+ printf("\nlam_lb\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_tran_strvec(nb[ii], ocp_qp_sol.lam_lb+ii, 0);
+ printf("\nlam_ub\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_tran_strvec(nb[ii], ocp_qp_sol.lam_ub+ii, 0);
+ printf("\nlam_lg\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_tran_strvec(ng[ii], ocp_qp_sol.lam_lg+ii, 0);
+ printf("\nlam_ug\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_tran_strvec(ng[ii], ocp_qp_sol.lam_ug+ii, 0);
+ printf("\nt_lb\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_tran_strvec(nb[ii], ocp_qp_sol.t_lb+ii, 0);
+ printf("\nt_ub\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_tran_strvec(nb[ii], ocp_qp_sol.t_ub+ii, 0);
+ printf("\nt_lg\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_tran_strvec(ng[ii], ocp_qp_sol.t_lg+ii, 0);
+ printf("\nt_ug\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_tran_strvec(ng[ii], ocp_qp_sol.t_ug+ii, 0);
+
+/************************************************
+* free memory
+************************************************/
+
+ d_free(A);
+ d_free(B);
+ d_free(b);
+ d_free(x0);
+ d_free(Q);
+ d_free(R);
+ d_free(S);
+ d_free(q);
+ d_free(r);
+ d_free(r0);
+ int_free(idxb0);
+ d_free(d_lb0);
+ d_free(d_ub0);
+ int_free(idxb1);
+ d_free(d_lb1);
+ d_free(d_ub1);
+ int_free(idxbN);
+ d_free(d_lbN);
+ d_free(d_ubN);
+ d_free(C0);
+ d_free(D0);
+ d_free(d_lg0);
+ d_free(d_ug0);
+ d_free(C1);
+ d_free(D1);
+ d_free(d_lg1);
+ d_free(d_ug1);
+ d_free(CN);
+ d_free(DN);
+ d_free(d_lgN);
+ d_free(d_ugN);
+
+ free(ocp_qp_mem);
+ free(ocp_qp_sol_mem);
+ free(dense_qp_mem);
+ free(dense_qp_sol_mem);
+ free(cond_mem);
+ free(dense_ipm_mem);
+
+/************************************************
+* return
+************************************************/
+
+ return 0;
+
+ }
diff --git a/test_problems/test_d_dense.c b/test_problems/test_d_dense.c
new file mode 100644
index 0000000..c70415a
--- /dev/null
+++ b/test_problems/test_d_dense.c
@@ -0,0 +1,264 @@
+/**************************************************************************************************
+* *
+* This file is part of HPIPM. *
+* *
+* HPIPM -- High Performance Interior Point Method. *
+* Copyright (C) 2017 by Gianluca Frison. *
+* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
+* All rights reserved. *
+* *
+* HPMPC is free software; you can redistribute it and/or *
+* modify it under the terms of the GNU Lesser General Public *
+* License as published by the Free Software Foundation; either *
+* version 2.1 of the License, or (at your option) any later version. *
+* *
+* HPMPC is distributed in the hope that it will be useful, *
+* but WITHOUT ANY WARRANTY; without even the implied warranty of *
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
+* See the GNU Lesser General Public License for more details. *
+* *
+* You should have received a copy of the GNU Lesser General Public *
+* License along with HPMPC; if not, write to the Free Software *
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA *
+* *
+* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
+* *
+**************************************************************************************************/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <sys/time.h>
+
+#include <blasfeo_target.h>
+#include <blasfeo_common.h>
+#include <blasfeo_v_aux_ext_dep.h>
+#include <blasfeo_d_aux_ext_dep.h>
+#include <blasfeo_i_aux_ext_dep.h>
+#include <blasfeo_d_aux.h>
+#include <blasfeo_d_blas.h>
+
+#include "../include/hpipm_d_dense_qp.h"
+#include "../include/hpipm_d_dense_qp_sol.h"
+#include "../include/hpipm_d_dense_qp_ipm_hard.h"
+
+
+
+#define PRINT 1
+
+
+
+#if ! defined(EXT_DEP)
+/* prints a matrix in column-major format */
+void d_print_mat(int m, int n, double *A, int lda)
+ {
+ int i, j;
+ for(i=0; i<m; i++)
+ {
+ for(j=0; j<n; j++)
+ {
+ printf("%9.5f ", A[i+lda*j]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+ }
+/* prints the transposed of a matrix in column-major format */
+void d_print_tran_mat(int row, int col, double *A, int lda)
+ {
+ int i, j;
+ for(j=0; j<col; j++)
+ {
+ for(i=0; i<row; i++)
+ {
+ printf("%9.5f ", A[i+lda*j]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+ }
+/* prints a matrix in column-major format (exponential notation) */
+void d_print_e_mat(int m, int n, double *A, int lda)
+ {
+ int i, j;
+ for(i=0; i<m; i++)
+ {
+ for(j=0; j<n; j++)
+ {
+ printf("%e\t", A[i+lda*j]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+ }
+/* prints the transposed of a matrix in column-major format (exponential notation) */
+void d_print_e_tran_mat(int row, int col, double *A, int lda)
+ {
+ int i, j;
+ for(j=0; j<col; j++)
+ {
+ for(i=0; i<row; i++)
+ {
+ printf("%e\t", A[i+lda*j]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+ }
+#endif
+
+
+
+int main()
+ {
+
+ int ii;
+
+/************************************************
+* qp dimension and data
+************************************************/
+
+ int nv = 2;
+ int ne = 1;
+ int nb = 2;
+ int ng = 0;
+
+ double H[] = {4.0, 1.0, 1.0, 2.0};
+ double g[] = {1.0, 1.0};
+ double A[] = {1.0, 1.0};
+ double b[] = {1.0};
+// double d_lb[] = {0.0, 0.0};
+// double d_ub[] = {INFINITY, INFINITY};
+ double d_lb[] = {-1.0, -1.0};
+ double d_ub[] = {1.5, 0.5};
+ int idxb[] = {0, 1};
+ double C[] = {1.0, 0.0, 0.0, 1.0};
+ double d_lg[] = {-1.0, -1.0};
+ double d_ug[] = {1.5, 0.5};
+
+/************************************************
+* dense qp
+************************************************/
+
+ int qp_size = d_memsize_dense_qp(nv, ne, nb, ng);
+ printf("\nqp size = %d\n", qp_size);
+ void *qp_mem = malloc(qp_size);
+
+ struct d_dense_qp qp;
+ d_create_dense_qp(nv, ne, nb, ng, &qp, qp_mem);
+ d_cvt_colmaj_to_dense_qp(H, g, A, b, idxb, d_lb, d_ub, C, d_lg, d_ug, &qp);
+
+#if 0
+ d_print_strmat(nv+1, nv, qp.Hg, 0, 0);
+ d_print_strmat(ne, nv, qp.A, 0, 0);
+ d_print_strmat(nv, ng, qp.Ct, 0, 0);
+ d_print_strvec(nv, qp.g, 0);
+ d_print_strvec(ne, qp.b, 0);
+ d_print_strvec(2*nb+2*ng, qp.d, 0);
+ d_print_strvec(nb, qp.d_lb, 0);
+ d_print_strvec(nb, qp.d_ub, 0);
+ d_print_strvec(ng, qp.d_lg, 0);
+ d_print_strvec(ng, qp.d_ug, 0);
+#endif
+
+/************************************************
+* dense qp sol
+************************************************/
+
+ int qp_sol_size = d_memsize_dense_qp_sol(nv, ne, nb, ng);
+ printf("\nqp sol size = %d\n", qp_sol_size);
+ void *qp_sol_mem = malloc(qp_sol_size);
+
+ struct d_dense_qp_sol qp_sol;
+ d_create_dense_qp_sol(nv, ne, nb, ng, &qp_sol, qp_sol_mem);
+
+/************************************************
+* ipm
+************************************************/
+
+ struct d_ipm_hard_dense_qp_arg arg;
+ arg.alpha_min = 1e-8;
+ arg.mu_max = 1e-12;
+ arg.iter_max = 20;
+ arg.mu0 = 1.0;
+
+ int ipm_size = d_memsize_ipm_hard_dense_qp(&qp, &arg);
+ printf("\nipm size = %d\n", ipm_size);
+ void *ipm_mem = malloc(ipm_size);
+
+ struct d_ipm_hard_dense_qp_workspace workspace;
+ d_create_ipm_hard_dense_qp(&qp, &arg, &workspace, ipm_mem);
+
+ int rep, nrep=1000;
+
+ struct timeval tv0, tv1;
+
+ gettimeofday(&tv0, NULL); // start
+
+ for(rep=0; rep<nrep; rep++)
+ {
+// d_solve_ipm_hard_dense_qp(&qp, &qp_sol, &workspace);
+ d_solve_ipm2_hard_dense_qp(&qp, &qp_sol, &workspace);
+ }
+
+ gettimeofday(&tv1, NULL); // stop
+
+ double time_dense_ipm = (tv1.tv_sec-tv0.tv_sec)/(nrep+0.0)+(tv1.tv_usec-tv0.tv_usec)/(nrep*1e6);
+
+ printf("\nsolution\n\n");
+ printf("\nv\n");
+ d_print_mat(1, nv, qp_sol.v->pa, 1);
+ printf("\npi\n");
+ d_print_mat(1, ne, qp_sol.pi->pa, 1);
+ printf("\nlam_lb\n");
+ d_print_mat(1, nb, qp_sol.lam_lb->pa, 1);
+ printf("\nlam_ub\n");
+ d_print_mat(1, nb, qp_sol.lam_ub->pa, 1);
+ printf("\nlam_lg\n");
+ d_print_mat(1, ng, qp_sol.lam_lg->pa, 1);
+ printf("\nlam_ug\n");
+ d_print_mat(1, ng, qp_sol.lam_ug->pa, 1);
+ printf("\nt_lb\n");
+ d_print_mat(1, nb, qp_sol.t_lb->pa, 1);
+ printf("\nt_ub\n");
+ d_print_mat(1, nb, qp_sol.t_ub->pa, 1);
+ printf("\nt_lg\n");
+ d_print_mat(1, ng, qp_sol.t_lg->pa, 1);
+ printf("\nt_ug\n");
+ d_print_mat(1, ng, qp_sol.t_ug->pa, 1);
+
+ printf("\nresiduals\n\n");
+ printf("\nres_g\n");
+ d_print_e_mat(1, nv, workspace.res_g->pa, 1);
+ printf("\nres_b\n");
+ d_print_e_mat(1, ne, workspace.res_b->pa, 1);
+ printf("\nres_d\n");
+ d_print_e_mat(1, 2*nb+2*ng, workspace.res_d->pa, 1);
+ printf("\nres_m\n");
+ d_print_e_mat(1, 2*nb+2*ng, workspace.res_m->pa, 1);
+ printf("\nres_mu\n");
+ printf("\n%e\n\n", workspace.res_mu);
+
+ printf("\nipm iter = %d\n", workspace.iter);
+ printf("\nalpha_aff\tmu_aff\t\tsigma\t\talpha\t\tmu\n");
+ d_print_e_tran_mat(5, workspace.iter, workspace.stat, 5);
+
+ printf("\ndense ipm time = %e [s]\n\n", time_dense_ipm);
+
+/************************************************
+* free memory
+************************************************/
+
+ free(qp_mem);
+ free(qp_sol_mem);
+ free(ipm_mem);
+
+/************************************************
+* return
+************************************************/
+
+ return 0;
+
+ }
+
+
diff --git a/test_problems/test_d_ocp.c b/test_problems/test_d_ocp.c
new file mode 100644
index 0000000..cc0d96f
--- /dev/null
+++ b/test_problems/test_d_ocp.c
@@ -0,0 +1,833 @@
+/**************************************************************************************************
+* *
+* This file is part of HPIPM. *
+* *
+* HPIPM -- High Performance Interior Point Method. *
+* Copyright (C) 2017 by Gianluca Frison. *
+* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
+* All rights reserved. *
+* *
+* HPMPC is free software; you can redistribute it and/or *
+* modify it under the terms of the GNU Lesser General Public *
+* License as published by the Free Software Foundation; either *
+* version 2.1 of the License, or (at your option) any later version. *
+* *
+* HPMPC is distributed in the hope that it will be useful, *
+* but WITHOUT ANY WARRANTY; without even the implied warranty of *
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
+* See the GNU Lesser General Public License for more details. *
+* *
+* You should have received a copy of the GNU Lesser General Public *
+* License along with HPMPC; if not, write to the Free Software *
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA *
+* *
+* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
+* *
+**************************************************************************************************/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <sys/time.h>
+
+#include <blasfeo_target.h>
+#include <blasfeo_common.h>
+#include <blasfeo_v_aux_ext_dep.h>
+#include <blasfeo_d_aux_ext_dep.h>
+#include <blasfeo_i_aux_ext_dep.h>
+#include <blasfeo_d_aux.h>
+#include <blasfeo_d_blas.h>
+
+#include "../include/hpipm_d_ocp_qp.h"
+#include "../include/hpipm_d_ocp_qp_sol.h"
+#include "../include/hpipm_d_ocp_qp_ipm_hard.h"
+
+#include "d_tools.h"
+
+
+
+#if ! defined(EXT_DEP)
+/* creates a zero matrix */
+void d_zeros(double **pA, int row, int col)
+ {
+ *pA = malloc((row*col)*sizeof(double));
+ double *A = *pA;
+ int i;
+ for(i=0; i<row*col; i++) A[i] = 0.0;
+ }
+/* frees matrix */
+void d_free(double *pA)
+ {
+ free( pA );
+ }
+/* prints a matrix in column-major format */
+void d_print_mat(int m, int n, double *A, int lda)
+ {
+ int i, j;
+ for(i=0; i<m; i++)
+ {
+ for(j=0; j<n; j++)
+ {
+ printf("%9.5f ", A[i+lda*j]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+ }
+/* prints the transposed of a matrix in column-major format */
+void d_print_tran_mat(int row, int col, double *A, int lda)
+ {
+ int i, j;
+ for(j=0; j<col; j++)
+ {
+ for(i=0; i<row; i++)
+ {
+ printf("%9.5f ", A[i+lda*j]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+ }
+/* prints a matrix in column-major format (exponential notation) */
+void d_print_e_mat(int m, int n, double *A, int lda)
+ {
+ int i, j;
+ for(i=0; i<m; i++)
+ {
+ for(j=0; j<n; j++)
+ {
+ printf("%e\t", A[i+lda*j]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+ }
+/* prints the transposed of a matrix in column-major format (exponential notation) */
+void d_print_e_tran_mat(int row, int col, double *A, int lda)
+ {
+ int i, j;
+ for(j=0; j<col; j++)
+ {
+ for(i=0; i<row; i++)
+ {
+ printf("%e\t", A[i+lda*j]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+ }
+/* creates a zero matrix aligned */
+void int_zeros(int **pA, int row, int col)
+ {
+ void *temp = malloc((row*col)*sizeof(int));
+ *pA = temp;
+ int *A = *pA;
+ int i;
+ for(i=0; i<row*col; i++) A[i] = 0;
+ }
+/* frees matrix */
+void int_free(int *pA)
+ {
+ free( pA );
+ }
+/* prints a matrix in column-major format */
+void int_print_mat(int row, int col, int *A, int lda)
+ {
+ int i, j;
+ for(i=0; i<row; i++)
+ {
+ for(j=0; j<col; j++)
+ {
+ printf("%d ", A[i+lda*j]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+ }
+#endif
+
+
+
+#define KEEP_X0 0
+
+// printing
+#define PRINT 1
+
+/************************************************
+Mass-spring system: nx/2 masses connected each other with springs (in a row), and the first and the last one to walls. nu (<=nx) controls act on the first nu masses. The system is sampled with sampling time Ts.
+************************************************/
+void mass_spring_system(double Ts, int nx, int nu, int N, double *A, double *B, double *b, double *x0)
+ {
+
+ int nx2 = nx*nx;
+
+ int info = 0;
+
+ int pp = nx/2; // number of masses
+
+/************************************************
+* build the continuous time system
+************************************************/
+
+ double *T; d_zeros(&T, pp, pp);
+ int ii;
+ for(ii=0; ii<pp; ii++) T[ii*(pp+1)] = -2;
+ for(ii=0; ii<pp-1; ii++) T[ii*(pp+1)+1] = 1;
+ for(ii=1; ii<pp; ii++) T[ii*(pp+1)-1] = 1;
+
+ double *Z; d_zeros(&Z, pp, pp);
+ double *I; d_zeros(&I, pp, pp); for(ii=0; ii<pp; ii++) I[ii*(pp+1)]=1.0; // = eye(pp);
+ double *Ac; d_zeros(&Ac, nx, nx);
+ dmcopy(pp, pp, Z, pp, Ac, nx);
+ dmcopy(pp, pp, T, pp, Ac+pp, nx);
+ dmcopy(pp, pp, I, pp, Ac+pp*nx, nx);
+ dmcopy(pp, pp, Z, pp, Ac+pp*(nx+1), nx);
+ free(T);
+ free(Z);
+ free(I);
+
+ d_zeros(&I, nu, nu); for(ii=0; ii<nu; ii++) I[ii*(nu+1)]=1.0; //I = eye(nu);
+ double *Bc; d_zeros(&Bc, nx, nu);
+ dmcopy(nu, nu, I, nu, Bc+pp, nx);
+ free(I);
+
+/************************************************
+* compute the discrete time system
+************************************************/
+
+ double *bb; d_zeros(&bb, nx, 1);
+ dmcopy(nx, 1, bb, nx, b, nx);
+
+ dmcopy(nx, nx, Ac, nx, A, nx);
+ dscal_3l(nx2, Ts, A);
+ expm(nx, A);
+
+ d_zeros(&T, nx, nx);
+ d_zeros(&I, nx, nx); for(ii=0; ii<nx; ii++) I[ii*(nx+1)]=1.0; //I = eye(nx);
+ dmcopy(nx, nx, A, nx, T, nx);
+ daxpy_3l(nx2, -1.0, I, T);
+ dgemm_nn_3l(nx, nu, nx, T, nx, Bc, nx, B, nx);
+ free(T);
+ free(I);
+
+ int *ipiv = (int *) malloc(nx*sizeof(int));
+ dgesv_3l(nx, nu, Ac, nx, ipiv, B, nx, &info);
+ free(ipiv);
+
+ free(Ac);
+ free(Bc);
+ free(bb);
+
+
+/************************************************
+* initial state
+************************************************/
+
+ if(nx==4)
+ {
+ x0[0] = 5;
+ x0[1] = 10;
+ x0[2] = 15;
+ x0[3] = 20;
+ }
+ else
+ {
+ int jj;
+ for(jj=0; jj<nx; jj++)
+ x0[jj] = 1;
+ }
+
+ }
+
+
+
+int main()
+ {
+
+
+ // local variables
+
+ int ii, jj;
+
+ int rep, nrep=1000;
+
+ struct timeval tv0, tv1;
+
+
+
+ // problem size
+
+ int nx_ = 8; // number of states (it has to be even for the mass-spring system test problem)
+ int nu_ = 3; // number of inputs (controllers) (it has to be at least 1 and at most nx/2 for the mass-spring system test problem)
+ int N = 5; // horizon lenght
+
+
+
+ // stage-wise variant size
+
+ int nx[N+1];
+#if KEEP_X0
+ nx[0] = nx_;
+#else
+ nx[0] = 0;
+#endif
+ for(ii=1; ii<=N; ii++)
+ nx[ii] = nx_;
+// nx[N] = 0;
+
+ int nu[N+1];
+ for(ii=0; ii<N; ii++)
+ nu[ii] = nu_;
+ nu[N] = 0;
+
+#if 1
+ int nb[N+1];
+#if KEEP_X0
+ nb[0] = nu[0]+nx[0]/2;
+#else
+ nb[0] = nu[0];
+#endif
+ for(ii=1; ii<N; ii++)
+ nb[ii] = nu[1]+nx[1]/2;
+ nb[N] = nx[N]/2;
+
+ int ng[N+1];
+ ng[0] = 0;
+ for(ii=1; ii<N; ii++)
+ ng[ii] = 0;
+ ng[N] = 0;
+#elif 0
+ int nb[N+1];
+ nb[0] = 0;
+ for(ii=1; ii<N; ii++)
+ nb[ii] = 0;
+ nb[N] = 0;
+
+ int ng[N+1];
+#if KEEP_X0
+ ng[0] = nu[0]+nx[0]/2;
+#else
+ ng[0] = nu[0];
+#endif
+ for(ii=1; ii<N; ii++)
+ ng[ii] = nu[1]+nx[1]/2;
+ ng[N] = nx[N]/2;
+#else
+ int nb[N+1];
+ nb[0] = nu[0] + nx[0]/2;
+ for(ii=1; ii<N; ii++)
+ nb[ii] = nu[ii] + nx[ii]/2;
+ nb[N] = nu[N] + nx[N]/2;
+
+ int ng[N+1];
+#if KEEP_X0
+ ng[0] = nx[0]/2;
+#else
+ ng[0] = 0;
+#endif
+ for(ii=1; ii<N; ii++)
+ ng[ii] = nx[1]/2;
+ ng[N] = nx[N]/2;
+#endif
+
+/************************************************
+* dynamical system
+************************************************/
+
+ double *A; d_zeros(&A, nx_, nx_); // states update matrix
+
+ double *B; d_zeros(&B, nx_, nu_); // inputs matrix
+
+ double *b; d_zeros(&b, nx_, 1); // states offset
+ double *x0; d_zeros(&x0, nx_, 1); // initial state
+
+ double Ts = 0.5; // sampling time
+ mass_spring_system(Ts, nx_, nu_, N, A, B, b, x0);
+
+ for(jj=0; jj<nx_; jj++)
+ b[jj] = 0.1;
+
+ for(jj=0; jj<nx_; jj++)
+ x0[jj] = 0;
+ x0[0] = 2.5;
+ x0[1] = 2.5;
+
+ double *b0; d_zeros(&b0, nx_, 1);
+ dgemv_n_3l(nx_, nx_, A, nx_, x0, b0);
+ daxpy_3l(nx_, 1.0, b, b0);
+
+#if PRINT
+ d_print_mat(nx_, nx_, A, nx_);
+ d_print_mat(nx_, nu_, B, nu_);
+ d_print_mat(1, nx_, b, 1);
+ d_print_mat(1, nx_, x0, 1);
+ d_print_mat(1, nx_, b0, 1);
+#endif
+
+/************************************************
+* cost function
+************************************************/
+
+ double *Q; d_zeros(&Q, nx_, nx_);
+ for(ii=0; ii<nx_; ii++) Q[ii*(nx_+1)] = 1.0;
+
+ double *R; d_zeros(&R, nu_, nu_);
+ for(ii=0; ii<nu_; ii++) R[ii*(nu_+1)] = 2.0;
+
+ double *S; d_zeros(&S, nu_, nx_);
+
+ double *q; d_zeros(&q, nx_, 1);
+ for(ii=0; ii<nx_; ii++) q[ii] = 0.1;
+
+ double *r; d_zeros(&r, nu_, 1);
+ for(ii=0; ii<nu_; ii++) r[ii] = 0.2;
+
+ double *r0; d_zeros(&r0, nu_, 1);
+ dgemv_n_3l(nu_, nx_, S, nu_, x0, r0);
+ daxpy_3l(nu_, 1.0, r, r0);
+
+#if PRINT
+ d_print_mat(nx_, nx_, Q, nx_);
+ d_print_mat(nu_, nu_, R, nu_);
+ d_print_mat(nu_, nx_, S, nu_);
+ d_print_mat(1, nx_, q, 1);
+ d_print_mat(1, nu_, r, 1);
+ d_print_mat(1, nu_, r0, 1);
+#endif
+
+ // maximum element in cost functions
+ double mu0 = 2.0;
+
+/************************************************
+* box & general constraints
+************************************************/
+
+ int *idxb0; int_zeros(&idxb0, nb[0], 1);
+ double *d_lb0; d_zeros(&d_lb0, nb[0], 1);
+ double *d_ub0; d_zeros(&d_ub0, nb[0], 1);
+ double *d_lg0; d_zeros(&d_lg0, ng[0], 1);
+ double *d_ug0; d_zeros(&d_ug0, ng[0], 1);
+ for(ii=0; ii<nb[0]; ii++)
+ {
+ if(ii<nu[0]) // input
+ {
+ d_lb0[ii] = - 0.5; // umin
+ d_ub0[ii] = 0.5; // umax
+ }
+ else // state
+ {
+ d_lb0[ii] = - 4.0; // xmin
+ d_ub0[ii] = 4.0; // xmax
+ }
+ idxb0[ii] = ii;
+ }
+ for(ii=0; ii<ng[0]; ii++)
+ {
+ if(ii<nu[0]-nb[0]) // input
+ {
+ d_lg0[ii] = - 0.5; // umin
+ d_ug0[ii] = 0.5; // umax
+ }
+ else // state
+ {
+ d_lg0[ii] = - 4.0; // xmin
+ d_ug0[ii] = 4.0; // xmax
+ }
+ }
+
+ int *idxb1; int_zeros(&idxb1, nb[1], 1);
+ double *d_lb1; d_zeros(&d_lb1, nb[1], 1);
+ double *d_ub1; d_zeros(&d_ub1, nb[1], 1);
+ double *d_lg1; d_zeros(&d_lg1, ng[1], 1);
+ double *d_ug1; d_zeros(&d_ug1, ng[1], 1);
+ for(ii=0; ii<nb[1]; ii++)
+ {
+ if(ii<nu[1]) // input
+ {
+ d_lb1[ii] = - 0.5; // umin
+ d_ub1[ii] = 0.5; // umax
+ }
+ else // state
+ {
+ d_lb1[ii] = - 4.0; // xmin
+ d_ub1[ii] = 4.0; // xmax
+ }
+ idxb1[ii] = ii;
+ }
+ for(ii=0; ii<ng[1]; ii++)
+ {
+ if(ii<nu[1]-nb[1]) // input
+ {
+ d_lg1[ii] = - 0.5; // umin
+ d_ug1[ii] = 0.5; // umax
+ }
+ else // state
+ {
+ d_lg1[ii] = - 4.0; // xmin
+ d_ug1[ii] = 4.0; // xmax
+ }
+ }
+
+
+ int *idxbN; int_zeros(&idxbN, nb[N], 1);
+ double *d_lbN; d_zeros(&d_lbN, nb[N], 1);
+ double *d_ubN; d_zeros(&d_ubN, nb[N], 1);
+ double *d_lgN; d_zeros(&d_lgN, ng[N], 1);
+ double *d_ugN; d_zeros(&d_ugN, ng[N], 1);
+ for(ii=0; ii<nb[N]; ii++)
+ {
+ d_lbN[ii] = - 4.0; // xmin
+ d_ubN[ii] = 4.0; // xmax
+ idxbN[ii] = ii;
+ }
+ for(ii=0; ii<ng[N]; ii++)
+ {
+ d_lgN[ii] = - 4.0; // dmin
+ d_ugN[ii] = 4.0; // dmax
+ }
+
+ double *C0; d_zeros(&C0, ng[0], nx[0]);
+ double *D0; d_zeros(&D0, ng[0], nu[0]);
+ for(ii=0; ii<nu[0]-nb[0] & ii<ng[0]; ii++)
+ D0[ii+(nb[0]+ii)*ng[0]] = 1.0;
+ for(; ii<ng[0]; ii++)
+ C0[ii+(nb[0]+ii-nu[0])*ng[0]] = 1.0;
+
+ double *C1; d_zeros(&C1, ng[1], nx[1]);
+ double *D1; d_zeros(&D1, ng[1], nu[1]);
+ for(ii=0; ii<nu[1]-nb[1] & ii<ng[1]; ii++)
+ D1[ii+(nb[1]+ii)*ng[1]] = 1.0;
+ for(; ii<ng[1]; ii++)
+ C1[ii+(nb[1]+ii-nu[1])*ng[1]] = 1.0;
+
+ double *CN; d_zeros(&CN, ng[N], nx[N]);
+ double *DN; d_zeros(&DN, ng[N], nu[N]);
+ for(ii=0; ii<nu[N]-nb[N] & ii<ng[N]; ii++)
+ DN[ii+(nb[N]+ii)*ng[N]] = 1.0;
+ for(; ii<ng[N]; ii++)
+ CN[ii+(nb[N]+ii-nu[N])*ng[N]] = 1.0;
+
+#if PRINT
+ // box constraints
+ int_print_mat(1, nb[0], idxb0, 1);
+ d_print_mat(1, nb[0], d_lb0, 1);
+ d_print_mat(1, nb[0], d_ub0, 1);
+ int_print_mat(1, nb[1], idxb1, 1);
+ d_print_mat(1, nb[1], d_lb1, 1);
+ d_print_mat(1, nb[1], d_ub1, 1);
+ int_print_mat(1, nb[N], idxbN, 1);
+ d_print_mat(1, nb[N], d_lbN, 1);
+ d_print_mat(1, nb[N], d_ubN, 1);
+ // general constraints
+ d_print_mat(1, ng[0], d_lg0, 1);
+ d_print_mat(1, ng[0], d_ug0, 1);
+ d_print_mat(ng[0], nu[0], D0, ng[0]);
+ d_print_mat(ng[0], nx[0], C0, ng[0]);
+ d_print_mat(1, ng[1], d_lg1, 1);
+ d_print_mat(1, ng[1], d_ug1, 1);
+ d_print_mat(ng[1], nu[1], D1, ng[1]);
+ d_print_mat(ng[1], nx[1], C1, ng[1]);
+ d_print_mat(1, ng[N], d_lgN, 1);
+ d_print_mat(1, ng[N], d_ugN, 1);
+ d_print_mat(ng[N], nu[N], DN, ng[N]);
+ d_print_mat(ng[N], nx[N], CN, ng[N]);
+#endif
+
+/************************************************
+* array of matrices
+************************************************/
+
+ double *hA[N];
+ double *hB[N];
+ double *hb[N];
+ double *hQ[N+1];
+ double *hS[N+1];
+ double *hR[N+1];
+ double *hq[N+1];
+ double *hr[N+1];
+ double *hd_lb[N+1];
+ double *hd_ub[N+1];
+ double *hd_lg[N+1];
+ double *hd_ug[N+1];
+ double *hC[N+1];
+ double *hD[N+1];
+ int *hidxb[N+1];
+
+ hA[0] = A;
+ hB[0] = B;
+ hb[0] = b0;
+ hQ[0] = Q;
+ hS[0] = S;
+ hR[0] = R;
+ hq[0] = q;
+ hr[0] = r0;
+ hidxb[0] = idxb0;
+ hd_lb[0] = d_lb0;
+ hd_ub[0] = d_ub0;
+ hd_lg[0] = d_lg0;
+ hd_ug[0] = d_ug0;
+ hC[0] = C0;
+ hD[0] = D0;
+ for(ii=1; ii<N; ii++)
+ {
+ hA[ii] = A;
+ hB[ii] = B;
+ hb[ii] = b;
+ hQ[ii] = Q;
+ hS[ii] = S;
+ hR[ii] = R;
+ hq[ii] = q;
+ hr[ii] = r;
+ hidxb[ii] = idxb1;
+ hd_lb[ii] = d_lb1;
+ hd_ub[ii] = d_ub1;
+ hd_lg[ii] = d_lg1;
+ hd_ug[ii] = d_ug1;
+ hC[ii] = C1;
+ hD[ii] = D1;
+ }
+ hQ[N] = Q;
+ hS[N] = S;
+ hR[N] = R;
+ hq[N] = q;
+ hr[N] = r;
+ hidxb[N] = idxbN;
+ hd_lb[N] = d_lbN;
+ hd_ub[N] = d_ubN;
+ hd_lg[N] = d_lgN;
+ hd_ug[N] = d_ugN;
+ hC[N] = CN;
+ hD[N] = DN;
+
+/************************************************
+* ocp qp
+************************************************/
+
+ int qp_size = d_memsize_ocp_qp(N, nx, nu, nb, ng);
+ printf("\nqp size = %d\n", qp_size);
+ void *qp_mem = malloc(qp_size);
+
+ struct d_ocp_qp qp;
+ d_create_ocp_qp(N, nx, nu, nb, ng, &qp, qp_mem);
+ d_cvt_colmaj_to_ocp_qp(hA, hB, hb, hQ, hS, hR, hq, hr, hidxb, hd_lb, hd_ub, hC, hD, hd_lg, hd_ug, &qp);
+#if 0
+ printf("\nN = %d\n", qp.N);
+ for(ii=0; ii<N; ii++)
+ d_print_strmat(qp.nu[ii]+qp.nx[ii]+1, qp.nx[ii+1], qp.BAbt+ii, 0, 0);
+ for(ii=0; ii<N; ii++)
+ d_print_tran_strvec(qp.nx[ii+1], qp.b+ii, 0);
+ for(ii=0; ii<=N; ii++)
+ d_print_strmat(qp.nu[ii]+qp.nx[ii]+1, qp.nu[ii]+qp.nx[ii], qp.RSQrq+ii, 0, 0);
+ for(ii=0; ii<=N; ii++)
+ d_print_tran_strvec(qp.nu[ii]+qp.nx[ii], qp.rq+ii, 0);
+ for(ii=0; ii<=N; ii++)
+ int_print_mat(1, nb[ii], qp.idxb[ii], 1);
+ for(ii=0; ii<=N; ii++)
+ d_print_tran_strvec(qp.nb[ii], qp.d_lb+ii, 0);
+ for(ii=0; ii<=N; ii++)
+ d_print_tran_strvec(qp.nb[ii], qp.d_ub+ii, 0);
+ for(ii=0; ii<=N; ii++)
+ d_print_strmat(qp.nu[ii]+qp.nx[ii], qp.ng[ii], qp.DCt+ii, 0, 0);
+ for(ii=0; ii<=N; ii++)
+ d_print_tran_strvec(qp.ng[ii], qp.d_lg+ii, 0);
+ for(ii=0; ii<=N; ii++)
+ d_print_tran_strvec(qp.ng[ii], qp.d_ug+ii, 0);
+ return;
+#endif
+
+/************************************************
+* ocp qp sol
+************************************************/
+
+ int qp_sol_size = d_memsize_ocp_qp_sol(N, nx, nu, nb, ng);
+ printf("\nqp sol size = %d\n", qp_sol_size);
+ void *qp_sol_mem = malloc(qp_sol_size);
+
+ struct d_ocp_qp_sol qp_sol;
+ d_create_ocp_qp_sol(N, nx, nu, nb, ng, &qp_sol, qp_sol_mem);
+
+/************************************************
+* ipm
+************************************************/
+
+ struct d_ipm_hard_ocp_qp_arg arg;
+ arg.alpha_min = 1e-8;
+ arg.mu_max = 1e-12;
+ arg.iter_max = 20;
+ arg.mu0 = 2.0;
+
+ int ipm_size = d_memsize_ipm_hard_ocp_qp(&qp, &arg);
+ printf("\nipm size = %d\n", ipm_size);
+ void *ipm_mem = malloc(ipm_size);
+
+ struct d_ipm_hard_ocp_qp_workspace workspace;
+ d_create_ipm_hard_ocp_qp(&qp, &arg, &workspace, ipm_mem);
+
+ gettimeofday(&tv0, NULL); // start
+
+ for(rep=0; rep<nrep; rep++)
+ {
+// d_solve_ipm_hard_ocp_qp(&qp, &qp_sol, &workspace);
+ d_solve_ipm2_hard_ocp_qp(&qp, &qp_sol, &workspace);
+ }
+
+ gettimeofday(&tv1, NULL); // stop
+
+ double time_ocp_ipm = (tv1.tv_sec-tv0.tv_sec)/(nrep+0.0)+(tv1.tv_usec-tv0.tv_usec)/(nrep*1e6);
+
+/************************************************
+* extract and print solution
+************************************************/
+
+ double *u[N+1]; for(ii=0; ii<=N; ii++) d_zeros(u+ii, nu[ii], 1);
+ double *x[N+1]; for(ii=0; ii<=N; ii++) d_zeros(x+ii, nx[ii], 1);
+ double *pi[N]; for(ii=0; ii<N; ii++) d_zeros(pi+ii, nx[ii+1], 1);
+ double *lam_lb[N+1]; for(ii=0; ii<=N; ii++) d_zeros(lam_lb+ii, nb[ii], 1);
+ double *lam_ub[N+1]; for(ii=0; ii<=N; ii++) d_zeros(lam_ub+ii, nb[ii], 1);
+ double *lam_lg[N+1]; for(ii=0; ii<=N; ii++) d_zeros(lam_lg+ii, ng[ii], 1);
+ double *lam_ug[N+1]; for(ii=0; ii<=N; ii++) d_zeros(lam_ug+ii, ng[ii], 1);
+
+ d_cvt_ocp_qp_sol_to_colmaj(&qp, &qp_sol, u, x, pi, lam_lb, lam_ub, lam_lg, lam_ug);
+
+#if 1
+ printf("\nsolution\n\n");
+ printf("\nu\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_mat(1, nu[ii], u[ii], 1);
+ printf("\nx\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_mat(1, nx[ii], x[ii], 1);
+ printf("\npi\n");
+ for(ii=0; ii<N; ii++)
+ d_print_mat(1, nx[ii+1], pi[ii], 1);
+ printf("\nlam_lb\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_mat(1, nb[ii], lam_lb[ii], 1);
+ printf("\nlam_ub\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_mat(1, nb[ii], lam_ub[ii], 1);
+ printf("\nlam_lg\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_mat(1, ng[ii], lam_lg[ii], 1);
+ printf("\nlam_ug\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_mat(1, ng[ii], lam_ug[ii], 1);
+
+ printf("\nt_lb\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_mat(1, nb[ii], (qp_sol.t_lb+ii)->pa, 1);
+ printf("\nt_ub\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_mat(1, nb[ii], (qp_sol.t_ub+ii)->pa, 1);
+ printf("\nt_lg\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_mat(1, ng[ii], (qp_sol.t_lg+ii)->pa, 1);
+ printf("\nt_ug\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_mat(1, ng[ii], (qp_sol.t_ug+ii)->pa, 1);
+
+ printf("\nresiduals\n\n");
+ printf("\nres_g\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_e_mat(1, nu[ii]+nx[ii], (workspace.res_g+ii)->pa, 1);
+ printf("\nres_b\n");
+ for(ii=0; ii<N; ii++)
+ d_print_e_mat(1, nx[ii+1], (workspace.res_b+ii)->pa, 1);
+ printf("\nres_m_lb\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_e_mat(1, nb[ii], (workspace.res_m_lb+ii)->pa, 1);
+ printf("\nres_m_ub\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_e_mat(1, nb[ii], (workspace.res_m_ub+ii)->pa, 1);
+ printf("\nres_m_lg\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_e_mat(1, ng[ii], (workspace.res_m_lg+ii)->pa, 1);
+ printf("\nres_m_ug\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_e_mat(1, ng[ii], (workspace.res_m_ug+ii)->pa, 1);
+ printf("\nres_d_lb\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_e_mat(1, nb[ii], (workspace.res_d_lb+ii)->pa, 1);
+ printf("\nres_d_ub\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_e_mat(1, nb[ii], (workspace.res_d_ub+ii)->pa, 1);
+ printf("\nres_d_lg\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_e_mat(1, ng[ii], (workspace.res_d_lg+ii)->pa, 1);
+ printf("\nres_d_ug\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_e_mat(1, ng[ii], (workspace.res_d_ug+ii)->pa, 1);
+ printf("\nres_mu\n");
+ printf("\n%e\n\n", workspace.res_mu);
+#endif
+
+ printf("\nipm iter = %d\n", workspace.iter);
+ printf("\nalpha_aff\tmu_aff\t\tsigma\t\talpha\t\tmu\n");
+ d_print_e_tran_mat(5, workspace.iter, workspace.stat, 5);
+
+ printf("\nocp ipm time = %e [s]\n\n", time_ocp_ipm);
+
+/************************************************
+* free memory
+************************************************/
+
+ d_free(A);
+ d_free(B);
+ d_free(b);
+ d_free(x0);
+ d_free(Q);
+ d_free(R);
+ d_free(S);
+ d_free(q);
+ d_free(r);
+ d_free(r0);
+ int_free(idxb0);
+ d_free(d_lb0);
+ d_free(d_ub0);
+ int_free(idxb1);
+ d_free(d_lb1);
+ d_free(d_ub1);
+ int_free(idxbN);
+ d_free(d_lbN);
+ d_free(d_ubN);
+ d_free(C0);
+ d_free(D0);
+ d_free(d_lg0);
+ d_free(d_ug0);
+ d_free(C1);
+ d_free(D1);
+ d_free(d_lg1);
+ d_free(d_ug1);
+ d_free(CN);
+ d_free(DN);
+ d_free(d_lgN);
+ d_free(d_ugN);
+
+ for(ii=0; ii<N; ii++)
+ {
+ d_free(u[ii]);
+ d_free(x[ii]);
+ d_free(pi[ii]);
+ d_free(lam_lb[ii]);
+ d_free(lam_ub[ii]);
+ d_free(lam_lg[ii]);
+ d_free(lam_ug[ii]);
+ }
+ d_free(u[ii]);
+ d_free(x[ii]);
+ d_free(lam_lb[ii]);
+ d_free(lam_ub[ii]);
+ d_free(lam_lg[ii]);
+ d_free(lam_ug[ii]);
+
+ free(qp_mem);
+ free(qp_sol_mem);
+ free(ipm_mem);
+
+/************************************************
+* return
+************************************************/
+
+ return 0;
+
+ }
diff --git a/test_problems/test_m_ocp.c b/test_problems/test_m_ocp.c
new file mode 100644
index 0000000..7bca2d2
--- /dev/null
+++ b/test_problems/test_m_ocp.c
@@ -0,0 +1,876 @@
+/**************************************************************************************************
+* *
+* This file is part of HPIPM. *
+* *
+* HPIPM -- High Performance Interior Point Method. *
+* Copyright (C) 2017 by Gianluca Frison. *
+* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
+* All rights reserved. *
+* *
+* HPMPC is free software; you can redistribute it and/or *
+* modify it under the terms of the GNU Lesser General Public *
+* License as published by the Free Software Foundation; either *
+* version 2.1 of the License, or (at your option) any later version. *
+* *
+* HPMPC is distributed in the hope that it will be useful, *
+* but WITHOUT ANY WARRANTY; without even the implied warranty of *
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
+* See the GNU Lesser General Public License for more details. *
+* *
+* You should have received a copy of the GNU Lesser General Public *
+* License along with HPMPC; if not, write to the Free Software *
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA *
+* *
+* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
+* *
+**************************************************************************************************/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <sys/time.h>
+
+#include <blasfeo_target.h>
+#include <blasfeo_common.h>
+#include <blasfeo_v_aux_ext_dep.h>
+#include <blasfeo_d_aux_ext_dep.h>
+#include <blasfeo_s_aux_ext_dep.h>
+#include <blasfeo_i_aux_ext_dep.h>
+#include <blasfeo_d_aux.h>
+#include <blasfeo_d_blas.h>
+
+#include "../include/hpipm_d_ocp_qp.h"
+#include "../include/hpipm_s_ocp_qp.h"
+#include "../include/hpipm_m_ocp_qp.h"
+#include "../include/hpipm_d_ocp_qp_sol.h"
+#include "../include/hpipm_d_ocp_qp_ipm_hard.h"
+#include "../include/hpipm_m_ocp_qp_ipm_hard.h"
+
+#include "d_tools.h"
+
+
+
+#if ! defined(EXT_DEP)
+/* creates a zero matrix */
+void d_zeros(double **pA, int row, int col)
+ {
+ *pA = malloc((row*col)*sizeof(double));
+ double *A = *pA;
+ int i;
+ for(i=0; i<row*col; i++) A[i] = 0.0;
+ }
+/* frees matrix */
+void d_free(double *pA)
+ {
+ free( pA );
+ }
+/* prints a matrix in column-major format */
+void d_print_mat(int m, int n, double *A, int lda)
+ {
+ int i, j;
+ for(i=0; i<m; i++)
+ {
+ for(j=0; j<n; j++)
+ {
+ printf("%9.5f ", A[i+lda*j]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+ }
+/* prints the transposed of a matrix in column-major format */
+void d_print_tran_mat(int row, int col, double *A, int lda)
+ {
+ int i, j;
+ for(j=0; j<col; j++)
+ {
+ for(i=0; i<row; i++)
+ {
+ printf("%9.5f ", A[i+lda*j]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+ }
+/* prints a matrix in column-major format (exponential notation) */
+void d_print_e_mat(int m, int n, double *A, int lda)
+ {
+ int i, j;
+ for(i=0; i<m; i++)
+ {
+ for(j=0; j<n; j++)
+ {
+ printf("%e\t", A[i+lda*j]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+ }
+/* prints the transposed of a matrix in column-major format (exponential notation) */
+void d_print_e_tran_mat(int row, int col, double *A, int lda)
+ {
+ int i, j;
+ for(j=0; j<col; j++)
+ {
+ for(i=0; i<row; i++)
+ {
+ printf("%e\t", A[i+lda*j]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+ }
+/* creates a zero matrix aligned */
+void int_zeros(int **pA, int row, int col)
+ {
+ void *temp = malloc((row*col)*sizeof(int));
+ *pA = temp;
+ int *A = *pA;
+ int i;
+ for(i=0; i<row*col; i++) A[i] = 0;
+ }
+/* frees matrix */
+void int_free(int *pA)
+ {
+ free( pA );
+ }
+/* prints a matrix in column-major format */
+void int_print_mat(int row, int col, int *A, int lda)
+ {
+ int i, j;
+ for(i=0; i<row; i++)
+ {
+ for(j=0; j<col; j++)
+ {
+ printf("%d ", A[i+lda*j]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+ }
+#endif
+
+
+
+#define KEEP_X0 0
+
+// printing
+#define PRINT 1
+
+/************************************************
+Mass-spring system: nx/2 masses connected each other with springs (in a row), and the first and the last one to walls. nu (<=nx) controls act on the first nu masses. The system is sampled with sampling time Ts.
+************************************************/
+void mass_spring_system(double Ts, int nx, int nu, int N, double *A, double *B, double *b, double *x0)
+ {
+
+ int nx2 = nx*nx;
+
+ int info = 0;
+
+ int pp = nx/2; // number of masses
+
+/************************************************
+* build the continuous time system
+************************************************/
+
+ double *T; d_zeros(&T, pp, pp);
+ int ii;
+ for(ii=0; ii<pp; ii++) T[ii*(pp+1)] = -2;
+ for(ii=0; ii<pp-1; ii++) T[ii*(pp+1)+1] = 1;
+ for(ii=1; ii<pp; ii++) T[ii*(pp+1)-1] = 1;
+
+ double *Z; d_zeros(&Z, pp, pp);
+ double *I; d_zeros(&I, pp, pp); for(ii=0; ii<pp; ii++) I[ii*(pp+1)]=1.0; // = eye(pp);
+ double *Ac; d_zeros(&Ac, nx, nx);
+ dmcopy(pp, pp, Z, pp, Ac, nx);
+ dmcopy(pp, pp, T, pp, Ac+pp, nx);
+ dmcopy(pp, pp, I, pp, Ac+pp*nx, nx);
+ dmcopy(pp, pp, Z, pp, Ac+pp*(nx+1), nx);
+ free(T);
+ free(Z);
+ free(I);
+
+ d_zeros(&I, nu, nu); for(ii=0; ii<nu; ii++) I[ii*(nu+1)]=1.0; //I = eye(nu);
+ double *Bc; d_zeros(&Bc, nx, nu);
+ dmcopy(nu, nu, I, nu, Bc+pp, nx);
+ free(I);
+
+/************************************************
+* compute the discrete time system
+************************************************/
+
+ double *bb; d_zeros(&bb, nx, 1);
+ dmcopy(nx, 1, bb, nx, b, nx);
+
+ dmcopy(nx, nx, Ac, nx, A, nx);
+ dscal_3l(nx2, Ts, A);
+ expm(nx, A);
+
+ d_zeros(&T, nx, nx);
+ d_zeros(&I, nx, nx); for(ii=0; ii<nx; ii++) I[ii*(nx+1)]=1.0; //I = eye(nx);
+ dmcopy(nx, nx, A, nx, T, nx);
+ daxpy_3l(nx2, -1.0, I, T);
+ dgemm_nn_3l(nx, nu, nx, T, nx, Bc, nx, B, nx);
+ free(T);
+ free(I);
+
+ int *ipiv = (int *) malloc(nx*sizeof(int));
+ dgesv_3l(nx, nu, Ac, nx, ipiv, B, nx, &info);
+ free(ipiv);
+
+ free(Ac);
+ free(Bc);
+ free(bb);
+
+
+/************************************************
+* initial state
+************************************************/
+
+ if(nx==4)
+ {
+ x0[0] = 5;
+ x0[1] = 10;
+ x0[2] = 15;
+ x0[3] = 20;
+ }
+ else
+ {
+ int jj;
+ for(jj=0; jj<nx; jj++)
+ x0[jj] = 1;
+ }
+
+ }
+
+
+
+int main()
+ {
+
+
+ // local variables
+
+ int ii, jj;
+
+ int rep, nrep=1000;
+
+ struct timeval tv0, tv1;
+
+
+
+ // problem size
+
+ int nx_ = 8; // number of states (it has to be even for the mass-spring system test problem)
+ int nu_ = 3; // number of inputs (controllers) (it has to be at least 1 and at most nx/2 for the mass-spring system test problem)
+ int N = 5; // horizon lenght
+
+
+
+ // stage-wise variant size
+
+ int nx[N+1];
+#if KEEP_X0
+ nx[0] = nx_;
+#else
+ nx[0] = 0;
+#endif
+ for(ii=1; ii<=N; ii++)
+ nx[ii] = nx_;
+// nx[N] = 0;
+
+ int nu[N+1];
+ for(ii=0; ii<N; ii++)
+ nu[ii] = nu_;
+ nu[N] = 0;
+
+#if 1
+ int nb[N+1];
+#if KEEP_X0
+ nb[0] = nu[0]+nx[0]/2;
+#else
+ nb[0] = nu[0];
+#endif
+ for(ii=1; ii<N; ii++)
+ nb[ii] = nu[1]+nx[1]/2;
+ nb[N] = nx[N]/2;
+
+ int ng[N+1];
+ ng[0] = 0;
+ for(ii=1; ii<N; ii++)
+ ng[ii] = 0;
+ ng[N] = 0;
+#elif 0
+ int nb[N+1];
+ nb[0] = 0;
+ for(ii=1; ii<N; ii++)
+ nb[ii] = 0;
+ nb[N] = 0;
+
+ int ng[N+1];
+#if KEEP_X0
+ ng[0] = nu[0]+nx[0]/2;
+#else
+ ng[0] = nu[0];
+#endif
+ for(ii=1; ii<N; ii++)
+ ng[ii] = nu[1]+nx[1]/2;
+ ng[N] = nx[N]/2;
+#else
+ int nb[N+1];
+ nb[0] = nu[0] + nx[0]/2;
+ for(ii=1; ii<N; ii++)
+ nb[ii] = nu[ii] + nx[ii]/2;
+ nb[N] = nu[N] + nx[N]/2;
+
+ int ng[N+1];
+#if KEEP_X0
+ ng[0] = nx[0]/2;
+#else
+ ng[0] = 0;
+#endif
+ for(ii=1; ii<N; ii++)
+ ng[ii] = nx[1]/2;
+ ng[N] = nx[N]/2;
+#endif
+
+/************************************************
+* dynamical system
+************************************************/
+
+ double *A; d_zeros(&A, nx_, nx_); // states update matrix
+
+ double *B; d_zeros(&B, nx_, nu_); // inputs matrix
+
+ double *b; d_zeros(&b, nx_, 1); // states offset
+ double *x0; d_zeros(&x0, nx_, 1); // initial state
+
+ double Ts = 0.5; // sampling time
+ mass_spring_system(Ts, nx_, nu_, N, A, B, b, x0);
+
+ for(jj=0; jj<nx_; jj++)
+ b[jj] = 0.1;
+
+ for(jj=0; jj<nx_; jj++)
+ x0[jj] = 0;
+ x0[0] = 2.5;
+ x0[1] = 2.5;
+
+ double *b0; d_zeros(&b0, nx_, 1);
+ dgemv_n_3l(nx_, nx_, A, nx_, x0, b0);
+ daxpy_3l(nx_, 1.0, b, b0);
+
+#if PRINT
+ d_print_mat(nx_, nx_, A, nx_);
+ d_print_mat(nx_, nu_, B, nu_);
+ d_print_mat(1, nx_, b, 1);
+ d_print_mat(1, nx_, x0, 1);
+ d_print_mat(1, nx_, b0, 1);
+#endif
+
+/************************************************
+* cost function
+************************************************/
+
+ double *Q; d_zeros(&Q, nx_, nx_);
+ for(ii=0; ii<nx_; ii++) Q[ii*(nx_+1)] = 1.0;
+
+ double *R; d_zeros(&R, nu_, nu_);
+ for(ii=0; ii<nu_; ii++) R[ii*(nu_+1)] = 2.0;
+
+ double *S; d_zeros(&S, nu_, nx_);
+
+ double *q; d_zeros(&q, nx_, 1);
+ for(ii=0; ii<nx_; ii++) q[ii] = 0.1;
+
+ double *r; d_zeros(&r, nu_, 1);
+ for(ii=0; ii<nu_; ii++) r[ii] = 0.2;
+
+ double *r0; d_zeros(&r0, nu_, 1);
+ dgemv_n_3l(nu_, nx_, S, nu_, x0, r0);
+ daxpy_3l(nu_, 1.0, r, r0);
+
+#if PRINT
+ d_print_mat(nx_, nx_, Q, nx_);
+ d_print_mat(nu_, nu_, R, nu_);
+ d_print_mat(nu_, nx_, S, nu_);
+ d_print_mat(1, nx_, q, 1);
+ d_print_mat(1, nu_, r, 1);
+ d_print_mat(1, nu_, r0, 1);
+#endif
+
+ // maximum element in cost functions
+ double mu0 = 2.0;
+
+/************************************************
+* box & general constraints
+************************************************/
+
+ int *idxb0; int_zeros(&idxb0, nb[0], 1);
+ double *d_lb0; d_zeros(&d_lb0, nb[0], 1);
+ double *d_ub0; d_zeros(&d_ub0, nb[0], 1);
+ double *d_lg0; d_zeros(&d_lg0, ng[0], 1);
+ double *d_ug0; d_zeros(&d_ug0, ng[0], 1);
+ for(ii=0; ii<nb[0]; ii++)
+ {
+ if(ii<nu[0]) // input
+ {
+ d_lb0[ii] = - 0.5; // umin
+ d_ub0[ii] = 0.5; // umax
+ }
+ else // state
+ {
+ d_lb0[ii] = - 4.0; // xmin
+ d_ub0[ii] = 4.0; // xmax
+ }
+ idxb0[ii] = ii;
+ }
+ for(ii=0; ii<ng[0]; ii++)
+ {
+ if(ii<nu[0]-nb[0]) // input
+ {
+ d_lg0[ii] = - 0.5; // umin
+ d_ug0[ii] = 0.5; // umax
+ }
+ else // state
+ {
+ d_lg0[ii] = - 4.0; // xmin
+ d_ug0[ii] = 4.0; // xmax
+ }
+ }
+
+ int *idxb1; int_zeros(&idxb1, nb[1], 1);
+ double *d_lb1; d_zeros(&d_lb1, nb[1], 1);
+ double *d_ub1; d_zeros(&d_ub1, nb[1], 1);
+ double *d_lg1; d_zeros(&d_lg1, ng[1], 1);
+ double *d_ug1; d_zeros(&d_ug1, ng[1], 1);
+ for(ii=0; ii<nb[1]; ii++)
+ {
+ if(ii<nu[1]) // input
+ {
+ d_lb1[ii] = - 0.5; // umin
+ d_ub1[ii] = 0.5; // umax
+ }
+ else // state
+ {
+ d_lb1[ii] = - 4.0; // xmin
+ d_ub1[ii] = 4.0; // xmax
+ }
+ idxb1[ii] = ii;
+ }
+ for(ii=0; ii<ng[1]; ii++)
+ {
+ if(ii<nu[1]-nb[1]) // input
+ {
+ d_lg1[ii] = - 0.5; // umin
+ d_ug1[ii] = 0.5; // umax
+ }
+ else // state
+ {
+ d_lg1[ii] = - 4.0; // xmin
+ d_ug1[ii] = 4.0; // xmax
+ }
+ }
+
+
+ int *idxbN; int_zeros(&idxbN, nb[N], 1);
+ double *d_lbN; d_zeros(&d_lbN, nb[N], 1);
+ double *d_ubN; d_zeros(&d_ubN, nb[N], 1);
+ double *d_lgN; d_zeros(&d_lgN, ng[N], 1);
+ double *d_ugN; d_zeros(&d_ugN, ng[N], 1);
+ for(ii=0; ii<nb[N]; ii++)
+ {
+ d_lbN[ii] = - 4.0; // xmin
+ d_ubN[ii] = 4.0; // xmax
+ idxbN[ii] = ii;
+ }
+ for(ii=0; ii<ng[N]; ii++)
+ {
+ d_lgN[ii] = - 4.0; // dmin
+ d_ugN[ii] = 4.0; // dmax
+ }
+
+ double *C0; d_zeros(&C0, ng[0], nx[0]);
+ double *D0; d_zeros(&D0, ng[0], nu[0]);
+ for(ii=0; ii<nu[0]-nb[0] & ii<ng[0]; ii++)
+ D0[ii+(nb[0]+ii)*ng[0]] = 1.0;
+ for(; ii<ng[0]; ii++)
+ C0[ii+(nb[0]+ii-nu[0])*ng[0]] = 1.0;
+
+ double *C1; d_zeros(&C1, ng[1], nx[1]);
+ double *D1; d_zeros(&D1, ng[1], nu[1]);
+ for(ii=0; ii<nu[1]-nb[1] & ii<ng[1]; ii++)
+ D1[ii+(nb[1]+ii)*ng[1]] = 1.0;
+ for(; ii<ng[1]; ii++)
+ C1[ii+(nb[1]+ii-nu[1])*ng[1]] = 1.0;
+
+ double *CN; d_zeros(&CN, ng[N], nx[N]);
+ double *DN; d_zeros(&DN, ng[N], nu[N]);
+ for(ii=0; ii<nu[N]-nb[N] & ii<ng[N]; ii++)
+ DN[ii+(nb[N]+ii)*ng[N]] = 1.0;
+ for(; ii<ng[N]; ii++)
+ CN[ii+(nb[N]+ii-nu[N])*ng[N]] = 1.0;
+
+#if PRINT
+ // box constraints
+ int_print_mat(1, nb[0], idxb0, 1);
+ d_print_mat(1, nb[0], d_lb0, 1);
+ d_print_mat(1, nb[0], d_ub0, 1);
+ int_print_mat(1, nb[1], idxb1, 1);
+ d_print_mat(1, nb[1], d_lb1, 1);
+ d_print_mat(1, nb[1], d_ub1, 1);
+ int_print_mat(1, nb[N], idxbN, 1);
+ d_print_mat(1, nb[N], d_lbN, 1);
+ d_print_mat(1, nb[N], d_ubN, 1);
+ // general constraints
+ d_print_mat(1, ng[0], d_lg0, 1);
+ d_print_mat(1, ng[0], d_ug0, 1);
+ d_print_mat(ng[0], nu[0], D0, ng[0]);
+ d_print_mat(ng[0], nx[0], C0, ng[0]);
+ d_print_mat(1, ng[1], d_lg1, 1);
+ d_print_mat(1, ng[1], d_ug1, 1);
+ d_print_mat(ng[1], nu[1], D1, ng[1]);
+ d_print_mat(ng[1], nx[1], C1, ng[1]);
+ d_print_mat(1, ng[N], d_lgN, 1);
+ d_print_mat(1, ng[N], d_ugN, 1);
+ d_print_mat(ng[N], nu[N], DN, ng[N]);
+ d_print_mat(ng[N], nx[N], CN, ng[N]);
+#endif
+
+/************************************************
+* array of matrices
+************************************************/
+
+ double *hA[N];
+ double *hB[N];
+ double *hb[N];
+ double *hQ[N+1];
+ double *hS[N+1];
+ double *hR[N+1];
+ double *hq[N+1];
+ double *hr[N+1];
+ double *hd_lb[N+1];
+ double *hd_ub[N+1];
+ double *hd_lg[N+1];
+ double *hd_ug[N+1];
+ double *hC[N+1];
+ double *hD[N+1];
+ int *hidxb[N+1];
+
+ hA[0] = A;
+ hB[0] = B;
+ hb[0] = b0;
+ hQ[0] = Q;
+ hS[0] = S;
+ hR[0] = R;
+ hq[0] = q;
+ hr[0] = r0;
+ hidxb[0] = idxb0;
+ hd_lb[0] = d_lb0;
+ hd_ub[0] = d_ub0;
+ hd_lg[0] = d_lg0;
+ hd_ug[0] = d_ug0;
+ hC[0] = C0;
+ hD[0] = D0;
+ for(ii=1; ii<N; ii++)
+ {
+ hA[ii] = A;
+ hB[ii] = B;
+ hb[ii] = b;
+ hQ[ii] = Q;
+ hS[ii] = S;
+ hR[ii] = R;
+ hq[ii] = q;
+ hr[ii] = r;
+ hidxb[ii] = idxb1;
+ hd_lb[ii] = d_lb1;
+ hd_ub[ii] = d_ub1;
+ hd_lg[ii] = d_lg1;
+ hd_ug[ii] = d_ug1;
+ hC[ii] = C1;
+ hD[ii] = D1;
+ }
+ hQ[N] = Q;
+ hS[N] = S;
+ hR[N] = R;
+ hq[N] = q;
+ hr[N] = r;
+ hidxb[N] = idxbN;
+ hd_lb[N] = d_lbN;
+ hd_ub[N] = d_ubN;
+ hd_lg[N] = d_lgN;
+ hd_ug[N] = d_ugN;
+ hC[N] = CN;
+ hD[N] = DN;
+
+/************************************************
+* d ocp qp
+************************************************/
+
+ int d_qp_size = d_memsize_ocp_qp(N, nx, nu, nb, ng);
+ printf("\nd qp size = %d\n", d_qp_size);
+ void *d_qp_mem = malloc(d_qp_size);
+
+ struct d_ocp_qp d_qp;
+ d_create_ocp_qp(N, nx, nu, nb, ng, &d_qp, d_qp_mem);
+ d_cvt_colmaj_to_ocp_qp(hA, hB, hb, hQ, hS, hR, hq, hr, hidxb, hd_lb, hd_ub, hC, hD, hd_lg, hd_ug, &d_qp);
+#if 0
+ printf("\nN = %d\n", d_qp.N);
+ for(ii=0; ii<N; ii++)
+ d_print_strmat(d_qp.nu[ii]+d_qp.nx[ii]+1, d_qp.nx[ii+1], d_qp.BAbt+ii, 0, 0);
+ for(ii=0; ii<N; ii++)
+ d_print_tran_strvec(d_qp.nx[ii+1], d_qp.b+ii, 0);
+ for(ii=0; ii<=N; ii++)
+ d_print_strmat(d_qp.nu[ii]+d_qp.nx[ii]+1, d_qp.nu[ii]+d_qp.nx[ii], d_qp.RSQrq+ii, 0, 0);
+ for(ii=0; ii<=N; ii++)
+ d_print_tran_strvec(d_qp.nu[ii]+d_qp.nx[ii], d_qp.rq+ii, 0);
+ for(ii=0; ii<=N; ii++)
+ int_print_mat(1, nb[ii], d_qp.idxb[ii], 1);
+ for(ii=0; ii<=N; ii++)
+ d_print_tran_strvec(d_qp.nb[ii], d_qp.d_lb+ii, 0);
+ for(ii=0; ii<=N; ii++)
+ d_print_tran_strvec(d_qp.nb[ii], d_qp.d_ub+ii, 0);
+ for(ii=0; ii<=N; ii++)
+ d_print_strmat(d_qp.nu[ii]+d_qp.nx[ii], d_qp.ng[ii], d_qp.DCt+ii, 0, 0);
+ for(ii=0; ii<=N; ii++)
+ d_print_tran_strvec(d_qp.ng[ii], d_qp.d_lg+ii, 0);
+ for(ii=0; ii<=N; ii++)
+ d_print_tran_strvec(d_qp.ng[ii], d_qp.d_ug+ii, 0);
+// return;
+#endif
+
+/************************************************
+* s ocp qp
+************************************************/
+
+ int s_qp_size = s_memsize_ocp_qp(N, nx, nu, nb, ng);
+ printf("\ns qp size = %d\n", s_qp_size);
+ void *s_qp_mem = malloc(s_qp_size);
+
+ struct s_ocp_qp s_qp;
+ s_create_ocp_qp(N, nx, nu, nb, ng, &s_qp, s_qp_mem);
+// d_cvt_colmaj_to_ocp_qp(hA, hB, hb, hQ, hS, hR, hq, hr, hidxb, hd_lb, hd_ub, hC, hD, hd_lg, hd_ug, &d_qp);
+ m_cvt_d_ocp_qp_to_s_ocp_qp(&d_qp, &s_qp);
+
+#if 0
+ printf("\nN = %d\n", s_qp.N);
+ for(ii=0; ii<N; ii++)
+ s_print_strmat(s_qp.nu[ii]+s_qp.nx[ii]+1, s_qp.nx[ii+1], s_qp.BAbt+ii, 0, 0);
+ for(ii=0; ii<N; ii++)
+ s_print_tran_strvec(s_qp.nx[ii+1], s_qp.b+ii, 0);
+ for(ii=0; ii<=N; ii++)
+ s_print_strmat(s_qp.nu[ii]+s_qp.nx[ii]+1, s_qp.nu[ii]+s_qp.nx[ii], s_qp.RSQrq+ii, 0, 0);
+ for(ii=0; ii<=N; ii++)
+ s_print_tran_strvec(s_qp.nu[ii]+s_qp.nx[ii], s_qp.rq+ii, 0);
+ for(ii=0; ii<=N; ii++)
+ int_print_mat(1, nb[ii], s_qp.idxb[ii], 1);
+ for(ii=0; ii<=N; ii++)
+ s_print_tran_strvec(s_qp.nb[ii], s_qp.d_lb+ii, 0);
+ for(ii=0; ii<=N; ii++)
+ s_print_tran_strvec(s_qp.nb[ii], s_qp.d_ub+ii, 0);
+ for(ii=0; ii<=N; ii++)
+ s_print_strmat(s_qp.nu[ii]+s_qp.nx[ii], s_qp.ng[ii], s_qp.DCt+ii, 0, 0);
+ for(ii=0; ii<=N; ii++)
+ s_print_tran_strvec(s_qp.ng[ii], s_qp.d_lg+ii, 0);
+ for(ii=0; ii<=N; ii++)
+ s_print_tran_strvec(s_qp.ng[ii], s_qp.d_ug+ii, 0);
+// return;
+#endif
+
+/************************************************
+* ocp qp sol
+************************************************/
+
+ int d_qp_sol_size = d_memsize_ocp_qp_sol(N, nx, nu, nb, ng);
+ printf("\nd qp sol size = %d\n", d_qp_sol_size);
+ void *d_qp_sol_mem = malloc(d_qp_sol_size);
+
+ struct d_ocp_qp_sol d_qp_sol;
+ d_create_ocp_qp_sol(N, nx, nu, nb, ng, &d_qp_sol, d_qp_sol_mem);
+
+/************************************************
+* ipm
+************************************************/
+
+ struct m_ipm_hard_ocp_qp_arg arg;
+ arg.alpha_min = 1e-8;
+ arg.mu_max = 1e-12;
+ arg.iter_max = 20;
+ arg.mu0 = 2.0;
+
+ int ipm_size = m_memsize_ipm_hard_ocp_qp(&d_qp, &s_qp, &arg);
+ printf("\nipm size = %d\n", ipm_size);
+ void *m_ipm_mem = malloc(ipm_size);
+
+ struct m_ipm_hard_ocp_qp_workspace workspace;
+ m_create_ipm_hard_ocp_qp(&d_qp, &s_qp, &arg, &workspace, m_ipm_mem);
+
+ gettimeofday(&tv0, NULL); // start
+
+ for(rep=0; rep<nrep; rep++)
+ {
+// m_solve_ipm_hard_ocp_qp(&d_qp, &s_qp, &d_qp_sol, &workspace);
+ m_solve_ipm2_hard_ocp_qp(&d_qp, &s_qp, &d_qp_sol, &workspace);
+ }
+
+ gettimeofday(&tv1, NULL); // stop
+
+ double time_ocp_ipm = (tv1.tv_sec-tv0.tv_sec)/(nrep+0.0)+(tv1.tv_usec-tv0.tv_usec)/(nrep*1e6);
+
+/************************************************
+* extract and print solution
+************************************************/
+
+ double *u[N+1]; for(ii=0; ii<=N; ii++) d_zeros(u+ii, nu[ii], 1);
+ double *x[N+1]; for(ii=0; ii<=N; ii++) d_zeros(x+ii, nx[ii], 1);
+ double *pi[N]; for(ii=0; ii<N; ii++) d_zeros(pi+ii, nx[ii+1], 1);
+ double *lam_lb[N+1]; for(ii=0; ii<=N; ii++) d_zeros(lam_lb+ii, nb[ii], 1);
+ double *lam_ub[N+1]; for(ii=0; ii<=N; ii++) d_zeros(lam_ub+ii, nb[ii], 1);
+ double *lam_lg[N+1]; for(ii=0; ii<=N; ii++) d_zeros(lam_lg+ii, ng[ii], 1);
+ double *lam_ug[N+1]; for(ii=0; ii<=N; ii++) d_zeros(lam_ug+ii, ng[ii], 1);
+
+ d_cvt_ocp_qp_sol_to_colmaj(&d_qp, &d_qp_sol, u, x, pi, lam_lb, lam_ub, lam_lg, lam_ug);
+
+#if 1
+ printf("\nsolution\n\n");
+ printf("\nu\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_mat(1, nu[ii], u[ii], 1);
+ printf("\nx\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_mat(1, nx[ii], x[ii], 1);
+ printf("\npi\n");
+ for(ii=0; ii<N; ii++)
+ d_print_mat(1, nx[ii+1], pi[ii], 1);
+ printf("\nlam_lb\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_mat(1, nb[ii], lam_lb[ii], 1);
+ printf("\nlam_ub\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_mat(1, nb[ii], lam_ub[ii], 1);
+ printf("\nlam_lg\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_mat(1, ng[ii], lam_lg[ii], 1);
+ printf("\nlam_ug\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_mat(1, ng[ii], lam_ug[ii], 1);
+
+ printf("\nt_lb\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_mat(1, nb[ii], (d_qp_sol.t_lb+ii)->pa, 1);
+ printf("\nt_ub\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_mat(1, nb[ii], (d_qp_sol.t_ub+ii)->pa, 1);
+ printf("\nt_lg\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_mat(1, ng[ii], (d_qp_sol.t_lg+ii)->pa, 1);
+ printf("\nt_ug\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_mat(1, ng[ii], (d_qp_sol.t_ug+ii)->pa, 1);
+
+ printf("\nresiduals\n\n");
+ printf("\nres_g\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_e_mat(1, nu[ii]+nx[ii], (workspace.res_g+ii)->pa, 1);
+ printf("\nres_b\n");
+ for(ii=0; ii<N; ii++)
+ d_print_e_mat(1, nx[ii+1], (workspace.res_b+ii)->pa, 1);
+ printf("\nres_m_lb\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_e_mat(1, nb[ii], (workspace.res_m_lb+ii)->pa, 1);
+ printf("\nres_m_ub\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_e_mat(1, nb[ii], (workspace.res_m_ub+ii)->pa, 1);
+ printf("\nres_m_lg\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_e_mat(1, ng[ii], (workspace.res_m_lg+ii)->pa, 1);
+ printf("\nres_m_ug\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_e_mat(1, ng[ii], (workspace.res_m_ug+ii)->pa, 1);
+ printf("\nres_d_lb\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_e_mat(1, nb[ii], (workspace.res_d_lb+ii)->pa, 1);
+ printf("\nres_d_ub\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_e_mat(1, nb[ii], (workspace.res_d_ub+ii)->pa, 1);
+ printf("\nres_d_lg\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_e_mat(1, ng[ii], (workspace.res_d_lg+ii)->pa, 1);
+ printf("\nres_d_ug\n");
+ for(ii=0; ii<=N; ii++)
+ d_print_e_mat(1, ng[ii], (workspace.res_d_ug+ii)->pa, 1);
+ printf("\nres_mu\n");
+ printf("\n%e\n\n", workspace.res_mu);
+#endif
+
+ printf("\nipm iter = %d\n", workspace.iter);
+ printf("\nalpha_aff\tmu_aff\t\tsigma\t\talpha\t\tmu\n");
+ d_print_e_tran_mat(5, workspace.iter, workspace.stat, 5);
+
+ printf("\nocp ipm time = %e [s]\n\n", time_ocp_ipm);
+
+/************************************************
+* free memory
+************************************************/
+
+ d_free(A);
+ d_free(B);
+ d_free(b);
+ d_free(x0);
+ d_free(Q);
+ d_free(R);
+ d_free(S);
+ d_free(q);
+ d_free(r);
+ d_free(r0);
+ int_free(idxb0);
+ d_free(d_lb0);
+ d_free(d_ub0);
+ int_free(idxb1);
+ d_free(d_lb1);
+ d_free(d_ub1);
+ int_free(idxbN);
+ d_free(d_lbN);
+ d_free(d_ubN);
+ d_free(C0);
+ d_free(D0);
+ d_free(d_lg0);
+ d_free(d_ug0);
+ d_free(C1);
+ d_free(D1);
+ d_free(d_lg1);
+ d_free(d_ug1);
+ d_free(CN);
+ d_free(DN);
+ d_free(d_lgN);
+ d_free(d_ugN);
+
+ for(ii=0; ii<N; ii++)
+ {
+ d_free(u[ii]);
+ d_free(x[ii]);
+ d_free(pi[ii]);
+ d_free(lam_lb[ii]);
+ d_free(lam_ub[ii]);
+ d_free(lam_lg[ii]);
+ d_free(lam_ug[ii]);
+ }
+ d_free(u[ii]);
+ d_free(x[ii]);
+ d_free(lam_lb[ii]);
+ d_free(lam_ub[ii]);
+ d_free(lam_lg[ii]);
+ d_free(lam_ug[ii]);
+
+ free(d_qp_mem);
+ free(s_qp_mem);
+ free(d_qp_sol_mem);
+ free(m_ipm_mem);
+
+/************************************************
+* return
+************************************************/
+
+ return 0;
+
+ }
diff --git a/test_problems/test_s_dense.c b/test_problems/test_s_dense.c
new file mode 100644
index 0000000..4dbd1fb
--- /dev/null
+++ b/test_problems/test_s_dense.c
@@ -0,0 +1,202 @@
+/**************************************************************************************************
+* *
+* This file is part of HPIPM. *
+* *
+* HPIPM -- High Performance Interior Point Method. *
+* Copyright (C) 2017 by Gianluca Frison. *
+* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
+* All rights reserved. *
+* *
+* HPMPC is free software; you can redistribute it and/or *
+* modify it under the terms of the GNU Lesser General Public *
+* License as published by the Free Software Foundation; either *
+* version 2.1 of the License, or (at your option) any later version. *
+* *
+* HPMPC is distributed in the hope that it will be useful, *
+* but WITHOUT ANY WARRANTY; without even the implied warranty of *
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
+* See the GNU Lesser General Public License for more details. *
+* *
+* You should have received a copy of the GNU Lesser General Public *
+* License along with HPMPC; if not, write to the Free Software *
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA *
+* *
+* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
+* *
+**************************************************************************************************/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <sys/time.h>
+
+#include <blasfeo_target.h>
+#include <blasfeo_common.h>
+#include <blasfeo_v_aux_ext_dep.h>
+#include <blasfeo_s_aux_ext_dep.h>
+#include <blasfeo_i_aux_ext_dep.h>
+#include <blasfeo_s_aux.h>
+#include <blasfeo_s_blas.h>
+
+#include "../include/hpipm_s_dense_qp.h"
+#include "../include/hpipm_s_dense_qp_sol.h"
+#include "../include/hpipm_s_dense_qp_ipm_hard.h"
+
+
+
+#define PRINT 1
+
+
+
+int main()
+ {
+
+ int ii;
+
+/************************************************
+* qp dimension and data
+************************************************/
+
+ int nv = 2;
+ int ne = 1;
+ int nb = 2;
+ int ng = 0;
+
+ float H[] = {4.0, 1.0, 1.0, 2.0};
+ float g[] = {1.0, 1.0};
+ float A[] = {1.0, 1.0};
+ float b[] = {1.0};
+// float d_lb[] = {0.0, 0.0};
+// float d_ub[] = {INFINITY, INFINITY};
+ float d_lb[] = {-1.0, -1.0};
+ float d_ub[] = {1.5, 0.5};
+ int idxb[] = {0, 1};
+ float C[] = {1.0, 0.0, 0.0, 1.0};
+ float d_lg[] = {-1.0, -1.0};
+ float d_ug[] = {1.5, 0.5};
+
+/************************************************
+* dense qp
+************************************************/
+
+ int qp_size = s_memsize_dense_qp(nv, ne, nb, ng);
+ printf("\nqp size = %d\n", qp_size);
+ void *qp_mem = malloc(qp_size);
+
+ struct s_dense_qp qp;
+ s_create_dense_qp(nv, ne, nb, ng, &qp, qp_mem);
+ s_cvt_colmaj_to_dense_qp(H, g, A, b, idxb, d_lb, d_ub, C, d_lg, d_ug, &qp);
+
+ s_print_strmat(nv+1, nv, qp.Hg, 0, 0);
+ s_print_strmat(ne, nv, qp.A, 0, 0);
+ s_print_strmat(nv, ng, qp.Ct, 0, 0);
+ s_print_strvec(nv, qp.g, 0);
+ s_print_strvec(ne, qp.b, 0);
+ s_print_strvec(2*nb+2*ng, qp.d, 0);
+ s_print_strvec(nb, qp.d_lb, 0);
+ s_print_strvec(nb, qp.d_ub, 0);
+ s_print_strvec(ng, qp.d_lg, 0);
+ s_print_strvec(ng, qp.d_ug, 0);
+
+/************************************************
+* dense qp sol
+************************************************/
+
+ int qp_sol_size = s_memsize_dense_qp_sol(nv, ne, nb, ng);
+ printf("\nqp sol size = %d\n", qp_sol_size);
+ void *qp_sol_mem = malloc(qp_sol_size);
+
+ struct s_dense_qp_sol qp_sol;
+ s_create_dense_qp_sol(nv, ne, nb, ng, &qp_sol, qp_sol_mem);
+
+/************************************************
+* ipm
+************************************************/
+
+ struct s_ipm_hard_dense_qp_arg arg;
+ arg.alpha_min = 1e-8;
+ arg.mu_max = 1e-12;
+ arg.iter_max = 20;
+ arg.mu0 = 1.0;
+
+ int ipm_size = s_memsize_ipm_hard_dense_qp(&qp, &arg);
+ printf("\nipm size = %d\n", ipm_size);
+ void *ipm_mem = malloc(ipm_size);
+
+ struct s_ipm_hard_dense_qp_workspace workspace;
+ s_create_ipm_hard_dense_qp(&qp, &arg, &workspace, ipm_mem);
+
+ int rep, nrep=1000;
+
+ struct timeval tv0, tv1;
+
+ gettimeofday(&tv0, NULL); // start
+
+ for(rep=0; rep<nrep; rep++)
+ {
+// s_solve_ipm_hard_dense_qp(&qp, &qp_sol, &workspace);
+ s_solve_ipm2_hard_dense_qp(&qp, &qp_sol, &workspace);
+ }
+
+ gettimeofday(&tv1, NULL); // stop
+
+ float time_dense_ipm = (tv1.tv_sec-tv0.tv_sec)/(nrep+0.0)+(tv1.tv_usec-tv0.tv_usec)/(nrep*1e6);
+
+ printf("\nsolution\n\n");
+ printf("\nv\n");
+ s_print_tran_strvec(nv, qp_sol.v, 0);
+ printf("\npi\n");
+ s_print_tran_strvec(ne, qp_sol.pi, 0);
+ printf("\nlam_lb\n");
+ s_print_tran_strvec(nb, qp_sol.lam_lb, 0);
+ printf("\nlam_ub\n");
+ s_print_tran_strvec(nb, qp_sol.lam_ub, 0);
+ printf("\nlam_lg\n");
+ s_print_tran_strvec(ng, qp_sol.lam_lg, 0);
+ printf("\nlam_ug\n");
+ s_print_tran_strvec(ng, qp_sol.lam_ug, 0);
+ printf("\nt_lb\n");
+ s_print_tran_strvec(nb, qp_sol.t_lb, 0);
+ printf("\nt_ub\n");
+ s_print_tran_strvec(nb, qp_sol.t_ub, 0);
+ printf("\nt_lg\n");
+ s_print_tran_strvec(ng, qp_sol.t_lg, 0);
+ printf("\nt_ug\n");
+ s_print_tran_strvec(ng, qp_sol.t_ug, 0);
+
+ printf("\nresiduals\n\n");
+ printf("\nres_g\n");
+ s_print_e_tran_strvec(nv, workspace.res_g, 0);
+ printf("\nres_b\n");
+ s_print_e_tran_strvec(ne, workspace.res_b, 0);
+ printf("\nres_d\n");
+ s_print_e_tran_strvec(2*nb+2*ng, workspace.res_d, 0);
+ printf("\nres_m\n");
+ s_print_e_tran_strvec(2*nb+2*ng, workspace.res_m, 0);
+ printf("\nres_mu\n");
+ printf("\n%e\n\n", workspace.res_mu);
+
+ printf("\nipm iter = %d\n", workspace.iter);
+ printf("\nalpha_aff\tmu_aff\t\tsigma\t\talpha\t\tmu\n");
+ s_print_e_tran_mat(5, workspace.iter, workspace.stat, 5);
+
+ printf("\ndense ipm time = %e [s]\n\n", time_dense_ipm);
+
+/************************************************
+* free memory
+************************************************/
+
+ free(qp_mem);
+ free(qp_sol_mem);
+ free(ipm_mem);
+
+/************************************************
+* return
+************************************************/
+
+ return 0;
+
+ }
+
+
+
diff --git a/test_problems/test_s_ocp.c b/test_problems/test_s_ocp.c
new file mode 100644
index 0000000..f9d5209
--- /dev/null
+++ b/test_problems/test_s_ocp.c
@@ -0,0 +1,696 @@
+/**************************************************************************************************
+* *
+* This file is part of HPIPM. *
+* *
+* HPIPM -- High Performance Interior Point Method. *
+* Copyright (C) 2017 by Gianluca Frison. *
+* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
+* All rights reserved. *
+* *
+* HPMPC is free software; you can redistribute it and/or *
+* modify it under the terms of the GNU Lesser General Public *
+* License as published by the Free Software Foundation; either *
+* version 2.1 of the License, or (at your option) any later version. *
+* *
+* HPMPC is distributed in the hope that it will be useful, *
+* but WITHOUT ANY WARRANTY; without even the implied warranty of *
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
+* See the GNU Lesser General Public License for more details. *
+* *
+* You should have received a copy of the GNU Lesser General Public *
+* License along with HPMPC; if not, write to the Free Software *
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA *
+* *
+* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
+* *
+**************************************************************************************************/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <sys/time.h>
+
+#include <blasfeo_target.h>
+#include <blasfeo_common.h>
+#include <blasfeo_v_aux_ext_dep.h>
+#include <blasfeo_s_aux_ext_dep.h>
+#include <blasfeo_i_aux_ext_dep.h>
+#include <blasfeo_s_aux.h>
+#include <blasfeo_s_blas.h>
+
+#include "../include/hpipm_s_ocp_qp.h"
+#include "../include/hpipm_s_ocp_qp_sol.h"
+#include "../include/hpipm_s_ocp_qp_ipm_hard.h"
+
+#include "s_tools.h"
+
+
+
+#define KEEP_X0 0
+
+// printing
+#define PRINT 1
+
+/************************************************
+Mass-spring system: nx/2 masses connected each other with springs (in a row), and the first and the last one to walls. nu (<=nx) controls act on the first nu masses. The system is sampled with sampling time Ts.
+************************************************/
+void mass_spring_system(float Ts, int nx, int nu, int N, float *A, float *B, float *b, float *x0)
+ {
+
+ int nx2 = nx*nx;
+
+ int info = 0;
+
+ int pp = nx/2; // number of masses
+
+/************************************************
+* build the continuous time system
+************************************************/
+
+ float *T; s_zeros(&T, pp, pp);
+ int ii;
+ for(ii=0; ii<pp; ii++) T[ii*(pp+1)] = -2;
+ for(ii=0; ii<pp-1; ii++) T[ii*(pp+1)+1] = 1;
+ for(ii=1; ii<pp; ii++) T[ii*(pp+1)-1] = 1;
+
+ float *Z; s_zeros(&Z, pp, pp);
+ float *I; s_zeros(&I, pp, pp); for(ii=0; ii<pp; ii++) I[ii*(pp+1)]=1.0; // = eye(pp);
+ float *Ac; s_zeros(&Ac, nx, nx);
+ smcopy(pp, pp, Z, pp, Ac, nx);
+ smcopy(pp, pp, T, pp, Ac+pp, nx);
+ smcopy(pp, pp, I, pp, Ac+pp*nx, nx);
+ smcopy(pp, pp, Z, pp, Ac+pp*(nx+1), nx);
+ free(T);
+ free(Z);
+ free(I);
+
+ s_zeros(&I, nu, nu); for(ii=0; ii<nu; ii++) I[ii*(nu+1)]=1.0; //I = eye(nu);
+ float *Bc; s_zeros(&Bc, nx, nu);
+ smcopy(nu, nu, I, nu, Bc+pp, nx);
+ free(I);
+
+/************************************************
+* compute the discrete time system
+************************************************/
+
+ float *bb; s_zeros(&bb, nx, 1);
+ smcopy(nx, 1, bb, nx, b, nx);
+
+ smcopy(nx, nx, Ac, nx, A, nx);
+ sscal_3l(nx2, Ts, A);
+ expm(nx, A);
+
+ s_zeros(&T, nx, nx);
+ s_zeros(&I, nx, nx); for(ii=0; ii<nx; ii++) I[ii*(nx+1)]=1.0; //I = eye(nx);
+ smcopy(nx, nx, A, nx, T, nx);
+ saxpy_3l(nx2, -1.0, I, T);
+ sgemm_nn_3l(nx, nu, nx, T, nx, Bc, nx, B, nx);
+ free(T);
+ free(I);
+
+ int *ipiv = (int *) malloc(nx*sizeof(int));
+ sgesv_3l(nx, nu, Ac, nx, ipiv, B, nx, &info);
+ free(ipiv);
+
+ free(Ac);
+ free(Bc);
+ free(bb);
+
+
+/************************************************
+* initial state
+************************************************/
+
+ if(nx==4)
+ {
+ x0[0] = 5;
+ x0[1] = 10;
+ x0[2] = 15;
+ x0[3] = 20;
+ }
+ else
+ {
+ int jj;
+ for(jj=0; jj<nx; jj++)
+ x0[jj] = 1;
+ }
+
+ }
+
+
+
+int main()
+ {
+
+
+ // local variables
+
+ int ii, jj;
+
+ int rep, nrep=1000;
+
+ struct timeval tv0, tv1;
+
+
+
+ // problem size
+
+ int nx_ = 16; // number of states (it has to be even for the mass-spring system test problem)
+ int nu_ = 7; // number of inputs (controllers) (it has to be at least 1 and at most nx/2 for the mass-spring system test problem)
+ int N = 5; // horizon lenght
+
+
+
+ // stage-wise variant size
+
+ int nx[N+1];
+#if KEEP_X0
+ nx[0] = nx_;
+#else
+ nx[0] = 0;
+#endif
+ for(ii=1; ii<=N; ii++)
+ nx[ii] = nx_;
+// nx[N] = 0;
+
+ int nu[N+1];
+ for(ii=0; ii<N; ii++)
+ nu[ii] = nu_;
+ nu[N] = 0;
+
+#if 1
+ int nb[N+1];
+#if KEEP_X0
+ nb[0] = nu[0]+nx[0]/2;
+#else
+ nb[0] = nu[0];
+#endif
+ for(ii=1; ii<N; ii++)
+ nb[ii] = nu[1]+nx[1]/2;
+ nb[N] = nx[N]/2;
+
+ int ng[N+1];
+ ng[0] = 0;
+ for(ii=1; ii<N; ii++)
+ ng[ii] = 0;
+ ng[N] = 0;
+#elif 0
+ int nb[N+1];
+ nb[0] = 0;
+ for(ii=1; ii<N; ii++)
+ nb[ii] = 0;
+ nb[N] = 0;
+
+ int ng[N+1];
+#if KEEP_X0
+ ng[0] = nu[0]+nx[0]/2;
+#else
+ ng[0] = nu[0];
+#endif
+ for(ii=1; ii<N; ii++)
+ ng[ii] = nu[1]+nx[1]/2;
+ ng[N] = nx[N]/2;
+#else
+ int nb[N+1];
+ nb[0] = nu[0] + nx[0]/2;
+ for(ii=1; ii<N; ii++)
+ nb[ii] = nu[ii] + nx[ii]/2;
+ nb[N] = nu[N] + nx[N]/2;
+
+ int ng[N+1];
+#if KEEP_X0
+ ng[0] = nx[0]/2;
+#else
+ ng[0] = 0;
+#endif
+ for(ii=1; ii<N; ii++)
+ ng[ii] = nx[1]/2;
+ ng[N] = nx[N]/2;
+#endif
+
+/************************************************
+* dynamical system
+************************************************/
+
+ float *A; s_zeros(&A, nx_, nx_); // states update matrix
+
+ float *B; s_zeros(&B, nx_, nu_); // inputs matrix
+
+ float *b; s_zeros(&b, nx_, 1); // states offset
+ float *x0; s_zeros(&x0, nx_, 1); // initial state
+
+ float Ts = 0.5; // sampling time
+ mass_spring_system(Ts, nx_, nu_, N, A, B, b, x0);
+
+ for(jj=0; jj<nx_; jj++)
+ b[jj] = 0.1;
+
+ for(jj=0; jj<nx_; jj++)
+ x0[jj] = 0;
+ x0[0] = 2.5;
+ x0[1] = 2.5;
+
+ float *b0; s_zeros(&b0, nx_, 1);
+ sgemv_n_3l(nx_, nx_, A, nx_, x0, b0);
+ saxpy_3l(nx_, 1.0, b, b0);
+
+#if PRINT
+ s_print_mat(nx_, nx_, A, nx_);
+ s_print_mat(nx_, nu_, B, nu_);
+ s_print_mat(1, nx_, b, 1);
+ s_print_mat(1, nx_, x0, 1);
+ s_print_mat(1, nx_, b0, 1);
+#endif
+
+/************************************************
+* cost function
+************************************************/
+
+ float *Q; s_zeros(&Q, nx_, nx_);
+ for(ii=0; ii<nx_; ii++) Q[ii*(nx_+1)] = 1.0;
+
+ float *R; s_zeros(&R, nu_, nu_);
+ for(ii=0; ii<nu_; ii++) R[ii*(nu_+1)] = 2.0;
+
+ float *S; s_zeros(&S, nu_, nx_);
+
+ float *q; s_zeros(&q, nx_, 1);
+ for(ii=0; ii<nx_; ii++) q[ii] = 0.1;
+
+ float *r; s_zeros(&r, nu_, 1);
+ for(ii=0; ii<nu_; ii++) r[ii] = 0.2;
+
+ float *r0; s_zeros(&r0, nu_, 1);
+ sgemv_n_3l(nu_, nx_, S, nu_, x0, r0);
+ saxpy_3l(nu_, 1.0, r, r0);
+
+#if PRINT
+ s_print_mat(nx_, nx_, Q, nx_);
+ s_print_mat(nu_, nu_, R, nu_);
+ s_print_mat(nu_, nx_, S, nu_);
+ s_print_mat(1, nx_, q, 1);
+ s_print_mat(1, nu_, r, 1);
+ s_print_mat(1, nu_, r0, 1);
+#endif
+
+ // maximum element in cost functions
+ float mu0 = 2.0;
+
+/************************************************
+* box & general constraints
+************************************************/
+
+ int *idxb0; int_zeros(&idxb0, nb[0], 1);
+ float *d_lb0; s_zeros(&d_lb0, nb[0], 1);
+ float *d_ub0; s_zeros(&d_ub0, nb[0], 1);
+ float *d_lg0; s_zeros(&d_lg0, ng[0], 1);
+ float *d_ug0; s_zeros(&d_ug0, ng[0], 1);
+ for(ii=0; ii<nb[0]; ii++)
+ {
+ if(ii<nu[0]) // input
+ {
+ d_lb0[ii] = - 0.5; // umin
+ d_ub0[ii] = 0.5; // umax
+ }
+ else // state
+ {
+ d_lb0[ii] = - 4.0; // xmin
+ d_ub0[ii] = 4.0; // xmax
+ }
+ idxb0[ii] = ii;
+ }
+ for(ii=0; ii<ng[0]; ii++)
+ {
+ if(ii<nu[0]-nb[0]) // input
+ {
+ d_lg0[ii] = - 0.5; // umin
+ d_ug0[ii] = 0.5; // umax
+ }
+ else // state
+ {
+ d_lg0[ii] = - 4.0; // xmin
+ d_ug0[ii] = 4.0; // xmax
+ }
+ }
+
+ int *idxb1; int_zeros(&idxb1, nb[1], 1);
+ float *d_lb1; s_zeros(&d_lb1, nb[1], 1);
+ float *d_ub1; s_zeros(&d_ub1, nb[1], 1);
+ float *d_lg1; s_zeros(&d_lg1, ng[1], 1);
+ float *d_ug1; s_zeros(&d_ug1, ng[1], 1);
+ for(ii=0; ii<nb[1]; ii++)
+ {
+ if(ii<nu[1]) // input
+ {
+ d_lb1[ii] = - 0.5; // umin
+ d_ub1[ii] = 0.5; // umax
+ }
+ else // state
+ {
+ d_lb1[ii] = - 4.0; // xmin
+ d_ub1[ii] = 4.0; // xmax
+ }
+ idxb1[ii] = ii;
+ }
+ for(ii=0; ii<ng[1]; ii++)
+ {
+ if(ii<nu[1]-nb[1]) // input
+ {
+ d_lg1[ii] = - 0.5; // umin
+ d_ug1[ii] = 0.5; // umax
+ }
+ else // state
+ {
+ d_lg1[ii] = - 4.0; // xmin
+ d_ug1[ii] = 4.0; // xmax
+ }
+ }
+
+
+ int *idxbN; int_zeros(&idxbN, nb[N], 1);
+ float *d_lbN; s_zeros(&d_lbN, nb[N], 1);
+ float *d_ubN; s_zeros(&d_ubN, nb[N], 1);
+ float *d_lgN; s_zeros(&d_lgN, ng[N], 1);
+ float *d_ugN; s_zeros(&d_ugN, ng[N], 1);
+ for(ii=0; ii<nb[N]; ii++)
+ {
+ d_lbN[ii] = - 4.0; // xmin
+ d_ubN[ii] = 4.0; // xmax
+ idxbN[ii] = ii;
+ }
+ for(ii=0; ii<ng[N]; ii++)
+ {
+ d_lgN[ii] = - 4.0; // dmin
+ d_ugN[ii] = 4.0; // dmax
+ }
+
+ float *C0; s_zeros(&C0, ng[0], nx[0]);
+ float *D0; s_zeros(&D0, ng[0], nu[0]);
+ for(ii=0; ii<nu[0]-nb[0] & ii<ng[0]; ii++)
+ D0[ii+(nb[0]+ii)*ng[0]] = 1.0;
+ for(; ii<ng[0]; ii++)
+ C0[ii+(nb[0]+ii-nu[0])*ng[0]] = 1.0;
+
+ float *C1; s_zeros(&C1, ng[1], nx[1]);
+ float *D1; s_zeros(&D1, ng[1], nu[1]);
+ for(ii=0; ii<nu[1]-nb[1] & ii<ng[1]; ii++)
+ D1[ii+(nb[1]+ii)*ng[1]] = 1.0;
+ for(; ii<ng[1]; ii++)
+ C1[ii+(nb[1]+ii-nu[1])*ng[1]] = 1.0;
+
+ float *CN; s_zeros(&CN, ng[N], nx[N]);
+ float *DN; s_zeros(&DN, ng[N], nu[N]);
+ for(ii=0; ii<nu[N]-nb[N] & ii<ng[N]; ii++)
+ DN[ii+(nb[N]+ii)*ng[N]] = 1.0;
+ for(; ii<ng[N]; ii++)
+ CN[ii+(nb[N]+ii-nu[N])*ng[N]] = 1.0;
+
+#if PRINT
+ // box constraints
+ int_print_mat(1, nb[0], idxb0, 1);
+ s_print_mat(1, nb[0], d_lb0, 1);
+ s_print_mat(1, nb[0], d_ub0, 1);
+ int_print_mat(1, nb[1], idxb1, 1);
+ s_print_mat(1, nb[1], d_lb1, 1);
+ s_print_mat(1, nb[1], d_ub1, 1);
+ int_print_mat(1, nb[N], idxbN, 1);
+ s_print_mat(1, nb[N], d_lbN, 1);
+ s_print_mat(1, nb[N], d_ubN, 1);
+ // general constraints
+ s_print_mat(1, ng[0], d_lg0, 1);
+ s_print_mat(1, ng[0], d_ug0, 1);
+ s_print_mat(ng[0], nu[0], D0, ng[0]);
+ s_print_mat(ng[0], nx[0], C0, ng[0]);
+ s_print_mat(1, ng[1], d_lg1, 1);
+ s_print_mat(1, ng[1], d_ug1, 1);
+ s_print_mat(ng[1], nu[1], D1, ng[1]);
+ s_print_mat(ng[1], nx[1], C1, ng[1]);
+ s_print_mat(1, ng[N], d_lgN, 1);
+ s_print_mat(1, ng[N], d_ugN, 1);
+ s_print_mat(ng[N], nu[N], DN, ng[N]);
+ s_print_mat(ng[N], nx[N], CN, ng[N]);
+#endif
+
+/************************************************
+* array of matrices
+************************************************/
+
+ float *hA[N];
+ float *hB[N];
+ float *hb[N];
+ float *hQ[N+1];
+ float *hS[N+1];
+ float *hR[N+1];
+ float *hq[N+1];
+ float *hr[N+1];
+ float *hd_lb[N+1];
+ float *hd_ub[N+1];
+ float *hd_lg[N+1];
+ float *hd_ug[N+1];
+ float *hC[N+1];
+ float *hD[N+1];
+ int *hidxb[N+1];
+
+ hA[0] = A;
+ hB[0] = B;
+ hb[0] = b0;
+ hQ[0] = Q;
+ hS[0] = S;
+ hR[0] = R;
+ hq[0] = q;
+ hr[0] = r0;
+ hidxb[0] = idxb0;
+ hd_lb[0] = d_lb0;
+ hd_ub[0] = d_ub0;
+ hd_lg[0] = d_lg0;
+ hd_ug[0] = d_ug0;
+ hC[0] = C0;
+ hD[0] = D0;
+ for(ii=1; ii<N; ii++)
+ {
+ hA[ii] = A;
+ hB[ii] = B;
+ hb[ii] = b;
+ hQ[ii] = Q;
+ hS[ii] = S;
+ hR[ii] = R;
+ hq[ii] = q;
+ hr[ii] = r;
+ hidxb[ii] = idxb1;
+ hd_lb[ii] = d_lb1;
+ hd_ub[ii] = d_ub1;
+ hd_lg[ii] = d_lg1;
+ hd_ug[ii] = d_ug1;
+ hC[ii] = C1;
+ hD[ii] = D1;
+ }
+ hQ[N] = Q;
+ hS[N] = S;
+ hR[N] = R;
+ hq[N] = q;
+ hr[N] = r;
+ hidxb[N] = idxbN;
+ hd_lb[N] = d_lbN;
+ hd_ub[N] = d_ubN;
+ hd_lg[N] = d_lgN;
+ hd_ug[N] = d_ugN;
+ hC[N] = CN;
+ hD[N] = DN;
+
+/************************************************
+* ocp qp
+************************************************/
+
+ int qp_size = s_memsize_ocp_qp(N, nx, nu, nb, ng);
+ printf("\nqp size = %d\n", qp_size);
+ void *qp_mem = malloc(qp_size);
+
+ struct s_ocp_qp qp;
+ s_create_ocp_qp(N, nx, nu, nb, ng, &qp, qp_mem);
+ s_cvt_colmaj_to_ocp_qp(hA, hB, hb, hQ, hS, hR, hq, hr, hidxb, hd_lb, hd_ub, hC, hD, hd_lg, hd_ug, &qp);
+#if 0
+ printf("\nN = %d\n", qp.N);
+ for(ii=0; ii<N; ii++)
+ s_print_strmat(qp.nu[ii]+qp.nx[ii]+1, qp.nx[ii+1], qp.BAbt+ii, 0, 0);
+ for(ii=0; ii<N; ii++)
+ s_print_tran_strvec(qp.nx[ii+1], qp.b+ii, 0);
+ for(ii=0; ii<=N; ii++)
+ s_print_strmat(qp.nu[ii]+qp.nx[ii]+1, qp.nu[ii]+qp.nx[ii], qp.RSQrq+ii, 0, 0);
+ for(ii=0; ii<=N; ii++)
+ s_print_tran_strvec(qp.nu[ii]+qp.nx[ii], qp.rq+ii, 0);
+ for(ii=0; ii<=N; ii++)
+ int_print_mat(1, nb[ii], qp.idxb[ii], 1);
+ for(ii=0; ii<=N; ii++)
+ s_print_tran_strvec(qp.nb[ii], qp.d_lb+ii, 0);
+ for(ii=0; ii<=N; ii++)
+ s_print_tran_strvec(qp.nb[ii], qp.d_ub+ii, 0);
+ for(ii=0; ii<=N; ii++)
+ s_print_strmat(qp.nu[ii]+qp.nx[ii], qp.ng[ii], qp.DCt+ii, 0, 0);
+ for(ii=0; ii<=N; ii++)
+ s_print_tran_strvec(qp.ng[ii], qp.d_lg+ii, 0);
+ for(ii=0; ii<=N; ii++)
+ s_print_tran_strvec(qp.ng[ii], qp.d_ug+ii, 0);
+ return;
+#endif
+
+/************************************************
+* ocp qp
+************************************************/
+
+ int qp_sol_size = s_memsize_ocp_qp_sol(N, nx, nu, nb, ng);
+ printf("\nqp sol size = %d\n", qp_sol_size);
+ void *qp_sol_mem = malloc(qp_sol_size);
+
+ struct s_ocp_qp_sol qp_sol;
+ s_create_ocp_qp_sol(N, nx, nu, nb, ng, &qp_sol, qp_sol_mem);
+
+/************************************************
+* ipm
+************************************************/
+
+ struct s_ipm_hard_ocp_qp_arg arg;
+ arg.alpha_min = 1e-8;
+ arg.mu_max = 1e-12;
+ arg.iter_max = 20;
+ arg.mu0 = 2.0;
+
+ int ipm_size = s_memsize_ipm_hard_ocp_qp(&qp, &arg);
+ printf("\nipm size = %d\n", ipm_size);
+ void *ipm_mem = malloc(ipm_size);
+
+ struct s_ipm_hard_ocp_qp_workspace workspace;
+ s_create_ipm_hard_ocp_qp(&qp, &arg, &workspace, ipm_mem);
+
+ gettimeofday(&tv0, NULL); // start
+
+ for(rep=0; rep<nrep; rep++)
+ {
+// s_solve_ipm_hard_ocp_qp(&qp, &qp_sol, &workspace);
+ s_solve_ipm2_hard_ocp_qp(&qp, &qp_sol, &workspace);
+ }
+
+ gettimeofday(&tv1, NULL); // stop
+
+ float time_ocp_ipm = (tv1.tv_sec-tv0.tv_sec)/(nrep+0.0)+(tv1.tv_usec-tv0.tv_usec)/(nrep*1e6);
+
+#if 1
+ printf("\nsolution\n\n");
+ printf("\nux\n");
+ for(ii=0; ii<=N; ii++)
+ s_print_tran_strvec(nu[ii]+nx[ii], qp_sol.ux+ii, 0);
+ printf("\npi\n");
+ for(ii=0; ii<N; ii++)
+ s_print_tran_strvec(nx[ii+1], qp_sol.pi+ii, 0);
+ printf("\nlam_lb\n");
+ for(ii=0; ii<=N; ii++)
+ s_print_tran_strvec(nb[ii], qp_sol.lam_lb+ii, 0);
+ printf("\nlam_ub\n");
+ for(ii=0; ii<=N; ii++)
+ s_print_tran_strvec(nb[ii], qp_sol.lam_ub+ii, 0);
+ printf("\nlam_lg\n");
+ for(ii=0; ii<=N; ii++)
+ s_print_tran_strvec(ng[ii], qp_sol.lam_lg+ii, 0);
+ printf("\nlam_ug\n");
+ for(ii=0; ii<=N; ii++)
+ s_print_tran_strvec(ng[ii], qp_sol.lam_ug+ii, 0);
+ printf("\nt_lb\n");
+ for(ii=0; ii<=N; ii++)
+ s_print_tran_strvec(nb[ii], qp_sol.t_lb+ii, 0);
+ printf("\nt_ub\n");
+ for(ii=0; ii<=N; ii++)
+ s_print_tran_strvec(nb[ii], qp_sol.t_ub+ii, 0);
+ printf("\nt_lg\n");
+ for(ii=0; ii<=N; ii++)
+ s_print_tran_strvec(ng[ii], qp_sol.t_lg+ii, 0);
+ printf("\nt_ug\n");
+ for(ii=0; ii<=N; ii++)
+ s_print_tran_strvec(ng[ii], qp_sol.t_ug+ii, 0);
+
+ printf("\nresiduals\n\n");
+ printf("\nres_g\n");
+ for(ii=0; ii<=N; ii++)
+ s_print_e_tran_strvec(nu[ii]+nx[ii], workspace.res_g+ii, 0);
+ printf("\nres_b\n");
+ for(ii=0; ii<N; ii++)
+ s_print_e_tran_strvec(nx[ii+1], workspace.res_b+ii, 0);
+ printf("\nres_m_lb\n");
+ for(ii=0; ii<=N; ii++)
+ s_print_e_tran_strvec(nb[ii], workspace.res_m_lb+ii, 0);
+ printf("\nres_m_ub\n");
+ for(ii=0; ii<=N; ii++)
+ s_print_e_tran_strvec(nb[ii], workspace.res_m_ub+ii, 0);
+ printf("\nres_m_lg\n");
+ for(ii=0; ii<=N; ii++)
+ s_print_e_tran_strvec(ng[ii], workspace.res_m_lg+ii, 0);
+ printf("\nres_m_ug\n");
+ for(ii=0; ii<=N; ii++)
+ s_print_e_tran_strvec(ng[ii], workspace.res_m_ug+ii, 0);
+ printf("\nres_d_lb\n");
+ for(ii=0; ii<=N; ii++)
+ s_print_e_tran_strvec(nb[ii], workspace.res_d_lb+ii, 0);
+ printf("\nres_d_ub\n");
+ for(ii=0; ii<=N; ii++)
+ s_print_e_tran_strvec(nb[ii], workspace.res_d_ub+ii, 0);
+ printf("\nres_d_lg\n");
+ for(ii=0; ii<=N; ii++)
+ s_print_e_tran_strvec(ng[ii], workspace.res_d_lg+ii, 0);
+ printf("\nres_d_ug\n");
+ for(ii=0; ii<=N; ii++)
+ s_print_e_tran_strvec(ng[ii], workspace.res_d_ug+ii, 0);
+ printf("\nres_mu\n");
+ printf("\n%e\n\n", workspace.res_mu);
+#endif
+
+ printf("\nipm iter = %d\n", workspace.iter);
+ printf("\nalpha_aff\tmu_aff\t\tsigma\t\talpha\t\tmu\n");
+ s_print_e_tran_mat(5, workspace.iter, workspace.stat, 5);
+
+ printf("\nocp ipm time = %e [s]\n\n", time_ocp_ipm);
+
+/************************************************
+* free memory
+************************************************/
+
+ s_free(A);
+ s_free(B);
+ s_free(b);
+ s_free(x0);
+ s_free(Q);
+ s_free(R);
+ s_free(S);
+ s_free(q);
+ s_free(r);
+ s_free(r0);
+ int_free(idxb0);
+ s_free(d_lb0);
+ s_free(d_ub0);
+ int_free(idxb1);
+ s_free(d_lb1);
+ s_free(d_ub1);
+ int_free(idxbN);
+ s_free(d_lbN);
+ s_free(d_ubN);
+ s_free(C0);
+ s_free(D0);
+ s_free(d_lg0);
+ s_free(d_ug0);
+ s_free(C1);
+ s_free(D1);
+ s_free(d_lg1);
+ s_free(d_ug1);
+ s_free(CN);
+ s_free(DN);
+ s_free(d_lgN);
+ s_free(d_ugN);
+
+ free(qp_mem);
+ free(qp_sol_mem);
+ free(ipm_mem);
+
+/************************************************
+* return
+************************************************/
+
+ return 0;
+
+ }