Squashed 'third_party/hpipm/' content from commit c2c0261
Change-Id: I05e186a6e1e1f075aa629092e7ad86e6116ca711
git-subtree-dir: third_party/hpipm
git-subtree-split: c2c0261e8ded36f636d9c4390a2899bdc4c999cc
diff --git a/ocp_qp/m_ocp_qp_kkt.c b/ocp_qp/m_ocp_qp_kkt.c
new file mode 100644
index 0000000..b3b5a71
--- /dev/null
+++ b/ocp_qp/m_ocp_qp_kkt.c
@@ -0,0 +1,426 @@
+/**************************************************************************************************
+* *
+* This file is part of HPIPM. *
+* *
+* HPIPM -- High Performance Interior Point Method. *
+* Copyright (C) 2017 by Gianluca Frison. *
+* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
+* All rights reserved. *
+* *
+* HPMPC is free software; you can redistribute it and/or *
+* modify it under the terms of the GNU Lesser General Public *
+* License as published by the Free Software Foundation; either *
+* version 2.1 of the License, or (at your option) any later version. *
+* *
+* HPMPC is distributed in the hope that it will be useful, *
+* but WITHOUT ANY WARRANTY; without even the implied warranty of *
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
+* See the GNU Lesser General Public License for more details. *
+* *
+* You should have received a copy of the GNU Lesser General Public *
+* License along with HPMPC; if not, write to the Free Software *
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA *
+* *
+* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
+* *
+**************************************************************************************************/
+
+
+
+#include <math.h>
+
+#include <blasfeo_target.h>
+#include <blasfeo_common.h>
+#include <blasfeo_d_aux.h>
+#include <blasfeo_s_aux.h>
+#include <blasfeo_m_aux.h>
+#include <blasfeo_d_blas.h>
+#include <blasfeo_s_blas.h>
+
+#include "../include/hpipm_d_ocp_qp.h"
+#include "../include/hpipm_s_ocp_qp.h"
+#include "../include/hpipm_d_ocp_qp_sol.h"
+#include "../include/hpipm_d_ocp_qp_ipm_hard.h"
+#include "../include/hpipm_m_ocp_qp_ipm_hard.h"
+#include "../include/hpipm_d_core_qp_ipm_hard.h"
+#include "../include/hpipm_d_core_qp_ipm_hard_aux.h"
+
+
+
+// backward Riccati recursion
+void m_fact_solve_kkt_step_hard_ocp_qp(struct d_ocp_qp *d_qp, struct s_ocp_qp *s_qp, struct m_ipm_hard_ocp_qp_workspace *ws)
+ {
+
+ int N = s_qp->N;
+ int *nx = s_qp->nx;
+ int *nu = s_qp->nu;
+ int *nb = s_qp->nb;
+ int *ng = s_qp->ng;
+
+ struct s_strmat *BAbt = s_qp->BAbt;
+ struct s_strmat *RSQrq = s_qp->RSQrq;
+ struct s_strmat *DCt = s_qp->DCt;
+ struct d_strmat *d_DCt = d_qp->DCt;
+ int **idxb = s_qp->idxb;
+ int **d_idxb = d_qp->idxb;
+
+ struct s_strmat *L = ws->L;
+ struct s_strmat *AL = ws->AL;
+ struct d_strvec *d_res_b = ws->res_b;
+ struct d_strvec *d_res_g = ws->res_g;
+ struct s_strvec *res_b = ws->sres_b;
+ struct s_strvec *res_g = ws->sres_g;
+ struct d_strvec *d_dux = ws->dux;
+ struct d_strvec *d_dpi = ws->dpi;
+ struct s_strvec *dux = ws->sdux;
+ struct s_strvec *dpi = ws->sdpi;
+ struct d_strvec *d_dt_lb = ws->dt_lb;
+ struct d_strvec *d_dt_lg = ws->dt_lg;
+ struct d_strvec *d_Qx_lg = ws->Qx_lg;
+ struct d_strvec *d_Qx_lb = ws->Qx_lb;
+ struct d_strvec *d_qx_lg = ws->qx_lg;
+ struct d_strvec *d_qx_lb = ws->qx_lb;
+ struct s_strvec *Qx_lg = ws->sQx_lg;
+ struct s_strvec *Qx_lb = ws->sQx_lb;
+ struct s_strvec *qx_lg = ws->sqx_lg;
+ struct s_strvec *qx_lb = ws->sqx_lb;
+ struct s_strvec *Pb = ws->Pb;
+ struct s_strvec *tmp_nxM = ws->tmp_nxM;
+
+ //
+ int ii;
+
+ struct d_ipm_hard_core_qp_workspace *cws = ws->core_workspace;
+
+// if(nb>0 | ng>0)
+// {
+ d_compute_Qx_qx_hard_qp(cws);
+// }
+
+
+
+ // cvt double => single
+ for(ii=0; ii<N; ii++)
+ {
+ m_cvt_d2s_strvec(nu[ii]+nx[ii], d_res_g+ii, 0, res_g+ii, 0);
+ m_cvt_d2s_strvec(nx[ii+1], d_res_b+ii, 0, res_b+ii, 0);
+ m_cvt_d2s_strvec(nb[ii], d_Qx_lb+ii, 0, Qx_lb+ii, 0);
+ m_cvt_d2s_strvec(nb[ii], d_qx_lb+ii, 0, qx_lb+ii, 0);
+ m_cvt_d2s_strvec(ng[ii], d_Qx_lg+ii, 0, Qx_lg+ii, 0);
+ m_cvt_d2s_strvec(ng[ii], d_qx_lg+ii, 0, qx_lg+ii, 0);
+ }
+ ii = N;
+ m_cvt_d2s_strvec(nu[ii]+nx[ii], d_res_g+ii, 0, res_g+ii, 0);
+ m_cvt_d2s_strvec(nb[ii], d_Qx_lb+ii, 0, Qx_lb+ii, 0);
+ m_cvt_d2s_strvec(nb[ii], d_qx_lb+ii, 0, qx_lb+ii, 0);
+ m_cvt_d2s_strvec(ng[ii], d_Qx_lg+ii, 0, Qx_lg+ii, 0);
+ m_cvt_d2s_strvec(ng[ii], d_qx_lg+ii, 0, qx_lg+ii, 0);
+
+
+
+ // factorization and backward substitution
+
+ // last stage
+#if defined(DOUBLE_PRECISION)
+ strcp_l_libstr(nu[N]+nx[N], RSQrq+N, 0, 0, L+N, 0, 0); // TODO dtrcp_l_libstr with m and n, for m>=n
+#else
+ sgecp_libstr(nu[N]+nx[N], nu[N]+nx[N], RSQrq+N, 0, 0, L+N, 0, 0); // TODO dtrcp_l_libstr with m and n, for m>=n
+#endif
+ srowin_libstr(nu[N]+nx[N], 1.0, res_g+N, 0, L+N, nu[N]+nx[N], 0);
+ if(nb[N]>0)
+ {
+ sdiaad_sp_libstr(nb[N], 1.0, Qx_lb+N, 0, idxb[N], L+N, 0, 0);
+ srowad_sp_libstr(nb[N], 1.0, qx_lb+N, 0, idxb[N], L+N, nu[N]+nx[N], 0);
+ }
+ if(ng[N]>0)
+ {
+ sgemm_r_diag_libstr(nu[N]+nx[N], ng[N], 1.0, DCt+N, 0, 0, Qx_lg+N, 0, 0.0, AL+0, 0, 0, AL+0, 0, 0);
+ srowin_libstr(ng[N], 1.0, qx_lg+N, 0, AL+0, nu[N]+nx[N], 0);
+ ssyrk_spotrf_ln_libstr(nu[N]+nx[N]+1, nu[N]+nx[N], ng[N], AL+0, 0, 0, DCt+N, 0, 0, L+N, 0, 0, L+N, 0, 0);
+ }
+ else
+ {
+ spotrf_l_mn_libstr(nu[N]+nx[N]+1, nu[N]+nx[N], L+N, 0, 0, L+N, 0, 0);
+ }
+
+ // middle stages
+ for(ii=0; ii<N; ii++)
+ {
+ sgecp_libstr(nu[N-ii-1]+nx[N-ii-1], nx[N-ii], BAbt+(N-ii-1), 0, 0, AL, 0, 0);
+ srowin_libstr(nx[N-ii], 1.0, res_b+(N-ii-1), 0, AL, nu[N-ii-1]+nx[N-ii-1], 0);
+ strmm_rlnn_libstr(nu[N-ii-1]+nx[N-ii-1]+1, nx[N-ii], 1.0, L+(N-ii), nu[N-ii], nu[N-ii], AL, 0, 0, AL, 0, 0);
+ srowex_libstr(nx[N-ii], 1.0, AL, nu[N-ii-1]+nx[N-ii-1], 0, tmp_nxM, 0);
+ strmv_lnn_libstr(nx[N-ii], nx[N-ii], L+(N-ii), nu[N-ii], nu[N-ii], tmp_nxM, 0, Pb+(N-ii-1), 0);
+ sgead_libstr(1, nx[N-ii], 1.0, L+(N-ii), nu[N-ii]+nx[N-ii], nu[N-ii], AL, nu[N-ii-1]+nx[N-ii-1], 0);
+
+#if defined(DOUBLE_PRECISION)
+ strcp_l_libstr(nu[N-ii-1]+nx[N-ii-1], RSQrq+(N-ii-1), 0, 0, L+(N-ii-1), 0, 0);
+#else
+ sgecp_libstr(nu[N-ii-1]+nx[N-ii-1], nu[N-ii-1]+nx[N-ii-1], RSQrq+(N-ii-1), 0, 0, L+(N-ii-1), 0, 0);
+#endif
+ srowin_libstr(nu[N-ii-1]+nx[N-ii-1], 1.0, res_g+(N-ii-1), 0, L+(N-ii-1), nu[N-ii-1]+nx[N-ii-1], 0);
+
+ if(nb[N-ii-1]>0)
+ {
+ sdiaad_sp_libstr(nb[N-ii-1], 1.0, Qx_lb+(N-ii-1), 0, idxb[N-ii-1], L+(N-ii-1), 0, 0);
+ srowad_sp_libstr(nb[N-ii-1], 1.0, qx_lb+(N-ii-1), 0, idxb[N-ii-1], L+(N-ii-1), nu[N-ii-1]+nx[N-ii-1], 0);
+ }
+
+ if(ng[N-ii-1]>0)
+ {
+ sgemm_r_diag_libstr(nu[N-ii-1]+nx[N-ii-1], ng[N-ii-1], 1.0, DCt+N-ii-1, 0, 0, Qx_lg+N-ii-1, 0, 0.0, AL+0, 0, nx[N-ii], AL+0, 0, nx[N-ii]);
+ srowin_libstr(ng[N-ii-1], 1.0, qx_lg+N-ii-1, 0, AL+0, nu[N-ii-1]+nx[N-ii-1], nx[N-ii]);
+ sgecp_libstr(nu[N-ii-1]+nx[N-ii-1], nx[N-ii], AL+0, 0, 0, AL+1, 0, 0);
+ sgecp_libstr(nu[N-ii-1]+nx[N-ii-1], ng[N-ii-1], DCt+N-ii-1, 0, 0, AL+1, 0, nx[N-ii]);
+ ssyrk_spotrf_ln_libstr(nu[N-ii-1]+nx[N-ii-1]+1, nu[N-ii-1]+nx[N-ii-1], nx[N-ii]+ng[N-ii-1], AL+0, 0, 0, AL+1, 0, 0, L+N-ii-1, 0, 0, L+N-ii-1, 0, 0);
+ }
+ else
+ {
+ ssyrk_spotrf_ln_libstr(nu[N-ii-1]+nx[N-ii-1]+1, nu[N-ii-1]+nx[N-ii-1], nx[N-ii], AL, 0, 0, AL, 0, 0, L+(N-ii-1), 0, 0, L+(N-ii-1), 0, 0);
+ }
+
+// d_print_strmat(nu[N-ii-1]+nx[N-ii-1]+1, nu[N-ii-1]+nx[N-ii-1], L+(N-ii-1), 0, 0);
+ }
+
+ // forward substitution
+
+ // first stage
+ ii = 0;
+ srowex_libstr(nu[ii]+nx[ii], -1.0, L+(ii), nu[ii]+nx[ii], 0, dux+ii, 0);
+ strsv_ltn_libstr(nu[ii]+nx[ii], L+ii, 0, 0, dux+ii, 0, dux+ii, 0);
+ sgemv_t_libstr(nu[ii]+nx[ii], nx[ii+1], 1.0, BAbt+ii, 0, 0, dux+ii, 0, 1.0, res_b+ii, 0, dux+(ii+1), nu[ii+1]);
+ srowex_libstr(nx[ii+1], 1.0, L+(ii+1), nu[ii+1]+nx[ii+1], nu[ii+1], tmp_nxM, 0);
+ strmv_ltn_libstr(nx[ii+1], nx[ii+1], L+(ii+1), nu[ii+1], nu[ii+1], dux+(ii+1), nu[ii+1], dpi+ii, 0);
+ saxpy_libstr(nx[ii+1], 1.0, tmp_nxM, 0, dpi+ii, 0, dpi+ii, 0);
+ strmv_lnn_libstr(nx[ii+1], nx[ii+1], L+(ii+1), nu[ii+1], nu[ii+1], dpi+ii, 0, dpi+ii, 0);
+
+// d_print_tran_strvec(nu[ii]+nx[ii], dux+ii, 0);
+
+ // middle stages
+ for(ii=1; ii<N; ii++)
+ {
+ srowex_libstr(nu[ii], -1.0, L+(ii), nu[ii]+nx[ii], 0, dux+ii, 0);
+ strsv_ltn_mn_libstr(nu[ii]+nx[ii], nu[ii], L+ii, 0, 0, dux+ii, 0, dux+ii, 0);
+ sgemv_t_libstr(nu[ii]+nx[ii], nx[ii+1], 1.0, BAbt+ii, 0, 0, dux+ii, 0, 1.0, res_b+ii, 0, dux+(ii+1), nu[ii+1]);
+ srowex_libstr(nx[ii+1], 1.0, L+(ii+1), nu[ii+1]+nx[ii+1], nu[ii+1], tmp_nxM, 0);
+ strmv_ltn_libstr(nx[ii+1], nx[ii+1], L+(ii+1), nu[ii+1], nu[ii+1], dux+(ii+1), nu[ii+1], dpi+ii, 0);
+ saxpy_libstr(nx[ii+1], 1.0, tmp_nxM, 0, dpi+ii, 0, dpi+ii, 0);
+ strmv_lnn_libstr(nx[ii+1], nx[ii+1], L+(ii+1), nu[ii+1], nu[ii+1], dpi+ii, 0, dpi+ii, 0);
+
+// d_print_tran_strvec(nu[ii]+nx[ii], dux+ii, 0);
+ }
+
+
+
+ // cvt single => double
+ for(ii=0; ii<N; ii++)
+ {
+ m_cvt_s2d_strvec(nu[ii]+nx[ii], dux+ii, 0, d_dux+ii, 0);
+ m_cvt_s2d_strvec(nx[ii+1], dpi+ii, 0, d_dpi+ii, 0);
+ }
+ ii = N;
+ m_cvt_s2d_strvec(nu[ii]+nx[ii], dux+ii, 0, d_dux+ii, 0);
+
+
+
+// if(nb>0)
+// {
+ for(ii=0; ii<=N; ii++)
+ dvecex_sp_libstr(nb[ii], 1.0, d_idxb[ii], d_dux+ii, 0, d_dt_lb+ii, 0);
+// }
+
+// if(ng>0)
+// {
+ for(ii=0; ii<=N; ii++)
+ dgemv_t_libstr(nu[ii]+nx[ii], ng[ii], 1.0, d_DCt+ii, 0, 0, d_dux+ii, 0, 0.0, d_dt_lg+ii, 0, d_dt_lg+ii, 0);
+// }
+
+// if(nb>0 | ng>0)
+// {
+ d_compute_lam_t_hard_qp(cws);
+// }
+
+ return;
+
+ }
+
+
+
+// backward Riccati recursion
+void m_solve_kkt_step_hard_ocp_qp(struct d_ocp_qp *d_qp, struct s_ocp_qp *s_qp, struct m_ipm_hard_ocp_qp_workspace *ws)
+ {
+
+ int N = s_qp->N;
+ int *nx = s_qp->nx;
+ int *nu = s_qp->nu;
+ int *nb = s_qp->nb;
+ int *ng = s_qp->ng;
+
+ struct s_strmat *BAbt = s_qp->BAbt;
+ struct s_strmat *RSQrq = s_qp->RSQrq;
+ struct s_strmat *DCt = s_qp->DCt;
+ struct d_strmat *d_DCt = d_qp->DCt;
+ int **idxb = s_qp->idxb;
+ int **d_idxb = d_qp->idxb;
+
+ struct s_strmat *L = ws->L;
+ struct s_strmat *AL = ws->AL;
+ struct d_strvec *d_res_b = ws->res_b;
+ struct d_strvec *d_res_g = ws->res_g;
+ struct s_strvec *res_b = ws->sres_b;
+ struct s_strvec *res_g = ws->sres_g;
+ struct d_strvec *d_dux = ws->dux;
+ struct d_strvec *d_dpi = ws->dpi;
+ struct s_strvec *dux = ws->sdux;
+ struct s_strvec *dpi = ws->sdpi;
+ struct d_strvec *d_dt_lb = ws->dt_lb;
+ struct d_strvec *d_dt_lg = ws->dt_lg;
+ struct d_strvec *d_qx_lg = ws->qx_lg;
+ struct d_strvec *d_qx_lb = ws->qx_lb;
+ struct s_strvec *qx_lg = ws->sqx_lg;
+ struct s_strvec *qx_lb = ws->sqx_lb;
+ struct s_strvec *Pb = ws->Pb;
+ struct s_strvec *tmp_nxM = ws->tmp_nxM;
+
+ //
+ int ii;
+
+ struct d_ipm_hard_core_qp_workspace *cws = ws->core_workspace;
+
+// if(nb>0 | ng>0)
+// {
+ d_compute_qx_hard_qp(cws);
+// }
+
+
+
+ // cvt double => single
+ for(ii=0; ii<N; ii++)
+ {
+ m_cvt_d2s_strvec(nu[ii]+nx[ii], d_res_g+ii, 0, res_g+ii, 0);
+ m_cvt_d2s_strvec(nx[ii+1], d_res_b+ii, 0, res_b+ii, 0);
+ m_cvt_d2s_strvec(nb[ii], d_qx_lb+ii, 0, qx_lb+ii, 0);
+ m_cvt_d2s_strvec(ng[ii], d_qx_lg+ii, 0, qx_lg+ii, 0);
+ }
+ ii = N;
+ m_cvt_d2s_strvec(nu[ii]+nx[ii], d_res_g+ii, 0, res_g+ii, 0);
+ m_cvt_d2s_strvec(nb[ii], d_qx_lb+ii, 0, qx_lb+ii, 0);
+ m_cvt_d2s_strvec(ng[ii], d_qx_lg+ii, 0, qx_lg+ii, 0);
+
+
+
+ // backward substitution
+
+ // last stage
+ sveccp_libstr(nu[N]+nx[N], res_g+N, 0, dux+N, 0);
+ if(nb[N]>0)
+ {
+ svecad_sp_libstr(nb[N], 1.0, qx_lb+N, 0, idxb[N], dux+N, 0);
+ }
+ // general constraints
+ if(ng[N]>0)
+ {
+ sgemv_n_libstr(nu[N]+nx[N], ng[N], 1.0, DCt+N, 0, 0, qx_lg+N, 0, 1.0, dux+N, 0, dux+N, 0);
+ }
+
+ // middle stages
+ for(ii=0; ii<N-1; ii++)
+ {
+ sveccp_libstr(nu[N-ii-1]+nx[N-ii-1], res_g+N-ii-1, 0, dux+N-ii-1, 0);
+ if(nb[N-ii-1]>0)
+ {
+ svecad_sp_libstr(nb[N-ii-1], 1.0, qx_lb+N-ii-1, 0, idxb[N-ii-1], dux+N-ii-1, 0);
+ }
+ if(ng[N-ii-1]>0)
+ {
+ sgemv_n_libstr(nu[N-ii-1]+nx[N-ii-1], ng[N-ii-1], 1.0, DCt+N-ii-1, 0, 0, qx_lg+N-ii-1, 0, 1.0, dux+N-ii-1, 0, dux+N-ii-1, 0);
+ }
+ if(ws->compute_Pb)
+ {
+ strmv_ltn_libstr(nx[N-ii], nx[N-ii], L+(N-ii), nu[N-ii], nu[N-ii], res_b+N-ii-1, 0, Pb+(N-ii-1), 0);
+ strmv_lnn_libstr(nx[N-ii], nx[N-ii], L+(N-ii), nu[N-ii], nu[N-ii], Pb+(N-ii-1), 0, Pb+(N-ii-1), 0);
+ }
+ saxpy_libstr(nx[N-ii], 1.0, dux+N-ii, nu[N-ii], Pb+N-ii-1, 0, tmp_nxM, 0);
+ sgemv_n_libstr(nu[N-ii-1]+nx[N-ii-1], nx[N-ii], 1.0, BAbt+N-ii-1, 0, 0, tmp_nxM, 0, 1.0, dux+N-ii-1, 0, dux+N-ii-1, 0);
+ strsv_lnn_mn_libstr(nu[N-ii-1]+nx[N-ii-1], nu[N-ii-1], L+N-ii-1, 0, 0, dux+N-ii-1, 0, dux+N-ii-1, 0);
+ }
+
+ // first stage
+ ii = N-1;
+ sveccp_libstr(nu[N-ii-1]+nx[N-ii-1], res_g+N-ii-1, 0, dux+N-ii-1, 0);
+ if(nb[N-ii-1]>0)
+ {
+ svecad_sp_libstr(nb[N-ii-1], 1.0, qx_lb+N-ii-1, 0, idxb[N-ii-1], dux+N-ii-1, 0);
+ }
+ if(ng[N-ii-1]>0)
+ {
+ sgemv_n_libstr(nu[N-ii-1]+nx[N-ii-1], ng[N-ii-1], 1.0, DCt+N-ii-1, 0, 0, qx_lg+N-ii-1, 0, 1.0, dux+N-ii-1, 0, dux+N-ii-1, 0);
+ }
+ if(ws->compute_Pb)
+ {
+ strmv_ltn_libstr(nx[N-ii], nx[N-ii], L+(N-ii), nu[N-ii], nu[N-ii], res_b+N-ii-1, 0, Pb+(N-ii-1), 0);
+ strmv_lnn_libstr(nx[N-ii], nx[N-ii], L+(N-ii), nu[N-ii], nu[N-ii], Pb+(N-ii-1), 0, Pb+(N-ii-1), 0);
+ }
+ saxpy_libstr(nx[N-ii], 1.0, dux+N-ii, nu[N-ii], Pb+N-ii-1, 0, tmp_nxM, 0);
+ sgemv_n_libstr(nu[N-ii-1]+nx[N-ii-1], nx[N-ii], 1.0, BAbt+N-ii-1, 0, 0, tmp_nxM, 0, 1.0, dux+N-ii-1, 0, dux+N-ii-1, 0);
+ strsv_lnn_libstr(nu[N-ii-1]+nx[N-ii-1], L+N-ii-1, 0, 0, dux+N-ii-1, 0, dux+N-ii-1, 0);
+
+ // first stage
+ ii = 0;
+ sveccp_libstr(nx[ii+1], dux+ii+1, nu[ii+1], dpi+ii, 0);
+ svecsc_libstr(nu[ii]+nx[ii], -1.0, dux+ii, 0);
+ strsv_ltn_libstr(nu[ii]+nx[ii], L+ii, 0, 0, dux+ii, 0, dux+ii, 0);
+ sgemv_t_libstr(nu[ii]+nx[ii], nx[ii+1], 1.0, BAbt+ii, 0, 0, dux+ii, 0, 1.0, res_b+ii, 0, dux+ii+1, nu[ii+1]);
+ sveccp_libstr(nx[ii+1], dux+ii+1, nu[ii+1], tmp_nxM, 0);
+ strmv_ltn_libstr(nx[ii+1], nx[ii+1], L+ii+1, nu[ii+1], nu[ii+1], tmp_nxM, 0, tmp_nxM, 0);
+ strmv_lnn_libstr(nx[ii+1], nx[ii+1], L+ii+1, nu[ii+1], nu[ii+1], tmp_nxM, 0, tmp_nxM, 0);
+ saxpy_libstr(nx[ii+1], 1.0, tmp_nxM, 0, dpi+ii, 0, dpi+ii, 0);
+
+ // middle stages
+ for(ii=1; ii<N; ii++)
+ {
+ sveccp_libstr(nx[ii+1], dux+ii+1, nu[ii+1], dpi+ii, 0);
+ svecsc_libstr(nu[ii], -1.0, dux+ii, 0);
+ strsv_ltn_mn_libstr(nu[ii]+nx[ii], nu[ii], L+ii, 0, 0, dux+ii, 0, dux+ii, 0);
+ sgemv_t_libstr(nu[ii]+nx[ii], nx[ii+1], 1.0, BAbt+ii, 0, 0, dux+ii, 0, 1.0, res_b+ii, 0, dux+ii+1, nu[ii+1]);
+ sveccp_libstr(nx[ii+1], dux+ii+1, nu[ii+1], tmp_nxM, 0);
+ strmv_ltn_libstr(nx[ii+1], nx[ii+1], L+ii+1, nu[ii+1], nu[ii+1], tmp_nxM, 0, tmp_nxM, 0);
+ strmv_lnn_libstr(nx[ii+1], nx[ii+1], L+ii+1, nu[ii+1], nu[ii+1], tmp_nxM, 0, tmp_nxM, 0);
+ saxpy_libstr(nx[ii+1], 1.0, tmp_nxM, 0, dpi+ii, 0, dpi+ii, 0);
+ }
+
+
+
+ // cvt single => double
+ for(ii=0; ii<N; ii++)
+ {
+ m_cvt_s2d_strvec(nu[ii]+nx[ii], dux+ii, 0, d_dux+ii, 0);
+ m_cvt_s2d_strvec(nx[ii+1], dpi+ii, 0, d_dpi+ii, 0);
+ }
+ ii = N;
+ m_cvt_s2d_strvec(nu[ii]+nx[ii], dux+ii, 0, d_dux+ii, 0);
+
+
+
+// if(nb>0)
+// {
+ for(ii=0; ii<=N; ii++)
+ dvecex_sp_libstr(nb[ii], 1.0, d_idxb[ii], d_dux+ii, 0, d_dt_lb+ii, 0);
+// }
+
+// if(ng>0)
+// {
+ for(ii=0; ii<=N; ii++)
+ dgemv_t_libstr(nu[ii]+nx[ii], ng[ii], 1.0, d_DCt+ii, 0, 0, d_dux+ii, 0, 0.0, d_dt_lg+ii, 0, d_dt_lg+ii, 0);
+// }
+
+// if(nb>0 | ng>0)
+// {
+ d_compute_lam_t_hard_qp(cws);
+// }
+
+ return;
+
+ }
+
+