Blame - ocp_qp/m_ocp_qp_kkt.c - RealtimeRoboticsGroup/test

blob: b3b5a716b7098a9144cbf4f60cb0a4785b080761 [file] [log] [blame]

Austin Schuh	16ce3c7	2018-01-28 16:17:08 -0800	[diff] [blame^]	1	/**************************************************************************************************
				2	* *
				3	* This file is part of HPIPM. *
				4	* *
				5	* HPIPM -- High Performance Interior Point Method. *
				6	* Copyright (C) 2017 by Gianluca Frison. *
				7	* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
				8	* All rights reserved. *
				9	* *
				10	* HPMPC is free software; you can redistribute it and/or *
				11	* modify it under the terms of the GNU Lesser General Public *
				12	* License as published by the Free Software Foundation; either *
				13	* version 2.1 of the License, or (at your option) any later version. *
				14	* *
				15	* HPMPC is distributed in the hope that it will be useful, *
				16	* but WITHOUT ANY WARRANTY; without even the implied warranty of *
				17	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
				18	* See the GNU Lesser General Public License for more details. *
				19	* *
				20	* You should have received a copy of the GNU Lesser General Public *
				21	* License along with HPMPC; if not, write to the Free Software *
				22	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA *
				23	* *
				24	* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
				25	* *
				26	**************************************************************************************************/
				27
				28
				29
				30	#include <math.h>
				31
				32	#include <blasfeo_target.h>
				33	#include <blasfeo_common.h>
				34	#include <blasfeo_d_aux.h>
				35	#include <blasfeo_s_aux.h>
				36	#include <blasfeo_m_aux.h>
				37	#include <blasfeo_d_blas.h>
				38	#include <blasfeo_s_blas.h>
				39
				40	#include "../include/hpipm_d_ocp_qp.h"
				41	#include "../include/hpipm_s_ocp_qp.h"
				42	#include "../include/hpipm_d_ocp_qp_sol.h"
				43	#include "../include/hpipm_d_ocp_qp_ipm_hard.h"
				44	#include "../include/hpipm_m_ocp_qp_ipm_hard.h"
				45	#include "../include/hpipm_d_core_qp_ipm_hard.h"
				46	#include "../include/hpipm_d_core_qp_ipm_hard_aux.h"
				47
				48
				49
				50	// backward Riccati recursion
				51	void m_fact_solve_kkt_step_hard_ocp_qp(struct d_ocp_qp d_qp, struct s_ocp_qp s_qp, struct m_ipm_hard_ocp_qp_workspace *ws)
				52	{
				53
				54	int N = s_qp->N;
				55	int *nx = s_qp->nx;
				56	int *nu = s_qp->nu;
				57	int *nb = s_qp->nb;
				58	int *ng = s_qp->ng;
				59
				60	struct s_strmat *BAbt = s_qp->BAbt;
				61	struct s_strmat *RSQrq = s_qp->RSQrq;
				62	struct s_strmat *DCt = s_qp->DCt;
				63	struct d_strmat *d_DCt = d_qp->DCt;
				64	int **idxb = s_qp->idxb;
				65	int **d_idxb = d_qp->idxb;
				66
				67	struct s_strmat *L = ws->L;
				68	struct s_strmat *AL = ws->AL;
				69	struct d_strvec *d_res_b = ws->res_b;
				70	struct d_strvec *d_res_g = ws->res_g;
				71	struct s_strvec *res_b = ws->sres_b;
				72	struct s_strvec *res_g = ws->sres_g;
				73	struct d_strvec *d_dux = ws->dux;
				74	struct d_strvec *d_dpi = ws->dpi;
				75	struct s_strvec *dux = ws->sdux;
				76	struct s_strvec *dpi = ws->sdpi;
				77	struct d_strvec *d_dt_lb = ws->dt_lb;
				78	struct d_strvec *d_dt_lg = ws->dt_lg;
				79	struct d_strvec *d_Qx_lg = ws->Qx_lg;
				80	struct d_strvec *d_Qx_lb = ws->Qx_lb;
				81	struct d_strvec *d_qx_lg = ws->qx_lg;
				82	struct d_strvec *d_qx_lb = ws->qx_lb;
				83	struct s_strvec *Qx_lg = ws->sQx_lg;
				84	struct s_strvec *Qx_lb = ws->sQx_lb;
				85	struct s_strvec *qx_lg = ws->sqx_lg;
				86	struct s_strvec *qx_lb = ws->sqx_lb;
				87	struct s_strvec *Pb = ws->Pb;
				88	struct s_strvec *tmp_nxM = ws->tmp_nxM;
				89
				90	//
				91	int ii;
				92
				93	struct d_ipm_hard_core_qp_workspace *cws = ws->core_workspace;
				94
				95	// if(nb>0 \| ng>0)
				96	// {
				97	d_compute_Qx_qx_hard_qp(cws);
				98	// }
				99
				100
				101
				102	// cvt double => single
				103	for(ii=0; ii<N; ii++)
				104	{
				105	m_cvt_d2s_strvec(nu[ii]+nx[ii], d_res_g+ii, 0, res_g+ii, 0);
				106	m_cvt_d2s_strvec(nx[ii+1], d_res_b+ii, 0, res_b+ii, 0);
				107	m_cvt_d2s_strvec(nb[ii], d_Qx_lb+ii, 0, Qx_lb+ii, 0);
				108	m_cvt_d2s_strvec(nb[ii], d_qx_lb+ii, 0, qx_lb+ii, 0);
				109	m_cvt_d2s_strvec(ng[ii], d_Qx_lg+ii, 0, Qx_lg+ii, 0);
				110	m_cvt_d2s_strvec(ng[ii], d_qx_lg+ii, 0, qx_lg+ii, 0);
				111	}
				112	ii = N;
				113	m_cvt_d2s_strvec(nu[ii]+nx[ii], d_res_g+ii, 0, res_g+ii, 0);
				114	m_cvt_d2s_strvec(nb[ii], d_Qx_lb+ii, 0, Qx_lb+ii, 0);
				115	m_cvt_d2s_strvec(nb[ii], d_qx_lb+ii, 0, qx_lb+ii, 0);
				116	m_cvt_d2s_strvec(ng[ii], d_Qx_lg+ii, 0, Qx_lg+ii, 0);
				117	m_cvt_d2s_strvec(ng[ii], d_qx_lg+ii, 0, qx_lg+ii, 0);
				118
				119
				120
				121	// factorization and backward substitution
				122
				123	// last stage
				124	#if defined(DOUBLE_PRECISION)
				125	strcp_l_libstr(nu[N]+nx[N], RSQrq+N, 0, 0, L+N, 0, 0); // TODO dtrcp_l_libstr with m and n, for m>=n
				126	#else
				127	sgecp_libstr(nu[N]+nx[N], nu[N]+nx[N], RSQrq+N, 0, 0, L+N, 0, 0); // TODO dtrcp_l_libstr with m and n, for m>=n
				128	#endif
				129	srowin_libstr(nu[N]+nx[N], 1.0, res_g+N, 0, L+N, nu[N]+nx[N], 0);
				130	if(nb[N]>0)
				131	{
				132	sdiaad_sp_libstr(nb[N], 1.0, Qx_lb+N, 0, idxb[N], L+N, 0, 0);
				133	srowad_sp_libstr(nb[N], 1.0, qx_lb+N, 0, idxb[N], L+N, nu[N]+nx[N], 0);
				134	}
				135	if(ng[N]>0)
				136	{
				137	sgemm_r_diag_libstr(nu[N]+nx[N], ng[N], 1.0, DCt+N, 0, 0, Qx_lg+N, 0, 0.0, AL+0, 0, 0, AL+0, 0, 0);
				138	srowin_libstr(ng[N], 1.0, qx_lg+N, 0, AL+0, nu[N]+nx[N], 0);
				139	ssyrk_spotrf_ln_libstr(nu[N]+nx[N]+1, nu[N]+nx[N], ng[N], AL+0, 0, 0, DCt+N, 0, 0, L+N, 0, 0, L+N, 0, 0);
				140	}
				141	else
				142	{
				143	spotrf_l_mn_libstr(nu[N]+nx[N]+1, nu[N]+nx[N], L+N, 0, 0, L+N, 0, 0);
				144	}
				145
				146	// middle stages
				147	for(ii=0; ii<N; ii++)
				148	{
				149	sgecp_libstr(nu[N-ii-1]+nx[N-ii-1], nx[N-ii], BAbt+(N-ii-1), 0, 0, AL, 0, 0);
				150	srowin_libstr(nx[N-ii], 1.0, res_b+(N-ii-1), 0, AL, nu[N-ii-1]+nx[N-ii-1], 0);
				151	strmm_rlnn_libstr(nu[N-ii-1]+nx[N-ii-1]+1, nx[N-ii], 1.0, L+(N-ii), nu[N-ii], nu[N-ii], AL, 0, 0, AL, 0, 0);
				152	srowex_libstr(nx[N-ii], 1.0, AL, nu[N-ii-1]+nx[N-ii-1], 0, tmp_nxM, 0);
				153	strmv_lnn_libstr(nx[N-ii], nx[N-ii], L+(N-ii), nu[N-ii], nu[N-ii], tmp_nxM, 0, Pb+(N-ii-1), 0);
				154	sgead_libstr(1, nx[N-ii], 1.0, L+(N-ii), nu[N-ii]+nx[N-ii], nu[N-ii], AL, nu[N-ii-1]+nx[N-ii-1], 0);
				155
				156	#if defined(DOUBLE_PRECISION)
				157	strcp_l_libstr(nu[N-ii-1]+nx[N-ii-1], RSQrq+(N-ii-1), 0, 0, L+(N-ii-1), 0, 0);
				158	#else
				159	sgecp_libstr(nu[N-ii-1]+nx[N-ii-1], nu[N-ii-1]+nx[N-ii-1], RSQrq+(N-ii-1), 0, 0, L+(N-ii-1), 0, 0);
				160	#endif
				161	srowin_libstr(nu[N-ii-1]+nx[N-ii-1], 1.0, res_g+(N-ii-1), 0, L+(N-ii-1), nu[N-ii-1]+nx[N-ii-1], 0);
				162
				163	if(nb[N-ii-1]>0)
				164	{
				165	sdiaad_sp_libstr(nb[N-ii-1], 1.0, Qx_lb+(N-ii-1), 0, idxb[N-ii-1], L+(N-ii-1), 0, 0);
				166	srowad_sp_libstr(nb[N-ii-1], 1.0, qx_lb+(N-ii-1), 0, idxb[N-ii-1], L+(N-ii-1), nu[N-ii-1]+nx[N-ii-1], 0);
				167	}
				168
				169	if(ng[N-ii-1]>0)
				170	{
				171	sgemm_r_diag_libstr(nu[N-ii-1]+nx[N-ii-1], ng[N-ii-1], 1.0, DCt+N-ii-1, 0, 0, Qx_lg+N-ii-1, 0, 0.0, AL+0, 0, nx[N-ii], AL+0, 0, nx[N-ii]);
				172	srowin_libstr(ng[N-ii-1], 1.0, qx_lg+N-ii-1, 0, AL+0, nu[N-ii-1]+nx[N-ii-1], nx[N-ii]);
				173	sgecp_libstr(nu[N-ii-1]+nx[N-ii-1], nx[N-ii], AL+0, 0, 0, AL+1, 0, 0);
				174	sgecp_libstr(nu[N-ii-1]+nx[N-ii-1], ng[N-ii-1], DCt+N-ii-1, 0, 0, AL+1, 0, nx[N-ii]);
				175	ssyrk_spotrf_ln_libstr(nu[N-ii-1]+nx[N-ii-1]+1, nu[N-ii-1]+nx[N-ii-1], nx[N-ii]+ng[N-ii-1], AL+0, 0, 0, AL+1, 0, 0, L+N-ii-1, 0, 0, L+N-ii-1, 0, 0);
				176	}
				177	else
				178	{
				179	ssyrk_spotrf_ln_libstr(nu[N-ii-1]+nx[N-ii-1]+1, nu[N-ii-1]+nx[N-ii-1], nx[N-ii], AL, 0, 0, AL, 0, 0, L+(N-ii-1), 0, 0, L+(N-ii-1), 0, 0);
				180	}
				181
				182	// d_print_strmat(nu[N-ii-1]+nx[N-ii-1]+1, nu[N-ii-1]+nx[N-ii-1], L+(N-ii-1), 0, 0);
				183	}
				184
				185	// forward substitution
				186
				187	// first stage
				188	ii = 0;
				189	srowex_libstr(nu[ii]+nx[ii], -1.0, L+(ii), nu[ii]+nx[ii], 0, dux+ii, 0);
				190	strsv_ltn_libstr(nu[ii]+nx[ii], L+ii, 0, 0, dux+ii, 0, dux+ii, 0);
				191	sgemv_t_libstr(nu[ii]+nx[ii], nx[ii+1], 1.0, BAbt+ii, 0, 0, dux+ii, 0, 1.0, res_b+ii, 0, dux+(ii+1), nu[ii+1]);
				192	srowex_libstr(nx[ii+1], 1.0, L+(ii+1), nu[ii+1]+nx[ii+1], nu[ii+1], tmp_nxM, 0);
				193	strmv_ltn_libstr(nx[ii+1], nx[ii+1], L+(ii+1), nu[ii+1], nu[ii+1], dux+(ii+1), nu[ii+1], dpi+ii, 0);
				194	saxpy_libstr(nx[ii+1], 1.0, tmp_nxM, 0, dpi+ii, 0, dpi+ii, 0);
				195	strmv_lnn_libstr(nx[ii+1], nx[ii+1], L+(ii+1), nu[ii+1], nu[ii+1], dpi+ii, 0, dpi+ii, 0);
				196
				197	// d_print_tran_strvec(nu[ii]+nx[ii], dux+ii, 0);
				198
				199	// middle stages
				200	for(ii=1; ii<N; ii++)
				201	{
				202	srowex_libstr(nu[ii], -1.0, L+(ii), nu[ii]+nx[ii], 0, dux+ii, 0);
				203	strsv_ltn_mn_libstr(nu[ii]+nx[ii], nu[ii], L+ii, 0, 0, dux+ii, 0, dux+ii, 0);
				204	sgemv_t_libstr(nu[ii]+nx[ii], nx[ii+1], 1.0, BAbt+ii, 0, 0, dux+ii, 0, 1.0, res_b+ii, 0, dux+(ii+1), nu[ii+1]);
				205	srowex_libstr(nx[ii+1], 1.0, L+(ii+1), nu[ii+1]+nx[ii+1], nu[ii+1], tmp_nxM, 0);
				206	strmv_ltn_libstr(nx[ii+1], nx[ii+1], L+(ii+1), nu[ii+1], nu[ii+1], dux+(ii+1), nu[ii+1], dpi+ii, 0);
				207	saxpy_libstr(nx[ii+1], 1.0, tmp_nxM, 0, dpi+ii, 0, dpi+ii, 0);
				208	strmv_lnn_libstr(nx[ii+1], nx[ii+1], L+(ii+1), nu[ii+1], nu[ii+1], dpi+ii, 0, dpi+ii, 0);
				209
				210	// d_print_tran_strvec(nu[ii]+nx[ii], dux+ii, 0);
				211	}
				212
				213
				214
				215	// cvt single => double
				216	for(ii=0; ii<N; ii++)
				217	{
				218	m_cvt_s2d_strvec(nu[ii]+nx[ii], dux+ii, 0, d_dux+ii, 0);
				219	m_cvt_s2d_strvec(nx[ii+1], dpi+ii, 0, d_dpi+ii, 0);
				220	}
				221	ii = N;
				222	m_cvt_s2d_strvec(nu[ii]+nx[ii], dux+ii, 0, d_dux+ii, 0);
				223
				224
				225
				226	// if(nb>0)
				227	// {
				228	for(ii=0; ii<=N; ii++)
				229	dvecex_sp_libstr(nb[ii], 1.0, d_idxb[ii], d_dux+ii, 0, d_dt_lb+ii, 0);
				230	// }
				231
				232	// if(ng>0)
				233	// {
				234	for(ii=0; ii<=N; ii++)
				235	dgemv_t_libstr(nu[ii]+nx[ii], ng[ii], 1.0, d_DCt+ii, 0, 0, d_dux+ii, 0, 0.0, d_dt_lg+ii, 0, d_dt_lg+ii, 0);
				236	// }
				237
				238	// if(nb>0 \| ng>0)
				239	// {
				240	d_compute_lam_t_hard_qp(cws);
				241	// }
				242
				243	return;
				244
				245	}
				246
				247
				248
				249	// backward Riccati recursion
				250	void m_solve_kkt_step_hard_ocp_qp(struct d_ocp_qp d_qp, struct s_ocp_qp s_qp, struct m_ipm_hard_ocp_qp_workspace *ws)
				251	{
				252
				253	int N = s_qp->N;
				254	int *nx = s_qp->nx;
				255	int *nu = s_qp->nu;
				256	int *nb = s_qp->nb;
				257	int *ng = s_qp->ng;
				258
				259	struct s_strmat *BAbt = s_qp->BAbt;
				260	struct s_strmat *RSQrq = s_qp->RSQrq;
				261	struct s_strmat *DCt = s_qp->DCt;
				262	struct d_strmat *d_DCt = d_qp->DCt;
				263	int **idxb = s_qp->idxb;
				264	int **d_idxb = d_qp->idxb;
				265
				266	struct s_strmat *L = ws->L;
				267	struct s_strmat *AL = ws->AL;
				268	struct d_strvec *d_res_b = ws->res_b;
				269	struct d_strvec *d_res_g = ws->res_g;
				270	struct s_strvec *res_b = ws->sres_b;
				271	struct s_strvec *res_g = ws->sres_g;
				272	struct d_strvec *d_dux = ws->dux;
				273	struct d_strvec *d_dpi = ws->dpi;
				274	struct s_strvec *dux = ws->sdux;
				275	struct s_strvec *dpi = ws->sdpi;
				276	struct d_strvec *d_dt_lb = ws->dt_lb;
				277	struct d_strvec *d_dt_lg = ws->dt_lg;
				278	struct d_strvec *d_qx_lg = ws->qx_lg;
				279	struct d_strvec *d_qx_lb = ws->qx_lb;
				280	struct s_strvec *qx_lg = ws->sqx_lg;
				281	struct s_strvec *qx_lb = ws->sqx_lb;
				282	struct s_strvec *Pb = ws->Pb;
				283	struct s_strvec *tmp_nxM = ws->tmp_nxM;
				284
				285	//
				286	int ii;
				287
				288	struct d_ipm_hard_core_qp_workspace *cws = ws->core_workspace;
				289
				290	// if(nb>0 \| ng>0)
				291	// {
				292	d_compute_qx_hard_qp(cws);
				293	// }
				294
				295
				296
				297	// cvt double => single
				298	for(ii=0; ii<N; ii++)
				299	{
				300	m_cvt_d2s_strvec(nu[ii]+nx[ii], d_res_g+ii, 0, res_g+ii, 0);
				301	m_cvt_d2s_strvec(nx[ii+1], d_res_b+ii, 0, res_b+ii, 0);
				302	m_cvt_d2s_strvec(nb[ii], d_qx_lb+ii, 0, qx_lb+ii, 0);
				303	m_cvt_d2s_strvec(ng[ii], d_qx_lg+ii, 0, qx_lg+ii, 0);
				304	}
				305	ii = N;
				306	m_cvt_d2s_strvec(nu[ii]+nx[ii], d_res_g+ii, 0, res_g+ii, 0);
				307	m_cvt_d2s_strvec(nb[ii], d_qx_lb+ii, 0, qx_lb+ii, 0);
				308	m_cvt_d2s_strvec(ng[ii], d_qx_lg+ii, 0, qx_lg+ii, 0);
				309
				310
				311
				312	// backward substitution
				313
				314	// last stage
				315	sveccp_libstr(nu[N]+nx[N], res_g+N, 0, dux+N, 0);
				316	if(nb[N]>0)
				317	{
				318	svecad_sp_libstr(nb[N], 1.0, qx_lb+N, 0, idxb[N], dux+N, 0);
				319	}
				320	// general constraints
				321	if(ng[N]>0)
				322	{
				323	sgemv_n_libstr(nu[N]+nx[N], ng[N], 1.0, DCt+N, 0, 0, qx_lg+N, 0, 1.0, dux+N, 0, dux+N, 0);
				324	}
				325
				326	// middle stages
				327	for(ii=0; ii<N-1; ii++)
				328	{
				329	sveccp_libstr(nu[N-ii-1]+nx[N-ii-1], res_g+N-ii-1, 0, dux+N-ii-1, 0);
				330	if(nb[N-ii-1]>0)
				331	{
				332	svecad_sp_libstr(nb[N-ii-1], 1.0, qx_lb+N-ii-1, 0, idxb[N-ii-1], dux+N-ii-1, 0);
				333	}
				334	if(ng[N-ii-1]>0)
				335	{
				336	sgemv_n_libstr(nu[N-ii-1]+nx[N-ii-1], ng[N-ii-1], 1.0, DCt+N-ii-1, 0, 0, qx_lg+N-ii-1, 0, 1.0, dux+N-ii-1, 0, dux+N-ii-1, 0);
				337	}
				338	if(ws->compute_Pb)
				339	{
				340	strmv_ltn_libstr(nx[N-ii], nx[N-ii], L+(N-ii), nu[N-ii], nu[N-ii], res_b+N-ii-1, 0, Pb+(N-ii-1), 0);
				341	strmv_lnn_libstr(nx[N-ii], nx[N-ii], L+(N-ii), nu[N-ii], nu[N-ii], Pb+(N-ii-1), 0, Pb+(N-ii-1), 0);
				342	}
				343	saxpy_libstr(nx[N-ii], 1.0, dux+N-ii, nu[N-ii], Pb+N-ii-1, 0, tmp_nxM, 0);
				344	sgemv_n_libstr(nu[N-ii-1]+nx[N-ii-1], nx[N-ii], 1.0, BAbt+N-ii-1, 0, 0, tmp_nxM, 0, 1.0, dux+N-ii-1, 0, dux+N-ii-1, 0);
				345	strsv_lnn_mn_libstr(nu[N-ii-1]+nx[N-ii-1], nu[N-ii-1], L+N-ii-1, 0, 0, dux+N-ii-1, 0, dux+N-ii-1, 0);
				346	}
				347
				348	// first stage
				349	ii = N-1;
				350	sveccp_libstr(nu[N-ii-1]+nx[N-ii-1], res_g+N-ii-1, 0, dux+N-ii-1, 0);
				351	if(nb[N-ii-1]>0)
				352	{
				353	svecad_sp_libstr(nb[N-ii-1], 1.0, qx_lb+N-ii-1, 0, idxb[N-ii-1], dux+N-ii-1, 0);
				354	}
				355	if(ng[N-ii-1]>0)
				356	{
				357	sgemv_n_libstr(nu[N-ii-1]+nx[N-ii-1], ng[N-ii-1], 1.0, DCt+N-ii-1, 0, 0, qx_lg+N-ii-1, 0, 1.0, dux+N-ii-1, 0, dux+N-ii-1, 0);
				358	}
				359	if(ws->compute_Pb)
				360	{
				361	strmv_ltn_libstr(nx[N-ii], nx[N-ii], L+(N-ii), nu[N-ii], nu[N-ii], res_b+N-ii-1, 0, Pb+(N-ii-1), 0);
				362	strmv_lnn_libstr(nx[N-ii], nx[N-ii], L+(N-ii), nu[N-ii], nu[N-ii], Pb+(N-ii-1), 0, Pb+(N-ii-1), 0);
				363	}
				364	saxpy_libstr(nx[N-ii], 1.0, dux+N-ii, nu[N-ii], Pb+N-ii-1, 0, tmp_nxM, 0);
				365	sgemv_n_libstr(nu[N-ii-1]+nx[N-ii-1], nx[N-ii], 1.0, BAbt+N-ii-1, 0, 0, tmp_nxM, 0, 1.0, dux+N-ii-1, 0, dux+N-ii-1, 0);
				366	strsv_lnn_libstr(nu[N-ii-1]+nx[N-ii-1], L+N-ii-1, 0, 0, dux+N-ii-1, 0, dux+N-ii-1, 0);
				367
				368	// first stage
				369	ii = 0;
				370	sveccp_libstr(nx[ii+1], dux+ii+1, nu[ii+1], dpi+ii, 0);
				371	svecsc_libstr(nu[ii]+nx[ii], -1.0, dux+ii, 0);
				372	strsv_ltn_libstr(nu[ii]+nx[ii], L+ii, 0, 0, dux+ii, 0, dux+ii, 0);
				373	sgemv_t_libstr(nu[ii]+nx[ii], nx[ii+1], 1.0, BAbt+ii, 0, 0, dux+ii, 0, 1.0, res_b+ii, 0, dux+ii+1, nu[ii+1]);
				374	sveccp_libstr(nx[ii+1], dux+ii+1, nu[ii+1], tmp_nxM, 0);
				375	strmv_ltn_libstr(nx[ii+1], nx[ii+1], L+ii+1, nu[ii+1], nu[ii+1], tmp_nxM, 0, tmp_nxM, 0);
				376	strmv_lnn_libstr(nx[ii+1], nx[ii+1], L+ii+1, nu[ii+1], nu[ii+1], tmp_nxM, 0, tmp_nxM, 0);
				377	saxpy_libstr(nx[ii+1], 1.0, tmp_nxM, 0, dpi+ii, 0, dpi+ii, 0);
				378
				379	// middle stages
				380	for(ii=1; ii<N; ii++)
				381	{
				382	sveccp_libstr(nx[ii+1], dux+ii+1, nu[ii+1], dpi+ii, 0);
				383	svecsc_libstr(nu[ii], -1.0, dux+ii, 0);
				384	strsv_ltn_mn_libstr(nu[ii]+nx[ii], nu[ii], L+ii, 0, 0, dux+ii, 0, dux+ii, 0);
				385	sgemv_t_libstr(nu[ii]+nx[ii], nx[ii+1], 1.0, BAbt+ii, 0, 0, dux+ii, 0, 1.0, res_b+ii, 0, dux+ii+1, nu[ii+1]);
				386	sveccp_libstr(nx[ii+1], dux+ii+1, nu[ii+1], tmp_nxM, 0);
				387	strmv_ltn_libstr(nx[ii+1], nx[ii+1], L+ii+1, nu[ii+1], nu[ii+1], tmp_nxM, 0, tmp_nxM, 0);
				388	strmv_lnn_libstr(nx[ii+1], nx[ii+1], L+ii+1, nu[ii+1], nu[ii+1], tmp_nxM, 0, tmp_nxM, 0);
				389	saxpy_libstr(nx[ii+1], 1.0, tmp_nxM, 0, dpi+ii, 0, dpi+ii, 0);
				390	}
				391
				392
				393
				394	// cvt single => double
				395	for(ii=0; ii<N; ii++)
				396	{
				397	m_cvt_s2d_strvec(nu[ii]+nx[ii], dux+ii, 0, d_dux+ii, 0);
				398	m_cvt_s2d_strvec(nx[ii+1], dpi+ii, 0, d_dpi+ii, 0);
				399	}
				400	ii = N;
				401	m_cvt_s2d_strvec(nu[ii]+nx[ii], dux+ii, 0, d_dux+ii, 0);
				402
				403
				404
				405	// if(nb>0)
				406	// {
				407	for(ii=0; ii<=N; ii++)
				408	dvecex_sp_libstr(nb[ii], 1.0, d_idxb[ii], d_dux+ii, 0, d_dt_lb+ii, 0);
				409	// }
				410
				411	// if(ng>0)
				412	// {
				413	for(ii=0; ii<=N; ii++)
				414	dgemv_t_libstr(nu[ii]+nx[ii], ng[ii], 1.0, d_DCt+ii, 0, 0, d_dux+ii, 0, 0.0, d_dt_lg+ii, 0, d_dt_lg+ii, 0);
				415	// }
				416
				417	// if(nb>0 \| ng>0)
				418	// {
				419	d_compute_lam_t_hard_qp(cws);
				420	// }
				421
				422	return;
				423
				424	}
				425
				426