blob: 03b9fc668a601f1e8d6a18c93421f37f0a4bcde0 [file] [log] [blame]
Austin Schuh9a24b372018-01-28 16:12:29 -08001/**************************************************************************************************
2* *
3* This file is part of BLASFEO. *
4* *
5* BLASFEO -- BLAS For Embedded Optimization. *
6* Copyright (C) 2016-2017 by Gianluca Frison. *
7* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
8* All rights reserved. *
9* *
10* HPMPC is free software; you can redistribute it and/or *
11* modify it under the terms of the GNU Lesser General Public *
12* License as published by the Free Software Foundation; either *
13* version 2.1 of the License, or (at your option) any later version. *
14* *
15* HPMPC is distributed in the hope that it will be useful, *
16* but WITHOUT ANY WARRANTY; without even the implied warranty of *
17* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
18* See the GNU Lesser General Public License for more details. *
19* *
20* You should have received a copy of the GNU Lesser General Public *
21* License along with HPMPC; if not, write to the Free Software *
22* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA *
23* *
24* Author: Gianluca Frison, giaf (at) dtu.dk *
25* gianluca.frison (at) imtek.uni-freiburg.de *
26* *
27**************************************************************************************************/
28
29#include <stdlib.h>
30#include <stdio.h>
31#include <sys/time.h>
32
33#include "tools.h"
34
35#include "../include/blasfeo_common.h"
36#include "../include/blasfeo_i_aux_ext_dep.h"
37#include "../include/blasfeo_s_aux_ext_dep.h"
38#include "../include/blasfeo_s_aux.h"
39#include "../include/blasfeo_s_kernel.h"
40#include "../include/blasfeo_s_blas.h"
41
42
43
44static void s_back_ric_sv_libstr(int N, int *nx, int *nu, struct s_strmat *hsBAbt, struct s_strmat *hsRSQrq, struct s_strmat *hsL, struct s_strvec *hsux, struct s_strvec *hspi, struct s_strmat *hswork_mat, struct s_strvec *hswork_vec)
45 {
46
47 int nn;
48
49 // factorization and backward substitution
50
51 // last stage
52 spotrf_l_libstr(nx[N]+1, nx[N], &hsRSQrq[N], 0, 0, &hsL[N], 0, 0);
53
54 // middle stages
55 for(nn=0; nn<N; nn++)
56 {
57 strmm_rlnn_libstr(nu[N-nn-1]+nx[N-nn-1]+1, nx[N-nn], 1.0, &hsL[N-nn], nu[N-nn], nu[N-nn], &hsBAbt[N-nn-1], 0, 0, &hswork_mat[0], 0, 0);
58 sgead_libstr(1, nx[N-nn], 1.0, &hsL[N-nn], nu[N-nn]+nx[N-nn], nu[N-nn], &hswork_mat[0], nu[N-nn-1]+nx[N-nn-1], 0);
59#if 1
60 ssyrk_spotrf_ln_libstr(nu[N-nn-1]+nx[N-nn-1]+1, nu[N-nn-1]+nx[N-nn-1], nx[N-nn], &hswork_mat[0], 0, 0, &hswork_mat[0], 0, 0, &hsRSQrq[N-nn-1], 0, 0, &hsL[N-nn-1], 0, 0);
61#else
62 ssyrk_ln_libstr(nu[N-nn-1]+nx[N-nn-1]+1, nu[N-nn-1]+nx[N-nn-1], nx[N-nn], 1.0, &hswork_mat[0], 0, 0, &hswork_mat[0], 0, 0, 1.0, &hsRSQrq[N-nn-1], 0, 0, &hsL[N-nn-1], 0, 0);
63 spotrf_l_libstr(nu[N-nn-1]+nx[N-nn-1]+1, nu[N-nn-1]+nx[N-nn-1], &hsL[N-nn-1], 0, 0, &hsL[N-nn-1], 0, 0);
64#endif
65 }
66
67 // forward substitution
68
69 // first stage
70 nn = 0;
71 srowex_libstr(nu[nn]+nx[nn], -1.0, &hsL[nn], nu[nn]+nx[nn], 0, &hsux[nn], 0);
72 strsv_ltn_libstr(nu[nn]+nx[nn], nu[nn]+nx[nn], &hsL[nn], 0, 0, &hsux[nn], 0, &hsux[nn], 0);
73 srowex_libstr(nx[nn+1], 1.0, &hsBAbt[nn], nu[nn]+nx[nn], 0, &hsux[nn+1], nu[nn+1]);
74 sgemv_t_libstr(nu[nn]+nx[nn], nx[nn+1], 1.0, &hsBAbt[nn], 0, 0, &hsux[nn], 0, 1.0, &hsux[nn+1], nu[nn+1], &hsux[nn+1], nu[nn+1]);
75 sveccp_libstr(nx[nn+1], 1.0, &hsux[nn+1], nu[nn+1], &hspi[nn], 0);
76 srowex_libstr(nx[nn+1], 1.0, &hsL[nn+1], nu[nn+1]+nx[nn+1], nu[nn+1], &hswork_vec[0], 0);
77 strmv_ltn_libstr(nx[nn+1], nx[nn+1], &hsL[nn+1], nu[nn+1], nu[nn+1], &hspi[nn], 0, &hspi[nn], 0);
78 saxpy_libstr(nx[nn+1], 1.0, &hswork_vec[0], 0, &hspi[nn], 0);
79 strmv_lnn_libstr(nx[nn+1], nx[nn+1], &hsL[nn+1], nu[nn+1], nu[nn+1], &hspi[nn], 0, &hspi[nn], 0);
80
81 // middle stages
82 for(nn=1; nn<N; nn++)
83 {
84 srowex_libstr(nu[nn], -1.0, &hsL[nn], nu[nn]+nx[nn], 0, &hsux[nn], 0);
85 strsv_ltn_libstr(nu[nn]+nx[nn], nu[nn], &hsL[nn], 0, 0, &hsux[nn], 0, &hsux[nn], 0);
86 srowex_libstr(nx[nn+1], 1.0, &hsBAbt[nn], nu[nn]+nx[nn], 0, &hsux[nn+1], nu[nn+1]);
87 sgemv_t_libstr(nu[nn]+nx[nn], nx[nn+1], 1.0, &hsBAbt[nn], 0, 0, &hsux[nn], 0, 1.0, &hsux[nn+1], nu[nn+1], &hsux[nn+1], nu[nn+1]);
88 sveccp_libstr(nx[nn+1], 1.0, &hsux[nn+1], nu[nn+1], &hspi[nn], 0);
89 srowex_libstr(nx[nn+1], 1.0, &hsL[nn+1], nu[nn+1]+nx[nn+1], nu[nn+1], &hswork_vec[0], 0);
90 strmv_ltn_libstr(nx[nn+1], nx[nn+1], &hsL[nn+1], nu[nn+1], nu[nn+1], &hspi[nn], 0, &hspi[nn], 0);
91 saxpy_libstr(nx[nn+1], 1.0, &hswork_vec[0], 0, &hspi[nn], 0);
92 strmv_lnn_libstr(nx[nn+1], nx[nn+1], &hsL[nn+1], nu[nn+1], nu[nn+1], &hspi[nn], 0, &hspi[nn], 0);
93 }
94
95 return;
96
97 }
98
99
100
101static void s_back_ric_trf_libstr(int N, int *nx, int *nu, struct s_strmat *hsBAbt, struct s_strmat *hsRSQrq, struct s_strmat *hsL, struct s_strmat *hswork_mat)
102 {
103
104 int nn;
105
106 // factorization
107
108 // last stage
109 spotrf_l_libstr(nx[N], nx[N], &hsRSQrq[N], 0, 0, &hsL[N], 0, 0);
110
111 // middle stages
112 for(nn=0; nn<N; nn++)
113 {
114 strmm_rlnn_libstr(nu[N-nn-1]+nx[N-nn-1], nx[N-nn], 1.0, &hsL[N-nn], nu[N-nn], nu[N-nn], &hsBAbt[N-nn-1], 0, 0, &hswork_mat[0], 0, 0);
115#if 1
116 ssyrk_spotrf_ln_libstr(nu[N-nn-1]+nx[N-nn-1], nu[N-nn-1]+nx[N-nn-1], nx[N-nn], &hswork_mat[0], 0, 0, &hswork_mat[0], 0, 0, &hsRSQrq[N-nn-1], 0, 0, &hsL[N-nn-1], 0, 0);
117#else
118 ssyrk_ln_libstr(nu[N-nn-1]+nx[N-nn-1], nu[N-nn-1]+nx[N-nn-1], nx[N-nn], 1.0, &hswork_mat[0], 0, 0, &hswork_mat[0], 0, 0, 1.0, &hsRSQrq[N-nn-1], 0, 0, &hsL[N-nn-1], 0, 0);
119 spotrf_l_libstr(nu[N-nn-1]+nx[N-nn-1], nu[N-nn-1]+nx[N-nn-1], &hsL[N-nn-1], 0, 0, &hsL[N-nn-1], 0, 0);
120#endif
121 }
122
123 return;
124
125 }
126
127
128
129static void s_back_ric_trs_libstr(int N, int *nx, int *nu, struct s_strmat *hsBAbt, struct s_strvec *hsb, struct s_strvec *hsrq, struct s_strmat *hsL, struct s_strvec *hsPb, struct s_strvec *hsux, struct s_strvec *hspi, struct s_strvec *hswork_vec)
130 {
131
132 int nn;
133
134 // backward substitution
135
136 // last stage
137 sveccp_libstr(nu[N]+nx[N], 1.0, &hsrq[N], 0, &hsux[N], 0);
138
139 // middle stages
140 for(nn=0; nn<N-1; nn++)
141 {
142 // compute Pb
143 strmv_ltn_libstr(nx[N-nn], nx[N-nn], &hsL[N-nn], nu[N-nn], nu[N-nn], &hsb[N-nn-1], 0, &hsPb[N-nn-1], 0);
144 strmv_lnn_libstr(nx[N-nn], nx[N-nn], &hsL[N-nn], nu[N-nn], nu[N-nn], &hsPb[N-nn-1], 0, &hsPb[N-nn-1], 0);
145 sveccp_libstr(nu[N-nn-1]+nx[N-nn-1], 1.0, &hsrq[N-nn-1], 0, &hsux[N-nn-1], 0);
146 sveccp_libstr(nx[N-nn], 1.0, &hsPb[N-nn-1], 0, &hswork_vec[0], 0);
147 saxpy_libstr(nx[N-nn], 1.0, &hsux[N-nn], nu[N-nn], &hswork_vec[0], 0);
148 sgemv_n_libstr(nu[N-nn-1]+nx[N-nn-1], nx[N-nn], 1.0, &hsBAbt[N-nn-1], 0, 0, &hswork_vec[0], 0, 1.0, &hsux[N-nn-1], 0, &hsux[N-nn-1], 0);
149 strsv_lnn_libstr(nu[N-nn-1]+nx[N-nn-1], nu[N-nn-1], &hsL[N-nn-1], 0, 0, &hsux[N-nn-1], 0, &hsux[N-nn-1], 0);
150 }
151
152 // first stage
153 nn = N-1;
154 strmv_ltn_libstr(nx[N-nn], nx[N-nn], &hsL[N-nn], nu[N-nn], nu[N-nn], &hsb[N-nn-1], 0, &hsPb[N-nn-1], 0);
155 strmv_lnn_libstr(nx[N-nn], nx[N-nn], &hsL[N-nn], nu[N-nn], nu[N-nn], &hsPb[N-nn-1], 0, &hsPb[N-nn-1], 0);
156 sveccp_libstr(nu[N-nn-1]+nx[N-nn-1], 1.0, &hsrq[N-nn-1], 0, &hsux[N-nn-1], 0);
157 sveccp_libstr(nx[N-nn], 1.0, &hsPb[N-nn-1], 0, &hswork_vec[0], 0);
158 saxpy_libstr(nx[N-nn], 1.0, &hsux[N-nn], nu[N-nn], &hswork_vec[0], 0);
159 sgemv_n_libstr(nu[N-nn-1]+nx[N-nn-1], nx[N-nn], 1.0, &hsBAbt[N-nn-1], 0, 0, &hswork_vec[0], 0, 1.0, &hsux[N-nn-1], 0, &hsux[N-nn-1], 0);
160 strsv_lnn_libstr(nu[N-nn-1]+nx[N-nn-1], nu[N-nn-1]+nx[N-nn-1], &hsL[N-nn-1], 0, 0, &hsux[N-nn-1], 0, &hsux[N-nn-1], 0);
161
162 // forward substitution
163
164 // first stage
165 nn = 0;
166 sveccp_libstr(nx[nn+1], 1.0, &hsux[nn+1], nu[nn+1], &hspi[nn], 0);
167 sveccp_libstr(nu[nn]+nx[nn], -1.0, &hsux[nn], 0, &hsux[nn], 0);
168 strsv_ltn_libstr(nu[nn]+nx[nn], nu[nn]+nx[nn], &hsL[nn], 0, 0, &hsux[nn], 0, &hsux[nn], 0);
169 sgemv_t_libstr(nu[nn]+nx[nn], nx[nn+1], 1.0, &hsBAbt[nn], 0, 0, &hsux[nn], 0, 1.0, &hsb[nn], 0, &hsux[nn+1], nu[nn+1]);
170 sveccp_libstr(nx[nn+1], 1.0, &hsux[nn+1], nu[nn+1], &hswork_vec[0], 0);
171 strmv_ltn_libstr(nx[nn+1], nx[nn+1], &hsL[nn+1], nu[nn+1], nu[nn+1], &hswork_vec[0], 0, &hswork_vec[0], 0);
172 strmv_lnn_libstr(nx[nn+1], nx[nn+1], &hsL[nn+1], nu[nn+1], nu[nn+1], &hswork_vec[0], 0, &hswork_vec[0], 0);
173 saxpy_libstr(nx[nn+1], 1.0, &hswork_vec[0], 0, &hspi[nn], 0);
174
175 // middle stages
176 for(nn=1; nn<N; nn++)
177 {
178 sveccp_libstr(nx[nn+1], 1.0, &hsux[nn+1], nu[nn+1], &hspi[nn], 0);
179 sveccp_libstr(nu[nn], -1.0, &hsux[nn], 0, &hsux[nn], 0);
180 strsv_ltn_libstr(nu[nn]+nx[nn], nu[nn], &hsL[nn], 0, 0, &hsux[nn], 0, &hsux[nn], 0);
181 sgemv_t_libstr(nu[nn]+nx[nn], nx[nn+1], 1.0, &hsBAbt[nn], 0, 0, &hsux[nn], 0, 1.0, &hsb[nn], 0, &hsux[nn+1], nu[nn+1]);
182 sveccp_libstr(nx[nn+1], 1.0, &hsux[nn+1], nu[nn+1], &hswork_vec[0], 0);
183 strmv_ltn_libstr(nx[nn+1], nx[nn+1], &hsL[nn+1], nu[nn+1], nu[nn+1], &hswork_vec[0], 0, &hswork_vec[0], 0);
184 strmv_lnn_libstr(nx[nn+1], nx[nn+1], &hsL[nn+1], nu[nn+1], nu[nn+1], &hswork_vec[0], 0, &hswork_vec[0], 0);
185 saxpy_libstr(nx[nn+1], 1.0, &hswork_vec[0], 0, &hspi[nn], 0);
186 }
187
188 return;
189
190 }
191
192
193
194/************************************************
195Mass-spring system: nx/2 masses connected each other with springs (in a row), and the first and the last one to walls. nu (<=nx) controls act on the first nu masses. The system is sampled with sampling time Ts.
196************************************************/
197static void d_mass_spring_system(double Ts, int nx, int nu, int N, double *A, double *B, double *b, double *x0)
198 {
199
200 int nx2 = nx*nx;
201
202 int info = 0;
203
204 int pp = nx/2; // number of masses
205
206/************************************************
207* build the continuous time system
208************************************************/
209
210 double *T; d_zeros(&T, pp, pp);
211 int ii;
212 for(ii=0; ii<pp; ii++) T[ii*(pp+1)] = -2;
213 for(ii=0; ii<pp-1; ii++) T[ii*(pp+1)+1] = 1;
214 for(ii=1; ii<pp; ii++) T[ii*(pp+1)-1] = 1;
215
216 double *Z; d_zeros(&Z, pp, pp);
217 double *I; d_zeros(&I, pp, pp); for(ii=0; ii<pp; ii++) I[ii*(pp+1)]=1.0; // = eye(pp);
218 double *Ac; d_zeros(&Ac, nx, nx);
219 dmcopy(pp, pp, Z, pp, Ac, nx);
220 dmcopy(pp, pp, T, pp, Ac+pp, nx);
221 dmcopy(pp, pp, I, pp, Ac+pp*nx, nx);
222 dmcopy(pp, pp, Z, pp, Ac+pp*(nx+1), nx);
223 free(T);
224 free(Z);
225 free(I);
226
227 d_zeros(&I, nu, nu); for(ii=0; ii<nu; ii++) I[ii*(nu+1)]=1.0; //I = eye(nu);
228 double *Bc; d_zeros(&Bc, nx, nu);
229 dmcopy(nu, nu, I, nu, Bc+pp, nx);
230 free(I);
231
232/************************************************
233* compute the discrete time system
234************************************************/
235
236 double *bb; d_zeros(&bb, nx, 1);
237 dmcopy(nx, 1, bb, nx, b, nx);
238
239 dmcopy(nx, nx, Ac, nx, A, nx);
240 dscal_3l(nx2, Ts, A);
241 expm(nx, A);
242
243 d_zeros(&T, nx, nx);
244 d_zeros(&I, nx, nx); for(ii=0; ii<nx; ii++) I[ii*(nx+1)]=1.0; //I = eye(nx);
245 dmcopy(nx, nx, A, nx, T, nx);
246 daxpy_3l(nx2, -1.0, I, T);
247 dgemm_nn_3l(nx, nu, nx, T, nx, Bc, nx, B, nx);
248 free(T);
249 free(I);
250
251 int *ipiv = (int *) malloc(nx*sizeof(int));
252 dgesv_3l(nx, nu, Ac, nx, ipiv, B, nx, &info);
253 free(ipiv);
254
255 free(Ac);
256 free(Bc);
257 free(bb);
258
259
260/************************************************
261* initial state
262************************************************/
263
264 if(nx==4)
265 {
266 x0[0] = 5;
267 x0[1] = 10;
268 x0[2] = 15;
269 x0[3] = 20;
270 }
271 else
272 {
273 int jj;
274 for(jj=0; jj<nx; jj++)
275 x0[jj] = 1;
276 }
277
278 }
279
280
281
282int main()
283 {
284
285 printf("\nExample of LU factorization and backsolve\n\n");
286
287#if defined(LA_HIGH_PERFORMANCE)
288
289 printf("\nLA provided by BLASFEO\n\n");
290
291#elif defined(LA_BLAS)
292
293 printf("\nLA provided by BLAS\n\n");
294
295#elif defined(LA_REFERENCE)
296
297 printf("\nLA provided by REFERENCE\n\n");
298
299#else
300
301 printf("\nLA provided by ???\n\n");
302 exit(2);
303
304#endif
305
306 // loop index
307 int ii;
308
309/************************************************
310* problem size
311************************************************/
312
313 // problem size
314 int N = 4;
315 int nx_ = 4;
316 int nu_ = 1;
317
318 // stage-wise variant size
319 int nx[N+1];
320 nx[0] = 0;
321 for(ii=1; ii<=N; ii++)
322 nx[ii] = nx_;
323 nx[N] = nx_;
324
325 int nu[N+1];
326 for(ii=0; ii<N; ii++)
327 nu[ii] = nu_;
328 nu[N] = 0;
329
330/************************************************
331* dynamical system
332************************************************/
333
334 double *Ad; d_zeros(&Ad, nx_, nx_); // states update matrix
335
336 double *Bd; d_zeros(&Bd, nx_, nu_); // inputs matrix
337
338 double *bd; d_zeros(&bd, nx_, 1); // states offset
339 double *x0d; d_zeros(&x0d, nx_, 1); // initial state
340
341 double Ts = 0.5; // sampling time
342 d_mass_spring_system(Ts, nx_, nu_, N, Ad, Bd, bd, x0d);
343
344 float *A; s_zeros(&A, nx_, nx_); for(ii=0; ii<nx_*nx_; ii++) A[ii] = (float) Ad[ii];
345 float *B; s_zeros(&B, nx_, nu_); for(ii=0; ii<nx_*nu_; ii++) B[ii] = (float) Bd[ii];
346 float *b; s_zeros(&b, nx_, 1); for(ii=0; ii<nx_; ii++) b[ii] = (float) bd[ii];
347 float *x0; s_zeros(&x0, nx_, 1); for(ii=0; ii<nx_; ii++) x0[ii] = (float) x0d[ii];
348
349 for(ii=0; ii<nx_; ii++)
350 b[ii] = 0.1;
351
352 for(ii=0; ii<nx_; ii++)
353 x0[ii] = 0;
354 x0[0] = 2.5;
355 x0[1] = 2.5;
356
357 s_print_mat(nx_, nx_, A, nx_);
358 s_print_mat(nx_, nu_, B, nx_);
359 s_print_mat(1, nx_, b, 1);
360 s_print_mat(1, nx_, x0, 1);
361
362/************************************************
363* cost function
364************************************************/
365
366 float *R; s_zeros(&R, nu_, nu_);
367 for(ii=0; ii<nu_; ii++) R[ii*(nu_+1)] = 2.0;
368
369 float *S; s_zeros(&S, nu_, nx_);
370
371 float *Q; s_zeros(&Q, nx_, nx_);
372 for(ii=0; ii<nx_; ii++) Q[ii*(nx_+1)] = 1.0;
373
374 float *r; s_zeros(&r, nu_, 1);
375 for(ii=0; ii<nu_; ii++) r[ii] = 0.2;
376
377 float *q; s_zeros(&q, nx_, 1);
378 for(ii=0; ii<nx_; ii++) q[ii] = 0.1;
379
380 s_print_mat(nu_, nu_, R, nu_);
381 s_print_mat(nu_, nx_, S, nu_);
382 s_print_mat(nx_, nx_, Q, nx_);
383 s_print_mat(1, nu_, r, 1);
384 s_print_mat(1, nx_, q, 1);
385
386/************************************************
387* matrices as strmat
388************************************************/
389
390 struct s_strmat sA;
391 s_allocate_strmat(nx_, nx_, &sA);
392 s_cvt_mat2strmat(nx_, nx_, A, nx_, &sA, 0, 0);
393 struct s_strvec sb;
394 s_allocate_strvec(nx_, &sb);
395 s_cvt_vec2strvec(nx_, b, &sb, 0);
396 struct s_strvec sx0;
397 s_allocate_strvec(nx_, &sx0);
398 s_cvt_vec2strvec(nx_, x0, &sx0, 0);
399 struct s_strvec sb0;
400 s_allocate_strvec(nx_, &sb0);
401 float *b0; d_zeros(&b0, nx_, 1); // states offset
402 sgemv_n_libstr(nx_, nx_, 1.0, &sA, 0, 0, &sx0, 0, 1.0, &sb, 0, &sb0, 0);
403 s_print_tran_strvec(nx_, &sb0, 0);
404
405 struct s_strmat sBbt0;
406 s_allocate_strmat(nu_+nx_+1, nx_, &sBbt0);
407 s_cvt_tran_mat2strmat(nx_, nx_, B, nx_, &sBbt0, 0, 0);
408 srowin_libstr(nx_, 1.0, &sb0, 0, &sBbt0, nu_, 0);
409 s_print_strmat(nu_+1, nx_, &sBbt0, 0, 0);
410
411 struct s_strmat sBAbt1;
412 s_allocate_strmat(nu_+nx_+1, nx_, &sBAbt1);
413 s_cvt_tran_mat2strmat(nx_, nu_, B, nx_, &sBAbt1, 0, 0);
414 s_cvt_tran_mat2strmat(nx_, nx_, A, nx_, &sBAbt1, nu_, 0);
415 s_cvt_tran_mat2strmat(nx_, 1, b, nx_, &sBAbt1, nu_+nx_, 0);
416 s_print_strmat(nu_+nx_+1, nx_, &sBAbt1, 0, 0);
417
418 struct s_strvec sr0; // XXX no need to update r0 since S=0
419 s_allocate_strvec(nu_, &sr0);
420 s_cvt_vec2strvec(nu_, r, &sr0, 0);
421
422 struct s_strmat sRr0;
423 s_allocate_strmat(nu_+1, nu_, &sRr0);
424 s_cvt_mat2strmat(nu_, nu_, R, nu_, &sRr0, 0, 0);
425 srowin_libstr(nu_, 1.0, &sr0, 0, &sRr0, nu_, 0);
426 s_print_strmat(nu_+1, nu_, &sRr0, 0, 0);
427
428 struct s_strvec srq1;
429 s_allocate_strvec(nu_+nx_, &srq1);
430 s_cvt_vec2strvec(nu_, r, &srq1, 0);
431 s_cvt_vec2strvec(nx_, q, &srq1, nu_);
432
433 struct s_strmat sRSQrq1;
434 s_allocate_strmat(nu_+nx_+1, nu_+nx_, &sRSQrq1);
435 s_cvt_mat2strmat(nu_, nu_, R, nu_, &sRSQrq1, 0, 0);
436 s_cvt_tran_mat2strmat(nu_, nx_, S, nu_, &sRSQrq1, nu_, 0);
437 s_cvt_mat2strmat(nx_, nx_, Q, nx_, &sRSQrq1, nu_, nu_);
438 srowin_libstr(nu_+nx_, 1.0, &srq1, 0, &sRSQrq1, nu_+nx_, 0);
439 s_print_strmat(nu_+nx_+1, nu_+nx_, &sRSQrq1, 0, 0);
440
441 struct s_strvec sqN;
442 s_allocate_strvec(nx_, &sqN);
443 s_cvt_vec2strvec(nx_, q, &sqN, 0);
444
445 struct s_strmat sQqN;
446 s_allocate_strmat(nx_+1, nx_, &sQqN);
447 s_cvt_mat2strmat(nx_, nx_, Q, nx_, &sQqN, 0, 0);
448 srowin_libstr(nx_, 1.0, &sqN, 0, &sQqN, nx_, 0);
449 s_print_strmat(nx_+1, nx_, &sQqN, 0, 0);
450
451/************************************************
452* array of matrices
453************************************************/
454
455 struct s_strmat hsBAbt[N];
456 struct s_strvec hsb[N];
457 struct s_strmat hsRSQrq[N+1];
458 struct s_strvec hsrq[N+1];
459 struct s_strmat hsL[N+1];
460 struct s_strvec hsPb[N];
461 struct s_strvec hsux[N+1];
462 struct s_strvec hspi[N];
463 struct s_strmat hswork_mat[1];
464 struct s_strvec hswork_vec[1];
465
466 hsBAbt[0] = sBbt0;
467 hsb[0] = sb0;
468 hsRSQrq[0] = sRr0;
469 hsrq[0] = sr0;
470 s_allocate_strmat(nu_+1, nu_, &hsL[0]);
471 s_allocate_strvec(nx_, &hsPb[0]);
472 s_allocate_strvec(nx_+nu_+1, &hsux[0]);
473 s_allocate_strvec(nx_, &hspi[0]);
474 for(ii=1; ii<N; ii++)
475 {
476 hsBAbt[ii] = sBAbt1;
477 hsb[ii] = sb;
478 hsRSQrq[ii] = sRSQrq1;
479 hsrq[ii] = srq1;
480 s_allocate_strmat(nu_+nx_+1, nu_+nx_, &hsL[ii]);
481 s_allocate_strvec(nx_, &hsPb[ii]);
482 s_allocate_strvec(nx_+nu_+1, &hsux[ii]);
483 s_allocate_strvec(nx_, &hspi[ii]);
484 }
485 hsRSQrq[N] = sQqN;
486 hsrq[N] = sqN;
487 s_allocate_strmat(nx_+1, nx_, &hsL[N]);
488 s_allocate_strvec(nx_+nu_+1, &hsux[N]);
489 s_allocate_strmat(nu_+nx_+1, nx_, &hswork_mat[0]);
490 s_allocate_strvec(nx_, &hswork_vec[0]);
491
492// for(ii=0; ii<N; ii++)
493// d_print_strmat(nu[ii]+nx[ii]+1, nx[ii+1], &hsBAbt[ii], 0, 0);
494// return 0;
495
496/************************************************
497* call Riccati solver
498************************************************/
499
500 // timing
501 struct timeval tv0, tv1, tv2, tv3;
502 int nrep = 1000;
503 int rep;
504
505 gettimeofday(&tv0, NULL); // time
506
507 for(rep=0; rep<nrep; rep++)
508 {
509 s_back_ric_sv_libstr(N, nx, nu, hsBAbt, hsRSQrq, hsL, hsux, hspi, hswork_mat, hswork_vec);
510 }
511
512 gettimeofday(&tv1, NULL); // time
513
514 for(rep=0; rep<nrep; rep++)
515 {
516 s_back_ric_trf_libstr(N, nx, nu, hsBAbt, hsRSQrq, hsL, hswork_mat);
517 }
518
519 gettimeofday(&tv2, NULL); // time
520
521 for(rep=0; rep<nrep; rep++)
522 {
523 s_back_ric_trs_libstr(N, nx, nu, hsBAbt, hsb, hsrq, hsL, hsPb, hsux, hspi, hswork_vec);
524 }
525
526 gettimeofday(&tv3, NULL); // time
527
528 float time_sv = (float) (tv1.tv_sec-tv0.tv_sec)/(nrep+0.0)+(tv1.tv_usec-tv0.tv_usec)/(nrep*1e6);
529 float time_trf = (float) (tv2.tv_sec-tv1.tv_sec)/(nrep+0.0)+(tv2.tv_usec-tv1.tv_usec)/(nrep*1e6);
530 float time_trs = (float) (tv3.tv_sec-tv2.tv_sec)/(nrep+0.0)+(tv3.tv_usec-tv2.tv_usec)/(nrep*1e6);
531
532 // print sol
533 printf("\nux = \n\n");
534 for(ii=0; ii<=N; ii++)
535 s_print_tran_strvec(nu[ii]+nx[ii], &hsux[ii], 0);
536
537 printf("\npi = \n\n");
538 for(ii=0; ii<N; ii++)
539 s_print_tran_strvec(nx[ii+1], &hspi[ii], 0);
540
541// printf("\nL = \n\n");
542// for(ii=0; ii<=N; ii++)
543// s_print_strmat(nu[ii]+nx[ii]+1, nu[ii]+nx[ii], &hsL[ii], 0, 0);
544
545 printf("\ntime sv\t\ttime trf\t\ttime trs\n");
546 printf("\n%e\t%e\t%e\n", time_sv, time_trf, time_trs);
547 printf("\n");
548
549/************************************************
550* free memory
551************************************************/
552
553 d_free(Ad);
554 d_free(Bd);
555 d_free(bd);
556 d_free(x0d);
557 s_free(A);
558 s_free(B);
559 s_free(b);
560 s_free(x0);
561 s_free(R);
562 s_free(S);
563 s_free(Q);
564 s_free(r);
565 s_free(q);
566 s_free(b0);
567 s_free_strmat(&sA);
568 s_free_strvec(&sb);
569 s_free_strmat(&sBbt0);
570 s_free_strvec(&sb0);
571 s_free_strmat(&sBAbt1);
572 s_free_strmat(&sRr0);
573 s_free_strvec(&sr0);
574 s_free_strmat(&sRSQrq1);
575 s_free_strvec(&srq1);
576 s_free_strmat(&sQqN);
577 s_free_strvec(&sqN);
578 s_free_strmat(&hsL[0]);
579 s_free_strvec(&hsPb[0]);
580 s_free_strvec(&hsux[0]);
581 s_free_strvec(&hspi[0]);
582 for(ii=1; ii<N; ii++)
583 {
584 s_free_strmat(&hsL[ii]);
585 s_free_strvec(&hsPb[ii]);
586 s_free_strvec(&hsux[ii]);
587 s_free_strvec(&hspi[ii]);
588 }
589 s_free_strmat(&hsL[N]);
590 s_free_strvec(&hsux[N]);
591 s_free_strmat(&hswork_mat[0]);
592 s_free_strvec(&hswork_vec[0]);
593
594
595/************************************************
596* return
597************************************************/
598
599 return 0;
600
601 }
602
603
604
605