blob: 8aa5c90c87ce2a984a7a17e915a9e8da64db167c [file] [log] [blame]
Austin Schuh16ce3c72018-01-28 16:17:08 -08001/**************************************************************************************************
2* *
3* This file is part of HPMPC. *
4* *
5* HPMPC -- Library for High-Performance implementation of solvers for MPC. *
6* Copyright (C) 2014-2015 by Technical University of Denmark. All rights reserved. *
7* *
8* HPMPC is free software; you can redistribute it and/or *
9* modify it under the terms of the GNU Lesser General Public *
10* License as published by the Free Software Foundation; either *
11* version 2.1 of the License, or (at your option) any later version. *
12* *
13* HPMPC is distributed in the hope that it will be useful, *
14* but WITHOUT ANY WARRANTY; without even the implied warranty of *
15* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
16* See the GNU Lesser General Public License for more details. *
17* *
18* You should have received a copy of the GNU Lesser General Public *
19* License along with HPMPC; if not, write to the Free Software *
20* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA *
21* *
22* Author: Gianluca Frison, giaf (at) dtu.dk *
23* *
24**************************************************************************************************/
25
26#include <stdio.h>
27#include <stdlib.h>
28#include <math.h>
29
30#include <blasfeo_d_aux_ext_dep.h>
31
32
33
34// matrix-vector multiplication
35void dgemv_n_3l(int m, int n, double *A, int lda , double *x, double *z)
36 {
37
38 int ii, jj;
39
40 for(ii=0; ii<m; ii++)
41 {
42 z[ii] = 0.0;
43 for(jj=0; jj<n; jj++)
44 {
45 z[ii] += A[ii+lda*jj] * x[jj];
46 }
47 }
48
49 return;
50
51 }
52
53
54// matrix-matrix multiplication
55void dgemm_nn_3l(int m, int n, int k, double *A, int lda , double *B, int ldb, double *C, int ldc)
56 {
57
58 int ii, jj, kk;
59
60 for(jj=0; jj<n; jj++)
61 {
62 for(ii=0; ii<m; ii++)
63 {
64 C[ii+ldc*jj] = 0;
65 for(kk=0; kk<k; kk++)
66 {
67 C[ii+ldc*jj] += A[ii+lda*kk] * B[kk+ldb*jj];
68 }
69 }
70 }
71
72 return;
73
74 }
75
76
77void daxpy_3l(int n, double da, double *dx, double *dy)
78 {
79 int i;
80 for(i=0; i<n; i++)
81 {
82 dy[i] += da*dx[i];
83 }
84 }
85
86
87
88void dscal_3l(int n, double da, double *dx)
89 {
90 int i;
91 for(i=0; i<n; i++)
92 {
93 dx[i] *= da;
94 }
95 }
96
97
98
99/************************************************
100 Routine that copies a matrix
101************************************************/
102void dmcopy(int row, int col, double *A, int lda, double *B, int ldb)
103 {
104 int i, j;
105 for(j=0; j<col; j++)
106 {
107 for(i=0; i<row; i++)
108 {
109 B[i+j*ldb] = A[i+j*lda];
110 }
111 }
112 }
113
114
115
116int idamax_3l(int n, double *x)
117 {
118
119 if(n<=0)
120 return 0;
121 if(n==1)
122 return 0;
123
124 double dabs;
125 double dmax = (x[0]>0 ? x[0] : -x[0]);
126 int idmax = 0;
127 int jj;
128 for(jj=1; jj<n; jj++)
129 {
130 dabs = (x[jj]>0 ? x[jj] : -x[jj]);
131 if(dabs>dmax)
132 {
133 dmax = dabs;
134 idmax = jj;
135 }
136 }
137
138 return idmax;
139
140 }
141
142
143
144void dswap_3l(int n, double *x, int incx, double *y, int incy)
145 {
146
147 if(n<=0)
148 return;
149
150 double temp;
151 int jj;
152 for(jj=0; jj<n; jj++)
153 {
154 temp = x[0];
155 x[0] = y[0];
156 y[0] = temp;
157 x += incx;
158 y += incy;
159 }
160
161 }
162
163
164
165void dger_3l(int m, int n, double alpha, double *x, int incx, double *y, int incy, double *A, int lda)
166 {
167
168 if(m==0 || n==0 || alpha==0.0)
169 return;
170
171 int i, j;
172 double *px, *py, temp;
173
174 py = y;
175 for(j=0; j<n; j++)
176 {
177 temp = alpha * py[0];
178 px = x;
179 for(i=0; i<m; i++)
180 {
181 A[i+lda*j] += px[0] * temp;
182 px += incx;
183 }
184 py += incy;
185 }
186
187 return;
188
189 }
190
191
192
193void dgetf2_3l(int m, int n, double *A, int lda, int *ipiv, int *info)
194 {
195
196 if(m<=0 || n<=0)
197 return;
198
199 int i, j, jp;
200
201 double Ajj;
202
203 int size_min = ( m<n ? m : n );
204
205 for(j=0; j<size_min; j++)
206 // find the pivot and test for singularity
207 {
208 jp = j + idamax_3l(m-j, &A[j+lda*j]);
209 ipiv[j] = jp;
210 if( A[jp+lda*j]!=0)
211 {
212 // apply the interchange to columns 0:n-1
213 if(jp!=j)
214 {
215 dswap_3l(n, &A[j], lda, &A[jp], lda);
216 }
217 // compute elements j+1:m-1 of j-th column
218 if(j<m-1)
219 {
220 Ajj = A[j+lda*j];
221 if( ( Ajj>0 ? Ajj : -Ajj ) >= 2.22e-16 )
222 {
223 dscal_3l(m-j-1, 1.0/Ajj, &A[j+1+lda*j]);
224 }
225 else
226 {
227 for(i=j+1; i<m; i++)
228 {
229 A[i+lda*j] /= Ajj;
230 }
231 }
232 }
233 }
234 else if(*info==0)
235 {
236 *info = j+1;
237 }
238
239 if( j < size_min )
240 {
241 // update trailing submatrix
242 dger_3l(m-j-1, n-j-1, -1.0, &A[j+1+lda*j], 1, &A[j+lda*(j+1)], lda, &A[j+1+lda*(j+1)], lda);
243 }
244
245 }
246
247 return;
248
249 }
250
251
252
253void dlaswp_3l(int n, double *A, int lda, int k1, int k2, int *ipiv)
254 {
255
256 int i, j, k, ix, ix0, i1, i2, n32, ip;
257 double temp;
258
259 ix0 = k1;
260 i1 = k1;
261 i2 = k2;
262
263 n32 = (n/32)*32;
264 if(n32!=0)
265 {
266 for(j=0; j<n32; j+=32)
267 {
268 ix = ix0;
269 for(i=i1; i<i2; i++)
270 {
271 ip = ipiv[ix];
272 if(ip!=i)
273 {
274 for(k=j; k<j+32; k++)
275 {
276 temp = A[i+lda*k];
277 A[i+lda*k] = A[ip+lda*k];
278 A[ip+lda*k] = temp;
279 }
280 }
281 ix++;
282 }
283 }
284 }
285 if(n32!=n)
286 {
287 ix = ix0;
288 for(i=i1; i<i2; i++)
289 {
290 ip = ipiv[ix];
291 if(ip!=i)
292 {
293 for(k=n32; k<n; k++)
294 {
295 temp = A[i+lda*k];
296 A[i+lda*k] = A[ip+lda*k];
297 A[ip+lda*k] = temp;
298 }
299 }
300 ix++;
301 }
302 }
303
304 return;
305
306 }
307
308
309
310// left lower no-transp unit
311void dtrsm_l_l_n_u_3l(int m, int n, double *A, int lda, double *B, int ldb)
312 {
313
314 if(m==0 || n==0)
315 return;
316
317 int i, j, k;
318
319 for(j=0; j<n; j++)
320 {
321 for(k=0; k<m; k++)
322 {
323 for(i=k+1; i<m; i++)
324 {
325 B[i+ldb*j] -= B[k+ldb*j] * A[i+lda*k];
326 }
327 }
328 }
329
330 return;
331
332 }
333
334
335
336// left upper no-transp non-unit
337void dtrsm_l_u_n_n_3l(int m, int n, double *A, int lda, double *B, int ldb)
338 {
339
340 if(m==0 || n==0)
341 return;
342
343 int i, j, k;
344
345 for(j=0; j<n; j++)
346 {
347 for(k=m-1; k>=0; k--)
348 {
349 B[k+ldb*j] /= A[k+lda*k];
350 for(i=0; i<k; i++)
351 {
352 B[i+ldb*j] -= B[k+ldb*j] * A[i+lda*k];
353 }
354 }
355 }
356
357 return;
358
359 }
360
361
362
363void dgetrs_3l(int n, int nrhs, double *A, int lda, int *ipiv, double *B, int ldb, int *info)
364 {
365
366 if(n==0 || nrhs==0)
367 return;
368
369 // solve A * X = B
370
371 // apply row interchanges to the rhs
372 dlaswp_3l(nrhs, B, ldb, 0, n, ipiv);
373
374 // solve L*X = B, overwriting B with X
375 dtrsm_l_l_n_u_3l(n, nrhs, A, lda, B, ldb);
376
377 // solve U*X = B, overwriting B with X
378 dtrsm_l_u_n_n_3l(n, nrhs, A, lda, B, ldb);
379
380 return;
381
382 }
383
384
385
386void dgesv_3l(int n, int nrhs, double *A, int lda, int *ipiv, double *B, int ldb, int *info)
387 {
388
389 // compute the LU factorization of A
390 dgetf2_3l(n, n, A, lda, ipiv, info);
391
392 if(*info==0)
393 {
394 // solve the system A*X = B, overwriting B with X
395 dgetrs_3l(n, nrhs, A, lda, ipiv, B, ldb, info);
396 }
397
398 return;
399
400 }
401
402
403
404/* one norm of a matrix */
405double onenorm(int row, int col, double *ptrA)
406 {
407 double max, temp;
408 int i, j;
409 temp = 0;
410 for(j=0; j<col; j++)
411 {
412 temp = fabs(*(ptrA+j*row));
413 for(i=1; i<row; i++)
414 {
415 temp += fabs(*(ptrA+j*row+i));
416 }
417 if(j==0) max = temp;
418 else if(max>temp) temp = max;
419 }
420 return temp;
421 }
422
423
424
425/* computes the Pade approximation of degree m of the matrix A */
426void padeapprox(int m, int row, double *A)
427 {
428 int row2 = row*row;
429/* int i1 = 1;*/
430/* double d0 = 0;*/
431/* double d1 = 1;*/
432/* double dm1 = -1;*/
433
434 int ii;
435
436 double *U = malloc(row*row*sizeof(double));
437 double *V = malloc(row*row*sizeof(double));
438
439 if(m==3)
440 {
441 double c[] = {120, 60, 12, 1};
442 double *A0 = malloc(row*row*sizeof(double)); for(ii=0; ii<row2; ii++) A0[ii] = 0.0; for(ii=0; ii<row; ii++) A0[ii*(row+1)] = 1.0;
443 double *A2 = malloc(row*row*sizeof(double));
444// char ta = 'n'; double alpha = 1; double beta = 0;
445// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, &beta, A2, &row);
446 dgemm_nn_3l(row, row, row, A, row, A, row, A2, row);
447 double *temp = malloc(row*row*sizeof(double));
448// dscal_(&row2, &d0, temp, &i1);
449 dscal_3l(row2, 0, temp);
450// daxpy_(&row2, &c[3], A2, &i1, temp, &i1);
451 daxpy_3l(row2, c[3], A2, temp);
452// daxpy_(&row2, &c[1], A0, &i1, temp, &i1);
453 daxpy_3l(row2, c[1], A0, temp);
454// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, temp, &row, &beta, U, &row);
455 dgemm_nn_3l(row, row, row, A, row, temp, row, U, row);
456// dscal_(&row2, &d0, V, &i1);
457 dscal_3l(row2, 0, V);
458// daxpy_(&row2, &c[2], A2, &i1, V, &i1);
459 daxpy_3l(row2, c[2], A2, V);
460// daxpy_(&row2, &c[0], A0, &i1, V, &i1);
461 daxpy_3l(row2, c[0], A0, V);
462 free(A0);
463 free(A2);
464 free(temp);
465 }
466 else if(m==5)
467 {
468 double c[] = {30240, 15120, 3360, 420, 30, 1};
469 double *A0 = malloc(row*row*sizeof(double)); for(ii=0; ii<row2; ii++) A0[ii] = 0.0; for(ii=0; ii<row; ii++) A0[ii*(row+1)] = 1.0;
470 double *A2 = malloc(row*row*sizeof(double));
471 double *A4 = malloc(row*row*sizeof(double));
472// char ta = 'n'; double alpha = 1; double beta = 0;
473// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, &beta, A2, &row);
474 dgemm_nn_3l(row, row, row, A, row, A, row, A2, row);
475// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A2, &row, A2, &row, &beta, A4, &row);
476 dgemm_nn_3l(row, row, row, A2, row, A2, row, A4, row);
477 dmcopy(row, row, A4, row, V, row);
478 double *temp = malloc(row*row*sizeof(double));
479 dmcopy(row, row, A4, row, temp, row);
480// daxpy_(&row2, &c[3], A2, &i1, temp, &i1);
481 daxpy_3l(row2, c[3], A2, temp);
482// daxpy_(&row2, &c[1], A0, &i1, temp, &i1);
483 daxpy_3l(row2, c[1], A0, temp);
484// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, temp, &row, &beta, U, &row);
485 dgemm_nn_3l(row, row, row, A, row, temp, row, U, row);
486// dscal_(&row2, &c[4], V, &i1);
487 dscal_3l(row2, c[4], V);
488// daxpy_(&row2, &c[2], A2, &i1, V, &i1);
489 daxpy_3l(row2, c[2], A2, V);
490// daxpy_(&row2, &c[0], A0, &i1, V, &i1);
491 daxpy_3l(row2, c[0], A0, V);
492 free(A0);
493 free(A2);
494 free(A4);
495 free(temp);
496 }
497 else if(m==7)
498 {
499 double c[] = {17297280, 8648640, 1995840, 277200, 25200, 1512, 56, 1};
500 double *A0 = malloc(row*row*sizeof(double)); for(ii=0; ii<row2; ii++) A0[ii] = 0.0; for(ii=0; ii<row; ii++) A0[ii*(row+1)] = 1.0;
501 double *A2 = malloc(row*row*sizeof(double));
502 double *A4 = malloc(row*row*sizeof(double));
503 double *A6 = malloc(row*row*sizeof(double));
504// char ta = 'n'; double alpha = 1; double beta = 1;
505// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, &beta, A2, &row);
506 dgemm_nn_3l(row, row, row, A, row, A, row, A2, row);
507// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A2, &row, A2, &row, &beta, A4, &row);
508 dgemm_nn_3l(row, row, row, A2, row, A2, row, A4, row);
509// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A4, &row, A2, &row, &beta, A6, &row);
510 dgemm_nn_3l(row, row, row, A4, row, A2, row, A6, row);
511 double *temp = malloc(row*row*sizeof(double));
512// dscal_(&row2, &d0, temp, &i1);
513 dscal_3l(row2, 0, temp);
514// daxpy_(&row2, &c[3], A2, &i1, temp, &i1);
515 daxpy_3l(row2, c[3], A2, temp);
516// daxpy_(&row2, &c[1], A0, &i1, temp, &i1);
517 daxpy_3l(row2, c[1], A0, temp);
518// daxpy_(&row2, &c[5], A4, &i1, temp, &i1);
519 daxpy_3l(row2, c[5], A4, temp);
520// daxpy_(&row2, &c[7], A6, &i1, temp, &i1);
521 daxpy_3l(row2, c[7], A6, temp);
522// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, temp, &row, &beta, U, &row);
523 dgemm_nn_3l(row, row, row, A, row, temp, row, U, row);
524// dscal_(&row2, &d0, V, &i1);
525 dscal_3l(row2, 0, V);
526// daxpy_(&row2, &c[2], A2, &i1, V, &i1);
527 daxpy_3l(row2, c[2], A2, V);
528// daxpy_(&row2, &c[0], A0, &i1, V, &i1);
529 daxpy_3l(row2, c[0], A0, V);
530// daxpy_(&row2, &c[4], A4, &i1, V, &i1);
531 daxpy_3l(row2, c[4], A4, V);
532// daxpy_(&row2, &c[6], A6, &i1, V, &i1);
533 daxpy_3l(row2, c[6], A6, V);
534 free(A0);
535 free(A2);
536 free(A4);
537 free(A6);
538 free(temp);
539 }
540 else if(m==9)
541 {
542 double c[] = {17643225600, 8821612800, 2075673600, 302702400, 30270240, 2162160, 110880, 3960, 90, 1};
543 double *A0 = malloc(row*row*sizeof(double)); for(ii=0; ii<row2; ii++) A0[ii] = 0.0; for(ii=0; ii<row; ii++) A0[ii*(row+1)] = 1.0;
544 double *A2 = malloc(row*row*sizeof(double));
545 double *A4 = malloc(row*row*sizeof(double));
546 double *A6 = malloc(row*row*sizeof(double));
547 double *A8 = malloc(row*row*sizeof(double));
548// char ta = 'n'; double alpha = 1; double beta = 0;
549// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, &beta, A2, &row);
550 dgemm_nn_3l(row, row, row, A, row, A, row, A2, row);
551// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A2, &row, A2, &row, &beta, A4, &row);
552 dgemm_nn_3l(row, row, row, A2, row, A2, row, A4, row);
553// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A4, &row, A2, &row, &beta, A6, &row);
554 dgemm_nn_3l(row, row, row, A4, row, A2, row, A6, row);
555// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A6, &row, A2, &row, &beta, A8, &row);
556 dgemm_nn_3l(row, row, row, A6, row, A2, row, A8, row);
557 dmcopy(row, row, A8, row, V, row);
558 double *temp = malloc(row*row*sizeof(double));
559 dmcopy(row, row, A8, row, temp, row);
560// daxpy_(&row2, &c[3], A2, &i1, temp, &i1);
561 daxpy_3l(row2, c[3], A2, temp);
562// daxpy_(&row2, &c[1], A0, &i1, temp, &i1);
563 daxpy_3l(row2, c[1], A0, temp);
564// daxpy_(&row2, &c[5], A4, &i1, temp, &i1);
565 daxpy_3l(row2, c[5], A4, temp);
566// daxpy_(&row2, &c[7], A6, &i1, temp, &i1);
567 daxpy_3l(row2, c[7], A6, temp);
568// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, temp, &row, &beta, U, &row);
569 dgemm_nn_3l(row, row, row, A, row, temp, row, U, row);
570// dscal_(&row2, &c[8], V, &i1);
571 dscal_3l(row2, c[8], V);
572// daxpy_(&row2, &c[2], A2, &i1, V, &i1);
573 daxpy_3l(row2, c[2], A2, V);
574// daxpy_(&row2, &c[0], A0, &i1, V, &i1);
575 daxpy_3l(row2, c[0], A0, V);
576// daxpy_(&row2, &c[4], A4, &i1, V, &i1);
577 daxpy_3l(row2, c[4], A4, V);
578// daxpy_(&row2, &c[6], A6, &i1, V, &i1);
579 daxpy_3l(row2, c[6], A6, V);
580 free(A0);
581 free(A2);
582 free(A4);
583 free(A6);
584 free(A8);
585 free(temp);
586 }
587 else if(m==13) // tested
588 {
589 double c[] = {64764752532480000, 32382376266240000, 7771770303897600, 1187353796428800, 129060195264000, 10559470521600, 670442572800, 33522128640, 1323241920, 40840800, 960960, 16380, 182, 1};
590 double *A0 = malloc(row*row*sizeof(double)); for(ii=0; ii<row2; ii++) A0[ii] = 0.0; for(ii=0; ii<row; ii++) A0[ii*(row+1)] = 1.0;
591 double *A2 = malloc(row*row*sizeof(double));
592 double *A4 = malloc(row*row*sizeof(double));
593 double *A6 = malloc(row*row*sizeof(double));
594// char ta = 'n'; double alpha = 1; double beta = 0;
595// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, &beta, A2, &row);
596 dgemm_nn_3l(row, row, row, A, row, A, row, A2, row);
597// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A2, &row, A2, &row, &beta, A4, &row);
598 dgemm_nn_3l(row, row, row, A2, row, A2, row, A4, row);
599// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A4, &row, A2, &row, &beta, A6, &row);
600 dgemm_nn_3l(row, row, row, A4, row, A2, row, A6, row);
601 dmcopy(row, row, A2, row, U, row);
602 double *temp = malloc(row*row*sizeof(double));
603// dscal_(&row2, &c[9], U, &i1);
604 dscal_3l(row2, c[9], U);
605// daxpy_(&row2, &c[11], A4, &i1, U, &i1);
606 daxpy_3l(row2, c[11], A4, U);
607// daxpy_(&row2, &c[13], A6, &i1, U, &i1);
608 daxpy_3l(row2, c[13], A6, U);
609// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A6, &row, U, &row, &beta, temp, &row);
610 dgemm_nn_3l(row, row, row, A6, row, U, row, temp, row);
611// daxpy_(&row2, &c[7], A6, &i1, temp, &i1);
612 daxpy_3l(row2, c[7], A6, temp);
613// daxpy_(&row2, &c[5], A4, &i1, temp, &i1);
614 daxpy_3l(row2, c[5], A4, temp);
615// daxpy_(&row2, &c[3], A2, &i1, temp, &i1);
616 daxpy_3l(row2, c[3], A2, temp);
617// daxpy_(&row2, &c[1], A0, &i1, temp, &i1);
618 daxpy_3l(row2, c[1], A0, temp);
619// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, temp, &row, &beta, U, &row);
620 dgemm_nn_3l(row, row, row, A, row, temp, row, U, row);
621 dmcopy(row, row, A2, row, temp, row);
622// dscal_(&row2, &c[8], V, &i1);
623 dscal_3l(row2, c[8], V);
624// daxpy_(&row2, &c[12], A6, &i1, temp, &i1);
625 daxpy_3l(row2, c[12], A6, temp);
626// daxpy_(&row2, &c[10], A4, &i1, temp, &i1);
627 daxpy_3l(row2, c[10], A4, temp);
628// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A6, &row, temp, &row, &beta, V, &row);
629 dgemm_nn_3l(row, row, row, A6, row, temp, row, V, row);
630// daxpy_(&row2, &c[6], A6, &i1, V, &i1);
631 daxpy_3l(row2, c[6], A6, V);
632// daxpy_(&row2, &c[4], A4, &i1, V, &i1);
633 daxpy_3l(row2, c[4], A4, V);
634// daxpy_(&row2, &c[2], A2, &i1, V, &i1);
635 daxpy_3l(row2, c[2], A2, V);
636// daxpy_(&row2, &c[0], A0, &i1, V, &i1);
637 daxpy_3l(row2, c[0], A0, V);
638 free(A0);
639 free(A2);
640 free(A4);
641 free(A6);
642 free(temp);
643 }
644 else
645 {
646 printf("%s\n", "Wrong Pade approximatin degree");
647 exit(1);
648 }
649 double *D = malloc(row*row*sizeof(double));
650// dcopy_(&row2, V, &i1, A, &i1);
651 dmcopy(row, row, V, row, A, row);
652// daxpy_(&row2, &d1, U, &i1, A, &i1);
653 daxpy_3l(row2, 1.0, U, A);
654// dcopy_(&row2, V, &i1, D, &i1);
655 dmcopy(row, row, V, row, D, row);
656// daxpy_(&row2, &dm1, U, &i1, D, &i1);
657 daxpy_3l(row2, -1.0, U, D);
658 int *ipiv = (int *) malloc(row*sizeof(int));
659 int info = 0;
660// dgesv_(&row, &row, D, &row, ipiv, A, &row, &info);
661 dgesv_3l(row, row, D, row, ipiv, A, row, &info);
662 free(ipiv);
663 free(D);
664 free(U);
665 free(V);
666 }
667
668
669
670void expm(int row, double *A)
671 {
672
673 int i;
674
675 int m_vals[] = {3, 5, 7, 9, 13};
676 double theta[] = {0.01495585217958292, 0.2539398330063230, 0.9504178996162932, 2.097847961257068, 5.371920351148152};
677 int lentheta = 5;
678
679 double normA = onenorm(row, row, A);
680
681 if(normA<=theta[4])
682 {
683 for(i=0; i<lentheta; i++)
684 {
685 if(normA<=theta[i])
686 {
687 padeapprox(m_vals[i], row, A);
688 break;
689 }
690 }
691 }
692 else
693 {
694 int s;
695 double t = frexp(normA/(theta[4]), &s);
696 s = s - (t==0.5);
697 t = pow(2,-s);
698 int row2 = row*row;
699/* int i1 = 1;*/
700// dscal_(&row2, &t, A, &i1);
701 dscal_3l(row2, t, A);
702 padeapprox(m_vals[4], row, A);
703 double *temp = malloc(row*row*sizeof(double));
704// char ta = 'n'; double alpha = 1; double beta = 0;
705 for(i=0; i<s; i++)
706 {
707// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, &beta, temp, &row);
708 dgemm_nn_3l(row, row, row, A, row, A, row, temp, row);
709 dmcopy(row, row, temp, row, A, row);
710 }
711 free(temp);
712 }
713 }
714
715