blob: 51d9e95ad5483d6d9e6db44d4a6588624e69ec5a [file] [log] [blame]
Austin Schuh9a24b372018-01-28 16:12:29 -08001/**************************************************************************************************
2* *
3* This file is part of HPMPC. *
4* *
5* HPMPC -- Library for High-Performance implementation of solvers for MPC. *
6* Copyright (C) 2014-2015 by Technical University of Denmark. All rights reserved. *
7* *
8* HPMPC is free software; you can redistribute it and/or *
9* modify it under the terms of the GNU Lesser General Public *
10* License as published by the Free Software Foundation; either *
11* version 2.1 of the License, or (at your option) any later version. *
12* *
13* HPMPC is distributed in the hope that it will be useful, *
14* but WITHOUT ANY WARRANTY; without even the implied warranty of *
15* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
16* See the GNU Lesser General Public License for more details. *
17* *
18* You should have received a copy of the GNU Lesser General Public *
19* License along with HPMPC; if not, write to the Free Software *
20* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA *
21* *
22* Author: Gianluca Frison, giaf (at) dtu.dk *
23* *
24**************************************************************************************************/
25
26#include <stdio.h>
27#include <stdlib.h>
28#include <math.h>
29
30//#include "../include/aux_d.h"
31
32//void dgemm_(char *transa, char *transb, int *m, int *n, int *k, double *alpha, double *A, int *lda, double *B, int *ldb, double *beta, double *C, int *ldc);
33//void dgesv_(int *n, int *nrhs, double *A, int *lda, int *ipiv, double *B, int *ldb, int *info);
34//void dcopy_(int *n, double *dx, int *incx, double *dy, int *incy);
35//void daxpy_(int *n, double *da, double *dx, int *incx, double *dy, int *incy);
36//void dscal_(int *n, double *da, double *dx, int *incx);
37
38int posix_memalign(void **memptr, size_t alignment, size_t size);
39
40
41
42/************************************************
43 matrix-matrix multiplication
44************************************************/
45void dgemm_nn_3l(int m, int n, int k, double *A, int lda , double *B, int ldb, double *C, int ldc)
46 {
47
48 int ii, jj, kk;
49
50 for(jj=0; jj<n; jj++)
51 {
52 for(ii=0; ii<m; ii++)
53 {
54 C[ii+ldc*jj] = 0;
55 for(kk=0; kk<k; kk++)
56 {
57 C[ii+ldc*jj] += A[ii+lda*kk] * B[kk+ldb*jj];
58 }
59 }
60 }
61
62 return;
63
64 }
65
66
67void daxpy_3l(int n, double da, double *dx, double *dy)
68 {
69 int i;
70 for(i=0; i<n; i++)
71 {
72 dy[i] += da*dx[i];
73 }
74 }
75
76
77
78void dscal_3l(int n, double da, double *dx)
79 {
80 int i;
81 for(i=0; i<n; i++)
82 {
83 dx[i] *= da;
84 }
85 }
86
87
88
89/************************************************
90 Routine that copies a matrix
91************************************************/
92void dmcopy(int row, int col, double *A, int lda, double *B, int ldb)
93 {
94 int i, j;
95 for(j=0; j<col; j++)
96 {
97 for(i=0; i<row; i++)
98 {
99 B[i+j*ldb] = A[i+j*lda];
100 }
101 }
102 }
103
104
105
106int idamax_3l(int n, double *x)
107 {
108
109 if(n<=0)
110 return 0;
111 if(n==1)
112 return 0;
113
114 double dabs;
115 double dmax = (x[0]>0 ? x[0] : -x[0]);
116 int idmax = 0;
117 int jj;
118 for(jj=1; jj<n; jj++)
119 {
120 dabs = (x[jj]>0 ? x[jj] : -x[jj]);
121 if(dabs>dmax)
122 {
123 dmax = dabs;
124 idmax = jj;
125 }
126 }
127
128 return idmax;
129
130 }
131
132
133
134void dswap_3l(int n, double *x, int incx, double *y, int incy)
135 {
136
137 if(n<=0)
138 return;
139
140 double temp;
141 int jj;
142 for(jj=0; jj<n; jj++)
143 {
144 temp = x[0];
145 x[0] = y[0];
146 y[0] = temp;
147 x += incx;
148 y += incy;
149 }
150
151 }
152
153
154
155void dger_3l(int m, int n, double alpha, double *x, int incx, double *y, int incy, double *A, int lda)
156 {
157
158 if(m==0 || n==0 || alpha==0.0)
159 return;
160
161 int i, j;
162 double *px, *py, temp;
163
164 py = y;
165 for(j=0; j<n; j++)
166 {
167 temp = alpha * py[0];
168 px = x;
169 for(i=0; i<m; i++)
170 {
171 A[i+lda*j] += px[0] * temp;
172 px += incx;
173 }
174 py += incy;
175 }
176
177 return;
178
179 }
180
181
182
183void dgetf2_3l(int m, int n, double *A, int lda, int *ipiv, int *info)
184 {
185
186 if(m<=0 || n<=0)
187 return;
188
189 int i, j, jp;
190
191 double Ajj;
192
193 int size_min = ( m<n ? m : n );
194
195 for(j=0; j<size_min; j++)
196 // find the pivot and test for singularity
197 {
198 jp = j + idamax_3l(m-j, &A[j+lda*j]);
199 ipiv[j] = jp;
200 if( A[jp+lda*j]!=0)
201 {
202 // apply the interchange to columns 0:n-1
203 if(jp!=j)
204 {
205 dswap_3l(n, &A[j], lda, &A[jp], lda);
206 }
207 // compute elements j+1:m-1 of j-th column
208 if(j<m-1)
209 {
210 Ajj = A[j+lda*j];
211 if( ( Ajj>0 ? Ajj : -Ajj ) >= 2.22e-16 )
212 {
213 dscal_3l(m-j-1, 1.0/Ajj, &A[j+1+lda*j]);
214 }
215 else
216 {
217 for(i=j+1; i<m; i++)
218 {
219 A[i+lda*j] /= Ajj;
220 }
221 }
222 }
223 }
224 else if(*info==0)
225 {
226 *info = j+1;
227 }
228
229 if( j < size_min )
230 {
231 // update trailing submatrix
232 dger_3l(m-j-1, n-j-1, -1.0, &A[j+1+lda*j], 1, &A[j+lda*(j+1)], lda, &A[j+1+lda*(j+1)], lda);
233 }
234
235 }
236
237 return;
238
239 }
240
241
242
243void dlaswp_3l(int n, double *A, int lda, int k1, int k2, int *ipiv)
244 {
245
246 int i, j, k, ix, ix0, i1, i2, n32, ip;
247 double temp;
248
249 ix0 = k1;
250 i1 = k1;
251 i2 = k2;
252
253 n32 = (n/32)*32;
254 if(n32!=0)
255 {
256 for(j=0; j<n32; j+=32)
257 {
258 ix = ix0;
259 for(i=i1; i<i2; i++)
260 {
261 ip = ipiv[ix];
262 if(ip!=i)
263 {
264 for(k=j; k<j+32; k++)
265 {
266 temp = A[i+lda*k];
267 A[i+lda*k] = A[ip+lda*k];
268 A[ip+lda*k] = temp;
269 }
270 }
271 ix++;
272 }
273 }
274 }
275 if(n32!=n)
276 {
277 ix = ix0;
278 for(i=i1; i<i2; i++)
279 {
280 ip = ipiv[ix];
281 if(ip!=i)
282 {
283 for(k=n32; k<n; k++)
284 {
285 temp = A[i+lda*k];
286 A[i+lda*k] = A[ip+lda*k];
287 A[ip+lda*k] = temp;
288 }
289 }
290 ix++;
291 }
292 }
293
294 return;
295
296 }
297
298
299
300// left lower no-transp unit
301void dtrsm_l_l_n_u_3l(int m, int n, double *A, int lda, double *B, int ldb)
302 {
303
304 if(m==0 || n==0)
305 return;
306
307 int i, j, k;
308
309 for(j=0; j<n; j++)
310 {
311 for(k=0; k<m; k++)
312 {
313 for(i=k+1; i<m; i++)
314 {
315 B[i+ldb*j] -= B[k+ldb*j] * A[i+lda*k];
316 }
317 }
318 }
319
320 return;
321
322 }
323
324
325
326// left upper no-transp non-unit
327void dtrsm_l_u_n_n_3l(int m, int n, double *A, int lda, double *B, int ldb)
328 {
329
330 if(m==0 || n==0)
331 return;
332
333 int i, j, k;
334
335 for(j=0; j<n; j++)
336 {
337 for(k=m-1; k>=0; k--)
338 {
339 B[k+ldb*j] /= A[k+lda*k];
340 for(i=0; i<k; i++)
341 {
342 B[i+ldb*j] -= B[k+ldb*j] * A[i+lda*k];
343 }
344 }
345 }
346
347 return;
348
349 }
350
351
352
353void dgetrs_3l(int n, int nrhs, double *A, int lda, int *ipiv, double *B, int ldb, int *info)
354 {
355
356 if(n==0 || nrhs==0)
357 return;
358
359 // solve A * X = B
360
361 // apply row interchanges to the rhs
362 dlaswp_3l(nrhs, B, ldb, 0, n, ipiv);
363
364 // solve L*X = B, overwriting B with X
365 dtrsm_l_l_n_u_3l(n, nrhs, A, lda, B, ldb);
366
367 // solve U*X = B, overwriting B with X
368 dtrsm_l_u_n_n_3l(n, nrhs, A, lda, B, ldb);
369
370 return;
371
372 }
373
374
375
376void dgesv_3l(int n, int nrhs, double *A, int lda, int *ipiv, double *B, int ldb, int *info)
377 {
378
379 // compute the LU factorization of A
380 dgetf2_3l(n, n, A, lda, ipiv, info);
381
382 if(*info==0)
383 {
384 // solve the system A*X = B, overwriting B with X
385 dgetrs_3l(n, nrhs, A, lda, ipiv, B, ldb, info);
386 }
387
388 return;
389
390 }
391
392
393
394/* one norm of a matrix */
395double onenorm(int row, int col, double *ptrA)
396 {
397 double max, temp;
398 int i, j;
399 temp = 0;
400 for(j=0; j<col; j++)
401 {
402 temp = abs(*(ptrA+j*row));
403 for(i=1; i<row; i++)
404 {
405 temp += abs(*(ptrA+j*row+i));
406 }
407 if(j==0) max = temp;
408 else if(max>temp) temp = max;
409 }
410 return temp;
411 }
412
413
414
415/* computes the Pade approximation of degree m of the matrix A */
416void padeapprox(int m, int row, double *A)
417 {
418 int ii;
419 int row2 = row*row;
420/* int i1 = 1;*/
421/* double d0 = 0;*/
422/* double d1 = 1;*/
423/* double dm1 = -1;*/
424
425 double *U = (double *) malloc(row*row*sizeof(double)); // d_zeros(&U, row, row);
426 double *V = (double *) malloc(row*row*sizeof(double)); // d_zeros(&V, row, row);
427
428 if(m==3)
429 {
430 double c[] = {120, 60, 12, 1};
431 double *A0 = (double *) malloc(row*row*sizeof(double)); // d_eye(&A0, row);
432 for(ii=0; ii<row*row; ii++)
433 A0[ii] = 0.0;
434 for(ii=0; ii<row; ii++)
435 A0[ii*(row+1)] = 1.0;
436 double *A2 = (double *) malloc(row*row*sizeof(double)); // d_zeros(&A2, row, row);
437 double *temp = malloc(row*row*sizeof(double)); // d_zeros(&temp, row, row);
438// char ta = 'n'; double alpha = 1; double beta = 0;
439// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, &beta, A2, &row);
440 dgemm_nn_3l(row, row, row, A, row, A, row, A2, row);
441// dscal_(&row2, &d0, temp, &i1);
442 dscal_3l(row2, 0, temp);
443// daxpy_(&row2, &c[3], A2, &i1, temp, &i1);
444 daxpy_3l(row2, c[3], A2, temp);
445// daxpy_(&row2, &c[1], A0, &i1, temp, &i1);
446 daxpy_3l(row2, c[1], A0, temp);
447// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, temp, &row, &beta, U, &row);
448 dgemm_nn_3l(row, row, row, A, row, temp, row, U, row);
449// dscal_(&row2, &d0, V, &i1);
450 dscal_3l(row2, 0, V);
451// daxpy_(&row2, &c[2], A2, &i1, V, &i1);
452 daxpy_3l(row2, c[2], A2, V);
453// daxpy_(&row2, &c[0], A0, &i1, V, &i1);
454 daxpy_3l(row2, c[0], A0, V);
455 free(A0);
456 free(A2);
457 free(temp);
458 }
459 else if(m==5)
460 {
461 double c[] = {30240, 15120, 3360, 420, 30, 1};
462 double *A0 = (double *) malloc(row*row*sizeof(double)); // d_eye(&A0, row);
463 for(ii=0; ii<row*row; ii++)
464 A0[ii] = 0.0;
465 for(ii=0; ii<row; ii++)
466 A0[ii*(row+1)] = 1.0;
467 double *A2 = (double *) malloc(row*row*sizeof(double)); // d_zeros(&A2, row, row);
468 double *A4 = (double *) malloc(row*row*sizeof(double)); // d_zeros(&A2, row, row);
469 double *temp = malloc(row*row*sizeof(double)); // d_zeros(&temp, row, row);
470// char ta = 'n'; double alpha = 1; double beta = 0;
471// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, &beta, A2, &row);
472 dgemm_nn_3l(row, row, row, A, row, A, row, A2, row);
473// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A2, &row, A2, &row, &beta, A4, &row);
474 dgemm_nn_3l(row, row, row, A2, row, A2, row, A4, row);
475 dmcopy(row, row, A4, row, V, row);
476 dmcopy(row, row, A4, row, temp, row);
477// daxpy_(&row2, &c[3], A2, &i1, temp, &i1);
478 daxpy_3l(row2, c[3], A2, temp);
479// daxpy_(&row2, &c[1], A0, &i1, temp, &i1);
480 daxpy_3l(row2, c[1], A0, temp);
481// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, temp, &row, &beta, U, &row);
482 dgemm_nn_3l(row, row, row, A, row, temp, row, U, row);
483// dscal_(&row2, &c[4], V, &i1);
484 dscal_3l(row2, c[4], V);
485// daxpy_(&row2, &c[2], A2, &i1, V, &i1);
486 daxpy_3l(row2, c[2], A2, V);
487// daxpy_(&row2, &c[0], A0, &i1, V, &i1);
488 daxpy_3l(row2, c[0], A0, V);
489 free(A0);
490 free(A2);
491 free(A4);
492 free(temp);
493 }
494 else if(m==7)
495 {
496 double c[] = {17297280, 8648640, 1995840, 277200, 25200, 1512, 56, 1};
497 double *A0 = (double *) malloc(row*row*sizeof(double)); // d_eye(&A0, row);
498 for(ii=0; ii<row*row; ii++)
499 A0[ii] = 0.0;
500 for(ii=0; ii<row; ii++)
501 A0[ii*(row+1)] = 1.0;
502 double *A2 = (double *) malloc(row*row*sizeof(double)); // d_zeros(&A2, row, row);
503 double *A4 = (double *) malloc(row*row*sizeof(double)); // d_zeros(&A2, row, row);
504 double *A6 = (double *) malloc(row*row*sizeof(double)); // d_zeros(&A2, row, row);
505 double *temp = malloc(row*row*sizeof(double)); // d_zeros(&temp, row, row);
506// char ta = 'n'; double alpha = 1; double beta = 1;
507// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, &beta, A2, &row);
508 dgemm_nn_3l(row, row, row, A, row, A, row, A2, row);
509// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A2, &row, A2, &row, &beta, A4, &row);
510 dgemm_nn_3l(row, row, row, A2, row, A2, row, A4, row);
511// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A4, &row, A2, &row, &beta, A6, &row);
512 dgemm_nn_3l(row, row, row, A4, row, A2, row, A6, row);
513// dscal_(&row2, &d0, temp, &i1);
514 dscal_3l(row2, 0, temp);
515// daxpy_(&row2, &c[3], A2, &i1, temp, &i1);
516 daxpy_3l(row2, c[3], A2, temp);
517// daxpy_(&row2, &c[1], A0, &i1, temp, &i1);
518 daxpy_3l(row2, c[1], A0, temp);
519// daxpy_(&row2, &c[5], A4, &i1, temp, &i1);
520 daxpy_3l(row2, c[5], A4, temp);
521// daxpy_(&row2, &c[7], A6, &i1, temp, &i1);
522 daxpy_3l(row2, c[7], A6, temp);
523// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, temp, &row, &beta, U, &row);
524 dgemm_nn_3l(row, row, row, A, row, temp, row, U, row);
525// dscal_(&row2, &d0, V, &i1);
526 dscal_3l(row2, 0, V);
527// daxpy_(&row2, &c[2], A2, &i1, V, &i1);
528 daxpy_3l(row2, c[2], A2, V);
529// daxpy_(&row2, &c[0], A0, &i1, V, &i1);
530 daxpy_3l(row2, c[0], A0, V);
531// daxpy_(&row2, &c[4], A4, &i1, V, &i1);
532 daxpy_3l(row2, c[4], A4, V);
533// daxpy_(&row2, &c[6], A6, &i1, V, &i1);
534 daxpy_3l(row2, c[6], A6, V);
535 free(A0);
536 free(A2);
537 free(A4);
538 free(A6);
539 free(temp);
540 }
541 else if(m==9)
542 {
543 double c[] = {17643225600, 8821612800, 2075673600, 302702400, 30270240, 2162160, 110880, 3960, 90, 1};
544 double *A0 = (double *) malloc(row*row*sizeof(double)); // d_eye(&A0, row);
545 for(ii=0; ii<row*row; ii++)
546 A0[ii] = 0.0;
547 for(ii=0; ii<row; ii++)
548 A0[ii*(row+1)] = 1.0;
549 double *A2 = (double *) malloc(row*row*sizeof(double)); // d_zeros(&A2, row, row);
550 double *A4 = (double *) malloc(row*row*sizeof(double)); // d_zeros(&A2, row, row);
551 double *A6 = (double *) malloc(row*row*sizeof(double)); // d_zeros(&A2, row, row);
552 double *A8 = (double *) malloc(row*row*sizeof(double)); // d_zeros(&A2, row, row);
553 double *temp = malloc(row*row*sizeof(double)); // d_zeros(&temp, row, row);
554// char ta = 'n'; double alpha = 1; double beta = 0;
555// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, &beta, A2, &row);
556 dgemm_nn_3l(row, row, row, A, row, A, row, A2, row);
557// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A2, &row, A2, &row, &beta, A4, &row);
558 dgemm_nn_3l(row, row, row, A2, row, A2, row, A4, row);
559// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A4, &row, A2, &row, &beta, A6, &row);
560 dgemm_nn_3l(row, row, row, A4, row, A2, row, A6, row);
561// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A6, &row, A2, &row, &beta, A8, &row);
562 dgemm_nn_3l(row, row, row, A6, row, A2, row, A8, row);
563 dmcopy(row, row, A8, row, V, row);
564 dmcopy(row, row, A8, row, temp, row);
565// daxpy_(&row2, &c[3], A2, &i1, temp, &i1);
566 daxpy_3l(row2, c[3], A2, temp);
567// daxpy_(&row2, &c[1], A0, &i1, temp, &i1);
568 daxpy_3l(row2, c[1], A0, temp);
569// daxpy_(&row2, &c[5], A4, &i1, temp, &i1);
570 daxpy_3l(row2, c[5], A4, temp);
571// daxpy_(&row2, &c[7], A6, &i1, temp, &i1);
572 daxpy_3l(row2, c[7], A6, temp);
573// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, temp, &row, &beta, U, &row);
574 dgemm_nn_3l(row, row, row, A, row, temp, row, U, row);
575// dscal_(&row2, &c[8], V, &i1);
576 dscal_3l(row2, c[8], V);
577// daxpy_(&row2, &c[2], A2, &i1, V, &i1);
578 daxpy_3l(row2, c[2], A2, V);
579// daxpy_(&row2, &c[0], A0, &i1, V, &i1);
580 daxpy_3l(row2, c[0], A0, V);
581// daxpy_(&row2, &c[4], A4, &i1, V, &i1);
582 daxpy_3l(row2, c[4], A4, V);
583// daxpy_(&row2, &c[6], A6, &i1, V, &i1);
584 daxpy_3l(row2, c[6], A6, V);
585 free(A0);
586 free(A2);
587 free(A4);
588 free(A6);
589 free(A8);
590 free(temp);
591 }
592 else if(m==13) // tested
593 {
594 double c[] = {64764752532480000, 32382376266240000, 7771770303897600, 1187353796428800, 129060195264000, 10559470521600, 670442572800, 33522128640, 1323241920, 40840800, 960960, 16380, 182, 1};
595 double *A0 = (double *) malloc(row*row*sizeof(double)); // d_eye(&A0, row);
596 for(ii=0; ii<row*row; ii++)
597 A0[ii] = 0.0;
598 for(ii=0; ii<row; ii++)
599 A0[ii*(row+1)] = 1.0;
600 double *A2 = (double *) malloc(row*row*sizeof(double)); // d_zeros(&A2, row, row);
601 double *A4 = (double *) malloc(row*row*sizeof(double)); // d_zeros(&A2, row, row);
602 double *A6 = (double *) malloc(row*row*sizeof(double)); // d_zeros(&A2, row, row);
603 double *temp = malloc(row*row*sizeof(double)); // d_zeros(&temp, row, row);
604// char ta = 'n'; double alpha = 1; double beta = 0;
605// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, &beta, A2, &row);
606 dgemm_nn_3l(row, row, row, A, row, A, row, A2, row);
607// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A2, &row, A2, &row, &beta, A4, &row);
608 dgemm_nn_3l(row, row, row, A2, row, A2, row, A4, row);
609// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A4, &row, A2, &row, &beta, A6, &row);
610 dgemm_nn_3l(row, row, row, A4, row, A2, row, A6, row);
611 dmcopy(row, row, A2, row, U, row);
612// dscal_(&row2, &c[9], U, &i1);
613 dscal_3l(row2, c[9], U);
614// daxpy_(&row2, &c[11], A4, &i1, U, &i1);
615 daxpy_3l(row2, c[11], A4, U);
616// daxpy_(&row2, &c[13], A6, &i1, U, &i1);
617 daxpy_3l(row2, c[13], A6, U);
618// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A6, &row, U, &row, &beta, temp, &row);
619 dgemm_nn_3l(row, row, row, A6, row, U, row, temp, row);
620// daxpy_(&row2, &c[7], A6, &i1, temp, &i1);
621 daxpy_3l(row2, c[7], A6, temp);
622// daxpy_(&row2, &c[5], A4, &i1, temp, &i1);
623 daxpy_3l(row2, c[5], A4, temp);
624// daxpy_(&row2, &c[3], A2, &i1, temp, &i1);
625 daxpy_3l(row2, c[3], A2, temp);
626// daxpy_(&row2, &c[1], A0, &i1, temp, &i1);
627 daxpy_3l(row2, c[1], A0, temp);
628// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, temp, &row, &beta, U, &row);
629 dgemm_nn_3l(row, row, row, A, row, temp, row, U, row);
630 dmcopy(row, row, A2, row, temp, row);
631// dscal_(&row2, &c[8], V, &i1);
632 dscal_3l(row2, c[8], V);
633// daxpy_(&row2, &c[12], A6, &i1, temp, &i1);
634 daxpy_3l(row2, c[12], A6, temp);
635// daxpy_(&row2, &c[10], A4, &i1, temp, &i1);
636 daxpy_3l(row2, c[10], A4, temp);
637// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A6, &row, temp, &row, &beta, V, &row);
638 dgemm_nn_3l(row, row, row, A6, row, temp, row, V, row);
639// daxpy_(&row2, &c[6], A6, &i1, V, &i1);
640 daxpy_3l(row2, c[6], A6, V);
641// daxpy_(&row2, &c[4], A4, &i1, V, &i1);
642 daxpy_3l(row2, c[4], A4, V);
643// daxpy_(&row2, &c[2], A2, &i1, V, &i1);
644 daxpy_3l(row2, c[2], A2, V);
645// daxpy_(&row2, &c[0], A0, &i1, V, &i1);
646 daxpy_3l(row2, c[0], A0, V);
647 free(A0);
648 free(A2);
649 free(A4);
650 free(A6);
651 free(temp);
652 }
653 else
654 {
655 printf("%s\n", "Wrong Pade approximatin degree");
656 exit(1);
657 }
658 double *D = (double *) malloc(row*row*sizeof(double)); // d_zeros(&A2, row, row);
659// dcopy_(&row2, V, &i1, A, &i1);
660 dmcopy(row, row, V, row, A, row);
661// daxpy_(&row2, &d1, U, &i1, A, &i1);
662 daxpy_3l(row2, 1.0, U, A);
663// dcopy_(&row2, V, &i1, D, &i1);
664 dmcopy(row, row, V, row, D, row);
665// daxpy_(&row2, &dm1, U, &i1, D, &i1);
666 daxpy_3l(row2, -1.0, U, D);
667 int *ipiv = (int *) malloc(row*sizeof(int));
668 int info = 0;
669// dgesv_(&row, &row, D, &row, ipiv, A, &row, &info);
670 dgesv_3l(row, row, D, row, ipiv, A, row, &info);
671 free(ipiv);
672 free(D);
673 free(U);
674 free(V);
675 }
676
677
678
679void expm(int row, double *A)
680 {
681
682 int i;
683
684 int m_vals[] = {3, 5, 7, 9, 13};
685 double theta[] = {0.01495585217958292, 0.2539398330063230, 0.9504178996162932, 2.097847961257068, 5.371920351148152};
686 int lentheta = 5;
687
688 double normA = onenorm(row, row, A);
689
690 if(normA<=theta[4])
691 {
692 for(i=0; i<lentheta; i++)
693 {
694 if(normA<=theta[i])
695 {
696 padeapprox(m_vals[i], row, A);
697 break;
698 }
699 }
700 }
701 else
702 {
703 int s;
704 double t = frexp(normA/(theta[4]), &s);
705 s = s - (t==0.5);
706 t = pow(2,-s);
707 int row2 = row*row;
708/* int i1 = 1;*/
709// dscal_(&row2, &t, A, &i1);
710 dscal_3l(row2, t, A);
711 padeapprox(m_vals[4], row, A);
712 double *temp = (double *) malloc(row*row*sizeof(double)); // d_zeros(&A2, row, row);
713// char ta = 'n'; double alpha = 1; double beta = 0;
714 for(i=0; i<s; i++)
715 {
716// dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, &beta, temp, &row);
717 dgemm_nn_3l(row, row, row, A, row, A, row, temp, row);
718 dmcopy(row, row, temp, row, A, row);
719 }
720 free(temp);
721 }
722 }
723
724