blob: 32e1e0a3c2c50c88e2fbd967d2a1d361929582b4 [file] [log] [blame]
Austin Schuh9a24b372018-01-28 16:12:29 -08001/**************************************************************************************************
2* *
3* This file is part of BLASFEO. *
4* *
5* BLASFEO -- BLAS For Embedded Optimization. *
6* Copyright (C) 2016-2017 by Gianluca Frison. *
7* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
8* All rights reserved. *
9* *
10* HPMPC is free software; you can redistribute it and/or *
11* modify it under the terms of the GNU Lesser General Public *
12* License as published by the Free Software Foundation; either *
13* version 2.1 of the License, or (at your option) any later version. *
14* *
15* HPMPC is distributed in the hope that it will be useful, *
16* but WITHOUT ANY WARRANTY; without even the implied warranty of *
17* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
18* See the GNU Lesser General Public License for more details. *
19* *
20* You should have received a copy of the GNU Lesser General Public *
21* License along with HPMPC; if not, write to the Free Software *
22* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA *
23* *
24* Author: Gianluca Frison, giaf (at) dtu.dk *
25* gianluca.frison (at) imtek.uni-freiburg.de *
26* *
27**************************************************************************************************/
28
29
30
31#if defined(LA_REFERENCE)
32
33
34
35void GEMV_N_LIBSTR(int m, int n, REAL alpha, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, REAL beta, struct STRVEC *sy, int yi, struct STRVEC *sz, int zi)
36 {
37 int ii, jj;
38 REAL
39 y_0, y_1, y_2, y_3,
40 x_0, x_1;
41 int lda = sA->m;
42 REAL *pA = sA->pA + ai + aj*lda;
43 REAL *x = sx->pa + xi;
44 REAL *y = sy->pa + yi;
45 REAL *z = sz->pa + zi;
46#if 1 // y reg version
47 ii = 0;
48 for(; ii<m-1; ii+=2)
49 {
50 y_0 = 0.0;
51 y_1 = 0.0;
52 jj = 0;
53 for(; jj<n-1; jj+=2)
54 {
55 y_0 += pA[ii+0+lda*(jj+0)] * x[jj+0] + pA[ii+0+lda*(jj+1)] * x[jj+1];
56 y_1 += pA[ii+1+lda*(jj+0)] * x[jj+0] + pA[ii+1+lda*(jj+1)] * x[jj+1];
57 }
58 if(jj<n)
59 {
60 y_0 += pA[ii+0+lda*jj] * x[jj];
61 y_1 += pA[ii+1+lda*jj] * x[jj];
62 }
63 z[ii+0] = beta * y[ii+0] + alpha * y_0;
64 z[ii+1] = beta * y[ii+1] + alpha * y_1;
65 }
66 for(; ii<m; ii++)
67 {
68 y_0 = 0.0;
69 for(jj=0; jj<n; jj++)
70 {
71 y_0 += pA[ii+lda*jj] * x[jj];
72 }
73 z[ii] = beta * y[ii] + alpha * y_0;
74 }
75#else // x reg version
76 for(ii=0; ii<n; ii++)
77 {
78 z[ii] = beta * y[ii];
79 }
80 jj = 0;
81 for(; jj<n-1; jj+=2)
82 {
83 x_0 = alpha * x[jj+0];
84 x_1 = alpha * x[jj+1];
85 ii = 0;
86 for(; ii<m-1; ii+=2)
87 {
88 z[ii+0] += pA[ii+0+lda*(jj+0)] * x_0 + pA[ii+0+lda*(jj+1)] * x_1;
89 z[ii+1] += pA[ii+1+lda*(jj+0)] * x_0 + pA[ii+1+lda*(jj+1)] * x_1;
90 }
91 for(; ii<m; ii++)
92 {
93 z[ii] += pA[ii+lda*(jj+0)] * x_0;
94 z[ii] += pA[ii+lda*(jj+1)] * x_1;
95 }
96 }
97 for(; jj<n; jj++)
98 {
99 x_0 = alpha * x[jj+0];
100 for(ii=0; ii<m; ii++)
101 {
102 z[ii] += pA[ii+lda*(jj+0)] * x_0;
103 }
104 }
105#endif
106 return;
107 }
108
109
110
111void GEMV_T_LIBSTR(int m, int n, REAL alpha, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, REAL beta, struct STRVEC *sy, int yi, struct STRVEC *sz, int zi)
112 {
113 int ii, jj;
114 REAL
115 y_0, y_1;
116 int lda = sA->m;
117 REAL *pA = sA->pA + ai + aj*lda;
118 REAL *x = sx->pa + xi;
119 REAL *y = sy->pa + yi;
120 REAL *z = sz->pa + zi;
121 jj = 0;
122 for(; jj<n-1; jj+=2)
123 {
124 y_0 = 0.0;
125 y_1 = 0.0;
126 ii = 0;
127 for(; ii<m-1; ii+=2)
128 {
129 y_0 += pA[ii+0+lda*(jj+0)] * x[ii+0] + pA[ii+1+lda*(jj+0)] * x[ii+1];
130 y_1 += pA[ii+0+lda*(jj+1)] * x[ii+0] + pA[ii+1+lda*(jj+1)] * x[ii+1];
131 }
132 if(ii<m)
133 {
134 y_0 += pA[ii+lda*(jj+0)] * x[ii];
135 y_1 += pA[ii+lda*(jj+1)] * x[ii];
136 }
137 z[jj+0] = beta * y[jj+0] + alpha * y_0;
138 z[jj+1] = beta * y[jj+1] + alpha * y_1;
139 }
140 for(; jj<n; jj++)
141 {
142 y_0 = 0.0;
143 for(ii=0; ii<m; ii++)
144 {
145 y_0 += pA[ii+lda*(jj+0)] * x[ii];
146 }
147 z[jj+0] = beta * y[jj+0] + alpha * y_0;
148 }
149 return;
150 }
151
152
153
154// TODO optimize !!!!!
155void GEMV_NT_LIBSTR(int m, int n, REAL alpha_n, REAL alpha_t, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx_n, int xi_n, struct STRVEC *sx_t, int xi_t, REAL beta_n, REAL beta_t, struct STRVEC *sy_n, int yi_n, struct STRVEC *sy_t, int yi_t, struct STRVEC *sz_n, int zi_n, struct STRVEC *sz_t, int zi_t)
156 {
157 int ii, jj;
158 REAL
159 a_00,
160 x_n_0,
161 y_t_0;
162 int lda = sA->m;
163 REAL *pA = sA->pA + ai + aj*lda;
164 REAL *x_n = sx_n->pa + xi_n;
165 REAL *x_t = sx_t->pa + xi_t;
166 REAL *y_n = sy_n->pa + yi_n;
167 REAL *y_t = sy_t->pa + yi_t;
168 REAL *z_n = sz_n->pa + zi_n;
169 REAL *z_t = sz_t->pa + zi_t;
170 for(ii=0; ii<m; ii++)
171 {
172 z_n[ii] = beta_n * y_n[ii];
173 }
174 for(jj=0; jj<n; jj++)
175 {
176 y_t_0 = 0.0;
177 x_n_0 = alpha_n * x_n[jj];
178 for(ii=0; ii<m; ii++)
179 {
180 a_00 = pA[ii+lda*jj];
181 z_n[ii] += a_00 * x_n_0;
182 y_t_0 += a_00 * x_t[ii];
183 }
184 z_t[jj] = beta_t * y_t[jj] + alpha_t * y_t_0;
185 }
186 return;
187 }
188
189
190
191// TODO optimize !!!!!
192void SYMV_L_LIBSTR(int m, int n, REAL alpha, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, REAL beta, struct STRVEC *sy, int yi, struct STRVEC *sz, int zi)
193 {
194 int ii, jj;
195 REAL
196 y_0;
197 int lda = sA->m;
198 REAL *pA = sA->pA + ai + aj*lda;
199 REAL *x = sx->pa + xi;
200 REAL *y = sy->pa + yi;
201 REAL *z = sz->pa + zi;
202 for(ii=0; ii<n; ii++)
203 {
204 y_0 = 0.0;
205 jj = 0;
206 for(; jj<=ii; jj++)
207 {
208 y_0 += pA[ii+lda*jj] * x[jj];
209 }
210 for( ; jj<m; jj++)
211 {
212 y_0 += pA[jj+lda*ii] * x[jj];
213 }
214 z[ii] = beta * y[ii] + alpha * y_0;
215 }
216 return;
217 }
218
219
220
221void TRMV_LNN_LIBSTR(int m, int n, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, struct STRVEC *sz, int zi)
222 {
223 int ii, jj;
224 REAL
225 y_0, y_1;
226 int lda = sA->m;
227 REAL *pA = sA->pA + ai + aj*lda;
228 REAL *x = sx->pa + xi;
229 REAL *z = sz->pa + zi;
230 if(m-n>0)
231 {
232 GEMV_N_LIBSTR(m-n, n, 1.0, sA, ai+n, aj, sx, xi, 0.0, sz, zi+n, sz, zi+n);
233 }
234 if(n%2!=0)
235 {
236 ii = n-1;
237 y_0 = x[ii];
238 y_0 *= pA[ii+lda*ii];
239 for(jj=0; jj<ii; jj++)
240 {
241 y_0 += pA[ii+lda*jj] * x[jj];
242 }
243 z[ii] = y_0;
244 n -= 1;
245 }
246 for(ii=n-2; ii>=0; ii-=2)
247 {
248 y_0 = x[ii+0];
249 y_1 = x[ii+1];
250 y_1 *= pA[ii+1+lda*(ii+1)];
251 y_1 += pA[ii+1+lda*(ii+0)] * y_0;
252 y_0 *= pA[ii+0+lda*(ii+0)];
253 jj = 0;
254 for(; jj<ii-1; jj+=2)
255 {
256 y_0 += pA[ii+0+lda*(jj+0)] * x[jj+0] + pA[ii+0+lda*(jj+1)] * x[jj+1];
257 y_1 += pA[ii+1+lda*(jj+0)] * x[jj+0] + pA[ii+1+lda*(jj+1)] * x[jj+1];
258 }
259// XXX there is no clean up loop !!!!!
260// for(; jj<ii; jj++)
261// {
262// y_0 += pA[ii+0+lda*jj] * x[jj];
263// y_1 += pA[ii+1+lda*jj] * x[jj];
264// }
265 z[ii+0] = y_0;
266 z[ii+1] = y_1;
267 }
268 return;
269 }
270
271
272
273void TRMV_LTN_LIBSTR(int m, int n, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, struct STRVEC *sz, int zi)
274 {
275 int ii, jj;
276 REAL
277 y_0, y_1;
278 int lda = sA->m;
279 REAL *pA = sA->pA + ai + aj*lda;
280 REAL *x = sx->pa + xi;
281 REAL *z = sz->pa + zi;
282 jj = 0;
283 for(; jj<n-1; jj+=2)
284 {
285 y_0 = x[jj+0];
286 y_1 = x[jj+1];
287 y_0 *= pA[jj+0+lda*(jj+0)];
288 y_0 += pA[jj+1+lda*(jj+0)] * y_1;
289 y_1 *= pA[jj+1+lda*(jj+1)];
290 ii = jj+2;
291 for(; ii<m-1; ii+=2)
292 {
293 y_0 += pA[ii+0+lda*(jj+0)] * x[ii+0] + pA[ii+1+lda*(jj+0)] * x[ii+1];
294 y_1 += pA[ii+0+lda*(jj+1)] * x[ii+0] + pA[ii+1+lda*(jj+1)] * x[ii+1];
295 }
296 for(; ii<m; ii++)
297 {
298 y_0 += pA[ii+lda*(jj+0)] * x[ii];
299 y_1 += pA[ii+lda*(jj+1)] * x[ii];
300 }
301 z[jj+0] = y_0;
302 z[jj+1] = y_1;
303 }
304 for(; jj<n; jj++)
305 {
306 y_0 = x[jj];
307 y_0 *= pA[jj+lda*jj];
308 for(ii=jj+1; ii<m; ii++)
309 {
310 y_0 += pA[ii+lda*jj] * x[ii];
311 }
312 z[jj] = y_0;
313 }
314 return;
315 }
316
317
318
319void TRMV_UNN_LIBSTR(int m, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, struct STRVEC *sz, int zi)
320 {
321 int ii, jj;
322 REAL
323 y_0, y_1,
324 x_0, x_1;
325 int lda = sA->m;
326 REAL *pA = sA->pA + ai + aj*lda;
327 REAL *x = sx->pa + xi;
328 REAL *z = sz->pa + zi;
329#if 1 // y reg version
330 jj = 0;
331 for(; jj<m-1; jj+=2)
332 {
333 y_0 = x[jj+0];
334 y_1 = x[jj+1];
335 y_0 = pA[jj+0+lda*(jj+0)] * y_0;
336 y_0 += pA[jj+0+lda*(jj+1)] * y_1;
337 y_1 = pA[jj+1+lda*(jj+1)] * y_1;
338 ii = jj+2;
339 for(; ii<m-1; ii+=2)
340 {
341 y_0 += pA[jj+0+lda*(ii+0)] * x[ii+0] + pA[jj+0+lda*(ii+1)] * x[ii+1];
342 y_1 += pA[jj+1+lda*(ii+0)] * x[ii+0] + pA[jj+1+lda*(ii+1)] * x[ii+1];
343 }
344 if(ii<m)
345 {
346 y_0 += pA[jj+0+lda*(ii+0)] * x[ii+0];
347 y_1 += pA[jj+1+lda*(ii+0)] * x[ii+0];
348 }
349 z[jj+0] = y_0;
350 z[jj+1] = y_1;
351 }
352 for(; jj<m; jj++)
353 {
354 y_0 = pA[jj+lda*jj] * x[jj];
355 for(ii=jj+1; ii<m; ii++)
356 {
357 y_0 += pA[jj+lda*ii] * x[ii];
358 }
359 z[jj] = y_0;
360 }
361#else // x reg version
362 if(x != z)
363 {
364 for(ii=0; ii<m; ii++)
365 z[ii] = x[ii];
366 }
367 jj = 0;
368 for(; jj<m-1; jj+=2)
369 {
370 x_0 = z[jj+0];
371 x_1 = z[jj+1];
372 ii = 0;
373 for(; ii<jj-1; ii+=2)
374 {
375 z[ii+0] += pA[ii+0+lda*(jj+0)] * x_0 + pA[ii+0+lda*(jj+1)] * x_1;
376 z[ii+1] += pA[ii+1+lda*(jj+0)] * x_0 + pA[ii+1+lda*(jj+1)] * x_1;
377 }
378// XXX there is no clean-up loop, since jj+=2 !!!!!
379// for(; ii<jj; ii++)
380// {
381// z[ii+0] += pA[ii+0+lda*(jj+0)] * x_0 + pA[ii+0+lda*(jj+1)] * x_1;
382// }
383 x_0 *= pA[jj+0+lda*(jj+0)];
384 x_0 += pA[jj+0+lda*(jj+1)] * x_1;
385 x_1 *= pA[jj+1+lda*(jj+1)];
386 z[jj+0] = x_0;
387 z[jj+1] = x_1;
388 }
389 for(; jj<m; jj++)
390 {
391 x_0 = z[jj];
392 for(ii=0; ii<jj; ii++)
393 {
394 z[ii] += pA[ii+lda*jj] * x_0;
395 }
396 x_0 *= pA[jj+lda*jj];
397 z[jj] = x_0;
398 }
399#endif
400 return;
401 }
402
403
404
405void TRMV_UTN_LIBSTR(int m, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, struct STRVEC *sz, int zi)
406 {
407 int ii, jj;
408 REAL
409 y_0, y_1;
410 int lda = sA->m;
411 REAL *pA = sA->pA + ai + aj*lda;
412 REAL *x = sx->pa + xi;
413 REAL *z = sz->pa + zi;
414 if(m%2!=0)
415 {
416 jj = m-1;
417 y_0 = pA[jj+lda*jj] * x[jj];
418 for(ii=0; ii<jj; ii++)
419 {
420 y_0 += pA[ii+lda*jj] * x[ii];
421 }
422 z[jj] = y_0;
423 m -= 1; // XXX
424 }
425 for(jj=m-2; jj>=0; jj-=2)
426 {
427 y_1 = pA[jj+1+lda*(jj+1)] * x[jj+1];
428 y_1 += pA[jj+0+lda*(jj+1)] * x[jj+0];
429 y_0 = pA[jj+0+lda*(jj+0)] * x[jj+0];
430 for(ii=0; ii<jj-1; ii+=2)
431 {
432 y_0 += pA[ii+0+lda*(jj+0)] * x[ii+0] + pA[ii+1+lda*(jj+0)] * x[ii+1];
433 y_1 += pA[ii+0+lda*(jj+1)] * x[ii+0] + pA[ii+1+lda*(jj+1)] * x[ii+1];
434 }
435// XXX there is no clean-up loop !!!!!
436// if(ii<jj)
437// {
438// y_0 += pA[ii+lda*(jj+0)] * x[ii];
439// y_1 += pA[ii+lda*(jj+1)] * x[ii];
440// }
441 z[jj+0] = y_0;
442 z[jj+1] = y_1;
443 }
444 return;
445 }
446
447
448
449void TRSV_LNN_MN_LIBSTR(int m, int n, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, struct STRVEC *sz, int zi)
450 {
451 if(m==0 | n==0)
452 return;
453#if defined(DIM_CHECK)
454 // non-negative size
455 if(m<0) printf("\n****** trsv_lnn_mn_libstr : m<0 : %d<0 *****\n", m);
456 if(n<0) printf("\n****** trsv_lnn_mn_libstr : n<0 : %d<0 *****\n", n);
457 // non-negative offset
458 if(ai<0) printf("\n****** trsv_lnn_mn_libstr : ai<0 : %d<0 *****\n", ai);
459 if(aj<0) printf("\n****** trsv_lnn_mn_libstr : aj<0 : %d<0 *****\n", aj);
460 if(xi<0) printf("\n****** trsv_lnn_mn_libstr : xi<0 : %d<0 *****\n", xi);
461 if(zi<0) printf("\n****** trsv_lnn_mn_libstr : zi<0 : %d<0 *****\n", zi);
462 // inside matrix
463 // A: m x k
464 if(ai+m > sA->m) printf("\n***** trsv_lnn_mn_libstr : ai+m > row(A) : %d+%d > %d *****\n", ai, m, sA->m);
465 if(aj+n > sA->n) printf("\n***** trsv_lnn_mn_libstr : aj+n > col(A) : %d+%d > %d *****\n", aj, n, sA->n);
466 // x: m
467 if(xi+m > sx->m) printf("\n***** trsv_lnn_mn_libstr : xi+m > size(x) : %d+%d > %d *****\n", xi, m, sx->m);
468 // z: m
469 if(zi+m > sz->m) printf("\n***** trsv_lnn_mn_libstr : zi+m > size(z) : %d+%d > %d *****\n", zi, m, sz->m);
470#endif
471 int ii, jj, j1;
472 REAL
473 y_0, y_1,
474 x_0, x_1;
475 int lda = sA->m;
476 REAL *pA = sA->pA + ai + aj*lda;
477 REAL *dA = sA->dA;
478 REAL *x = sx->pa + xi;
479 REAL *z = sz->pa + zi;
480 if(ai==0 & aj==0)
481 {
482 if(sA->use_dA!=1)
483 {
484 for(ii=0; ii<n; ii++)
485 dA[ii] = 1.0 / pA[ii+lda*ii];
486 sA->use_dA = 1;
487 }
488 }
489 else
490 {
491 for(ii=0; ii<n; ii++)
492 dA[ii] = 1.0 / pA[ii+lda*ii];
493 sA->use_dA = 0;
494 }
495#if 1 // y reg version
496 ii = 0;
497 for(; ii<n-1; ii+=2)
498 {
499 y_0 = x[ii+0];
500 y_1 = x[ii+1];
501 jj = 0;
502 for(; jj<ii-1; jj+=2)
503 {
504 y_0 -= pA[ii+0+lda*(jj+0)] * z[jj+0] + pA[ii+0+lda*(jj+1)] * z[jj+1];
505 y_1 -= pA[ii+1+lda*(jj+0)] * z[jj+0] + pA[ii+1+lda*(jj+1)] * z[jj+1];
506 }
507// XXX there is no clean-up loop !!!!!
508// if(jj<ii)
509// {
510// y_0 -= pA[ii+0+lda*(jj+0)] * z[jj+0];
511// y_1 -= pA[ii+1+lda*(jj+0)] * z[jj+0];
512// }
513 y_0 *= dA[ii+0];
514 y_1 -= pA[ii+1+lda*(jj+0)] * y_0;
515 y_1 *= dA[ii+1];
516 z[ii+0] = y_0;
517 z[ii+1] = y_1;
518 }
519 for(; ii<n; ii++)
520 {
521 y_0 = x[ii];
522 for(jj=0; jj<ii; jj++)
523 {
524 y_0 -= pA[ii+lda*jj] * z[jj];
525 }
526 y_0 *= dA[ii];
527 z[ii] = y_0;
528 }
529 for(; ii<m-1; ii+=2)
530 {
531 y_0 = x[ii+0];
532 y_1 = x[ii+1];
533 jj = 0;
534 for(; jj<n-1; jj+=2)
535 {
536 y_0 -= pA[ii+0+lda*(jj+0)] * z[jj+0] + pA[ii+0+lda*(jj+1)] * z[jj+1];
537 y_1 -= pA[ii+1+lda*(jj+0)] * z[jj+0] + pA[ii+1+lda*(jj+1)] * z[jj+1];
538 }
539 if(jj<n)
540 {
541 y_0 -= pA[ii+0+lda*(jj+0)] * z[jj+0];
542 y_1 -= pA[ii+1+lda*(jj+0)] * z[jj+0];
543 }
544 z[ii+0] = y_0;
545 z[ii+1] = y_1;
546 }
547 for(; ii<m; ii++)
548 {
549 y_0 = x[ii];
550 for(jj=0; jj<n; jj++)
551 {
552 y_0 -= pA[ii+lda*jj] * z[jj];
553 }
554 z[ii] = y_0;
555 }
556#else // x reg version
557 if(x != z)
558 {
559 for(ii=0; ii<m; ii++)
560 z[ii] = x[ii];
561 }
562 jj = 0;
563 for(; jj<n-1; jj+=2)
564 {
565 x_0 = dA[jj+0] * z[jj+0];
566 x_1 = z[jj+1] - pA[jj+1+lda*(jj+0)] * x_0;
567 x_1 = dA[jj+1] * x_1;
568 z[jj+0] = x_0;
569 z[jj+1] = x_1;
570 ii = jj+2;
571 for(; ii<m-1; ii+=2)
572 {
573 z[ii+0] -= pA[ii+0+lda*(jj+0)] * x_0 + pA[ii+0+lda*(jj+1)] * x_1;
574 z[ii+1] -= pA[ii+1+lda*(jj+0)] * x_0 + pA[ii+1+lda*(jj+1)] * x_1;
575 }
576 for(; ii<m; ii++)
577 {
578 z[ii] -= pA[ii+lda*(jj+0)] * x_0 + pA[ii+lda*(jj+1)] * x_1;
579 }
580 }
581 for(; jj<n; jj++)
582 {
583 x_0 = dA[jj] * z[jj];
584 z[jj] = x_0;
585 for(ii=jj+1; ii<m; ii++)
586 {
587 z[ii] -= pA[ii+lda*jj] * x_0;
588 }
589 }
590#endif
591 return;
592 }
593
594
595
596void TRSV_LTN_MN_LIBSTR(int m, int n, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, struct STRVEC *sz, int zi)
597 {
598 if(m==0)
599 return;
600#if defined(DIM_CHECK)
601 // non-negative size
602 if(m<0) printf("\n****** trsv_ltn_mn_libstr : m<0 : %d<0 *****\n", m);
603 if(n<0) printf("\n****** trsv_ltn_mn_libstr : n<0 : %d<0 *****\n", n);
604 // non-negative offset
605 if(ai<0) printf("\n****** trsv_ltn_mn_libstr : ai<0 : %d<0 *****\n", ai);
606 if(aj<0) printf("\n****** trsv_ltn_mn_libstr : aj<0 : %d<0 *****\n", aj);
607 if(xi<0) printf("\n****** trsv_ltn_mn_libstr : xi<0 : %d<0 *****\n", xi);
608 if(zi<0) printf("\n****** trsv_ltn_mn_libstr : zi<0 : %d<0 *****\n", zi);
609 // inside matrix
610 // A: m x k
611 if(ai+m > sA->m) printf("\n***** trsv_ltn_mn_libstr : ai+m > row(A) : %d+%d > %d *****\n", ai, m, sA->m);
612 if(aj+n > sA->n) printf("\n***** trsv_ltn_mn_libstr : aj+n > col(A) : %d+%d > %d *****\n", aj, n, sA->n);
613 // x: m
614 if(xi+m > sx->m) printf("\n***** trsv_ltn_mn_libstr : xi+m > size(x) : %d+%d > %d *****\n", xi, m, sx->m);
615 // z: m
616 if(zi+m > sz->m) printf("\n***** trsv_ltn_mn_libstr : zi+m > size(z) : %d+%d > %d *****\n", zi, m, sz->m);
617#endif
618 int ii, jj;
619 REAL
620 y_0, y_1;
621 int lda = sA->m;
622 REAL *pA = sA->pA + ai + aj*lda;
623 REAL *dA = sA->dA;
624 REAL *x = sx->pa + xi;
625 REAL *z = sz->pa + zi;
626 if(ai==0 & aj==0)
627 {
628 if(sA->use_dA!=1)
629 {
630 for(ii=0; ii<n; ii++)
631 dA[ii] = 1.0 / pA[ii+lda*ii];
632 sA->use_dA = 1;
633 }
634 }
635 else
636 {
637 for(ii=0; ii<n; ii++)
638 dA[ii] = 1.0 / pA[ii+lda*ii];
639 sA->use_dA = 0;
640 }
641 if(n%2!=0)
642 {
643 jj = n-1;
644 y_0 = x[jj];
645 for(ii=jj+1; ii<m; ii++)
646 {
647 y_0 -= pA[ii+lda*jj] * z[ii];
648 }
649 y_0 *= dA[jj];
650 z[jj] = y_0;
651 jj -= 2;
652 }
653 else
654 {
655 jj = n-2;
656 }
657 for(; jj>=0; jj-=2)
658 {
659 y_0 = x[jj+0];
660 y_1 = x[jj+1];
661 ii = jj+2;
662 for(; ii<m-1; ii+=2)
663 {
664 y_0 -= pA[ii+0+lda*(jj+0)] * z[ii+0] + pA[ii+1+lda*(jj+0)] * z[ii+1];
665 y_1 -= pA[ii+0+lda*(jj+1)] * z[ii+0] + pA[ii+1+lda*(jj+1)] * z[ii+1];
666 }
667 if(ii<m)
668 {
669 y_0 -= pA[ii+lda*(jj+0)] * z[ii];
670 y_1 -= pA[ii+lda*(jj+1)] * z[ii];
671 }
672 y_1 *= dA[jj+1];
673 y_0 -= pA[jj+1+lda*(jj+0)] * y_1;
674 y_0 *= dA[jj+0];
675 z[jj+0] = y_0;
676 z[jj+1] = y_1;
677 }
678 return;
679 }
680
681
682
683void TRSV_LNN_LIBSTR(int m, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, struct STRVEC *sz, int zi)
684 {
685 if(m==0)
686 return;
687#if defined(DIM_CHECK)
688 // non-negative size
689 if(m<0) printf("\n****** trsv_lnn_libstr : m<0 : %d<0 *****\n", m);
690 // non-negative offset
691 if(ai<0) printf("\n****** trsv_lnn_libstr : ai<0 : %d<0 *****\n", ai);
692 if(aj<0) printf("\n****** trsv_lnn_libstr : aj<0 : %d<0 *****\n", aj);
693 if(xi<0) printf("\n****** trsv_lnn_libstr : xi<0 : %d<0 *****\n", xi);
694 if(zi<0) printf("\n****** trsv_lnn_libstr : zi<0 : %d<0 *****\n", zi);
695 // inside matrix
696 // A: m x k
697 if(ai+m > sA->m) printf("\n***** trsv_lnn_libstr : ai+m > row(A) : %d+%d > %d *****\n", ai, m, sA->m);
698 if(aj+m > sA->n) printf("\n***** trsv_lnn_libstr : aj+m > col(A) : %d+%d > %d *****\n", aj, m, sA->n);
699 // x: m
700 if(xi+m > sx->m) printf("\n***** trsv_lnn_libstr : xi+m > size(x) : %d+%d > %d *****\n", xi, m, sx->m);
701 // z: m
702 if(zi+m > sz->m) printf("\n***** trsv_lnn_libstr : zi+m > size(z) : %d+%d > %d *****\n", zi, m, sz->m);
703#endif
704 int ii, jj, j1;
705 REAL
706 y_0, y_1,
707 x_0, x_1;
708 int lda = sA->m;
709 REAL *pA = sA->pA + ai + aj*lda;
710 REAL *dA = sA->dA;
711 REAL *x = sx->pa + xi;
712 REAL *z = sz->pa + zi;
713 if(ai==0 & aj==0)
714 {
715 if(sA->use_dA!=1)
716 {
717 for(ii=0; ii<m; ii++)
718 dA[ii] = 1.0 / pA[ii+lda*ii];
719 sA->use_dA = 1;
720 }
721 }
722 else
723 {
724 for(ii=0; ii<m; ii++)
725 dA[ii] = 1.0 / pA[ii+lda*ii];
726 sA->use_dA = 0;
727 }
728 ii = 0;
729 for(; ii<m-1; ii+=2)
730 {
731 y_0 = x[ii+0];
732 y_1 = x[ii+1];
733 jj = 0;
734 for(; jj<ii-1; jj+=2)
735 {
736 y_0 -= pA[ii+0+lda*(jj+0)] * z[jj+0] + pA[ii+0+lda*(jj+1)] * z[jj+1];
737 y_1 -= pA[ii+1+lda*(jj+0)] * z[jj+0] + pA[ii+1+lda*(jj+1)] * z[jj+1];
738 }
739 y_0 *= dA[ii+0];
740 y_1 -= pA[ii+1+lda*(jj+0)] * y_0;
741 y_1 *= dA[ii+1];
742 z[ii+0] = y_0;
743 z[ii+1] = y_1;
744 }
745 for(; ii<m; ii++)
746 {
747 y_0 = x[ii];
748 for(jj=0; jj<ii; jj++)
749 {
750 y_0 -= pA[ii+lda*jj] * z[jj];
751 }
752 y_0 *= dA[ii];
753 z[ii] = y_0;
754 }
755 return;
756 }
757
758
759
760void TRSV_LNU_LIBSTR(int m, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, struct STRVEC *sz, int zi)
761 {
762 if(m==0)
763 return;
764#if defined(DIM_CHECK)
765 // non-negative size
766 if(m<0) printf("\n****** trsv_lnu_libstr : m<0 : %d<0 *****\n", m);
767 // non-negative offset
768 if(ai<0) printf("\n****** trsv_lnu_libstr : ai<0 : %d<0 *****\n", ai);
769 if(aj<0) printf("\n****** trsv_lnu_libstr : aj<0 : %d<0 *****\n", aj);
770 if(xi<0) printf("\n****** trsv_lnu_libstr : xi<0 : %d<0 *****\n", xi);
771 if(zi<0) printf("\n****** trsv_lnu_libstr : zi<0 : %d<0 *****\n", zi);
772 // inside matrix
773 // A: m x k
774 if(ai+m > sA->m) printf("\n***** trsv_lnu_libstr : ai+m > row(A) : %d+%d > %d *****\n", ai, m, sA->m);
775 if(aj+m > sA->n) printf("\n***** trsv_lnu_libstr : aj+m > col(A) : %d+%d > %d *****\n", aj, m, sA->n);
776 // x: m
777 if(xi+m > sx->m) printf("\n***** trsv_lnu_libstr : xi+m > size(x) : %d+%d > %d *****\n", xi, m, sx->m);
778 // z: m
779 if(zi+m > sz->m) printf("\n***** trsv_lnu_libstr : zi+m > size(z) : %d+%d > %d *****\n", zi, m, sz->m);
780#endif
781 printf("\n***** trsv_lnu_libstr : feature not implemented yet *****\n");
782 exit(1);
783 }
784
785
786
787void TRSV_LTN_LIBSTR(int m, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, struct STRVEC *sz, int zi)
788 {
789 if(m==0)
790 return;
791#if defined(DIM_CHECK)
792 // non-negative size
793 if(m<0) printf("\n****** trsv_ltn_libstr : m<0 : %d<0 *****\n", m);
794 // non-negative offset
795 if(ai<0) printf("\n****** trsv_ltn_libstr : ai<0 : %d<0 *****\n", ai);
796 if(aj<0) printf("\n****** trsv_ltn_libstr : aj<0 : %d<0 *****\n", aj);
797 if(xi<0) printf("\n****** trsv_ltn_libstr : xi<0 : %d<0 *****\n", xi);
798 if(zi<0) printf("\n****** trsv_ltn_libstr : zi<0 : %d<0 *****\n", zi);
799 // inside matrix
800 // A: m x k
801 if(ai+m > sA->m) printf("\n***** trsv_ltn_libstr : ai+m > row(A) : %d+%d > %d *****\n", ai, m, sA->m);
802 if(aj+m > sA->n) printf("\n***** trsv_ltn_libstr : aj+m > col(A) : %d+%d > %d *****\n", aj, m, sA->n);
803 // x: m
804 if(xi+m > sx->m) printf("\n***** trsv_ltn_libstr : xi+m > size(x) : %d+%d > %d *****\n", xi, m, sx->m);
805 // z: m
806 if(zi+m > sz->m) printf("\n***** trsv_ltn_libstr : zi+m > size(z) : %d+%d > %d *****\n", zi, m, sz->m);
807#endif
808 int ii, jj;
809 REAL
810 y_0, y_1;
811 int lda = sA->m;
812 REAL *pA = sA->pA + ai + aj*lda;
813 REAL *dA = sA->dA;
814 REAL *x = sx->pa + xi;
815 REAL *z = sz->pa + zi;
816 if(ai==0 & aj==0)
817 {
818 if(sA->use_dA!=1)
819 {
820 for(ii=0; ii<m; ii++)
821 dA[ii] = 1.0 / pA[ii+lda*ii];
822 sA->use_dA = 1;
823 }
824 }
825 else
826 {
827 for(ii=0; ii<m; ii++)
828 dA[ii] = 1.0 / pA[ii+lda*ii];
829 sA->use_dA = 0;
830 }
831 if(m%2!=0)
832 {
833 jj = m-1;
834 y_0 = x[jj];
835 y_0 *= dA[jj];
836 z[jj] = y_0;
837 jj -= 2;
838 }
839 else
840 {
841 jj = m-2;
842 }
843 for(; jj>=0; jj-=2)
844 {
845 y_0 = x[jj+0];
846 y_1 = x[jj+1];
847 ii = jj+2;
848 for(; ii<m-1; ii+=2)
849 {
850 y_0 -= pA[ii+0+lda*(jj+0)] * z[ii+0] + pA[ii+1+lda*(jj+0)] * z[ii+1];
851 y_1 -= pA[ii+0+lda*(jj+1)] * z[ii+0] + pA[ii+1+lda*(jj+1)] * z[ii+1];
852 }
853 if(ii<m)
854 {
855 y_0 -= pA[ii+lda*(jj+0)] * z[ii];
856 y_1 -= pA[ii+lda*(jj+1)] * z[ii];
857 }
858 y_1 *= dA[jj+1];
859 y_0 -= pA[jj+1+lda*(jj+0)] * y_1;
860 y_0 *= dA[jj+0];
861 z[jj+0] = y_0;
862 z[jj+1] = y_1;
863 }
864 return;
865 }
866
867
868
869void TRSV_LTU_LIBSTR(int m, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, struct STRVEC *sz, int zi)
870 {
871 if(m==0)
872 return;
873#if defined(DIM_CHECK)
874 // non-negative size
875 if(m<0) printf("\n****** trsv_ltu_libstr : m<0 : %d<0 *****\n", m);
876 // non-negative offset
877 if(ai<0) printf("\n****** trsv_ltu_libstr : ai<0 : %d<0 *****\n", ai);
878 if(aj<0) printf("\n****** trsv_ltu_libstr : aj<0 : %d<0 *****\n", aj);
879 if(xi<0) printf("\n****** trsv_ltu_libstr : xi<0 : %d<0 *****\n", xi);
880 if(zi<0) printf("\n****** trsv_ltu_libstr : zi<0 : %d<0 *****\n", zi);
881 // inside matrix
882 // A: m x k
883 if(ai+m > sA->m) printf("\n***** trsv_ltu_libstr : ai+m > row(A) : %d+%d > %d *****\n", ai, m, sA->m);
884 if(aj+m > sA->n) printf("\n***** trsv_ltu_libstr : aj+m > col(A) : %d+%d > %d *****\n", aj, m, sA->n);
885 // x: m
886 if(xi+m > sx->m) printf("\n***** trsv_ltu_libstr : xi+m > size(x) : %d+%d > %d *****\n", xi, m, sx->m);
887 // z: m
888 if(zi+m > sz->m) printf("\n***** trsv_ltu_libstr : zi+m > size(z) : %d+%d > %d *****\n", zi, m, sz->m);
889#endif
890 printf("\n***** trsv_ltu_libstr : feature not implemented yet *****\n");
891 exit(1);
892 }
893
894
895
896void TRSV_UNN_LIBSTR(int m, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, struct STRVEC *sz, int zi)
897 {
898 if(m==0)
899 return;
900#if defined(DIM_CHECK)
901 // non-negative size
902 if(m<0) printf("\n****** trsv_unn_libstr : m<0 : %d<0 *****\n", m);
903 // non-negative offset
904 if(ai<0) printf("\n****** trsv_unn_libstr : ai<0 : %d<0 *****\n", ai);
905 if(aj<0) printf("\n****** trsv_unn_libstr : aj<0 : %d<0 *****\n", aj);
906 if(xi<0) printf("\n****** trsv_unn_libstr : xi<0 : %d<0 *****\n", xi);
907 if(zi<0) printf("\n****** trsv_unn_libstr : zi<0 : %d<0 *****\n", zi);
908 // inside matrix
909 // A: m x k
910 if(ai+m > sA->m) printf("\n***** trsv_unn_libstr : ai+m > row(A) : %d+%d > %d *****\n", ai, m, sA->m);
911 if(aj+m > sA->n) printf("\n***** trsv_unn_libstr : aj+m > col(A) : %d+%d > %d *****\n", aj, m, sA->n);
912 // x: m
913 if(xi+m > sx->m) printf("\n***** trsv_unn_libstr : xi+m > size(x) : %d+%d > %d *****\n", xi, m, sx->m);
914 // z: m
915 if(zi+m > sz->m) printf("\n***** trsv_unn_libstr : zi+m > size(z) : %d+%d > %d *****\n", zi, m, sz->m);
916#endif
917 printf("\n***** trsv_unn_libstr : feature not implemented yet *****\n");
918 exit(1);
919 }
920
921
922
923void TRSV_UTN_LIBSTR(int m, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, struct STRVEC *sz, int zi)
924 {
925 if(m==0)
926 return;
927#if defined(DIM_CHECK)
928 // non-negative size
929 if(m<0) printf("\n****** trsv_utn_libstr : m<0 : %d<0 *****\n", m);
930 // non-negative offset
931 if(ai<0) printf("\n****** trsv_utn_libstr : ai<0 : %d<0 *****\n", ai);
932 if(aj<0) printf("\n****** trsv_utn_libstr : aj<0 : %d<0 *****\n", aj);
933 if(xi<0) printf("\n****** trsv_utn_libstr : xi<0 : %d<0 *****\n", xi);
934 if(zi<0) printf("\n****** trsv_utn_libstr : zi<0 : %d<0 *****\n", zi);
935 // inside matrix
936 // A: m x k
937 if(ai+m > sA->m) printf("\n***** trsv_utn_libstr : ai+m > row(A) : %d+%d > %d *****\n", ai, m, sA->m);
938 if(aj+m > sA->n) printf("\n***** trsv_utn_libstr : aj+m > col(A) : %d+%d > %d *****\n", aj, m, sA->n);
939 // x: m
940 if(xi+m > sx->m) printf("\n***** trsv_utn_libstr : xi+m > size(x) : %d+%d > %d *****\n", xi, m, sx->m);
941 // z: m
942 if(zi+m > sz->m) printf("\n***** trsv_utn_libstr : zi+m > size(z) : %d+%d > %d *****\n", zi, m, sz->m);
943#endif
944 printf("\n***** trsv_utn_libstr : feature not implemented yet *****\n");
945 exit(1);
946 }
947
948
949
950#elif defined(LA_BLAS)
951
952
953
954void GEMV_N_LIBSTR(int m, int n, REAL alpha, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, REAL beta, struct STRVEC *sy, int yi, struct STRVEC *sz, int zi)
955 {
956 char cl = 'l';
957 char cn = 'n';
958 char cr = 'r';
959 char ct = 't';
960 char cu = 'u';
961 int i1 = 1;
962 int lda = sA->m;
963 REAL *pA = sA->pA + ai + aj*lda;
964 REAL *x = sx->pa + xi;
965 REAL *y = sy->pa + yi;
966 REAL *z = sz->pa + zi;
967 COPY(&m, y, &i1, z, &i1);
968 GEMV(&cn, &m, &n, &alpha, pA, &lda, x, &i1, &beta, z, &i1);
969 return;
970 }
971
972
973
974void GEMV_T_LIBSTR(int m, int n, REAL alpha, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, REAL beta, struct STRVEC *sy, int yi, struct STRVEC *sz, int zi)
975 {
976 char cl = 'l';
977 char cn = 'n';
978 char cr = 'r';
979 char ct = 't';
980 char cu = 'u';
981 int i1 = 1;
982 int lda = sA->m;
983 REAL *pA = sA->pA + ai + aj*lda;
984 REAL *x = sx->pa + xi;
985 REAL *y = sy->pa + yi;
986 REAL *z = sz->pa + zi;
987 COPY(&n, y, &i1, z, &i1);
988 GEMV(&ct, &m, &n, &alpha, pA, &lda, x, &i1, &beta, z, &i1);
989 return;
990 }
991
992
993
994void GEMV_NT_LIBSTR(int m, int n, REAL alpha_n, REAL alpha_t, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx_n, int xi_n, struct STRVEC *sx_t, int xi_t, REAL beta_n, REAL beta_t, struct STRVEC *sy_n, int yi_n, struct STRVEC *sy_t, int yi_t, struct STRVEC *sz_n, int zi_n, struct STRVEC *sz_t, int zi_t)
995 {
996 char cl = 'l';
997 char cn = 'n';
998 char cr = 'r';
999 char ct = 't';
1000 char cu = 'u';
1001 int i1 = 1;
1002 int lda = sA->m;
1003 REAL *pA = sA->pA + ai + aj*lda;
1004 REAL *x_n = sx_n->pa + xi_n;
1005 REAL *x_t = sx_t->pa + xi_t;
1006 REAL *y_n = sy_n->pa + yi_n;
1007 REAL *y_t = sy_t->pa + yi_t;
1008 REAL *z_n = sz_n->pa + zi_n;
1009 REAL *z_t = sz_t->pa + zi_t;
1010 COPY(&m, y_n, &i1, z_n, &i1);
1011 GEMV(&cn, &m, &n, &alpha_n, pA, &lda, x_n, &i1, &beta_n, z_n, &i1);
1012 COPY(&n, y_t, &i1, z_t, &i1);
1013 GEMV(&ct, &m, &n, &alpha_t, pA, &lda, x_t, &i1, &beta_t, z_t, &i1);
1014 return;
1015 }
1016
1017
1018
1019void SYMV_L_LIBSTR(int m, int n, REAL alpha, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, REAL beta, struct STRVEC *sy, int yi, struct STRVEC *sz, int zi)
1020 {
1021 char cl = 'l';
1022 char cn = 'n';
1023 char cr = 'r';
1024 char ct = 't';
1025 char cu = 'u';
1026 int i1 = 1;
1027 REAL d1 = 1.0;
1028 int lda = sA->m;
1029 REAL *pA = sA->pA + ai + aj*lda;
1030 REAL *x = sx->pa + xi;
1031 REAL *y = sy->pa + yi;
1032 REAL *z = sz->pa + zi;
1033 int tmp = m-n;
1034 COPY(&m, y, &i1, z, &i1);
1035 SYMV(&cl, &n, &alpha, pA, &lda, x, &i1, &beta, z, &i1);
1036 GEMV(&cn, &tmp, &n, &alpha, pA+n, &lda, x, &i1, &beta, z+n, &i1);
1037 GEMV(&ct, &tmp, &n, &alpha, pA+n, &lda, x+n, &i1, &d1, z, &i1);
1038 return;
1039 }
1040
1041
1042
1043void TRMV_LNN_LIBSTR(int m, int n, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, struct STRVEC *sz, int zi)
1044 {
1045 char cl = 'l';
1046 char cn = 'n';
1047 char cr = 'r';
1048 char ct = 't';
1049 char cu = 'u';
1050 int i1 = 1;
1051 REAL d1 = 1.0;
1052 REAL d0 = 0.0;
1053 REAL dm1 = -1.0;
1054 int lda = sA->m;
1055 REAL *pA = sA->pA + ai + aj*lda;
1056 REAL *x = sx->pa + xi;
1057 REAL *z = sz->pa + zi;
1058 int tmp = m-n;
1059 if(x!=z)
1060 COPY(&n, x, &i1, z, &i1);
1061 GEMV(&cn, &tmp, &n, &d1, pA+n, &lda, x, &i1, &d0, z+n, &i1);
1062 TRMV(&cl, &cn, &cn, &n, pA, &lda, z, &i1);
1063 return;
1064 }
1065
1066
1067
1068void TRMV_LTN_LIBSTR(int m, int n, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, struct STRVEC *sz, int zi)
1069 {
1070 char cl = 'l';
1071 char cn = 'n';
1072 char cr = 'r';
1073 char ct = 't';
1074 char cu = 'u';
1075 int i1 = 1;
1076 REAL d1 = 1.0;
1077 REAL dm1 = -1.0;
1078 int lda = sA->m;
1079 REAL *pA = sA->pA + ai + aj*lda;
1080 REAL *x = sx->pa + xi;
1081 REAL *z = sz->pa + zi;
1082 int tmp = m-n;
1083 if(x!=z)
1084 COPY(&n, x, &i1, z, &i1);
1085 TRMV(&cl, &ct, &cn, &n, pA, &lda, z, &i1);
1086 GEMV(&ct, &tmp, &n, &d1, pA+n, &lda, x+n, &i1, &d1, z, &i1);
1087 return;
1088 }
1089
1090
1091
1092void TRMV_UNN_LIBSTR(int m, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, struct STRVEC *sz, int zi)
1093 {
1094 char cl = 'l';
1095 char cn = 'n';
1096 char cr = 'r';
1097 char ct = 't';
1098 char cu = 'u';
1099 int i1 = 1;
1100 REAL d1 = 1.0;
1101 REAL dm1 = -1.0;
1102 int lda = sA->m;
1103 REAL *pA = sA->pA + ai + aj*lda;
1104 REAL *x = sx->pa + xi;
1105 REAL *z = sz->pa + zi;
1106 COPY(&m, x, &i1, z, &i1);
1107 TRMV(&cu, &cn, &cn, &m, pA, &lda, z, &i1);
1108 return;
1109 }
1110
1111
1112
1113void TRMV_UTN_LIBSTR(int m, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, struct STRVEC *sz, int zi)
1114 {
1115 char cl = 'l';
1116 char cn = 'n';
1117 char cr = 'r';
1118 char ct = 't';
1119 char cu = 'u';
1120 int i1 = 1;
1121 REAL d1 = 1.0;
1122 REAL dm1 = -1.0;
1123 int lda = sA->m;
1124 REAL *pA = sA->pA + ai + aj*lda;
1125 REAL *x = sx->pa + xi;
1126 REAL *z = sz->pa + zi;
1127 COPY(&m, x, &i1, z, &i1);
1128 TRMV(&cu, &ct, &cn, &m, pA, &lda, z, &i1);
1129 return;
1130 }
1131
1132
1133
1134void TRSV_LNN_MN_LIBSTR(int m, int n, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, struct STRVEC *sz, int zi)
1135 {
1136 if(m==0 | n==0)
1137 return;
1138#if defined(DIM_CHECK)
1139 // non-negative size
1140 if(m<0) printf("\n****** trsv_lnn_mn_libstr : m<0 : %d<0 *****\n", m);
1141 if(n<0) printf("\n****** trsv_lnn_mn_libstr : n<0 : %d<0 *****\n", n);
1142 // non-negative offset
1143 if(ai<0) printf("\n****** trsv_lnn_mn_libstr : ai<0 : %d<0 *****\n", ai);
1144 if(aj<0) printf("\n****** trsv_lnn_mn_libstr : aj<0 : %d<0 *****\n", aj);
1145 if(xi<0) printf("\n****** trsv_lnn_mn_libstr : xi<0 : %d<0 *****\n", xi);
1146 if(zi<0) printf("\n****** trsv_lnn_mn_libstr : zi<0 : %d<0 *****\n", zi);
1147 // inside matrix
1148 // A: m x k
1149 if(ai+m > sA->m) printf("\n***** trsv_lnn_mn_libstr : ai+m > row(A) : %d+%d > %d *****\n", ai, m, sA->m);
1150 if(aj+n > sA->n) printf("\n***** trsv_lnn_mn_libstr : aj+n > col(A) : %d+%d > %d *****\n", aj, n, sA->n);
1151 // x: m
1152 if(xi+m > sx->m) printf("\n***** trsv_lnn_mn_libstr : xi+m > size(x) : %d+%d > %d *****\n", xi, m, sx->m);
1153 // z: m
1154 if(zi+m > sz->m) printf("\n***** trsv_lnn_mn_libstr : zi+m > size(z) : %d+%d > %d *****\n", zi, m, sz->m);
1155#endif
1156 char cl = 'l';
1157 char cn = 'n';
1158 char cr = 'r';
1159 char ct = 't';
1160 char cu = 'u';
1161 int i1 = 1;
1162 REAL d1 = 1.0;
1163 REAL dm1 = -1.0;
1164 int mmn = m-n;
1165 int lda = sA->m;
1166 REAL *pA = sA->pA + ai + aj*lda;
1167 REAL *x = sx->pa + xi;
1168 REAL *z = sz->pa + zi;
1169 COPY(&m, x, &i1, z, &i1);
1170 TRSV(&cl, &cn, &cn, &n, pA, &lda, z, &i1);
1171 GEMV(&cn, &mmn, &n, &dm1, pA+n, &lda, z, &i1, &d1, z+n, &i1);
1172 return;
1173 }
1174
1175
1176
1177void TRSV_LTN_MN_LIBSTR(int m, int n, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, struct STRVEC *sz, int zi)
1178 {
1179 if(m==0)
1180 return;
1181#if defined(DIM_CHECK)
1182 // non-negative size
1183 if(m<0) printf("\n****** trsv_ltn_mn_libstr : m<0 : %d<0 *****\n", m);
1184 if(n<0) printf("\n****** trsv_ltn_mn_libstr : n<0 : %d<0 *****\n", n);
1185 // non-negative offset
1186 if(ai<0) printf("\n****** trsv_ltn_mn_libstr : ai<0 : %d<0 *****\n", ai);
1187 if(aj<0) printf("\n****** trsv_ltn_mn_libstr : aj<0 : %d<0 *****\n", aj);
1188 if(xi<0) printf("\n****** trsv_ltn_mn_libstr : xi<0 : %d<0 *****\n", xi);
1189 if(zi<0) printf("\n****** trsv_ltn_mn_libstr : zi<0 : %d<0 *****\n", zi);
1190 // inside matrix
1191 // A: m x k
1192 if(ai+m > sA->m) printf("\n***** trsv_ltn_mn_libstr : ai+m > row(A) : %d+%d > %d *****\n", ai, m, sA->m);
1193 if(aj+n > sA->n) printf("\n***** trsv_ltn_mn_libstr : aj+n > col(A) : %d+%d > %d *****\n", aj, n, sA->n);
1194 // x: m
1195 if(xi+m > sx->m) printf("\n***** trsv_ltn_mn_libstr : xi+m > size(x) : %d+%d > %d *****\n", xi, m, sx->m);
1196 // z: m
1197 if(zi+m > sz->m) printf("\n***** trsv_ltn_mn_libstr : zi+m > size(z) : %d+%d > %d *****\n", zi, m, sz->m);
1198#endif
1199 char cl = 'l';
1200 char cn = 'n';
1201 char cr = 'r';
1202 char ct = 't';
1203 char cu = 'u';
1204 int i1 = 1;
1205 REAL d1 = 1.0;
1206 REAL dm1 = -1.0;
1207 int mmn = m-n;
1208 int lda = sA->m;
1209 REAL *pA = sA->pA + ai + aj*lda;
1210 REAL *x = sx->pa + xi;
1211 REAL *z = sz->pa + zi;
1212 COPY(&m, x, &i1, z, &i1);
1213 GEMV(&ct, &mmn, &n, &dm1, pA+n, &lda, z+n, &i1, &d1, z, &i1);
1214 TRSV(&cl, &ct, &cn, &n, pA, &lda, z, &i1);
1215 return;
1216 }
1217
1218
1219
1220void TRSV_LNN_LIBSTR(int m, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, struct STRVEC *sz, int zi)
1221 {
1222 if(m==0)
1223 return;
1224#if defined(DIM_CHECK)
1225 // non-negative size
1226 if(m<0) printf("\n****** trsv_lnn_libstr : m<0 : %d<0 *****\n", m);
1227 // non-negative offset
1228 if(ai<0) printf("\n****** trsv_lnn_libstr : ai<0 : %d<0 *****\n", ai);
1229 if(aj<0) printf("\n****** trsv_lnn_libstr : aj<0 : %d<0 *****\n", aj);
1230 if(xi<0) printf("\n****** trsv_lnn_libstr : xi<0 : %d<0 *****\n", xi);
1231 if(zi<0) printf("\n****** trsv_lnn_libstr : zi<0 : %d<0 *****\n", zi);
1232 // inside matrix
1233 // A: m x k
1234 if(ai+m > sA->m) printf("\n***** trsv_lnn_libstr : ai+m > row(A) : %d+%d > %d *****\n", ai, m, sA->m);
1235 if(aj+m > sA->n) printf("\n***** trsv_lnn_libstr : aj+m > col(A) : %d+%d > %d *****\n", aj, m, sA->n);
1236 // x: m
1237 if(xi+m > sx->m) printf("\n***** trsv_lnn_libstr : xi+m > size(x) : %d+%d > %d *****\n", xi, m, sx->m);
1238 // z: m
1239 if(zi+m > sz->m) printf("\n***** trsv_lnn_libstr : zi+m > size(z) : %d+%d > %d *****\n", zi, m, sz->m);
1240#endif
1241 char cl = 'l';
1242 char cn = 'n';
1243 char cr = 'r';
1244 char ct = 't';
1245 char cu = 'u';
1246 int i1 = 1;
1247 REAL d1 = 1.0;
1248 REAL dm1 = -1.0;
1249 int lda = sA->m;
1250 REAL *pA = sA->pA + ai + aj*lda;
1251 REAL *x = sx->pa + xi;
1252 REAL *z = sz->pa + zi;
1253 COPY(&m, x, &i1, z, &i1);
1254 TRSV(&cl, &cn, &cn, &m, pA, &lda, z, &i1);
1255 return;
1256 }
1257
1258
1259
1260void TRSV_LNU_LIBSTR(int m, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, struct STRVEC *sz, int zi)
1261 {
1262 if(m==0)
1263 return;
1264#if defined(DIM_CHECK)
1265 // non-negative size
1266 if(m<0) printf("\n****** trsv_lnu_libstr : m<0 : %d<0 *****\n", m);
1267 // non-negative offset
1268 if(ai<0) printf("\n****** trsv_lnu_libstr : ai<0 : %d<0 *****\n", ai);
1269 if(aj<0) printf("\n****** trsv_lnu_libstr : aj<0 : %d<0 *****\n", aj);
1270 if(xi<0) printf("\n****** trsv_lnu_libstr : xi<0 : %d<0 *****\n", xi);
1271 if(zi<0) printf("\n****** trsv_lnu_libstr : zi<0 : %d<0 *****\n", zi);
1272 // inside matrix
1273 // A: m x k
1274 if(ai+m > sA->m) printf("\n***** trsv_lnu_libstr : ai+m > row(A) : %d+%d > %d *****\n", ai, m, sA->m);
1275 if(aj+m > sA->n) printf("\n***** trsv_lnu_libstr : aj+m > col(A) : %d+%d > %d *****\n", aj, m, sA->n);
1276 // x: m
1277 if(xi+m > sx->m) printf("\n***** trsv_lnu_libstr : xi+m > size(x) : %d+%d > %d *****\n", xi, m, sx->m);
1278 // z: m
1279 if(zi+m > sz->m) printf("\n***** trsv_lnu_libstr : zi+m > size(z) : %d+%d > %d *****\n", zi, m, sz->m);
1280#endif
1281 char cl = 'l';
1282 char cn = 'n';
1283 char cr = 'r';
1284 char ct = 't';
1285 char cu = 'u';
1286 int i1 = 1;
1287 REAL d1 = 1.0;
1288 REAL dm1 = -1.0;
1289 int lda = sA->m;
1290 REAL *pA = sA->pA + ai + aj*lda;
1291 REAL *x = sx->pa + xi;
1292 REAL *z = sz->pa + zi;
1293 COPY(&m, x, &i1, z, &i1);
1294 TRSV(&cl, &cn, &cu, &m, pA, &lda, z, &i1);
1295 return;
1296 }
1297
1298
1299
1300void TRSV_LTN_LIBSTR(int m, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, struct STRVEC *sz, int zi)
1301 {
1302 if(m==0)
1303 return;
1304#if defined(DIM_CHECK)
1305 // non-negative size
1306 if(m<0) printf("\n****** trsv_ltn_libstr : m<0 : %d<0 *****\n", m);
1307 // non-negative offset
1308 if(ai<0) printf("\n****** trsv_ltn_libstr : ai<0 : %d<0 *****\n", ai);
1309 if(aj<0) printf("\n****** trsv_ltn_libstr : aj<0 : %d<0 *****\n", aj);
1310 if(xi<0) printf("\n****** trsv_ltn_libstr : xi<0 : %d<0 *****\n", xi);
1311 if(zi<0) printf("\n****** trsv_ltn_libstr : zi<0 : %d<0 *****\n", zi);
1312 // inside matrix
1313 // A: m x k
1314 if(ai+m > sA->m) printf("\n***** trsv_ltn_libstr : ai+m > row(A) : %d+%d > %d *****\n", ai, m, sA->m);
1315 if(aj+m > sA->n) printf("\n***** trsv_ltn_libstr : aj+m > col(A) : %d+%d > %d *****\n", aj, m, sA->n);
1316 // x: m
1317 if(xi+m > sx->m) printf("\n***** trsv_ltn_libstr : xi+m > size(x) : %d+%d > %d *****\n", xi, m, sx->m);
1318 // z: m
1319 if(zi+m > sz->m) printf("\n***** trsv_ltn_libstr : zi+m > size(z) : %d+%d > %d *****\n", zi, m, sz->m);
1320#endif
1321 char cl = 'l';
1322 char cn = 'n';
1323 char cr = 'r';
1324 char ct = 't';
1325 char cu = 'u';
1326 int i1 = 1;
1327 REAL d1 = 1.0;
1328 REAL dm1 = -1.0;
1329 int lda = sA->m;
1330 REAL *pA = sA->pA + ai + aj*lda;
1331 REAL *x = sx->pa + xi;
1332 REAL *z = sz->pa + zi;
1333 COPY(&m, x, &i1, z, &i1);
1334 TRSV(&cl, &ct, &cn, &m, pA, &lda, z, &i1);
1335 return;
1336 }
1337
1338
1339
1340void TRSV_LTU_LIBSTR(int m, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, struct STRVEC *sz, int zi)
1341 {
1342 if(m==0)
1343 return;
1344#if defined(DIM_CHECK)
1345 // non-negative size
1346 if(m<0) printf("\n****** trsv_ltu_libstr : m<0 : %d<0 *****\n", m);
1347 // non-negative offset
1348 if(ai<0) printf("\n****** trsv_ltu_libstr : ai<0 : %d<0 *****\n", ai);
1349 if(aj<0) printf("\n****** trsv_ltu_libstr : aj<0 : %d<0 *****\n", aj);
1350 if(xi<0) printf("\n****** trsv_ltu_libstr : xi<0 : %d<0 *****\n", xi);
1351 if(zi<0) printf("\n****** trsv_ltu_libstr : zi<0 : %d<0 *****\n", zi);
1352 // inside matrix
1353 // A: m x k
1354 if(ai+m > sA->m) printf("\n***** trsv_ltu_libstr : ai+m > row(A) : %d+%d > %d *****\n", ai, m, sA->m);
1355 if(aj+m > sA->n) printf("\n***** trsv_ltu_libstr : aj+m > col(A) : %d+%d > %d *****\n", aj, m, sA->n);
1356 // x: m
1357 if(xi+m > sx->m) printf("\n***** trsv_ltu_libstr : xi+m > size(x) : %d+%d > %d *****\n", xi, m, sx->m);
1358 // z: m
1359 if(zi+m > sz->m) printf("\n***** trsv_ltu_libstr : zi+m > size(z) : %d+%d > %d *****\n", zi, m, sz->m);
1360#endif
1361 char cl = 'l';
1362 char cn = 'n';
1363 char cr = 'r';
1364 char ct = 't';
1365 char cu = 'u';
1366 int i1 = 1;
1367 REAL d1 = 1.0;
1368 REAL dm1 = -1.0;
1369 int lda = sA->m;
1370 REAL *pA = sA->pA + ai + aj*lda;
1371 REAL *x = sx->pa + xi;
1372 REAL *z = sz->pa + zi;
1373 COPY(&m, x, &i1, z, &i1);
1374 TRSV(&cl, &ct, &cu, &m, pA, &lda, z, &i1);
1375 return;
1376 }
1377
1378
1379
1380void TRSV_UNN_LIBSTR(int m, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, struct STRVEC *sz, int zi)
1381 {
1382 if(m==0)
1383 return;
1384#if defined(DIM_CHECK)
1385 // non-negative size
1386 if(m<0) printf("\n****** trsv_unn_libstr : m<0 : %d<0 *****\n", m);
1387 // non-negative offset
1388 if(ai<0) printf("\n****** trsv_unn_libstr : ai<0 : %d<0 *****\n", ai);
1389 if(aj<0) printf("\n****** trsv_unn_libstr : aj<0 : %d<0 *****\n", aj);
1390 if(xi<0) printf("\n****** trsv_unn_libstr : xi<0 : %d<0 *****\n", xi);
1391 if(zi<0) printf("\n****** trsv_unn_libstr : zi<0 : %d<0 *****\n", zi);
1392 // inside matrix
1393 // A: m x k
1394 if(ai+m > sA->m) printf("\n***** trsv_unn_libstr : ai+m > row(A) : %d+%d > %d *****\n", ai, m, sA->m);
1395 if(aj+m > sA->n) printf("\n***** trsv_unn_libstr : aj+m > col(A) : %d+%d > %d *****\n", aj, m, sA->n);
1396 // x: m
1397 if(xi+m > sx->m) printf("\n***** trsv_unn_libstr : xi+m > size(x) : %d+%d > %d *****\n", xi, m, sx->m);
1398 // z: m
1399 if(zi+m > sz->m) printf("\n***** trsv_unn_libstr : zi+m > size(z) : %d+%d > %d *****\n", zi, m, sz->m);
1400#endif
1401 char cl = 'l';
1402 char cn = 'n';
1403 char cr = 'r';
1404 char ct = 't';
1405 char cu = 'u';
1406 int i1 = 1;
1407 REAL d1 = 1.0;
1408 REAL dm1 = -1.0;
1409 int lda = sA->m;
1410 REAL *pA = sA->pA + ai + aj*lda;
1411 REAL *x = sx->pa + xi;
1412 REAL *z = sz->pa + zi;
1413 COPY(&m, x, &i1, z, &i1);
1414 TRSV(&cu, &cn, &cn, &m, pA, &lda, z, &i1);
1415 return;
1416 }
1417
1418
1419
1420void TRSV_UTN_LIBSTR(int m, struct STRMAT *sA, int ai, int aj, struct STRVEC *sx, int xi, struct STRVEC *sz, int zi)
1421 {
1422 if(m==0)
1423 return;
1424#if defined(DIM_CHECK)
1425 // non-negative size
1426 if(m<0) printf("\n****** trsv_utn_libstr : m<0 : %d<0 *****\n", m);
1427 // non-negative offset
1428 if(ai<0) printf("\n****** trsv_utn_libstr : ai<0 : %d<0 *****\n", ai);
1429 if(aj<0) printf("\n****** trsv_utn_libstr : aj<0 : %d<0 *****\n", aj);
1430 if(xi<0) printf("\n****** trsv_utn_libstr : xi<0 : %d<0 *****\n", xi);
1431 if(zi<0) printf("\n****** trsv_utn_libstr : zi<0 : %d<0 *****\n", zi);
1432 // inside matrix
1433 // A: m x k
1434 if(ai+m > sA->m) printf("\n***** trsv_utn_libstr : ai+m > row(A) : %d+%d > %d *****\n", ai, m, sA->m);
1435 if(aj+m > sA->n) printf("\n***** trsv_utn_libstr : aj+m > col(A) : %d+%d > %d *****\n", aj, m, sA->n);
1436 // x: m
1437 if(xi+m > sx->m) printf("\n***** trsv_utn_libstr : xi+m > size(x) : %d+%d > %d *****\n", xi, m, sx->m);
1438 // z: m
1439 if(zi+m > sz->m) printf("\n***** trsv_utn_libstr : zi+m > size(z) : %d+%d > %d *****\n", zi, m, sz->m);
1440#endif
1441 char cl = 'l';
1442 char cn = 'n';
1443 char cr = 'r';
1444 char ct = 't';
1445 char cu = 'u';
1446 int i1 = 1;
1447 REAL d1 = 1.0;
1448 REAL dm1 = -1.0;
1449 int lda = sA->m;
1450 REAL *pA = sA->pA + ai + aj*lda;
1451 REAL *x = sx->pa + xi;
1452 REAL *z = sz->pa + zi;
1453 COPY(&m, x, &i1, z, &i1);
1454 TRSV(&cu, &ct, &cn, &m, pA, &lda, z, &i1);
1455 return;
1456 }
1457
1458
1459
1460#else
1461
1462#error : wrong LA choice
1463
1464#endif
1465
1466