Amesos2 - Direct Sparse Solver Interfaces Version of the Day
klu2_solve.hpp
1/* ========================================================================== */
2/* === KLU_solve ============================================================ */
3/* ========================================================================== */
4// @HEADER
5// ***********************************************************************
6//
7// KLU2: A Direct Linear Solver package
8// Copyright 2011 Sandia Corporation
9//
10// Under terms of Contract DE-AC04-94AL85000, with Sandia Corporation, the
11// U.S. Government retains certain rights in this software.
12//
13// This library is free software; you can redistribute it and/or modify
14// it under the terms of the GNU Lesser General Public License as
15// published by the Free Software Foundation; either version 2.1 of the
16// License, or (at your option) any later version.
17//
18// This library is distributed in the hope that it will be useful, but
19// WITHOUT ANY WARRANTY; without even the implied warranty of
20// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21// Lesser General Public License for more details.
22//
23// You should have received a copy of the GNU Lesser General Public
24// License along with this library; if not, write to the Free Software
25// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
26// USA
27// Questions? Contact Mike A. Heroux (maherou@sandia.gov)
28//
29// KLU2 is derived work from KLU, licensed under LGPL, and copyrighted by
30// University of Florida. The Authors of KLU are Timothy A. Davis and
31// Eka Palamadai. See Doc/KLU_README.txt for the licensing and copyright
32// information for KLU.
33//
34// ***********************************************************************
35// @HEADER
36
37/* Solve Ax=b using the symbolic and numeric objects from KLU_analyze
38 * (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is
39 * performed. Uses Numeric->Xwork as workspace (undefined on input and output),
40 * of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with
41 * Numeric->Iwork).
42 */
43
44#ifndef KLU2_SOLVE_HPP
45#define KLU2_SOLVE_HPP
46
47#include "klu2_internal.h"
48#include "klu2.hpp"
49
50template <typename Entry, typename Int>
51Int KLU_solve
52(
53 /* inputs, not modified */
54 KLU_symbolic<Entry, Int> *Symbolic,
55 KLU_numeric<Entry, Int> *Numeric,
56 Int d, /* leading dimension of B */
57 Int nrhs, /* number of right-hand-sides */
58
59 /* right-hand-side on input, overwritten with solution to Ax=b on output */
60 /* TODO: Switched from double to Entry, check for all cases */
61 Entry B [ ], /* size n*nrhs, in column-oriented form, with
62 * leading dimension d. */
63 /* --------------- */
64 KLU_common<Entry, Int> *Common
65)
66{
67 Entry x [4], offik, s ;
68 double rs, *Rs ;
69 Entry *Offx, *X, *Bz, *Udiag ;
70 Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
71 Unit **LUbx ;
72 Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
73
74 /* ---------------------------------------------------------------------- */
75 /* check inputs */
76 /* ---------------------------------------------------------------------- */
77
78 if (Common == NULL)
79 {
80 return (FALSE) ;
81 }
82 if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
83 B == NULL)
84 {
85 Common->status = KLU_INVALID ;
86 return (FALSE) ;
87 }
88 Common->status = KLU_OK ;
89
90 /* ---------------------------------------------------------------------- */
91 /* get the contents of the Symbolic object */
92 /* ---------------------------------------------------------------------- */
93
94 Bz = (Entry *) B ;
95 n = Symbolic->n ;
96 nblocks = Symbolic->nblocks ;
97 Q = Symbolic->Q ;
98 R = Symbolic->R ;
99
100 /* ---------------------------------------------------------------------- */
101 /* get the contents of the Numeric object */
102 /* ---------------------------------------------------------------------- */
103
104 ASSERT (nblocks == Numeric->nblocks) ;
105 Pnum = Numeric->Pnum ;
106 Offp = Numeric->Offp ;
107 Offi = Numeric->Offi ;
108 Offx = (Entry *) Numeric->Offx ;
109
110 Lip = Numeric->Lip ;
111 Llen = Numeric->Llen ;
112 Uip = Numeric->Uip ;
113 Ulen = Numeric->Ulen ;
114 LUbx = (Unit **) Numeric->LUbx ;
115 Udiag = (Entry *) Numeric->Udiag ;
116
117 Rs = Numeric->Rs ;
118 X = (Entry *) Numeric->Xwork ;
119
120 ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
121
122 /* ---------------------------------------------------------------------- */
123 /* solve in chunks of 4 columns at a time */
124 /* ---------------------------------------------------------------------- */
125
126 for (chunk = 0 ; chunk < nrhs ; chunk += 4)
127 {
128
129 /* ------------------------------------------------------------------ */
130 /* get the size of the current chunk */
131 /* ------------------------------------------------------------------ */
132
133 nr = MIN (nrhs - chunk, 4) ;
134
135 /* ------------------------------------------------------------------ */
136 /* scale and permute the right hand side, X = P*(R\B) */
137 /* ------------------------------------------------------------------ */
138
139 if (Rs == NULL)
140 {
141
142 /* no scaling */
143 switch (nr)
144 {
145
146 case 1:
147
148 for (k = 0 ; k < n ; k++)
149 {
150 X [k] = Bz [Pnum [k]] ;
151 }
152 break ;
153
154 case 2:
155
156 for (k = 0 ; k < n ; k++)
157 {
158 i = Pnum [k] ;
159 X [2*k ] = Bz [i ] ;
160 X [2*k + 1] = Bz [i + d ] ;
161 }
162 break ;
163
164 case 3:
165
166 for (k = 0 ; k < n ; k++)
167 {
168 i = Pnum [k] ;
169 X [3*k ] = Bz [i ] ;
170 X [3*k + 1] = Bz [i + d ] ;
171 X [3*k + 2] = Bz [i + d*2] ;
172 }
173 break ;
174
175 case 4:
176
177 for (k = 0 ; k < n ; k++)
178 {
179 i = Pnum [k] ;
180 X [4*k ] = Bz [i ] ;
181 X [4*k + 1] = Bz [i + d ] ;
182 X [4*k + 2] = Bz [i + d*2] ;
183 X [4*k + 3] = Bz [i + d*3] ;
184 }
185 break ;
186 }
187
188 }
189 else
190 {
191
192 switch (nr)
193 {
194
195 case 1:
196
197 for (k = 0 ; k < n ; k++)
198 {
199 SCALE_DIV_ASSIGN (X [k], Bz [Pnum [k]], Rs [k]) ;
200 }
201 break ;
202
203 case 2:
204
205 for (k = 0 ; k < n ; k++)
206 {
207 i = Pnum [k] ;
208 rs = Rs [k] ;
209 SCALE_DIV_ASSIGN (X [2*k], Bz [i], rs) ;
210 SCALE_DIV_ASSIGN (X [2*k + 1], Bz [i + d], rs) ;
211 }
212 break ;
213
214 case 3:
215
216 for (k = 0 ; k < n ; k++)
217 {
218 i = Pnum [k] ;
219 rs = Rs [k] ;
220 SCALE_DIV_ASSIGN (X [3*k], Bz [i], rs) ;
221 SCALE_DIV_ASSIGN (X [3*k + 1], Bz [i + d], rs) ;
222 SCALE_DIV_ASSIGN (X [3*k + 2], Bz [i + d*2], rs) ;
223 }
224 break ;
225
226 case 4:
227
228 for (k = 0 ; k < n ; k++)
229 {
230 i = Pnum [k] ;
231 rs = Rs [k] ;
232 SCALE_DIV_ASSIGN (X [4*k], Bz [i], rs) ;
233 SCALE_DIV_ASSIGN (X [4*k + 1], Bz [i + d], rs) ;
234 SCALE_DIV_ASSIGN (X [4*k + 2], Bz [i + d*2], rs) ;
235 SCALE_DIV_ASSIGN (X [4*k + 3], Bz [i + d*3], rs) ;
236 }
237 break ;
238 }
239 }
240
241 /* ------------------------------------------------------------------ */
242 /* solve X = (L*U + Off)\X */
243 /* ------------------------------------------------------------------ */
244
245 for (block = nblocks-1 ; block >= 0 ; block--)
246 {
247
248 /* -------------------------------------------------------------- */
249 /* the block of size nk is from rows/columns k1 to k2-1 */
250 /* -------------------------------------------------------------- */
251
252 k1 = R [block] ;
253 k2 = R [block+1] ;
254 nk = k2 - k1 ;
255 PRINTF (("solve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
256
257 /* solve the block system */
258 if (nk == 1)
259 {
260 s = Udiag [k1] ;
261 switch (nr)
262 {
263
264 case 1:
265 DIV (X [k1], X [k1], s) ;
266 break ;
267
268 case 2:
269 DIV (X [2*k1], X [2*k1], s) ;
270 DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
271 break ;
272
273 case 3:
274 DIV (X [3*k1], X [3*k1], s) ;
275 DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
276 DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
277 break ;
278
279 case 4:
280 DIV (X [4*k1], X [4*k1], s) ;
281 DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
282 DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
283 DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
284 break ;
285
286 }
287 }
288 else
289 {
290 KLU_lsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
291 X + nr*k1) ;
292 KLU_usolve (nk, Uip + k1, Ulen + k1, LUbx [block],
293 Udiag + k1, nr, X + nr*k1) ;
294 }
295
296 /* -------------------------------------------------------------- */
297 /* block back-substitution for the off-diagonal-block entries */
298 /* -------------------------------------------------------------- */
299
300 if (block > 0)
301 {
302 switch (nr)
303 {
304
305 case 1:
306
307 for (k = k1 ; k < k2 ; k++)
308 {
309 pend = Offp [k+1] ;
310 x [0] = X [k] ;
311 for (p = Offp [k] ; p < pend ; p++)
312 {
313 MULT_SUB (X [Offi [p]], Offx [p], x [0]) ;
314 }
315 }
316 break ;
317
318 case 2:
319
320 for (k = k1 ; k < k2 ; k++)
321 {
322 pend = Offp [k+1] ;
323 x [0] = X [2*k ] ;
324 x [1] = X [2*k + 1] ;
325 for (p = Offp [k] ; p < pend ; p++)
326 {
327 i = Offi [p] ;
328 offik = Offx [p] ;
329 MULT_SUB (X [2*i], offik, x [0]) ;
330 MULT_SUB (X [2*i + 1], offik, x [1]) ;
331 }
332 }
333 break ;
334
335 case 3:
336
337 for (k = k1 ; k < k2 ; k++)
338 {
339 pend = Offp [k+1] ;
340 x [0] = X [3*k ] ;
341 x [1] = X [3*k + 1] ;
342 x [2] = X [3*k + 2] ;
343 for (p = Offp [k] ; p < pend ; p++)
344 {
345 i = Offi [p] ;
346 offik = Offx [p] ;
347 MULT_SUB (X [3*i], offik, x [0]) ;
348 MULT_SUB (X [3*i + 1], offik, x [1]) ;
349 MULT_SUB (X [3*i + 2], offik, x [2]) ;
350 }
351 }
352 break ;
353
354 case 4:
355
356 for (k = k1 ; k < k2 ; k++)
357 {
358 pend = Offp [k+1] ;
359 x [0] = X [4*k ] ;
360 x [1] = X [4*k + 1] ;
361 x [2] = X [4*k + 2] ;
362 x [3] = X [4*k + 3] ;
363 for (p = Offp [k] ; p < pend ; p++)
364 {
365 i = Offi [p] ;
366 offik = Offx [p] ;
367 MULT_SUB (X [4*i], offik, x [0]) ;
368 MULT_SUB (X [4*i + 1], offik, x [1]) ;
369 MULT_SUB (X [4*i + 2], offik, x [2]) ;
370 MULT_SUB (X [4*i + 3], offik, x [3]) ;
371 }
372 }
373 break ;
374 }
375 }
376 }
377
378 /* ------------------------------------------------------------------ */
379 /* permute the result, Bz = Q*X */
380 /* ------------------------------------------------------------------ */
381
382 switch (nr)
383 {
384
385 case 1:
386
387 for (k = 0 ; k < n ; k++)
388 {
389 Bz [Q [k]] = X [k] ;
390 }
391 break ;
392
393 case 2:
394
395 for (k = 0 ; k < n ; k++)
396 {
397 i = Q [k] ;
398 Bz [i ] = X [2*k ] ;
399 Bz [i + d ] = X [2*k + 1] ;
400 }
401 break ;
402
403 case 3:
404
405 for (k = 0 ; k < n ; k++)
406 {
407 i = Q [k] ;
408 Bz [i ] = X [3*k ] ;
409 Bz [i + d ] = X [3*k + 1] ;
410 Bz [i + d*2] = X [3*k + 2] ;
411 }
412 break ;
413
414 case 4:
415
416 for (k = 0 ; k < n ; k++)
417 {
418 i = Q [k] ;
419 Bz [i ] = X [4*k ] ;
420 Bz [i + d ] = X [4*k + 1] ;
421 Bz [i + d*2] = X [4*k + 2] ;
422 Bz [i + d*3] = X [4*k + 3] ;
423 }
424 break ;
425 }
426
427 /* ------------------------------------------------------------------ */
428 /* go to the next chunk of B */
429 /* ------------------------------------------------------------------ */
430
431 Bz += d*4 ;
432 }
433 return (TRUE) ;
434}
435
436
437// Require Xout and B to have equal number of entries, pre-allocated
438template <typename Entry, typename Int>
439Int KLU_solve2
440(
441 /* inputs, not modified */
442 KLU_symbolic<Entry, Int> *Symbolic,
443 KLU_numeric<Entry, Int> *Numeric,
444 Int d, /* leading dimension of B */
445 Int nrhs, /* number of right-hand-sides */
446
447 /* right-hand-side on input, overwritten with solution to Ax=b on output */
448 /* TODO: Switched from double to Entry, check for all cases */
449 Entry B [ ], /* size n*1, in column-oriented form, with
450 * leading dimension d. */
451 Entry Xout [ ], /* size n*1, in column-oriented form, with
452 * leading dimension d. */
453 /* --------------- */
454 KLU_common<Entry, Int> *Common
455)
456{
457 Entry x [4], offik, s ;
458 double rs, *Rs ;
459 Entry *Offx, *X, *Bz, *Udiag ;
460 Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
461 Unit **LUbx ;
462 Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
463
464 /* ---------------------------------------------------------------------- */
465 /* check inputs */
466 /* ---------------------------------------------------------------------- */
467
468 if (Common == NULL)
469 {
470 return (FALSE) ;
471 }
472 if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
473 B == NULL || Xout == NULL)
474 {
475 Common->status = KLU_INVALID ;
476 return (FALSE) ;
477 }
478 Common->status = KLU_OK ;
479
480 /* ---------------------------------------------------------------------- */
481 /* get the contents of the Symbolic object */
482 /* ---------------------------------------------------------------------- */
483
484 Bz = (Entry *) B ;
485 n = Symbolic->n ;
486 nblocks = Symbolic->nblocks ;
487 Q = Symbolic->Q ;
488 R = Symbolic->R ;
489
490 /* ---------------------------------------------------------------------- */
491 /* get the contents of the Numeric object */
492 /* ---------------------------------------------------------------------- */
493
494 ASSERT (nblocks == Numeric->nblocks) ;
495 Pnum = Numeric->Pnum ;
496 Offp = Numeric->Offp ;
497 Offi = Numeric->Offi ;
498 Offx = (Entry *) Numeric->Offx ;
499
500 Lip = Numeric->Lip ;
501 Llen = Numeric->Llen ;
502 Uip = Numeric->Uip ;
503 Ulen = Numeric->Ulen ;
504 LUbx = (Unit **) Numeric->LUbx ;
505 Udiag = (Entry *) Numeric->Udiag ;
506
507 Rs = Numeric->Rs ;
508 X = (Entry *) Numeric->Xwork ;
509
510 ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
511
512 /* ---------------------------------------------------------------------- */
513 /* solve in chunks of 4 columns at a time */
514 /* ---------------------------------------------------------------------- */
515
516 for (chunk = 0 ; chunk < nrhs ; chunk += 4)
517 {
518
519 /* ------------------------------------------------------------------ */
520 /* get the size of the current chunk */
521 /* ------------------------------------------------------------------ */
522
523 nr = MIN (nrhs - chunk, 4) ;
524
525 /* ------------------------------------------------------------------ */
526 /* scale and permute the right hand side, X = P*(R\B) */
527 /* ------------------------------------------------------------------ */
528
529 if (Rs == NULL)
530 {
531
532 /* no scaling */
533 switch (nr)
534 {
535
536 case 1:
537
538 for (k = 0 ; k < n ; k++)
539 {
540 X [k] = Bz [Pnum [k]] ;
541 }
542 break ;
543
544 case 2:
545
546 for (k = 0 ; k < n ; k++)
547 {
548 i = Pnum [k] ;
549 X [2*k ] = Bz [i ] ;
550 X [2*k + 1] = Bz [i + d ] ;
551 }
552 break ;
553
554 case 3:
555
556 for (k = 0 ; k < n ; k++)
557 {
558 i = Pnum [k] ;
559 X [3*k ] = Bz [i ] ;
560 X [3*k + 1] = Bz [i + d ] ;
561 X [3*k + 2] = Bz [i + d*2] ;
562 }
563 break ;
564
565 case 4:
566
567 for (k = 0 ; k < n ; k++)
568 {
569 i = Pnum [k] ;
570 X [4*k ] = Bz [i ] ;
571 X [4*k + 1] = Bz [i + d ] ;
572 X [4*k + 2] = Bz [i + d*2] ;
573 X [4*k + 3] = Bz [i + d*3] ;
574 }
575 break ;
576 }
577
578 }
579 else
580 {
581
582 switch (nr)
583 {
584
585 case 1:
586
587 for (k = 0 ; k < n ; k++)
588 {
589 SCALE_DIV_ASSIGN (X [k], Bz [Pnum [k]], Rs [k]) ;
590 }
591 break ;
592
593 case 2:
594
595 for (k = 0 ; k < n ; k++)
596 {
597 i = Pnum [k] ;
598 rs = Rs [k] ;
599 SCALE_DIV_ASSIGN (X [2*k], Bz [i], rs) ;
600 SCALE_DIV_ASSIGN (X [2*k + 1], Bz [i + d], rs) ;
601 }
602 break ;
603
604 case 3:
605
606 for (k = 0 ; k < n ; k++)
607 {
608 i = Pnum [k] ;
609 rs = Rs [k] ;
610 SCALE_DIV_ASSIGN (X [3*k], Bz [i], rs) ;
611 SCALE_DIV_ASSIGN (X [3*k + 1], Bz [i + d], rs) ;
612 SCALE_DIV_ASSIGN (X [3*k + 2], Bz [i + d*2], rs) ;
613 }
614 break ;
615
616 case 4:
617
618 for (k = 0 ; k < n ; k++)
619 {
620 i = Pnum [k] ;
621 rs = Rs [k] ;
622 SCALE_DIV_ASSIGN (X [4*k], Bz [i], rs) ;
623 SCALE_DIV_ASSIGN (X [4*k + 1], Bz [i + d], rs) ;
624 SCALE_DIV_ASSIGN (X [4*k + 2], Bz [i + d*2], rs) ;
625 SCALE_DIV_ASSIGN (X [4*k + 3], Bz [i + d*3], rs) ;
626 }
627 break ;
628 }
629 }
630
631 /* ------------------------------------------------------------------ */
632 /* solve X = (L*U + Off)\X */
633 /* ------------------------------------------------------------------ */
634
635 for (block = nblocks-1 ; block >= 0 ; block--)
636 {
637
638 /* -------------------------------------------------------------- */
639 /* the block of size nk is from rows/columns k1 to k2-1 */
640 /* -------------------------------------------------------------- */
641
642 k1 = R [block] ;
643 k2 = R [block+1] ;
644 nk = k2 - k1 ;
645 PRINTF (("solve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
646
647 /* solve the block system */
648 if (nk == 1)
649 {
650 s = Udiag [k1] ;
651 switch (nr)
652 {
653
654 case 1:
655 DIV (X [k1], X [k1], s) ;
656 break ;
657
658 case 2:
659 DIV (X [2*k1], X [2*k1], s) ;
660 DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
661 break ;
662
663 case 3:
664 DIV (X [3*k1], X [3*k1], s) ;
665 DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
666 DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
667 break ;
668
669 case 4:
670 DIV (X [4*k1], X [4*k1], s) ;
671 DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
672 DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
673 DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
674 break ;
675
676 }
677 }
678 else
679 {
680 KLU_lsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
681 X + nr*k1) ;
682 KLU_usolve (nk, Uip + k1, Ulen + k1, LUbx [block],
683 Udiag + k1, nr, X + nr*k1) ;
684 }
685
686 /* -------------------------------------------------------------- */
687 /* block back-substitution for the off-diagonal-block entries */
688 /* -------------------------------------------------------------- */
689
690 if (block > 0)
691 {
692 switch (nr)
693 {
694
695 case 1:
696
697 for (k = k1 ; k < k2 ; k++)
698 {
699 pend = Offp [k+1] ;
700 x [0] = X [k] ;
701 for (p = Offp [k] ; p < pend ; p++)
702 {
703 MULT_SUB (X [Offi [p]], Offx [p], x [0]) ;
704 }
705 }
706 break ;
707
708 case 2:
709
710 for (k = k1 ; k < k2 ; k++)
711 {
712 pend = Offp [k+1] ;
713 x [0] = X [2*k ] ;
714 x [1] = X [2*k + 1] ;
715 for (p = Offp [k] ; p < pend ; p++)
716 {
717 i = Offi [p] ;
718 offik = Offx [p] ;
719 MULT_SUB (X [2*i], offik, x [0]) ;
720 MULT_SUB (X [2*i + 1], offik, x [1]) ;
721 }
722 }
723 break ;
724
725 case 3:
726
727 for (k = k1 ; k < k2 ; k++)
728 {
729 pend = Offp [k+1] ;
730 x [0] = X [3*k ] ;
731 x [1] = X [3*k + 1] ;
732 x [2] = X [3*k + 2] ;
733 for (p = Offp [k] ; p < pend ; p++)
734 {
735 i = Offi [p] ;
736 offik = Offx [p] ;
737 MULT_SUB (X [3*i], offik, x [0]) ;
738 MULT_SUB (X [3*i + 1], offik, x [1]) ;
739 MULT_SUB (X [3*i + 2], offik, x [2]) ;
740 }
741 }
742 break ;
743
744 case 4:
745
746 for (k = k1 ; k < k2 ; k++)
747 {
748 pend = Offp [k+1] ;
749 x [0] = X [4*k ] ;
750 x [1] = X [4*k + 1] ;
751 x [2] = X [4*k + 2] ;
752 x [3] = X [4*k + 3] ;
753 for (p = Offp [k] ; p < pend ; p++)
754 {
755 i = Offi [p] ;
756 offik = Offx [p] ;
757 MULT_SUB (X [4*i], offik, x [0]) ;
758 MULT_SUB (X [4*i + 1], offik, x [1]) ;
759 MULT_SUB (X [4*i + 2], offik, x [2]) ;
760 MULT_SUB (X [4*i + 3], offik, x [3]) ;
761 }
762 }
763 break ;
764 }
765 }
766 }
767
768 /* ------------------------------------------------------------------ */
769 /* permute the result, Bz = Q*X */
770 /* store result in Xout */
771 /* ------------------------------------------------------------------ */
772
773 switch (nr)
774 {
775
776 case 1:
777
778 for (k = 0 ; k < n ; k++)
779 {
780 Xout [Q [k]] = X [k] ;
781 }
782 break ;
783
784 case 2:
785
786 for (k = 0 ; k < n ; k++)
787 {
788 i = Q [k] ;
789 Xout [i ] = X [2*k ] ;
790 Xout [i + d ] = X [2*k + 1] ;
791 }
792 break ;
793
794 case 3:
795
796 for (k = 0 ; k < n ; k++)
797 {
798 i = Q [k] ;
799 Xout [i ] = X [3*k ] ;
800 Xout [i + d ] = X [3*k + 1] ;
801 Xout [i + d*2] = X [3*k + 2] ;
802 }
803 break ;
804
805 case 4:
806
807 for (k = 0 ; k < n ; k++)
808 {
809 i = Q [k] ;
810 Xout [i ] = X [4*k ] ;
811 Xout [i + d ] = X [4*k + 1] ;
812 Xout [i + d*2] = X [4*k + 2] ;
813 Xout [i + d*3] = X [4*k + 3] ;
814 }
815 break ;
816 }
817
818 /* ------------------------------------------------------------------ */
819 /* go to the next chunk of B */
820 /* ------------------------------------------------------------------ */
821
822 Xout += d*4 ;
823 }
824 return (TRUE) ;
825}
826
827
828#endif