ergo
template_blas_trsm.h
Go to the documentation of this file.
1/* Ergo, version 3.8, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
30 /* This file belongs to the template_lapack part of the Ergo source
31 * code. The source files in the template_lapack directory are modified
32 * versions of files originally distributed as CLAPACK, see the
33 * Copyright/license notice in the file template_lapack/COPYING.
34 */
35
36
37#ifndef TEMPLATE_BLAS_TRSM_HEADER
38#define TEMPLATE_BLAS_TRSM_HEADER
39
41
42template<class Treal>
43int template_blas_trsm(const char *side, const char *uplo, const char *transa, const char *diag,
44 const integer *m, const integer *n, const Treal *alpha, const Treal *a, const integer *
45 lda, Treal *b, const integer *ldb)
46{
47 /* System generated locals */
48 integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
49 /* Local variables */
50 integer info;
51 Treal temp;
52 integer i__, j, k;
53 logical lside;
54 integer nrowa;
55 logical upper;
56 logical nounit;
57#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
58#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
59/* Purpose
60 =======
61 DTRSM solves one of the matrix equations
62 op( A )*X = alpha*B, or X*op( A ) = alpha*B,
63 where alpha is a scalar, X and B are m by n matrices, A is a unit, or
64 non-unit, upper or lower triangular matrix and op( A ) is one of
65 op( A ) = A or op( A ) = A'.
66 The matrix X is overwritten on B.
67 Parameters
68 ==========
69 SIDE - CHARACTER*1.
70 On entry, SIDE specifies whether op( A ) appears on the left
71 or right of X as follows:
72 SIDE = 'L' or 'l' op( A )*X = alpha*B.
73 SIDE = 'R' or 'r' X*op( A ) = alpha*B.
74 Unchanged on exit.
75 UPLO - CHARACTER*1.
76 On entry, UPLO specifies whether the matrix A is an upper or
77 lower triangular matrix as follows:
78 UPLO = 'U' or 'u' A is an upper triangular matrix.
79 UPLO = 'L' or 'l' A is a lower triangular matrix.
80 Unchanged on exit.
81 TRANSA - CHARACTER*1.
82 On entry, TRANSA specifies the form of op( A ) to be used in
83 the matrix multiplication as follows:
84 TRANSA = 'N' or 'n' op( A ) = A.
85 TRANSA = 'T' or 't' op( A ) = A'.
86 TRANSA = 'C' or 'c' op( A ) = A'.
87 Unchanged on exit.
88 DIAG - CHARACTER*1.
89 On entry, DIAG specifies whether or not A is unit triangular
90 as follows:
91 DIAG = 'U' or 'u' A is assumed to be unit triangular.
92 DIAG = 'N' or 'n' A is not assumed to be unit
93 triangular.
94 Unchanged on exit.
95 M - INTEGER.
96 On entry, M specifies the number of rows of B. M must be at
97 least zero.
98 Unchanged on exit.
99 N - INTEGER.
100 On entry, N specifies the number of columns of B. N must be
101 at least zero.
102 Unchanged on exit.
103 ALPHA - DOUBLE PRECISION.
104 On entry, ALPHA specifies the scalar alpha. When alpha is
105 zero then A is not referenced and B need not be set before
106 entry.
107 Unchanged on exit.
108 A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m
109 when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'.
110 Before entry with UPLO = 'U' or 'u', the leading k by k
111 upper triangular part of the array A must contain the upper
112 triangular matrix and the strictly lower triangular part of
113 A is not referenced.
114 Before entry with UPLO = 'L' or 'l', the leading k by k
115 lower triangular part of the array A must contain the lower
116 triangular matrix and the strictly upper triangular part of
117 A is not referenced.
118 Note that when DIAG = 'U' or 'u', the diagonal elements of
119 A are not referenced either, but are assumed to be unity.
120 Unchanged on exit.
121 LDA - INTEGER.
122 On entry, LDA specifies the first dimension of A as declared
123 in the calling (sub) program. When SIDE = 'L' or 'l' then
124 LDA must be at least max( 1, m ), when SIDE = 'R' or 'r'
125 then LDA must be at least max( 1, n ).
126 Unchanged on exit.
127 B - DOUBLE PRECISION array of DIMENSION ( LDB, n ).
128 Before entry, the leading m by n part of the array B must
129 contain the right-hand side matrix B, and on exit is
130 overwritten by the solution matrix X.
131 LDB - INTEGER.
132 On entry, LDB specifies the first dimension of B as declared
133 in the calling (sub) program. LDB must be at least
134 max( 1, m ).
135 Unchanged on exit.
136 Level 3 Blas routine.
137 -- Written on 8-February-1989.
138 Jack Dongarra, Argonne National Laboratory.
139 Iain Duff, AERE Harwell.
140 Jeremy Du Croz, Numerical Algorithms Group Ltd.
141 Sven Hammarling, Numerical Algorithms Group Ltd.
142 Test the input parameters.
143 Parameter adjustments */
144 a_dim1 = *lda;
145 a_offset = 1 + a_dim1 * 1;
146 a -= a_offset;
147 b_dim1 = *ldb;
148 b_offset = 1 + b_dim1 * 1;
149 b -= b_offset;
150 /* Function Body */
151 lside = template_blas_lsame(side, "L");
152 if (lside) {
153 nrowa = *m;
154 } else {
155 nrowa = *n;
156 }
157 nounit = template_blas_lsame(diag, "N");
158 upper = template_blas_lsame(uplo, "U");
159 info = 0;
160 if (! lside && ! template_blas_lsame(side, "R")) {
161 info = 1;
162 } else if (! upper && ! template_blas_lsame(uplo, "L")) {
163 info = 2;
164 } else if (! template_blas_lsame(transa, "N") && ! template_blas_lsame(transa,
165 "T") && ! template_blas_lsame(transa, "C")) {
166 info = 3;
167 } else if (! template_blas_lsame(diag, "U") && ! template_blas_lsame(diag,
168 "N")) {
169 info = 4;
170 } else if (*m < 0) {
171 info = 5;
172 } else if (*n < 0) {
173 info = 6;
174 } else if (*lda < maxMACRO(1,nrowa)) {
175 info = 9;
176 } else if (*ldb < maxMACRO(1,*m)) {
177 info = 11;
178 }
179 if (info != 0) {
180 template_blas_erbla("TRSM ", &info);
181 return 0;
182 }
183/* Quick return if possible. */
184 if (*n == 0) {
185 return 0;
186 }
187/* And when alpha.eq.zero. */
188 if (*alpha == 0.) {
189 i__1 = *n;
190 for (j = 1; j <= i__1; ++j) {
191 i__2 = *m;
192 for (i__ = 1; i__ <= i__2; ++i__) {
193 b_ref(i__, j) = 0.;
194/* L10: */
195 }
196/* L20: */
197 }
198 return 0;
199 }
200/* Start the operations. */
201 if (lside) {
202 if (template_blas_lsame(transa, "N")) {
203/* Form B := alpha*inv( A )*B. */
204 if (upper) {
205 i__1 = *n;
206 for (j = 1; j <= i__1; ++j) {
207 if (*alpha != 1.) {
208 i__2 = *m;
209 for (i__ = 1; i__ <= i__2; ++i__) {
210 b_ref(i__, j) = *alpha * b_ref(i__, j);
211/* L30: */
212 }
213 }
214 for (k = *m; k >= 1; --k) {
215 if (b_ref(k, j) != 0.) {
216 if (nounit) {
217 b_ref(k, j) = b_ref(k, j) / a_ref(k, k);
218 }
219 i__2 = k - 1;
220 for (i__ = 1; i__ <= i__2; ++i__) {
221 b_ref(i__, j) = b_ref(i__, j) - b_ref(k, j) *
222 a_ref(i__, k);
223/* L40: */
224 }
225 }
226/* L50: */
227 }
228/* L60: */
229 }
230 } else {
231 i__1 = *n;
232 for (j = 1; j <= i__1; ++j) {
233 if (*alpha != 1.) {
234 i__2 = *m;
235 for (i__ = 1; i__ <= i__2; ++i__) {
236 b_ref(i__, j) = *alpha * b_ref(i__, j);
237/* L70: */
238 }
239 }
240 i__2 = *m;
241 for (k = 1; k <= i__2; ++k) {
242 if (b_ref(k, j) != 0.) {
243 if (nounit) {
244 b_ref(k, j) = b_ref(k, j) / a_ref(k, k);
245 }
246 i__3 = *m;
247 for (i__ = k + 1; i__ <= i__3; ++i__) {
248 b_ref(i__, j) = b_ref(i__, j) - b_ref(k, j) *
249 a_ref(i__, k);
250/* L80: */
251 }
252 }
253/* L90: */
254 }
255/* L100: */
256 }
257 }
258 } else {
259/* Form B := alpha*inv( A' )*B. */
260 if (upper) {
261 i__1 = *n;
262 for (j = 1; j <= i__1; ++j) {
263 i__2 = *m;
264 for (i__ = 1; i__ <= i__2; ++i__) {
265 temp = *alpha * b_ref(i__, j);
266 i__3 = i__ - 1;
267 for (k = 1; k <= i__3; ++k) {
268 temp -= a_ref(k, i__) * b_ref(k, j);
269/* L110: */
270 }
271 if (nounit) {
272 temp /= a_ref(i__, i__);
273 }
274 b_ref(i__, j) = temp;
275/* L120: */
276 }
277/* L130: */
278 }
279 } else {
280 i__1 = *n;
281 for (j = 1; j <= i__1; ++j) {
282 for (i__ = *m; i__ >= 1; --i__) {
283 temp = *alpha * b_ref(i__, j);
284 i__2 = *m;
285 for (k = i__ + 1; k <= i__2; ++k) {
286 temp -= a_ref(k, i__) * b_ref(k, j);
287/* L140: */
288 }
289 if (nounit) {
290 temp /= a_ref(i__, i__);
291 }
292 b_ref(i__, j) = temp;
293/* L150: */
294 }
295/* L160: */
296 }
297 }
298 }
299 } else {
300 if (template_blas_lsame(transa, "N")) {
301/* Form B := alpha*B*inv( A ). */
302 if (upper) {
303 i__1 = *n;
304 for (j = 1; j <= i__1; ++j) {
305 if (*alpha != 1.) {
306 i__2 = *m;
307 for (i__ = 1; i__ <= i__2; ++i__) {
308 b_ref(i__, j) = *alpha * b_ref(i__, j);
309/* L170: */
310 }
311 }
312 i__2 = j - 1;
313 for (k = 1; k <= i__2; ++k) {
314 if (a_ref(k, j) != 0.) {
315 i__3 = *m;
316 for (i__ = 1; i__ <= i__3; ++i__) {
317 b_ref(i__, j) = b_ref(i__, j) - a_ref(k, j) *
318 b_ref(i__, k);
319/* L180: */
320 }
321 }
322/* L190: */
323 }
324 if (nounit) {
325 temp = 1. / a_ref(j, j);
326 i__2 = *m;
327 for (i__ = 1; i__ <= i__2; ++i__) {
328 b_ref(i__, j) = temp * b_ref(i__, j);
329/* L200: */
330 }
331 }
332/* L210: */
333 }
334 } else {
335 for (j = *n; j >= 1; --j) {
336 if (*alpha != 1.) {
337 i__1 = *m;
338 for (i__ = 1; i__ <= i__1; ++i__) {
339 b_ref(i__, j) = *alpha * b_ref(i__, j);
340/* L220: */
341 }
342 }
343 i__1 = *n;
344 for (k = j + 1; k <= i__1; ++k) {
345 if (a_ref(k, j) != 0.) {
346 i__2 = *m;
347 for (i__ = 1; i__ <= i__2; ++i__) {
348 b_ref(i__, j) = b_ref(i__, j) - a_ref(k, j) *
349 b_ref(i__, k);
350/* L230: */
351 }
352 }
353/* L240: */
354 }
355 if (nounit) {
356 temp = 1. / a_ref(j, j);
357 i__1 = *m;
358 for (i__ = 1; i__ <= i__1; ++i__) {
359 b_ref(i__, j) = temp * b_ref(i__, j);
360/* L250: */
361 }
362 }
363/* L260: */
364 }
365 }
366 } else {
367/* Form B := alpha*B*inv( A' ). */
368 if (upper) {
369 for (k = *n; k >= 1; --k) {
370 if (nounit) {
371 temp = 1. / a_ref(k, k);
372 i__1 = *m;
373 for (i__ = 1; i__ <= i__1; ++i__) {
374 b_ref(i__, k) = temp * b_ref(i__, k);
375/* L270: */
376 }
377 }
378 i__1 = k - 1;
379 for (j = 1; j <= i__1; ++j) {
380 if (a_ref(j, k) != 0.) {
381 temp = a_ref(j, k);
382 i__2 = *m;
383 for (i__ = 1; i__ <= i__2; ++i__) {
384 b_ref(i__, j) = b_ref(i__, j) - temp * b_ref(
385 i__, k);
386/* L280: */
387 }
388 }
389/* L290: */
390 }
391 if (*alpha != 1.) {
392 i__1 = *m;
393 for (i__ = 1; i__ <= i__1; ++i__) {
394 b_ref(i__, k) = *alpha * b_ref(i__, k);
395/* L300: */
396 }
397 }
398/* L310: */
399 }
400 } else {
401 i__1 = *n;
402 for (k = 1; k <= i__1; ++k) {
403 if (nounit) {
404 temp = 1. / a_ref(k, k);
405 i__2 = *m;
406 for (i__ = 1; i__ <= i__2; ++i__) {
407 b_ref(i__, k) = temp * b_ref(i__, k);
408/* L320: */
409 }
410 }
411 i__2 = *n;
412 for (j = k + 1; j <= i__2; ++j) {
413 if (a_ref(j, k) != 0.) {
414 temp = a_ref(j, k);
415 i__3 = *m;
416 for (i__ = 1; i__ <= i__3; ++i__) {
417 b_ref(i__, j) = b_ref(i__, j) - temp * b_ref(
418 i__, k);
419/* L330: */
420 }
421 }
422/* L340: */
423 }
424 if (*alpha != 1.) {
425 i__2 = *m;
426 for (i__ = 1; i__ <= i__2; ++i__) {
427 b_ref(i__, k) = *alpha * b_ref(i__, k);
428/* L350: */
429 }
430 }
431/* L360: */
432 }
433 }
434 }
435 }
436 return 0;
437/* End of DTRSM . */
438} /* dtrsm_ */
439#undef b_ref
440#undef a_ref
441
442#endif
side
Definition: Matrix.h:75
int template_blas_erbla(const char *srname, integer *info)
Definition: template_blas_common.cc:146
logical template_blas_lsame(const char *ca, const char *cb)
Definition: template_blas_common.cc:46
int integer
Definition: template_blas_common.h:40
#define maxMACRO(a, b)
Definition: template_blas_common.h:45
bool logical
Definition: template_blas_common.h:41
int template_blas_trsm(const char *side, const char *uplo, const char *transa, const char *diag, const integer *m, const integer *n, const Treal *alpha, const Treal *a, const integer *lda, Treal *b, const integer *ldb)
Definition: template_blas_trsm.h:43
#define a_ref(a_1, a_2)
#define b_ref(a_1, a_2)