ergo
template_lapack_ormqr.h
Go to the documentation of this file.
1/* Ergo, version 3.8, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
30 /* This file belongs to the template_lapack part of the Ergo source
31 * code. The source files in the template_lapack directory are modified
32 * versions of files originally distributed as CLAPACK, see the
33 * Copyright/license notice in the file template_lapack/COPYING.
34 */
35
36
37#ifndef TEMPLATE_LAPACK_ORMQR_HEADER
38#define TEMPLATE_LAPACK_ORMQR_HEADER
39
40
41template<class Treal>
42int template_lapack_ormqr(char *side, char *trans, const integer *m, const integer *n,
43 const integer *k, Treal *a, const integer *lda, const Treal *tau, Treal *
44 c__, const integer *ldc, Treal *work, const integer *lwork, integer *info)
45{
46/* -- LAPACK routine (version 3.0) --
47 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
48 Courant Institute, Argonne National Lab, and Rice University
49 June 30, 1999
50
51
52 Purpose
53 =======
54
55 DORMQR overwrites the general real M-by-N matrix C with
56
57 SIDE = 'L' SIDE = 'R'
58 TRANS = 'N': Q * C C * Q
59 TRANS = 'T': Q**T * C C * Q**T
60
61 where Q is a real orthogonal matrix defined as the product of k
62 elementary reflectors
63
64 Q = H(1) H(2) . . . H(k)
65
66 as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N
67 if SIDE = 'R'.
68
69 Arguments
70 =========
71
72 SIDE (input) CHARACTER*1
73 = 'L': apply Q or Q**T from the Left;
74 = 'R': apply Q or Q**T from the Right.
75
76 TRANS (input) CHARACTER*1
77 = 'N': No transpose, apply Q;
78 = 'T': Transpose, apply Q**T.
79
80 M (input) INTEGER
81 The number of rows of the matrix C. M >= 0.
82
83 N (input) INTEGER
84 The number of columns of the matrix C. N >= 0.
85
86 K (input) INTEGER
87 The number of elementary reflectors whose product defines
88 the matrix Q.
89 If SIDE = 'L', M >= K >= 0;
90 if SIDE = 'R', N >= K >= 0.
91
92 A (input) DOUBLE PRECISION array, dimension (LDA,K)
93 The i-th column must contain the vector which defines the
94 elementary reflector H(i), for i = 1,2,...,k, as returned by
95 DGEQRF in the first k columns of its array argument A.
96 A is modified by the routine but restored on exit.
97
98 LDA (input) INTEGER
99 The leading dimension of the array A.
100 If SIDE = 'L', LDA >= max(1,M);
101 if SIDE = 'R', LDA >= max(1,N).
102
103 TAU (input) DOUBLE PRECISION array, dimension (K)
104 TAU(i) must contain the scalar factor of the elementary
105 reflector H(i), as returned by DGEQRF.
106
107 C (input/output) DOUBLE PRECISION array, dimension (LDC,N)
108 On entry, the M-by-N matrix C.
109 On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
110
111 LDC (input) INTEGER
112 The leading dimension of the array C. LDC >= max(1,M).
113
114 WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
115 On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
116
117 LWORK (input) INTEGER
118 The dimension of the array WORK.
119 If SIDE = 'L', LWORK >= max(1,N);
120 if SIDE = 'R', LWORK >= max(1,M).
121 For optimum performance LWORK >= N*NB if SIDE = 'L', and
122 LWORK >= M*NB if SIDE = 'R', where NB is the optimal
123 blocksize.
124
125 If LWORK = -1, then a workspace query is assumed; the routine
126 only calculates the optimal size of the WORK array, returns
127 this value as the first entry of the WORK array, and no error
128 message related to LWORK is issued by XERBLA.
129
130 INFO (output) INTEGER
131 = 0: successful exit
132 < 0: if INFO = -i, the i-th argument had an illegal value
133
134 =====================================================================
135
136
137 Test the input arguments
138
139 Parameter adjustments */
140 /* Table of constant values */
141 integer c__1 = 1;
142 integer c_n1 = -1;
143 integer c__2 = 2;
144 integer c__65 = 65;
145
146 /* System generated locals */
147 address a__1[2];
148 integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4,
149 i__5;
150 char ch__1[2];
151 /* Local variables */
153 integer i__;
154 Treal t[4160] /* was [65][64] */;
155 integer nbmin, iinfo, i1, i2, i3;
156 integer ib, ic, jc, nb, mi, ni;
157 integer nq, nw;
158 logical notran;
159 integer ldwork, lwkopt;
160 logical lquery;
161 integer iws;
162#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
163#define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]
164
165
166 a_dim1 = *lda;
167 a_offset = 1 + a_dim1 * 1;
168 a -= a_offset;
169 --tau;
170 c_dim1 = *ldc;
171 c_offset = 1 + c_dim1 * 1;
172 c__ -= c_offset;
173 --work;
174
175 /* Initialization added by Elias to get rid of compiler warnings. */
176 lwkopt = 0;
177 nb = 0;
178 /* Function Body */
179 *info = 0;
181 notran = template_blas_lsame(trans, "N");
182 lquery = *lwork == -1;
183
184/* NQ is the order of Q and NW is the minimum dimension of WORK */
185
186 if (left) {
187 nq = *m;
188 nw = *n;
189 } else {
190 nq = *n;
191 nw = *m;
192 }
193 if (! left && ! template_blas_lsame(side, "R")) {
194 *info = -1;
195 } else if (! notran && ! template_blas_lsame(trans, "T")) {
196 *info = -2;
197 } else if (*m < 0) {
198 *info = -3;
199 } else if (*n < 0) {
200 *info = -4;
201 } else if (*k < 0 || *k > nq) {
202 *info = -5;
203 } else if (*lda < maxMACRO(1,nq)) {
204 *info = -7;
205 } else if (*ldc < maxMACRO(1,*m)) {
206 *info = -10;
207 } else if (*lwork < maxMACRO(1,nw) && ! lquery) {
208 *info = -12;
209 }
210
211 if (*info == 0) {
212
213/* Determine the block size. NB may be at most NBMAX, where NBMAX
214 is used to define the local array T.
215
216 Computing MIN
217 Writing concatenation */
218 i__3[0] = 1, a__1[0] = side;
219 i__3[1] = 1, a__1[1] = trans;
220 template_blas_s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
221 i__1 = 64, i__2 = template_lapack_ilaenv(&c__1, "DORMQR", ch__1, m, n, k, &c_n1, (
222 ftnlen)6, (ftnlen)2);
223 nb = minMACRO(i__1,i__2);
224 lwkopt = maxMACRO(1,nw) * nb;
225 work[1] = (Treal) lwkopt;
226 }
227
228 if (*info != 0) {
229 i__1 = -(*info);
230 template_blas_erbla("ORMQR ", &i__1);
231 return 0;
232 } else if (lquery) {
233 return 0;
234 }
235
236/* Quick return if possible */
237
238 if (*m == 0 || *n == 0 || *k == 0) {
239 work[1] = 1.;
240 return 0;
241 }
242
243 nbmin = 2;
244 ldwork = nw;
245 if (nb > 1 && nb < *k) {
246 iws = nw * nb;
247 if (*lwork < iws) {
248 nb = *lwork / ldwork;
249/* Computing MAX
250 Writing concatenation */
251 i__3[0] = 1, a__1[0] = side;
252 i__3[1] = 1, a__1[1] = trans;
253 template_blas_s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
254 i__1 = 2, i__2 = template_lapack_ilaenv(&c__2, "DORMQR", ch__1, m, n, k, &c_n1, (
255 ftnlen)6, (ftnlen)2);
256 nbmin = maxMACRO(i__1,i__2);
257 }
258 } else {
259 iws = nw;
260 }
261
262 if (nb < nbmin || nb >= *k) {
263
264/* Use unblocked code */
265
266 template_lapack_orm2r(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
267 c_offset], ldc, &work[1], &iinfo);
268 } else {
269
270/* Use blocked code */
271
272 if ( ( left && ! notran ) || ( ! left && notran ) ) {
273 i1 = 1;
274 i2 = *k;
275 i3 = nb;
276 } else {
277 i1 = (*k - 1) / nb * nb + 1;
278 i2 = 1;
279 i3 = -nb;
280 }
281
282 if (left) {
283 ni = *n;
284 jc = 1;
285 } else {
286 mi = *m;
287 ic = 1;
288 }
289
290 i__1 = i2;
291 i__2 = i3;
292 for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
293/* Computing MIN */
294 i__4 = nb, i__5 = *k - i__ + 1;
295 ib = minMACRO(i__4,i__5);
296
297/* Form the triangular factor of the block reflector
298 H = H(i) H(i+1) . . . H(i+ib-1) */
299
300 i__4 = nq - i__ + 1;
301 template_lapack_larft("Forward", "Columnwise", &i__4, &ib, &a_ref(i__, i__),
302 lda, &tau[i__], t, &c__65);
303 if (left) {
304
305/* H or H' is applied to C(i:m,1:n) */
306
307 mi = *m - i__ + 1;
308 ic = i__;
309 } else {
310
311/* H or H' is applied to C(1:m,i:n) */
312
313 ni = *n - i__ + 1;
314 jc = i__;
315 }
316
317/* Apply H or H' */
318
319 template_lapack_larfb(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &
320 a_ref(i__, i__), lda, t, &c__65, &c___ref(ic, jc), ldc, &
321 work[1], &ldwork);
322/* L10: */
323 }
324 }
325 work[1] = (Treal) lwkopt;
326 return 0;
327
328/* End of DORMQR */
329
330} /* dormqr_ */
331
332#undef c___ref
333#undef a_ref
334
335
336#endif
side
Definition: Matrix.h:75
@ left
Definition: Matrix.h:75
void template_blas_s_cat(char *lp, char *rpp[], ftnlen rnp[], ftnlen *np, ftnlen ll)
Definition: template_blas_common.cc:204
int template_blas_erbla(const char *srname, integer *info)
Definition: template_blas_common.cc:146
logical template_blas_lsame(const char *ca, const char *cb)
Definition: template_blas_common.cc:46
char * address
Definition: template_blas_common.h:43
int integer
Definition: template_blas_common.h:40
int ftnlen
Definition: template_blas_common.h:42
#define minMACRO(a, b)
Definition: template_blas_common.h:46
#define maxMACRO(a, b)
Definition: template_blas_common.h:45
bool logical
Definition: template_blas_common.h:41
integer template_lapack_ilaenv(const integer *ispec, const char *name__, const char *opts, const integer *n1, const integer *n2, const integer *n3, const integer *n4, ftnlen name_len, ftnlen opts_len)
Definition: template_lapack_common.cc:281
int template_lapack_larfb(const char *side, const char *trans, const char *direct, const char *storev, const integer *m, const integer *n, const integer *k, const Treal *v, const integer *ldv, const Treal *t, const integer *ldt, Treal *c__, const integer *ldc, Treal *work, const integer *ldwork)
Definition: template_lapack_larfb.h:42
int template_lapack_larft(const char *direct, const char *storev, const integer *n, const integer *k, Treal *v, const integer *ldv, const Treal *tau, Treal *t, const integer *ldt)
Definition: template_lapack_larft.h:42
int template_lapack_orm2r(const char *side, const char *trans, const integer *m, const integer *n, const integer *k, Treal *a, const integer *lda, const Treal *tau, Treal *c__, const integer *ldc, Treal *work, integer *info)
Definition: template_lapack_orm2r.h:42
#define c___ref(a_1, a_2)
#define a_ref(a_1, a_2)
int template_lapack_ormqr(char *side, char *trans, const integer *m, const integer *n, const integer *k, Treal *a, const integer *lda, const Treal *tau, Treal *c__, const integer *ldc, Treal *work, const integer *lwork, integer *info)
Definition: template_lapack_ormqr.h:42