Actual source code: dvd_blas.c
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2011, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7:
8: SLEPc is free software: you can redistribute it and/or modify it under the
9: terms of version 3 of the GNU Lesser General Public License as published by
10: the Free Software Foundation.
12: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
13: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
15: more details.
17: You should have received a copy of the GNU Lesser General Public License
18: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
19: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
20: */
22: #include <private/vecimplslepc.h>
23: #include davidson.h
25: PetscLogEvent SLEPC_SlepcDenseMatProd = 0;
26: PetscLogEvent SLEPC_SlepcDenseNorm = 0;
27: PetscLogEvent SLEPC_SlepcDenseOrth = 0;
28: PetscLogEvent SLEPC_SlepcDenseCopy = 0;
29: PetscLogEvent SLEPC_VecsMult = 0;
31: void dvd_sum_local(void *in, void *out, PetscMPIInt *cnt,MPI_Datatype *t);
32: PetscErrorCode VecsMultS_copy_func(PetscScalar *out, PetscInt size_out,
33: void *ptr);
37: /*
38: Compute C <- a*A*B + b*C, where
39: ldC, the leading dimension of C,
40: ldA, the leading dimension of A,
41: rA, cA, rows and columns of A,
42: At, if true use the transpose of A instead,
43: ldB, the leading dimension of B,
44: rB, cB, rows and columns of B,
45: Bt, if true use the transpose of B instead
46: */
47: PetscErrorCode SlepcDenseMatProd(PetscScalar *C, PetscInt _ldC, PetscScalar b,
48: PetscScalar a,
49: const PetscScalar *A, PetscInt _ldA, PetscInt rA, PetscInt cA, PetscBool At,
50: const PetscScalar *B, PetscInt _ldB, PetscInt rB, PetscInt cB, PetscBool Bt)
51: {
52: PetscErrorCode ierr;
53: PetscInt tmp;
54: PetscBLASInt m, n, k, ldA = _ldA, ldB = _ldB, ldC = _ldC;
55: const char *N = "N", *T = "C", *qA = N, *qB = N;
59: if ((rA == 0) || (cB == 0)) { return(0); }
61: PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);
63: /* Transpose if needed */
64: if (At) tmp = rA, rA = cA, cA = tmp, qA = T;
65: if (Bt) tmp = rB, rB = cB, cB = tmp, qB = T;
66:
67: /* Check size */
68: if (cA != rB) {
69: SETERRQ(PETSC_COMM_SELF,1, "Matrix dimensions do not match");
70: }
71:
72: /* Do stub */
73: if ((rA == 1) && (cA == 1) && (cB == 1)) {
74: if (!At && !Bt) *C = *A * *B;
75: else if (At && !Bt) *C = PetscConj(*A) * *B;
76: else if (!At && Bt) *C = *A * PetscConj(*B);
77: else *C = PetscConj(*A) * PetscConj(*B);
78: m = n = k = 1;
79: } else {
80: m = rA; n = cB; k = cA;
81: BLASgemm_(qA, qB, &m, &n, &k, &a, (PetscScalar*)A, &ldA, (PetscScalar*)B,
82: &ldB, &b, C, &ldC);
83: }
85: PetscLogFlops(m*n*2*k);
86: PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);
88: return(0);
89: }
93: /*
94: Compute C <- A*B, where
95: sC, structure of C,
96: ldC, the leading dimension of C,
97: sA, structure of A,
98: ldA, the leading dimension of A,
99: rA, cA, rows and columns of A,
100: At, if true use the transpose of A instead,
101: sB, structure of B,
102: ldB, the leading dimension of B,
103: rB, cB, rows and columns of B,
104: Bt, if true use the transpose of B instead
105: */
106: PetscErrorCode SlepcDenseMatProdTriang(
107: PetscScalar *C, MatType_t sC, PetscInt ldC,
108: const PetscScalar *A, MatType_t sA, PetscInt ldA, PetscInt rA, PetscInt cA,
109: PetscBool At,
110: const PetscScalar *B, MatType_t sB, PetscInt ldB, PetscInt rB, PetscInt cB,
111: PetscBool Bt)
112: {
113: PetscErrorCode ierr;
114: PetscInt tmp;
115: PetscScalar one=1.0, zero=0.0;
116: PetscBLASInt rC, cC, _ldA = ldA, _ldB = ldB, _ldC = ldC;
120: if ((rA == 0) || (cB == 0)) { return(0); }
122: /* Transpose if needed */
123: if (At) tmp = rA, rA = cA, cA = tmp;
124: if (Bt) tmp = rB, rB = cB, cB = tmp;
125:
126: /* Check size */
127: if (cA != rB) SETERRQ(PETSC_COMM_SELF,1, "Matrix dimensions do not match");
128: if (sB != 0) SETERRQ(PETSC_COMM_SELF,1, "Matrix type not supported for B");
130: /* Optimized version: trivial case */
131: if ((rA == 1) && (cA == 1) && (cB == 1)) {
132: if (!At && !Bt) *C = *A * *B;
133: else if (At && !Bt) *C = PetscConj(*A) * *B;
134: else if (!At && Bt) *C = *A * PetscConj(*B);
135: else if (At && Bt) *C = PetscConj(*A) * PetscConj(*B);
136: return(0);
137: }
138:
139: /* Optimized versions: sA == 0 && sB == 0 */
140: if ((sA == 0) && (sB == 0)) {
141: if (At) tmp = rA, rA = cA, cA = tmp;
142: if (Bt) tmp = rB, rB = cB, cB = tmp;
143: SlepcDenseMatProd(C, ldC, 0.0, 1.0, A, ldA, rA, cA, At, B, ldB, rB,
144: cB, Bt);
145: PetscFunctionReturn(ierr);
146: }
148: /* Optimized versions: A hermitian && (B not triang) */
149: if (DVD_IS(sA,DVD_MAT_HERMITIAN) &&
150: DVD_ISNOT(sB,DVD_MAT_UTRIANG) &&
151: DVD_ISNOT(sB,DVD_MAT_LTRIANG) ) {
152: PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);
153: rC = rA; cC = cB;
154: BLAShemm_("L", DVD_ISNOT(sA,DVD_MAT_LTRIANG)?"U":"L", &rC, &cC, &one,
155: (PetscScalar*)A, &_ldA, (PetscScalar*)B, &_ldB, &zero, C, &_ldC);
156: PetscLogFlops(rA*cB*cA);
157: PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);
158: return(0);
159: }
161: /* Optimized versions: B hermitian && (A not triang) */
162: if (DVD_IS(sB,DVD_MAT_HERMITIAN) &&
163: DVD_ISNOT(sA,DVD_MAT_UTRIANG) &&
164: DVD_ISNOT(sA,DVD_MAT_LTRIANG) ) {
165: PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);
166: rC = rA; cC = cB;
167: BLAShemm_("R", DVD_ISNOT(sB,DVD_MAT_LTRIANG)?"U":"L", &rC, &cC, &one,
168: (PetscScalar*)B, &_ldB, (PetscScalar*)A, &_ldA, &zero, C, &_ldC);
169: PetscLogFlops(rA*cB*cA);
170: PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);
171: return(0);
172: }
173:
174: SETERRQ(PETSC_COMM_SELF,1, "Matrix type not supported for A");
175: }
179: /*
180: Normalize the columns of the matrix A, where
181: ldA, the leading dimension of A,
182: rA, cA, rows and columns of A.
183: if eigi is given, the pairs of contiguous columns i i+1 such as eigi[i] != 0
184: are normalized as being one column.
185: */
186: PetscErrorCode SlepcDenseNorm(PetscScalar *A, PetscInt ldA, PetscInt _rA,
187: PetscInt cA, PetscScalar *eigi)
188: {
189: PetscErrorCode ierr;
190: PetscInt i;
191: PetscScalar norm, norm0;
192: PetscBLASInt rA = _rA, one=1;
196: PetscLogEventBegin(SLEPC_SlepcDenseNorm,0,0,0,0);
198: for(i=0; i<cA; i++) {
199: if(eigi && eigi[i] != 0.0) {
200: norm = BLASnrm2_(&rA, &A[i*ldA], &one);
201: norm0 = BLASnrm2_(&rA, &A[(i+1)*ldA], &one);
202: norm = 1.0/PetscSqrtScalar(norm*norm + norm0*norm0);
203: BLASscal_(&rA, &norm, &A[i*ldA], &one);
204: BLASscal_(&rA, &norm, &A[(i+1)*ldA], &one);
205: i++;
206: } else {
207: norm = BLASnrm2_(&rA, &A[i*ldA], &one);
208: norm = 1.0 / norm;
209: BLASscal_(&rA, &norm, &A[i*ldA], &one);
210: }
211: }
213: PetscLogEventEnd(SLEPC_SlepcDenseNorm,0,0,0,0);
215: return(0);
216: }
217:
221: /*
222: Compute A <- orth(A), where
223: ldA, the leading dimension of A,
224: rA, cA, rows and columns of A,
225: auxS, auxiliary vector of more size than cA+min(rA,cA),
226: lauxS, size of auxS,
227: ncA, new number of columns of A
228: */
229: PetscErrorCode SlepcDenseOrth(PetscScalar *A, PetscInt _ldA, PetscInt _rA,
230: PetscInt _cA, PetscScalar *auxS, PetscInt _lauxS,
231: PetscInt *ncA)
232: {
233: PetscErrorCode ierr;
234: PetscBLASInt ldA = _ldA, rA = _rA, cA = _cA,
235: info, ltau = PetscMin(_cA, _rA), lw = _lauxS - ltau;
236: PetscScalar *tau = auxS, *w = tau + ltau;
240: /* Quick exit */
241: if ((_rA == 0) || (cA == 0)) { return(0); }
243: /* Memory check */
244: if (lw < cA) SETERRQ(PETSC_COMM_SELF,1, "Insufficient memory for xGEQRF");
245:
246: PetscLogEventBegin(SLEPC_SlepcDenseOrth,0,0,0,0);
247: LAPACKgeqrf_(&rA, &cA, A, &ldA, tau, w, &lw, &info);
248: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack xGEQRF %d", info);
249: LAPACKorgqr_(&rA, <au, <au, A, &ldA, tau, w, &lw, &info);
250: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack xORGQR %d", info);
251: PetscLogEventEnd(SLEPC_SlepcDenseOrth,0,0,0,0);
253: if (ncA) *ncA = ltau;
255: return(0);
256: }
260: /*
261: Y <- X, where
262: ldX, leading dimension of X,
263: rX, cX, rows and columns of X
264: ldY, leading dimension of Y
265: */
266: PetscErrorCode SlepcDenseCopy(PetscScalar *Y, PetscInt ldY, PetscScalar *X,
267: PetscInt ldX, PetscInt rX, PetscInt cX)
268: {
269: PetscErrorCode ierr;
270: PetscInt i;
274: if ((ldX < rX) || (ldY < rX)) {
275: SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
276: }
277:
278: /* Quick exit */
279: if (Y == X) {
280: if (ldX != ldY) {
281: SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
282: }
283: return(0);
284: }
286: PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);
287: for(i=0; i<cX; i++) {
288: PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
289:
290: }
291: PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);
293: return(0);
294: }
298: /*
299: Y <- X, where
300: ldX, leading dimension of X,
301: rX, cX, rows and columns of X
302: ldY, leading dimension of Y
303: */
304: PetscErrorCode SlepcDenseCopyTriang(PetscScalar *Y, MatType_t sY, PetscInt ldY,
305: PetscScalar *X, MatType_t sX, PetscInt ldX,
306: PetscInt rX, PetscInt cX)
307: {
308: PetscErrorCode ierr;
309: PetscInt i,j,c;
313: if ((ldX < rX) || (ldY < rX)) {
314: SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
315: }
317: if (sY == 0 && sX == 0) {
318: SlepcDenseCopy(Y, ldY, X, ldX, rX, cX);
319: return(0);
320: }
322: if (rX != cX) {
323: SETERRQ(PETSC_COMM_SELF,1, "Rectangular matrices not supported");
324: }
326: if (DVD_IS(sX,DVD_MAT_UTRIANG) &&
327: DVD_ISNOT(sX,DVD_MAT_LTRIANG)) { /* UpTr to ... */
328: if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
329: DVD_ISNOT(sY,DVD_MAT_LTRIANG)) /* ... UpTr, */
330: c = 0; /* so copy */
331: else if(DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
332: DVD_IS(sY,DVD_MAT_LTRIANG)) /* ... LoTr, */
333: c = 1; /* so transpose */
334: else /* ... Full, */
335: c = 2; /* so reflect from up */
336: } else if (DVD_ISNOT(sX,DVD_MAT_UTRIANG) &&
337: DVD_IS(sX,DVD_MAT_LTRIANG)) { /* LoTr to ... */
338: if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
339: DVD_ISNOT(sY,DVD_MAT_LTRIANG)) /* ... UpTr, */
340: c = 1; /* so transpose */
341: else if(DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
342: DVD_IS(sY,DVD_MAT_LTRIANG)) /* ... LoTr, */
343: c = 0; /* so copy */
344: else /* ... Full, */
345: c = 3; /* so reflect fr. down */
346: } else /* Full to any, */
347: c = 0; /* so copy */
348:
349: PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);
351: switch(c) {
352: case 0: /* copy */
353: for(i=0; i<cX; i++) {
354: PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
355:
356: }
357: break;
359: case 1: /* transpose */
360: for(i=0; i<cX; i++)
361: for(j=0; j<rX; j++)
362: Y[ldY*j+i] = PetscConj(X[ldX*i+j]);
363: break;
365: case 2: /* reflection from up */
366: for(i=0; i<cX; i++)
367: for(j=0; j<PetscMin(i+1,rX); j++)
368: Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
369: break;
371: case 3: /* reflection from down */
372: for(i=0; i<cX; i++)
373: for(j=i; j<rX; j++)
374: Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
375: break;
376: }
377: PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);
379: return(0);
380: }
385: /*
386: Compute Y[0..cM-1] <- alpha * X[0..cX-1] * M + beta * Y[0..cM-1],
387: where X and Y are contiguous global vectors.
388: */
389: PetscErrorCode SlepcUpdateVectorsZ(Vec *Y, PetscScalar beta, PetscScalar alpha,
390: Vec *X, PetscInt cX, const PetscScalar *M, PetscInt ldM, PetscInt rM,
391: PetscInt cM)
392: {
393: PetscErrorCode ierr;
397: SlepcUpdateVectorsS(Y, 1, beta, alpha, X, cX, 1, M, ldM, rM, cM);
398:
400: return(0);
401: }
406: /*
407: Compute Y[0:dY:cM*dY-1] <- alpha * X[0:dX:cX-1] * M + beta * Y[0:dY:cM*dY-1],
408: where X and Y are contiguous global vectors.
409: */
410: PetscErrorCode SlepcUpdateVectorsS(Vec *Y, PetscInt dY, PetscScalar beta,
411: PetscScalar alpha, Vec *X, PetscInt cX, PetscInt dX, const PetscScalar *M,
412: PetscInt ldM, PetscInt rM, PetscInt cM)
413: {
414: PetscErrorCode ierr;
415: const PetscScalar *px;
416: PetscScalar *py;
417: PetscInt rX, rY, ldX, ldY, i, rcX;
420: SlepcValidVecsContiguous(Y,cM*dY,1);
421: SlepcValidVecsContiguous(X,cX,5);
423: /* Compute the real number of columns */
424: rcX = cX/dX;
425: if (rcX != rM) {
426: SETERRQ(((PetscObject)*Y)->comm,1, "Matrix dimensions do not match");
427: }
429: if ((rcX == 0) || (rM == 0) || (cM == 0)) {
430: return(0);
431: } else if ((Y + cM <= X) || (X + cX <= Y) ||
432: ((X != Y) && ((PetscMax(dX,dY))%(PetscMin(dX,dY))!=0))) {
433: /* If Y[0..cM-1] and X[0..cX-1] are not overlapped... */
435: /* Get the dense matrices and dimensions associated to Y and X */
436: VecGetLocalSize(X[0], &rX);
437: VecGetLocalSize(Y[0], &rY);
438: if (rX != rY) {
439: SETERRQ(((PetscObject)*Y)->comm,1, "The multivectors do not have the same dimension");
440: }
441: VecGetArrayRead(X[0], &px);
442: VecGetArray(Y[0], &py);
444: /* Update the strides */
445: ldX = rX*dX; ldY= rY*dY;
447: /* Do operation */
448: SlepcDenseMatProd(py, ldY, beta, alpha, px, ldX, rX, rcX,
449: PETSC_FALSE, M, ldM, rM, cM, PETSC_FALSE);
450:
451: VecRestoreArrayRead(X[0], &px);
452: VecRestoreArray(Y[0], &py);
453: for(i=1; i<cM; i++) {
454: PetscObjectStateIncrease((PetscObject)Y[dY*i]);
455: }
457: } else if ((Y >= X) && (beta == 0.0) && (dY == dX)) {
458: /* If not, call to SlepcUpdateVectors */
459: SlepcUpdateStrideVectors(cX, X, Y-X, dX, Y-X+cM*dX, M-ldM*(Y-X),
460: ldM, PETSC_FALSE);
461: if (alpha != 1.0)
462: for (i=0; i<cM; i++) {
463: VecScale(Y[i], alpha);
464: }
465: } else {
466: SETERRQ(((PetscObject)*Y)->comm,1, "Unsupported case");
467: }
469: return(0);
470: }
474: /*
475: Compute X <- alpha * X[0:dX:cX-1] * M
476: where X is a matrix with non-consecutive columns
477: */
478: PetscErrorCode SlepcUpdateVectorsD(Vec *X, PetscInt cX, PetscScalar alpha,
479: const PetscScalar *M, PetscInt ldM, PetscInt rM, PetscInt cM,
480: PetscScalar *work, PetscInt lwork)
481: {
483: PetscScalar **px, *Y, *Z;
484: PetscInt rX, i, j, rY, rY0, ldY;
487: SlepcValidVecsContiguous(X,cX,1);
489: if (cX != rM) {
490: SETERRQ(((PetscObject)*X)->comm,1, "Matrix dimensions do not match");
491: }
493: rY = (lwork/2)/cX;
494: if (rY <= 0) {
495: SETERRQ(((PetscObject)*X)->comm,1, "Insufficient work space given");
496: }
497: Y = work; Z = &Y[cX*rY]; ldY = rY;
499: if ((cX == 0) || (rM == 0) || (cM == 0)) {
500: return(0);
501: }
503: /* Get the dense vectors associated to the columns of X */
504: PetscMalloc(sizeof(Vec)*cX, &px);
505: for(i=0; i<cX; i++) {
506: VecGetArray(X[i], &px[i]);
507: }
508: VecGetLocalSize(X[0], &rX);
510: for(i=0, rY0=0; i<rX; i+=rY0) {
511: rY0 = PetscMin(rY, rX-i);
513: /* Y <- X[i0:i1,:] */
514: for(j=0; j<cX; j++) {
515: SlepcDenseCopy(&Y[ldY*j], ldY, px[j]+i, rX, rY0, 1);
516:
517: }
519: /* Z <- Y * M */
520: SlepcDenseMatProd(Z, ldY, 0.0, alpha, Y, ldY, rY0, cX, PETSC_FALSE,
521: M, ldM, rM, cM, PETSC_FALSE);
522:
524: /* X <- Z */
525: for(j=0; j<cM; j++) {
526: SlepcDenseCopy(px[j]+i, rX, &Z[j*ldY], ldY, rY0, 1);
527:
528: }
529: }
531: for(i=0; i<cX; i++) {
532: VecRestoreArray(X[i], &px[i]);
533: }
534: PetscFree(px);
536: return(0);
537: }
543: /* Computes M <- [ M(0:sU-1, 0:sV-1) W(0:sU-1, sV:eV-1) ]
544: [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
545: where W = U' * V.
546: workS0 and workS1 are an auxiliary scalar vector of size
547: (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
548: is needed, and of size eU*eV.
549: */
550: PetscErrorCode VecsMult(PetscScalar *M, MatType_t sM, PetscInt ldM,
551: Vec *U, PetscInt sU, PetscInt eU,
552: Vec *V, PetscInt sV, PetscInt eV,
553: PetscScalar *workS0, PetscScalar *workS1)
554: {
555: PetscErrorCode ierr;
556: PetscInt ldU, ldV, i, j, k;
557: const PetscScalar *pu, *pv;
558: PetscScalar *W, *Wr;
562: /* Check if quick exit */
563: if ((eU-sU == 0) || (eV-sV == 0))
564: return(0);
566: SlepcValidVecsContiguous(U,eU,4);
567: SlepcValidVecsContiguous(V,eV,7);
568:
569: /* Get the dense matrices and dimensions associated to U and V */
570: VecGetLocalSize(U[0], &ldU);
571: VecGetLocalSize(V[0], &ldV);
572: if (ldU != ldV) {
573: SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
574: }
575: VecGetArrayRead(U[0], &pu);
576: VecGetArrayRead(V[0], &pv);
578: if (workS0)
579: W = workS0;
580: else {
581: PetscMalloc(sizeof(PetscScalar)*((eU-sU)*sV+(eV-sV)*eU), &W);
582:
583: }
585: PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);
587: if ((sU == 0) && (sV == 0) && (eU == ldM)) {
588: /* Use the smart memory usage version */
590: /* W <- U' * V */
591: SlepcDenseMatProdTriang(W, sM, eU,
592: pu, 0, ldU, ldU, eU, PETSC_TRUE,
593: pv, 0, ldV, ldV, eV, PETSC_FALSE);
594:
595:
596: /* ReduceAll(W, SUM) */
597: MPI_Allreduce(W, M, eU*eV, MPIU_SCALAR, MPIU_SUM,
598: ((PetscObject)U[0])->comm);
599: /* Full M matrix */
600: } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
601: DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
602: if (workS1) {
603: Wr = workS1;
604: if (PetscAbs(PetscMin(W-workS1, workS1-W)) < ((eU-sU)*sV+(eV-sV)*eU)) {
605: SETERRQ(PETSC_COMM_SELF,1, "Consistency broken!");
606: }
607: } else {
608: PetscMalloc(sizeof(PetscScalar)*((eU-sU)*sV+(eV-sV)*eU), &Wr);
609:
610: }
611:
612: /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
613: SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
614: pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
615: pv , ldV, ldV, sV, PETSC_FALSE);
616:
617:
618: /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
619: SlepcDenseMatProd(W+(eU-sU)*sV, eU, 0.0, 1.0,
620: pu, ldU, ldU, eU, PETSC_TRUE,
621: pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
622:
623:
624: /* ReduceAll(W, SUM) */
625: MPI_Allreduce(W, Wr, (eU-sU)*sV+(eV-sV)*eU, MPIU_SCALAR,
626: MPIU_SUM, ((PetscObject)U[0])->comm);
627:
628: /* M(...,...) <- W */
629: for (i=0,k=0; i<sV; i++)
630: for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
631: for (i=sV; i<eV; i++)
632: for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
633:
634: if (!workS1) {
635: PetscFree(Wr);
636: }
638: /* Upper triangular M matrix */
639: } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
640: DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
641: if (workS1) {
642: Wr = workS1;
643: if (PetscAbs(PetscMin(W-workS1,workS1-W)) < (eV-sV)*eU) {
644: SETERRQ(PETSC_COMM_SELF,1, "Consistency broken!");
645: }
646: } else {
647: PetscMalloc(sizeof(PetscScalar)*(eV-sV)*eU, &Wr);
648:
649: }
650:
651: /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
652: SlepcDenseMatProd(W, eU, 0.0, 1.0,
653: pu, ldU, ldU, eU, PETSC_TRUE,
654: pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
655:
656:
657: /* ReduceAll(W, SUM) */
658: MPI_Allreduce(W, Wr, (eV-sV)*eU, MPIU_SCALAR, MPIU_SUM,
659: ((PetscObject)U[0])->comm);
660:
661: /* M(...,...) <- W */
662: for (i=sV,k=0; i<eV; i++)
663: for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
665: if (!workS1) {
666: PetscFree(Wr);
667: }
669: /* Lower triangular M matrix */
670: } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
671: DVD_IS(sM,DVD_MAT_LTRIANG)) {
672: if (workS1) {
673: Wr = workS1;
674: if (PetscMin(W - workS1, workS1 - W) < (eU-sU)*eV) {
675: SETERRQ(PETSC_COMM_SELF,1, "Consistency broken!");
676: }
677: } else {
678: PetscMalloc(sizeof(PetscScalar)*(eU-sU)*eV, &Wr);
679:
680: }
681:
682: /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
683: SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
684: pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
685: pv , ldV, ldV, eV, PETSC_FALSE);
686:
687:
688: /* ReduceAll(W, SUM) */
689: MPI_Allreduce(W, Wr, (eU-sU)*eV, MPIU_SCALAR, MPIU_SUM,
690: ((PetscObject)U[0])->comm);
691:
692: /* M(...,...) <- W */
693: for (i=0,k=0; i<eV; i++)
694: for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
695:
696: if (!workS1) {
697: PetscFree(Wr);
698: }
699: }
701: PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);
703: if (!workS0) {
704: PetscFree(W);
705: }
707: VecRestoreArrayRead(U[0], &pu);
708: VecRestoreArrayRead(V[0], &pv);
709: return(0);
710: }
715: /* Computes M <- [ M(0:sU-1, 0:sV-1) W(0:sU-1, sV:eV-1) ]
716: [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
717: where W = local_U' * local_V. Needs VecsMultIb for completing the operation!
718: workS0 and workS1 are an auxiliary scalar vector of size
719: (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
720: is needed, and of size eU*eV.
721: */
722: PetscErrorCode VecsMultIa(PetscScalar *M, MatType_t sM, PetscInt ldM,
723: Vec *U, PetscInt sU, PetscInt eU,
724: Vec *V, PetscInt sV, PetscInt eV)
725: {
726: PetscErrorCode ierr;
727: PetscInt ldU, ldV;
728: PetscScalar *pu, *pv;
732: /* Check if quick exit */
733: if ((eU-sU == 0) || (eV-sV == 0))
734: return(0);
735:
736: SlepcValidVecsContiguous(U,eU,4);
737: SlepcValidVecsContiguous(V,eV,7);
739: /* Get the dense matrices and dimensions associated to U and V */
740: VecGetLocalSize(U[0], &ldU);
741: VecGetLocalSize(V[0], &ldV);
742: if (ldU != ldV) {
743: SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
744: }
745: VecGetArray(U[0], &pu);
746: VecGetArray(V[0], &pv);
748: if ((sU == 0) && (sV == 0) && (eU == ldM)) {
749: /* M <- local_U' * local_V */
750: SlepcDenseMatProdTriang(M, sM, eU,
751: pu, 0, ldU, ldU, eU, PETSC_TRUE,
752: pv, 0, ldV, ldV, eV, PETSC_FALSE);
753:
754:
755: /* Full M matrix */
756: } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
757: DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
758: /* M(sU:eU-1,0:sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
759: SlepcDenseMatProd(&M[sU], ldM, 0.0, 1.0,
760: pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
761: pv , ldV, ldV, sV, PETSC_FALSE);
762:
763:
764: /* M(0:eU-1,sV:eV-1) <- U(0:eU-1)' * V(sV:eV-1) */
765: SlepcDenseMatProd(&M[ldM*sV], ldM, 0.0, 1.0,
766: pu, ldU, ldU, eU, PETSC_TRUE,
767: pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
768:
769:
770: /* Other structures */
771: } else SETERRQ(((PetscObject)*U)->comm,1, "Matrix structure not supported");
773: VecRestoreArray(U[0], &pu);
774: PetscObjectStateDecrease((PetscObject)U[0]);
775: VecRestoreArray(V[0], &pv);
776: PetscObjectStateDecrease((PetscObject)V[0]);
778: return(0);
779: }
784: /* Computes M <- nprocs*M
785: where nprocs is the number of processors.
786: */
787: PetscErrorCode VecsMultIc(PetscScalar *M, MatType_t sM, PetscInt ldM,
788: PetscInt rM, PetscInt cM, Vec V)
789: {
790: int i,j,n;
794: /* Check if quick exit */
795: if ((rM == 0) || (cM == 0))
796: return(0);
797:
798: if (sM != 0) SETERRQ(((PetscObject)V)->comm,1, "Matrix structure not supported");
800: MPI_Comm_size(((PetscObject)V)->comm, &n);
802: for(i=0; i<cM; i++)
803: for(j=0; j<rM; j++)
804: M[ldM*i+j]/= (PetscScalar)n;
806: return(0);
807: }
812: /* Computes N <- Allreduce( [ M(0:sU-1, 0:sV-1) W(0:sU-1, sV:eV-1) ] )
813: ( [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ] )
814: where W = U' * V.
815: workS0 and workS1 are an auxiliary scalar vector of size
816: (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
817: is needed, and of size eU*eV.
818: */
819: PetscErrorCode VecsMultIb(PetscScalar *M, MatType_t sM, PetscInt ldM,
820: PetscInt rM, PetscInt cM, PetscScalar *auxS,
821: Vec V)
822: {
823: PetscErrorCode ierr;
824: PetscScalar *W, *Wr;
828: /* Check if quick exit */
829: if ((rM == 0) || (cM == 0))
830: return(0);
831:
832: if (auxS)
833: W = auxS;
834: else {
835: PetscMalloc(sizeof(PetscScalar)*rM*cM*2, &W);
836:
837: }
838: Wr = W + rM*cM;
840: PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);
842: if (sM == 0) {
843: /* W <- M */
844: SlepcDenseCopy(W, rM, M, ldM, rM, cM);
846: /* Wr <- ReduceAll(W, SUM) */
847: MPI_Allreduce(W, Wr, rM*cM, MPIU_SCALAR, MPIU_SUM,
848: ((PetscObject)V)->comm);
850: /* M <- Wr */
851: SlepcDenseCopy(M, ldM, Wr, rM, rM, cM);
853: /* Other structures */
854: } else SETERRQ(((PetscObject)V)->comm,1, "Matrix structure not supported");
856: PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);
858: if (!auxS) {
859: PetscFree(W);
860: }
862: return(0);
863: }
868: /* Computes M <- [ M(0:sU-1, 0:sV-1) W(0:sU-1, sV:eV-1) ]
869: [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
870: where W = U' * V.
871: r, a DvdReduction structure,
872: sr, an structure DvdMult_copy_func.
873: */
874: PetscErrorCode VecsMultS(PetscScalar *M, MatType_t sM, PetscInt ldM,
875: Vec *U, PetscInt sU, PetscInt eU,
876: Vec *V, PetscInt sV, PetscInt eV, DvdReduction *r,
877: DvdMult_copy_func *sr)
878: {
879: PetscErrorCode ierr;
880: PetscInt ldU, ldV;
881: const PetscScalar *pu, *pv;
882: PetscScalar *W;
886: /* Check if quick exit */
887: if ((eU-sU == 0) || (eV-sV == 0))
888: return(0);
889:
890: SlepcValidVecsContiguous(U,eU,4);
891: SlepcValidVecsContiguous(V,eV,7);
893: /* Get the dense matrices and dimensions associated to U and V */
894: VecGetLocalSize(U[0], &ldU);
895: VecGetLocalSize(V[0], &ldV);
896: if (ldU != ldV) {
897: SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
898: }
899: VecGetArrayRead(U[0], &pu);
900: VecGetArrayRead(V[0], &pv);
902: PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);
904: if ((sU == 0) && (sV == 0) && (eU == ldM)) {
905: /* Use the smart memory usage version */
907: /* Add the reduction to r */
908: SlepcAllReduceSum(r, eU*eV, VecsMultS_copy_func, sr, &W);
909:
911: /* W <- U' * V */
912: SlepcDenseMatProdTriang(W, sM, eU,
913: pu, 0, ldU, ldU, eU, PETSC_TRUE,
914: pv, 0, ldV, ldV, eV, PETSC_FALSE);
915:
916:
917: /* M <- ReduceAll(W, SUM) */
918: sr->M = M; sr->ld = ldM;
919: sr->i0 = 0; sr->i1 = eV; sr->s0 = sU; sr->e0 = eU;
920: sr->i2 = eV;
922: /* Full M matrix */
923: } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
924: DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
925: /* Add the reduction to r */
926: SlepcAllReduceSum(r, (eU-sU)*sV+(eV-sV)*eU, VecsMultS_copy_func,
927: sr, &W);
928:
930: /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
931: SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
932: pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
933: pv , ldV, ldV, sV, PETSC_FALSE);
934:
935:
936: /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
937: SlepcDenseMatProd(W+(eU-sU)*sV, eU, 0.0, 1.0,
938: pu, ldU, ldU, eU, PETSC_TRUE,
939: pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
940:
941:
942: /* M <- ReduceAll(W, SUM) */
943: sr->M = M; sr->ld = ldM;
944: sr->i0 = 0; sr->i1 = sV; sr->s0 = sU; sr->e0 = eU;
945: sr->i2 = eV; sr->s1 = 0; sr->e1 = eU;
947: /* Upper triangular M matrix */
948: } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
949: DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
950: /* Add the reduction to r */
951: SlepcAllReduceSum(r, (eV-sV)*eU, VecsMultS_copy_func, sr, &W);
952:
953:
954: /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
955: SlepcDenseMatProd(W, eU, 0.0, 1.0,
956: pu, ldU, ldU, eU, PETSC_TRUE,
957: pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
958:
959:
960: /* M <- ReduceAll(W, SUM) */
961: sr->M = M; sr->ld = ldM;
962: sr->i0 = sV; sr->i1 = eV; sr->s0 = 0; sr->e0 = eU;
963: sr->i2 = eV;
964:
965: /* Lower triangular M matrix */
966: } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
967: DVD_IS(sM,DVD_MAT_LTRIANG)) {
968: /* Add the reduction to r */
969: SlepcAllReduceSum(r, (eU-sU)*eV, VecsMultS_copy_func, sr, &W);
970:
971:
972: /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
973: SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
974: pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
975: pv , ldV, ldV, eV, PETSC_FALSE);
976:
977:
978: /* ReduceAll(W, SUM) */
979: sr->M = M; sr->ld = ldM;
980: sr->i0 = 0; sr->i1 = eV; sr->s0 = sU; sr->e0 = eU;
981: sr->i2 = eV;
982: }
984: PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);
986: VecRestoreArrayRead(U[0], &pu);
987: VecRestoreArrayRead(V[0], &pv);
988: return(0);
989: }
993: PetscErrorCode VecsMultS_copy_func(PetscScalar *out, PetscInt size_out,
994: void *ptr)
995: {
996: PetscInt i, j, k;
997: DvdMult_copy_func
998: *sr = (DvdMult_copy_func*)ptr;
1002: for (i=sr->i0,k=0; i<sr->i1; i++)
1003: for (j=sr->ld*i+sr->s0; j<sr->ld*i+sr->e0; j++,k++) sr->M[j] = out[k];
1004: for (i=sr->i1; i<sr->i2; i++)
1005: for (j=sr->ld*i+sr->s1; j<sr->ld*i+sr->e1; j++,k++) sr->M[j] = out[k];
1007: if (k != size_out) SETERRQ(PETSC_COMM_SELF,1, "Wrong size");
1009: return(0);
1010: }
1014: /* Orthonormalize a chunk of parallel vector.
1015: NOTE: wS0 and wS1 must be of size n*n.
1016: */
1017: PetscErrorCode VecsOrthonormalize(Vec *V, PetscInt n, PetscScalar *wS0,
1018: PetscScalar *wS1)
1019: {
1020: PetscErrorCode ierr;
1021: PetscBLASInt nn = n, info, ld;
1022: PetscInt ldV, i;
1023: PetscScalar *H, *T, one=1.0, *pv;
1024:
1027: if (!wS0) {
1028: PetscMalloc(sizeof(PetscScalar)*n*n, &H);
1029: } else
1030: H = wS0;
1031: if (!wS1) {
1032: PetscMalloc(sizeof(PetscScalar)*n*n, &T);
1033: } else
1034: T = wS1;
1036: /* H <- V' * V */
1037: VecsMult(H, 0, n, V, 0, n, V, 0, n, T, PETSC_NULL);
1039: /* H <- chol(H) */
1040: LAPACKpbtrf_("U", &nn, &nn, H, &nn, &info);
1041: if (info) SETERRQ1(((PetscObject)*V)->comm,PETSC_ERR_LIB, "Error in Lapack PBTRF %d", info);
1043: /* V <- V * inv(H) */
1044: VecGetLocalSize(V[0], &ldV);
1045: VecGetArray(V[0], &pv);
1046: ld = ldV;
1047: BLAStrsm_("R", "U", "N", "N", &ld, &nn, &one, H, &nn, pv, &ld);
1048: VecRestoreArray(V[0], &pv);
1049: for(i=1; i<n; i++) {
1050: PetscObjectStateIncrease((PetscObject)V[i]);
1051: }
1053: if (!wS0) {
1054: PetscFree(H);
1055: }
1056: if (!wS1) {
1057: PetscFree(T);
1058: }
1059:
1060: return(0);
1061: }
1065: /*
1066: Sum up several arrays with only one call to MPIReduce.
1067: */
1068: PetscErrorCode SlepcAllReduceSumBegin(DvdReductionChunk *ops,
1069: PetscInt max_size_ops,
1070: PetscScalar *in, PetscScalar *out,
1071: PetscInt max_size_in, DvdReduction *r,
1072: MPI_Comm comm)
1073: {
1076: r->in = in;
1077: r->out = out;
1078: r->size_in = 0;
1079: r->max_size_in = max_size_in;
1080: r->ops = ops;
1081: r->size_ops = 0;
1082: r->max_size_ops = max_size_ops;
1083: r->comm = comm;
1085: return(0);
1086: }
1090: PetscErrorCode SlepcAllReduceSum(DvdReduction *r, PetscInt size_in,
1091: DvdReductionPostF f, void *ptr,
1092: PetscScalar **in)
1093: {
1096: *in = r->in + r->size_in;
1097: r->ops[r->size_ops].out = r->out + r->size_in;
1098: r->ops[r->size_ops].size_out = size_in;
1099: r->ops[r->size_ops].f = f;
1100: r->ops[r->size_ops].ptr = ptr;
1101: if (++(r->size_ops) > r->max_size_ops) {
1102: SETERRQ(PETSC_COMM_SELF,1, "max_size_ops is not large enough");
1103: }
1104: if ((r->size_in+= size_in) > r->max_size_in) {
1105: SETERRQ(PETSC_COMM_SELF,1, "max_size_in is not large enough");
1106: }
1108: return(0);
1109: }
1114: PetscErrorCode SlepcAllReduceSumEnd(DvdReduction *r)
1115: {
1116: PetscErrorCode ierr;
1117: PetscInt i;
1121: /* Check if quick exit */
1122: if (r->size_ops == 0)
1123: return(0);
1125: /* Call the MPIAllReduce routine */
1126: MPI_Allreduce(r->in, r->out, r->size_in, MPIU_SCALAR, MPIU_SUM,
1127: r->comm);
1129: /* Call the postponed routines */
1130: for(i=0; i<r->size_ops; i++) {
1131: r->ops[i].f(r->ops[i].out, r->ops[i].size_out, r->ops[i].ptr);
1132:
1133: }
1135: /* Tag the operation as done */
1136: r->size_ops = 0;
1138: return(0);
1139: }
1144: /* auxS: size_cX+V_new_e */
1145: PetscErrorCode dvd_orthV(IP ip, Vec *DS, PetscInt size_DS, Vec *cX,
1146: PetscInt size_cX, Vec *V, PetscInt V_new_s,
1147: PetscInt V_new_e, PetscScalar *auxS,
1148: PetscRandom rand)
1149: {
1150: PetscErrorCode ierr;
1151: PetscInt i, j;
1152: PetscBool lindep;
1153: PetscReal norm;
1154: PetscScalar *auxS0 = auxS;
1155:
1157:
1158: /* Orthonormalize V with IP */
1159: for (i=V_new_s; i<V_new_e; i++) {
1160: for(j=0; j<3; j++) {
1161: if (j>0) { SlepcVecSetRandom(V[i], rand); }
1162: if (cX + size_cX == V) {
1163: /* If cX and V are contiguous, orthogonalize in one step */
1164: IPOrthogonalize(ip, size_DS, DS, size_cX+i, PETSC_NULL, cX,
1165: V[i], auxS0, &norm, &lindep);
1166: } else if (DS) {
1167: /* Else orthogonalize first against DS, and then against cX and V */
1168: IPOrthogonalize(ip, size_DS, DS, size_cX, PETSC_NULL, cX,
1169: V[i], auxS0, PETSC_NULL, &lindep);
1170: if(!lindep) {
1171: IPOrthogonalize(ip, 0, PETSC_NULL, i, PETSC_NULL, V,
1172: V[i], auxS0, &norm, &lindep);
1173: }
1174: } else {
1175: /* Else orthogonalize first against cX and then against V */
1176: IPOrthogonalize(ip, size_cX, cX, i, PETSC_NULL, V,
1177: V[i], auxS0, &norm, &lindep);
1178: }
1179: if(!lindep && (norm > PETSC_MACHINE_EPSILON)) break;
1180: PetscInfo1(ip,"Orthonormalization problems adding the vector %D to the searching subspace\n",i);
1181: }
1182: if(lindep || (norm < PETSC_MACHINE_EPSILON)) {
1183: SETERRQ(((PetscObject)ip)->comm,1, "Error during orthonormalization of eigenvectors");
1184: }
1185: VecScale(V[i], 1.0/norm);
1186: }
1187:
1188: return(0);
1189: }
1194: /* auxS: size_cX+V_new_e+1 */
1195: PetscErrorCode dvd_BorthV(IP ip, Vec *DS, Vec *BDS, PetscInt size_DS, Vec *cX,
1196: Vec *BcX, PetscInt size_cX, Vec *V, Vec *BV,
1197: PetscInt V_new_s, PetscInt V_new_e,
1198: PetscScalar *auxS, PetscRandom rand)
1199: {
1200: PetscErrorCode ierr;
1201: PetscInt i, j;
1202: PetscBool lindep;
1203: PetscReal norm;
1204: PetscScalar *auxS0 = auxS;
1205:
1207:
1208: /* Orthonormalize V with IP */
1209: for (i=V_new_s; i<V_new_e; i++) {
1210: for(j=0; j<3; j++) {
1211: if (j>0) { SlepcVecSetRandom(V[i], rand); }
1212: if (cX + size_cX == V) {
1213: /* If cX and V are contiguous, orthogonalize in one step */
1214: IPBOrthogonalize(ip, size_DS, DS, BDS, size_cX+i, PETSC_NULL, cX, BcX,
1215: V[i], BV[i], auxS0, &norm, &lindep);
1216: } else if (DS) {
1217: /* Else orthogonalize first against DS, and then against cX and V */
1218: IPBOrthogonalize(ip, size_DS, DS, BDS, size_cX, PETSC_NULL, cX, BcX,
1219: V[i], BV[i], auxS0, PETSC_NULL, &lindep);
1220: if(!lindep) {
1221: IPBOrthogonalize(ip, 0, PETSC_NULL, PETSC_NULL, i, PETSC_NULL, V, BV,
1222: V[i], BV[i], auxS0, &norm, &lindep);
1223: }
1224: } else {
1225: /* Else orthogonalize first against cX and then against V */
1226: IPBOrthogonalize(ip, size_cX, cX, BcX, i, PETSC_NULL, V, BV,
1227: V[i], BV[i], auxS0, &norm, &lindep);
1228: }
1229: if(!lindep && (norm > PETSC_MACHINE_EPSILON)) break;
1230: PetscInfo1(ip, "Orthonormalization problems adding the vector %d to the searching subspace\n", i);
1231:
1232: }
1233: if(lindep || (norm < PETSC_MACHINE_EPSILON)) {
1234: SETERRQ(((PetscObject)ip)->comm,1, "Error during the orthonormalization of the eigenvectors!");
1235: }
1236: VecScale(V[i], 1.0/norm);
1237: VecScale(BV[i], 1.0/norm);
1238: }
1239:
1240: return(0);
1241: }
1242:
1245: /*
1246: Compute eigenvectors associated to the Schur decomposition (S, T) and
1247: save the left vectors in pY and the right vectors in pX, where
1248: n, size of the eigenproblem
1249: ldS, ldT, leading dimension of S and T
1250: ldpX, ldpY, leading dimension of pX and pY
1251: auxS, auxiliar scalar of length:
1252: double standard 3n+n*n, double generalized 11n+4n*n,
1253: complex standard 3n+n*n, complex generalized 3n+2n*n
1254: size_auxS, the length of auxS
1255: doProd, if true pX and pY return the eigenvectors premultiplied by the input vectors stored in pX and pY respectively
1256: */
1257: PetscErrorCode dvd_compute_eigenvectors(PetscInt n_, PetscScalar *S,
1258: PetscInt ldS, PetscScalar *T, PetscInt ldT, PetscScalar *pX,
1259: PetscInt ldpX_, PetscScalar *pY, PetscInt ldpY_, PetscScalar *auxS,
1260: PetscInt size_auxS, PetscBool doProd)
1261: {
1262: #if defined(SLEPC_MISSING_LAPACK_GGEV)
1264: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GGEV - Lapack routine is unavailable.");
1265: #else
1266: PetscErrorCode ierr;
1267: PetscBLASInt n, ldpX, ldpY, nout, info;
1268: PetscScalar *Sc, *Tc;
1269: const char *side, *howmny;
1270: #if defined(PETSC_USE_COMPLEX)
1271: PetscReal *auxR;
1272: #else
1273: PetscScalar *pA,*pB;
1274: PetscBLASInt n1, ldpA,ldpB;
1275: PetscScalar *alphar, *alphai, *beta;
1276: #endif
1277:
1279: n = PetscBLASIntCast(n_);
1280: ldpX = PetscBLASIntCast(ldpX_);
1281: ldpY = PetscBLASIntCast(ldpY_);
1283: if (pX && pY) side = "B";
1284: else if (pX) side = "R";
1285: else if (pY) side = "L";
1286: else { return(0); }
1288: if (!pX) ldpX = 1;
1289: if (!pY) ldpY = 1;
1291: howmny = doProd?"B":"A";
1293: Sc = auxS; auxS+= n*n; size_auxS-= n*n;
1294: if (T) Tc = auxS, auxS+= n*n, size_auxS-= n*n;
1295: else Tc = PETSC_NULL;
1296:
1297: /* Sc <- S, Tc <- T */
1298: SlepcDenseCopy(Sc, n, S, ldS, n, n);
1299: if (T) {
1300: SlepcDenseCopy(Tc, n, T, ldT, n, n);
1301: }
1303: if (T) {
1304: /* [eigr, pX] = eig(S, T) */
1305: #if defined(PETSC_USE_COMPLEX)
1306: auxR = (PetscReal*)auxS; auxS = (PetscScalar*)(auxR+2*n); size_auxS-= 2*n;
1307: if (size_auxS < 2*n)
1308: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Insufficient work space for xTGEVC");
1309: LAPACKtgevc_(side,howmny,PETSC_NULL,&n,Sc,&n,Tc,&n,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,auxR,&info);
1310: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTGEVC %d",info);
1311: #else
1312: alphar = auxS; auxS+= n; size_auxS-= n;
1313: alphai = auxS; auxS+= n; size_auxS-= n;
1314: beta = auxS; auxS+= n; size_auxS-= n;
1315: if (doProd) {
1316: if (pX) pA = auxS, auxS+= n*n, size_auxS-= n*n, ldpA = n;
1317: else pA = PETSC_NULL, ldpA = 0;
1318: if (pY) pB = auxS, auxS+= n*n, size_auxS-= n*n, ldpB = n;
1319: else pB = PETSC_NULL, ldpB = 0;
1320: } else {
1321: pA = pX; pB = pY; ldpA = ldpX; ldpB = ldpY;
1322: }
1323: /* LAPACKtgevc_ needs the element i,i+1 in the 2-by-2 digonal blocs
1324: of T that represent complex conjugate eigenpairs to be zero */
1325: n1 = size_auxS;
1326: if (size_auxS < 8*n)
1327: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Insufficient work space for xGGEV");
1328: LAPACKggev_(pY?"V":"N",pX?"V":"N",&n,Sc,&n,Tc,&n,alphar,alphai,beta,pB,&ldpB,pA,&ldpA,auxS,&n1,&info);
1329: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGGEV %d",info);
1330: if (doProd) {
1331: if (pX) {
1332: /* pX <- pX * pA */
1333: SlepcDenseCopy(Sc, n, pX, ldpX, n, n);
1334: SlepcDenseMatProd(pX, ldpX, 0.0, 1.0,
1335: Sc, n, n, n, PETSC_FALSE,
1336: pA, n, n, n, PETSC_FALSE);
1337: }
1338: if (pY) {
1339: /* pY <- pY * pB */
1340: SlepcDenseCopy(Sc, n, pY, ldpY, n, n);
1341: SlepcDenseMatProd(pY, ldpY, 0.0, 1.0,
1342: Sc, n, n, n, PETSC_FALSE,
1343: pB, n, n, n, PETSC_FALSE);
1344: }
1345: }
1346: #endif
1347: } else {
1348: /* [eigr, pX] = eig(S) */
1349: #if defined(PETSC_USE_COMPLEX)
1350: auxR = (PetscReal*)auxS; auxS = (PetscScalar*)(auxR+n); size_auxS-= n;
1351: if (size_auxS < 2*n)
1352: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Insufficient work space for xTREVC");
1353: LAPACKtrevc_(side,howmny,PETSC_NULL,&n,Sc,&n,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,auxR,&info);
1354: #else
1355: LAPACKtrevc_(side,howmny,PETSC_NULL,&n,Sc,&n,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,&info);
1356: #endif
1357: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTREVC %d",info);
1358: }
1359: return(0);
1360: #endif
1361: }
1366: PetscErrorCode EPSSortDenseHEP(EPS eps, PetscInt n, PetscInt k, PetscScalar *w, PetscScalar *V, PetscInt ldV)
1367: {
1368: PetscInt i, j, result, pos;
1369: PetscReal re;
1370: PetscScalar t;
1371: PetscBLASInt n_,one=1;
1372: PetscErrorCode ierr;
1375: n_ = PetscBLASIntCast(n);
1377: /* selection sort */
1378: for (i=k;i<n-1;i++) {
1379: re = PetscRealPart(w[i]);
1380: pos = 0;
1381: /* find minimum eigenvalue */
1382: for (j=i+1;j<n;j++) {
1383: EPSCompareEigenvalues(eps,re,0,PetscRealPart(w[j]),0,&result);
1384: if (result > 0) {
1385: re = PetscRealPart(w[j]);
1386: pos = j;
1387: }
1388: }
1389: /* interchange the pairs i and pos */
1390: if (pos) {
1391: BLASswap_(&n_, &V[ldV*pos], &one, &V[ldV*i], &one);
1392: t = w[i]; w[i] = w[pos]; w[pos] = t;
1393: }
1394: }
1396: return(0);
1397: }