Actual source code: dvd_blas.c
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2010, Universidad Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7:
8: SLEPc is free software: you can redistribute it and/or modify it under the
9: terms of version 3 of the GNU Lesser General Public License as published by
10: the Free Software Foundation.
12: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
13: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
15: more details.
17: You should have received a copy of the GNU Lesser General Public License
18: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
19: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
20: */
22: #include slepc.h
23: #include slepceps.h
24: #include slepcblaslapack.h
25: #include davidson.h
27: PetscLogEvent SLEPC_SlepcDenseMatProd = 0;
28: PetscLogEvent SLEPC_SlepcDenseNorm = 0;
29: PetscLogEvent SLEPC_SlepcDenseOrth = 0;
30: PetscLogEvent SLEPC_SlepcDenseCopy = 0;
31: PetscLogEvent SLEPC_VecsMult = 0;
33: void dvd_sum_local(void *in, void *out, PetscMPIInt *cnt,MPI_Datatype *t);
34: PetscErrorCode VecsMultS_copy_func(PetscScalar *out, PetscInt size_out,
35: void *ptr);
39: PetscErrorCode dvd_blas_prof_init() {
40: PetscErrorCode ierr;
44: if (SLEPC_SlepcDenseMatProd) return(0);
45: PetscLogEventRegister("DenseMatProd", EPS_COOKIE,
46: &SLEPC_SlepcDenseMatProd);
47: PetscLogEventRegister("DenseOrth", EPS_COOKIE,
48: &SLEPC_SlepcDenseOrth);
49: PetscLogEventRegister("DenseMatNorm", EPS_COOKIE,
50: &SLEPC_SlepcDenseNorm);
51: PetscLogEventRegister("DenseCopy", EPS_COOKIE,
52: &SLEPC_SlepcDenseCopy);
53: PetscLogEventRegister("VecsMult", EPS_COOKIE,
54: &SLEPC_VecsMult);
55: return(0);
56: }
58: /*
59: Compute C <- a*A*B + b*C, where
60: ldC, the leading dimension of C,
61: ldA, the leading dimension of A,
62: rA, cA, rows and columns of A,
63: At, if true use the transpose of A instead,
64: ldB, the leading dimension of B,
65: rB, cB, rows and columns of B,
66: Bt, if true use the transpose of B instead
67: */
70: PetscErrorCode SlepcDenseMatProd(PetscScalar *C, PetscInt _ldC, PetscScalar b,
71: PetscScalar a,
72: const PetscScalar *A, PetscInt _ldA, PetscInt rA, PetscInt cA, PetscTruth At,
73: const PetscScalar *B, PetscInt _ldB, PetscInt rB, PetscInt cB, PetscTruth Bt)
74: {
75: PetscErrorCode ierr;
76: PetscInt tmp;
77: PetscBLASInt m, n, k, ldA = _ldA, ldB = _ldB, ldC = _ldC;
78: const char *N = "N", *T = "C", *qA = N, *qB = N;
82: if ((rA == 0) || (cB == 0)) { return(0); }
84: PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);
86: /* Transpose if needed */
87: if (At) tmp = rA, rA = cA, cA = tmp, qA = T;
88: if (Bt) tmp = rB, rB = cB, cB = tmp, qB = T;
89:
90: /* Check size */
91: if (cA != rB) {
92: SETERRQ(1, "Matrix dimensions doesn't match!");
93: }
94:
95: /* Do stub */
96: if ((rA == 1) && (cA == 1) && (cB == 1)) {
97: *C = *A * *B;
98: m = n = k = 1;
99: } else {
100: m = rA; n = cB; k = cA;
101: BLASgemm_(qA, qB, &m, &n, &k, &a, (PetscScalar*)A, &ldA, (PetscScalar*)B,
102: &ldB, &b, C, &ldC);
103: }
105: PetscLogFlops(m*n*2*k);
106: PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);
108: return(0);
109: }
111: /*
112: Compute C <- A*B, where
113: sC, structure of C,
114: ldC, the leading dimension of C,
115: sA, structure of A,
116: ldA, the leading dimension of A,
117: rA, cA, rows and columns of A,
118: At, if true use the transpose of A instead,
119: sB, structure of B,
120: ldB, the leading dimension of B,
121: rB, cB, rows and columns of B,
122: Bt, if true use the transpose of B instead
123: */
126: PetscErrorCode SlepcDenseMatProdTriang(
127: PetscScalar *C, MatType_t sC, PetscInt ldC,
128: const PetscScalar *A, MatType_t sA, PetscInt ldA, PetscInt rA, PetscInt cA,
129: PetscTruth At,
130: const PetscScalar *B, MatType_t sB, PetscInt ldB, PetscInt rB, PetscInt cB,
131: PetscTruth Bt)
132: {
133: PetscErrorCode ierr;
134: PetscInt tmp;
135: PetscScalar one=1.0, zero=0.0;
136: PetscBLASInt rC, cC, _ldA = ldA, _ldB = ldB, _ldC = ldC;
140: if ((rA == 0) || (cB == 0)) { return(0); }
142: /* Transpose if needed */
143: if (At) tmp = rA, rA = cA, cA = tmp;
144: if (Bt) tmp = rB, rB = cB, cB = tmp;
145:
146: /* Check size */
147: if (cA != rB) SETERRQ(1, "Matrix dimensions doesn't match!");
148: if (sB != 0) SETERRQ(1, "It doesn't support B matrix type!");
150: /* Optimized version: trivial case */
151: if ((rA == 1) && (cA == 1) && (cB == 1)) {
152: if (!At && !Bt) *C = *A * *B;
153: else if (At && !Bt) *C = PetscConj(*A) * *B;
154: else if (!At && Bt) *C = *A * PetscConj(*B);
155: else if (At && Bt) *C = PetscConj(*A) * PetscConj(*B);
156: return(0);
157: }
158:
159: /* Optimized versions: sA == 0 && sB == 0 */
160: if ((sA == 0) && (sB == 0)) {
161: if (At) tmp = rA, rA = cA, cA = tmp;
162: if (Bt) tmp = rB, rB = cB, cB = tmp;
163: SlepcDenseMatProd(C, ldC, 0.0, 1.0, A, ldA, rA, cA, At, B, ldB, rB,
164: cB, Bt);
165: PetscFunctionReturn(ierr);
166: }
168: /* Optimized versions: A hermitian && (B not triang) */
169: if (DVD_IS(sA,DVD_MAT_HERMITIAN) &&
170: DVD_ISNOT(sB,DVD_MAT_UTRIANG) &&
171: DVD_ISNOT(sB,DVD_MAT_LTRIANG) ) {
172: PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);
173: rC = rA; cC = cB;
174: BLAShemm_("L", DVD_ISNOT(sA,DVD_MAT_LTRIANG)?"U":"L", &rC, &cC, &one,
175: (PetscScalar*)A, &_ldA, (PetscScalar*)B, &_ldB, &zero, C, &_ldC);
176: PetscLogFlops(rA*cB*cA);
177: PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);
178: return(0);
179: }
181: /* Optimized versions: B hermitian && (A not triang) */
182: if (DVD_IS(sB,DVD_MAT_HERMITIAN) &&
183: DVD_ISNOT(sA,DVD_MAT_UTRIANG) &&
184: DVD_ISNOT(sA,DVD_MAT_LTRIANG) ) {
185: PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);
186: rC = rA; cC = cB;
187: BLAShemm_("R", DVD_ISNOT(sB,DVD_MAT_LTRIANG)?"U":"L", &rC, &cC, &one,
188: (PetscScalar*)B, &_ldB, (PetscScalar*)A, &_ldA, &zero, C, &_ldC);
189: PetscLogFlops(rA*cB*cA);
190: PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);
191: return(0);
192: }
193:
194: SETERRQ(1, "It doesn't support A matrix type!");
195: }
197: /*
198: Normalize the columns of the matrix A, where
199: ldA, the leading dimension of A,
200: rA, cA, rows and columns of A.
201: if eigi is given, the pairs of contiguous columns i i+1 such as eigi[i] != 0
202: are normalized as being one column.
203: */
206: PetscErrorCode SlepcDenseNorm(PetscScalar *A, PetscInt ldA, PetscInt _rA,
207: PetscInt cA, PetscScalar *eigi)
208: {
209: PetscErrorCode ierr;
210: PetscInt i;
211: PetscScalar norm, norm0;
212: PetscBLASInt rA = _rA, one=1;
216: PetscLogEventBegin(SLEPC_SlepcDenseNorm,0,0,0,0);
218: for(i=0; i<cA; i++) {
219: if(eigi && eigi[i] != 0.0) {
220: norm = BLASnrm2_(&rA, &A[i*ldA], &one);
221: norm0 = BLASnrm2_(&rA, &A[(i+1)*ldA], &one);
222: norm = 1.0/PetscSqrtScalar(norm*norm + norm0*norm0);
223: BLASscal_(&rA, &norm, &A[i*ldA], &one);
224: BLASscal_(&rA, &norm, &A[(i+1)*ldA], &one);
225: i++;
226: } else {
227: norm = BLASnrm2_(&rA, &A[i*ldA], &one);
228: norm = 1.0 / norm;
229: BLASscal_(&rA, &norm, &A[i*ldA], &one);
230: }
231: }
233: PetscLogEventEnd(SLEPC_SlepcDenseNorm,0,0,0,0);
235: return(0);
236: }
237:
239: /*
240: Compute A <- orth(A), where
241: ldA, the leading dimension of A,
242: rA, cA, rows and columns of A,
243: auxS, auxiliary vector of more size than cA+min(rA,cA),
244: lauxS, size of auxS,
245: ncA, new number of columns of A
246: */
249: PetscErrorCode SlepcDenseOrth(PetscScalar *A, PetscInt _ldA, PetscInt _rA,
250: PetscInt _cA, PetscScalar *auxS, PetscInt _lauxS,
251: PetscInt *ncA)
252: {
253: PetscErrorCode ierr;
254: PetscBLASInt ldA = _ldA, rA = _rA, cA = _cA,
255: info, ltau = PetscMin(_cA, _rA), lw = _lauxS - ltau;
256: PetscScalar *tau = auxS, *w = tau + ltau;
260: /* Quick exit */
261: if ((_rA == 0) || (cA == 0)) { return(0); }
263: /* Memory check */
264: if (lw < cA) SETERRQ(1, "Insufficient memory for xGEQRF!");
265:
266: PetscLogEventBegin(SLEPC_SlepcDenseOrth,0,0,0,0);
267: LAPACKgeqrf_(&rA, &cA, A, &ldA, tau, w, &lw, &info);
268: if (info) SETERRQ1(PETSC_ERR_LIB, "Error in Lapack xGEQRF %d", info);
269: LAPACKorgqr_(&rA, <au, <au, A, &ldA, tau, w, &lw, &info);
270: if (info) SETERRQ1(PETSC_ERR_LIB, "Error in Lapack xORGQR %d", info);
271: PetscLogEventEnd(SLEPC_SlepcDenseOrth,0,0,0,0);
273: if (ncA) *ncA = ltau;
275: return(0);
276: }
278: /*
279: Y <- X, where
280: ldX, leading dimension of X,
281: rX, cX, rows and columns of X
282: ldY, leading dimension of Y
283: */
286: PetscErrorCode SlepcDenseCopy(PetscScalar *Y, PetscInt ldY, PetscScalar *X,
287: PetscInt ldX, PetscInt rX, PetscInt cX)
288: {
289: PetscErrorCode ierr;
290: PetscInt i;
294: if ((ldX < rX) || (ldY < rX)) {
295: SETERRQ(1, "Leading dimension error!");
296: }
297:
298: /* Quick exit */
299: if (Y == X) {
300: if (ldX != ldY) {
301: SETERRQ(1, "Leading dimension error!");
302: }
303: return(0);
304: }
306: PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);
307: for(i=0; i<cX; i++) {
308: PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
309:
310: }
311: PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);
313: return(0);
314: }
316: /*
317: Y <- X, where
318: ldX, leading dimension of X,
319: rX, cX, rows and columns of X
320: ldY, leading dimension of Y
321: */
324: PetscErrorCode SlepcDenseCopyTriang(PetscScalar *Y, MatType_t sY, PetscInt ldY,
325: PetscScalar *X, MatType_t sX, PetscInt ldX,
326: PetscInt rX, PetscInt cX)
327: {
328: PetscErrorCode ierr;
329: PetscInt i,j,c;
333: if ((ldX < rX) || (ldY < rX)) {
334: SETERRQ(1, "Leading dimension error!");
335: }
337: if (rX != cX) {
338: SETERRQ(1, "SlepcDenseCopyTriang doesn't support rectangular matrices!");
339: }
341: if (DVD_IS(sX,DVD_MAT_UTRIANG) &&
342: DVD_ISNOT(sX,DVD_MAT_LTRIANG)) { /* UpTr to ... */
343: if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
344: DVD_ISNOT(sY,DVD_MAT_LTRIANG)) /* ... UpTr, */
345: c = 0; /* so copy */
346: else if(DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
347: DVD_IS(sY,DVD_MAT_LTRIANG)) /* ... LoTr, */
348: c = 1; /* so transpose */
349: else /* ... Full, */
350: c = 2; /* so reflect from up */
351: } else if (DVD_ISNOT(sX,DVD_MAT_UTRIANG) &&
352: DVD_IS(sX,DVD_MAT_LTRIANG)) { /* LoTr to ... */
353: if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
354: DVD_ISNOT(sY,DVD_MAT_LTRIANG)) /* ... UpTr, */
355: c = 1; /* so transpose */
356: else if(DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
357: DVD_IS(sY,DVD_MAT_LTRIANG)) /* ... LoTr, */
358: c = 0; /* so copy */
359: else /* ... Full, */
360: c = 3; /* so reflect fr. down */
361: } else /* Full to any, */
362: c = 0; /* so copy */
363:
364: PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);
366: switch(c) {
367: case 0: /* copy */
368: for(i=0; i<cX; i++) {
369: PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
370:
371: }
372: break;
374: case 1: /* transpose */
375: for(i=0; i<cX; i++)
376: for(j=0; j<rX; j++)
377: Y[ldY*j+i] = X[ldX*i+j];
378: break;
380: case 2: /* reflection from up */
381: for(i=0; i<cX; i++)
382: for(j=0; j<PetscMin(i+1,rX); j++)
383: Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
384: break;
386: case 3: /* reflection from down */
387: for(i=0; i<cX; i++)
388: for(j=i; j<rX; j++)
389: Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
390: break;
391: }
392: PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);
394: return(0);
395: }
398: /*
399: Compute Y[0..cM-1] <- alpha * X[0..cX-1] * M + beta * Y[0..cM-1],
400: where X and Y are contiguous global vectors.
401: */
404: PetscErrorCode SlepcUpdateVectorsZ(Vec *Y, PetscScalar beta, PetscScalar alpha,
405: Vec *X, PetscInt cX, const PetscScalar *M, PetscInt ldM, PetscInt rM,
406: PetscInt cM)
407: {
408: PetscErrorCode ierr;
412: SlepcUpdateVectorsS(Y, 1, beta, alpha, X, cX, 1, M, ldM, rM, cM);
413:
415: return(0);
416: }
419: /*
420: Compute Y[0:dY:cM*dY-1] <- alpha * X[0:dX:cX-1] * M + beta * Y[0:dY:cM*dY-1],
421: where X and Y are contiguous global vectors.
422: */
425: PetscErrorCode SlepcUpdateVectorsS(Vec *Y, PetscInt dY, PetscScalar beta,
426: PetscScalar alpha, Vec *X, PetscInt cX, PetscInt dX, const PetscScalar *M,
427: PetscInt ldM, PetscInt rM, PetscInt cM)
428: {
429: PetscErrorCode ierr;
430: PetscScalar *px, *py;
431: PetscInt rX, rY, ldX, ldY, i, rcX;
435: /* Compute the real number of columns */
436: rcX = cX/dX;
437: if (rcX != rM) {
438: SETERRQ(1, "Matrix dimensions doesn't match!");
439: }
441: if ((rcX == 0) || (rM == 0) || (cM == 0)) {
442: return(0);
443: } else if ((Y + cM <= X) || (X + cX <= Y) ||
444: ((X != Y) && ((PetscMax(dX,dY))%(PetscMin(dX,dY))!=0))) {
445: /* If Y[0..cM-1] and X[0..cX-1] are not overlapped... */
447: /* Get the dense matrices and dimensions associated to Y and X */
448: VecGetLocalSize(X[0], &rX);
449: VecGetLocalSize(Y[0], &rY);
450: if (rX != rY) {
451: SETERRQ(1, "The multivectors doesn't have the same dimension!");
452: }
453: VecGetArray(X[0], &px);
454: VecGetArray(Y[0], &py);
456: /* Update the strides */
457: ldX = rX*dX; ldY= rY*dY;
459: /* Do operation */
460: SlepcDenseMatProd(py, ldY, beta, alpha, px, ldX, rX, rcX,
461: PETSC_FALSE, M, ldM, rM, cM, PETSC_FALSE);
462:
463: VecRestoreArray(X[0], &px);
464: PetscObjectStateDecrease((PetscObject)X[0]);
465: VecRestoreArray(Y[0], &py);
466: for(i=1; i<cM; i++) {
467: PetscObjectStateIncrease((PetscObject)Y[dY*i]);
468: }
470: } else if ((Y >= X) && (beta == 0.0) && (dY == dX)) {
471: /* If not, call to SlepcUpdateVectors */
472: SlepcUpdateStrideVectors(cX, X, Y-X, dX, Y-X+cM*dX, M-ldM*(Y-X),
473: ldM, PETSC_FALSE);
474: if (alpha != 1.0)
475: for (i=0; i<cM; i++) {
476: VecScale(Y[i], alpha);
477: }
478: } else {
479: SETERRQ(1, "I don't support this case!");
480: }
482: return(0);
483: }
485: /*
486: Compute X <- alpha * X[0:dX:cX-1] * M
487: where X is a matrix with non-consecutive columns
488: */
491: PetscErrorCode SlepcUpdateVectorsD(Vec *X, PetscInt cX, PetscScalar alpha,
492: const PetscScalar *M, PetscInt ldM, PetscInt rM, PetscInt cM,
493: PetscScalar *work, PetscInt lwork)
494: {
495: PetscErrorCode ierr;
496: PetscScalar **px, *Y, *Z;
497: PetscInt rX, i, j, rY, rY0, ldY;
501: if (cX != rM) {
502: SETERRQ(1, "Matrix dimensions doesn't match!");
503: }
505: rY = (lwork/2)/cX;
506: if (rY <= 0) {
507: SETERRQ(1, "Insufficient work space given!");
508: }
509: Y = work; Z = &Y[cX*rY]; ldY = rY;
511: if ((cX == 0) || (rM == 0) || (cM == 0)) {
512: return(0);
513: }
515: /* Get the dense vectors associated to the columns of X */
516: PetscMalloc(sizeof(Vec)*cX, &px);
517: for(i=0; i<cX; i++) {
518: VecGetArray(X[i], &px[i]);
519: }
520: VecGetLocalSize(X[0], &rX);
522: for(i=0, rY0=0; i<rX; i+=rY0) {
523: rY0 = PetscMin(rY, rX-i);
525: /* Y <- X[i0:i1,:] */
526: for(j=0; j<cX; j++) {
527: SlepcDenseCopy(&Y[ldY*j], ldY, px[j]+i, rX, rY0, 1);
528:
529: }
531: /* Z <- Y * M */
532: SlepcDenseMatProd(Z, ldY, 0.0, alpha, Y, ldY, rY0, cX, PETSC_FALSE,
533: M, ldM, rM, cM, PETSC_FALSE);
534:
536: /* X <- Z */
537: for(j=0; j<cM; j++) {
538: SlepcDenseCopy(px[j]+i, rX, &Z[j*ldY], ldY, rY0, 1);
539:
540: }
541: }
543: for(i=0; i<cX; i++) {
544: VecRestoreArray(X[i], &px[i]);
545: }
546: PetscFree(px);
548: return(0);
549: }
553: /* Computes M <- [ M(0:sU-1, 0:sV-1) W(0:sU-1, sV:eV-1) ]
554: [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
555: where W = U' * V.
556: workS0 and workS1 are an auxiliary scalar vector of size
557: (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
558: is needed, and of size eU*eV.
559: */
563: PetscErrorCode VecsMult(PetscScalar *M, MatType_t sM, PetscInt ldM,
564: Vec *U, PetscInt sU, PetscInt eU,
565: Vec *V, PetscInt sV, PetscInt eV,
566: PetscScalar *workS0, PetscScalar *workS1)
567: {
568: PetscErrorCode ierr;
569: PetscInt ldU, ldV, i, j, k;
570: PetscScalar *pu, *pv, *W, *Wr;
574: /* Check if quick exit */
575: if ((eU-sU == 0) || (eV-sV == 0))
576: return(0);
577:
578: /* Get the dense matrices and dimensions associated to U and V */
579: VecGetLocalSize(U[0], &ldU);
580: VecGetLocalSize(V[0], &ldV);
581: if (ldU != ldV) {
582: SETERRQ(1, "Matrix dimensions doesn't match!");
583: }
584: VecGetArray(U[0], &pu);
585: VecGetArray(V[0], &pv);
587: if (workS0)
588: W = workS0;
589: else {
590: PetscMalloc(sizeof(PetscScalar)*((eU-sU)*sV+(eV-sV)*eU), &W);
591:
592: }
594: PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);
596: if ((sU == 0) && (sV == 0) && (eU == ldM)) {
597: /* Use the smart memory usage version */
599: /* W <- U' * V */
600: SlepcDenseMatProdTriang(W, sM, eU,
601: pu, 0, ldU, ldU, eU, PETSC_TRUE,
602: pv, 0, ldV, ldV, eV, PETSC_FALSE);
603:
604:
605: /* ReduceAll(W, SUM) */
606: MPI_Allreduce(W, M, eU*eV, MPIU_SCALAR, MPIU_SUM,
607: ((PetscObject)U[0])->comm);
608: /* Full M matrix */
609: } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
610: DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
611: if (workS1)
612: Wr = workS1;
613: else {
614: PetscMalloc(sizeof(PetscScalar)*((eU-sU)*sV+(eV-sV)*eU), &Wr);
615:
616: }
617:
618: /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
619: SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
620: pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
621: pv , ldV, ldV, sV, PETSC_FALSE);
622:
623:
624: /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
625: SlepcDenseMatProd(W+(eU-sU)*sV, eU, 0.0, 1.0,
626: pu, ldU, ldU, eU, PETSC_TRUE,
627: pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
628:
629:
630: /* ReduceAll(W, SUM) */
631: MPI_Allreduce(W, Wr, (eU-sU)*sV+(eV-sV)*eU, MPIU_SCALAR,
632: MPIU_SUM, ((PetscObject)U[0])->comm);
633:
634: /* M(...,...) <- W */
635: for (i=0,k=0; i<sV; i++)
636: for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
637: for (i=sV; i<eV; i++)
638: for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
639:
640: if (!workS1) {
641: PetscFree(Wr);
642: }
644: /* Upper triangular M matrix */
645: } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
646: DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
647: if (workS1)
648: Wr = workS1;
649: else {
650: PetscMalloc(sizeof(PetscScalar)*(eV-sV)*eU, &Wr);
651:
652: }
653:
654: /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
655: SlepcDenseMatProd(W, eU, 0.0, 1.0,
656: pu, ldU, ldU, eU, PETSC_TRUE,
657: pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
658:
659:
660: /* ReduceAll(W, SUM) */
661: MPI_Allreduce(W, Wr, (eV-sV)*eU, MPIU_SCALAR, MPIU_SUM,
662: ((PetscObject)U[0])->comm);
663:
664: /* M(...,...) <- W */
665: for (i=sV,k=0; i<eV; i++)
666: for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
668: if (!workS1) {
669: PetscFree(Wr);
670: }
672: /* Lower triangular M matrix */
673: } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
674: DVD_IS(sM,DVD_MAT_LTRIANG)) {
675: if (workS1)
676: Wr = workS1;
677: else {
678: PetscMalloc(sizeof(PetscScalar)*(eU-sU)*eV, &Wr);
679:
680: }
681:
682: /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
683: SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
684: pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
685: pv , ldV, ldV, eV, PETSC_FALSE);
686:
687:
688: /* ReduceAll(W, SUM) */
689: MPI_Allreduce(W, Wr, (eU-sU)*eV, MPIU_SCALAR, MPIU_SUM,
690: ((PetscObject)U[0])->comm);
691:
692: /* M(...,...) <- W */
693: for (i=0,k=0; i<eV; i++)
694: for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
695:
696: if (!workS1) {
697: PetscFree(Wr);
698: }
699: }
701: PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);
703: if (!workS0) {
704: PetscFree(W);
705: }
707: VecRestoreArray(U[0], &pu);
708: PetscObjectStateDecrease((PetscObject)U[0]);
709: VecRestoreArray(V[0], &pv);
710: PetscObjectStateDecrease((PetscObject)V[0]);
712: return(0);
713: }
717: /* Computes M <- [ M(0:sU-1, 0:sV-1) W(0:sU-1, sV:eV-1) ]
718: [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
719: where W = local_U' * local_V. Needs VecsMultIb for completing the operation!
720: workS0 and workS1 are an auxiliary scalar vector of size
721: (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
722: is needed, and of size eU*eV.
723: */
727: PetscErrorCode VecsMultIa(PetscScalar *M, MatType_t sM, PetscInt ldM,
728: Vec *U, PetscInt sU, PetscInt eU,
729: Vec *V, PetscInt sV, PetscInt eV)
730: {
731: PetscErrorCode ierr;
732: PetscInt ldU, ldV;
733: PetscScalar *pu, *pv;
737: /* Check if quick exit */
738: if ((eU-sU == 0) || (eV-sV == 0))
739: return(0);
740:
741: /* Get the dense matrices and dimensions associated to U and V */
742: VecGetLocalSize(U[0], &ldU);
743: VecGetLocalSize(V[0], &ldV);
744: if (ldU != ldV) {
745: SETERRQ(1, "Matrix dimensions doesn't match!");
746: }
747: VecGetArray(U[0], &pu);
748: VecGetArray(V[0], &pv);
750: if ((sU == 0) && (sV == 0) && (eU == ldM)) {
751: /* M <- local_U' * local_V */
752: SlepcDenseMatProdTriang(M, sM, eU,
753: pu, 0, ldU, ldU, eU, PETSC_TRUE,
754: pv, 0, ldV, ldV, eV, PETSC_FALSE);
755:
756:
757: /* Full M matrix */
758: } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
759: DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
760: /* M(sU:eU-1,0:sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
761: SlepcDenseMatProd(&M[sU], ldM, 0.0, 1.0,
762: pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
763: pv , ldV, ldV, sV, PETSC_FALSE);
764:
765:
766: /* M(0:eU-1,sV:eV-1) <- U(0:eU-1)' * V(sV:eV-1) */
767: SlepcDenseMatProd(&M[ldM*sV], ldM, 0.0, 1.0,
768: pu, ldU, ldU, eU, PETSC_TRUE,
769: pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
770:
771:
772: /* Other structures */
773: } else SETERRQ(1, "Matrix structure doesn't support by VecsMultI!");
775: VecRestoreArray(U[0], &pu);
776: PetscObjectStateDecrease((PetscObject)U[0]);
777: VecRestoreArray(V[0], &pv);
778: PetscObjectStateDecrease((PetscObject)V[0]);
780: return(0);
781: }
784: /* Computes N <- Allreduce( [ M(0:sU-1, 0:sV-1) W(0:sU-1, sV:eV-1) ] )
785: ( [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ] )
786: where W = U' * V.
787: workS0 and workS1 are an auxiliary scalar vector of size
788: (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
789: is needed, and of size eU*eV.
790: */
794: PetscErrorCode VecsMultIb(PetscScalar *M, MatType_t sM, PetscInt ldM,
795: PetscInt rM, PetscInt cM, PetscScalar *auxS,
796: Vec V)
797: {
798: PetscErrorCode ierr;
799: PetscScalar *W, *Wr;
803: /* Check if quick exit */
804: if ((rM == 0) || (cM == 0))
805: return(0);
806:
807: if (auxS)
808: W = auxS;
809: else {
810: PetscMalloc(sizeof(PetscScalar)*rM*cM*2, &W);
811:
812: }
813: Wr = W + rM*cM;
815: PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);
817: if (sM == 0) {
818: /* W <- M */
819: SlepcDenseCopy(W, rM, M, ldM, rM, cM);
821: /* Wr <- ReduceAll(W, SUM) */
822: MPI_Allreduce(W, Wr, rM*cM, MPIU_SCALAR, MPIU_SUM,
823: ((PetscObject)V)->comm);
825: /* M <- Wr */
826: SlepcDenseCopy(M, ldM, Wr, rM, rM, cM);
828: /* Other structures */
829: } else SETERRQ(1, "Matrix structure doesn't support by VecsMultI!");
831: PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);
833: if (!auxS) {
834: PetscFree(W);
835: }
837: return(0);
838: }
841: /* Computes M <- [ M(0:sU-1, 0:sV-1) W(0:sU-1, sV:eV-1) ]
842: [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
843: where W = U' * V.
844: r, a DvdReduction structure,
845: sr, an structure DvdMult_copy_func.
846: */
850: PetscErrorCode VecsMultS(PetscScalar *M, MatType_t sM, PetscInt ldM,
851: Vec *U, PetscInt sU, PetscInt eU,
852: Vec *V, PetscInt sV, PetscInt eV, DvdReduction *r,
853: DvdMult_copy_func *sr)
854: {
855: PetscErrorCode ierr;
856: PetscInt ldU, ldV;
857: PetscScalar *pu, *pv, *W;
861: /* Check if quick exit */
862: if ((eU-sU == 0) || (eV-sV == 0))
863: return(0);
864:
865: /* Get the dense matrices and dimensions associated to U and V */
866: VecGetLocalSize(U[0], &ldU);
867: VecGetLocalSize(V[0], &ldV);
868: if (ldU != ldV) {
869: SETERRQ(1, "Matrix dimensions doesn't match!");
870: }
871: VecGetArray(U[0], &pu);
872: VecGetArray(V[0], &pv);
874: PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);
876: if ((sU == 0) && (sV == 0) && (eU == ldM)) {
877: /* Use the smart memory usage version */
879: /* Add the reduction to r */
880: SlepcAllReduceSum(r, eU*eV, VecsMultS_copy_func, sr, &W);
881:
883: /* W <- U' * V */
884: SlepcDenseMatProdTriang(W, sM, eU,
885: pu, 0, ldU, ldU, eU, PETSC_TRUE,
886: pv, 0, ldV, ldV, eV, PETSC_FALSE);
887:
888:
889: /* M <- ReduceAll(W, SUM) */
890: sr->M = M; sr->ld = ldM;
891: sr->i0 = 0; sr->i1 = eV; sr->s0 = sU; sr->e0 = eU;
892: sr->i2 = eV;
894: /* Full M matrix */
895: } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
896: DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
897: /* Add the reduction to r */
898: SlepcAllReduceSum(r, (eU-sU)*sV+(eV-sV)*eU, VecsMultS_copy_func,
899: sr, &W);
900:
902: /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
903: SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
904: pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
905: pv , ldV, ldV, sV, PETSC_FALSE);
906:
907:
908: /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
909: SlepcDenseMatProd(W+(eU-sU)*sV, eU, 0.0, 1.0,
910: pu, ldU, ldU, eU, PETSC_TRUE,
911: pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
912:
913:
914: /* M <- ReduceAll(W, SUM) */
915: sr->M = M; sr->ld = ldM;
916: sr->i0 = 0; sr->i1 = sV; sr->s0 = sU; sr->e0 = eU;
917: sr->i2 = eV; sr->s1 = 0; sr->e1 = eU;
919: /* Upper triangular M matrix */
920: } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
921: DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
922: /* Add the reduction to r */
923: SlepcAllReduceSum(r, (eV-sV)*eU, VecsMultS_copy_func, sr, &W);
924:
925:
926: /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
927: SlepcDenseMatProd(W, eU, 0.0, 1.0,
928: pu, ldU, ldU, eU, PETSC_TRUE,
929: pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
930:
931:
932: /* M <- ReduceAll(W, SUM) */
933: sr->M = M; sr->ld = ldM;
934: sr->i0 = sV; sr->i1 = eV; sr->s0 = 0; sr->e0 = eU;
935: sr->i2 = eV;
936:
937: /* Lower triangular M matrix */
938: } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
939: DVD_IS(sM,DVD_MAT_LTRIANG)) {
940: /* Add the reduction to r */
941: SlepcAllReduceSum(r, (eU-sU)*eV, VecsMultS_copy_func, sr, &W);
942:
943:
944: /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
945: SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
946: pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
947: pv , ldV, ldV, eV, PETSC_FALSE);
948:
949:
950: /* ReduceAll(W, SUM) */
951: sr->M = M; sr->ld = ldM;
952: sr->i0 = 0; sr->i1 = eV; sr->s0 = sU; sr->e0 = eU;
953: sr->i2 = eV;
954: }
956: PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);
958: VecRestoreArray(U[0], &pu);
959: PetscObjectStateDecrease((PetscObject)U[0]);
960: VecRestoreArray(V[0], &pv);
961: PetscObjectStateDecrease((PetscObject)V[0]);
963: return(0);
964: }
968: PetscErrorCode VecsMultS_copy_func(PetscScalar *out, PetscInt size_out,
969: void *ptr)
970: {
971: PetscInt i, j, k;
972: DvdMult_copy_func
973: *sr = (DvdMult_copy_func*)ptr;
977: for (i=sr->i0,k=0; i<sr->i1; i++)
978: for (j=sr->ld*i+sr->s0; j<sr->ld*i+sr->e0; j++,k++) sr->M[j] = out[k];
979: for (i=sr->i1; i<sr->i2; i++)
980: for (j=sr->ld*i+sr->s1; j<sr->ld*i+sr->e1; j++,k++) sr->M[j] = out[k];
982: if (k != size_out) SETERRQ(1, "Error in VecsMultS_copy_func!");
984: return(0);
985: }
987: /*
988: Sum up several arrays with only one call to MPIReduce.
989: */
993: PetscErrorCode SlepcAllReduceSumBegin(DvdReductionChunk *ops,
994: PetscInt max_size_ops,
995: PetscScalar *in, PetscScalar *out,
996: PetscInt max_size_in, DvdReduction *r,
997: MPI_Comm comm)
998: {
1001: r->in = in;
1002: r->out = out;
1003: r->size_in = 0;
1004: r->max_size_in = max_size_in;
1005: r->ops = ops;
1006: r->size_ops = 0;
1007: r->max_size_ops = max_size_ops;
1008: r->comm = comm;
1010: return(0);
1011: }
1015: PetscErrorCode SlepcAllReduceSum(DvdReduction *r, PetscInt size_in,
1016: DvdReductionPostF f, void *ptr,
1017: PetscScalar **in)
1018: {
1021: *in = r->in + r->size_in;
1022: r->ops[r->size_ops].out = r->out + r->size_in;
1023: r->ops[r->size_ops].size_out = size_in;
1024: r->ops[r->size_ops].f = f;
1025: r->ops[r->size_ops].ptr = ptr;
1026: if (++(r->size_ops) > r->max_size_ops) {
1027: SETERRQ(1, "max_size_ops is not enought!");
1028: }
1029: if ((r->size_in+= size_in) > r->max_size_in) {
1030: SETERRQ(1, "max_size_in is not enought!");
1031: }
1033: return(0);
1034: }
1039: PetscErrorCode SlepcAllReduceSumEnd(DvdReduction *r)
1040: {
1041: PetscErrorCode ierr;
1042: PetscInt i;
1046: /* Check if quick exit */
1047: if (r->size_ops == 0)
1048: return(0);
1050: /* Call the MPIAllReduce routine */
1051: MPI_Allreduce(r->in, r->out, r->size_in, MPIU_SCALAR, MPIU_SUM,
1052: r->comm);
1054: /* Call the postponed routines */
1055: for(i=0; i<r->size_ops; i++) {
1056: r->ops[i].f(r->ops[i].out, r->ops[i].size_out, r->ops[i].ptr);
1057:
1058: }
1060: /* Tag the operation as done */
1061: r->size_ops = 0;
1063: return(0);
1064: }
1069: PetscErrorCode dvd_orthV(IP ip, Vec *DS, PetscInt size_DS, Vec *cX,
1070: PetscInt size_cX, Vec *V, PetscInt V_new_s,
1071: PetscInt V_new_e, PetscScalar *auxS, Vec auxV,
1072: PetscRandom rand)
1073: {
1074: PetscErrorCode ierr;
1075: PetscInt i, j;
1076: PetscTruth lindep;
1077: PetscReal norm;
1078: PetscScalar *auxS0 = auxS;
1079:
1081:
1082: /* Orthonormalize V with IP */
1083: for (i=V_new_s; i<V_new_e; i++) {
1084: for(j=0; j<3; j++) {
1085: if (j>0) { SlepcVecSetRandom(V[i], rand); }
1086: if (cX + size_cX == V) {
1087: /* If cX and V are contiguous, orthogonalize in one step */
1088: IPOrthogonalize(ip, size_DS, DS, size_cX+i, PETSC_NULL, cX,
1089: V[i], auxS0, &norm, &lindep);
1090: } else if (DS) {
1091: /* Else orthogonalize first against DS, and then against cX and V */
1092: IPOrthogonalize(ip, size_DS, DS, size_cX, PETSC_NULL, cX,
1093: V[i], auxS0, PETSC_NULL, &lindep);
1094: if(!lindep) {
1095: IPOrthogonalize(ip, 0, PETSC_NULL, i, PETSC_NULL, V,
1096: V[i], auxS0, &norm, &lindep);
1097: }
1098: } else {
1099: /* Else orthogonalize first against cX and then against V */
1100: IPOrthogonalize(ip, size_cX, cX, i, PETSC_NULL, V,
1101: V[i], auxS0, &norm, &lindep);
1102: }
1103: if(!lindep && (norm > PETSC_MACHINE_EPSILON)) break;
1104: PetscInfo1(ip, "Orthonormalization problems adding the vector %d to the searching subspace\n", i);
1105:
1106: }
1107: if(lindep || (norm < PETSC_MACHINE_EPSILON)) {
1108: SETERRQ(1, "Error during the orthonormalization of the eigenvectors!");
1109: }
1110: VecScale(V[i], 1.0/norm);
1111: }
1112:
1113: return(0);
1114: }
1115:
1118: /*
1119: Compute eigenvectors associated to the Schur decomposition (S, T) and
1120: save the left vectors in pY and the right vectors in pX, where
1121: n, size of the eigenproblem
1122: ldS, ldT, leading dimension of S and T
1123: ldpX, ldpY, leading dimension of pX and pY
1124: auxS, auxiliar scalar of length:
1125: double standard 3n+n*n, double generalized 11n+4n*n,
1126: complex standard 3n+n*n, complex generalized 3n+2n*n
1127: size_auxS, the length of auxS
1128: doProd, if true pX and pY return the eigenvectors premultiplied by the input vectors stored in pX and pY respectively
1129: */
1130: PetscErrorCode dvd_compute_eigenvectors(PetscInt n_, PetscScalar *S,
1131: PetscInt ldS, PetscScalar *T, PetscInt ldT, PetscScalar *pX,
1132: PetscInt ldpX_, PetscScalar *pY, PetscInt ldpY_, PetscScalar *auxS,
1133: PetscInt size_auxS, PetscTruth doProd)
1134: {
1135: PetscErrorCode ierr;
1136: PetscBLASInt n, ldpX, ldpY, nout, info;
1137: PetscScalar *Sc, *Tc;
1138: const char *side, *howmny;
1139: #if defined(PETSC_USE_COMPLEX)
1140: PetscReal *auxR;
1141: #else
1142: PetscScalar *pA,*pB;
1143: PetscBLASInt n1, ldpA,ldpB;
1144: PetscScalar *alphar, *alphai, *beta;
1145: #endif
1146:
1149: n = PetscBLASIntCast(n_);
1150: ldpX = PetscBLASIntCast(ldpX_);
1151: ldpY = PetscBLASIntCast(ldpY_);
1153: if (pX && pY) side = "B";
1154: else if (pX) side = "R";
1155: else if (pY) side = "L";
1156: else { return(0); }
1158: if (!pX) ldpX = 1;
1159: if (!pY) ldpY = 1;
1161: howmny = doProd?"B":"A";
1163: Sc = auxS; auxS+= n*n; size_auxS-= n*n;
1164: if (T) Tc = auxS, auxS+= n*n, size_auxS-= n*n;
1165: else Tc = PETSC_NULL;
1166:
1167: /* Sc <- S, Tc <- T */
1168: SlepcDenseCopy(Sc, n, S, ldS, n, n);
1169: if (T) {
1170: SlepcDenseCopy(Tc, n, T, ldT, n, n);
1171: }
1173: if (T) {
1174: /* [eigr, pX] = eig(S, T) */
1175: #if defined(PETSC_USE_COMPLEX)
1176: auxR = (PetscReal*)auxS; auxS = (PetscScalar*)(auxR+2*n); size_auxS-= 2*n;
1177: if (size_auxS < 2*n)
1178: SETERRQ(PETSC_ERR_LIB,"Insufficient auxiliar memory for xTGEVC");
1179: LAPACKtgevc_(side,howmny,PETSC_NULL,&n,Sc,&n,Tc,&n,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,auxR,&info);
1180: if (info) SETERRQ1(PETSC_ERR_LIB,"Error in Lapack xTGEVC %i",info);
1181: #else
1182: alphar = auxS; auxS+= n; size_auxS-= n;
1183: alphai = auxS; auxS+= n; size_auxS-= n;
1184: beta = auxS; auxS+= n; size_auxS-= n;
1185: if (doProd) {
1186: if (pX) pA = auxS, auxS+= n*n, size_auxS-= n*n, ldpA = n;
1187: else pA = PETSC_NULL, ldpA = 0;
1188: if (pY) pB = auxS, auxS+= n*n, size_auxS-= n*n, ldpB = n;
1189: else pB = PETSC_NULL, ldpB = 0;
1190: } else {
1191: pA = pX; pB = pY; ldpA = ldpX; ldpB = ldpY;
1192: }
1193: /* LAPACKtgevc_ needs the element i,i+1 in the 2-by-2 digonal blocs
1194: of T that represent complex conjugate eigenpairs to be zero */
1195: n1 = size_auxS;
1196: if (size_auxS < 8*n)
1197: SETERRQ(PETSC_ERR_LIB,"Insufficient auxiliar memory for xGGEV");
1198: LAPACKggev_(pY?"V":"N",pX?"V":"N",&n,Sc,&n,Tc,&n,alphar,alphai,beta,pB,&ldpB,pA,&ldpA,auxS,&n1,&info);
1199: if (info) SETERRQ1(PETSC_ERR_LIB,"Error in Lapack xGGEV %i",info);
1200: if (doProd) {
1201: if (pX) {
1202: /* pX <- pX * pA */
1203: SlepcDenseCopy(Sc, n, pX, ldpX, n, n);
1204: SlepcDenseMatProd(pX, ldpX, 0.0, 1.0,
1205: Sc, n, n, n, PETSC_FALSE,
1206: pA, n, n, n, PETSC_FALSE);
1207: }
1208: if (pY) {
1209: /* pY <- pY * pB */
1210: SlepcDenseCopy(Sc, n, pY, ldpY, n, n);
1211: SlepcDenseMatProd(pY, ldpY, 0.0, 1.0,
1212: Sc, n, n, n, PETSC_FALSE,
1213: pB, n, n, n, PETSC_FALSE);
1214: }
1215: }
1216: #endif
1217: } else {
1218: /* [eigr, pX] = eig(S) */
1219: #if defined(PETSC_USE_COMPLEX)
1220: auxR = (PetscReal*)auxS; auxS = (PetscScalar*)(auxR+n); size_auxS-= n;
1221: if (size_auxS < 2*n)
1222: SETERRQ(PETSC_ERR_LIB,"Insufficient auxiliar memory for xTREVC");
1223: LAPACKtrevc_(side,howmny,PETSC_NULL,&n,Sc,&n,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,auxR,&info);
1224: #else
1225: LAPACKtrevc_(side,howmny,PETSC_NULL,&n,Sc,&n,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,&info);
1226: #endif
1227: if (info) SETERRQ1(PETSC_ERR_LIB,"Error in Lapack xTREVC %i",info);
1228: }
1230: return(0);
1231: }