Actual source code: dvd_blas.c

  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2010, Universidad Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:       
  8:    SLEPc is free software: you can redistribute it and/or modify it under  the
  9:    terms of version 3 of the GNU Lesser General Public License as published by
 10:    the Free Software Foundation.

 12:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY 
 13:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS 
 14:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for 
 15:    more details.

 17:    You  should have received a copy of the GNU Lesser General  Public  License
 18:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 19:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 20: */

 22:  #include slepc.h
 23:  #include slepceps.h
 24:  #include slepcblaslapack.h
 25:  #include davidson.h

 27: PetscLogEvent SLEPC_SlepcDenseMatProd = 0;
 28: PetscLogEvent SLEPC_SlepcDenseNorm = 0;
 29: PetscLogEvent SLEPC_SlepcDenseOrth = 0;
 30: PetscLogEvent SLEPC_SlepcDenseCopy = 0;
 31: PetscLogEvent SLEPC_VecsMult = 0;

 33: void dvd_sum_local(void *in, void *out, PetscMPIInt *cnt,MPI_Datatype *t);
 34: PetscErrorCode VecsMultS_copy_func(PetscScalar *out, PetscInt size_out,
 35:                                    void *ptr);

 37: /*
 38:   Compute C <- a*A*B + b*C, where
 39:     ldC, the leading dimension of C,
 40:     ldA, the leading dimension of A,
 41:     rA, cA, rows and columns of A,
 42:     At, if true use the transpose of A instead,
 43:     ldB, the leading dimension of B,
 44:     rB, cB, rows and columns of B,
 45:     Bt, if true use the transpose of B instead
 46: */
 49: PetscErrorCode SlepcDenseMatProd(PetscScalar *C, PetscInt _ldC, PetscScalar b,
 50:   PetscScalar a,
 51:   const PetscScalar *A, PetscInt _ldA, PetscInt rA, PetscInt cA, PetscTruth At,
 52:   const PetscScalar *B, PetscInt _ldB, PetscInt rB, PetscInt cB, PetscTruth Bt)
 53: {
 54:   PetscErrorCode  ierr;
 55:   PetscInt        tmp;
 56:   PetscBLASInt    m, n, k, ldA = _ldA, ldB = _ldB, ldC = _ldC;
 57:   const char      *N = "N", *T = "C", *qA = N, *qB = N;


 61:   if ((rA == 0) || (cB == 0)) { return(0); }

 63:   PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);

 65:   /* Transpose if needed */
 66:   if (At) tmp = rA, rA = cA, cA = tmp, qA = T;
 67:   if (Bt) tmp = rB, rB = cB, cB = tmp, qB = T;
 68: 
 69:   /* Check size */
 70:   if (cA != rB) {
 71:     SETERRQ(1, "Matrix dimensions doesn't match!");
 72:   }
 73: 
 74:   /* Do stub */
 75:   if ((rA == 1) && (cA == 1) && (cB == 1)) {
 76:     *C = *A * *B;
 77:     m = n = k = 1;
 78:   } else {
 79:     m = rA; n = cB; k = cA;
 80:     BLASgemm_(qA, qB, &m, &n, &k, &a, (PetscScalar*)A, &ldA, (PetscScalar*)B,
 81:               &ldB, &b, C, &ldC);
 82:   }

 84:   PetscLogFlops(m*n*2*k);
 85:   PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);

 87:   return(0);
 88: }

 90: /*
 91:   Compute C <- A*B, where
 92:     sC, structure of C,
 93:     ldC, the leading dimension of C,
 94:     sA, structure of A,
 95:     ldA, the leading dimension of A,
 96:     rA, cA, rows and columns of A,
 97:     At, if true use the transpose of A instead,
 98:     sB, structure of B,
 99:     ldB, the leading dimension of B,
100:     rB, cB, rows and columns of B,
101:     Bt, if true use the transpose of B instead
102: */
105: PetscErrorCode SlepcDenseMatProdTriang(
106:   PetscScalar *C, MatType_t sC, PetscInt ldC,
107:   const PetscScalar *A, MatType_t sA, PetscInt ldA, PetscInt rA, PetscInt cA,
108:   PetscTruth At,
109:   const PetscScalar *B, MatType_t sB, PetscInt ldB, PetscInt rB, PetscInt cB,
110:   PetscTruth Bt)
111: {
112:   PetscErrorCode  ierr;
113:   PetscInt        tmp;
114:   PetscScalar     one=1.0, zero=0.0;
115:   PetscBLASInt    rC, cC, _ldA = ldA, _ldB = ldB, _ldC = ldC;


119:   if ((rA == 0) || (cB == 0)) { return(0); }

121:   /* Transpose if needed */
122:   if (At) tmp = rA, rA = cA, cA = tmp;
123:   if (Bt) tmp = rB, rB = cB, cB = tmp;
124: 
125:   /* Check size */
126:   if (cA != rB) SETERRQ(1, "Matrix dimensions doesn't match!");
127:   if (sB != 0) SETERRQ(1, "It doesn't support B matrix type!");

129:   /* Optimized version: trivial case */
130:   if ((rA == 1) && (cA == 1) && (cB == 1)) {
131:     if (!At && !Bt)     *C = *A * *B;
132:     else if (At && !Bt) *C = PetscConj(*A) * *B;
133:     else if (!At && Bt) *C = *A * PetscConj(*B);
134:     else if (At && Bt)  *C = PetscConj(*A) * PetscConj(*B);
135:     return(0);
136:   }
137: 
138:   /* Optimized versions: sA == 0 && sB == 0 */
139:   if ((sA == 0) && (sB == 0)) {
140:     if (At) tmp = rA, rA = cA, cA = tmp;
141:     if (Bt) tmp = rB, rB = cB, cB = tmp;
142:     SlepcDenseMatProd(C, ldC, 0.0, 1.0, A, ldA, rA, cA, At, B, ldB, rB,
143:                              cB, Bt);
144:     PetscFunctionReturn(ierr);
145:   }

147:   /* Optimized versions: A hermitian && (B not triang) */
148:   if (DVD_IS(sA,DVD_MAT_HERMITIAN) &&
149:       DVD_ISNOT(sB,DVD_MAT_UTRIANG) &&
150:       DVD_ISNOT(sB,DVD_MAT_LTRIANG)    ) {
151:     PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);
152:     rC = rA; cC = cB;
153:     BLAShemm_("L", DVD_ISNOT(sA,DVD_MAT_LTRIANG)?"U":"L", &rC, &cC, &one,
154:               (PetscScalar*)A, &_ldA, (PetscScalar*)B, &_ldB, &zero, C, &_ldC);
155:     PetscLogFlops(rA*cB*cA);
156:     PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);
157:     return(0);
158:   }

160:   /* Optimized versions: B hermitian && (A not triang) */
161:   if (DVD_IS(sB,DVD_MAT_HERMITIAN) &&
162:       DVD_ISNOT(sA,DVD_MAT_UTRIANG) &&
163:       DVD_ISNOT(sA,DVD_MAT_LTRIANG)    ) {
164:     PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);
165:     rC = rA; cC = cB;
166:     BLAShemm_("R", DVD_ISNOT(sB,DVD_MAT_LTRIANG)?"U":"L", &rC, &cC, &one,
167:               (PetscScalar*)B, &_ldB, (PetscScalar*)A, &_ldA, &zero, C, &_ldC);
168:     PetscLogFlops(rA*cB*cA);
169:     PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);
170:     return(0);
171:   }
172: 
173:   SETERRQ(1, "It doesn't support A matrix type!");
174: }

176: /*
177:   Normalize the columns of the matrix A, where
178:     ldA, the leading dimension of A,
179:     rA, cA, rows and columns of A.
180:   if eigi is given, the pairs of contiguous columns i i+1 such as eigi[i] != 0
181:   are normalized as being one column.
182: */
185: PetscErrorCode SlepcDenseNorm(PetscScalar *A, PetscInt ldA, PetscInt _rA,
186:                               PetscInt cA, PetscScalar *eigi)
187: {
188:   PetscErrorCode  ierr;
189:   PetscInt        i;
190:   PetscScalar     norm, norm0;
191:   PetscBLASInt    rA = _rA, one=1;


195:   PetscLogEventBegin(SLEPC_SlepcDenseNorm,0,0,0,0);

197:   for(i=0; i<cA; i++) {
198:     if(eigi && eigi[i] != 0.0) {
199:       norm = BLASnrm2_(&rA, &A[i*ldA], &one);
200:       norm0 = BLASnrm2_(&rA, &A[(i+1)*ldA], &one);
201:       norm = 1.0/PetscSqrtScalar(norm*norm + norm0*norm0);
202:       BLASscal_(&rA, &norm, &A[i*ldA], &one);
203:       BLASscal_(&rA, &norm, &A[(i+1)*ldA], &one);
204:       i++;
205:     } else {
206:       norm = BLASnrm2_(&rA, &A[i*ldA], &one);
207:       norm = 1.0 / norm;
208:       BLASscal_(&rA, &norm, &A[i*ldA], &one);
209:      }
210:   }

212:   PetscLogEventEnd(SLEPC_SlepcDenseNorm,0,0,0,0);

214:   return(0);
215: }
216: 

218: /*
219:   Compute A <- orth(A), where
220:     ldA, the leading dimension of A,
221:     rA, cA, rows and columns of A,
222:     auxS, auxiliary vector of more size than cA+min(rA,cA),
223:     lauxS, size of auxS,
224:     ncA, new number of columns of A
225: */
228: PetscErrorCode SlepcDenseOrth(PetscScalar *A, PetscInt _ldA, PetscInt _rA,
229:                               PetscInt _cA, PetscScalar *auxS, PetscInt _lauxS,
230:                               PetscInt *ncA)
231: {
232:   PetscErrorCode  ierr;
233:   PetscBLASInt    ldA = _ldA, rA = _rA, cA = _cA,
234:                   info, ltau = PetscMin(_cA, _rA), lw = _lauxS - ltau;
235:   PetscScalar     *tau = auxS, *w = tau + ltau;


239:   /* Quick exit */
240:   if ((_rA == 0) || (cA == 0)) { return(0); }

242:   /* Memory check */
243:   if (lw < cA) SETERRQ(1, "Insufficient memory for xGEQRF!");
244: 
245:   PetscLogEventBegin(SLEPC_SlepcDenseOrth,0,0,0,0);
246:   LAPACKgeqrf_(&rA, &cA, A, &ldA, tau, w, &lw, &info);
247:   if (info) SETERRQ1(PETSC_ERR_LIB, "Error in Lapack xGEQRF %d", info);
248:   LAPACKorgqr_(&rA, &ltau, &ltau, A, &ldA, tau, w, &lw, &info);
249:   if (info) SETERRQ1(PETSC_ERR_LIB, "Error in Lapack xORGQR %d", info);
250:   PetscLogEventEnd(SLEPC_SlepcDenseOrth,0,0,0,0);

252:   if (ncA) *ncA = ltau;

254:   return(0);
255: }

257: /*
258:   Y <- X, where
259:   ldX, leading dimension of X,
260:   rX, cX, rows and columns of X
261:   ldY, leading dimension of Y
262: */
265: PetscErrorCode SlepcDenseCopy(PetscScalar *Y, PetscInt ldY, PetscScalar *X,
266:                               PetscInt ldX, PetscInt rX, PetscInt cX)
267: {
268:   PetscErrorCode  ierr;
269:   PetscInt        i;


273:   if ((ldX < rX) || (ldY < rX)) {
274:     SETERRQ(1, "Leading dimension error!");
275:   }
276: 
277:   /* Quick exit */
278:   if (Y == X) {
279:     if (ldX != ldY) {
280:       SETERRQ(1, "Leading dimension error!");
281:     }
282:     return(0);
283:   }

285:   PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);
286:   for(i=0; i<cX; i++) {
287:     PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
288: 
289:   }
290:   PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);

292:   return(0);
293: }

295: /*
296:   Y <- X, where
297:   ldX, leading dimension of X,
298:   rX, cX, rows and columns of X
299:   ldY, leading dimension of Y
300: */
303: PetscErrorCode SlepcDenseCopyTriang(PetscScalar *Y, MatType_t sY, PetscInt ldY,
304:                                     PetscScalar *X, MatType_t sX, PetscInt ldX,
305:                                     PetscInt rX, PetscInt cX)
306: {
307:   PetscErrorCode  ierr;
308:   PetscInt        i,j,c;


312:   if ((ldX < rX) || (ldY < rX)) {
313:     SETERRQ(1, "Leading dimension error!");
314:   }

316:   if (rX != cX) {
317:     SETERRQ(1, "SlepcDenseCopyTriang doesn't support rectangular matrices!");
318:   }

320:   if (DVD_IS(sX,DVD_MAT_UTRIANG) &&
321:       DVD_ISNOT(sX,DVD_MAT_LTRIANG)) {        /* UpTr to ... */
322:     if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
323:         DVD_ISNOT(sY,DVD_MAT_LTRIANG))        /* ... UpTr, */
324:       c = 0;                                      /*     so copy */
325:     else if(DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
326:             DVD_IS(sY,DVD_MAT_LTRIANG))       /* ... LoTr, */
327:       c = 1;                                      /*     so transpose */
328:     else                                          /* ... Full, */
329:       c = 2;                                      /*     so reflect from up */
330:   } else if (DVD_ISNOT(sX,DVD_MAT_UTRIANG) &&
331:              DVD_IS(sX,DVD_MAT_LTRIANG)) {    /* LoTr to ... */
332:     if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
333:         DVD_ISNOT(sY,DVD_MAT_LTRIANG))        /* ... UpTr, */
334:       c = 1;                                      /*     so transpose */
335:     else if(DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
336:             DVD_IS(sY,DVD_MAT_LTRIANG))       /* ... LoTr, */
337:       c = 0;                                      /*     so copy */
338:     else                                          /* ... Full, */
339:       c = 3;                                      /*     so reflect fr. down */
340:   } else                                          /* Full to any, */
341:     c = 0;                                        /*     so copy */
342: 
343:   PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);

345:   switch(c) {
346:   case 0: /* copy */
347:     for(i=0; i<cX; i++) {
348:       PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
349: 
350:     }
351:     break;

353:   case 1: /* transpose */
354:     for(i=0; i<cX; i++)
355:       for(j=0; j<rX; j++)
356:         Y[ldY*j+i] = X[ldX*i+j];
357:     break;

359:   case 2: /* reflection from up */
360:     for(i=0; i<cX; i++)
361:       for(j=0; j<PetscMin(i+1,rX); j++)
362:         Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
363:     break;

365:   case 3: /* reflection from down */
366:     for(i=0; i<cX; i++)
367:       for(j=i; j<rX; j++)
368:         Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
369:     break;
370:   }
371:   PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);

373:   return(0);
374: }


377: /*
378:   Compute Y[0..cM-1] <- alpha * X[0..cX-1] * M + beta * Y[0..cM-1],
379:   where X and Y are contiguous global vectors.
380: */
383: PetscErrorCode SlepcUpdateVectorsZ(Vec *Y, PetscScalar beta, PetscScalar alpha,
384:   Vec *X, PetscInt cX, const PetscScalar *M, PetscInt ldM, PetscInt rM,
385:   PetscInt cM)
386: {
387:   PetscErrorCode  ierr;


391:   SlepcUpdateVectorsS(Y, 1, beta, alpha, X, cX, 1, M, ldM, rM, cM);
392: 

394:   return(0);
395: }


398: /*
399:   Compute Y[0:dY:cM*dY-1] <- alpha * X[0:dX:cX-1] * M + beta * Y[0:dY:cM*dY-1],
400:   where X and Y are contiguous global vectors.
401: */
404: PetscErrorCode SlepcUpdateVectorsS(Vec *Y, PetscInt dY, PetscScalar beta,
405:   PetscScalar alpha, Vec *X, PetscInt cX, PetscInt dX, const PetscScalar *M,
406:   PetscInt ldM, PetscInt rM, PetscInt cM)
407: {
408:   PetscErrorCode  ierr;
409:   PetscScalar     *px, *py;
410:   PetscInt        rX, rY, ldX, ldY, i, rcX;


414:   /* Compute the real number of columns */
415:   rcX = cX/dX;
416:   if (rcX != rM) {
417:     SETERRQ(1, "Matrix dimensions doesn't match!");
418:   }

420:   if ((rcX == 0) || (rM == 0) || (cM == 0)) {
421:     return(0);
422:   } else if ((Y + cM <= X) || (X + cX <= Y) ||
423:              ((X != Y) && ((PetscMax(dX,dY))%(PetscMin(dX,dY))!=0))) {
424:     /* If Y[0..cM-1] and X[0..cX-1] are not overlapped... */

426:     /* Get the dense matrices and dimensions associated to Y and X */
427:     VecGetLocalSize(X[0], &rX);
428:     VecGetLocalSize(Y[0], &rY);
429:     if (rX != rY) {
430:       SETERRQ(1, "The multivectors doesn't have the same dimension!");
431:     }
432:     VecGetArray(X[0], &px);
433:     VecGetArray(Y[0], &py);

435:     /* Update the strides */
436:     ldX = rX*dX; ldY= rY*dY;

438:     /* Do operation */
439:     SlepcDenseMatProd(py, ldY, beta, alpha, px, ldX, rX, rcX,
440:                     PETSC_FALSE, M, ldM, rM, cM, PETSC_FALSE);
441: 
442:     VecRestoreArray(X[0], &px);
443:     PetscObjectStateDecrease((PetscObject)X[0]);
444:     VecRestoreArray(Y[0], &py);
445:     for(i=1; i<cM; i++) {
446:       PetscObjectStateIncrease((PetscObject)Y[dY*i]);
447:     }

449:   } else if ((Y >= X) && (beta == 0.0) && (dY == dX)) {
450:     /* If not, call to SlepcUpdateVectors */
451:     SlepcUpdateStrideVectors(cX, X, Y-X, dX, Y-X+cM*dX, M-ldM*(Y-X),
452:                                     ldM, PETSC_FALSE);
453:     if (alpha != 1.0)
454:       for (i=0; i<cM; i++) {
455:         VecScale(Y[i], alpha);
456:       }
457:   } else {
458:     SETERRQ(1, "I don't support this case!");
459:   }

461:   return(0);
462: }

464: /*
465:   Compute X <- alpha * X[0:dX:cX-1] * M
466:   where X is a matrix with non-consecutive columns
467: */
470: PetscErrorCode SlepcUpdateVectorsD(Vec *X, PetscInt cX, PetscScalar alpha,
471:   const PetscScalar *M, PetscInt ldM, PetscInt rM, PetscInt cM,
472:   PetscScalar *work, PetscInt lwork)
473: {
474:   PetscErrorCode  ierr;
475:   PetscScalar     **px, *Y, *Z;
476:   PetscInt        rX, i, j, rY, rY0, ldY;


480:   if (cX != rM) {
481:     SETERRQ(1, "Matrix dimensions doesn't match!");
482:   }

484:   rY = (lwork/2)/cX;
485:   if (rY <= 0) {
486:     SETERRQ(1, "Insufficient work space given!");
487:   }
488:   Y = work; Z = &Y[cX*rY]; ldY = rY;

490:   if ((cX == 0) || (rM == 0) || (cM == 0)) {
491:     return(0);
492:   }

494:   /* Get the dense vectors associated to the columns of X */
495:   PetscMalloc(sizeof(Vec)*cX, &px);
496:   for(i=0; i<cX; i++) {
497:     VecGetArray(X[i], &px[i]);
498:   }
499:   VecGetLocalSize(X[0], &rX);

501:   for(i=0, rY0=0; i<rX; i+=rY0) {
502:     rY0 = PetscMin(rY, rX-i);

504:     /* Y <- X[i0:i1,:] */
505:     for(j=0; j<cX; j++) {
506:       SlepcDenseCopy(&Y[ldY*j], ldY, px[j]+i, rX, rY0, 1);
507: 
508:     }

510:     /* Z <- Y * M */
511:     SlepcDenseMatProd(Z, ldY, 0.0, alpha, Y, ldY, rY0, cX, PETSC_FALSE,
512:                                                  M, ldM, rM, cM, PETSC_FALSE);
513: 

515:     /* X <- Z */
516:     for(j=0; j<cM; j++) {
517:       SlepcDenseCopy(px[j]+i, rX, &Z[j*ldY], ldY, rY0, 1);
518: 
519:     }
520:   }

522:   for(i=0; i<cX; i++) {
523:     VecRestoreArray(X[i], &px[i]);
524:   }
525:   PetscFree(px);

527:   return(0);
528: }



532: /* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
533:                  [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
534:   where W = U' * V.
535:   workS0 and workS1 are an auxiliary scalar vector of size
536:   (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
537:   is needed, and of size eU*eV.
538: */

542: PetscErrorCode VecsMult(PetscScalar *M, MatType_t sM, PetscInt ldM,
543:                         Vec *U, PetscInt sU, PetscInt eU,
544:                         Vec *V, PetscInt sV, PetscInt eV,
545:                         PetscScalar *workS0, PetscScalar *workS1)
546: {
547:   PetscErrorCode  ierr;
548:   PetscInt        ldU, ldV, i, j, k;
549:   PetscScalar     *pu, *pv, *W, *Wr;


553:   /* Check if quick exit */
554:   if ((eU-sU == 0) || (eV-sV == 0))
555:     return(0);
556: 
557:   /* Get the dense matrices and dimensions associated to U and V */
558:   VecGetLocalSize(U[0], &ldU);
559:   VecGetLocalSize(V[0], &ldV);
560:   if (ldU != ldV) {
561:     SETERRQ(1, "Matrix dimensions doesn't match!");
562:   }
563:   VecGetArray(U[0], &pu);
564:   VecGetArray(V[0], &pv);

566:   if (workS0)
567:     W = workS0;
568:   else {
569:     PetscMalloc(sizeof(PetscScalar)*((eU-sU)*sV+(eV-sV)*eU), &W);
570: 
571:   }

573:   PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);

575:   if ((sU == 0) && (sV == 0) && (eU == ldM)) {
576:     /* Use the smart memory usage version */

578:     /* W <- U' * V */
579:     SlepcDenseMatProdTriang(W, sM, eU,
580:                                    pu, 0, ldU, ldU, eU, PETSC_TRUE,
581:                                    pv, 0, ldV, ldV, eV, PETSC_FALSE);
582: 
583: 
584:     /* ReduceAll(W, SUM) */
585:     MPI_Allreduce(W, M, eU*eV, MPIU_SCALAR, MPIU_SUM,
586:                          ((PetscObject)U[0])->comm);
587:   /* Full M matrix */
588:   } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
589:              DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
590:     if (workS1)
591:       Wr = workS1;
592:     else {
593:       PetscMalloc(sizeof(PetscScalar)*((eU-sU)*sV+(eV-sV)*eU), &Wr);
594: 
595:     }
596: 
597:     /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
598:     SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
599:                              pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
600:                              pv       , ldV, ldV, sV,    PETSC_FALSE);
601: 
602: 
603:     /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
604:     SlepcDenseMatProd(W+(eU-sU)*sV, eU, 0.0, 1.0,
605:                              pu,        ldU, ldU, eU,    PETSC_TRUE,
606:                              pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
607: 
608: 
609:     /* ReduceAll(W, SUM) */
610:     MPI_Allreduce(W, Wr, (eU-sU)*sV+(eV-sV)*eU, MPIU_SCALAR,
611:                       MPIU_SUM, ((PetscObject)U[0])->comm);
612: 
613:     /* M(...,...) <- W */
614:     for (i=0,k=0; i<sV; i++)
615:       for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
616:     for (i=sV; i<eV; i++)
617:         for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
618: 
619:     if (!workS1) {
620:       PetscFree(Wr);
621:     }

623:   /* Upper triangular M matrix */
624:   } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
625:              DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
626:     if (workS1)
627:       Wr = workS1;
628:     else {
629:       PetscMalloc(sizeof(PetscScalar)*(eV-sV)*eU, &Wr);
630: 
631:     }
632: 
633:     /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
634:     SlepcDenseMatProd(W,         eU,  0.0, 1.0,
635:                              pu,        ldU, ldU, eU,    PETSC_TRUE,
636:                              pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
637: 
638: 
639:     /* ReduceAll(W, SUM) */
640:     MPI_Allreduce(W, Wr, (eV-sV)*eU, MPIU_SCALAR, MPIU_SUM,
641:                          ((PetscObject)U[0])->comm);
642: 
643:     /* M(...,...) <- W */
644:     for (i=sV,k=0; i<eV; i++)
645:         for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];

647:     if (!workS1) {
648:       PetscFree(Wr);
649:     }

651:   /* Lower triangular M matrix */
652:   } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
653:              DVD_IS(sM,DVD_MAT_LTRIANG)) {
654:     if (workS1)
655:       Wr = workS1;
656:     else {
657:       PetscMalloc(sizeof(PetscScalar)*(eU-sU)*eV, &Wr);
658: 
659:     }
660: 
661:     /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
662:     SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
663:                              pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
664:                              pv       , ldV, ldV, eV,    PETSC_FALSE);
665: 
666: 
667:     /* ReduceAll(W, SUM) */
668:     MPI_Allreduce(W, Wr, (eU-sU)*eV, MPIU_SCALAR, MPIU_SUM,
669:                          ((PetscObject)U[0])->comm);
670: 
671:     /* M(...,...) <- W */
672:     for (i=0,k=0; i<eV; i++)
673:       for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
674: 
675:     if (!workS1) {
676:       PetscFree(Wr);
677:     }
678:   }

680:   PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);

682:   if (!workS0) {
683:     PetscFree(W);
684:   }

686:   VecRestoreArray(U[0], &pu);
687:   PetscObjectStateDecrease((PetscObject)U[0]);
688:   VecRestoreArray(V[0], &pv);
689:   PetscObjectStateDecrease((PetscObject)V[0]);

691:   return(0);
692: }



696: /* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
697:                  [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
698:   where W = local_U' * local_V. Needs VecsMultIb for completing the operation!
699:   workS0 and workS1 are an auxiliary scalar vector of size
700:   (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
701:   is needed, and of size eU*eV.
702: */

706: PetscErrorCode VecsMultIa(PetscScalar *M, MatType_t sM, PetscInt ldM,
707:                           Vec *U, PetscInt sU, PetscInt eU,
708:                           Vec *V, PetscInt sV, PetscInt eV)
709: {
710:   PetscErrorCode  ierr;
711:   PetscInt        ldU, ldV;
712:   PetscScalar     *pu, *pv;


716:   /* Check if quick exit */
717:   if ((eU-sU == 0) || (eV-sV == 0))
718:     return(0);
719: 
720:   /* Get the dense matrices and dimensions associated to U and V */
721:   VecGetLocalSize(U[0], &ldU);
722:   VecGetLocalSize(V[0], &ldV);
723:   if (ldU != ldV) {
724:     SETERRQ(1, "Matrix dimensions doesn't match!");
725:   }
726:   VecGetArray(U[0], &pu);
727:   VecGetArray(V[0], &pv);

729:   if ((sU == 0) && (sV == 0) && (eU == ldM)) {
730:     /* M <- local_U' * local_V */
731:     SlepcDenseMatProdTriang(M, sM, eU,
732:                                    pu, 0, ldU, ldU, eU, PETSC_TRUE,
733:                                    pv, 0, ldV, ldV, eV, PETSC_FALSE);
734: 
735: 
736:   /* Full M matrix */
737:   } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
738:              DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
739:     /* M(sU:eU-1,0:sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
740:     SlepcDenseMatProd(&M[sU], ldM, 0.0, 1.0,
741:                              pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
742:                              pv       , ldV, ldV, sV,    PETSC_FALSE);
743: 
744: 
745:     /* M(0:eU-1,sV:eV-1) <- U(0:eU-1)' * V(sV:eV-1) */
746:     SlepcDenseMatProd(&M[ldM*sV], ldM, 0.0, 1.0,
747:                              pu,        ldU, ldU, eU,    PETSC_TRUE,
748:                              pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
749: 
750: 
751:   /* Other structures */
752:   } else SETERRQ(1, "Matrix structure doesn't support by VecsMultI!");

754:   VecRestoreArray(U[0], &pu);
755:   PetscObjectStateDecrease((PetscObject)U[0]);
756:   VecRestoreArray(V[0], &pv);
757:   PetscObjectStateDecrease((PetscObject)V[0]);

759:   return(0);
760: }


763: /* Computes N <- Allreduce( [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ] )
764:                           ( [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ] )
765:   where W = U' * V.
766:   workS0 and workS1 are an auxiliary scalar vector of size
767:   (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
768:   is needed, and of size eU*eV.
769: */

773: PetscErrorCode VecsMultIb(PetscScalar *M, MatType_t sM, PetscInt ldM,
774:                           PetscInt rM, PetscInt cM, PetscScalar *auxS,
775:                           Vec V)
776: {
777:   PetscErrorCode  ierr;
778:   PetscScalar     *W, *Wr;


782:   /* Check if quick exit */
783:   if ((rM == 0) || (cM == 0))
784:     return(0);
785: 
786:   if (auxS)
787:     W = auxS;
788:   else {
789:     PetscMalloc(sizeof(PetscScalar)*rM*cM*2, &W);
790: 
791:   }
792:   Wr = W + rM*cM;

794:   PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);

796:   if (sM == 0) {
797:     /* W <- M */
798:     SlepcDenseCopy(W, rM, M, ldM, rM, cM);

800:     /* Wr <- ReduceAll(W, SUM) */
801:     MPI_Allreduce(W, Wr, rM*cM, MPIU_SCALAR, MPIU_SUM,
802:                          ((PetscObject)V)->comm);

804:     /* M <- Wr */
805:     SlepcDenseCopy(M, ldM, Wr, rM, rM, cM);

807:   /* Other structures */
808:   } else SETERRQ(1, "Matrix structure doesn't support by VecsMultI!");

810:   PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);

812:   if (!auxS) {
813:     PetscFree(W);
814:   }

816:   return(0);
817: }


820: /* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
821:                  [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
822:   where W = U' * V.
823:   r, a DvdReduction structure,
824:   sr, an structure DvdMult_copy_func.
825: */

829: PetscErrorCode VecsMultS(PetscScalar *M, MatType_t sM, PetscInt ldM,
830:                          Vec *U, PetscInt sU, PetscInt eU,
831:                          Vec *V, PetscInt sV, PetscInt eV, DvdReduction *r,
832:                          DvdMult_copy_func *sr)
833: {
834:   PetscErrorCode  ierr;
835:   PetscInt        ldU, ldV;
836:   PetscScalar     *pu, *pv, *W;


840:   /* Check if quick exit */
841:   if ((eU-sU == 0) || (eV-sV == 0))
842:     return(0);
843: 
844:   /* Get the dense matrices and dimensions associated to U and V */
845:   VecGetLocalSize(U[0], &ldU);
846:   VecGetLocalSize(V[0], &ldV);
847:   if (ldU != ldV) {
848:     SETERRQ(1, "Matrix dimensions doesn't match!");
849:   }
850:   VecGetArray(U[0], &pu);
851:   VecGetArray(V[0], &pv);

853:   PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);

855:   if ((sU == 0) && (sV == 0) && (eU == ldM)) {
856:     /* Use the smart memory usage version */

858:     /* Add the reduction to r */
859:     SlepcAllReduceSum(r, eU*eV, VecsMultS_copy_func, sr, &W);
860: 

862:     /* W <- U' * V */
863:     SlepcDenseMatProdTriang(W, sM, eU,
864:                                    pu, 0, ldU, ldU, eU, PETSC_TRUE,
865:                                    pv, 0, ldV, ldV, eV, PETSC_FALSE);
866: 
867: 
868:     /* M <- ReduceAll(W, SUM) */
869:     sr->M = M;    sr->ld = ldM;
870:     sr->i0 = 0;   sr->i1 = eV;    sr->s0 = sU;    sr->e0 = eU;
871:                   sr->i2 = eV;

873:   /* Full M matrix */
874:   } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
875:              DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
876:     /* Add the reduction to r */
877:     SlepcAllReduceSum(r, (eU-sU)*sV+(eV-sV)*eU, VecsMultS_copy_func,
878:                              sr, &W);
879: 

881:     /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
882:     SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
883:                              pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
884:                              pv       , ldV, ldV, sV,    PETSC_FALSE);
885: 
886: 
887:     /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
888:     SlepcDenseMatProd(W+(eU-sU)*sV, eU, 0.0, 1.0,
889:                              pu,        ldU, ldU, eU,    PETSC_TRUE,
890:                              pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
891: 
892: 
893:     /* M <- ReduceAll(W, SUM) */
894:     sr->M = M;    sr->ld = ldM;
895:     sr->i0 = 0;   sr->i1 = sV;    sr->s0 = sU;    sr->e0 = eU;
896:                   sr->i2 = eV;    sr->s1 = 0;     sr->e1 = eU;

898:   /* Upper triangular M matrix */
899:   } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
900:              DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
901:     /* Add the reduction to r */
902:     SlepcAllReduceSum(r, (eV-sV)*eU, VecsMultS_copy_func, sr, &W);
903: 
904: 
905:     /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
906:     SlepcDenseMatProd(W,         eU,  0.0, 1.0,
907:                              pu,        ldU, ldU, eU,    PETSC_TRUE,
908:                              pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
909: 
910: 
911:     /* M <- ReduceAll(W, SUM) */
912:     sr->M = M;    sr->ld = ldM;
913:     sr->i0 = sV;  sr->i1 = eV;    sr->s0 = 0;     sr->e0 = eU;
914:                   sr->i2 = eV;
915: 
916:   /* Lower triangular M matrix */
917:   } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
918:              DVD_IS(sM,DVD_MAT_LTRIANG)) {
919:     /* Add the reduction to r */
920:     SlepcAllReduceSum(r, (eU-sU)*eV, VecsMultS_copy_func, sr, &W);
921: 
922: 
923:     /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
924:     SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
925:                              pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
926:                              pv       , ldV, ldV, eV,    PETSC_FALSE);
927: 
928: 
929:     /* ReduceAll(W, SUM) */
930:     sr->M = M;    sr->ld = ldM;
931:     sr->i0 = 0;   sr->i1 = eV;    sr->s0 = sU;    sr->e0 = eU;
932:                   sr->i2 = eV;
933:   }

935:   PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);

937:   VecRestoreArray(U[0], &pu);
938:   PetscObjectStateDecrease((PetscObject)U[0]);
939:   VecRestoreArray(V[0], &pv);
940:   PetscObjectStateDecrease((PetscObject)V[0]);

942:   return(0);
943: }

947: PetscErrorCode VecsMultS_copy_func(PetscScalar *out, PetscInt size_out,
948:                                    void *ptr)
949: {
950:   PetscInt        i, j, k;
951:   DvdMult_copy_func
952:                   *sr = (DvdMult_copy_func*)ptr;


956:   for (i=sr->i0,k=0; i<sr->i1; i++)
957:     for (j=sr->ld*i+sr->s0; j<sr->ld*i+sr->e0; j++,k++) sr->M[j] = out[k];
958:   for (i=sr->i1; i<sr->i2; i++)
959:     for (j=sr->ld*i+sr->s1; j<sr->ld*i+sr->e1; j++,k++) sr->M[j] = out[k];

961:   if (k != size_out) SETERRQ(1, "Error in VecsMultS_copy_func!");

963:   return(0);
964: }

966: /* 
967:   Sum up several arrays with only one call to MPIReduce.
968: */

972: PetscErrorCode SlepcAllReduceSumBegin(DvdReductionChunk *ops,
973:                                       PetscInt max_size_ops,
974:                                       PetscScalar *in, PetscScalar *out,
975:                                       PetscInt max_size_in, DvdReduction *r,
976:                                       MPI_Comm comm)
977: {

980:   r->in = in;
981:   r->out = out;
982:   r->size_in = 0;
983:   r->max_size_in = max_size_in;
984:   r->ops = ops;
985:   r->size_ops = 0;
986:   r->max_size_ops = max_size_ops;
987:   r->comm = comm;

989:   return(0);
990: }

994: PetscErrorCode SlepcAllReduceSum(DvdReduction *r, PetscInt size_in,
995:                                  DvdReductionPostF f, void *ptr,
996:                                  PetscScalar **in)
997: {

1000:   *in = r->in + r->size_in;
1001:   r->ops[r->size_ops].out = r->out + r->size_in;
1002:   r->ops[r->size_ops].size_out = size_in;
1003:   r->ops[r->size_ops].f = f;
1004:   r->ops[r->size_ops].ptr = ptr;
1005:   if (++(r->size_ops) > r->max_size_ops) {
1006:     SETERRQ(1, "max_size_ops is not enought!");
1007:   }
1008:   if ((r->size_in+= size_in) > r->max_size_in) {
1009:     SETERRQ(1, "max_size_in is not enought!");
1010:   }

1012:   return(0);
1013: }


1018: PetscErrorCode SlepcAllReduceSumEnd(DvdReduction *r)
1019: {
1020:   PetscErrorCode  ierr;
1021:   PetscInt        i;


1025:   /* Check if quick exit */
1026:   if (r->size_ops == 0)
1027:     return(0);

1029:   /* Call the MPIAllReduce routine */
1030:   MPI_Allreduce(r->in, r->out, r->size_in, MPIU_SCALAR, MPIU_SUM,
1031:                        r->comm);

1033:   /* Call the postponed routines */
1034:   for(i=0; i<r->size_ops; i++) {
1035:     r->ops[i].f(r->ops[i].out, r->ops[i].size_out, r->ops[i].ptr);
1036: 
1037:   }

1039:   /* Tag the operation as done */
1040:   r->size_ops = 0;

1042:   return(0);
1043: }


1048: PetscErrorCode dvd_orthV(IP ip, Vec *DS, PetscInt size_DS, Vec *cX,
1049:                          PetscInt size_cX, Vec *V, PetscInt V_new_s,
1050:                          PetscInt V_new_e, PetscScalar *auxS, Vec auxV,
1051:                          PetscRandom rand)
1052: {
1053:   PetscErrorCode  ierr;
1054:   PetscInt        i, j;
1055:   PetscTruth      lindep;
1056:   PetscReal       norm;
1057:   PetscScalar     *auxS0 = auxS;
1058: 
1060: 
1061:   /* Orthonormalize V with IP */
1062:   for (i=V_new_s; i<V_new_e; i++) {
1063:     for(j=0; j<3; j++) {
1064:       if (j>0) { SlepcVecSetRandom(V[i], rand);  }
1065:       if (cX + size_cX == V) {
1066:         /* If cX and V are contiguous, orthogonalize in one step */
1067:         IPOrthogonalize(ip, size_DS, DS, size_cX+i, PETSC_NULL, cX,
1068:                                V[i], auxS0, &norm, &lindep);
1069:       } else if (DS) {
1070:         /* Else orthogonalize first against DS, and then against cX and V */
1071:         IPOrthogonalize(ip, size_DS, DS, size_cX, PETSC_NULL, cX,
1072:                                V[i], auxS0, PETSC_NULL, &lindep);
1073:         if(!lindep) {
1074:           IPOrthogonalize(ip, 0, PETSC_NULL, i, PETSC_NULL, V,
1075:                                  V[i], auxS0, &norm, &lindep);
1076:         }
1077:       } else {
1078:         /* Else orthogonalize first against cX and then against V */
1079:         IPOrthogonalize(ip, size_cX, cX, i, PETSC_NULL, V,
1080:                                V[i], auxS0, &norm, &lindep);
1081:       }
1082:       if(!lindep && (norm > PETSC_MACHINE_EPSILON)) break;
1083:       PetscInfo1(ip, "Orthonormalization problems adding the vector %d to the searching subspace\n", i);
1084: 
1085:     }
1086:     if(lindep || (norm < PETSC_MACHINE_EPSILON)) {
1087:         SETERRQ(1, "Error during the orthonormalization of the eigenvectors!");
1088:     }
1089:     VecScale(V[i], 1.0/norm);
1090:   }
1091: 
1092:   return(0);
1093: }
1094: 
1097: /*
1098:   Compute eigenvectors associated to the Schur decomposition (S, T) and
1099:   save the left vectors in pY and the right vectors in pX, where
1100:   n, size of the eigenproblem
1101:   ldS, ldT, leading dimension of S and T
1102:   ldpX, ldpY, leading dimension of pX and pY
1103:   auxS, auxiliar scalar of length:
1104:     double standard 3n+n*n, double generalized 11n+4n*n,
1105:     complex standard 3n+n*n, complex generalized 3n+2n*n
1106:   size_auxS, the length of auxS
1107:   doProd, if true pX and pY return the eigenvectors premultiplied by the input vectors stored in pX and pY respectively
1108: */
1109: PetscErrorCode dvd_compute_eigenvectors(PetscInt n_, PetscScalar *S,
1110:   PetscInt ldS, PetscScalar *T, PetscInt ldT, PetscScalar *pX,
1111:   PetscInt ldpX_, PetscScalar *pY, PetscInt ldpY_, PetscScalar *auxS,
1112:   PetscInt size_auxS, PetscTruth doProd)
1113: {
1114:   PetscErrorCode  ierr;
1115:   PetscBLASInt    n, ldpX, ldpY, nout, info;
1116:   PetscScalar     *Sc, *Tc;
1117:   const char      *side, *howmny;
1118: #if defined(PETSC_USE_COMPLEX)
1119:   PetscReal       *auxR;
1120: #else
1121:   PetscScalar     *pA,*pB;
1122:   PetscBLASInt    n1, ldpA,ldpB;
1123:   PetscScalar     *alphar, *alphai, *beta;
1124: #endif
1125: 

1128:   n = PetscBLASIntCast(n_);
1129:   ldpX = PetscBLASIntCast(ldpX_);
1130:   ldpY = PetscBLASIntCast(ldpY_);

1132:   if (pX && pY) side = "B";
1133:   else if (pX)  side = "R";
1134:   else if (pY)  side = "L";
1135:   else { return(0); }

1137:   if (!pX) ldpX = 1;
1138:   if (!pY) ldpY = 1;

1140:   howmny = doProd?"B":"A";

1142:   Sc = auxS; auxS+= n*n; size_auxS-= n*n;
1143:   if (T) Tc = auxS, auxS+= n*n, size_auxS-= n*n;
1144:   else   Tc = PETSC_NULL;
1145: 
1146:   /* Sc <- S, Tc <- T */
1147:   SlepcDenseCopy(Sc, n, S, ldS, n, n);
1148:   if (T) {
1149:     SlepcDenseCopy(Tc, n, T, ldT, n, n);
1150:   }

1152:   if (T) {
1153:     /* [eigr, pX] = eig(S, T) */
1154: #if defined(PETSC_USE_COMPLEX)
1155:     auxR = (PetscReal*)auxS; auxS = (PetscScalar*)(auxR+2*n); size_auxS-= 2*n;
1156:     if (size_auxS < 2*n)
1157:       SETERRQ(PETSC_ERR_LIB,"Insufficient auxiliar memory for xTGEVC");
1158:     LAPACKtgevc_(side,howmny,PETSC_NULL,&n,Sc,&n,Tc,&n,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,auxR,&info);
1159:     if (info) SETERRQ1(PETSC_ERR_LIB,"Error in Lapack xTGEVC %i",info);
1160: #else
1161:     alphar = auxS; auxS+= n; size_auxS-= n;
1162:     alphai = auxS; auxS+= n; size_auxS-= n;
1163:     beta = auxS; auxS+= n; size_auxS-= n;
1164:     if (doProd) {
1165:       if (pX) pA = auxS, auxS+= n*n, size_auxS-= n*n, ldpA = n;
1166:       else    pA = PETSC_NULL, ldpA = 0;
1167:       if (pY) pB = auxS, auxS+= n*n, size_auxS-= n*n, ldpB = n;
1168:       else    pB = PETSC_NULL, ldpB = 0;
1169:     } else {
1170:       pA = pX; pB = pY; ldpA = ldpX; ldpB = ldpY;
1171:     }
1172:     /* LAPACKtgevc_ needs the element i,i+1 in the 2-by-2 digonal blocs
1173:        of T that represent complex conjugate eigenpairs to be zero */
1174:     n1 = size_auxS;
1175:     if (size_auxS < 8*n)
1176:       SETERRQ(PETSC_ERR_LIB,"Insufficient auxiliar memory for xGGEV");
1177:     LAPACKggev_(pY?"V":"N",pX?"V":"N",&n,Sc,&n,Tc,&n,alphar,alphai,beta,pB,&ldpB,pA,&ldpA,auxS,&n1,&info);
1178:     if (info) SETERRQ1(PETSC_ERR_LIB,"Error in Lapack xGGEV %i",info);
1179:     if (doProd) {
1180:       if (pX) {
1181:         /* pX <- pX * pA */
1182:         SlepcDenseCopy(Sc, n, pX, ldpX, n, n);
1183:         SlepcDenseMatProd(pX, ldpX, 0.0, 1.0,
1184:                                  Sc, n, n, n, PETSC_FALSE,
1185:                                  pA, n, n, n, PETSC_FALSE);
1186:       }
1187:       if (pY) {
1188:         /* pY <- pY * pB */
1189:         SlepcDenseCopy(Sc, n, pY, ldpY, n, n);
1190:         SlepcDenseMatProd(pY, ldpY, 0.0, 1.0,
1191:                                  Sc, n, n, n, PETSC_FALSE,
1192:                                  pB, n, n, n, PETSC_FALSE);
1193:       }
1194:     }
1195: #endif
1196:   } else {
1197:     /* [eigr, pX] = eig(S) */
1198: #if defined(PETSC_USE_COMPLEX)
1199:     auxR = (PetscReal*)auxS; auxS = (PetscScalar*)(auxR+n); size_auxS-= n;
1200:     if (size_auxS < 2*n)
1201:       SETERRQ(PETSC_ERR_LIB,"Insufficient auxiliar memory for xTREVC");
1202:     LAPACKtrevc_(side,howmny,PETSC_NULL,&n,Sc,&n,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,auxR,&info);
1203: #else
1204:     LAPACKtrevc_(side,howmny,PETSC_NULL,&n,Sc,&n,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,&info);
1205: #endif
1206:     if (info) SETERRQ1(PETSC_ERR_LIB,"Error in Lapack xTREVC %i",info);
1207:   }

1209:   return(0);
1210: }