Actual source code: dvd_blas.c

  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2010, Universidad Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:       
  8:    SLEPc is free software: you can redistribute it and/or modify it under  the
  9:    terms of version 3 of the GNU Lesser General Public License as published by
 10:    the Free Software Foundation.

 12:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY 
 13:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS 
 14:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for 
 15:    more details.

 17:    You  should have received a copy of the GNU Lesser General  Public  License
 18:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 19:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 20: */

 22:  #include slepc.h
 23:  #include slepceps.h
 24:  #include slepcblaslapack.h
 25:  #include davidson.h

 27: PetscLogEvent SLEPC_SlepcDenseMatProd = 0;
 28: PetscLogEvent SLEPC_SlepcDenseNorm = 0;
 29: PetscLogEvent SLEPC_SlepcDenseOrth = 0;
 30: PetscLogEvent SLEPC_SlepcDenseCopy = 0;
 31: PetscLogEvent SLEPC_VecsMult = 0;

 33: void dvd_sum_local(void *in, void *out, PetscMPIInt *cnt,MPI_Datatype *t);
 34: PetscErrorCode VecsMultS_copy_func(PetscScalar *out, PetscInt size_out,
 35:                                    void *ptr);

 39: PetscErrorCode dvd_blas_prof_init() {
 40:   PetscErrorCode  ierr;


 44:   if (SLEPC_SlepcDenseMatProd) return(0);
 45:   PetscLogEventRegister("DenseMatProd", EPS_COOKIE,
 46:                                &SLEPC_SlepcDenseMatProd);
 47:   PetscLogEventRegister("DenseOrth", EPS_COOKIE,
 48:                                &SLEPC_SlepcDenseOrth);
 49:   PetscLogEventRegister("DenseMatNorm", EPS_COOKIE,
 50:                                &SLEPC_SlepcDenseNorm);
 51:   PetscLogEventRegister("DenseCopy", EPS_COOKIE,
 52:                                &SLEPC_SlepcDenseCopy);
 53:   PetscLogEventRegister("VecsMult", EPS_COOKIE,
 54:                                &SLEPC_VecsMult);
 55:   return(0);
 56: }

 58: /*
 59:   Compute C <- a*A*B + b*C, where
 60:     ldC, the leading dimension of C,
 61:     ldA, the leading dimension of A,
 62:     rA, cA, rows and columns of A,
 63:     At, if true use the transpose of A instead,
 64:     ldB, the leading dimension of B,
 65:     rB, cB, rows and columns of B,
 66:     Bt, if true use the transpose of B instead
 67: */
 70: PetscErrorCode SlepcDenseMatProd(PetscScalar *C, PetscInt _ldC, PetscScalar b,
 71:   PetscScalar a,
 72:   const PetscScalar *A, PetscInt _ldA, PetscInt rA, PetscInt cA, PetscTruth At,
 73:   const PetscScalar *B, PetscInt _ldB, PetscInt rB, PetscInt cB, PetscTruth Bt)
 74: {
 75:   PetscErrorCode  ierr;
 76:   PetscInt        tmp;
 77:   PetscBLASInt    m, n, k, ldA = _ldA, ldB = _ldB, ldC = _ldC;
 78:   const char      *N = "N", *T = "C", *qA = N, *qB = N;


 82:   if ((rA == 0) || (cB == 0)) { return(0); }

 84:   PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);

 86:   /* Transpose if needed */
 87:   if (At) tmp = rA, rA = cA, cA = tmp, qA = T;
 88:   if (Bt) tmp = rB, rB = cB, cB = tmp, qB = T;
 89: 
 90:   /* Check size */
 91:   if (cA != rB) {
 92:     SETERRQ(1, "Matrix dimensions doesn't match!");
 93:   }
 94: 
 95:   /* Do stub */
 96:   if ((rA == 1) && (cA == 1) && (cB == 1)) {
 97:     *C = *A * *B;
 98:     m = n = k = 1;
 99:   } else {
100:     m = rA; n = cB; k = cA;
101:     BLASgemm_(qA, qB, &m, &n, &k, &a, (PetscScalar*)A, &ldA, (PetscScalar*)B,
102:               &ldB, &b, C, &ldC);
103:   }

105:   PetscLogFlops(m*n*2*k);
106:   PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);

108:   return(0);
109: }

111: /*
112:   Compute C <- A*B, where
113:     sC, structure of C,
114:     ldC, the leading dimension of C,
115:     sA, structure of A,
116:     ldA, the leading dimension of A,
117:     rA, cA, rows and columns of A,
118:     At, if true use the transpose of A instead,
119:     sB, structure of B,
120:     ldB, the leading dimension of B,
121:     rB, cB, rows and columns of B,
122:     Bt, if true use the transpose of B instead
123: */
126: PetscErrorCode SlepcDenseMatProdTriang(
127:   PetscScalar *C, MatType_t sC, PetscInt ldC,
128:   const PetscScalar *A, MatType_t sA, PetscInt ldA, PetscInt rA, PetscInt cA,
129:   PetscTruth At,
130:   const PetscScalar *B, MatType_t sB, PetscInt ldB, PetscInt rB, PetscInt cB,
131:   PetscTruth Bt)
132: {
133:   PetscErrorCode  ierr;
134:   PetscInt        tmp;
135:   PetscScalar     one=1.0, zero=0.0;
136:   PetscBLASInt    rC, cC, _ldA = ldA, _ldB = ldB, _ldC = ldC;


140:   if ((rA == 0) || (cB == 0)) { return(0); }

142:   /* Transpose if needed */
143:   if (At) tmp = rA, rA = cA, cA = tmp;
144:   if (Bt) tmp = rB, rB = cB, cB = tmp;
145: 
146:   /* Check size */
147:   if (cA != rB) SETERRQ(1, "Matrix dimensions doesn't match!");
148:   if (sB != 0) SETERRQ(1, "It doesn't support B matrix type!");

150:   /* Optimized version: trivial case */
151:   if ((rA == 1) && (cA == 1) && (cB == 1)) {
152:     if (!At && !Bt)     *C = *A * *B;
153:     else if (At && !Bt) *C = PetscConj(*A) * *B;
154:     else if (!At && Bt) *C = *A * PetscConj(*B);
155:     else if (At && Bt)  *C = PetscConj(*A) * PetscConj(*B);
156:     return(0);
157:   }
158: 
159:   /* Optimized versions: sA == 0 && sB == 0 */
160:   if ((sA == 0) && (sB == 0)) {
161:     if (At) tmp = rA, rA = cA, cA = tmp;
162:     if (Bt) tmp = rB, rB = cB, cB = tmp;
163:     SlepcDenseMatProd(C, ldC, 0.0, 1.0, A, ldA, rA, cA, At, B, ldB, rB,
164:                              cB, Bt);
165:     PetscFunctionReturn(ierr);
166:   }

168:   /* Optimized versions: A hermitian && (B not triang) */
169:   if (DVD_IS(sA,DVD_MAT_HERMITIAN) &&
170:       DVD_ISNOT(sB,DVD_MAT_UTRIANG) &&
171:       DVD_ISNOT(sB,DVD_MAT_LTRIANG)    ) {
172:     PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);
173:     rC = rA; cC = cB;
174:     BLAShemm_("L", DVD_ISNOT(sA,DVD_MAT_LTRIANG)?"U":"L", &rC, &cC, &one,
175:               (PetscScalar*)A, &_ldA, (PetscScalar*)B, &_ldB, &zero, C, &_ldC);
176:     PetscLogFlops(rA*cB*cA);
177:     PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);
178:     return(0);
179:   }

181:   /* Optimized versions: B hermitian && (A not triang) */
182:   if (DVD_IS(sB,DVD_MAT_HERMITIAN) &&
183:       DVD_ISNOT(sA,DVD_MAT_UTRIANG) &&
184:       DVD_ISNOT(sA,DVD_MAT_LTRIANG)    ) {
185:     PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);
186:     rC = rA; cC = cB;
187:     BLAShemm_("R", DVD_ISNOT(sB,DVD_MAT_LTRIANG)?"U":"L", &rC, &cC, &one,
188:               (PetscScalar*)B, &_ldB, (PetscScalar*)A, &_ldA, &zero, C, &_ldC);
189:     PetscLogFlops(rA*cB*cA);
190:     PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);
191:     return(0);
192:   }
193: 
194:   SETERRQ(1, "It doesn't support A matrix type!");
195: }

197: /*
198:   Normalize the columns of the matrix A, where
199:     ldA, the leading dimension of A,
200:     rA, cA, rows and columns of A.
201:   if eigi is given, the pairs of contiguous columns i i+1 such as eigi[i] != 0
202:   are normalized as being one column.
203: */
206: PetscErrorCode SlepcDenseNorm(PetscScalar *A, PetscInt ldA, PetscInt _rA,
207:                               PetscInt cA, PetscScalar *eigi)
208: {
209:   PetscErrorCode  ierr;
210:   PetscInt        i;
211:   PetscScalar     norm, norm0;
212:   PetscBLASInt    rA = _rA, one=1;


216:   PetscLogEventBegin(SLEPC_SlepcDenseNorm,0,0,0,0);

218:   for(i=0; i<cA; i++) {
219:     if(eigi && eigi[i] != 0.0) {
220:       norm = BLASnrm2_(&rA, &A[i*ldA], &one);
221:       norm0 = BLASnrm2_(&rA, &A[(i+1)*ldA], &one);
222:       norm = 1.0/PetscSqrtScalar(norm*norm + norm0*norm0);
223:       BLASscal_(&rA, &norm, &A[i*ldA], &one);
224:       BLASscal_(&rA, &norm, &A[(i+1)*ldA], &one);
225:       i++;
226:     } else {
227:       norm = BLASnrm2_(&rA, &A[i*ldA], &one);
228:       norm = 1.0 / norm;
229:       BLASscal_(&rA, &norm, &A[i*ldA], &one);
230:      }
231:   }

233:   PetscLogEventEnd(SLEPC_SlepcDenseNorm,0,0,0,0);

235:   return(0);
236: }
237: 

239: /*
240:   Compute A <- orth(A), where
241:     ldA, the leading dimension of A,
242:     rA, cA, rows and columns of A,
243:     auxS, auxiliary vector of more size than cA+min(rA,cA),
244:     lauxS, size of auxS,
245:     ncA, new number of columns of A
246: */
249: PetscErrorCode SlepcDenseOrth(PetscScalar *A, PetscInt _ldA, PetscInt _rA,
250:                               PetscInt _cA, PetscScalar *auxS, PetscInt _lauxS,
251:                               PetscInt *ncA)
252: {
253:   PetscErrorCode  ierr;
254:   PetscBLASInt    ldA = _ldA, rA = _rA, cA = _cA,
255:                   info, ltau = PetscMin(_cA, _rA), lw = _lauxS - ltau;
256:   PetscScalar     *tau = auxS, *w = tau + ltau;


260:   /* Quick exit */
261:   if ((_rA == 0) || (cA == 0)) { return(0); }

263:   /* Memory check */
264:   if (lw < cA) SETERRQ(1, "Insufficient memory for xGEQRF!");
265: 
266:   PetscLogEventBegin(SLEPC_SlepcDenseOrth,0,0,0,0);
267:   LAPACKgeqrf_(&rA, &cA, A, &ldA, tau, w, &lw, &info);
268:   if (info) SETERRQ1(PETSC_ERR_LIB, "Error in Lapack xGEQRF %d", info);
269:   LAPACKorgqr_(&rA, &ltau, &ltau, A, &ldA, tau, w, &lw, &info);
270:   if (info) SETERRQ1(PETSC_ERR_LIB, "Error in Lapack xORGQR %d", info);
271:   PetscLogEventEnd(SLEPC_SlepcDenseOrth,0,0,0,0);

273:   if (ncA) *ncA = ltau;

275:   return(0);
276: }

278: /*
279:   Y <- X, where
280:   ldX, leading dimension of X,
281:   rX, cX, rows and columns of X
282:   ldY, leading dimension of Y
283: */
286: PetscErrorCode SlepcDenseCopy(PetscScalar *Y, PetscInt ldY, PetscScalar *X,
287:                               PetscInt ldX, PetscInt rX, PetscInt cX)
288: {
289:   PetscErrorCode  ierr;
290:   PetscInt        i;


294:   if ((ldX < rX) || (ldY < rX)) {
295:     SETERRQ(1, "Leading dimension error!");
296:   }
297: 
298:   /* Quick exit */
299:   if (Y == X) {
300:     if (ldX != ldY) {
301:       SETERRQ(1, "Leading dimension error!");
302:     }
303:     return(0);
304:   }

306:   PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);
307:   for(i=0; i<cX; i++) {
308:     PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
309: 
310:   }
311:   PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);

313:   return(0);
314: }

316: /*
317:   Y <- X, where
318:   ldX, leading dimension of X,
319:   rX, cX, rows and columns of X
320:   ldY, leading dimension of Y
321: */
324: PetscErrorCode SlepcDenseCopyTriang(PetscScalar *Y, MatType_t sY, PetscInt ldY,
325:                                     PetscScalar *X, MatType_t sX, PetscInt ldX,
326:                                     PetscInt rX, PetscInt cX)
327: {
328:   PetscErrorCode  ierr;
329:   PetscInt        i,j,c;


333:   if ((ldX < rX) || (ldY < rX)) {
334:     SETERRQ(1, "Leading dimension error!");
335:   }

337:   if (rX != cX) {
338:     SETERRQ(1, "SlepcDenseCopyTriang doesn't support rectangular matrices!");
339:   }

341:   if (DVD_IS(sX,DVD_MAT_UTRIANG) &&
342:       DVD_ISNOT(sX,DVD_MAT_LTRIANG)) {        /* UpTr to ... */
343:     if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
344:         DVD_ISNOT(sY,DVD_MAT_LTRIANG))        /* ... UpTr, */
345:       c = 0;                                      /*     so copy */
346:     else if(DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
347:             DVD_IS(sY,DVD_MAT_LTRIANG))       /* ... LoTr, */
348:       c = 1;                                      /*     so transpose */
349:     else                                          /* ... Full, */
350:       c = 2;                                      /*     so reflect from up */
351:   } else if (DVD_ISNOT(sX,DVD_MAT_UTRIANG) &&
352:              DVD_IS(sX,DVD_MAT_LTRIANG)) {    /* LoTr to ... */
353:     if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
354:         DVD_ISNOT(sY,DVD_MAT_LTRIANG))        /* ... UpTr, */
355:       c = 1;                                      /*     so transpose */
356:     else if(DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
357:             DVD_IS(sY,DVD_MAT_LTRIANG))       /* ... LoTr, */
358:       c = 0;                                      /*     so copy */
359:     else                                          /* ... Full, */
360:       c = 3;                                      /*     so reflect fr. down */
361:   } else                                          /* Full to any, */
362:     c = 0;                                        /*     so copy */
363: 
364:   PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);

366:   switch(c) {
367:   case 0: /* copy */
368:     for(i=0; i<cX; i++) {
369:       PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
370: 
371:     }
372:     break;

374:   case 1: /* transpose */
375:     for(i=0; i<cX; i++)
376:       for(j=0; j<rX; j++)
377:         Y[ldY*j+i] = X[ldX*i+j];
378:     break;

380:   case 2: /* reflection from up */
381:     for(i=0; i<cX; i++)
382:       for(j=0; j<PetscMin(i+1,rX); j++)
383:         Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
384:     break;

386:   case 3: /* reflection from down */
387:     for(i=0; i<cX; i++)
388:       for(j=i; j<rX; j++)
389:         Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
390:     break;
391:   }
392:   PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);

394:   return(0);
395: }


398: /*
399:   Compute Y[0..cM-1] <- alpha * X[0..cX-1] * M + beta * Y[0..cM-1],
400:   where X and Y are contiguous global vectors.
401: */
404: PetscErrorCode SlepcUpdateVectorsZ(Vec *Y, PetscScalar beta, PetscScalar alpha,
405:   Vec *X, PetscInt cX, const PetscScalar *M, PetscInt ldM, PetscInt rM,
406:   PetscInt cM)
407: {
408:   PetscErrorCode  ierr;


412:   SlepcUpdateVectorsS(Y, 1, beta, alpha, X, cX, 1, M, ldM, rM, cM);
413: 

415:   return(0);
416: }


419: /*
420:   Compute Y[0:dY:cM*dY-1] <- alpha * X[0:dX:cX-1] * M + beta * Y[0:dY:cM*dY-1],
421:   where X and Y are contiguous global vectors.
422: */
425: PetscErrorCode SlepcUpdateVectorsS(Vec *Y, PetscInt dY, PetscScalar beta,
426:   PetscScalar alpha, Vec *X, PetscInt cX, PetscInt dX, const PetscScalar *M,
427:   PetscInt ldM, PetscInt rM, PetscInt cM)
428: {
429:   PetscErrorCode  ierr;
430:   PetscScalar     *px, *py;
431:   PetscInt        rX, rY, ldX, ldY, i, rcX;


435:   /* Compute the real number of columns */
436:   rcX = cX/dX;
437:   if (rcX != rM) {
438:     SETERRQ(1, "Matrix dimensions doesn't match!");
439:   }

441:   if ((rcX == 0) || (rM == 0) || (cM == 0)) {
442:     return(0);
443:   } else if ((Y + cM <= X) || (X + cX <= Y) ||
444:              ((X != Y) && ((PetscMax(dX,dY))%(PetscMin(dX,dY))!=0))) {
445:     /* If Y[0..cM-1] and X[0..cX-1] are not overlapped... */

447:     /* Get the dense matrices and dimensions associated to Y and X */
448:     VecGetLocalSize(X[0], &rX);
449:     VecGetLocalSize(Y[0], &rY);
450:     if (rX != rY) {
451:       SETERRQ(1, "The multivectors doesn't have the same dimension!");
452:     }
453:     VecGetArray(X[0], &px);
454:     VecGetArray(Y[0], &py);

456:     /* Update the strides */
457:     ldX = rX*dX; ldY= rY*dY;

459:     /* Do operation */
460:     SlepcDenseMatProd(py, ldY, beta, alpha, px, ldX, rX, rcX,
461:                     PETSC_FALSE, M, ldM, rM, cM, PETSC_FALSE);
462: 
463:     VecRestoreArray(X[0], &px);
464:     PetscObjectStateDecrease((PetscObject)X[0]);
465:     VecRestoreArray(Y[0], &py);
466:     for(i=1; i<cM; i++) {
467:       PetscObjectStateIncrease((PetscObject)Y[dY*i]);
468:     }

470:   } else if ((Y >= X) && (beta == 0.0) && (dY == dX)) {
471:     /* If not, call to SlepcUpdateVectors */
472:     SlepcUpdateStrideVectors(cX, X, Y-X, dX, Y-X+cM*dX, M-ldM*(Y-X),
473:                                     ldM, PETSC_FALSE);
474:     if (alpha != 1.0)
475:       for (i=0; i<cM; i++) {
476:         VecScale(Y[i], alpha);
477:       }
478:   } else {
479:     SETERRQ(1, "I don't support this case!");
480:   }

482:   return(0);
483: }

485: /*
486:   Compute X <- alpha * X[0:dX:cX-1] * M
487:   where X is a matrix with non-consecutive columns
488: */
491: PetscErrorCode SlepcUpdateVectorsD(Vec *X, PetscInt cX, PetscScalar alpha,
492:   const PetscScalar *M, PetscInt ldM, PetscInt rM, PetscInt cM,
493:   PetscScalar *work, PetscInt lwork)
494: {
495:   PetscErrorCode  ierr;
496:   PetscScalar     **px, *Y, *Z;
497:   PetscInt        rX, i, j, rY, rY0, ldY;


501:   if (cX != rM) {
502:     SETERRQ(1, "Matrix dimensions doesn't match!");
503:   }

505:   rY = (lwork/2)/cX;
506:   if (rY <= 0) {
507:     SETERRQ(1, "Insufficient work space given!");
508:   }
509:   Y = work; Z = &Y[cX*rY]; ldY = rY;

511:   if ((cX == 0) || (rM == 0) || (cM == 0)) {
512:     return(0);
513:   }

515:   /* Get the dense vectors associated to the columns of X */
516:   PetscMalloc(sizeof(Vec)*cX, &px);
517:   for(i=0; i<cX; i++) {
518:     VecGetArray(X[i], &px[i]);
519:   }
520:   VecGetLocalSize(X[0], &rX);

522:   for(i=0, rY0=0; i<rX; i+=rY0) {
523:     rY0 = PetscMin(rY, rX-i);

525:     /* Y <- X[i0:i1,:] */
526:     for(j=0; j<cX; j++) {
527:       SlepcDenseCopy(&Y[ldY*j], ldY, px[j]+i, rX, rY0, 1);
528: 
529:     }

531:     /* Z <- Y * M */
532:     SlepcDenseMatProd(Z, ldY, 0.0, alpha, Y, ldY, rY0, cX, PETSC_FALSE,
533:                                                  M, ldM, rM, cM, PETSC_FALSE);
534: 

536:     /* X <- Z */
537:     for(j=0; j<cM; j++) {
538:       SlepcDenseCopy(px[j]+i, rX, &Z[j*ldY], ldY, rY0, 1);
539: 
540:     }
541:   }

543:   for(i=0; i<cX; i++) {
544:     VecRestoreArray(X[i], &px[i]);
545:   }
546:   PetscFree(px);

548:   return(0);
549: }



553: /* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
554:                  [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
555:   where W = U' * V.
556:   workS0 and workS1 are an auxiliary scalar vector of size
557:   (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
558:   is needed, and of size eU*eV.
559: */

563: PetscErrorCode VecsMult(PetscScalar *M, MatType_t sM, PetscInt ldM,
564:                         Vec *U, PetscInt sU, PetscInt eU,
565:                         Vec *V, PetscInt sV, PetscInt eV,
566:                         PetscScalar *workS0, PetscScalar *workS1)
567: {
568:   PetscErrorCode  ierr;
569:   PetscInt        ldU, ldV, i, j, k;
570:   PetscScalar     *pu, *pv, *W, *Wr;


574:   /* Check if quick exit */
575:   if ((eU-sU == 0) || (eV-sV == 0))
576:     return(0);
577: 
578:   /* Get the dense matrices and dimensions associated to U and V */
579:   VecGetLocalSize(U[0], &ldU);
580:   VecGetLocalSize(V[0], &ldV);
581:   if (ldU != ldV) {
582:     SETERRQ(1, "Matrix dimensions doesn't match!");
583:   }
584:   VecGetArray(U[0], &pu);
585:   VecGetArray(V[0], &pv);

587:   if (workS0)
588:     W = workS0;
589:   else {
590:     PetscMalloc(sizeof(PetscScalar)*((eU-sU)*sV+(eV-sV)*eU), &W);
591: 
592:   }

594:   PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);

596:   if ((sU == 0) && (sV == 0) && (eU == ldM)) {
597:     /* Use the smart memory usage version */

599:     /* W <- U' * V */
600:     SlepcDenseMatProdTriang(W, sM, eU,
601:                                    pu, 0, ldU, ldU, eU, PETSC_TRUE,
602:                                    pv, 0, ldV, ldV, eV, PETSC_FALSE);
603: 
604: 
605:     /* ReduceAll(W, SUM) */
606:     MPI_Allreduce(W, M, eU*eV, MPIU_SCALAR, MPIU_SUM,
607:                          ((PetscObject)U[0])->comm);
608:   /* Full M matrix */
609:   } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
610:              DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
611:     if (workS1)
612:       Wr = workS1;
613:     else {
614:       PetscMalloc(sizeof(PetscScalar)*((eU-sU)*sV+(eV-sV)*eU), &Wr);
615: 
616:     }
617: 
618:     /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
619:     SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
620:                              pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
621:                              pv       , ldV, ldV, sV,    PETSC_FALSE);
622: 
623: 
624:     /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
625:     SlepcDenseMatProd(W+(eU-sU)*sV, eU, 0.0, 1.0,
626:                              pu,        ldU, ldU, eU,    PETSC_TRUE,
627:                              pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
628: 
629: 
630:     /* ReduceAll(W, SUM) */
631:     MPI_Allreduce(W, Wr, (eU-sU)*sV+(eV-sV)*eU, MPIU_SCALAR,
632:                       MPIU_SUM, ((PetscObject)U[0])->comm);
633: 
634:     /* M(...,...) <- W */
635:     for (i=0,k=0; i<sV; i++)
636:       for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
637:     for (i=sV; i<eV; i++)
638:         for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
639: 
640:     if (!workS1) {
641:       PetscFree(Wr);
642:     }

644:   /* Upper triangular M matrix */
645:   } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
646:              DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
647:     if (workS1)
648:       Wr = workS1;
649:     else {
650:       PetscMalloc(sizeof(PetscScalar)*(eV-sV)*eU, &Wr);
651: 
652:     }
653: 
654:     /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
655:     SlepcDenseMatProd(W,         eU,  0.0, 1.0,
656:                              pu,        ldU, ldU, eU,    PETSC_TRUE,
657:                              pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
658: 
659: 
660:     /* ReduceAll(W, SUM) */
661:     MPI_Allreduce(W, Wr, (eV-sV)*eU, MPIU_SCALAR, MPIU_SUM,
662:                          ((PetscObject)U[0])->comm);
663: 
664:     /* M(...,...) <- W */
665:     for (i=sV,k=0; i<eV; i++)
666:         for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];

668:     if (!workS1) {
669:       PetscFree(Wr);
670:     }

672:   /* Lower triangular M matrix */
673:   } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
674:              DVD_IS(sM,DVD_MAT_LTRIANG)) {
675:     if (workS1)
676:       Wr = workS1;
677:     else {
678:       PetscMalloc(sizeof(PetscScalar)*(eU-sU)*eV, &Wr);
679: 
680:     }
681: 
682:     /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
683:     SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
684:                              pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
685:                              pv       , ldV, ldV, eV,    PETSC_FALSE);
686: 
687: 
688:     /* ReduceAll(W, SUM) */
689:     MPI_Allreduce(W, Wr, (eU-sU)*eV, MPIU_SCALAR, MPIU_SUM,
690:                          ((PetscObject)U[0])->comm);
691: 
692:     /* M(...,...) <- W */
693:     for (i=0,k=0; i<eV; i++)
694:       for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
695: 
696:     if (!workS1) {
697:       PetscFree(Wr);
698:     }
699:   }

701:   PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);

703:   if (!workS0) {
704:     PetscFree(W);
705:   }

707:   VecRestoreArray(U[0], &pu);
708:   PetscObjectStateDecrease((PetscObject)U[0]);
709:   VecRestoreArray(V[0], &pv);
710:   PetscObjectStateDecrease((PetscObject)V[0]);

712:   return(0);
713: }



717: /* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
718:                  [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
719:   where W = local_U' * local_V. Needs VecsMultIb for completing the operation!
720:   workS0 and workS1 are an auxiliary scalar vector of size
721:   (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
722:   is needed, and of size eU*eV.
723: */

727: PetscErrorCode VecsMultIa(PetscScalar *M, MatType_t sM, PetscInt ldM,
728:                           Vec *U, PetscInt sU, PetscInt eU,
729:                           Vec *V, PetscInt sV, PetscInt eV)
730: {
731:   PetscErrorCode  ierr;
732:   PetscInt        ldU, ldV;
733:   PetscScalar     *pu, *pv;


737:   /* Check if quick exit */
738:   if ((eU-sU == 0) || (eV-sV == 0))
739:     return(0);
740: 
741:   /* Get the dense matrices and dimensions associated to U and V */
742:   VecGetLocalSize(U[0], &ldU);
743:   VecGetLocalSize(V[0], &ldV);
744:   if (ldU != ldV) {
745:     SETERRQ(1, "Matrix dimensions doesn't match!");
746:   }
747:   VecGetArray(U[0], &pu);
748:   VecGetArray(V[0], &pv);

750:   if ((sU == 0) && (sV == 0) && (eU == ldM)) {
751:     /* M <- local_U' * local_V */
752:     SlepcDenseMatProdTriang(M, sM, eU,
753:                                    pu, 0, ldU, ldU, eU, PETSC_TRUE,
754:                                    pv, 0, ldV, ldV, eV, PETSC_FALSE);
755: 
756: 
757:   /* Full M matrix */
758:   } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
759:              DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
760:     /* M(sU:eU-1,0:sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
761:     SlepcDenseMatProd(&M[sU], ldM, 0.0, 1.0,
762:                              pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
763:                              pv       , ldV, ldV, sV,    PETSC_FALSE);
764: 
765: 
766:     /* M(0:eU-1,sV:eV-1) <- U(0:eU-1)' * V(sV:eV-1) */
767:     SlepcDenseMatProd(&M[ldM*sV], ldM, 0.0, 1.0,
768:                              pu,        ldU, ldU, eU,    PETSC_TRUE,
769:                              pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
770: 
771: 
772:   /* Other structures */
773:   } else SETERRQ(1, "Matrix structure doesn't support by VecsMultI!");

775:   VecRestoreArray(U[0], &pu);
776:   PetscObjectStateDecrease((PetscObject)U[0]);
777:   VecRestoreArray(V[0], &pv);
778:   PetscObjectStateDecrease((PetscObject)V[0]);

780:   return(0);
781: }


784: /* Computes N <- Allreduce( [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ] )
785:                           ( [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ] )
786:   where W = U' * V.
787:   workS0 and workS1 are an auxiliary scalar vector of size
788:   (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
789:   is needed, and of size eU*eV.
790: */

794: PetscErrorCode VecsMultIb(PetscScalar *M, MatType_t sM, PetscInt ldM,
795:                           PetscInt rM, PetscInt cM, PetscScalar *auxS,
796:                           Vec V)
797: {
798:   PetscErrorCode  ierr;
799:   PetscScalar     *W, *Wr;


803:   /* Check if quick exit */
804:   if ((rM == 0) || (cM == 0))
805:     return(0);
806: 
807:   if (auxS)
808:     W = auxS;
809:   else {
810:     PetscMalloc(sizeof(PetscScalar)*rM*cM*2, &W);
811: 
812:   }
813:   Wr = W + rM*cM;

815:   PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);

817:   if (sM == 0) {
818:     /* W <- M */
819:     SlepcDenseCopy(W, rM, M, ldM, rM, cM);

821:     /* Wr <- ReduceAll(W, SUM) */
822:     MPI_Allreduce(W, Wr, rM*cM, MPIU_SCALAR, MPIU_SUM,
823:                          ((PetscObject)V)->comm);

825:     /* M <- Wr */
826:     SlepcDenseCopy(M, ldM, Wr, rM, rM, cM);

828:   /* Other structures */
829:   } else SETERRQ(1, "Matrix structure doesn't support by VecsMultI!");

831:   PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);

833:   if (!auxS) {
834:     PetscFree(W);
835:   }

837:   return(0);
838: }


841: /* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
842:                  [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
843:   where W = U' * V.
844:   r, a DvdReduction structure,
845:   sr, an structure DvdMult_copy_func.
846: */

850: PetscErrorCode VecsMultS(PetscScalar *M, MatType_t sM, PetscInt ldM,
851:                          Vec *U, PetscInt sU, PetscInt eU,
852:                          Vec *V, PetscInt sV, PetscInt eV, DvdReduction *r,
853:                          DvdMult_copy_func *sr)
854: {
855:   PetscErrorCode  ierr;
856:   PetscInt        ldU, ldV;
857:   PetscScalar     *pu, *pv, *W;


861:   /* Check if quick exit */
862:   if ((eU-sU == 0) || (eV-sV == 0))
863:     return(0);
864: 
865:   /* Get the dense matrices and dimensions associated to U and V */
866:   VecGetLocalSize(U[0], &ldU);
867:   VecGetLocalSize(V[0], &ldV);
868:   if (ldU != ldV) {
869:     SETERRQ(1, "Matrix dimensions doesn't match!");
870:   }
871:   VecGetArray(U[0], &pu);
872:   VecGetArray(V[0], &pv);

874:   PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);

876:   if ((sU == 0) && (sV == 0) && (eU == ldM)) {
877:     /* Use the smart memory usage version */

879:     /* Add the reduction to r */
880:     SlepcAllReduceSum(r, eU*eV, VecsMultS_copy_func, sr, &W);
881: 

883:     /* W <- U' * V */
884:     SlepcDenseMatProdTriang(W, sM, eU,
885:                                    pu, 0, ldU, ldU, eU, PETSC_TRUE,
886:                                    pv, 0, ldV, ldV, eV, PETSC_FALSE);
887: 
888: 
889:     /* M <- ReduceAll(W, SUM) */
890:     sr->M = M;    sr->ld = ldM;
891:     sr->i0 = 0;   sr->i1 = eV;    sr->s0 = sU;    sr->e0 = eU;
892:                   sr->i2 = eV;

894:   /* Full M matrix */
895:   } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
896:              DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
897:     /* Add the reduction to r */
898:     SlepcAllReduceSum(r, (eU-sU)*sV+(eV-sV)*eU, VecsMultS_copy_func,
899:                              sr, &W);
900: 

902:     /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
903:     SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
904:                              pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
905:                              pv       , ldV, ldV, sV,    PETSC_FALSE);
906: 
907: 
908:     /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
909:     SlepcDenseMatProd(W+(eU-sU)*sV, eU, 0.0, 1.0,
910:                              pu,        ldU, ldU, eU,    PETSC_TRUE,
911:                              pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
912: 
913: 
914:     /* M <- ReduceAll(W, SUM) */
915:     sr->M = M;    sr->ld = ldM;
916:     sr->i0 = 0;   sr->i1 = sV;    sr->s0 = sU;    sr->e0 = eU;
917:                   sr->i2 = eV;    sr->s1 = 0;     sr->e1 = eU;

919:   /* Upper triangular M matrix */
920:   } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
921:              DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
922:     /* Add the reduction to r */
923:     SlepcAllReduceSum(r, (eV-sV)*eU, VecsMultS_copy_func, sr, &W);
924: 
925: 
926:     /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
927:     SlepcDenseMatProd(W,         eU,  0.0, 1.0,
928:                              pu,        ldU, ldU, eU,    PETSC_TRUE,
929:                              pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
930: 
931: 
932:     /* M <- ReduceAll(W, SUM) */
933:     sr->M = M;    sr->ld = ldM;
934:     sr->i0 = sV;  sr->i1 = eV;    sr->s0 = 0;     sr->e0 = eU;
935:                   sr->i2 = eV;
936: 
937:   /* Lower triangular M matrix */
938:   } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
939:              DVD_IS(sM,DVD_MAT_LTRIANG)) {
940:     /* Add the reduction to r */
941:     SlepcAllReduceSum(r, (eU-sU)*eV, VecsMultS_copy_func, sr, &W);
942: 
943: 
944:     /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
945:     SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
946:                              pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
947:                              pv       , ldV, ldV, eV,    PETSC_FALSE);
948: 
949: 
950:     /* ReduceAll(W, SUM) */
951:     sr->M = M;    sr->ld = ldM;
952:     sr->i0 = 0;   sr->i1 = eV;    sr->s0 = sU;    sr->e0 = eU;
953:                   sr->i2 = eV;
954:   }

956:   PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);

958:   VecRestoreArray(U[0], &pu);
959:   PetscObjectStateDecrease((PetscObject)U[0]);
960:   VecRestoreArray(V[0], &pv);
961:   PetscObjectStateDecrease((PetscObject)V[0]);

963:   return(0);
964: }

968: PetscErrorCode VecsMultS_copy_func(PetscScalar *out, PetscInt size_out,
969:                                    void *ptr)
970: {
971:   PetscInt        i, j, k;
972:   DvdMult_copy_func
973:                   *sr = (DvdMult_copy_func*)ptr;


977:   for (i=sr->i0,k=0; i<sr->i1; i++)
978:     for (j=sr->ld*i+sr->s0; j<sr->ld*i+sr->e0; j++,k++) sr->M[j] = out[k];
979:   for (i=sr->i1; i<sr->i2; i++)
980:     for (j=sr->ld*i+sr->s1; j<sr->ld*i+sr->e1; j++,k++) sr->M[j] = out[k];

982:   if (k != size_out) SETERRQ(1, "Error in VecsMultS_copy_func!");

984:   return(0);
985: }

987: /* 
988:   Sum up several arrays with only one call to MPIReduce.
989: */

993: PetscErrorCode SlepcAllReduceSumBegin(DvdReductionChunk *ops,
994:                                       PetscInt max_size_ops,
995:                                       PetscScalar *in, PetscScalar *out,
996:                                       PetscInt max_size_in, DvdReduction *r,
997:                                       MPI_Comm comm)
998: {

1001:   r->in = in;
1002:   r->out = out;
1003:   r->size_in = 0;
1004:   r->max_size_in = max_size_in;
1005:   r->ops = ops;
1006:   r->size_ops = 0;
1007:   r->max_size_ops = max_size_ops;
1008:   r->comm = comm;

1010:   return(0);
1011: }

1015: PetscErrorCode SlepcAllReduceSum(DvdReduction *r, PetscInt size_in,
1016:                                  DvdReductionPostF f, void *ptr,
1017:                                  PetscScalar **in)
1018: {

1021:   *in = r->in + r->size_in;
1022:   r->ops[r->size_ops].out = r->out + r->size_in;
1023:   r->ops[r->size_ops].size_out = size_in;
1024:   r->ops[r->size_ops].f = f;
1025:   r->ops[r->size_ops].ptr = ptr;
1026:   if (++(r->size_ops) > r->max_size_ops) {
1027:     SETERRQ(1, "max_size_ops is not enought!");
1028:   }
1029:   if ((r->size_in+= size_in) > r->max_size_in) {
1030:     SETERRQ(1, "max_size_in is not enought!");
1031:   }

1033:   return(0);
1034: }


1039: PetscErrorCode SlepcAllReduceSumEnd(DvdReduction *r)
1040: {
1041:   PetscErrorCode  ierr;
1042:   PetscInt        i;


1046:   /* Check if quick exit */
1047:   if (r->size_ops == 0)
1048:     return(0);

1050:   /* Call the MPIAllReduce routine */
1051:   MPI_Allreduce(r->in, r->out, r->size_in, MPIU_SCALAR, MPIU_SUM,
1052:                        r->comm);

1054:   /* Call the postponed routines */
1055:   for(i=0; i<r->size_ops; i++) {
1056:     r->ops[i].f(r->ops[i].out, r->ops[i].size_out, r->ops[i].ptr);
1057: 
1058:   }

1060:   /* Tag the operation as done */
1061:   r->size_ops = 0;

1063:   return(0);
1064: }


1069: PetscErrorCode dvd_orthV(IP ip, Vec *DS, PetscInt size_DS, Vec *cX,
1070:                          PetscInt size_cX, Vec *V, PetscInt V_new_s,
1071:                          PetscInt V_new_e, PetscScalar *auxS, Vec auxV,
1072:                          PetscRandom rand)
1073: {
1074:   PetscErrorCode  ierr;
1075:   PetscInt        i, j;
1076:   PetscTruth      lindep;
1077:   PetscReal       norm;
1078:   PetscScalar     *auxS0 = auxS;
1079: 
1081: 
1082:   /* Orthonormalize V with IP */
1083:   for (i=V_new_s; i<V_new_e; i++) {
1084:     for(j=0; j<3; j++) {
1085:       if (j>0) { SlepcVecSetRandom(V[i], rand);  }
1086:       if (cX + size_cX == V) {
1087:         /* If cX and V are contiguous, orthogonalize in one step */
1088:         IPOrthogonalize(ip, size_DS, DS, size_cX+i, PETSC_NULL, cX,
1089:                                V[i], auxS0, &norm, &lindep);
1090:       } else if (DS) {
1091:         /* Else orthogonalize first against DS, and then against cX and V */
1092:         IPOrthogonalize(ip, size_DS, DS, size_cX, PETSC_NULL, cX,
1093:                                V[i], auxS0, PETSC_NULL, &lindep);
1094:         if(!lindep) {
1095:           IPOrthogonalize(ip, 0, PETSC_NULL, i, PETSC_NULL, V,
1096:                                  V[i], auxS0, &norm, &lindep);
1097:         }
1098:       } else {
1099:         /* Else orthogonalize first against cX and then against V */
1100:         IPOrthogonalize(ip, size_cX, cX, i, PETSC_NULL, V,
1101:                                V[i], auxS0, &norm, &lindep);
1102:       }
1103:       if(!lindep && (norm > PETSC_MACHINE_EPSILON)) break;
1104:       PetscInfo1(ip, "Orthonormalization problems adding the vector %d to the searching subspace\n", i);
1105: 
1106:     }
1107:     if(lindep || (norm < PETSC_MACHINE_EPSILON)) {
1108:         SETERRQ(1, "Error during the orthonormalization of the eigenvectors!");
1109:     }
1110:     VecScale(V[i], 1.0/norm);
1111:   }
1112: 
1113:   return(0);
1114: }
1115: 
1118: /*
1119:   Compute eigenvectors associated to the Schur decomposition (S, T) and
1120:   save the left vectors in pY and the right vectors in pX, where
1121:   n, size of the eigenproblem
1122:   ldS, ldT, leading dimension of S and T
1123:   ldpX, ldpY, leading dimension of pX and pY
1124:   auxS, auxiliar scalar of length:
1125:     double standard 3n+n*n, double generalized 11n+4n*n,
1126:     complex standard 3n+n*n, complex generalized 3n+2n*n
1127:   size_auxS, the length of auxS
1128:   doProd, if true pX and pY return the eigenvectors premultiplied by the input vectors stored in pX and pY respectively
1129: */
1130: PetscErrorCode dvd_compute_eigenvectors(PetscInt n_, PetscScalar *S,
1131:   PetscInt ldS, PetscScalar *T, PetscInt ldT, PetscScalar *pX,
1132:   PetscInt ldpX_, PetscScalar *pY, PetscInt ldpY_, PetscScalar *auxS,
1133:   PetscInt size_auxS, PetscTruth doProd)
1134: {
1135:   PetscErrorCode  ierr;
1136:   PetscBLASInt    n, ldpX, ldpY, nout, info;
1137:   PetscScalar     *Sc, *Tc;
1138:   const char      *side, *howmny;
1139: #if defined(PETSC_USE_COMPLEX)
1140:   PetscReal       *auxR;
1141: #else
1142:   PetscScalar     *pA,*pB;
1143:   PetscBLASInt    n1, ldpA,ldpB;
1144:   PetscScalar     *alphar, *alphai, *beta;
1145: #endif
1146: 

1149:   n = PetscBLASIntCast(n_);
1150:   ldpX = PetscBLASIntCast(ldpX_);
1151:   ldpY = PetscBLASIntCast(ldpY_);

1153:   if (pX && pY) side = "B";
1154:   else if (pX)  side = "R";
1155:   else if (pY)  side = "L";
1156:   else { return(0); }

1158:   if (!pX) ldpX = 1;
1159:   if (!pY) ldpY = 1;

1161:   howmny = doProd?"B":"A";

1163:   Sc = auxS; auxS+= n*n; size_auxS-= n*n;
1164:   if (T) Tc = auxS, auxS+= n*n, size_auxS-= n*n;
1165:   else   Tc = PETSC_NULL;
1166: 
1167:   /* Sc <- S, Tc <- T */
1168:   SlepcDenseCopy(Sc, n, S, ldS, n, n);
1169:   if (T) {
1170:     SlepcDenseCopy(Tc, n, T, ldT, n, n);
1171:   }

1173:   if (T) {
1174:     /* [eigr, pX] = eig(S, T) */
1175: #if defined(PETSC_USE_COMPLEX)
1176:     auxR = (PetscReal*)auxS; auxS = (PetscScalar*)(auxR+2*n); size_auxS-= 2*n;
1177:     if (size_auxS < 2*n)
1178:       SETERRQ(PETSC_ERR_LIB,"Insufficient auxiliar memory for xTGEVC");
1179:     LAPACKtgevc_(side,howmny,PETSC_NULL,&n,Sc,&n,Tc,&n,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,auxR,&info);
1180:     if (info) SETERRQ1(PETSC_ERR_LIB,"Error in Lapack xTGEVC %i",info);
1181: #else
1182:     alphar = auxS; auxS+= n; size_auxS-= n;
1183:     alphai = auxS; auxS+= n; size_auxS-= n;
1184:     beta = auxS; auxS+= n; size_auxS-= n;
1185:     if (doProd) {
1186:       if (pX) pA = auxS, auxS+= n*n, size_auxS-= n*n, ldpA = n;
1187:       else    pA = PETSC_NULL, ldpA = 0;
1188:       if (pY) pB = auxS, auxS+= n*n, size_auxS-= n*n, ldpB = n;
1189:       else    pB = PETSC_NULL, ldpB = 0;
1190:     } else {
1191:       pA = pX; pB = pY; ldpA = ldpX; ldpB = ldpY;
1192:     }
1193:     /* LAPACKtgevc_ needs the element i,i+1 in the 2-by-2 digonal blocs
1194:        of T that represent complex conjugate eigenpairs to be zero */
1195:     n1 = size_auxS;
1196:     if (size_auxS < 8*n)
1197:       SETERRQ(PETSC_ERR_LIB,"Insufficient auxiliar memory for xGGEV");
1198:     LAPACKggev_(pY?"V":"N",pX?"V":"N",&n,Sc,&n,Tc,&n,alphar,alphai,beta,pB,&ldpB,pA,&ldpA,auxS,&n1,&info);
1199:     if (info) SETERRQ1(PETSC_ERR_LIB,"Error in Lapack xGGEV %i",info);
1200:     if (doProd) {
1201:       if (pX) {
1202:         /* pX <- pX * pA */
1203:         SlepcDenseCopy(Sc, n, pX, ldpX, n, n);
1204:         SlepcDenseMatProd(pX, ldpX, 0.0, 1.0,
1205:                                  Sc, n, n, n, PETSC_FALSE,
1206:                                  pA, n, n, n, PETSC_FALSE);
1207:       }
1208:       if (pY) {
1209:         /* pY <- pY * pB */
1210:         SlepcDenseCopy(Sc, n, pY, ldpY, n, n);
1211:         SlepcDenseMatProd(pY, ldpY, 0.0, 1.0,
1212:                                  Sc, n, n, n, PETSC_FALSE,
1213:                                  pB, n, n, n, PETSC_FALSE);
1214:       }
1215:     }
1216: #endif
1217:   } else {
1218:     /* [eigr, pX] = eig(S) */
1219: #if defined(PETSC_USE_COMPLEX)
1220:     auxR = (PetscReal*)auxS; auxS = (PetscScalar*)(auxR+n); size_auxS-= n;
1221:     if (size_auxS < 2*n)
1222:       SETERRQ(PETSC_ERR_LIB,"Insufficient auxiliar memory for xTREVC");
1223:     LAPACKtrevc_(side,howmny,PETSC_NULL,&n,Sc,&n,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,auxR,&info);
1224: #else
1225:     LAPACKtrevc_(side,howmny,PETSC_NULL,&n,Sc,&n,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,&info);
1226: #endif
1227:     if (info) SETERRQ1(PETSC_ERR_LIB,"Error in Lapack xTREVC %i",info);
1228:   }

1230:   return(0);
1231: }