Actual source code: ptoar.c

slepc-3.5.2 2014-10-10
Report Typos and Errors
  1: /*

  3:    SLEPc polynomial eigensolver: "toar"

  5:    Method: TOAR

  7:    Algorithm:

  9:        Two-Level Orthogonal Arnoldi.

 11:    References:

 13:        [1] Y. Su, J. Zhang and Z. Bai, "A compact Arnoldi algorithm for
 14:            polynomial eigenvalue problems", talk presented at RANMEP 2008.

 16:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 17:    SLEPc - Scalable Library for Eigenvalue Problem Computations
 18:    Copyright (c) 2002-2014, Universitat Politecnica de Valencia, Spain

 20:    This file is part of SLEPc.

 22:    SLEPc is free software: you can redistribute it and/or modify it under  the
 23:    terms of version 3 of the GNU Lesser General Public License as published by
 24:    the Free Software Foundation.

 26:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
 27:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
 28:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
 29:    more details.

 31:    You  should have received a copy of the GNU Lesser General  Public  License
 32:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 33:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 34: */

 36: #include <slepc-private/stimpl.h> 
 37: #include <slepc-private/pepimpl.h>    /*I "slepcpep.h" I*/
 38: #include <slepcblaslapack.h>

 40: typedef struct {
 41:   PetscReal keep;         /* restart parameter */
 42: } PEP_TOAR;

 46: PetscErrorCode PEPSetUp_TOAR(PEP pep)
 47: {
 49:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
 50:   PetscBool      sinv,flg;
 51:   PetscInt       i;
 52:   PetscScalar    *coeffs=pep->solvematcoeffs;

 55:   PEPSetDimensions_Default(pep,pep->nev,&pep->ncv,&pep->mpd);
 56:   if (!pep->max_it) pep->max_it = PetscMax(100,(pep->nmat-1)*pep->n/pep->ncv); 
 57:   if (!pep->which) {
 58:     PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinv);
 59:     if (sinv) pep->which = PEP_TARGET_MAGNITUDE;
 60:     else pep->which = PEP_LARGEST_MAGNITUDE;
 61:   }

 63:   if (!ctx->keep) ctx->keep = 0.5;

 65:   PEPAllocateSolution(pep,pep->nmat-1);
 66:   PEPSetWorkVecs(pep,3);
 67:   DSSetType(pep->ds,DSNHEP);
 68:   DSSetExtraRow(pep->ds,PETSC_TRUE);
 69:   DSAllocate(pep->ds,pep->ncv+1);

 71:   PEPBasisCoefficients(pep,pep->pbc);
 72:   STGetTransform(pep->st,&flg);
 73:   if (!flg) {
 74:     PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinv);
 75:     if (sinv) {
 76:       PEPEvaluateBasis(pep,pep->target,0,coeffs,NULL);
 77:     } else {
 78:       for (i=0;i<pep->nmat-1;i++) coeffs[i] = 0.0;
 79:       coeffs[pep->nmat-1] = 1.0;
 80:     }
 81:   }
 82:   return(0);
 83: }

 87: /*
 88:   Norm of [sp;sq] 
 89: */
 90: static PetscErrorCode PEPTOARSNorm2(PetscInt n,PetscScalar *S,PetscReal *norm)
 91: {
 93:   PetscBLASInt   n_,one=1;

 96:   PetscBLASIntCast(n,&n_);
 97:   *norm = BLASnrm2_(&n_,S,&one);
 98:   return(0);
 99: }

103: /*
104:  Computes GS orthogonalization   [z;x] - [Sp;Sq]*y,
105:  where y = ([Sp;Sq]'*[z;x]).
106:    k: Column from S to be orthogonalized against previous columns.
107:    Sq = Sp+ld
108: */
109: static PetscErrorCode PEPTOAROrth2(PEP pep,PetscScalar *S,PetscInt ld,PetscInt deg,PetscInt k,PetscScalar *y,PetscReal *norm,PetscBool *lindep,PetscScalar *work,PetscInt nw)
110: {
112:   PetscBLASInt   n_,lds_,k_,one=1;
113:   PetscScalar    sonem=-1.0,sone=1.0,szero=0.0,*x0,*x,*c;
114:   PetscInt       lwa,nwu=0,i,lds=deg*ld,n;
115:   PetscReal      eta,onorm;
116:   
118:   BVGetOrthogonalization(pep->V,NULL,NULL,&eta);
119:   n = k+deg-1;
120:   PetscBLASIntCast(n,&n_);
121:   PetscBLASIntCast(deg*ld,&lds_);
122:   PetscBLASIntCast(k,&k_); /* Number of vectors to orthogonalize against them */
123:   lwa = k;
124:   if (!work||nw<lwa) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",6);
125:   c = work+nwu;
126:   nwu += k;
127:   x0 = S+k*lds;
128:   PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,S,&lds_,x0,&one,&szero,y,&one));
129:   for (i=1;i<deg;i++) {
130:     x = S+i*ld+k*lds;
131:     PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,S+i*ld,&lds_,x,&one,&sone,y,&one));
132:   }
133:   for (i=0;i<deg;i++) {
134:     x= S+i*ld+k*lds;
135:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&k_,&sonem,S+i*ld,&lds_,y,&one,&sone,x,&one));
136:   }
137:   PEPTOARSNorm2(lds,S+k*lds,&onorm);
138:   /* twice */
139:   PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,S,&lds_,x0,&one,&szero,c,&one));
140:   for (i=1;i<deg;i++) {
141:     x = S+i*ld+k*lds;
142:     PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,S+i*ld,&lds_,x,&one,&sone,c,&one));
143:   }
144:   for (i=0;i<deg;i++) {
145:     x= S+i*ld+k*lds;
146:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&k_,&sonem,S+i*ld,&lds_,c,&one,&sone,x,&one));
147:   }
148:   for (i=0;i<k;i++) y[i] += c[i];
149:   PEPTOARSNorm2(lds,S+k*lds,norm);
150:   *lindep = (*norm < eta * onorm)?PETSC_TRUE:PETSC_FALSE;
151:   return(0);
152: }

156: /*
157:   Extend the TOAR basis by applying the the matrix operator
158:   over a vector which is decomposed on the TOAR way
159:   Input:
160:     - pbc: array containing the polynomial basis coefficients
161:     - S,V: define the latest Arnoldi vector (nv vectors in V)
162:   Output:
163:     - t: new vector extending the TOAR basis
164:     - r: temporally coefficients to compute the TOAR coefficients
165:          for the new Arnoldi vector
166:   Workspace: t_ (two vectors)
167: */
168: static PetscErrorCode PEPTOARExtendBasis(PEP pep,PetscBool sinvert,PetscScalar sigma,PetscScalar *S,PetscInt ls,PetscInt nv,BV V,Vec t,PetscScalar *r,PetscInt lr,Vec *t_,PetscInt nwv)
169: {
171:   PetscInt       nmat=pep->nmat,deg=nmat-1,k,j,off=0,lss;
172:   Vec            v=t_[0],ve=t_[1],q=t_[2];
173:   PetscScalar    alpha=1.0,*ss,a;
174:   PetscReal      *ca=pep->pbc,*cb=pep->pbc+nmat,*cg=pep->pbc+2*nmat;
175:   PetscBool      flg;

178:   if (!t_||nwv<3) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",12);
179:   BVSetActiveColumns(pep->V,0,nv);
180:   STGetTransform(pep->st,&flg);
181:   if (sinvert) {
182:     for (j=0;j<nv;j++) {
183:       if (deg>1) r[lr+j] = S[j]/ca[0];
184:       if (deg>2) r[2*lr+j] = (S[ls+j]+(sigma-cb[1])*r[lr+j])/ca[1];
185:     }
186:     for (k=2;k<deg-1;k++) {
187:       for (j=0;j<nv;j++) r[(k+1)*lr+j] = (S[k*ls+j]+(sigma-cb[k])*r[k*lr+j]-cg[k]*r[(k-1)*lr+j])/ca[k];
188:     }
189:     k = deg-1;
190:     for (j=0;j<nv;j++) r[j] = (S[k*ls+j]+(sigma-cb[k])*r[k*lr+j]-cg[k]*r[(k-1)*lr+j])/ca[k];
191:     ss = r; lss = lr; off = 1; alpha = -1.0; a = pep->sfactor;
192:   } else {
193:     ss = S; lss = ls; off = 0; alpha = -ca[deg-1]; a = 1.0;
194:   }
195:   BVMultVec(V,1.0,0.0,v,ss+off*lss);
196:   if (pep->Dr) { /* Balancing */
197:     VecPointwiseMult(v,v,pep->Dr);
198:   }
199:   STMatMult(pep->st,off,v,q);
200:   VecScale(q,a);
201:   for (j=1+off;j<deg+off-1;j++) {
202:     BVMultVec(V,1.0,0.0,v,ss+j*lss);
203:     if (pep->Dr) {
204:       VecPointwiseMult(v,v,pep->Dr);
205:     }
206:     STMatMult(pep->st,j,v,t);
207:     a *= pep->sfactor;
208:     VecAXPY(q,a,t);
209:   }
210:   if (sinvert) {
211:     BVMultVec(V,1.0,0.0,v,ss);
212:     if (pep->Dr) {
213:       VecPointwiseMult(v,v,pep->Dr);
214:     }
215:     STMatMult(pep->st,deg,v,t);
216:     a *= pep->sfactor;
217:     VecAXPY(q,a,t);
218:   } else {
219:     BVMultVec(V,1.0,0.0,ve,ss+(deg-1)*lss);
220:     if (pep->Dr) {
221:       VecPointwiseMult(ve,ve,pep->Dr);
222:     }
223:     a *= pep->sfactor;
224:     STMatMult(pep->st,deg-1,ve,t);
225:     VecAXPY(q,a,t);    
226:     a *= pep->sfactor;
227:   }
228:   if (flg || !sinvert) alpha /= a;
229:   STMatSolve(pep->st,q,t);
230:   VecScale(t,alpha);
231:   if (!sinvert) {
232:     if (cg[deg-1]!=0) {VecAXPY(t,cg[deg-1],v);}    
233:     if (cb[deg-1]!=0) {VecAXPY(t,cb[deg-1],ve);}    
234:   }
235:   if (pep->Dr) {
236:     VecPointwiseDivide(t,t,pep->Dr);
237:   }
238:   return(0);
239: }

243: /*
244:   Compute TOAR coefficients of the blocks of the new Arnoldi vector computed
245: */
246: static PetscErrorCode PEPTOARCoefficients(PEP pep,PetscBool sinvert,PetscScalar sigma,PetscInt nv,PetscScalar *S,PetscInt ls,PetscScalar *r,PetscInt lr,PetscScalar *x)
247: {
248:   PetscInt    k,j,nmat=pep->nmat,d=nmat-1;
249:   PetscReal   *ca=pep->pbc,*cb=pep->pbc+nmat,*cg=pep->pbc+2*nmat;
250:   PetscScalar t=1.0,tp=0.0,tt;

253:   if (sinvert) {
254:     for (k=1;k<d;k++) {
255:       tt = t;
256:       t = ((sigma-cb[k-1])*t-cg[k-1]*tp)/ca[k-1]; /* k-th basis polynomial */
257:       tp = tt;
258:       for (j=0;j<=nv;j++) r[k*lr+j] += t*x[j];
259:     }
260:   } else {
261:     for (j=0;j<=nv;j++) r[j] = (cb[0]-sigma)*S[j]+ca[0]*S[ls+j];
262:     for (k=1;k<d-1;k++) {
263:       for (j=0;j<=nv;j++) r[k*lr+j] = (cb[k]-sigma)*S[k*ls+j]+ca[k]*S[(k+1)*ls+j]+cg[k]*S[(k-1)*ls+j];
264:     }
265:     if (sigma!=0.0) for (j=0;j<=nv;j++) r[(d-1)*lr+j] -= sigma*S[(d-1)*ls+j];
266:   }
267:   return(0);
268: }

272: /*
273:   Compute a run of Arnoldi iterations
274: */
275: static PetscErrorCode PEPTOARrun(PEP pep,PetscScalar sigma,PetscInt *nq,PetscScalar *S,PetscInt ld,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscBool *breakdown,PetscScalar *work,PetscInt nw,Vec *t_,PetscInt nwv)
276: {
278:   PetscInt       i,j,p,m=*M,nwu=0,lwa,deg=pep->nmat-1;
279:   PetscInt       lds=ld*deg,nqt=*nq;
280:   Vec            t;
281:   PetscReal      norm;
282:   PetscBool      flg,sinvert=PETSC_FALSE,lindep;
283:   PetscScalar    *x;

286:   if (!t_||nwv<3) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",12);
287:   lwa = ld;
288:   if (!work||nw<lwa) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",10);
289:   STGetTransform(pep->st,&flg);
290:   if (!flg) {
291:     /* Spectral transformation handled by the solver */
292:     PetscObjectTypeCompareAny((PetscObject)pep->st,&flg,STSINVERT,STSHIFT,"");
293:     if (!flg) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"STtype not supported fr TOAR without transforming matrices");
294:     PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinvert);
295:   }
296:   for (j=k;j<m;j++) {
297:     /* apply operator */
298:     BVGetColumn(pep->V,nqt,&t);
299:     PEPTOARExtendBasis(pep,sinvert,sigma,S+j*lds,ld,nqt,pep->V,t,S+(j+1)*lds,ld,t_,3);
300:     BVRestoreColumn(pep->V,nqt,&t);

302:     /* orthogonalize */
303:     if (sinvert) x = S+(j+1)*lds;
304:     else x = S+(deg-1)*ld+(j+1)*lds;
305:     BVOrthogonalizeColumn(pep->V,nqt,x,&norm,&lindep);
306:     if (!lindep) {
307:       x[nqt] = norm;
308:       BVScaleColumn(pep->V,nqt,1.0/norm);
309:       nqt++;
310:     }

312:     PEPTOARCoefficients(pep,sinvert,sigma,nqt-1,S+j*lds,ld,S+(j+1)*lds,ld,x);
313:     /* Level-2 orthogonalization */
314:     PEPTOAROrth2(pep,S,ld,deg,j+1,H+j*ldh,&norm,breakdown,work+nwu,lwa-nwu);
315:     if (!*breakdown) {
316:       for (p=0;p<deg;p++) {
317:         for (i=0;i<=j+deg;i++) {
318:           S[i+p*ld+(j+1)*lds] /= norm;
319:         }
320:       }
321:       H[j+1+ldh*j] = norm;
322:     } else {
323:       *M = j;
324:       *nq = nqt;
325:       return(0);
326:     }
327:     *nq = nqt;
328:   }
329:   return(0);
330: }

334: static PetscErrorCode PEPTOARTrunc(PEP pep,PetscScalar *S,PetscInt ld,PetscInt deg,PetscInt *rs1a,PetscInt cs1,PetscScalar *work,PetscInt nw,PetscReal *rwork,PetscInt nrw)
335: {
337:   PetscInt       lwa,nwu=0,lrwa,nrwu=0;
338:   PetscInt       j,i,n,lds=deg*ld,rs1=*rs1a,rk=0;
339:   PetscScalar    *M,*V,*pU,t;
340:   PetscReal      *sg,tol;
341:   PetscBLASInt   cs1_,rs1_,cs1tdeg,n_,info,lw_;
342:   Mat            U;

345:   n = (rs1>deg*cs1)?deg*cs1:rs1;
346:   lwa = 5*ld*lds;
347:   lrwa = 6*n;
348:   if (!work||nw<lwa) {
349:     if (nw<lwa) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",6);
350:     if (!work) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",5);
351:   }
352:   if (!rwork||nrw<lrwa) {
353:     if (nrw<lrwa) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",8);
354:     if (!rwork) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",7);
355:   }
356:   M = work+nwu;
357:   nwu += rs1*cs1*deg;
358:   sg = rwork+nrwu;
359:   nrwu += n;
360:   pU = work+nwu;
361:   nwu += rs1*n;
362:   V = work+nwu;
363:   nwu += deg*cs1*n;
364:   for (i=0;i<cs1;i++) {
365:     for (j=0;j<deg;j++) {
366:       PetscMemcpy(M+(i+j*cs1)*rs1,S+i*lds+j*ld,rs1*sizeof(PetscScalar));
367:     } 
368:   }
369:   PetscBLASIntCast(n,&n_);
370:   PetscBLASIntCast(cs1,&cs1_);
371:   PetscBLASIntCast(rs1,&rs1_);
372:   PetscBLASIntCast(cs1*deg,&cs1tdeg);
373:   PetscBLASIntCast(lwa-nwu,&lw_);
374: #if !defined (PETSC_USE_COMPLEX)
375:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&rs1_,&cs1tdeg,M,&rs1_,sg,pU,&rs1_,V,&n_,work+nwu,&lw_,&info));
376: #else
377:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&rs1_,&cs1tdeg,M,&rs1_,sg,pU,&rs1_,V,&n_,work+nwu,&lw_,rwork+nrwu,&info));  
378: #endif
379:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGESVD %d",info);
380:   tol = PetscMax(rs1,deg*cs1)*PETSC_MACHINE_EPSILON*sg[0];
381:   for (i=0;i<n;i++) if (sg[i]>tol) rk++;
382:   rk = PetscMin(cs1+deg-1,rk); 
383:   /* Update the corresponding vectors V(:,idx) = V*Q(:,idx) */
384:   MatCreateSeqDense(PETSC_COMM_SELF,rs1,rk,pU,&U);
385:   BVSetActiveColumns(pep->V,0,rs1);
386:   BVMultInPlace(pep->V,U,0,rk);
387:   MatDestroy(&U);
388:   
389:   /* Update S */
390:   PetscMemzero(S,lds*ld*sizeof(PetscScalar));
391:   for (i=0;i<rk;i++) {
392:     t = sg[i];
393:     PetscStackCallBLAS("BLASscal",BLASscal_(&cs1tdeg,&t,V+i,&n_));
394:   }
395:   for (j=0;j<cs1;j++) {
396:     for (i=0;i<deg;i++) {
397:       PetscMemcpy(S+j*lds+i*ld,V+(cs1*i+j)*n,rk*sizeof(PetscScalar));
398:     }
399:   }
400:   *rs1a = rk;
401:   return(0);
402: }

406: /*
407:   S <- S*Q 
408:   columns s-s+ncu of S
409:   rows 0-sr of S
410:   size(Q) qr x ncu
411: */
412: static PetscErrorCode PEPTOARSupdate(PetscScalar *S,PetscInt ld,PetscInt deg,PetscInt sr,PetscInt s,PetscInt ncu,PetscInt qr,PetscScalar *Q,PetscInt ldq,PetscScalar *work,PetscInt nw)
413: {
415:   PetscScalar    a=1.0,b=0.0;
416:   PetscBLASInt   sr_,ncu_,ldq_,lds_,qr_;
417:   PetscInt       lwa,j,lds=deg*ld,i;

420:   lwa = sr*ncu;
421:   if (!work||nw<lwa) {
422:     if (nw<lwa) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",10);
423:     if (!work) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",9);
424:   }
425:   PetscBLASIntCast(sr,&sr_);
426:   PetscBLASIntCast(qr,&qr_);
427:   PetscBLASIntCast(ncu,&ncu_);
428:   PetscBLASIntCast(lds,&lds_);
429:   PetscBLASIntCast(ldq,&ldq_);
430:   for (i=0;i<deg;i++) {
431:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&sr_,&ncu_,&qr_,&a,S+i*ld,&lds_,Q,&ldq_,&b,work,&sr_));
432:     for (j=0;j<ncu;j++) {
433:       PetscMemcpy(S+lds*(s+j)+i*ld,work+j*sr,sr*sizeof(PetscScalar));
434:     }
435:   }
436:   return(0);
437: }

441: static PetscErrorCode PEPExtractInvariantPair(PEP pep,PetscScalar sigma,PetscInt sr,PetscInt k,PetscScalar *S,PetscInt ld,PetscInt deg,PetscScalar *H,PetscInt ldh,PetscScalar *work,PetscInt nw)
442: {
444:   PetscInt       i,j,nwu=0,lwa,lds,ldt,d=pep->nmat-1;
445:   PetscScalar    *At,*Bt,*Hj,*Hp,*T,*t,sone=1.0,g,a;
446:   PetscBLASInt   k_,sr_,lds_,ldh_,info,*p,lwork,ldt_;
447:   PetscBool      transf=PETSC_FALSE,flg;
448:   PetscReal      *ca,*cb,*cg,ex=0.0,norm,*rwork;

451:   if (k==0) return(0);
452:   lwa = 6*sr*k;
453:   if (!work||nw<lwa) {
454:     if (nw<lwa) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",10);
455:     if (!work) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid argument %d",9);
456:   }
457:   ca = pep->pbc; cb = pep->pbc+pep->nmat; cg = pep->pbc+2*pep->nmat;
458:   lds = deg*ld;
459:   At = work+nwu;
460:   nwu += sr*k;
461:   Bt = work+nwu;
462:   nwu += k*k;
463:   PetscMemzero(Bt,k*k*sizeof(PetscScalar));
464:   Hj = work+nwu;
465:   nwu += k*k;
466:   Hp = work+nwu;
467:   nwu += k*k;
468:   PetscMemzero(Hp,k*k*sizeof(PetscScalar));
469:   PetscMalloc1(k,&p);
470:   PetscBLASIntCast(sr,&sr_);
471:   PetscBLASIntCast(k,&k_);
472:   PetscBLASIntCast(lds,&lds_);
473:   PetscBLASIntCast(ldh,&ldh_);
474:   STGetTransform(pep->st,&flg);
475:   if (!flg) {
476:      PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&flg);
477:     if (flg || sigma!=0.0) transf=PETSC_TRUE;
478:   }
479:   if (transf) {
480:     ldt = k;
481:     T = work+nwu;
482:     nwu += k*k;
483:     for (i=0;i<k;i++) {
484:       PetscMemcpy(T+k*i,H+i*ldh,k*sizeof(PetscScalar));
485:     }
486:     if (flg) {
487:       PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&k_,&k_,T,&k_,p,&info));
488:       PetscBLASIntCast(nw-nwu,&lwork);
489:       PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&k_,T,&k_,p,work+nwu,&lwork,&info));  
490:     }
491:     if (sigma!=0.0) for (i=0;i<k;i++) T[i+k*i] += sigma;
492:   } else {
493:     T = H; ldt = ldh;
494:   }
495:   PetscBLASIntCast(ldt,&ldt_);
496:   switch (pep->extract) {
497:   case PEP_EXTRACT_NORM:
498:     PetscBLASIntCast(ldt,&ldt_);
499:     PetscMalloc1(k,&rwork);
500:     norm = LAPACKlange_("F",&k_,&k_,T,&ldt_,rwork);
501:     PetscFree(rwork);
502:     if (norm<1) {
503:       /* Copy last block of S to the first one */
504:       for (j=0;j<k;j++) {
505:         PetscMemcpy(S+j*lds,S+(d-1)*ld+j*lds,sr*sizeof(PetscScalar));
506:       }
507:     }
508:     break;
509:   case PEP_EXTRACT_STRUCTURED:
510:     for (j=0;j<k;j++) {
511:       for (i=0;i<k;i++) {
512:         Hj[j*k+i] = T[j*ldt+i]/ca[0];
513:       }
514:       Hj[j*k+j] -= cb[0]/ca[0];
515:       Bt[j+j*k] = 1.0;
516:       Hp[j+j*k] = 1.0;
517:     }
518:     for (j=0;j<sr;j++) {
519:       for (i=0;i<k;i++) {
520:         At[j*k+i] = PetscConj(S[i*lds+j]);
521:       }
522:     }
523:     for (i=1;i<deg;i++) {
524:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&k_,&sr_,&k_,&sone,Hj,&k_,S+i*ld,&lds_,&sone,At,&k_));
525:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&k_,&k_,&k_,&sone,Hj,&k_,Hj,&k_,&sone,Bt,&k_));
526:       if (i<deg-1) {
527:         for (j=0;j<k;j++) T[j*ldt+j] += ex-cb[i];
528:         ex = cb[i];
529:         g = -cg[i]/ca[i]; a = 1/ca[i];
530:         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&a,T,&ldt_,Hj,&k_,&g,Hp,&k_));
531:         t = Hj; Hj = Hp; Hp = t;
532:       } 
533:     }
534:     for (j=0;j<k;j++) T[j*ldt+j] += ex;
535:     PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&k_,&sr_,Bt,&k_,p,At,&k_,&info));
536:     if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGESV %d",info);
537:     for (j=0;j<sr;j++) {
538:       for (i=0;i<k;i++) {
539:         S[i*lds+j] = PetscConj(At[j*k+i]);
540:       }
541:     } 
542:     break;
543:   default:
544:       SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Extraction not implemented in this solver");
545:   }
546:   PetscFree(p);
547:   return(0);
548: }

552: PetscErrorCode PEPSolve_TOAR(PEP pep)
553: {
555:   PEP_TOAR       *pepctx = (PEP_TOAR*)pep->data;
556:   PetscInt       i,j,k,l,nv=0,ld,lds,off,ldds,newn,nq=0;
557:   PetscInt       lwa,lrwa,nwu=0,nrwu=0,nmat=pep->nmat,deg=nmat-1;
558:   PetscScalar    *S,*Q,*work,*H,*pS0,sigma;
559:   PetscReal      beta,norm,*rwork;
560:   PetscBool      breakdown=PETSC_FALSE,flg,lindep;
561:   Mat            S0;

564:   ld = pep->ncv+deg;   /* number of rows of each fragment of S */
565:   lds = deg*ld;        /* leading dimension of S */
566:   lwa = (deg+5)*ld*lds;
567:   lrwa = 7*lds;
568:   PetscMalloc3(lwa,&work,lrwa,&rwork,lds*ld,&S);
569:   PetscMemzero(S,lds*ld*sizeof(PetscScalar));
570:   DSGetLeadingDimension(pep->ds,&ldds);

572:   STGetShift(pep->st,&sigma);
573:   /* Update polynomial basis coefficients */
574:   STGetTransform(pep->st,&flg);
575:   if (pep->sfactor!=1) {
576:     for (i=0;i<nmat;i++) {
577:       pep->pbc[nmat+i] /= pep->sfactor;
578:       pep->pbc[2*nmat+i] /= pep->sfactor*pep->sfactor; 
579:     }
580:     if (!flg) {
581:       pep->target /= pep->sfactor;
582:       pep->st->sigma /= pep->sfactor;
583:       sigma /= pep->sfactor;
584:     }
585:   }
586:   if (flg) sigma = 0.0;
587:   /* Get the starting Lanczos vector */
588:   if (pep->nini==0) {  
589:     BVSetRandomColumn(pep->V,0,pep->rand);
590:   }
591:   BVOrthogonalizeColumn(pep->V,0,NULL,&norm,&lindep);
592:   if (!lindep) {
593:     BVScaleColumn(pep->V,nq,1.0/norm);
594:     S[nq++] = norm;
595:   }
596:   for (i=1;i<deg;i++) {
597:     BVSetRandomColumn(pep->V,nq,pep->rand);
598:     BVOrthogonalizeColumn(pep->V,nq,S+nq*ld,&norm,&lindep);
599:     if (!lindep) {
600:       BVScaleColumn(pep->V,nq,1.0/norm);
601:       S[nq+nq*ld] = norm;
602:       nq++;
603:     }
604:   }
605:   if (nq==0) SETERRQ(PetscObjectComm((PetscObject)pep),1,"PEP: Problem with initial vector");
606:   PEPTOARSNorm2(lds,S,&norm);
607:   for (j=0;j<nq;j++) {
608:     for (i=0;i<=j;i++) S[i+j*ld] /= norm;
609:   }
610:   /* Restart loop */
611:   l = 0;
612:   while (pep->reason == PEP_CONVERGED_ITERATING) {
613:     pep->its++;
614:     
615:     /* Compute an nv-step Lanczos factorization */
616:     nv = PetscMin(pep->nconv+pep->mpd,pep->ncv);
617:     DSGetArray(pep->ds,DS_MAT_A,&H);
618:     PEPTOARrun(pep,sigma,&nq,S,ld,H,ldds,pep->nconv+l,&nv,&breakdown,work+nwu,lwa-nwu,pep->work,4);
619:     beta = PetscAbsScalar(H[(nv-1)*ldds+nv]);
620:     DSRestoreArray(pep->ds,DS_MAT_A,&H);
621:     DSSetDimensions(pep->ds,nv,0,pep->nconv,pep->nconv+l);
622:     if (l==0) {
623:       DSSetState(pep->ds,DS_STATE_INTERMEDIATE);
624:     } else {
625:       DSSetState(pep->ds,DS_STATE_RAW);
626:     }

628:     /* Solve projected problem */
629:     DSSolve(pep->ds,pep->eigr,pep->eigi);
630:     DSSort(pep->ds,pep->eigr,pep->eigi,NULL,NULL,NULL);
631:     DSUpdateExtraRow(pep->ds);

633:     /* Check convergence */
634:     PEPKrylovConvergence(pep,PETSC_FALSE,pep->nconv,nv-pep->nconv,beta,&k);
635:     if (pep->its >= pep->max_it) pep->reason = PEP_DIVERGED_ITS;
636:     if (k >= pep->nev) pep->reason = PEP_CONVERGED_TOL;

638:     /* Update l */
639:     if (pep->reason != PEP_CONVERGED_ITERATING || breakdown) l = 0;
640:     else {
641:       l = PetscMax(1,(PetscInt)((nv-k)*pepctx->keep));
642:       if (!breakdown) {
643:         /* Prepare the Rayleigh quotient for restart */
644:         DSTruncate(pep->ds,k+l);
645:         DSGetDimensions(pep->ds,&newn,NULL,NULL,NULL,NULL);
646:         l = newn-k;
647:       }
648:     }

650:     /* Update S */
651:     off = pep->nconv*ldds;
652:     DSGetArray(pep->ds,DS_MAT_Q,&Q);
653:     PEPTOARSupdate(S,ld,deg,nq,pep->nconv,k+l-pep->nconv,nv,Q+off,ldds,work+nwu,lwa-nwu);
654:     DSRestoreArray(pep->ds,DS_MAT_Q,&Q);

656:     /* Copy last column of S */
657:     PetscMemcpy(S+lds*(k+l),S+lds*nv,lds*sizeof(PetscScalar));

659:     if (pep->reason == PEP_CONVERGED_ITERATING) {
660:       if (breakdown) {
661:         /* Stop if breakdown */
662:         PetscInfo2(pep,"Breakdown TOAR method (it=%D norm=%g)\n",pep->its,(double)beta);
663:         pep->reason = PEP_DIVERGED_BREAKDOWN;
664:       } else {
665:         /* Truncate S */
666:         PEPTOARTrunc(pep,S,ld,deg,&nq,k+l+1,work+nwu,lwa-nwu,rwork+nrwu,lrwa-nrwu);
667:       }
668:     }
669:     pep->nconv = k;
670:     PEPMonitor(pep,pep->its,pep->nconv,pep->eigr,pep->eigi,pep->errest,nv);
671:   }
672:   if (pep->nconv>0) {
673:     /* Truncate S */
674:     PEPTOARTrunc(pep,S,ld,deg,&nq,pep->nconv,work+nwu,lwa-nwu,rwork+nrwu,lrwa-nrwu);
675:     nq = PetscMin(pep->nconv,nq);
676:     /* Extract invariant pair */
677:     DSGetArray(pep->ds,DS_MAT_A,&H);
678:     PEPExtractInvariantPair(pep,sigma,nq,pep->nconv,S,ld,deg,H,ldds,work+nwu,lwa-nwu);
679:     DSRestoreArray(pep->ds,DS_MAT_A,&H);

681:     /* Perform Newton refinement if required */
682:     if (pep->refine==PEP_REFINE_MULTIPLE && pep->rits>0) {
683:       DSSetDimensions(pep->ds,pep->nconv,0,0,0);
684:       DSSetState(pep->ds,DS_STATE_RAW);
685:       PEPNewtonRefinement_TOAR(pep,sigma,&pep->rits,NULL,pep->nconv,S,lds,&nq);
686:       DSSolve(pep->ds,pep->eigr,pep->eigi);
687:       DSSort(pep->ds,pep->eigr,pep->eigi,NULL,NULL,NULL);
688:       DSGetArray(pep->ds,DS_MAT_Q,&Q);
689:       PEPTOARSupdate(S,ld,deg,nq,0,pep->nconv,pep->nconv,Q,ldds,work+nwu,lwa-nwu);
690:       DSRestoreArray(pep->ds,DS_MAT_Q,&Q);
691:     }
692:     /* Update vectors V = V*S */  
693:     MatCreateSeqDense(PETSC_COMM_SELF,nq,pep->nconv,NULL,&S0);
694:     MatDenseGetArray(S0,&pS0);
695:     for (j=0;j<pep->nconv;j++) {
696:       PetscMemcpy(pS0+j*nq,S+j*lds,nq*sizeof(PetscScalar));
697:     }
698:     MatDenseRestoreArray(S0,&pS0);
699:     BVSetActiveColumns(pep->V,0,nq);
700:     BVMultInPlace(pep->V,S0,0,pep->nconv);
701:     MatDestroy(&S0);
702:     BVSetActiveColumns(pep->V,0,pep->nconv);
703:   }
704:   if (pep->refine!=PEP_REFINE_MULTIPLE || pep->rits==0) {
705:     STGetTransform(pep->st,&flg);
706:     if (!flg) {
707:       STBackTransform(pep->st,pep->nconv,pep->eigr,pep->eigi);
708:       /* Restore original values */
709:       pep->target *= pep->sfactor;
710:       pep->st->sigma *= pep->sfactor;
711:     }
712:     if (pep->sfactor!=1.0) {
713:       for (j=0;j<pep->nconv;j++) {
714:         pep->eigr[j] *= pep->sfactor;
715:         pep->eigi[j] *= pep->sfactor;
716:       }
717:       /* Restore original values */
718:       for (i=0;i<pep->nmat;i++){
719:         pep->pbc[pep->nmat+i] *= pep->sfactor;
720:         pep->pbc[2*pep->nmat+i] *= pep->sfactor*pep->sfactor;
721:       }
722:     }
723:   }

725:   /* change the state to raw so that DSVectors() computes eigenvectors from scratch */
726:   DSSetDimensions(pep->ds,pep->nconv,0,0,0);
727:   DSSetState(pep->ds,DS_STATE_RAW);

729:   PetscFree3(work,rwork,S);
730:   return(0);
731: }

735: static PetscErrorCode PEPTOARSetRestart_TOAR(PEP pep,PetscReal keep)
736: {
737:   PEP_TOAR *ctx = (PEP_TOAR*)pep->data;

740:   if (keep==PETSC_DEFAULT) ctx->keep = 0.5;
741:   else {
742:     if (keep<0.1 || keep>0.9) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_ARG_OUTOFRANGE,"The keep argument must be in the range [0.1,0.9]");
743:     ctx->keep = keep;
744:   }
745:   return(0);
746: }

750: /*@
751:    PEPTOARSetRestart - Sets the restart parameter for the TOAR
752:    method, in particular the proportion of basis vectors that must be kept
753:    after restart.

755:    Logically Collective on PEP

757:    Input Parameters:
758: +  pep  - the eigenproblem solver context
759: -  keep - the number of vectors to be kept at restart

761:    Options Database Key:
762: .  -pep_toar_restart - Sets the restart parameter

764:    Notes:
765:    Allowed values are in the range [0.1,0.9]. The default is 0.5.

767:    Level: advanced

769: .seealso: PEPTOARGetRestart()
770: @*/
771: PetscErrorCode PEPTOARSetRestart(PEP pep,PetscReal keep)
772: {

778:   PetscTryMethod(pep,"PEPTOARSetRestart_C",(PEP,PetscReal),(pep,keep));
779:   return(0);
780: }

784: static PetscErrorCode PEPTOARGetRestart_TOAR(PEP pep,PetscReal *keep)
785: {
786:   PEP_TOAR *ctx = (PEP_TOAR*)pep->data;

789:   *keep = ctx->keep;
790:   return(0);
791: }

795: /*@
796:    PEPTOARGetRestart - Gets the restart parameter used in the TOAR method.

798:    Not Collective

800:    Input Parameter:
801: .  pep - the eigenproblem solver context

803:    Output Parameter:
804: .  keep - the restart parameter

806:    Level: advanced

808: .seealso: PEPTOARSetRestart()
809: @*/
810: PetscErrorCode PEPTOARGetRestart(PEP pep,PetscReal *keep)
811: {

817:   PetscTryMethod(pep,"PEPTOARGetRestart_C",(PEP,PetscReal*),(pep,keep));
818:   return(0);
819: }

823: PetscErrorCode PEPSetFromOptions_TOAR(PEP pep)
824: {
826:   PetscBool      flg;
827:   PetscReal      keep;

830:   PetscOptionsHead("PEP TOAR Options");
831:   PetscOptionsReal("-pep_toar_restart","Proportion of vectors kept after restart","PEPTOARSetRestart",0.5,&keep,&flg);
832:   if (flg) {
833:     PEPTOARSetRestart(pep,keep);
834:   }
835:   PetscOptionsTail();
836:   return(0);
837: }

841: PetscErrorCode PEPView_TOAR(PEP pep,PetscViewer viewer)
842: {
844:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
845:   PetscBool      isascii;

848:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
849:   if (isascii) {
850:     PetscViewerASCIIPrintf(viewer,"  TOAR: %d%% of basis vectors kept after restart\n",(int)(100*ctx->keep));
851:   }
852:   return(0);
853: }

857: PetscErrorCode PEPDestroy_TOAR(PEP pep)
858: {

862:   PetscFree(pep->data);
863:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARSetRestart_C",NULL);
864:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARGetRestart_C",NULL);
865:   return(0);
866: }

870: PETSC_EXTERN PetscErrorCode PEPCreate_TOAR(PEP pep)
871: {
872:   PEP_TOAR       *ctx;

876:   PetscNewLog(pep,&ctx);
877:   pep->data = (void*)ctx;

879:   pep->ops->solve          = PEPSolve_TOAR;
880:   pep->ops->setup          = PEPSetUp_TOAR;
881:   pep->ops->setfromoptions = PEPSetFromOptions_TOAR;
882:   pep->ops->destroy        = PEPDestroy_TOAR;
883:   pep->ops->view           = PEPView_TOAR;
884:   pep->ops->computevectors = PEPComputeVectors_Schur;
885:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARSetRestart_C",PEPTOARSetRestart_TOAR);
886:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARGetRestart_C",PEPTOARGetRestart_TOAR);
887:   return(0);
888: }