Actual source code: ciss.c

slepc-3.5.2 2014-10-10
Report Typos and Errors
  1: /*

  3:    SLEPc eigensolver: "ciss"

  5:    Method: Contour Integral Spectral Slicing

  7:    Algorithm:

  9:        Contour integral based on Sakurai-Sugiura method to construct a
 10:        subspace, with various eigenpair extractions (Rayleigh-Ritz,
 11:        explicit moment).

 13:    Based on code contributed by Y. Maeda, T. Sakurai.

 15:    References:

 17:        [1] T. Sakurai and H. Sugiura, "A projection method for generalized
 18:            eigenvalue problems", J. Comput. Appl. Math. 159:119-128, 2003.

 20:        [2] T. Sakurai and H. Tadano, "CIRR: a Rayleigh-Ritz type method with
 21:            contour integral for generalized eigenvalue problems", Hokkaido
 22:            Math. J. 36:745-757, 2007.

 24:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 25:    SLEPc - Scalable Library for Eigenvalue Problem Computations
 26:    Copyright (c) 2002-2014, Universitat Politecnica de Valencia, Spain

 28:    This file is part of SLEPc.

 30:    SLEPc is free software: you can redistribute it and/or modify it under  the
 31:    terms of version 3 of the GNU Lesser General Public License as published by
 32:    the Free Software Foundation.

 34:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
 35:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
 36:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
 37:    more details.

 39:    You  should have received a copy of the GNU Lesser General  Public  License
 40:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 41:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 42: */

 44: #include <slepc-private/epsimpl.h>                /*I "slepceps.h" I*/
 45: #include <slepcblaslapack.h>

 47: PetscErrorCode EPSSolve_CISS(EPS);

 49: typedef struct {
 50:   /* parameters */
 51:   PetscInt    N;          /* number of integration points (32) */
 52:   PetscInt    L;          /* block size (16) */
 53:   PetscInt    M;          /* moment degree (N/4 = 4) */
 54:   PetscReal   delta;      /* threshold of singular value (1e-12) */
 55:   PetscInt    L_max;      /* maximum number of columns of the source matrix V */
 56:   PetscReal   spurious_threshold; /* discard spurious eigenpairs */
 57:   PetscBool   isreal;     /* A and B are real */
 58:   PetscInt    refine_inner;
 59:   PetscInt    refine_outer;
 60:   PetscInt    refine_blocksize;
 61:   /* private data */
 62:   PetscReal    *sigma;     /* threshold for numerical rank */
 63:   PetscInt     num_subcomm;
 64:   PetscInt     subcomm_id;
 65:   PetscInt     num_solve_point;
 66:   PetscScalar  *weight;
 67:   PetscScalar  *omega;
 68:   PetscScalar  *pp;
 69:   BV           V;
 70:   BV           S;
 71:   BV           pV;
 72:   BV           Y;
 73:   Vec          xsub;
 74:   Vec          xdup;
 75:   KSP          *ksp;
 76:   Mat          *kspMat;
 77:   PetscBool    useconj;
 78:   PetscReal    est_eig;
 79:   VecScatter   scatterin;
 80:   Mat          pA,pB;
 81:   PetscSubcomm subcomm;
 82:   PetscBool    usest;
 83: } EPS_CISS;

 87: static PetscErrorCode SetSolverComm(EPS eps)
 88: {
 90:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
 91:   PetscInt       N = ctx->N;

 94:   if (ctx->useconj) N = N/2;
 95:   if (!ctx->subcomm) {
 96:     PetscSubcommCreate(PetscObjectComm((PetscObject)eps),&ctx->subcomm);
 97:     PetscSubcommSetNumber(ctx->subcomm,ctx->num_subcomm);
 98:     PetscSubcommSetType(ctx->subcomm,PETSC_SUBCOMM_INTERLACED);
 99:     PetscLogObjectMemory((PetscObject)eps,sizeof(PetscSubcomm));
100:     PetscSubcommSetFromOptions(ctx->subcomm);
101:   }
102:   ctx->subcomm_id = ctx->subcomm->color;
103:   ctx->num_solve_point = N / ctx->num_subcomm;
104:   if ((N%ctx->num_subcomm) > ctx->subcomm_id) ctx->num_solve_point+=1;
105:   return(0);
106: }

110: static PetscErrorCode CISSRedundantMat(EPS eps)
111: {
113:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
114:   Mat            A,B;
115:   PetscInt       nmat;

118:   STGetNumMatrices(eps->st,&nmat);
119:   if (ctx->subcomm->n != 1) {
120:     STGetOperators(eps->st,0,&A);
121:     MatGetRedundantMatrix(A,ctx->subcomm->n,ctx->subcomm->comm,MAT_INITIAL_MATRIX,&ctx->pA);
122:     if (nmat>1) {
123:       STGetOperators(eps->st,1,&B);
124:       MatGetRedundantMatrix(B,ctx->subcomm->n,ctx->subcomm->comm,MAT_INITIAL_MATRIX,&ctx->pB); 
125:     } else ctx->pB = NULL;
126:   } else {
127:     ctx->pA = NULL;
128:     ctx->pB = NULL;
129:   }
130:   return(0);
131: }

135: static PetscErrorCode CISSScatterVec(EPS eps)
136: {
138:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
139:   IS             is1,is2;
140:   Vec            v0;
141:   PetscInt       i,j,k,mstart,mend,mlocal;
142:   PetscInt       *idx1,*idx2,mloc_sub;

145:   MatGetVecs(ctx->pA,&ctx->xsub,NULL);
146:   MatGetLocalSize(ctx->pA,&mloc_sub,NULL);
147:   VecCreateMPI(ctx->subcomm->dupparent,mloc_sub,PETSC_DECIDE,&ctx->xdup);
148:   if (!ctx->scatterin) {
149:     BVGetColumn(ctx->V,0,&v0);
150:     VecGetOwnershipRange(v0,&mstart,&mend);
151:     mlocal = mend - mstart;
152:     PetscMalloc2(ctx->subcomm->n*mlocal,&idx1,ctx->subcomm->n*mlocal,&idx2);
153:     j = 0;
154:     for (k=0;k<ctx->subcomm->n;k++) {
155:       for (i=mstart;i<mend;i++) {
156:         idx1[j]   = i;
157:         idx2[j++] = i + eps->n*k;
158:       }
159:     }
160:     ISCreateGeneral(PetscObjectComm((PetscObject)eps),ctx->subcomm->n*mlocal,idx1,PETSC_COPY_VALUES,&is1);
161:     ISCreateGeneral(PetscObjectComm((PetscObject)eps),ctx->subcomm->n*mlocal,idx2,PETSC_COPY_VALUES,&is2);
162:     VecScatterCreate(v0,is1,ctx->xdup,is2,&ctx->scatterin);
163:     ISDestroy(&is1);
164:     ISDestroy(&is2);
165:     PetscFree2(idx1,idx2);
166:     BVRestoreColumn(ctx->V,0,&v0);
167:   }
168:   return(0);
169: }

173: static PetscErrorCode SetPathParameter(EPS eps)
174: {
176:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
177:   PetscInt       i;
178:   PetscScalar    center;
179:   PetscReal      theta,radius,vscale;

182:   RGEllipseGetParameters(eps->rg,&center,&radius,&vscale);
183:   for (i=0;i<ctx->N;i++) {
184:     theta = ((2*PETSC_PI)/ctx->N)*(i+0.5);
185:     ctx->pp[i] = PetscCosReal(theta) + PETSC_i*vscale*PetscSinReal(theta);
186:     ctx->omega[i] = center + radius*ctx->pp[i];
187:     ctx->weight[i] = radius*(vscale*PetscCosReal(theta) + PETSC_i*PetscSinReal(theta))/(PetscReal)ctx->N;
188:   }
189:   return(0);
190: }

194: static PetscErrorCode CISSVecSetRandom(BV V,PetscInt i0,PetscInt i1,PetscRandom rctx)
195: {
197:   PetscInt       i,j,nlocal;
198:   PetscScalar    *vdata;
199:   Vec            x;
200:  
202:   BVGetSizes(V,&nlocal,NULL,NULL);
203:   for (i=i0;i<i1;i++) {
204:     BVSetRandomColumn(V,i,rctx);
205:     BVGetColumn(V,i,&x);
206:     VecGetArray(x,&vdata);
207:     for (j=0;j<nlocal;j++) {
208:       vdata[j] = PetscRealPart(vdata[j]);
209:       if (PetscRealPart(vdata[j]) < 0.5) vdata[j] = -1.0;
210:       else vdata[j] = 1.0;
211:     }
212:     VecRestoreArray(x,&vdata);
213:     BVRestoreColumn(V,i,&x);
214:   }
215:   return(0);
216: }

220: static PetscErrorCode VecScatterVecs(EPS eps,BV Vin,PetscInt n)
221: {
223:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
224:   PetscInt       i;
225:   Vec            vi,pvi;
226:   PetscScalar    *array;

229:   for (i=0;i<n;i++) {
230:     BVGetColumn(Vin,i,&vi);
231:     VecScatterBegin(ctx->scatterin,vi,ctx->xdup,INSERT_VALUES,SCATTER_FORWARD);
232:     VecScatterEnd(ctx->scatterin,vi,ctx->xdup,INSERT_VALUES,SCATTER_FORWARD);
233:     BVRestoreColumn(Vin,i,&vi);
234:     VecGetArray(ctx->xdup,&array);
235:     VecPlaceArray(ctx->xsub,(const PetscScalar*)array);
236:     BVGetColumn(ctx->pV,i,&pvi);
237:     VecCopy(ctx->xsub,pvi);
238:     BVRestoreColumn(ctx->pV,i,&pvi);
239:     VecResetArray(ctx->xsub);
240:   }
241:   return(0);
242: }

246: static PetscErrorCode SolveLinearSystem(EPS eps,Mat A,Mat B,BV V,PetscInt L_start,PetscInt L_end,PetscBool initksp)
247: {
249:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
250:   PetscInt       i,j,p_id;
251:   Mat            Fz;
252:   PC             pc;
253:   Vec            Bvj,vj,yj;
254:   KSP            ksp;

257:   BVGetVec(V,&Bvj);
258:   if (ctx->usest) {
259:     MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&Fz);
260:   }
261:   if (ctx->usest && ctx->pA) {
262:     KSPCreate(ctx->subcomm->comm,&ksp);
263:   }
264:   for (i=0;i<ctx->num_solve_point;i++) {
265:     p_id = i*ctx->subcomm->n + ctx->subcomm_id;
266:     if (!ctx->usest && initksp == PETSC_TRUE) {
267:       MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&ctx->kspMat[i]);
268:       MatCopy(A,ctx->kspMat[i],DIFFERENT_NONZERO_PATTERN);
269:       if (B) {
270:         MatAXPY(ctx->kspMat[i],-ctx->omega[p_id],B,DIFFERENT_NONZERO_PATTERN);
271:       } else {
272:         MatShift(ctx->kspMat[i],-ctx->omega[p_id]);
273:       }
274:       KSPSetOperators(ctx->ksp[i],ctx->kspMat[i],ctx->kspMat[i]);
275:       KSPSetType(ctx->ksp[i],KSPPREONLY);
276:       KSPGetPC(ctx->ksp[i],&pc);
277:       PCSetType(pc,PCREDUNDANT);
278:       KSPSetFromOptions(ctx->ksp[i]);
279:     } else if (ctx->usest && ctx->pA) {
280:       MatCopy(A,Fz,DIFFERENT_NONZERO_PATTERN);
281:       if (B) {
282:         MatAXPY(Fz,-ctx->omega[p_id],B,DIFFERENT_NONZERO_PATTERN);
283:       } else {
284:         MatShift(Fz,-ctx->omega[p_id]);
285:       }
286:       KSPSetOperators(ksp,Fz,Fz);
287:       KSPSetType(ksp,KSPPREONLY);
288:       KSPGetPC(ksp,&pc);
289:       PCSetType(pc,PCREDUNDANT);
290:       KSPSetFromOptions(ksp);
291:     } else if (ctx->usest && !ctx->pA) {
292:       STSetShift(eps->st,ctx->omega[p_id]);
293:       STGetKSP(eps->st,&ksp);
294:     }
295:     
296:     for (j=L_start;j<L_end;j++) {
297:       BVGetColumn(V,j,&vj);
298:       BVGetColumn(ctx->Y,i*ctx->L_max+j,&yj);
299:       if (B) {
300:         MatMult(B,vj,Bvj);
301:         if (ctx->usest) {
302:           KSPSolve(ksp,Bvj,yj);
303:         } else {
304:           KSPSolve(ctx->ksp[i],Bvj,yj);
305:         }
306:       } else {
307:         if (ctx->usest) {
308:           KSPSolve(ksp,vj,yj);
309:         } else {
310:           KSPSolve(ctx->ksp[i],vj,yj);
311:         }
312:       }
313:       BVRestoreColumn(V,j,&vj);
314:       BVRestoreColumn(ctx->Y,i*ctx->L_max+j,&yj);
315:     }
316:     if (ctx->usest && i<ctx->num_solve_point-1) {  KSPReset(ksp); }
317:   }
318:   if (ctx->usest) { MatDestroy(&Fz); }
319:   VecDestroy(&Bvj);
320:   if (ctx->usest && ctx->pA) {
321:     KSPDestroy(&ksp);
322:   }
323:   return(0);
324: }

328: static PetscErrorCode EstimateNumberEigs(EPS eps,PetscInt *L_add)
329: {
331:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
332:   PetscInt       i,j,p_id;
333:   PetscScalar    tmp,m = 1,sum = 0.0;
334:   PetscReal      eta;
335:   Vec            v,vtemp,vj,yj;

338:   BVGetColumn(ctx->Y,0,&yj);
339:   VecDuplicate(yj,&v);
340:   BVRestoreColumn(ctx->Y,0,&yj);
341:   BVGetVec(ctx->V,&vtemp);
342:   for (j=0;j<ctx->L;j++) {
343:     VecSet(v,0);
344:     for (i=0;i<ctx->num_solve_point; i++) {
345:       p_id = i*ctx->subcomm->n + ctx->subcomm_id;
346:       BVSetActiveColumns(ctx->Y,i*ctx->L_max+j,i*ctx->L_max+j+1);
347:       BVMultVec(ctx->Y,ctx->weight[p_id],1,v,&m);
348:     }
349:     BVGetColumn(ctx->V,j,&vj);
350:     if (ctx->pA) {
351:       VecSet(vtemp,0);
352:       VecScatterBegin(ctx->scatterin,v,vtemp,ADD_VALUES,SCATTER_REVERSE);
353:       VecScatterEnd(ctx->scatterin,v,vtemp,ADD_VALUES,SCATTER_REVERSE);
354:       VecDot(vj,vtemp,&tmp);
355:     } else {
356:       VecDot(vj,v,&tmp);
357:     }
358:     BVRestoreColumn(ctx->V,j,&vj);
359:     if (ctx->useconj) sum += PetscRealPart(tmp)*2;
360:     else sum += tmp;
361:   }
362:   ctx->est_eig = PetscAbsScalar(sum/(PetscReal)ctx->L);
363:   eta = PetscPowReal(10,-PetscLog10Real(eps->tol)/ctx->N);
364:   PetscInfo1(eps,"Estimation_#Eig %f\n",(double)ctx->est_eig);
365:   *L_add = (PetscInt)PetscCeilReal((ctx->est_eig*eta)/ctx->M) - ctx->L;
366:   if (*L_add < 0) *L_add = 0;
367:   if (*L_add>ctx->L_max-ctx->L) {
368:     PetscInfo(eps,"Number of eigenvalues around the contour path may be too large\n");
369:     *L_add = ctx->L_max-ctx->L;
370:   }
371:   VecDestroy(&v);
372:   VecDestroy(&vtemp);
373:   return(0);
374: }

378: static PetscErrorCode CalcMu(EPS eps,PetscScalar *Mu)
379: {
381:   PetscMPIInt    sub_size;
382:   PetscInt       i,j,k,s;
383:   PetscScalar    *m,*temp,*temp2,*ppk,alp;
384:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
385:   Mat            M;

388:   MPI_Comm_size(ctx->subcomm->comm,&sub_size);
389:   PetscMalloc(ctx->num_solve_point*ctx->L*(ctx->L+1)*sizeof(PetscScalar),&temp);
390:   PetscMalloc(2*ctx->M*ctx->L*ctx->L*sizeof(PetscScalar),&temp2);
391:   PetscMalloc(ctx->num_solve_point*sizeof(PetscScalar),&ppk);
392:   MatCreateSeqDense(PETSC_COMM_SELF,ctx->L,ctx->L_max*ctx->num_solve_point,NULL,&M);
393:   for (i=0;i<2*ctx->M*ctx->L*ctx->L;i++) temp2[i] = 0;
394:   BVSetActiveColumns(ctx->Y,0,ctx->L_max*ctx->num_solve_point);
395:   if (ctx->pA) { 
396:     BVSetActiveColumns(ctx->pV,0,ctx->L);
397:     BVDot(ctx->Y,ctx->pV,M);
398:   } else { 
399:     BVSetActiveColumns(ctx->V,0,ctx->L);
400:     BVDot(ctx->Y,ctx->V,M);
401:   }
402:   MatDenseGetArray(M,&m);
403:   for (i=0;i<ctx->num_solve_point;i++) {
404:     for (j=0;j<ctx->L;j++) {
405:       for (k=0;k<ctx->L;k++) {
406:         temp[k+j*ctx->L+i*ctx->L*ctx->L]=m[k+j*ctx->L+i*ctx->L*ctx->L_max];
407:       }
408:     }
409:   }
410:   MatDenseRestoreArray(M,&m);
411:   for (i=0;i<ctx->num_solve_point;i++) ppk[i] = 1;
412:   for (k=0;k<2*ctx->M;k++) {
413:     for (j=0;j<ctx->L;j++) {
414:       for (i=0;i<ctx->num_solve_point;i++) {
415:         alp = ppk[i]*ctx->weight[i*ctx->subcomm->n + ctx->subcomm_id];
416:         for (s=0;s<ctx->L;s++) {
417:           if (ctx->useconj) temp2[s+(j+k*ctx->L)*ctx->L] += PetscRealPart(alp*temp[s+(j+i*ctx->L)*ctx->L])*2;
418:           else temp2[s+(j+k*ctx->L)*ctx->L] += alp*temp[s+(j+i*ctx->L)*ctx->L];
419:         }
420:       }
421:     }
422:     for (i=0;i<ctx->num_solve_point;i++) 
423:       ppk[i] *= ctx->pp[i*ctx->subcomm->n + ctx->subcomm_id];
424:   }
425:   for (i=0;i<2*ctx->M*ctx->L*ctx->L;i++) temp2[i] /= sub_size;
426:   MPI_Allreduce(temp2,Mu,2*ctx->M*ctx->L*ctx->L,MPIU_SCALAR,MPIU_SUM,(PetscObjectComm((PetscObject)eps)));
427:   PetscFree(ppk);
428:   PetscFree(temp);
429:   PetscFree(temp2);
430:   MatDestroy(&M);
431:   return(0);
432: }

436: static PetscErrorCode BlockHankel(EPS eps,PetscScalar *Mu,PetscInt s,PetscScalar *H)
437: {
438:   EPS_CISS *ctx = (EPS_CISS*)eps->data;
439:   PetscInt i,j,k,L=ctx->L,M=ctx->M;

442:   for (k=0;k<L*M;k++) 
443:     for (j=0;j<M;j++) 
444:       for (i=0;i<L;i++)
445:         H[j*L+i+k*L*M] = Mu[i+k*L+(j+s)*L*L];
446:   return(0);
447: }

451: static PetscErrorCode SVD_H0(EPS eps,PetscScalar *S,PetscInt *K)
452: {
453: #if defined(SLEPC_MISSING_LAPACK_GESVD)
455:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable");
456: #else
458:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
459:   PetscInt       i,ml=ctx->L*ctx->M;
460:   PetscBLASInt   m,n,lda,ldu,ldvt,lwork,info;
461:   PetscReal      *rwork;
462:   PetscScalar    *work;

465:   PetscMalloc(3*ml*sizeof(PetscScalar),&work);
466:   PetscMalloc(5*ml*sizeof(PetscReal),&rwork);
467:   PetscBLASIntCast(ml,&m);
468:   n = m; lda = m; ldu = m; ldvt = m; lwork = 3*m;
469:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
470:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&m,&n,S,&lda,ctx->sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,rwork,&info));
471:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGESVD %d",info);
472:   PetscFPTrapPop();
473:   (*K) = 0;
474:   for (i=0;i<ml;i++) {
475:     if (ctx->sigma[i]/PetscMax(ctx->sigma[0],1)>ctx->delta) (*K)++;
476:   }
477:   PetscFree(work);
478:   PetscFree(rwork);
479:   return(0);
480: #endif
481: }

485: static PetscErrorCode ConstructS(EPS eps)
486: {
488:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
489:   PetscInt       i,j,k,vec_local_size,p_id;
490:   Vec            v,sj,yj;
491:   PetscScalar    *ppk, *v_data, m = 1;

494:   BVGetSizes(ctx->Y,&vec_local_size,NULL,NULL);
495:   PetscMalloc(ctx->num_solve_point*sizeof(PetscScalar),&ppk);
496:   for (i=0;i<ctx->num_solve_point;i++) ppk[i] = 1;
497:   BVGetColumn(ctx->Y,0,&yj);
498:   VecDuplicate(yj,&v);
499:   BVRestoreColumn(ctx->Y,0,&yj);
500:   for (k=0;k<ctx->M;k++) {
501:     for (j=0;j<ctx->L;j++) {
502:       VecSet(v,0);
503:       for (i=0;i<ctx->num_solve_point;i++) {
504:         p_id = i*ctx->subcomm->n + ctx->subcomm_id;
505:         BVSetActiveColumns(ctx->Y,i*ctx->L_max+j,i*ctx->L_max+j+1);
506:         BVMultVec(ctx->Y,ppk[i]*ctx->weight[p_id],1,v,&m);
507:       }
508:       if (ctx->useconj) {
509:         VecGetArray(v,&v_data);
510:         for (i=0;i<vec_local_size;i++) v_data[i] = PetscRealPart(v_data[i])*2;
511:         VecRestoreArray(v,&v_data);
512:       }
513:       BVGetColumn(ctx->S,k*ctx->L+j,&sj);
514:       if (ctx->pA) {
515:         VecSet(sj,0);
516:         VecScatterBegin(ctx->scatterin,v,sj,ADD_VALUES,SCATTER_REVERSE);
517:         VecScatterEnd(ctx->scatterin,v,sj,ADD_VALUES,SCATTER_REVERSE);
518:       } else {
519:         VecCopy(v,sj);
520:       }
521:       BVRestoreColumn(ctx->S,k*ctx->L+j,&sj);
522:     }
523:     for (i=0;i<ctx->num_solve_point;i++) {
524:       p_id = i*ctx->subcomm->n + ctx->subcomm_id;
525:       ppk[i] *= ctx->pp[p_id];
526:     }
527:   }
528:   PetscFree(ppk);
529:   VecDestroy(&v);
530:   return(0);
531: }

535: static PetscErrorCode SVD_S(BV S,PetscInt ml,PetscReal delta,PetscReal *sigma,PetscInt *K)
536: {
537: #if defined(SLEPC_MISSING_LAPACK_GESVD)
539:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable");
540: #else
542:   PetscInt       i,j,k,local_size;
543:   PetscReal      *rwork;
544:   PetscScalar    *work,*temp,*B,*tempB,*s_data,*Q1,*Q2,*temp2,alpha=1,beta=0;
545:   PetscBLASInt   l,m,n,lda,ldu,ldvt,lwork,info,ldb,ldc;

548:   BVGetSizes(S,&local_size,NULL,NULL);    
549:   BVGetArray(S,&s_data);
550:   PetscMalloc(ml*ml*sizeof(PetscScalar),&temp);
551:   PetscMalloc(ml*ml*sizeof(PetscScalar),&temp2);
552:   PetscMalloc(local_size*ml*sizeof(PetscScalar),&Q1);
553:   PetscMalloc(local_size*ml*sizeof(PetscScalar),&Q2);
554:   PetscMalloc(ml*ml*sizeof(PetscScalar),&B);
555:   PetscMemzero(B,ml*ml*sizeof(PetscScalar));
556:   PetscMalloc(3*ml*sizeof(PetscScalar),&work);
557:   PetscMalloc(5*ml*sizeof(PetscReal),&rwork);
558:   PetscMalloc(ml*ml*sizeof(PetscScalar),&tempB);
559:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);

561:   for (i=0;i<ml;i++) {
562:     B[i*ml+i]=1;
563:   }

565:   for (k=0;k<2;k++) {
566:     PetscBLASIntCast(local_size,&m);
567:     PetscBLASIntCast(ml,&l);
568:     n = l; lda = m; ldb = m; ldc = l;
569:     if (k == 0) {
570:       PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&l,&n,&m,&alpha,s_data,&lda,s_data,&ldb,&beta,temp,&ldc));
571:     } else if ((k%2)==1) {
572:       PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&l,&n,&m,&alpha,Q1,&lda,Q1,&ldb,&beta,temp,&ldc));
573:     } else {
574:       PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&l,&n,&m,&alpha,Q2,&lda,Q2,&ldb,&beta,temp,&ldc));
575:     }
576:     PetscMemzero(temp2,ml*ml*sizeof(PetscScalar));
577:     MPI_Allreduce(temp,temp2,ml*ml,MPIU_SCALAR,MPIU_SUM,(PetscObjectComm((PetscObject)S)));

579:     PetscBLASIntCast(ml,&m);
580:     n = m; lda = m; lwork = 3*m, ldu = 1; ldvt = 1;
581:     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&m,&n,temp2,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,rwork,&info));
582:     if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGESVD %d",info);

584:     PetscBLASIntCast(local_size,&l);
585:     PetscBLASIntCast(ml,&n);
586:     m = n; lda = l; ldb = m; ldc = l;
587:     if (k==0) {
588:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,s_data,&lda,temp2,&ldb,&beta,Q1,&ldc));
589:     } else if ((k%2)==1) {
590:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,Q1,&lda,temp2,&ldb,&beta,Q2,&ldc));
591:     } else {
592:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,Q2,&lda,temp2,&ldb,&beta,Q1,&ldc));
593:     }

595:     PetscBLASIntCast(ml,&l);
596:     m = l; n = l; lda = l; ldb = m; ldc = l;
597:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,B,&lda,temp2,&ldb,&beta,tempB,&ldc));
598:     for (i=0;i<ml;i++) {
599:       sigma[i] = sqrt(sigma[i]);
600:       for (j=0;j<local_size;j++) {
601:         if ((k%2)==1) Q2[j+i*local_size]/=sigma[i];
602:         else Q1[j+i*local_size]/=sigma[i];
603:       }
604:       for (j=0;j<ml;j++) {
605:         B[j+i*ml]=tempB[j+i*ml]*sigma[i];
606:       }
607:     }
608:   }

610:   PetscBLASIntCast(ml,&m);
611:   n = m; lda = m; ldu=1; ldvt=1;
612:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&m,&n,B,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,rwork,&info));
613:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGESVD %d",info);

615:   PetscBLASIntCast(local_size,&l);
616:   PetscBLASIntCast(ml,&n);
617:   m = n; lda = l; ldb = m; ldc = l;
618:   if ((k%2)==1) {
619:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&l,&n,&m,&alpha,Q1,&lda,B,&ldb,&beta,s_data,&ldc));
620:   } else {
621:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&l,&n,&m,&alpha,Q2,&lda,B,&ldb,&beta,s_data,&ldc));
622:   }
623:  
624:   PetscFPTrapPop();
625:   BVRestoreArray(S,&s_data);
626:   PetscFree(temp2);
627:   PetscFree(Q1);
628:   PetscFree(Q2);

630:   (*K) = 0;
631:   for (i=0;i<ml;i++) {
632:     if (sigma[i]/PetscMax(sigma[0],1)>delta) (*K)++;
633:   }
634:   PetscFree(temp);
635:   PetscFree(B);
636:   PetscFree(tempB);
637:   PetscFree(work);
638:   PetscFree(rwork);
639:   return(0);
640: #endif
641: }

645: static PetscErrorCode isGhost(EPS eps,PetscInt ld,PetscInt nv,PetscBool *fl)
646: {
648:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
649:   PetscInt       i,j;
650:   PetscScalar    *pX;
651:   PetscReal      *tau,s1,s2,tau_max=0.0;

654:   PetscMalloc(nv*sizeof(PetscReal),&tau);
655:   DSVectors(eps->ds,DS_MAT_X,NULL,NULL);
656:   DSGetArray(eps->ds,DS_MAT_X,&pX);

658:   for (i=0;i<nv;i++) {
659:     s1 = 0;
660:     s2 = 0;
661:     for (j=0;j<nv;j++) {
662:       s1 += PetscAbsScalar(PetscPowScalar(pX[i*ld+j],2));
663:       s2 += PetscPowReal(PetscAbsScalar(pX[i*ld+j]),2)/ctx->sigma[j];
664:     }
665:     tau[i] = s1/s2;
666:     tau_max = PetscMax(tau_max,tau[i]);
667:   }
668:   DSRestoreArray(eps->ds,DS_MAT_X,&pX);
669:   for (i=0;i<nv;i++) {
670:     tau[i] /= tau_max;
671:   }
672:   for (i=0;i<nv;i++) {
673:     if (tau[i]>=ctx->spurious_threshold) {
674:       fl[i] = PETSC_TRUE;
675:     } else fl[i] = PETSC_FALSE;
676:   }
677:   PetscFree(tau);
678:   return(0);
679: }

683: PetscErrorCode EPSSetUp_CISS(EPS eps)
684: {
686:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
687:   const char     *prefix;
688:   PetscInt       i;
689:   PetscBool      issinvert,istrivial,flg;
690:   PetscScalar    center;

693:   eps->ncv = PetscMin(eps->n,ctx->L_max*ctx->M);
694:   if (!eps->mpd) eps->mpd = eps->ncv;
695:   if (!eps->which) eps->which = EPS_ALL;
696:   if (!eps->extraction) { EPSSetExtraction(eps,EPS_RITZ); } 
697:   else if (eps->extraction!=EPS_RITZ) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Unsupported extraction type");
698:   if (eps->arbitrary) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Arbitrary selection of eigenpairs not supported in this solver");

700:   /* check region */
701:   RGIsTrivial(eps->rg,&istrivial);
702:   if (istrivial) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"EPSCISS requires a nontrivial region, e.g. -rg_type ellipse ...");
703:   PetscObjectTypeCompare((PetscObject)eps->rg,RGELLIPSE,&flg);
704:   if (!flg) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Currently only implemented for elliptic regions");
705:   RGEllipseGetParameters(eps->rg,&center,NULL,NULL);

707:   if (ctx->isreal && PetscImaginaryPart(center) == 0.0) ctx->useconj = PETSC_TRUE;
708:   else ctx->useconj = PETSC_FALSE;

710:   /* create split comm */
711:   SetSolverComm(eps);

713:   EPSAllocateSolution(eps,0);
714:   PetscMalloc(ctx->N*sizeof(PetscScalar),&ctx->weight);
715:   PetscMalloc(ctx->N*sizeof(PetscScalar),&ctx->omega);
716:   PetscMalloc(ctx->N*sizeof(PetscScalar),&ctx->pp);
717:   PetscLogObjectMemory((PetscObject)eps,3*ctx->N*sizeof(PetscScalar));
718:   PetscMalloc(ctx->L_max*ctx->M*sizeof(PetscReal),&ctx->sigma);
719:   PetscLogObjectMemory((PetscObject)eps,ctx->L_max*ctx->N*sizeof(PetscReal));

721:   /* allocate basis vectors */
722:   BVDuplicateResize(eps->V,ctx->L_max*ctx->M,&ctx->S);
723:   PetscLogObjectParent((PetscObject)eps,(PetscObject)ctx->S);
724:   BVDuplicateResize(eps->V,ctx->L_max,&ctx->V);
725:   PetscLogObjectParent((PetscObject)eps,(PetscObject)ctx->V);

727:   CISSRedundantMat(eps);
728:   if (ctx->pA) {
729:     CISSScatterVec(eps);
730:     BVCreate(PetscObjectComm((PetscObject)ctx->xsub),&ctx->pV);
731:     BVSetSizesFromVec(ctx->pV,ctx->xsub,eps->n);
732:     BVSetFromOptions(ctx->pV);
733:     BVResize(ctx->pV,ctx->L_max,PETSC_FALSE);
734:     PetscLogObjectParent((PetscObject)eps,(PetscObject)ctx->pV);
735:   }

737:   if (ctx->usest) {
738:     PetscObjectTypeCompare((PetscObject)eps->st,STSINVERT,&issinvert);
739:     if (!issinvert) { STSetType(eps->st,STSINVERT); }
740:   } else {
741:     PetscMalloc(ctx->num_solve_point*sizeof(KSP),&ctx->ksp);
742:     PetscLogObjectMemory((PetscObject)eps,ctx->num_solve_point*sizeof(KSP));
743:     PetscMalloc(ctx->num_solve_point*sizeof(Mat),&ctx->kspMat);
744:     PetscLogObjectMemory((PetscObject)eps,ctx->num_solve_point*sizeof(Mat));
745:     for (i=0;i<ctx->num_solve_point;i++) {
746:       KSPCreate(ctx->subcomm->comm,&ctx->ksp[i]);
747:       PetscObjectIncrementTabLevel((PetscObject)ctx->ksp[i],(PetscObject)eps,1);
748:       PetscLogObjectParent((PetscObject)eps,(PetscObject)ctx->ksp[i]);
749:       KSPAppendOptionsPrefix(ctx->ksp[i],"eps_ciss_");
750:       EPSGetOptionsPrefix(eps,&prefix);
751:       KSPAppendOptionsPrefix(ctx->ksp[i],prefix);
752:     }
753:   }

755:   if (ctx->pA) {
756:     BVCreate(PetscObjectComm((PetscObject)ctx->xsub),&ctx->Y);
757:     BVSetSizesFromVec(ctx->Y,ctx->xsub,eps->n);
758:     BVSetFromOptions(ctx->Y);
759:     BVResize(ctx->Y,ctx->num_solve_point*ctx->L_max,PETSC_FALSE);
760:   } else {
761:     BVDuplicateResize(eps->V,ctx->num_solve_point*ctx->L_max,&ctx->Y);
762:   }
763:   PetscLogObjectParent((PetscObject)eps,(PetscObject)ctx->Y);


766:   if (eps->ishermitian && eps->ispositive) {
767:     DSSetType(eps->ds,DSGHEP);
768:   } else {
769:     DSSetType(eps->ds,DSGNHEP);
770:   }
771:   DSAllocate(eps->ds,eps->ncv);
772:   EPSSetWorkVecs(eps,2);
773:   
774:   /* dispatch solve method */
775:   eps->ops->solve = EPSSolve_CISS;
776:   return(0);
777: }

781: PetscErrorCode EPSSolve_CISS(EPS eps)
782: {
784:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
785:   Mat            A,B,X,M,pA,pB;
786:   PetscInt       i,ld,nmat,L_add=0,nv=0,L_base=ctx->L,inner,outer,nlocal,*inside;
787:   PetscScalar    *Mu,*H0,*rr,*temp;
788:   PetscReal      error,max_error;
789:   PetscBool      *fl1;
790:   Vec            si,w=eps->work[0];
791:   SlepcSC        sc;

794:   /* override SC settings */
795:   DSGetSlepcSC(eps->ds,&sc);
796:   sc->comparison    = SlepcCompareLargestMagnitude;
797:   sc->comparisonctx = NULL;
798:   sc->map           = NULL;
799:   sc->mapobj        = NULL;
800:   VecGetLocalSize(w,&nlocal);
801:   DSGetLeadingDimension(eps->ds,&ld);
802:   STGetNumMatrices(eps->st,&nmat);
803:   STGetOperators(eps->st,0,&A);
804:   if (nmat>1) { STGetOperators(eps->st,1,&B); }
805:   else B = NULL;
806:   SetPathParameter(eps);
807:   CISSVecSetRandom(ctx->V,0,ctx->L,eps->rand);

809:   if (ctx->pA) {
810:     VecScatterVecs(eps,ctx->V,ctx->L);
811:     SolveLinearSystem(eps,ctx->pA,ctx->pB,ctx->pV,0,ctx->L,PETSC_TRUE);
812:   } else {
813:     SolveLinearSystem(eps,A,B,ctx->V,0,ctx->L,PETSC_TRUE);
814:   }

816:   EstimateNumberEigs(eps,&L_add);
817:   if (L_add>0) {
818:     PetscInfo2(eps,"Changing L %D -> %D by Estimate #Eig\n",ctx->L,ctx->L+L_add);
819:     CISSVecSetRandom(ctx->V,ctx->L,ctx->L+L_add,eps->rand);
820:     if (ctx->pA) {
821:       VecScatterVecs(eps,ctx->V,ctx->L+L_add);
822:       SolveLinearSystem(eps,ctx->pA,ctx->pB,ctx->pV,ctx->L,ctx->L+L_add,PETSC_FALSE);
823:     } else {
824:       SolveLinearSystem(eps,A,B,ctx->V,ctx->L,ctx->L+L_add,PETSC_FALSE);
825:     }
826:     ctx->L += L_add;
827:   }
828:   PetscMalloc(ctx->L*ctx->L*ctx->M*2*sizeof(PetscScalar),&Mu);
829:   PetscMalloc(ctx->L*ctx->M*ctx->L*ctx->M*sizeof(PetscScalar),&H0);
830:   for (i=0;i<ctx->refine_blocksize;i++) {
831:     CalcMu(eps,Mu);
832:     BlockHankel(eps,Mu,0,H0);
833:     SVD_H0(eps,H0,&nv);
834:     if (ctx->sigma[0]<=ctx->delta || nv < ctx->L*ctx->M || ctx->L == ctx->L_max) break;
835:     L_add = L_base;
836:     if (ctx->L+L_add>ctx->L_max) L_add = ctx->L_max-ctx->L;
837:     PetscInfo2(eps,"Changing L %D -> %D by SVD(H0)\n",ctx->L,ctx->L+L_add);
838:     CISSVecSetRandom(ctx->V,ctx->L,ctx->L+L_add,eps->rand);
839:     if (ctx->pA) {
840:       VecScatterVecs(eps,ctx->V,ctx->L+L_add);
841:       SolveLinearSystem(eps,ctx->pA,ctx->pB,ctx->pV,ctx->L,ctx->L+L_add,PETSC_FALSE);
842:     } else {
843:       SolveLinearSystem(eps,A,B,ctx->V,ctx->L,ctx->L+L_add,PETSC_FALSE);
844:     }
845:     ctx->L += L_add;
846:   }
847:   PetscFree(Mu);
848:   PetscFree(H0);

850:   for (outer=0;outer<=ctx->refine_outer;outer++) {
851:     for (inner=0;inner<=ctx->refine_inner;inner++) {
852:       ConstructS(eps);
853:       BVSetActiveColumns(ctx->S,0,ctx->L);
854:       BVCopy(ctx->S,ctx->V);
855:       SVD_S(ctx->S,ctx->L*ctx->M,ctx->delta,ctx->sigma,&nv);
856:       if (ctx->sigma[0]>ctx->delta && nv==ctx->L*ctx->M && inner!=ctx->refine_inner) {
857:         if (ctx->pA) {
858:           VecScatterVecs(eps,ctx->V,ctx->L);
859:           SolveLinearSystem(eps,ctx->pA,ctx->pB,ctx->pV,0,ctx->L,PETSC_FALSE);
860:         } else {
861:           SolveLinearSystem(eps,A,B,ctx->V,0,ctx->L,PETSC_FALSE);
862:         }
863:       } else break;
864:     }

866:     eps->nconv = 0;
867:     if (nv == 0) break;
868:     DSSetDimensions(eps->ds,nv,0,0,0);
869:     DSSetState(eps->ds,DS_STATE_RAW);

871:     BVSetActiveColumns(ctx->S,0,nv);
872:     DSGetMat(eps->ds,DS_MAT_A,&pA);
873:     MatZeroEntries(pA);
874:     BVMatProject(ctx->S,A,ctx->S,pA);
875:     DSRestoreMat(eps->ds,DS_MAT_A,&pA);
876:     DSGetMat(eps->ds,DS_MAT_B,&pB);
877:     MatZeroEntries(pB);
878:     if (B) { BVMatProject(ctx->S,B,ctx->S,pB); }
879:     else { MatShift(pB,1); }
880:     DSRestoreMat(eps->ds,DS_MAT_B,&pB);

882:     DSSolve(eps->ds,eps->eigr,NULL);
883:     DSVectors(eps->ds,DS_MAT_X,NULL,NULL);

885:     PetscMalloc(nv*sizeof(PetscBool),&fl1);
886:     PetscMalloc(nv*sizeof(PetscInt),&inside);
887:     isGhost(eps,ld,nv,fl1);
888:     RGCheckInside(eps->rg,nv,eps->eigr,eps->eigi,inside);
889:     PetscMalloc(nv*sizeof(PetscScalar),&rr);
890:     for (i=0;i<nv;i++) {
891:       if (fl1[i] && inside[i]>0) {
892:         rr[i] = 1.0;
893:         eps->nconv++;
894:       } else rr[i] = 0.0;
895:     }
896:     PetscFree(fl1);
897:     PetscFree(inside);
898:     DSSort(eps->ds,eps->eigr,NULL,rr,NULL,&eps->nconv);
899:     PetscFree(rr);
900:     BVSetActiveColumns(eps->V,0,nv);
901:     BVSetActiveColumns(ctx->S,0,nv);
902:     BVCopy(ctx->S,eps->V);

904:     DSVectors(eps->ds,DS_MAT_X,NULL,NULL);
905:     DSGetMat(eps->ds,DS_MAT_X,&X);
906:     BVMultInPlace(ctx->S,X,0,eps->nconv);
907:     if (eps->ishermitian) {
908:       BVMultInPlace(eps->V,X,0,eps->nconv);
909:     }
910:     MatDestroy(&X);
911:     max_error = 0.0;
912:     for (i=0;i<eps->nconv;i++) {
913:       BVGetColumn(ctx->S,i,&si);
914:       VecNormalize(si,NULL);
915:       EPSComputeRelativeError_Private(eps,eps->eigr[i],0,si,NULL,&error);
916:       BVRestoreColumn(ctx->S,i,&si);
917:       max_error = PetscMax(max_error,error);
918:     }

920:     if (max_error <= eps->tol || outer == ctx->refine_outer) break;

922:     if (eps->nconv > ctx->L) nv = eps->nconv;
923:     else if (ctx->L > nv) nv = ctx->L;
924:     MatCreateSeqDense(PETSC_COMM_SELF,nv,ctx->L,NULL,&M);
925:     MatDenseGetArray(M,&temp);
926:     for (i=0;i<ctx->L*nv;i++) {
927:       PetscRandomGetValue(eps->rand,&temp[i]);
928:       temp[i] = PetscRealPart(temp[i]);
929:     }
930:     MatDenseRestoreArray(M,&temp);
931:     BVSetActiveColumns(ctx->S,0,nv);
932:     BVMultInPlace(ctx->S,M,0,ctx->L);
933:     MatDestroy(&M);
934:     BVSetActiveColumns(ctx->S,0,ctx->L);
935:     BVCopy(ctx->S,ctx->V);
936:     if (ctx->pA) {
937:       VecScatterVecs(eps,ctx->V,ctx->L);
938:       SolveLinearSystem(eps,ctx->pA,ctx->pB,ctx->pV,0,ctx->L,PETSC_FALSE);
939:     } else {
940:       SolveLinearSystem(eps,A,B,ctx->V,0,ctx->L,PETSC_FALSE);
941:     }
942:   }
943:   eps->reason = EPS_CONVERGED_TOL;
944:   return(0);
945: }

949: static PetscErrorCode EPSCISSSetSizes_CISS(EPS eps,PetscInt ip,PetscInt bs,PetscInt ms,PetscInt npart,PetscInt bsmax,PetscBool isreal)
950: {
952:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;

955:   if (ip == PETSC_DECIDE || ip == PETSC_DEFAULT) {
956:     if (ctx->N!=32) { ctx->N =32; ctx->M = ctx->N/4; }
957:   } else {
958:     if (ip<1) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_OUTOFRANGE,"The ip argument must be > 0");
959:     if (ip%2) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_OUTOFRANGE,"The ip argument must be an even number");
960:     if (ctx->N!=ip) { ctx->N = ip; ctx->M = ctx->N/4; }
961:   }
962:   if (bs == PETSC_DECIDE || bs == PETSC_DEFAULT) {
963:     ctx->L = 16;
964:   } else {
965:     if (bs<1) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_OUTOFRANGE,"The bs argument must be > 0");
966:     if (bs>ctx->L_max) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_OUTOFRANGE,"The bs argument must be less than or equal to the maximum number of block size");
967:     ctx->L = bs;
968:   }
969:   if (ms == PETSC_DECIDE || ms == PETSC_DEFAULT) {
970:     ctx->M = ctx->N/4;
971:   } else {
972:     if (ms<1) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_OUTOFRANGE,"The ms argument must be > 0");
973:     if (ms>ctx->N) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_OUTOFRANGE,"The ms argument must be less than or equal to the number of integration points");
974:     ctx->M = ms;
975:   }
976:   if (npart == PETSC_DECIDE || npart == PETSC_DEFAULT) {
977:     ctx->num_subcomm = 1;
978:   } else {
979:     if (npart<1) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_OUTOFRANGE,"The npart argument must be > 0");
980:     ctx->num_subcomm = npart;
981:   }
982:   if (bsmax == PETSC_DECIDE || bsmax == PETSC_DEFAULT) {
983:     ctx->L = 256;
984:   } else {
985:     if (bsmax<1) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_OUTOFRANGE,"The bsmax argument must be > 0");
986:     if (bsmax<ctx->L) ctx->L_max = ctx->L;
987:     else ctx->L_max = bsmax;
988:   }
989:   ctx->isreal = isreal;
990:   EPSReset(eps);   /* clean allocated arrays and force new setup */
991:   return(0);
992: }

996: /*@
997:    EPSCISSSetSizes - Sets the values of various size parameters in the CISS solver.

999:    Logically Collective on EPS

1001:    Input Parameters:
1002: +  eps   - the eigenproblem solver context
1003: .  ip    - number of integration points
1004: .  bs    - block size
1005: .  ms    - moment size
1006: .  npart - number of partitions when splitting the communicator
1007: .  bsmax - max block size
1008: -  isreal - A and B are real

1010:    Options Database Keys:
1011: +  -eps_ciss_integration_points - Sets the number of integration points
1012: .  -eps_ciss_blocksize - Sets the block size
1013: .  -eps_ciss_moments - Sets the moment size
1014: .  -eps_ciss_partitions - Sets the number of partitions
1015: .  -eps_ciss_maxblocksize - Sets the maximum block size
1016: -  -eps_ciss_realmats - A and B are real

1018:    Note:
1019:    The default number of partitions is 1. This means the internal KSP object is shared
1020:    among all processes of the EPS communicator. Otherwise, the communicator is split
1021:    into npart communicators, so that npart KSP solves proceed simultaneously.

1023:    Level: advanced

1025: .seealso: EPSCISSGetSizes()
1026: @*/
1027: PetscErrorCode EPSCISSSetSizes(EPS eps,PetscInt ip,PetscInt bs,PetscInt ms,PetscInt npart,PetscInt bsmax,PetscBool isreal)
1028: {

1039:   PetscTryMethod(eps,"EPSCISSSetSizes_C",(EPS,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,PetscBool),(eps,ip,bs,ms,npart,bsmax,isreal));
1040:   return(0);
1041: }

1045: static PetscErrorCode EPSCISSGetSizes_CISS(EPS eps,PetscInt *ip,PetscInt *bs,PetscInt *ms,PetscInt *npart,PetscInt *bsmax,PetscBool *isreal)
1046: {
1047:   EPS_CISS *ctx = (EPS_CISS*)eps->data;

1050:   if (ip) *ip = ctx->N;
1051:   if (bs) *bs = ctx->L;
1052:   if (ms) *ms = ctx->M;
1053:   if (npart) *npart = ctx->num_subcomm;
1054:   if (bsmax) *bsmax = ctx->L_max;
1055:   if (isreal) *isreal = ctx->isreal;
1056:   return(0);
1057: }

1061: /*@
1062:    EPSCISSGetSizes - Gets the values of various size parameters in the CISS solver.

1064:    Not Collective

1066:    Input Parameter:
1067: .  eps - the eigenproblem solver context

1069:    Output Parameters:
1070: +  ip    - number of integration points
1071: .  bs    - block size
1072: .  ms    - moment size
1073: .  npart - number of partitions when splitting the communicator
1074: .  bsmax - max block size
1075: -  isreal - A and B are real

1077:    Level: advanced

1079: .seealso: EPSCISSSetSizes()
1080: @*/
1081: PetscErrorCode EPSCISSGetSizes(EPS eps,PetscInt *ip,PetscInt *bs,PetscInt *ms,PetscInt *npart,PetscInt *bsmax,PetscBool *isreal)
1082: {

1087:   PetscTryMethod(eps,"EPSCISSGetSizes_C",(EPS,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscBool*),(eps,ip,bs,ms,npart,bsmax,isreal));
1088:   return(0);
1089: }

1093: static PetscErrorCode EPSCISSSetThreshold_CISS(EPS eps,PetscReal delta,PetscReal spur)
1094: {
1095:   EPS_CISS *ctx = (EPS_CISS*)eps->data;

1098:   if (delta == PETSC_DEFAULT) {
1099:     ctx->delta = 1e-12;
1100:   } else {
1101:     if (delta<=0.0) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_OUTOFRANGE,"The delta argument must be > 0.0");
1102:     ctx->delta = delta;
1103:   }
1104:   if (spur == PETSC_DEFAULT) {
1105:     ctx->spurious_threshold = 1e-4;
1106:   } else {
1107:     if (spur<=0.0) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_OUTOFRANGE,"The spurious threshold argument must be > 0.0");
1108:     ctx->spurious_threshold = spur;
1109:   }
1110:   return(0);
1111: }

1115: /*@
1116:    EPSCISSSetThreshold - Sets the values of various threshold parameters in
1117:    the CISS solver.

1119:    Logically Collective on EPS

1121:    Input Parameters:
1122: +  eps   - the eigenproblem solver context
1123: .  delta - threshold for numerical rank
1124: -  spur  - spurious threshold (to discard spurious eigenpairs)

1126:    Options Database Keys:
1127: +  -eps_ciss_delta - Sets the delta
1128: -  -eps_ciss_spurious_threshold - Sets the spurious threshold

1130:    Level: advanced

1132: .seealso: EPSCISSGetThreshold()
1133: @*/
1134: PetscErrorCode EPSCISSSetThreshold(EPS eps,PetscReal delta,PetscReal spur)
1135: {

1142:   PetscTryMethod(eps,"EPSCISSSetThreshold_C",(EPS,PetscReal,PetscReal),(eps,delta,spur));
1143:   return(0);
1144: }

1148: static PetscErrorCode EPSCISSGetThreshold_CISS(EPS eps,PetscReal *delta,PetscReal *spur)
1149: {
1150:   EPS_CISS *ctx = (EPS_CISS*)eps->data;

1153:   if (delta) *delta = ctx->delta;
1154:   if (spur)  *spur = ctx->spurious_threshold;
1155:   return(0);
1156: }

1160: /*@
1161:    EPSCISSGetThreshold - Gets the values of various threshold parameters
1162:    in the CISS solver.

1164:    Not Collective

1166:    Input Parameter:
1167: .  eps - the eigenproblem solver context

1169:    Output Parameters:
1170: +  delta - threshold for numerical rank
1171: -  spur  - spurious threshold (to discard spurious eigenpairs)

1173:    Level: advanced

1175: .seealso: EPSCISSSetThreshold()
1176: @*/
1177: PetscErrorCode EPSCISSGetThreshold(EPS eps,PetscReal *delta,PetscReal *spur)
1178: {

1183:   PetscTryMethod(eps,"EPSCISSGetThreshold_C",(EPS,PetscReal*,PetscReal*),(eps,delta,spur));
1184:   return(0);
1185: }

1189: static PetscErrorCode EPSCISSSetRefinement_CISS(EPS eps,PetscInt inner,PetscInt outer,PetscInt blsize)
1190: {
1191:   EPS_CISS *ctx = (EPS_CISS*)eps->data;

1194:   if (inner == PETSC_DEFAULT) {
1195:     ctx->refine_inner = 0;
1196:   } else {
1197:     if (inner<0) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_OUTOFRANGE,"The refine inner argument must be >= 0");
1198:     ctx->refine_inner = inner;
1199:   }
1200:   if (outer == PETSC_DEFAULT) {
1201:     ctx->refine_outer = 0;
1202:   } else {
1203:     if (outer<0) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_OUTOFRANGE,"The refine outer argument must be >= 0");
1204:     ctx->refine_outer = outer;
1205:   }
1206:   if (blsize == PETSC_DEFAULT) {
1207:     ctx->refine_blocksize = 0;
1208:   } else {
1209:     if (blsize<0) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_OUTOFRANGE,"The refine blocksize argument must be >= 0");
1210:     ctx->refine_blocksize = blsize;
1211:   }
1212:   return(0);
1213: }

1217: /*@
1218:    EPSCISSSetRefinement - Sets the values of various refinement parameters
1219:    in the CISS solver.

1221:    Logically Collective on EPS

1223:    Input Parameters:
1224: +  eps    - the eigenproblem solver context
1225: .  inner  - number of iterative refinement iterations (inner loop)
1226: .  outer  - number of iterative refinement iterations (outer loop)
1227: -  blsize - number of iterative refinement iterations (blocksize loop)

1229:    Options Database Keys:
1230: +  -eps_ciss_refine_inner - Sets number of inner iterations
1231: .  -eps_ciss_refine_outer - Sets number of outer iterations
1232: -  -eps_ciss_refine_blocksize - Sets number of blocksize iterations

1234:    Level: advanced

1236: .seealso: EPSCISSGetRefinement()
1237: @*/
1238: PetscErrorCode EPSCISSSetRefinement(EPS eps,PetscInt inner,PetscInt outer,PetscInt blsize)
1239: {

1247:   PetscTryMethod(eps,"EPSCISSSetRefinement_C",(EPS,PetscInt,PetscInt,PetscInt),(eps,inner,outer,blsize));
1248:   return(0);
1249: }

1253: static PetscErrorCode EPSCISSGetRefinement_CISS(EPS eps,PetscInt *inner,PetscInt *outer,PetscInt *blsize)
1254: {
1255:   EPS_CISS *ctx = (EPS_CISS*)eps->data;

1258:   if (inner)  *inner = ctx->refine_inner;
1259:   if (outer)  *outer = ctx->refine_outer;
1260:   if (blsize) *blsize = ctx->refine_blocksize;
1261:   return(0);
1262: }

1266: /*@
1267:    EPSCISSGetRefinement - Gets the values of various refinement parameters
1268:    in the CISS solver.

1270:    Not Collective

1272:    Input Parameter:
1273: .  eps - the eigenproblem solver context

1275:    Output Parameters:
1276: +  inner  - number of iterative refinement iterations (inner loop)
1277: .  outer  - number of iterative refinement iterations (outer loop)
1278: -  blsize - number of iterative refinement iterations (blocksize loop)

1280:    Level: advanced

1282: .seealso: EPSCISSSetRefinement()
1283: @*/
1284: PetscErrorCode EPSCISSGetRefinement(EPS eps, PetscInt *inner, PetscInt *outer,PetscInt *blsize)
1285: {

1290:   PetscTryMethod(eps,"EPSCISSGetRefinement_C",(EPS,PetscInt*,PetscInt*,PetscInt*),(eps,inner,outer,blsize));
1291:   return(0);
1292: }

1296: static PetscErrorCode EPSCISSSetUseST_CISS(EPS eps,PetscBool usest)
1297: {
1298:   EPS_CISS *ctx = (EPS_CISS*)eps->data;

1301:   ctx->usest = usest;
1302:   return(0);
1303: }

1307: /*@
1308:    EPSCISSSetUseST - Sets a flag indicating that the CISS solver will
1309:    use the ST object for the linear solves.

1311:    Logically Collective on EPS

1313:    Input Parameters:
1314: +  eps    - the eigenproblem solver context
1315: -  usest  - boolean flag to use the ST object or not

1317:    Options Database Keys:
1318: +  -eps_ciss_usest <bool> - whether the ST object will be used or not

1320:    Level: advanced

1322: .seealso: EPSCISSGetUseST()
1323: @*/
1324: PetscErrorCode EPSCISSSetUseST(EPS eps,PetscBool usest)
1325: {

1331:   PetscTryMethod(eps,"EPSCISSSetUseST_C",(EPS,PetscBool),(eps,usest));
1332:   return(0);
1333: }

1337: static PetscErrorCode EPSCISSGetUseST_CISS(EPS eps,PetscBool *usest)
1338: {
1339:   EPS_CISS *ctx = (EPS_CISS*)eps->data;

1342:   *usest = ctx->usest;
1343:   return(0);
1344: }

1348: /*@
1349:    EPSCISSGetUseST - Gets the flag for using the ST object
1350:    in the CISS solver.

1352:    Not Collective

1354:    Input Parameter:
1355: .  eps - the eigenproblem solver context

1357:    Output Parameters:
1358: +  usest - boolean flag indicating if the ST object is being used

1360:    Level: advanced

1362: .seealso: EPSCISSSetUseST()
1363: @*/
1364: PetscErrorCode EPSCISSGetUseST(EPS eps, PetscBool *usest)
1365: {

1370:   PetscTryMethod(eps,"EPSCISSGetUseST_C",(EPS,PetscBool*),(eps,usest));
1371:   return(0);
1372: }

1376: PetscErrorCode EPSReset_CISS(EPS eps)
1377: {
1379:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
1380:   PetscInt       i;

1383:   PetscSubcommDestroy(&ctx->subcomm);
1384:   PetscFree(ctx->weight);
1385:   PetscFree(ctx->omega);
1386:   PetscFree(ctx->pp);
1387:   BVDestroy(&ctx->S);
1388:   BVDestroy(&ctx->V);
1389:   PetscFree(ctx->sigma);
1390:   BVDestroy(&ctx->Y);
1391:   if (!ctx->usest) {
1392:     for (i=0;i<ctx->num_solve_point;i++) {
1393:       KSPDestroy(&ctx->ksp[i]);
1394:     }
1395:     PetscFree(ctx->ksp);
1396:     for (i=0;i<ctx->num_solve_point;i++) {
1397:       MatDestroy(&ctx->kspMat[i]);
1398:     }
1399:     PetscFree(ctx->kspMat);
1400:   }
1401:   VecScatterDestroy(&ctx->scatterin);
1402:   VecDestroy(&ctx->xsub);
1403:   VecDestroy(&ctx->xdup);
1404:   if (ctx->pA) {
1405:     MatDestroy(&ctx->pA);
1406:     MatDestroy(&ctx->pB);
1407:     BVDestroy(&ctx->pV);
1408:   }
1409:   return(0);
1410: }

1414: PetscErrorCode EPSSetFromOptions_CISS(EPS eps)
1415: {
1417:   PetscReal      r3,r4;
1418:   PetscInt       i1,i2,i3,i4,i5,i6,i7,i8;
1419:   PetscBool      b1,b2;

1422:   PetscOptionsHead("EPS CISS Options");
1423:   EPSCISSGetSizes(eps,&i1,&i2,&i3,&i4,&i5,&b1);
1424:   PetscOptionsInt("-eps_ciss_integration_points","CISS number of integration points","EPSCISSSetSizes",i1,&i1,NULL);
1425:   PetscOptionsInt("-eps_ciss_blocksize","CISS block size","EPSCISSSetSizes",i2,&i2,NULL);
1426:   PetscOptionsInt("-eps_ciss_moments","CISS moment size","EPSCISSSetSizes",i3,&i3,NULL);
1427:   PetscOptionsInt("-eps_ciss_partitions","CISS number of partitions","EPSCISSSetSizes",i4,&i4,NULL);
1428:   PetscOptionsInt("-eps_ciss_maxblocksize","CISS maximum block size","EPSCISSSetSizes",i5,&i5,NULL);
1429:   PetscOptionsBool("-eps_ciss_realmats","CISS A and B are real","EPSCISSSetSizes",b1,&b1,NULL);
1430:   EPSCISSSetSizes(eps,i1,i2,i3,i4,i5,b1);

1432:   EPSCISSGetThreshold(eps,&r3,&r4);
1433:   PetscOptionsReal("-eps_ciss_delta","CISS threshold for numerical rank","EPSCISSSetThreshold",r3,&r3,NULL);
1434:   PetscOptionsReal("-eps_ciss_spurious_threshold","CISS threshold for the spurious eigenpairs","EPSCISSSetThreshold",r4,&r4,NULL);
1435:   EPSCISSSetThreshold(eps,r3,r4);

1437:   EPSCISSGetRefinement(eps,&i6,&i7,&i8);
1438:   PetscOptionsInt("-eps_ciss_refine_inner","CISS number of inner iterative refinement iterations","EPSCISSSetRefinement",i6,&i6,NULL);
1439:   PetscOptionsInt("-eps_ciss_refine_outer","CISS number of outer iterative refinement iterations","EPSCISSSetRefinement",i7,&i7,NULL);
1440:   PetscOptionsInt("-eps_ciss_refine_blocksize","CISS number of blocksize iterative refinement iterations","EPSCISSSetRefinement",i8,&i8,NULL);
1441:   EPSCISSSetRefinement(eps,i6,i7,i8);

1443:   EPSCISSGetUseST(eps,&b2);
1444:   PetscOptionsBool("-eps_ciss_usest","CISS use ST for linear solves","EPSCISSSetUseST",b2,&b2,NULL);
1445:   EPSCISSSetUseST(eps,b2);

1447:   PetscOptionsTail();
1448:   return(0);
1449: }

1453: PetscErrorCode EPSDestroy_CISS(EPS eps)
1454: {

1458:   PetscFree(eps->data);
1459:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSSetSizes_C",NULL);
1460:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSGetSizes_C",NULL);
1461:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSSetThreshold_C",NULL);
1462:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSGetThreshold_C",NULL);
1463:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSSetRefinement_C",NULL);
1464:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSGetRefinement_C",NULL);
1465:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSSetUseST_C",NULL);
1466:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSGetUseST_C",NULL);
1467:   return(0);
1468: }

1472: PetscErrorCode EPSView_CISS(EPS eps,PetscViewer viewer)
1473: {
1475:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;
1476:   PetscBool      isascii;

1479:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
1480:   if (isascii) {
1481:     PetscViewerASCIIPrintf(viewer,"  CISS: sizes { integration points: %D, block size: %D, moment size: %D, partitions: %D, maximum block size: %D }\n",ctx->N,ctx->L,ctx->M,ctx->num_subcomm,ctx->L_max);
1482:     if (ctx->isreal) {
1483:       PetscViewerASCIIPrintf(viewer,"  CISS: exploiting symmetry of integration points\n");
1484:     }
1485:     PetscViewerASCIIPrintf(viewer,"  CISS: threshold { delta: %g, spurious threshold: %g }\n",(double)ctx->delta,(double)ctx->spurious_threshold);
1486:     PetscViewerASCIIPrintf(viewer,"  CISS: iterative refinement  { inner: %D, outer: %D, blocksize: %D }\n",ctx->refine_inner,ctx->refine_outer, ctx->refine_blocksize);
1487:     if (ctx->usest) {
1488:       PetscViewerASCIIPrintf(viewer,"  CISS: using ST for linear solves\n");
1489:     }
1490:     PetscViewerASCIIPushTab(viewer);
1491:     /*KSPView(ctx->ksp[0],viewer);*/
1492:     PetscViewerASCIIPopTab(viewer);
1493:   }
1494:   return(0);
1495: }

1499: PETSC_EXTERN PetscErrorCode EPSCreate_CISS(EPS eps)
1500: {
1502:   EPS_CISS       *ctx = (EPS_CISS*)eps->data;

1505:   PetscNewLog(eps,&ctx);
1506:   eps->data = ctx;
1507:   eps->ops->setup          = EPSSetUp_CISS;
1508:   eps->ops->setfromoptions = EPSSetFromOptions_CISS;
1509:   eps->ops->destroy        = EPSDestroy_CISS;
1510:   eps->ops->reset          = EPSReset_CISS;
1511:   eps->ops->view           = EPSView_CISS;
1512:   eps->ops->backtransform  = NULL;
1513:   eps->ops->computevectors = EPSComputeVectors_Schur;
1514:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSSetSizes_C",EPSCISSSetSizes_CISS);
1515:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSGetSizes_C",EPSCISSGetSizes_CISS);
1516:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSSetThreshold_C",EPSCISSSetThreshold_CISS);
1517:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSGetThreshold_C",EPSCISSGetThreshold_CISS);
1518:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSSetRefinement_C",EPSCISSSetRefinement_CISS);
1519:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSGetRefinement_C",EPSCISSGetRefinement_CISS);
1520:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSSetUseST_C",EPSCISSSetUseST_CISS);
1521:   PetscObjectComposeFunction((PetscObject)eps,"EPSCISSGetUseST_C",EPSCISSGetUseST_CISS);
1522:   /* set default values of parameters */
1523:   ctx->N       = 32;
1524:   ctx->L       = 16;
1525:   ctx->M       = ctx->N/4;
1526:   ctx->delta   = 1e-12;
1527:   ctx->L_max   = 64;
1528:   ctx->spurious_threshold = 1e-4;
1529:   ctx->usest   = PETSC_FALSE;
1530:   ctx->isreal  = PETSC_FALSE;
1531:   ctx->refine_outer = 1;
1532:   ctx->refine_inner = 1;
1533:   ctx->refine_blocksize = 1;
1534:   ctx->num_subcomm = 1;
1535:   return(0);
1536: }