Actual source code: nrefinement.c
slepc-3.5.2 2014-10-10
1: /*
2: Newton refinement for nonlinear eigenproblem.
4: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5: SLEPc - Scalable Library for Eigenvalue Problem Computations
6: Copyright (c) 2002-2014, Universitat Politecnica de Valencia, Spain
8: This file is part of SLEPc.
10: SLEPc is free software: you can redistribute it and/or modify it under the
11: terms of version 3 of the GNU Lesser General Public License as published by
12: the Free Software Foundation.
14: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
15: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
17: more details.
19: You should have received a copy of the GNU Lesser General Public License
20: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
21: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
22: */
24: #include <slepc-private/pepimpl.h>
25: #include <slepc-private/stimpl.h>
26: #include <slepcblaslapack.h>
28: typedef struct {
29: Mat *A;
30: BV V;
31: PetscInt k,nmat;
32: PetscScalar *Mm;
33: PetscScalar *fih;
34: PetscScalar *work;
35: Vec w1,w2;
36: } FSubctx;
38: typedef struct {
39: Mat E[2];
40: Vec tN,ttN,t1,vseq;
41: VecScatter scatterctx;
42: PetscBool computedt11;
43: PetscInt *map0,*map1,*idxg,*idxp;
44: PetscSubcomm subc;
45: VecScatter scatter_sub;
46: VecScatter *scatter_id,*scatterp_id;
47: Mat *A;
48: BV V,W;
49: Vec t,tg,Rv,Vi,tp,tpg;
50: PetscInt idx;
51: } MatExplicitCtx;
55: static PetscErrorCode MatFSMult(Mat M ,Vec x,Vec y)
56: {
58: FSubctx *ctx;
59: PetscInt i,k,nmat;
60: PetscScalar *fih,*c,*vals,sone=1.0,zero=0.0;
61: Mat *A;
62: PetscBLASInt k_,lda_,one=1;
63:
65: MatShellGetContext(M,&ctx);
66: fih = ctx->fih;
67: k = ctx->k;
68: nmat = ctx->nmat;
69: A = ctx->A;
70: c = ctx->work;
71: vals = ctx->work+k;
72: PetscBLASIntCast(k,&k_);
73: PetscBLASIntCast(nmat*k,&lda_);
74: BVDotVec(ctx->V,x,c);
75: MatMult(A[0],x,y);
76: for (i=1;i<nmat;i++) {
77: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&k_,&k_,&sone,ctx->Mm+i*k,&lda_,c,&one,&zero,vals,&one));
78: VecCopy(x,ctx->w1);
79: BVMultVec(ctx->V,-1.0,fih[i],ctx->w1,vals);
80: MatMult(A[i],ctx->w1,ctx->w2);
81: VecAXPY(y,1.0,ctx->w2);
82: }
83: return(0);
84: }
88: /*
89: Evaluates the first d elements of the polynomial basis
90: on a given matrix H which is considered to be triangular
91: */
92: static PetscErrorCode PEPEvaluateBasisforMatrix(PEP pep,PetscInt nm,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH)
93: {
95: PetscInt i,j,ldfh=nm*k,off,nmat=pep->nmat;
96: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat,t;
97: PetscScalar corr=0.0,alpha,beta;
98: PetscBLASInt k_,ldh_,ldfh_;
99:
101: PetscBLASIntCast(ldh,&ldh_);
102: PetscBLASIntCast(k,&k_);
103: PetscBLASIntCast(ldfh,&ldfh_);
104: PetscMemzero(fH,nm*k*k*sizeof(PetscScalar));
105: if (nm>0) for (j=0;j<k;j++) fH[j+j*ldfh] = 1.0;
106: if (nm>1) {
107: t = b[0]/a[0];
108: off = k;
109: for (j=0;j<k;j++) {
110: for (i=0;i<k;i++) fH[off+i+j*ldfh] = H[i+j*ldh]/a[0];
111: fH[j+j*ldfh] -= t;
112: }
113: }
114: for (i=2;i<nm;i++) {
115: off = i*k;
116: if (i==2) {
117: for (j=0;j<k;j++) {
118: fH[off+j+j*ldfh] = 1.0;
119: H[j+j*ldh] -= b[1];
120: }
121: } else {
122: for (j=0;j<k;j++) {
123: PetscMemcpy(fH+off+j*ldfh,fH+(i-2)*k+j*ldfh,k*sizeof(PetscScalar));
124: H[j+j*ldh] += corr-b[i-1];
125: }
126: }
127: corr = b[i-1];
128: beta = -g[i-1]/a[i-1];
129: alpha = 1/a[i-1];
130: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&alpha,H,&ldh_,fH+(i-1)*k,&ldfh_,&beta,fH+off,&ldfh_));
131: }
132: for (j=0;j<k;j++) H[j+j*ldh] += corr;
133: return(0);
134: }
138: static PetscErrorCode NRefSysSetup_shell(PetscInt nmat,PetscReal *pcf,PetscInt k,PetscInt deg,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,PetscScalar *Mm,PetscScalar *T22,PetscBLASInt *p22,PetscScalar *T21,PetscScalar *T12)
139: {
141: PetscScalar *DHii,*Tr,*Ts,s,sone=1.0,zero=0.0;
142: PetscInt i,d,j,lda=nmat*k;
143: PetscReal *a=pcf,*b=pcf+nmat,*g=pcf+2*nmat;
144: PetscBLASInt k_,lda_,lds_,info;
145:
147: DHii = T12;
148: PetscMemzero(DHii,k*k*nmat*sizeof(PetscScalar));
149: PetscMalloc2(k*k,&Tr,k*k,&Ts);
150: for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
151: for (d=2;d<nmat;d++) {
152: for (j=0;j<k;j++) {
153: for (i=0;i<k;i++) {
154: DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/(a[d-1]);
155: }
156: }
157: }
158: /* T22 */
159: PetscBLASIntCast(lds,&lds_);
160: PetscBLASIntCast(k,&k_);
161: PetscBLASIntCast(lda,&lda_);
162: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
163: for (i=1;i<deg;i++) {
164: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
165: s = (i==1)?0.0:1.0;
166: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,T22,&k_));
167: }
169: /* T21 */
170: for (i=1;i<deg;i++) {
171: s = (i==1)?0.0:1.0;
172: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","C",&k_,&k_,&k_,fh+i,fH+i*k,&lda_,S,&lds_,&s,T21,&k_));
173: }
174: /* Mm */
175: PetscMemcpy(Tr,T21,k*k*sizeof(PetscScalar));
176: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&k_,&k_,T22,&k_,p22,Tr,&k_,&info));
177:
178: s = 0.0;
179: for (i=1;i<nmat;i++) {
180: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,Ts,&k_));
181: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Ts,&k_,Tr,&k_,&s,Mm+i*k,&lda_));
182: for (j=0;j<k;j++) {
183: PetscMemcpy(T12+i*k+j*lda,Ts+j*k,k*sizeof(PetscScalar));
184: }
185: }
186: PetscFree2(Tr,Ts);
187: return(0);
188: }
192: static PetscErrorCode NRefSysSolve_shell(Mat *A,KSP ksp,PetscInt nmat,Vec Rv,PetscScalar *Rh,PetscInt k,PetscScalar *T22,PetscBLASInt *p22,PetscScalar *T21,PetscScalar *T12,BV V,Vec dVi,PetscScalar *dHi,BV W,Vec t,PetscScalar *work,PetscInt lw)
193: {
195: PetscScalar *t0,*t1,zero=0.0,none=-1.0,sone=1.0;
196: PetscBLASInt k_,one=1,info,lda_;
197: PetscInt i,lda=nmat*k,nwu=0;
198: Vec w;
201: t0 = work+nwu;
202: nwu += k;
203: t1 = work+nwu;
204: nwu += k;
205: PetscBLASIntCast(lda,&lda_);
206: PetscBLASIntCast(k,&k_);
207: for (i=0;i<k;i++) t0[i] = Rh[i];
208: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,T22,&k_,p22,t0,&k_,&info));
209: for (i=1;i<nmat;i++) {
210: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&k_,&k_,&sone,T12+i*k,&lda_,t0,&one,&zero,t1,&one));
211: BVMultVec(V,1.0,0.0,t,t1);
212: BVGetColumn(W,i,&w);
213: MatMult(A[i],t,w);
214: BVRestoreColumn(W,i,&w);
215: }
216: for (i=0;i<nmat-1;i++) t1[i]=-1.0;
217: BVSetActiveColumns(W,1,nmat);
218: BVMultVec(W,1.0,1.0,Rv,t1);
219: KSPSolve(ksp,Rv,dVi);
220: BVDotVec(V,dVi,t1);
221: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&k_,&k_,&none,T21,&k_,t1,&one,&zero,dHi,&one));
222: for (i=0;i<k;i++) dHi[i] += Rh[i];
223: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,T22,&k_,p22,dHi,&k_,&info));
224: return(0);
225: }
229: /*
230: Computes the residual P(H,V*S)*e_j for the polynomial
231: */
232: static PetscErrorCode NRefRightSide(PetscInt nmat,PetscReal *pcf,Mat *A,PetscInt k,BV V,PetscScalar *S,PetscInt lds,PetscInt j,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *DfH,PetscScalar *dH,BV dV,PetscScalar *dVS,PetscInt rds,Vec Rv,PetscScalar *Rh,BV W,Vec t,PetscScalar *work,PetscInt lw)
233: {
235: PetscScalar *DS0,*DS1,*F,beta=0.0,sone=1.0,none=-1.0,tt=0.0,*h,zero=0.0,*Z,*c0;
236: PetscReal *a=pcf,*b=pcf+nmat,*g=b+nmat;
237: PetscInt i,ii,jj,nwu=0,lda;
238: PetscBLASInt lda_,k_,ldh_,lds_,nmat_,k2_,krds_,j_,one=1;
239: Mat M0;
240: Vec w;
241:
243: h = work+nwu;
244: nwu += k*nmat;
245: lda = k*nmat;
246: PetscBLASIntCast(k,&k_);
247: PetscBLASIntCast(lds,&lds_);
248: PetscBLASIntCast(lda,&lda_);
249: PetscBLASIntCast(nmat,&nmat_);
250: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&nmat_,&k_,&sone,S,&lds_,fH+j*lda,&k_,&zero,h,&k_));
251: MatCreateSeqDense(PETSC_COMM_SELF,k,nmat,h,&M0);
252: BVSetActiveColumns(W,0,nmat);
253: BVMult(W,1.0,0.0,V,M0);
254: MatDestroy(&M0);
256: BVGetColumn(W,0,&w);
257: MatMult(A[0],w,Rv);
258: BVRestoreColumn(W,0,&w);
259: for (i=1;i<nmat;i++) {
260: BVGetColumn(W,i,&w);
261: MatMult(A[i],w,t);
262: BVRestoreColumn(W,i,&w);
263: VecAXPY(Rv,1.0,t);
264: }
265: /* Update right-hand side */
266: if (j) {
267: DS0 = work+nwu;
268: nwu += k*k;
269: DS1 = work+nwu;
270: nwu += k*k;
271: PetscBLASIntCast(ldh,&ldh_);
272: Z = work+nwu;
273: nwu += k*k;
274: PetscMemzero(Z,k*k*sizeof(PetscScalar));
275: PetscMemzero(DS0,k*k*sizeof(PetscScalar));
276: PetscMemcpy(Z+(j-1)*k,dH+(j-1)*k,k*sizeof(PetscScalar));
277: /* Update DfH */
278: for (i=1;i<nmat;i++) {
279: if (i>1) {
280: beta = -g[i-1];
281: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,fH+(i-1)*k,&lda_,Z,&k_,&beta,DS0,&k_));
282: tt += -b[i-1];
283: for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
284: tt = b[i-1];
285: beta = 1.0/a[i-1];
286: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&beta,DS1,&k_,H,&ldh_,&beta,DS0,&k_));
287: F = DS0; DS0 = DS1; DS1 = F;
288: } else {
289: PetscMemzero(DS1,k*k*sizeof(PetscScalar));
290: for (ii=0;ii<k;ii++) DS1[ii+(j-1)*k] = Z[ii+(j-1)*k]/a[0];
291: }
292: for (jj=j;jj<k;jj++) {
293: for (ii=0;ii<k;ii++) DfH[k*i+ii+jj*lda] += DS1[ii+jj*k];
294: }
295: }
296: for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
297: /* Update right-hand side */
298: PetscBLASIntCast(2*k,&k2_);
299: PetscBLASIntCast(j,&j_);
300: PetscBLASIntCast(k+rds,&krds_);
301: c0 = DS0;
302: PetscMemzero(Rh,k*sizeof(PetscScalar));
303: for (i=0;i<nmat;i++) {
304: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&krds_,&j_,&sone,dVS,&k2_,fH+j*lda+i*k,&one,&zero,h,&one));
305: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&k_,&k_,&sone,S,&lds_,DfH+i*k+j*lda,&one,&sone,h,&one));
306: BVMultVec(V,1.0,0.0,t,h);
307: BVSetActiveColumns(dV,0,rds);
308: BVMultVec(dV,1.0,1.0,t,h+k);
309: BVGetColumn(W,i,&w);
310: MatMult(A[i],t,w);
311: BVRestoreColumn(W,i,&w);
312: if (i>0 && i<nmat-1) {
313: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&sone,S,&lds_,h,&one,&zero,c0,&one));
314: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&none,fH+i*k,&lda_,c0,&one,&sone,Rh,&one));
315: }
316: }
317:
318: for (i=0;i<nmat;i++) h[i] = -1.0;
319: BVMultVec(W,1.0,1.0,Rv,h);
320: }
321: return(0);
322: }
326: static PetscErrorCode NRefSysIter_shell(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,Vec Rv,PetscScalar *Rh,BV V,Vec dVi,PetscScalar *dHi,BV W,Vec t,PetscScalar *work,PetscInt lwork)
327: {
329: PetscInt nwu=0,nmat=pep->nmat,deg=nmat-1,i;
330: PetscScalar *T22,*T21,*T12;
331: PetscBLASInt *p22;
332: FSubctx *ctx;
333: Mat M,*A;
334: PetscBool flg;
337: STGetTransform(pep->st,&flg);
338: if (flg) {
339: PetscMalloc1(pep->nmat,&A);
340: for (i=0;i<pep->nmat;i++) {
341: STGetTOperators(pep->st,i,&A[i]);
342: }
343: } else A = pep->A;
344: PetscMalloc1(k,&p22);
345: T22 = work+nwu;
346: nwu += k*k;
347: T21 = work+nwu;
348: nwu += k*k;
349: T12 = work+nwu;
350: nwu += nmat*k*k;
351: KSPGetOperators(ksp,&M,NULL);
352: MatShellGetContext(M,&ctx);
353: /* Update the matrix for the system */
354: NRefSysSetup_shell(nmat,pep->pbc,k,deg,fH,S,lds,fh,h,ctx->Mm,T22,p22,T21,T12);
355: /* Solve system */
356: NRefSysSolve_shell(A,ksp,nmat,Rv,Rh,k,T22,p22,T21,T12,V,dVi,dHi,W,t,work+nwu,lwork-nwu);
357: PetscFree(p22);
358: if (flg) {
359: PetscFree(A);
360: }
361: return(0);
362: }
366: static PetscErrorCode NRefSysSetup_explicit(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,MatExplicitCtx *matctx,BV W,PetscScalar *work,PetscInt lwork)
367: {
368: PetscErrorCode ierr;
369: PetscInt nwu=0,i,j,d,n,n0,m0,n1,m1,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
370: PetscInt *idxg=matctx->idxg,*idxp=matctx->idxp,idx,ncols;
371: Mat M,*E=matctx->E,*A,*At,Mk,Md;
372: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
373: PetscScalar s,ss,*DHii,*T22,*T21,*T12,*Ts,*Tr,*array,*ts,sone=1.0,zero=0.0;
374: PetscBLASInt lds_,lda_,k_;
375: const PetscInt *idxmc;
376: const PetscScalar *valsc;
377: MatStructure str;
378: Vec vc,vc0;
379: PetscBool flg;
380:
382: T22 = work+nwu;
383: nwu += k*k;
384: T21 = work+nwu;
385: nwu += k*k;
386: T12 = work+nwu;
387: nwu += nmat*k*k;
388: Tr = work+nwu;
389: nwu += k*k;
390: Ts = work+nwu;
391: nwu += k*k;
392: STGetMatStructure(pep->st,&str);
393: KSPGetOperators(ksp,&M,NULL);
394: MatGetOwnershipRange(E[1],&n1,&m1);
395: MatGetOwnershipRange(E[0],&n0,&m0);
396: MatGetOwnershipRange(M,&n,NULL);
397: PetscMalloc1(nmat,&ts);
398: STGetTransform(pep->st,&flg);
399: if (flg) {
400: PetscMalloc1(pep->nmat,&At);
401: for (i=0;i<pep->nmat;i++) {
402: STGetTOperators(pep->st,i,&At[i]);
403: }
404: } else At = pep->A;
405: if (matctx->subc) A = matctx->A;
406: else A = At;
407: /* Form the explicit system matrix */
408: DHii = T12;
409: PetscMemzero(DHii,k*k*nmat*sizeof(PetscScalar));
410: for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
411: for (d=2;d<nmat;d++) {
412: for (j=0;j<k;j++) {
413: for (i=0;i<k;i++) {
414: DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/a[d-1];
415: }
416: }
417: }
419: /* T11 */
420: if (!matctx->computedt11) {
421: MatCopy(A[0],E[0],DIFFERENT_NONZERO_PATTERN);
422: PEPEvaluateBasis(pep,h,0,Ts,NULL);
423: for (j=1;j<nmat;j++) {
424: MatAXPY(E[0],Ts[j],A[j],str);
425: }
426: }
427: for (i=n0;i<m0;i++) {
428: MatGetRow(E[0],i,&ncols,&idxmc,&valsc);
429: idx = n+i-n0;
430: for (j=0;j<ncols;j++) {
431: idxg[j] = matctx->map0[idxmc[j]];
432: }
433: MatSetValues(M,1,&idx,ncols,idxg,valsc,INSERT_VALUES);
434: MatRestoreRow(E[0],i,&ncols,&idxmc,&valsc);
435: }
437: /* T22 */
438: PetscBLASIntCast(lds,&lds_);
439: PetscBLASIntCast(k,&k_);
440: PetscBLASIntCast(lda,&lda_);
441: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
442: for (i=1;i<deg;i++) {
443: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
444: s = (i==1)?0.0:1.0;
445: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,T22,&k_));
446: }
447: for (j=0;j<k;j++) idxp[j] = matctx->map1[j];
448: for (i=0;i<m1-n1;i++) {
449: idx = n+m0-n0+i;
450: for (j=0;j<k;j++) {
451: Tr[j] = T22[n1+i+j*k];
452: }
453: MatSetValues(M,1,&idx,k,idxp,Tr,INSERT_VALUES);
454: }
456: /* T21 */
457: for (i=1;i<deg;i++) {
458: s = (i==1)?0.0:1.0;
459: ss = PetscConj(fh[i]);
460: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,T21,&k_));
461: }
462: BVSetActiveColumns(W,0,k);
463: MatCreateSeqDense(PETSC_COMM_SELF,k,k,T21,&Mk);
464: BVMult(W,1.0,0.0,V,Mk);
465: for (i=0;i<k;i++) {
466: BVGetColumn(W,i,&vc);
467: VecConjugate(vc);
468: VecGetArray(vc,&array);
469: idx = matctx->map1[i];
470: MatSetValues(M,1,&idx,m0-n0,matctx->map0+n0,array,INSERT_VALUES);
471: VecRestoreArray(vc,&array);
472: BVRestoreColumn(W,i,&vc);
473: }
475: /* T12 */
476: for (i=1;i<nmat;i++) {
477: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,Ts,&k_));
478: for (j=0;j<k;j++) {
479: PetscMemcpy(T12+i*k+j*lda,Ts+j*k,k*sizeof(PetscScalar));
480: }
481: }
482: MatCreateSeqDense(PETSC_COMM_SELF,k,nmat-1,NULL,&Md);
483: for (i=0;i<nmat;i++) ts[i] = 1.0;
484: for (j=0;j<k;j++) {
485: MatDenseGetArray(Md,&array);
486: PetscMemcpy(array,T12+k+j*lda,(nmat-1)*k*sizeof(PetscScalar));
487: MatDenseRestoreArray(Md,&array);
488: BVSetActiveColumns(W,0,nmat-1);
489: BVMult(W,1.0,0.0,V,Md);
490: for (i=nmat-1;i>0;i--) {
491: BVGetColumn(W,i-1,&vc0);
492: BVGetColumn(W,i,&vc);
493: MatMult(A[i],vc0,vc);
494: BVRestoreColumn(W,i-1,&vc0);
495: BVRestoreColumn(W,i,&vc);
496: }
497: BVSetActiveColumns(W,1,nmat);
498: BVGetColumn(W,0,&vc0);
499: BVMultVec(W,1.0,0.0,vc0,ts);
500: VecGetArray(vc0,&array);
501: idx = matctx->map1[j];
502: MatSetValues(M,m0-n0,matctx->map0+n0,1,&idx,array,INSERT_VALUES);
503: VecRestoreArray(vc0,&array);
504: BVRestoreColumn(W,0,&vc0);
505: }
506: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
507: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
508: KSPSetOperators(ksp,M,M);
509: KSPSetUp(ksp);
510: PetscFree(ts);
511: MatDestroy(&Mk);
512: MatDestroy(&Md);
513: if (flg) {
514: PetscFree(At);
515: }
516: return(0);
517: }
518:
521: static PetscErrorCode NRefSysSolve_explicit(PetscInt k,KSP ksp,Vec Rv,PetscScalar *Rh,Vec dVi,PetscScalar *dHi,MatExplicitCtx *matctx)
522: {
524: PetscInt n0,m0,n1,m1,i;
525: PetscScalar *array,*arrayV;
528: MatGetOwnershipRange(matctx->E[1],&n1,&m1);
529: MatGetOwnershipRange(matctx->E[0],&n0,&m0);
530: /* Right side */
531: VecGetArray(Rv,&array);
532: VecSetValues(matctx->tN,m0-n0,matctx->map0+n0,array,INSERT_VALUES);
533: VecRestoreArray(Rv,&array);
534: VecSetValues(matctx->tN,m1-n1,matctx->map1+n1,Rh+n1,INSERT_VALUES);
535: VecAssemblyBegin(matctx->tN);
536: VecAssemblyEnd(matctx->tN);
538: /* Solve */
539: KSPSolve(ksp,matctx->tN,matctx->ttN);
540:
541: /* Retrieve solution */
542: VecGetArray(dVi,&arrayV);
543: VecGetArray(matctx->ttN,&array);
544: PetscMemcpy(arrayV,array,(m0-n0)*sizeof(PetscScalar));
545: VecRestoreArray(dVi,&arrayV);
546: if (!matctx->subc) {
547: VecGetArray(matctx->t1,&arrayV);
548: for (i=0;i<m1-n1;i++) arrayV[i] = array[m0-n0+i];
549: VecRestoreArray(matctx->t1,&arrayV);
550: VecRestoreArray(matctx->ttN,&array);
551: VecScatterBegin(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD);
552: VecScatterEnd(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD);
553: VecGetArray(matctx->vseq,&array);
554: for (i=0;i<k;i++) dHi[i] = array[i];
555: VecRestoreArray(matctx->vseq,&array);
556: }
557: return(0);
558: }
562: static PetscErrorCode NRefSysIter_explicit(PetscInt i,PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar *H,PetscInt ldh,Vec Rv,PetscScalar *Rh,BV V,Vec dVi,PetscScalar *dHi,MatExplicitCtx *matctx,BV W,PetscScalar *work,PetscInt lwork)
563: {
565: PetscInt j,m,lda=pep->nmat*k,n0,m0,idx;
566: PetscScalar *array,*array2,h;
567: Vec R,Vi;
568:
570: if (!matctx->subc) {
571: for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+i+i*lda];
572: h = H[i+i*ldh];
573: idx = i;
574: R = Rv;
575: Vi = dVi;
576: NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,V,matctx,W,work,lwork);
577: } else {
578: if (i%matctx->subc->n==0 && (idx=i+matctx->subc->color)<k) {
579: for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+idx+idx*lda];
580: h = H[idx+idx*ldh];
581: matctx->idx = idx;
582: NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx,matctx->W,work,lwork);
583: } else idx = matctx->idx;
584: VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
585: VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
586: VecGetArray(matctx->tg,&array);
587: VecPlaceArray(matctx->t,(const PetscScalar*)array);
588: VecCopy(matctx->t,matctx->Rv);
589: VecResetArray(matctx->t);
590: VecRestoreArray(matctx->tg,&array);
591: R = matctx->Rv;
592: Vi = matctx->Vi;
593: }
594: if (idx==i && idx<k) {
595: NRefSysSolve_explicit(k,ksp,R,Rh,Vi,dHi,matctx);
596: }
597: if (matctx->subc) {
598: VecGetLocalSize(Vi,&m);
599: VecGetArray(Vi,&array);
600: VecGetArray(matctx->tg,&array2);
601: PetscMemcpy(array2,array,m*sizeof(PetscScalar));
602: VecRestoreArray(matctx->tg,&array2);
603: VecRestoreArray(Vi,&array);
604: VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE);
605: VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE);
606: MatGetOwnershipRange(matctx->E[0],&n0,&m0);
607: VecGetArray(matctx->ttN,&array);
608: VecPlaceArray(matctx->tp,(const PetscScalar*)(array+m0-n0));
609: VecScatterBegin(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD);
610: VecScatterEnd(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD);
611: VecResetArray(matctx->tp);
612: VecRestoreArray(matctx->ttN,&array);
613: VecGetArray(matctx->tpg,&array);
614: for (j=0;j<k;j++) dHi[j] = array[j];
615: VecRestoreArray(matctx->tpg,&array);
616: }
617: return(0);
618: }
622: static PetscErrorCode PEPNRefForwardSubstitution(PEP pep,PetscInt k,PetscScalar *S,PetscInt lds,PetscScalar *H,PetscInt ldh,PetscScalar *fH,BV dV,PetscScalar *dVS,PetscInt *rds,PetscScalar *dH,PetscInt lddh,KSP ksp,PetscScalar *work,PetscInt lw,MatExplicitCtx *matctx)
623: {
625: PetscInt i,j,nmat=pep->nmat,nwu=0,lda=nmat*k;
626: PetscScalar h,*fh,*Rh,*DfH;
627: PetscReal norm;
628: BV W;
629: Vec Rv,t,dvi;
630: FSubctx *ctx;
631: Mat M,*At;
632: PetscBool flg,lindep;
635: *rds = 0;
636: DfH = work+nwu;
637: nwu += nmat*k*k;
638: Rh = work+nwu;
639: nwu += k;
640: BVGetVec(pep->V,&t);
641: BVGetVec(pep->V,&Rv);
642: KSPGetOperators(ksp,&M,NULL);
643: if (matctx) {
644: fh = work+nwu;
645: nwu += nmat;
646: } else {
647: MatShellGetContext(M,&ctx);
648: fh = ctx->fih;
649: }
650: BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
651: PetscMemzero(dVS,2*k*k*sizeof(PetscScalar));
652: PetscMemzero(DfH,lda*k*sizeof(PetscScalar));
653: STGetTransform(pep->st,&flg);
654: if (flg) {
655: PetscMalloc1(pep->nmat,&At);
656: for (i=0;i<pep->nmat;i++) {
657: STGetTOperators(pep->st,i,&At[i]);
658: }
659: } else At = pep->A;
661: /* Main loop for computing the ith columns of dX and dS */
662: for (i=0;i<k;i++) {
663: h = H[i+i*ldh];
665: /* Compute and update i-th column of the right hand side */
666: PetscMemzero(Rh,k*sizeof(PetscScalar));
667: NRefRightSide(nmat,pep->pbc,At,k,pep->V,S,lds,i,H,ldh,fH,DfH,dH,dV,dVS,*rds,Rv,Rh,W,t,work+nwu,lw-nwu);
669: /* Update and solve system */
670: BVGetColumn(dV,i,&dvi);
671: if (matctx) {
672: NRefSysIter_explicit(i,pep,k,ksp,fH,S,lds,fh,H,ldh,Rv,Rh,pep->V,dvi,dH+i*k,matctx,W,work+nwu,lw-nwu);
673: if (i==0) matctx->computedt11 = PETSC_FALSE;
674: } else {
675: for (j=0;j<nmat;j++) fh[j] = fH[j*k+i+i*lda];
676: NRefSysIter_shell(pep,k,ksp,fH,S,lds,fh,h,Rv,Rh,pep->V,dvi,dH+i*k,W,t,work+nwu,lw-nwu);
677: }
678: /* Orthogonalize computed solution */
679: BVOrthogonalizeVec(pep->V,dvi,dVS+i*2*k,&norm,&lindep);
680: BVRestoreColumn(dV,i,&dvi);
681: if (!lindep) {
682: BVOrthogonalizeColumn(dV,i,dVS+k+i*2*k,&norm,&lindep);
683: if (!lindep) {
684: dVS[k+i+i*2*k] = norm;
685: BVScaleColumn(dV,i,1.0/norm);
686: (*rds)++;
687: }
688: }
689: }
690: BVSetActiveColumns(dV,0,*rds);
691: VecDestroy(&t);
692: VecDestroy(&Rv);
693: BVDestroy(&W);
694: if (flg) {
695: PetscFree(At);
696: }
697: return(0);
698: }
702: PetscErrorCode NRefOrthogStep(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscInt *prs,PetscScalar *work,PetscInt lwork)
703: {
705: PetscInt i,j,nmat=pep->nmat,deg=nmat-1,lda=nmat*k,nwu=0,rs=*prs,ldg;
706: PetscScalar *T,*G,*tau,*array,sone=1.0,zero=0.0;
707: PetscBLASInt rs_,lds_,k_,ldh_,lw_,info,ldg_,lda_;
708: Mat M0;
711: T = work+nwu;
712: nwu += rs*k;
713: tau = work+nwu;
714: nwu += k;
715: PetscBLASIntCast(lds,&lds_);
716: PetscBLASIntCast(lda,&lda_);
717: PetscBLASIntCast(k,&k_);
718: PetscBLASIntCast(lwork-nwu,&lw_);
719: if (rs>k) { /* Truncate S to have k columns*/
720: for (j=0;j<k;j++) {
721: PetscMemcpy(T+j*rs,S+j*lds,rs*sizeof(PetscScalar));
722: }
723: PetscBLASIntCast(rs,&rs_);
724: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&rs_,&k_,T,&rs_,tau,work+nwu,&lw_,&info));
725: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEQRF %d",info);
726: /* Copy triangular matrix in S */
727: PetscMemzero(S,lds*k*sizeof(PetscScalar));
728: for (j=0;j<k;j++) for (i=0;i<=j;i++) S[j*lds+i] = T[j*rs+i];
729: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&rs_,&k_,&k_,T,&rs_,tau,work+nwu,&lw_,&info));
730: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGQR %d",info);
731: MatCreateSeqDense(PETSC_COMM_SELF,rs,k,NULL,&M0);
732: MatDenseGetArray(M0,&array);
733: for (j=0;j<k;j++) {
734: PetscMemcpy(array+j*rs,T+j*rs,rs*sizeof(PetscScalar));
735: }
736: MatDenseRestoreArray(M0,&array);
737: BVSetActiveColumns(pep->V,0,rs);
738: BVMultInPlace(pep->V,M0,0,k);
739: BVSetActiveColumns(pep->V,0,k);
740: MatDestroy(&M0);
741: *prs = rs = k;
742: }
743: /* Form auxiliary matrix for the orthogonalization step */
744: G = work+nwu;
745: ldg = deg*k;
746: nwu += ldg*k;
747: PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH);
748: PetscBLASIntCast(ldg,&ldg_);
749: PetscBLASIntCast(lwork-nwu,&lw_);
750: PetscBLASIntCast(ldh,&ldh_);
751: for (j=0;j<deg;j++) {
752: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,fH+j*k,&lda_,&zero,G+j*k,&ldg_));
753: }
754: /* Orthogonalize and update S */
755: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&ldg_,&k_,G,&ldg_,tau,work+nwu,&lw_,&info));
756: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEQRF %d",info);
757: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,S,&lds_));
759: /* Update H */
760: PetscStackCallBLAS("BLAStrmm",BLAStrmm_("L","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
761: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
762: return(0);
763: }
767: static PetscErrorCode PEPNRefUpdateInvPair(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *dH,PetscScalar *S,PetscInt lds,BV dV,PetscScalar *dVS,PetscInt rds,PetscScalar *work,PetscInt lwork)
768: {
770: PetscInt i,j,nmat=pep->nmat,lda=nmat*k,nwu=0;
771: PetscScalar *tau,*array;
772: PetscBLASInt lds_,k_,lda_,ldh_,kdrs_,lw_,info,k2_;
773: Mat M0;
776: tau = work+nwu;
777: nwu += k;
778: PetscBLASIntCast(lds,&lds_);
779: PetscBLASIntCast(lda,&lda_);
780: PetscBLASIntCast(ldh,&ldh_);
781: PetscBLASIntCast(k,&k_);
782: PetscBLASIntCast(2*k,&k2_);
783: PetscBLASIntCast((k+rds),&kdrs_);
784: /* Update H */
785: for (j=0;j<k;j++) {
786: for (i=0;i<k;i++) H[i+j*ldh] -= dH[i+j*k];
787: }
788: /* Update V */
789: for (j=0;j<k;j++) {
790: for (i=0;i<k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k]+S[i+j*lds];
791: for (i=k;i<2*k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k];
792: }
793: PetscBLASIntCast(lwork-nwu,&lw_);
794: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&kdrs_,&k_,dVS,&k2_,tau,work+nwu,&lw_,&info));
795: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEQRF %d",info);
796: /* Copy triangular matrix in S */
797: for (j=0;j<k;j++) {
798: for (i=0;i<=j;i++) S[i+j*lds] = dVS[i+j*2*k];
799: for (i=j+1;i<k;i++) S[i+j*lds] = 0.0;
800: }
801: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&k2_,&k_,&k_,dVS,&k2_,tau,work+nwu,&lw_,&info));
802: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGQR %d",info);
803: MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&M0);
804: MatDenseGetArray(M0,&array);
805: for (j=0;j<k;j++) {
806: PetscMemcpy(array+j*k,dVS+j*2*k,k*sizeof(PetscScalar));
807: }
808: MatDenseRestoreArray(M0,&array);
809: BVMultInPlace(pep->V,M0,0,k);
810: if (rds) {
811: MatDenseGetArray(M0,&array);
812: for (j=0;j<k;j++) {
813: PetscMemcpy(array+j*k,dVS+k+j*2*k,rds*sizeof(PetscScalar));
814: }
815: MatDenseRestoreArray(M0,&array);
816: BVMultInPlace(dV,M0,0,k);
817: BVAXPY(pep->V,1.0,dV);
818: }
819: MatDestroy(&M0);
820: NRefOrthogStep(pep,k,H,ldh,fH,S,lds,&k,work,lwork);
821: return(0);
822: }
826: static PetscErrorCode PEPNRefSetUpMatrices(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,Mat *M,Mat *P,MatExplicitCtx *matctx,PetscBool ini)
827: {
828: PetscErrorCode ierr;
829: FSubctx *ctx;
830: PetscScalar t,*coef,*array;
831: MatStructure str;
832: PetscInt j,nmat=pep->nmat,n0,m0,n1,m1,n0_,m0_,n1_,m1_,N0,N1,p,*idx1,*idx2,count,si,i,l0;
833: MPI_Comm comm;
834: PetscMPIInt np;
835: const PetscInt *rgs0,*rgs1;
836: Mat B,C,*E,*A,*At;
837: IS is1,is2;
838: Vec v;
839: PetscBool flg;
842: PetscMalloc1(nmat,&coef);
843: STGetTransform(pep->st,&flg);
844: if (flg) {
845: PetscMalloc1(pep->nmat,&At);
846: for (i=0;i<pep->nmat;i++) {
847: STGetTOperators(pep->st,i,&At[i]);
848: }
849: } else At = pep->A;
850: if (matctx) {
851: if (ini) {
852: if (matctx->subc) {
853: A = matctx->A;
854: comm = matctx->subc->comm;
855: } else {
856: A = At;
857: PetscObjectGetComm((PetscObject)pep,&comm);
858: }
859: E = matctx->E;
860: STGetMatStructure(pep->st,&str);
861: MatDuplicate(A[0],MAT_COPY_VALUES,&E[0]);
862: j = (matctx->subc)?matctx->subc->color:0;
863: PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL);
864: for (j=1;j<nmat;j++) {
865: MatAXPY(E[0],coef[j],A[j],str);
866: }
867: MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,k,k,NULL,&E[1]);
868: MatAssemblyBegin(E[1],MAT_FINAL_ASSEMBLY);
869: MatAssemblyEnd(E[1],MAT_FINAL_ASSEMBLY);
870: MatGetOwnershipRange(E[0],&n0,&m0);
871: MatGetOwnershipRange(E[1],&n1,&m1);
872: MatGetOwnershipRangeColumn(E[0],&n0_,&m0_);
873: MatGetOwnershipRangeColumn(E[1],&n1_,&m1_);
874: /* T12 and T21 are computed from V and V*, so,
875: they must have the same column and row ranges */
876: if (m0_-n0_ != m0-n0) SETERRQ(PETSC_COMM_SELF,1,"Inconsistent dimensions");
877: MatCreateDense(comm,m0-n0,m1_-n1_,PETSC_DECIDE,PETSC_DECIDE,NULL,&B);
878: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
879: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
880: MatCreateDense(comm,m1-n1,m0_-n0_,PETSC_DECIDE,PETSC_DECIDE,NULL,&C);
881: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
882: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
883: SlepcMatTile(1.0,E[0],1.0,B,1.0,C,1.0,E[1],M);
884: *P = *M;
885: MatDestroy(&B);
886: MatDestroy(&C);
887: matctx->computedt11 = PETSC_TRUE;
888: MatGetSize(E[0],NULL,&N0);
889: MatGetSize(E[1],NULL,&N1);
890: MPI_Comm_size(PetscObjectComm((PetscObject)*M),&np);
891: MatGetOwnershipRanges(E[0],&rgs0);
892: MatGetOwnershipRanges(E[1],&rgs1);
893: PetscMalloc4(PetscMax(k,N1),&matctx->idxp,N0,&matctx->idxg,N0,&matctx->map0,N1,&matctx->map1);
894: /* Create column (and row) mapping */
895: for (p=0;p<np;p++) {
896: for (j=rgs0[p];j<rgs0[p+1];j++) matctx->map0[j] = j+rgs1[p];
897: for (j=rgs1[p];j<rgs1[p+1];j++) matctx->map1[j] = j+rgs0[p+1];
898: }
899: MatGetVecs(*M,NULL,&matctx->tN);
900: MatGetVecs(matctx->E[1],NULL,&matctx->t1);
901: VecDuplicate(matctx->tN,&matctx->ttN);
902: if (matctx->subc) {
903: MPI_Comm_size(PetscObjectComm((PetscObject)pep),&np);
904: count = np*k;
905: PetscMalloc2(count,&idx1,count,&idx2);
906: VecCreateMPI(PetscObjectComm((PetscObject)pep),m1-n1,PETSC_DECIDE,&matctx->tp);
907: VecGetOwnershipRange(matctx->tp,&l0,NULL);
908: VecCreateMPI(PetscObjectComm((PetscObject)pep),k,PETSC_DECIDE,&matctx->tpg);
909: for (si=0;si<matctx->subc->n;si++) {
910: if (matctx->subc->color==si) {
911: j=0;
912: if (matctx->subc->color==si) {
913: for (p=0;p<np;p++) {
914: for (i=n1;i<m1;i++) {
915: idx1[j] = l0+i-n1;
916: idx2[j++] =p*k+i;
917: }
918: }
919: }
920: count = np*(m1-n1);
921: } else count =0;
922: ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx1,PETSC_COPY_VALUES,&is1);
923: ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx2,PETSC_COPY_VALUES,&is2);
924: VecScatterCreate(matctx->tp,is1,matctx->tpg,is2,&matctx->scatterp_id[si]);
925: ISDestroy(&is1);
926: ISDestroy(&is2);
927: }
928: PetscFree2(idx1,idx2);
929: } else {
930: VecScatterCreateToAll(matctx->t1,&matctx->scatterctx,&matctx->vseq);
931: }
932: } else {
933: if (matctx->subc) {
934: /* Scatter vectors pep->V */
935: for (i=0;i<k;i++) {
936: BVGetColumn(pep->V,i,&v);
937: VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
938: VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
939: BVRestoreColumn(pep->V,i,&v);
940: VecGetArray(matctx->tg,&array);
941: VecPlaceArray(matctx->t,(const PetscScalar*)array);
942: BVInsertVec(matctx->V,i,matctx->t);
943: VecResetArray(matctx->t);
944: }
945: }
946: }
947: } else {
948: if (ini) {
949: PetscObjectGetComm((PetscObject)pep,&comm);
950: MatGetSize(At[0],&m0,&n0);
951: PetscMalloc1(1,&ctx);
952: STGetMatStructure(pep->st,&str);
953: /* Create a shell matrix to solve the linear system */
954: ctx->A = At;
955: ctx->V = pep->V;
956: ctx->k = k; ctx->nmat = nmat;
957: PetscMalloc3(k*k*nmat,&ctx->Mm,2*k*k,&ctx->work,nmat,&ctx->fih);
958: PetscMemzero(ctx->Mm,k*k*nmat*sizeof(PetscScalar));
959: BVGetVec(pep->V,&ctx->w1);
960: BVGetVec(pep->V,&ctx->w2);
961: MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,m0,n0,ctx,M);
962: MatShellSetOperation(*M,MATOP_MULT,(void(*)(void))MatFSMult);
963: }
964: /* Compute a precond matrix for the system */
965: t = 0.0;
966: for (j=0;j<k;j++) t += H[j+j*ldh];
967: t /= k;
968: if (ini) {
969: MatDuplicate(At[0],MAT_COPY_VALUES,P);
970: } else {
971: MatCopy(At[0],*P,str);
972: }
973: PEPEvaluateBasis(pep,t,0,coef,NULL);
974: for (j=1;j<nmat;j++) {
975: MatAXPY(*P,coef[j],At[j],str);
976: }
977: }
978: PetscFree(coef);
979: if (flg) {
980: PetscFree(At);
981: }
982: return(0);
983: }
987: static PetscErrorCode NRefSubcommSetup(PEP pep,PetscInt k,MatExplicitCtx *matctx,PetscInt nsubc)
988: {
990: PetscInt i,si,j,m0,n0,nloc0,nloc_sub,*idx1,*idx2;
991: IS is1,is2;
992: BVType type;
993: Vec v;
994: PetscScalar *array;
995: Mat *A;
996: PetscBool flg;
999: PetscSubcommCreate(PetscObjectComm((PetscObject)pep),&matctx->subc);
1000: PetscSubcommSetNumber(matctx->subc,nsubc);
1001: PetscSubcommSetType(matctx->subc,PETSC_SUBCOMM_INTERLACED);
1002: PetscLogObjectMemory((PetscObject)pep,sizeof(PetscSubcomm));
1003: PetscSubcommSetFromOptions(matctx->subc);
1004: STGetTransform(pep->st,&flg);
1005: if (flg) {
1006: PetscMalloc1(pep->nmat,&A);
1007: for (i=0;i<pep->nmat;i++) {
1008: STGetTOperators(pep->st,i,&A[i]);
1009: }
1010: } else A = pep->A;
1011:
1012: /* Duplicate pep matrices */
1013: PetscMalloc3(pep->nmat,&matctx->A,nsubc,&matctx->scatter_id,nsubc,&matctx->scatterp_id);
1014: for (i=0;i<pep->nmat;i++) {
1015: MatGetRedundantMatrix(A[i],0,matctx->subc->comm,MAT_INITIAL_MATRIX,&matctx->A[i]);
1016: }
1018: /* Create Scatter */
1019: MatGetVecs(matctx->A[0],&matctx->t,NULL);
1020: MatGetLocalSize(matctx->A[0],&nloc_sub,NULL);
1021: VecCreateMPI(matctx->subc->dupparent,nloc_sub,PETSC_DECIDE,&matctx->tg);
1022: BVGetColumn(pep->V,0,&v);
1023: VecGetOwnershipRange(v,&n0,&m0);
1024: nloc0 = m0-n0;
1025: PetscMalloc2(matctx->subc->n*nloc0,&idx1,matctx->subc->n*nloc0,&idx2);
1026: j = 0;
1027: for (si=0;si<matctx->subc->n;si++) {
1028: for (i=n0;i<m0;i++) {
1029: idx1[j] = i;
1030: idx2[j++] = i+pep->n*si;
1031: }
1032: }
1033: ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx1,PETSC_COPY_VALUES,&is1);
1034: ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx2,PETSC_COPY_VALUES,&is2);
1035: VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_sub);
1036: ISDestroy(&is1);
1037: ISDestroy(&is2);
1038: for (si=0;si<matctx->subc->n;si++) {
1039: j=0;
1040: for (i=n0;i<m0;i++) {
1041: idx1[j] = i;
1042: idx2[j++] = i+pep->n*si;
1043: }
1044: ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx1,PETSC_COPY_VALUES,&is1);
1045: ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx2,PETSC_COPY_VALUES,&is2);
1046: VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_id[si]);
1047: ISDestroy(&is1);
1048: ISDestroy(&is2);
1049: }
1050: BVRestoreColumn(pep->V,0,&v);
1051: PetscFree2(idx1,idx2);
1053: /* Duplicate pep->V vecs */
1054: BVGetType(pep->V,&type);
1055: BVCreate(matctx->subc->comm,&matctx->V);
1056: BVSetType(matctx->V,type);
1057: BVSetSizesFromVec(matctx->V,matctx->t,k);
1058: BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W);
1059: for (i=0;i<k;i++) {
1060: BVGetColumn(pep->V,i,&v);
1061: VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1062: VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1063: BVRestoreColumn(pep->V,i,&v);
1064: VecGetArray(matctx->tg,&array);
1065: VecPlaceArray(matctx->t,(const PetscScalar*)array);
1066: BVInsertVec(matctx->V,i,matctx->t);
1067: VecResetArray(matctx->t);
1068: }
1070: VecDuplicate(matctx->t,&matctx->Rv);
1071: VecDuplicate(matctx->t,&matctx->Vi);
1072: if (flg) {
1073: PetscFree(A);
1074: }
1075: return(0);
1076: }
1080: static PetscErrorCode NRefSubcommDestroy(PEP pep,MatExplicitCtx *matctx)
1081: {
1083: PetscInt i;
1086: VecScatterDestroy(&matctx->scatter_sub);
1087: for (i=0;i<matctx->subc->n;i++) {
1088: VecScatterDestroy(&matctx->scatter_id[i]);
1089: VecScatterDestroy(&matctx->scatterp_id[i]);
1090: }
1091: for (i=0;i<pep->nmat;i++) {
1092: MatDestroy(&matctx->A[i]);
1093: }
1094: PetscFree3(matctx->A,matctx->scatter_id,matctx->scatterp_id);
1095: BVDestroy(&matctx->V);
1096: BVDestroy(&matctx->W);
1097: VecDestroy(&matctx->t);
1098: VecDestroy(&matctx->tg);
1099: VecDestroy(&matctx->tp);
1100: VecDestroy(&matctx->tpg);
1101: VecDestroy(&matctx->Rv);
1102: VecDestroy(&matctx->Vi);
1103: PetscSubcommDestroy(&matctx->subc);
1104: return(0);
1105: }
1109: PetscErrorCode PEPNewtonRefinement_TOAR(PEP pep,PetscScalar sigma,PetscInt *maxits,PetscReal *tol,PetscInt k,PetscScalar *S,PetscInt lds,PetscInt *prs)
1110: {
1112: PetscScalar *H,*work,*dH,*fH,*dVS;
1113: PetscInt ldh,i,j,its=1,nmat=pep->nmat,nwu=0,lwa=0,nsubc=pep->npart,rds;
1114: PetscLogDouble cnt;
1115: PetscBLASInt k_,ld_,*p,info,lwork=0;
1116: BV dV;
1117: PetscBool sinvert,flg;
1118: Mat P,M;
1119: MPI_Comm comm;
1120: FSubctx *ctx;
1121: KSP ksp;
1122: MatExplicitCtx *matctx=NULL;
1123: Vec v;
1126: PetscLogEventBegin(PEP_Refine,pep,0,0,0);
1127: if (k > pep->n) SETERRQ1(PetscObjectComm((PetscObject)pep),1,"Multiple Refinement available only for invariant pairs of dimension smaller than n=%D",pep->n);
1128: /* the input tolerance is not being taken into account (by the moment) */
1129: its = *maxits;
1130: lwa = (5+3*nmat)*k*k+2*k;
1131: PetscMalloc3(k*k,&dH,nmat*k*k,&fH,lwa,&work);
1132: DSGetLeadingDimension(pep->ds,&ldh);
1133: DSGetArray(pep->ds,DS_MAT_A,&H);
1134: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1135: PetscMalloc1(2*k*k,&dVS);
1136: STGetTransform(pep->st,&flg);
1137: if (!flg && pep->st && pep->st->ops->backtransform) { /* STBackTransform */
1138: PetscBLASIntCast(k,&k_);
1139: PetscBLASIntCast(ldh,&ld_);
1140: PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinvert);
1141: if (sinvert){
1142: DSGetArray(pep->ds,DS_MAT_A,&H);
1143: PetscBLASIntCast(lwa-nwu,&lwork);
1144: PetscMalloc1(k,&p);
1145: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&k_,&k_,H,&ld_,p,&info));
1146: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&k_,H,&ld_,p,work,&lwork,&info));
1147: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1148: pep->st->ops->backtransform = NULL;
1149: }
1150: if (sigma!=0.0) {
1151: DSGetArray(pep->ds,DS_MAT_A,&H);
1152: for (i=0;i<k;i++) H[i+ldh*i] += sigma;
1153: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1154: pep->st->ops->backtransform = NULL;
1155: }
1156: }
1157: if ((pep->scale==PEP_SCALE_BOTH || pep->scale==PEP_SCALE_SCALAR) && pep->sfactor!=1.0) {
1158: DSGetArray(pep->ds,DS_MAT_A,&H);
1159: for (j=0;j<k;j++) {
1160: for (i=0;i<k;i++) H[i+j*ldh] *= pep->sfactor;
1161: }
1162: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1163: if (!flg) {
1164: /* Restore original values */
1165: for (i=0;i<pep->nmat;i++){
1166: pep->pbc[pep->nmat+i] *= pep->sfactor;
1167: pep->pbc[2*pep->nmat+i] *= pep->sfactor*pep->sfactor;
1168: }
1169: }
1170: }
1171: if ((pep->scale==PEP_SCALE_DIAGONAL || pep->scale==PEP_SCALE_BOTH) && pep->Dr) {
1172: for (i=0;i<k;i++) {
1173: BVGetColumn(pep->V,i,&v);
1174: VecPointwiseMult(v,v,pep->Dr);
1175: BVRestoreColumn(pep->V,i,&v);
1176: }
1177: }
1178: DSGetArray(pep->ds,DS_MAT_A,&H);
1180: NRefOrthogStep(pep,k,H,ldh,fH,S,lds,prs,work,lwa);
1181: /* check if H is in Schur form */
1182: for (i=0;i<k-1;i++) {
1183: if (H[i+1+i*ldh]!=0.0) {
1184: #if !defined(PETSC_USES_COMPLEX)
1185: SETERRQ(PetscObjectComm((PetscObject)pep),1,"Iterative Refinement require the complex Schur form of the projected matrix");
1186: #else
1187: SETERRQ(PetscObjectComm((PetscObject)pep),1,"Iterative Refinement requires an upper triangular projected matrix");
1188: #endif
1189: }
1190: }
1191: if (pep->schur && nsubc>1) SETERRQ(PetscObjectComm((PetscObject)pep),1,"Split communicator only allowed for the explicit matrix option");
1192: if (!pep->schur && nsubc>k) SETERRQ(PetscObjectComm((PetscObject)pep),1,"Amount of subcommunicators should not be larger than the invariant pair's dimension");
1193: cnt = k*sizeof(PetscBLASInt)+(lwork+k*k*(nmat+3)+nmat+k)*sizeof(PetscScalar);
1194: PetscLogObjectMemory((PetscObject)pep,cnt);
1195: BVSetActiveColumns(pep->V,0,k);
1196: BVDuplicateResize(pep->V,k,&dV);
1197: PetscLogObjectParent((PetscObject)pep,(PetscObject)dV);
1198: if (!pep->schur) {
1199: PetscMalloc1(1,&matctx);
1200: if (nsubc>1) { /* spliting in subcommunicators */
1201: NRefSubcommSetup(pep,k,matctx,nsubc);
1202: comm = matctx->subc->comm;
1203: } else {
1204: matctx->subc=NULL;
1205: PetscObjectGetComm((PetscObject)pep,&comm);
1206: }
1207: } else {
1208: PetscObjectGetComm((PetscObject)pep,&comm);
1209: }
1210: KSPCreate(comm,&ksp);
1211: /* Loop performing iterative refinements */
1212: for (i=0;i<its;i++) {
1213: /* Pre-compute the polynomial basis evaluated in H */
1214: PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH);
1215: PEPNRefSetUpMatrices(pep,k,H,ldh,&M,&P,matctx,(i==0)?PETSC_TRUE:PETSC_FALSE);
1216: KSPSetOperators(ksp,M,P);
1217: if (i==0) {
1218: KSPSetFromOptions(ksp);
1219: }
1220: /* Solve the linear system */
1221: PEPNRefForwardSubstitution(pep,k,S,lds,H,ldh,fH,dV,dVS,&rds,dH,k,ksp,work+nwu,lwa-nwu,matctx);
1222: /* Update X (=V*S) and H, and orthogonalize [X;X*fH1;...;XfH(deg-1)] */
1223: PEPNRefUpdateInvPair(pep,k,H,ldh,fH,dH,S,lds,dV,dVS,rds,work+nwu,lwa-nwu);
1224: }
1225: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1226: if (!flg && sinvert) {
1227: PetscFree(p);
1228: }
1229: PetscFree3(dH,fH,work);
1230: PetscFree(dVS);
1231: BVDestroy(&dV);
1232: if (!pep->schur) {
1233: for (i=0;i<2;i++) {
1234: MatDestroy(&matctx->E[i]);
1235: }
1236: PetscFree4(matctx->idxp,matctx->idxg,matctx->map0,matctx->map1);
1237: VecDestroy(&matctx->tN);
1238: VecDestroy(&matctx->ttN);
1239: VecDestroy(&matctx->t1);
1240: if (nsubc>1) {
1241: NRefSubcommDestroy(pep,matctx);
1242: } else {
1243: VecDestroy(&matctx->vseq);
1244: VecScatterDestroy(&matctx->scatterctx);
1245: }
1246: PetscFree(matctx);
1247: } else {
1248: MatShellGetContext(M,&ctx);
1249: PetscFree3(ctx->Mm,ctx->work,ctx->fih);
1250: VecDestroy(&ctx->w1);
1251: VecDestroy(&ctx->w2);
1252: PetscFree(ctx);
1253: MatDestroy(&P);
1254: }
1255: KSPDestroy(&ksp);
1256: MatDestroy(&M);
1257: PetscLogEventEnd(PEP_Refine,pep,0,0,0);
1258: return(0);
1259: }