Actual source code: dvd_improvex.c
1: /*
2: SLEPc eigensolver: "davidson"
4: Step: improve the eigenvectors X
6: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
7: SLEPc - Scalable Library for Eigenvalue Problem Computations
8: Copyright (c) 2002-2011, Universitat Politecnica de Valencia, Spain
10: This file is part of SLEPc.
11:
12: SLEPc is free software: you can redistribute it and/or modify it under the
13: terms of version 3 of the GNU Lesser General Public License as published by
14: the Free Software Foundation.
16: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
17: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
19: more details.
21: You should have received a copy of the GNU Lesser General Public License
22: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
23: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
24: */
26: #include davidson.h
27: #include <slepcvec.h>
28: #include <slepcblaslapack.h>
30: PetscErrorCode dvd_improvex_PfuncV(dvdDashboard *d, void *funcV, Vec *D,
31: PetscInt max_size_D, PetscInt r_s, PetscInt r_e, Vec *auxV,
32: PetscScalar *auxS);
33: PetscErrorCode dvd_matmult_jd(Mat A, Vec in, Vec out);
34: PetscErrorCode dvd_matgetvecs_jd(Mat A, Vec *right, Vec *left);
35: PetscErrorCode dvd_improvex_jd_d(dvdDashboard *d);
36: PetscErrorCode dvd_improvex_jd_start(dvdDashboard *d);
37: PetscErrorCode dvd_improvex_jd_end(dvdDashboard *d);
38: PetscErrorCode dvd_improvex_jd_gen(dvdDashboard *d, Vec *D,
39: PetscInt max_size_D, PetscInt r_s,
40: PetscInt r_e, PetscInt *size_D);
41: PetscErrorCode dvd_improvex_jd_proj_cuv(dvdDashboard *d, PetscInt i_s,
42: PetscInt i_e, Vec **u, Vec **v, Vec *kr, Vec **auxV, PetscScalar **auxS,
43: PetscScalar *theta, PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY,
44: PetscInt ld);
45: PetscErrorCode dvd_improvex_jd_proj_uv_KXX(dvdDashboard *d, PetscInt i_s,
46: PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
47: PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld);
48: PetscErrorCode dvd_improvex_jd_proj_uv_KZX(dvdDashboard *d, PetscInt i_s,
49: PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
50: PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld);
51: PetscErrorCode dvd_improvex_jd_lit_const_0(dvdDashboard *d, PetscInt i,
52: PetscScalar* theta, PetscScalar* thetai,
53: PetscInt *maxits, PetscReal *tol);
54: PetscErrorCode dvd_improvex_get_eigenvectors(dvdDashboard *d, PetscScalar *pX,
55: PetscScalar *pY, PetscInt ld_,
56: PetscScalar *auxS, PetscInt size_auxS);
57: PetscErrorCode dvd_improvex_apply_proj(dvdDashboard *d, Vec *V, PetscInt cV, PetscScalar *auxS);
59: #define size_Z (64*4)
61: /**** JD update step (I - Kfg'/(g'Kf)) K(A - sB) (I - Kfg'/(g'Kf)) t = (I - Kfg'/(g'Kf))r ****/
63: typedef struct {
64: PetscInt size_X;
65: void
66: *old_improveX_data; /* old improveX_data */
67: improveX_type
68: old_improveX; /* old improveX */
69: KSP ksp; /* correction equation solver */
70: Vec
71: friends, /* reference vector for composite vectors */
72: *auxV; /* auxiliar vectors */
73: PetscScalar *auxS, /* auxiliar scalars */
74: *theta,
75: *thetai; /* the shifts used in the correction eq. */
76: PetscInt maxits, /* maximum number of iterations */
77: r_s, r_e, /* the selected eigenpairs to improve */
78: ksp_max_size; /* the ksp maximum subvectors size */
79: PetscReal tol, /* the maximum solution tolerance */
80: lastTol, /* last tol for dynamic stopping criterion */
81: fix; /* tolerance for using the approx. eigenvalue */
82: PetscBool
83: dynamic; /* if the dynamic stopping criterion is applied */
84: dvdDashboard
85: *d; /* the currect dvdDashboard reference */
86: PC old_pc; /* old pc in ksp */
87: Vec
88: *u, /* new X vectors */
89: *real_KZ, /* original KZ */
90: *KZ; /* KZ vecs for the projector KZ*inv(X'*KZ)*X' */
91: PetscScalar
92: *XKZ, /* X'*KZ */
93: *iXKZ; /* inverse of XKZ */
94: PetscInt
95: ldXKZ, /* leading dimension of XKZ */
96: size_iXKZ, /* size of iXKZ */
97: ldiXKZ, /* leading dimension of iXKZ */
98: size_KZ, /* size of converged KZ */
99: size_real_KZ, /* original size of KZ */
100: size_cX, /* last value of d->size_cX */
101: old_size_X; /* last number of improved vectors */
102: PetscBLASInt
103: *iXKZPivots; /* array of pivots */
104: } dvdImprovex_jd;
106: #define _Ceil(A,B) ((A)/(B)+((A)%(B)==0?0:1))
107: #define FromIntToScalar(S) ((PetscInt)_Ceil((S)*sizeof(PetscBLASInt),sizeof(PetscScalar)))
111: PetscErrorCode dvd_improvex_jd(dvdDashboard *d, dvdBlackboard *b, KSP ksp,
112: PetscInt max_bs, PetscInt cX_impr, PetscBool dynamic)
113: {
114: PetscErrorCode ierr;
115: dvdImprovex_jd *data;
116: PetscBool t, herm = DVD_IS(d->sEP, DVD_EP_HERMITIAN)?PETSC_TRUE:PETSC_FALSE;
117: PC pc;
118: PetscInt size_P;
122: /* Setting configuration constrains */
123: PetscTypeCompare((PetscObject)ksp, KSPPREONLY, &t);
124: if (t) ksp = PETSC_NULL;
126: /* If the arithmetic is real and the problem is not Hermitian, then
127: the block size is incremented in one */
128: #if !defined(PETSC_USE_COMPLEX)
129: if (!herm) {
130: max_bs++;
131: b->max_size_P = PetscMax(b->max_size_P, 2);
132: } else
133: #endif
134: b->max_size_P = PetscMax(b->max_size_P, 1);
135: b->max_size_X = PetscMax(b->max_size_X, max_bs);
136: size_P = b->max_size_P+cX_impr;
137: b->max_size_auxV = PetscMax(b->max_size_auxV, b->max_size_X*(ksp?3:2));
138: /* u, kr?, auxV */
139: b->own_scalars+= size_P*size_P; /* XKZ */
140: b->max_size_auxS = PetscMax(b->max_size_auxS,
141: (herm?0:1)*2*b->max_size_proj*b->max_size_proj + /* pX, pY */
142: b->max_size_X*3 + /* theta, thetai */
143: size_P*size_P + /* iXKZ */
144: FromIntToScalar(size_P) + /* iXkZPivots */
145: PetscMax(PetscMax(
146: 3*b->max_size_proj*b->max_size_X, /* dvd_improvex_apply_proj */
147: 8*cX_impr*b->max_size_X),
148: /* dvd_improvex_jd_proj_cuv_KZX */
149: (herm?0:1)*11*b->max_size_proj+4*b->max_size_proj*b->max_size_proj));
150: /* dvd_improvex_get_eigenvectors */
151: b->own_vecs+= size_P; /* KZ */
153: /* Setup the preconditioner */
154: if (ksp) {
155: KSPGetPC(ksp, &pc);
156: dvd_static_precond_PC(d, b, pc);
157: } else {
158: dvd_static_precond_PC(d, b, 0);
159: }
161: /* Setup the step */
162: if (b->state >= DVD_STATE_CONF) {
163: PetscMalloc(sizeof(dvdImprovex_jd), &data);
164: data->dynamic = dynamic;
165: data->size_real_KZ = size_P;
166: data->real_KZ = b->free_vecs; b->free_vecs+= data->size_real_KZ;
167: d->max_cX_in_impr = cX_impr;
168: data->XKZ = b->free_scalars; b->free_scalars+= size_P*size_P;
169: data->ldXKZ = size_P;
170: data->size_X = b->max_size_X;
171: data->old_improveX_data = d->improveX_data;
172: d->improveX_data = data;
173: data->old_improveX = d->improveX;
174: data->ksp = ksp;
175: data->d = d;
176: d->improveX = dvd_improvex_jd_gen;
177: data->ksp_max_size = max_bs;
179: DVD_FL_ADD(d->startList, dvd_improvex_jd_start);
180: DVD_FL_ADD(d->endList, dvd_improvex_jd_end);
181: DVD_FL_ADD(d->destroyList, dvd_improvex_jd_d);
182: }
184: return(0);
185: }
190: PetscErrorCode dvd_improvex_jd_start(dvdDashboard *d)
191: {
192: PetscErrorCode ierr;
193: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
194: PetscInt rA, cA, rlA, clA;
195: Mat A;
196: PetscBool t;
197: PC pc;
201: data->KZ = data->real_KZ;
202: data->size_KZ = data->size_cX = data->old_size_X = 0;
203: data->lastTol = data->dynamic?0.5:0.0;
205: /* Setup the ksp */
206: if(data->ksp) {
207: /* Create the reference vector */
208: VecCreateCompWithVecs(d->V, data->ksp_max_size, PETSC_NULL,
209: &data->friends);
211: /* Save the current pc and set a PCNONE */
212: KSPGetPC(data->ksp, &data->old_pc);
213: PetscTypeCompare((PetscObject)data->old_pc, PCNONE, &t);
214:
215: data->lastTol = 0.5;
216: if (t) {
217: data->old_pc = 0;
218: } else {
219: PetscObjectReference((PetscObject)data->old_pc);
220: PCCreate(((PetscObject)d->eps)->comm, &pc);
221: PCSetType(pc, PCNONE);
222: PCSetOperators(pc, d->A, d->A, SAME_PRECONDITIONER);
223: KSPSetPC(data->ksp, pc);
224: PCDestroy(&pc);
225: }
227: /* Create the (I-v*u')*K*(A-s*B) matrix */
228: MatGetSize(d->A, &rA, &cA);
229: MatGetLocalSize(d->A, &rlA, &clA);
230: MatCreateShell(((PetscObject)d->A)->comm, rlA*data->ksp_max_size,
231: clA*data->ksp_max_size, rA*data->ksp_max_size,
232: cA*data->ksp_max_size, data, &A);
233: MatShellSetOperation(A, MATOP_MULT,
234: (void(*)(void))dvd_matmult_jd);
235: MatShellSetOperation(A, MATOP_GET_VECS,
236: (void(*)(void))dvd_matgetvecs_jd);
237:
239: /* Try to avoid KSPReset */
240: KSPGetOperatorsSet(data->ksp,&t,PETSC_NULL);
241: if (t) {
242: Mat M;
243: PetscInt rM;
244: KSPGetOperators(data->ksp,&M,PETSC_NULL,PETSC_NULL);
245: MatGetSize(M,&rM,PETSC_NULL);
246: if (rM != rA*data->ksp_max_size) { KSPReset(data->ksp); }
247: }
248: KSPSetOperators(data->ksp, A, A, SAME_PRECONDITIONER);
249:
250: KSPSetUp(data->ksp);
251: MatDestroy(&A);
252: } else {
253: data->old_pc = 0;
254: data->friends = PETSC_NULL;
255: }
256:
257: return(0);
258: }
263: PetscErrorCode dvd_improvex_jd_end(dvdDashboard *d)
264: {
265: PetscErrorCode ierr;
266: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
270: if (data->friends) { VecDestroy(&data->friends); }
272: /* Restore the pc of ksp */
273: if (data->old_pc) {
274: KSPSetPC(data->ksp, data->old_pc);
275: PCDestroy(&data->old_pc);
276: }
278: return(0);
279: }
284: PetscErrorCode dvd_improvex_jd_d(dvdDashboard *d)
285: {
286: PetscErrorCode ierr;
287: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
290:
291: /* Restore changes in dvdDashboard */
292: d->improveX_data = data->old_improveX_data;
294: /* Free local data and objects */
295: PetscFree(data);
297: return(0);
298: }
303: PetscErrorCode dvd_improvex_jd_gen(dvdDashboard *d, Vec *D,
304: PetscInt max_size_D, PetscInt r_s,
305: PetscInt r_e, PetscInt *size_D)
306: {
307: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
308: PetscErrorCode ierr;
309: PetscInt i, j, n, maxits, maxits0, lits, s;
310: PetscScalar *pX, *pY, *auxS = d->auxS, *auxS0;
311: PetscReal tol, tol0;
312: Vec *u, *v, *kr, kr_comp, D_comp;
316: /* Quick exit */
317: if ((max_size_D == 0) || r_e-r_s <= 0) {
318: /* Callback old improveX */
319: if (data->old_improveX) {
320: d->improveX_data = data->old_improveX_data;
321: data->old_improveX(d, PETSC_NULL, 0, 0, 0, PETSC_NULL);
322: d->improveX_data = data;
323: }
324: return(0);
325: }
326:
327: n = PetscMin(PetscMin(data->size_X, max_size_D), r_e-r_s);
328: if (n == 0) SETERRQ(PETSC_COMM_SELF,1, "n == 0!\n");
329: if (data->size_X < r_e-r_s) SETERRQ(PETSC_COMM_SELF,1, "size_X < r_e-r_s!\n");
331: /* Compute the eigenvectors of the selected pairs */
332: if (DVD_IS(d->sEP, DVD_EP_HERMITIAN)) {
333: pX = pY = d->pX;
334: } else {
335: pX = auxS; auxS+= d->size_H*d->size_H;
336: pY = auxS; auxS+= d->size_H*d->size_H;
337: dvd_improvex_get_eigenvectors(d, pX, pY, d->size_H, auxS,
338: d->size_auxS-(auxS-d->auxS));
339:
340: }
342: /* Restart lastTol if a new pair converged */
343: if (data->dynamic && data->size_cX < d->size_cX)
344: data->lastTol = 0.5;
346: for(i=0, s=0, auxS0=auxS; i<n; i+=s) {
347: /* If the selected eigenvalue is complex, but the arithmetic is real... */
348: #if !defined(PETSC_USE_COMPLEX)
349: if (PetscAbsScalar(d->eigi[i] != 0.0)) {
350: if (i+2 <= max_size_D) s=2; else break;
351: } else
352: #endif
353: s=1;
355: data->auxV = d->auxV;
356: data->r_s = r_s+i; data->r_e = r_s+i+s;
357: auxS = auxS0;
358: data->theta = auxS; auxS+= 2*s;
359: data->thetai = auxS; auxS+= s;
360: /* If GD, kr = D */
361: if (!data->ksp) {
362: kr = &D[i];
364: /* If JD, kr = auxV */
365: } else {
366: kr = data->auxV; data->auxV+= s;
367: }
369: /* Compute theta, maximum iterations and tolerance */
370: maxits = 0; tol = 1;
371: for(j=0; j<s; j++) {
372: d->improvex_jd_lit(d, r_s+i+j, &data->theta[2*j],
373: &data->thetai[j], &maxits0, &tol0);
374:
375: maxits+= maxits0; tol*= tol0;
376: }
377: maxits/= s; tol = data->dynamic?data->lastTol:exp(log(tol)/s);
379: /* Compute u, v and kr */
380: dvd_improvex_jd_proj_cuv(d, r_s+i, r_s+i+s, &u, &v, kr,
381: &data->auxV, &auxS, data->theta, data->thetai,
382: &pX[d->size_H*(r_s+i+d->cX_in_H)], &pY[d->size_H*(r_s+i+d->cX_in_H)], d->size_H);
383:
384: data->u = u;
386: /* Check if the first eigenpairs are converged */
387: if (i == 0) {
388: d->preTestConv(d, 0, s, s, PETSC_NULL, PETSC_NULL, &d->npreconv);
389:
390: if (d->npreconv > 0) break;
391: }
393: /* Compute kr <- kr - v*(u'*kr) */
394: dvd_improvex_apply_proj(d, kr, s, auxS);
396: /* If JD */
397: if (data->ksp) {
398: data->auxS = auxS;
400: /* kr <- -kr */
401: for(j=0; j<s; j++) {
402: VecScale(kr[j], -1.0);
403: }
405: /* Compouse kr and D */
406: VecCreateCompWithVecs(kr, data->ksp_max_size, data->friends,
407: &kr_comp);
408: VecCreateCompWithVecs(&D[i], data->ksp_max_size, data->friends,
409: &D_comp);
410: VecCompSetSubVecs(data->friends,s,PETSC_NULL);
411:
412: /* Solve the correction equation */
413: KSPSetTolerances(data->ksp, tol, PETSC_DEFAULT, PETSC_DEFAULT,
414: maxits);
415: KSPSolve(data->ksp, kr_comp, D_comp);
416: KSPGetIterationNumber(data->ksp, &lits);
417: d->eps->OP->lineariterations+= lits;
418:
419: /* Destroy the composed ks and D */
420: VecDestroy(&kr_comp);
421: VecDestroy(&D_comp);
422: }
423: }
424: *size_D = i;
425: if (data->dynamic) data->lastTol = PetscMax(data->lastTol/2.0,PETSC_MACHINE_EPSILON*10.0);
426:
427: /* Callback old improveX */
428: if (data->old_improveX) {
429: d->improveX_data = data->old_improveX_data;
430: data->old_improveX(d, PETSC_NULL, 0, 0, 0, PETSC_NULL);
431: d->improveX_data = data;
432: }
434: return(0);
435: }
440: PetscErrorCode dvd_matmult_jd(Mat A, Vec in, Vec out)
441: {
442: PetscErrorCode ierr;
443: dvdImprovex_jd *data;
444: PetscInt n, i;
445: const Vec *inx, *outx, *Bx;
449: MatShellGetContext(A, (void**)&data);
450: VecCompGetSubVecs(in,PETSC_NULL,&inx);
451: VecCompGetSubVecs(out,PETSC_NULL,&outx);
452: n = data->r_e - data->r_s;
454: /* aux <- theta[1]A*in - theta[0]*B*in */
455: for(i=0; i<n; i++) {
456: MatMult(data->d->A, inx[i], data->auxV[i]);
457: }
458: if (data->d->B) {
459: for(i=0; i<n; i++) {
460: MatMult(data->d->B, inx[i], outx[i]);
461: }
462: Bx = outx;
463: } else
464: Bx = inx;
466: for(i=0; i<n; i++) {
467: #if !defined(PETSC_USE_COMPLEX)
468: if(data->d->eigi[data->r_s+i] != 0.0) {
469: /* aux_i <- [ t_2i+1*A*inx_i - t_2i*Bx_i + ti_i*Bx_i+1;
470: aux_i+1 t_2i+1*A*inx_i+1 - ti_i*Bx_i - t_2i*Bx_i+1 ] */
471: VecAXPBYPCZ(data->auxV[i], -data->theta[2*i], data->thetai[i],
472: data->theta[2*i+1], Bx[i], Bx[i+1]);
473:
474: VecAXPBYPCZ(data->auxV[i+1], -data->thetai[i],
475: -data->theta[2*i], data->theta[2*i+1], Bx[i],
476: Bx[i+1]);
477: i++;
478: } else
479: #endif
480: {
481: VecAXPBY(data->auxV[i], -data->theta[i*2], data->theta[i*2+1],
482: Bx[i]);
483: }
484: }
486: /* out <- K * aux */
487: for(i=0; i<n; i++) {
488: data->d->improvex_precond(data->d, data->r_s+i, data->auxV[i],
489: outx[i]);
490: }
492: /* out <- out - v*(u'*out) */
493: dvd_improvex_apply_proj(data->d, (Vec*)outx, n, data->auxS);
495: return(0);
496: }
501: PetscErrorCode dvd_matgetvecs_jd(Mat A, Vec *right, Vec *left)
502: {
503: PetscErrorCode ierr;
504: Vec *r, *l;
505: dvdImprovex_jd *data;
506: PetscInt n, i;
510: MatShellGetContext(A, (void**)&data);
511: n = data->ksp_max_size;
512: if (right) {
513: PetscMalloc(sizeof(Vec)*n, &r);
514: }
515: if (left) {
516: PetscMalloc(sizeof(Vec)*n, &l);
517: }
518: for (i=0; i<n; i++) {
519: MatGetVecs(data->d->A, right?&r[i]:PETSC_NULL,
520: left?&l[i]:PETSC_NULL);
521: }
522: if(right) {
523: VecCreateCompWithVecs(r, n, data->friends, right);
524: for (i=0; i<n; i++) {
525: VecDestroy(&r[i]);
526: }
527: }
528: if(left) {
529: VecCreateCompWithVecs(l, n, data->friends, left);
530: for (i=0; i<n; i++) {
531: VecDestroy(&l[i]);
532: }
533: }
535: if (right) {
536: PetscFree(r);
537: }
538: if (left) {
539: PetscFree(l);
540: }
542: return(0);
543: }
548: PetscErrorCode dvd_improvex_jd_proj_uv(dvdDashboard *d, dvdBlackboard *b,
549: ProjType_t p)
550: {
553: /* Setup the step */
554: if (b->state >= DVD_STATE_CONF) {
555: switch(p) {
556: case DVD_PROJ_KXX:
557: d->improvex_jd_proj_uv = dvd_improvex_jd_proj_uv_KXX; break;
558: case DVD_PROJ_KZX:
559: d->improvex_jd_proj_uv = dvd_improvex_jd_proj_uv_KZX; break;
560: }
561: }
563: return(0);
564: }
568: /*
569: Compute: u <- X, v <- K*(theta[0]*A+theta[1]*B)*X,
570: kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1], Y <- W*pY[i_s..i_e-1]
571: where
572: auxV, 4*(i_e-i_s) auxiliar global vectors
573: pX,pY, the right and left eigenvectors of the projected system
574: ld, the leading dimension of pX and pY
575: auxS: max(8*bs*max_cX_in_proj,size_V*size_V)
576: */
577: PetscErrorCode dvd_improvex_jd_proj_cuv(dvdDashboard *d, PetscInt i_s,
578: PetscInt i_e, Vec **u, Vec **v, Vec *kr, Vec **auxV, PetscScalar **auxS,
579: PetscScalar *theta, PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY,
580: PetscInt ld)
581: {
582: PetscErrorCode ierr;
583: PetscInt n = i_e - i_s, size_KZ, V_new, rm, i;
584: PetscScalar *wS0, *wS1;
585: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
586: PetscBLASInt s, ldXKZ, info;
590: /* Check consistency */
591: V_new = d->size_cX - data->size_cX;
592: if (V_new > data->old_size_X) { SETERRQ(PETSC_COMM_SELF,1, "Consistency broken!"); }
593: data->old_size_X = n;
595: /* KZ <- KZ(rm:rm+max_cX-1) */
596: rm = PetscMax(V_new+data->size_KZ-d->max_cX_in_impr, 0);
597: for (i=0; i<d->max_cX_in_impr; i++) {
598: VecCopy(data->KZ[i+rm], data->KZ[i]);
599: }
600: data->size_cX = d->size_cX;
602: /* XKZ <- XKZ(rm:rm+max_cX-1,rm:rm+max_cX-1) */
603: for (i=0; i<d->max_cX_in_impr; i++) {
604: SlepcDenseCopy(&data->XKZ[i*data->ldXKZ+i], data->ldXKZ, &data->XKZ[(i+rm)*data->ldXKZ+i+rm], data->ldXKZ, data->size_KZ, 1);
605: }
606: data->size_KZ = PetscMin(d->max_cX_in_impr, data->size_KZ+V_new);
608: /* Compute X, KZ and KR */
609: *u = *auxV; *auxV+= n;
610: *v = &data->KZ[data->size_KZ];
611: d->improvex_jd_proj_uv(d, i_s, i_e, *u, *v, kr, *auxV, theta, thetai,
612: pX, pY, ld);
614: /* XKZ <- X'*KZ */
615: size_KZ = data->size_KZ+n;
616: wS0 = *auxS; wS1 = wS0+2*n*data->size_KZ+n*n;
617: VecsMult(data->XKZ, 0, data->ldXKZ, d->V-data->size_KZ, 0, data->size_KZ, data->KZ, 0, size_KZ, wS0, wS1);
618: VecsMult(&data->XKZ[data->size_KZ], 0, data->ldXKZ, *u, 0, n, data->KZ, 0, size_KZ, wS0, wS1);
620: /* iXKZ <- inv(XKZ) */
621: s = PetscBLASIntCast(size_KZ);
622: data->ldiXKZ = data->size_iXKZ = size_KZ;
623: data->iXKZ = *auxS; *auxS+= size_KZ*size_KZ;
624: data->iXKZPivots = (PetscBLASInt*)*auxS;
625: *auxS += FromIntToScalar(size_KZ);
626: SlepcDenseCopy(data->iXKZ,data->ldiXKZ,data->XKZ,data->ldXKZ,size_KZ,size_KZ);
627: ldXKZ = PetscBLASIntCast(data->ldiXKZ);
628: LAPACKgetrf_(&s, &s, data->iXKZ, &ldXKZ, data->iXKZPivots, &info);
629: if (info) SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_LIB, "Error in Lapack XGETRF %d", info);
631: return(0);
632: }
634: #define DVD_COMPLEX_RAYLEIGH_QUOTIENT(ur,ui,Axr,Axi,Bxr,Bxi,eigr,eigi,b,ierr)\
635: { \
636: VecDot((Axr), (ur), &(b)[0]); /* r*A*r */ \
637: VecDot((Axr), (ui), &(b)[1]); /* i*A*r */ \
638: VecDot((Axi), (ur), &(b)[2]); /* r*A*i */ \
639: VecDot((Axi), (ui), &(b)[3]); /* i*A*i */ \
640: VecDot((Bxr), (ur), &(b)[4]); /* r*B*r */ \
641: VecDot((Bxr), (ui), &(b)[5]); /* i*B*r */ \
642: VecDot((Bxi), (ur), &(b)[6]); /* r*B*i */ \
643: VecDot((Bxi), (ui), &(b)[7]); /* i*B*i */ \
644: (b)[0] = (b)[0]+(b)[3]; /* rAr+iAi */ \
645: (b)[2] = (b)[2]-(b)[1]; /* rAi-iAr */ \
646: (b)[4] = (b)[4]+(b)[7]; /* rBr+iBi */ \
647: (b)[6] = (b)[6]-(b)[5]; /* rBi-iBr */ \
648: (b)[7] = (b)[4]*(b)[4] + (b)[6]*(b)[6]; /* k */ \
649: *(eigr) = ((b)[0]*(b)[4] + (b)[2]*(b)[6]) / (b)[7]; /* eig_r */ \
650: *(eigi) = ((b)[2]*(b)[4] - (b)[0]*(b)[6]) / (b)[7]; /* eig_i */ \
651: }
653: #if !defined(PETSC_USE_COMPLEX)
654: #define DVD_COMPUTE_N_RR(eps,i,i_s,n,eigr,eigi,u,Ax,Bx,b,ierr) \
655: for((i)=0; (i)<(n); (i)++) { \
656: if ((eigi)[(i_s)+(i)] != 0.0) { \
657: /* eig_r = [(rAr+iAi)*(rBr+iBi) + (rAi-iAr)*(rBi-iBr)]/k \
658: eig_i = [(rAi-iAr)*(rBr+iBi) - (rAr+iAi)*(rBi-iBr)]/k \
659: k = (rBr+iBi)*(rBr+iBi) + (rBi-iBr)*(rBi-iBr) */ \
660: DVD_COMPLEX_RAYLEIGH_QUOTIENT((u)[(i)], (u)[(i)+1], (Ax)[(i)], \
661: (Ax)[(i)+1], (Bx)[(i)], (Bx)[(i)+1], &(b)[8], &(b)[9], (b), (ierr)); \
662: if (PetscAbsScalar((eigr)[(i_s)+(i)] - (b)[8])/ \
663: PetscAbsScalar((eigr)[(i_s)+(i)]) > 1e-10 || \
664: PetscAbsScalar((eigi)[(i_s)+(i)] - (b)[9])/ \
665: PetscAbsScalar((eigi)[(i_s)+(i)]) > 1e-10 ) { \
666: (ierr) = PetscInfo4((eps), "The eigenvalue %G+%G is far from its "\
667: "Rayleigh quotient value %G+%G\n", \
668: (eigr)[(i_s)+(i)], \
669: (eigi)[(i_s)+1], (b)[8], (b)[9]); \
670: } \
671: (i)++; \
672: } \
673: }
674: #else
675: #define DVD_COMPUTE_N_RR(eps,i,i_s,n,eigr,eigi,u,Ax,Bx,b,ierr) \
676: for((i)=0; (i)<(n); (i)++) { \
677: (ierr) = VecDot((Ax)[(i)], (u)[(i)], &(b)[0]); \
678: (ierr) = VecDot((Bx)[(i)], (u)[(i)], &(b)[1]); \
679: (b)[0] = (b)[0]/(b)[1]; \
680: if (PetscAbsScalar((eigr)[(i_s)+(i)] - (b)[0])/ \
681: PetscAbsScalar((eigr)[(i_s)+(i)]) > 1e-10 ) { \
682: (ierr) = PetscInfo4((eps), "The eigenvalue %G+%G is far from its " \
683: "Rayleigh quotient value %G+%G\n", \
684: PetscRealPart((eigr)[(i_s)+(i)]), \
685: PetscImaginaryPart((eigr)[(i_s)+(i)]), PetscRealPart((b)[0]), \
686: PetscImaginaryPart((b)[0])); \
687: } \
688: }
689: #endif
691: #if !defined(PETSC_USE_COMPLEX)
692: #define DVD_NORM_FOR_UV(x) PetscAbsScalar(x)
693: #else
694: #define DVD_NORM_FOR_UV(x) (x)
695: #endif
697: #define DVD_NORMALIZE_UV(u,v,ierr,a) { \
698: (ierr) = VecDot((u), (v), &(a)); \
699: if ((a) == 0.0) { \
700: SETERRQ(((PetscObject)u)->comm,1, "Inappropriate approximate eigenvector norm"); \
701: } \
702: if ((u) == (v)) { \
703: VecScale((u), 1.0/PetscSqrtScalar(DVD_NORM_FOR_UV(a))); \
704: \
705: } else { \
706: VecScale((u), 1.0/(a)); \
707: } \
708: }
713: /*
714: Compute: u <- X, v <- K*(theta[0]*A+theta[1]*B)*X,
715: kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1], Y <- W*pY[i_s..i_e-1]
716: where
717: auxV, 4*(i_e-i_s) auxiliar global vectors
718: pX,pY, the right and left eigenvectors of the projected system
719: ld, the leading dimension of pX and pY
720: */
721: PetscErrorCode dvd_improvex_jd_proj_uv_KZX(dvdDashboard *d, PetscInt i_s,
722: PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
723: PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld)
724: {
725: PetscErrorCode ierr;
726: PetscInt n = i_e - i_s, i;
727: PetscScalar b[16];
728: Vec *Ax, *Bx, *r=auxV, X[4];
729: /* The memory manager doen't allow to call a subroutines */
730: PetscScalar Z[size_Z];
734: /* u <- X(i) */
735: SlepcUpdateVectorsZ(u, 0.0, 1.0, d->V-d->cX_in_H, d->size_V+d->cX_in_H, pX, ld, d->size_H, n);
736: /* nX(i) <- ||X(i)|| */
737: if (d->correctXnorm) {
738: for (i=0; i<n; i++) {
739: VecNormBegin(u[i], NORM_2, &d->nX[i_s+i]);
740: }
741: for (i=0; i<n; i++) {
742: VecNormEnd(u[i], NORM_2, &d->nX[i_s+i]);
743: }
744: } else {
745: for (i=0; i<n; i++) d->nX[i_s+i] = 1.0;
746: }
748: /* v <- theta[0]A*u + theta[1]*B*u */
750: /* Bx <- B*X(i) */
751: Bx = kr;
752: for(i=i_s; i<i_e; i++) d->nX[i] = 1.0;
753: if (d->BV) {
754: SlepcUpdateVectorsZ(Bx, 0.0, 1.0, d->BV-d->cX_in_H, d->size_BV+d->cX_in_H, pX, ld, d->size_H, n);
755: } else {
756: for(i=0; i<n; i++) {
757: if (d->B) {
758: MatMult(d->B, u[i], Bx[i]);
759: } else {
760: VecCopy(u[i], Bx[i]);
761: }
762: }
763: }
765: /* Ax <- A*X(i) */
766: Ax = r;
767: SlepcUpdateVectorsZ(Ax, 0.0, 1.0, d->AV-d->cX_in_H, d->size_AV+d->cX_in_H, pX, ld, d->size_H, n);
769: /* v <- Y(i) */
770: SlepcUpdateVectorsZ(v, 0.0, 1.0, (d->W?d->W:d->V)-d->cX_in_H, d->size_V+d->cX_in_H, pY, ld, d->size_H, n);
772: /* Recompute the eigenvalue */
773: DVD_COMPUTE_N_RR(d->eps, i, i_s, n, d->eigr, d->eigi, v, Ax, Bx, b, ierr);
775: for(i=0; i<n; i++) {
776: #if !defined(PETSC_USE_COMPLEX)
777: if(d->eigi[i_s+i] != 0.0) {
778: /* [r_i r_i+1 kr_i kr_i+1]*= [ theta_2i' 0 1 0
779: 0 theta_2i' 0 1
780: theta_2i+1 -thetai_i -eigr_i -eigi_i
781: thetai_i theta_2i+1 eigi_i -eigr_i ] */
782: b[0] = b[5] = PetscConj(theta[2*i]);
783: b[2] = b[7] = -theta[2*i+1];
784: b[6] = -(b[3] = thetai[i]);
785: b[1] = b[4] = 0.0;
786: b[8] = b[13] = 1.0;
787: b[10] = b[15] = -d->eigr[i_s+i];
788: b[14] = -(b[11] = d->eigi[i_s+i]);
789: b[9] = b[12] = 0.0;
790: X[0] = Ax[i]; X[1] = Ax[i+1]; X[2] = Bx[i]; X[3] = Bx[i+1];
791: SlepcUpdateVectorsD(X, 4, 1.0, b, 4, 4, 4, Z, size_Z);
792:
793: i++;
794: } else
795: #endif
796: {
797: /* [Ax_i Bx_i]*= [ theta_2i' 1/nX_i
798: theta_2i+1 -eig_i/nX_i ] */
799: b[0] = PetscConj(theta[i*2]);
800: b[1] = theta[i*2+1];
801: b[2] = 1.0/d->nX[i_s+i];
802: b[3] = -d->eigr[i_s+i]/d->nX[i_s+i];
803: X[0] = Ax[i]; X[1] = Bx[i];
804: SlepcUpdateVectorsD(X, 2, 1.0, b, 2, 2, 2, Z, size_Z);
805:
806: }
807: }
809: /* v <- K^{-1} r = K^{-1}(theta_2i'*Ax + theta_2i+1*Bx) */
810: for(i=0; i<n; i++) {
811: d->improvex_precond(d, i_s+i, r[i], v[i]);
812: }
814: /* kr <- K^{-1}*kr = K^{-1}*(Ax - eig_i*Bx) */
815: d->calcpairs_proj_res(d, i_s, i_e, Bx);
816: for(i=0; i<n; i++) {
817: VecCopy(Bx[i], r[i]);
818: d->improvex_precond(d, i_s+i, r[i], kr[i]);
819: }
821: return(0);
822: }
826: /*
827: Compute: u <- K^{-1}*X, v <- X,
828: kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1]
829: where
830: auxV, 4*(i_e-i_s) auxiliar global vectors
831: pX,pY, the right and left eigenvectors of the projected system
832: ld, the leading dimension of pX and pY
833: */
834: PetscErrorCode dvd_improvex_jd_proj_uv_KXX(dvdDashboard *d, PetscInt i_s,
835: PetscInt i_e, Vec *u, Vec *v, Vec *kr, Vec *auxV, PetscScalar *theta,
836: PetscScalar *thetai, PetscScalar *pX, PetscScalar *pY, PetscInt ld)
837: {
838: PetscErrorCode ierr;
839: PetscInt n = i_e - i_s, i;
840: PetscScalar b[16];
841: Vec *Ax, *Bx, *r = auxV, X[4];
842: /* The memory manager doen't allow to call a subroutines */
843: PetscScalar Z[size_Z];
847: /* [v u] <- X(i) Y(i) */
848: SlepcUpdateVectorsZ(v, 0.0, 1.0, d->V-d->cX_in_H, d->size_V+d->cX_in_H, pX, ld, d->size_H, n);
849: SlepcUpdateVectorsZ(u, 0.0, 1.0, (d->W?d->W:d->V)-d->cX_in_H, d->size_V+d->cX_in_H, pY, ld, d->size_H, n);
851: /* Bx <- B*X(i) */
852: Bx = kr;
853: for(i=i_s; i<i_e; i++) d->nX[i] = 1.0;
854: if (d->BV) {
855: SlepcUpdateVectorsZ(Bx, 0.0, 1.0, d->BV-d->cX_in_H, d->size_BV+d->cX_in_H, pX, ld, d->size_H, n);
856: } else {
857: if (d->B) {
858: for(i=0; i<n; i++) {
859: MatMult(d->B, v[i], Bx[i]);
860: }
861: } else
862: Bx = v;
863: }
865: /* Ax <- A*X(i) */
866: Ax = r;
867: SlepcUpdateVectorsZ(Ax, 0.0, 1.0, d->AV-d->cX_in_H, d->size_AV+d->cX_in_H, pX, ld, d->size_H, n);
869: /* Recompute the eigenvalue */
870: DVD_COMPUTE_N_RR(d->eps, i, i_s, n, d->eigr, d->eigi, u, Ax, Bx, b, ierr);
872: for(i=0; i<n; i++) {
873: if (d->eigi[i_s+i] == 0.0) {
874: /* r <- Ax -eig*Bx */
875: VecAXPBY(r[i], -d->eigr[i_s+i], 1.0, Bx[i]);
876: } else {
877: /* [r_i r_i+1 kr_i kr_i+1]*= [ 1 0
878: 0 1
879: -eigr_i -eigi_i
880: eigi_i -eigr_i] */
881: b[0] = b[5] = 1.0;
882: b[2] = b[7] = -d->eigr[i_s+i];
883: b[6] = -(b[3] = d->eigi[i_s+i]);
884: b[1] = b[4] = 0.0;
885: X[0] = r[i]; X[1] = r[i+1]; X[2] = kr[i]; X[3] = kr[i+1];
886: SlepcUpdateVectorsD(X, 4, 1.0, b, 4, 4, 2, Z, size_Z);
887:
888: i++;
889: }
890: }
892: /* kr <- K^{-1}*r */
893: d->calcpairs_proj_res(d, i_s, i_e, r);
894: for(i=0; i<n; i++) {
895: d->improvex_precond(d, i_s+i, r[i], kr[i]);
896: }
898: /* u <- K^{-1} X(i) */
899: for(i=0; i<n; i++) {
900: d->improvex_precond(d, i_s+i, v[i], u[i]);
901: }
903: return(0);
904: }
909: PetscErrorCode dvd_improvex_jd_lit_const(dvdDashboard *d, dvdBlackboard *b,
910: PetscInt maxits, PetscReal tol,
911: PetscReal fix)
912: {
913: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
917: /* Setup the step */
918: if (b->state >= DVD_STATE_CONF) {
919: data->maxits = maxits;
920: data->tol = tol;
921: data->fix = fix;
922: d->improvex_jd_lit = dvd_improvex_jd_lit_const_0;
923: }
925: return(0);
926: }
931: PetscErrorCode dvd_improvex_jd_lit_const_0(dvdDashboard *d, PetscInt i,
932: PetscScalar* theta, PetscScalar* thetai, PetscInt *maxits, PetscReal *tol)
933: {
934: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
935: PetscReal a;
938: a = SlepcAbsEigenvalue(d->eigr[i],d->eigi[i]);
940: if (d->nR[i]/a < data->fix) {
941: theta[0] = d->eigr[i];
942: theta[1] = 1.0;
943: #if !defined(PETSC_USE_COMPLEX)
944: *thetai = d->eigi[i];
945: #endif
946: } else {
947: theta[0] = d->target[0];
948: theta[1] = d->target[1];
949: #if !defined(PETSC_USE_COMPLEX)
950: *thetai = 0.0;
951: #endif
952: }
954: #if defined(PETSC_USE_COMPLEX)
955: if(thetai) *thetai = 0.0;
956: #endif
957: *maxits = data->maxits;
958: *tol = data->tol;
960: return(0);
961: }
964: /**** Patterns implementation *************************************************/
966: typedef PetscInt (*funcV0_t)(dvdDashboard*, PetscInt, PetscInt, Vec*);
967: typedef PetscInt (*funcV1_t)(dvdDashboard*, PetscInt, PetscInt, Vec*,
968: PetscScalar*, Vec);
972: /* Compute D <- K^{-1} * funcV[r_s..r_e] */
973: PetscErrorCode dvd_improvex_PfuncV(dvdDashboard *d, void *funcV, Vec *D,
974: PetscInt max_size_D, PetscInt r_s, PetscInt r_e, Vec *auxV,
975: PetscScalar *auxS)
976: {
977: PetscErrorCode ierr;
978: PetscInt i;
982: if (max_size_D >= r_e-r_s+1) {
983: /* The optimized version needs one vector extra of D */
984: /* D(1:r.size) = R(r_s:r_e-1) */
985: if (auxS) ((funcV1_t)funcV)(d, r_s, r_e, D+1, auxS, auxV[0]);
986: else ((funcV0_t)funcV)(d, r_s, r_e, D+1);
988: /* D = K^{-1} * R */
989: for (i=0; i<r_e-r_s; i++) {
990: d->improvex_precond(d, i+r_s, D[i+1], D[i]);
991: }
992: } else if (max_size_D == r_e-r_s) {
993: /* Non-optimized version */
994: /* auxV <- R[r_e-1] */
995: if (auxS) ((funcV1_t)funcV)(d, r_e-1, r_e, auxV, auxS, auxV[1]);
996: else ((funcV0_t)funcV)(d, r_e-1, r_e, auxV);
998: /* D(1:r.size-1) = R(r_s:r_e-2) */
999: if (auxS) ((funcV1_t)funcV)(d, r_s, r_e-1, D+1, auxS, auxV[1]);
1000: else ((funcV0_t)funcV)(d, r_s, r_e-1, D+1);
1002: /* D = K^{-1} * R */
1003: for (i=0; i<r_e-r_s-1; i++) {
1004: d->improvex_precond(d, i+r_s, D[i+1], D[i]);
1005: }
1006: d->improvex_precond(d, r_e-1, auxV[0], D[r_e-r_s-1]);
1007: } else {
1008: SETERRQ(PETSC_COMM_SELF,1, "Problem: r_e-r_s > max_size_D!");
1009: }
1011: return(0);
1012: }
1017: /* Compute the left and right projected eigenvectors where,
1018: pX, the returned right eigenvectors
1019: pY, the returned left eigenvectors,
1020: ld_, the leading dimension of pX and pY,
1021: auxS, auxiliar vector of size length 6*d->size_H
1022: */
1023: PetscErrorCode dvd_improvex_get_eigenvectors(dvdDashboard *d, PetscScalar *pX,
1024: PetscScalar *pY, PetscInt ld, PetscScalar *auxS, PetscInt size_auxS)
1025: {
1026: PetscErrorCode ierr;
1027:
1030: SlepcDenseCopy(pY, ld, d->T?d->pY:d->pX, d->ldpX, d->size_H,
1031: d->size_H);
1032: SlepcDenseCopy(pX, ld, d->pX, d->ldpX, d->size_H, d->size_H);
1033:
1034:
1035: /* [qX, qY] <- eig(S, T); pX <- d->pX * qX; pY <- d->pY * qY */
1036: dvd_compute_eigenvectors(d->size_H, d->S, d->ldS, d->T, d->ldT, pX,
1037: ld, pY, ld, auxS, size_auxS, PETSC_TRUE);
1038:
1040: /* 2-Normalize the columns of pX an pY */
1041: SlepcDenseNorm(pX, ld, d->size_H, d->size_H, d->eigi);
1042: SlepcDenseNorm(pY, ld, d->size_H, d->size_H, d->eigi);
1044: return(0);
1045: }
1049: /* Compute (I - KZ*iXKZ*X')*V where,
1050: V, the vectors to apply the projector,
1051: cV, the number of vectors in V,
1052: auxS, auxiliar vector of size length 3*size_iXKZ*cV
1053: */
1054: PetscErrorCode dvd_improvex_apply_proj(dvdDashboard *d, Vec *V, PetscInt cV, PetscScalar *auxS)
1055: {
1056: PetscErrorCode ierr;
1057: dvdImprovex_jd *data = (dvdImprovex_jd*)d->improveX_data;
1058: PetscInt size_in = data->size_iXKZ*cV, i, ldh;
1059: PetscScalar *h, *in, *out;
1060: PetscBLASInt cV_, n, info, ld;
1061: DvdReduction r;
1062: DvdReductionChunk
1063: ops[4];
1064: DvdMult_copy_func
1065: sr[4];
1066:
1069: /* Check consistency */
1070: if (cV > 2) { SETERRQ(PETSC_COMM_SELF,1, "Consistency broken!"); }
1072: /* h <- X'*V */
1073: h = auxS; in = h+size_in; out = in+size_in; ldh = data->size_iXKZ;
1074: SlepcAllReduceSumBegin(ops, 4, in, out, size_in, &r,
1075: ((PetscObject)d->V[0])->comm);
1076: for (i=0; i<cV; i++) {
1077: VecsMultS(&h[i*ldh],0,ldh,d->V-data->size_KZ,0,data->size_KZ,V+i,0,1,&r,&sr[i*2]);
1078: VecsMultS(&h[i*ldh+data->size_KZ],0,ldh,data->u,0,data->size_iXKZ-data->size_KZ,V+i,0,1,&r,&sr[i*2+1]);
1079: }
1080: SlepcAllReduceSumEnd(&r);
1082: /* h <- iXKZ\h */
1083: cV_ = PetscBLASIntCast(cV);
1084: n = PetscBLASIntCast(data->size_iXKZ);
1085: ld = PetscBLASIntCast(data->ldiXKZ);
1086: LAPACKgetrs_("N", &n, &cV_, data->iXKZ, &ld, data->iXKZPivots, h, &n, &info);
1087: if (info) SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_LIB, "Error in Lapack XGETRS %d", info);
1089: /* V <- V - KZ*h */
1090: for (i=0; i<cV; i++) {
1091: SlepcUpdateVectorsZ(V+i,1.0,-1.0,data->KZ,data->size_iXKZ,&h[ldh*i],ldh,data->size_iXKZ,1);
1092: }
1094: return(0);
1095: }