Actual source code: dsnhep.c

slepc-3.5.2 2014-10-10
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2014, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.

  8:    SLEPc is free software: you can redistribute it and/or modify it under  the
  9:    terms of version 3 of the GNU Lesser General Public License as published by
 10:    the Free Software Foundation.

 12:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
 13:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
 14:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
 15:    more details.

 17:    You  should have received a copy of the GNU Lesser General  Public  License
 18:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 19:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 20: */

 22: #include <slepc-private/dsimpl.h>
 23: #include <slepcblaslapack.h>

 27: PetscErrorCode DSAllocate_NHEP(DS ds,PetscInt ld)
 28: {

 32:   DSAllocateMat_Private(ds,DS_MAT_A);
 33:   DSAllocateMat_Private(ds,DS_MAT_Q);
 34:   PetscFree(ds->perm);
 35:   PetscMalloc1(ld,&ds->perm);
 36:   PetscLogObjectMemory((PetscObject)ds,ld*sizeof(PetscInt));
 37:   return(0);
 38: }

 42: PetscErrorCode DSView_NHEP(DS ds,PetscViewer viewer)
 43: {

 47:   DSViewMat(ds,viewer,DS_MAT_A);
 48:   if (ds->state>DS_STATE_INTERMEDIATE) {
 49:     DSViewMat(ds,viewer,DS_MAT_Q);
 50:   }
 51:   if (ds->mat[DS_MAT_X]) {
 52:     DSViewMat(ds,viewer,DS_MAT_X);
 53:   }
 54:   if (ds->mat[DS_MAT_Y]) {
 55:     DSViewMat(ds,viewer,DS_MAT_Y);
 56:   }
 57:   return(0);
 58: }

 62: PetscErrorCode DSVectors_NHEP_Refined_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
 63: {
 64: #if defined(SLEPC_MISSING_LAPACK_GESVD)
 66:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable");
 67: #else
 69:   PetscInt       i,j;
 70:   PetscBLASInt   info,ld,n,n1,lwork,inc=1;
 71:   PetscScalar    sdummy,done=1.0,zero=0.0;
 72:   PetscReal      *sigma;
 73:   PetscBool      iscomplex = PETSC_FALSE;
 74:   PetscScalar    *A = ds->mat[DS_MAT_A];
 75:   PetscScalar    *Q = ds->mat[DS_MAT_Q];
 76:   PetscScalar    *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
 77:   PetscScalar    *W;

 80:   if (left) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for left vectors");
 81:   PetscBLASIntCast(ds->n,&n);
 82:   PetscBLASIntCast(ds->ld,&ld);
 83:   n1 = n+1;
 84:   if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
 85:   if (iscomplex) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complex eigenvalues yet");
 86:   DSAllocateWork_Private(ds,5*ld,6*ld,0);
 87:   DSAllocateMat_Private(ds,DS_MAT_W);
 88:   W = ds->mat[DS_MAT_W];
 89:   lwork = 5*ld;
 90:   sigma = ds->rwork+5*ld;

 92:   /* build A-w*I in W */
 93:   for (j=0;j<n;j++)
 94:     for (i=0;i<=n;i++)
 95:       W[i+j*ld] = A[i+j*ld];
 96:   for (i=0;i<n;i++)
 97:     W[i+i*ld] -= A[(*k)+(*k)*ld];

 99:   /* compute SVD of W */
100: #if !defined(PETSC_USE_COMPLEX)
101:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,&info));
102: #else
103:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,ds->rwork,&info));
104: #endif
105:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGESVD %d",info);

107:   /* the smallest singular value is the new error estimate */
108:   if (rnorm) *rnorm = sigma[n-1];

110:   /* update vector with right singular vector associated to smallest singular value,
111:      accumulating the transformation matrix Q */
112:   PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,W+n-1,&ld,&zero,X+(*k)*ld,&inc));
113:   return(0);
114: #endif
115: }

119: PetscErrorCode DSVectors_NHEP_Refined_All(DS ds,PetscBool left)
120: {
122:   PetscInt       i;

125:   for (i=0;i<ds->n;i++) {
126:     DSVectors_NHEP_Refined_Some(ds,&i,NULL,left);
127:   }
128:   return(0);
129: }

133: PetscErrorCode DSVectors_NHEP_Eigen_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
134: {
135: #if defined(SLEPC_MISSING_LAPACK_TREVC)
137:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable");
138: #else
140:   PetscInt       i;
141:   PetscBLASInt   mm=1,mout,info,ld,n,inc = 1;
142:   PetscScalar    tmp,done=1.0,zero=0.0;
143:   PetscReal      norm;
144:   PetscBool      iscomplex = PETSC_FALSE;
145:   PetscBLASInt   *select;
146:   PetscScalar    *A = ds->mat[DS_MAT_A];
147:   PetscScalar    *Q = ds->mat[DS_MAT_Q];
148:   PetscScalar    *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
149:   PetscScalar    *Y;

152:   PetscBLASIntCast(ds->n,&n);
153:   PetscBLASIntCast(ds->ld,&ld);
154:   DSAllocateWork_Private(ds,0,0,ld);
155:   select = ds->iwork;
156:   for (i=0;i<n;i++) select[i] = (PetscBLASInt)PETSC_FALSE;

158:   /* Compute k-th eigenvector Y of A */
159:   Y = X+(*k)*ld;
160:   select[*k] = (PetscBLASInt)PETSC_TRUE;
161: #if !defined(PETSC_USE_COMPLEX)
162:   if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
163:   mm = iscomplex? 2: 1;
164:   if (iscomplex) select[(*k)+1] = (PetscBLASInt)PETSC_TRUE;
165:   DSAllocateWork_Private(ds,3*ld,0,0);
166:   PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,&info));
167: #else
168:   DSAllocateWork_Private(ds,2*ld,ld,0);
169:   PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,ds->rwork,&info));
170: #endif
171:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTREVC %d",info);
172:   if (mout != mm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Inconsistent arguments");

174:   /* accumulate and normalize eigenvectors */
175:   if (ds->state>=DS_STATE_CONDENSED) {
176:     PetscMemcpy(ds->work,Y,mout*ld*sizeof(PetscScalar));
177:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work,&inc,&zero,Y,&inc));
178: #if !defined(PETSC_USE_COMPLEX)
179:     if (iscomplex) PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work+ld,&inc,&zero,Y+ld,&inc));
180: #endif
181:     norm = BLASnrm2_(&n,Y,&inc);
182: #if !defined(PETSC_USE_COMPLEX)
183:     if (iscomplex) {
184:       tmp = BLASnrm2_(&n,Y+ld,&inc);
185:       norm = SlepcAbsEigenvalue(norm,tmp);
186:     }
187: #endif
188:     tmp = 1.0 / norm;
189:     PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y,&inc));
190: #if !defined(PETSC_USE_COMPLEX)
191:     if (iscomplex) PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y+ld,&inc));
192: #endif
193:   }

195:   /* set output arguments */
196:   if (iscomplex) (*k)++;
197:   if (rnorm) {
198:     if (iscomplex) *rnorm = SlepcAbsEigenvalue(Y[n-1],Y[n-1+ld]);
199:     else *rnorm = PetscAbsScalar(Y[n-1]);
200:   }
201:   return(0);
202: #endif
203: }

207: PetscErrorCode DSVectors_NHEP_Eigen_All(DS ds,PetscBool left)
208: {
209: #if defined(SLEPC_MISSING_LAPACK_TREVC)
211:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable");
212: #else
214:   PetscBLASInt   n,ld,mout,info;
215:   PetscScalar    *X,*Y,*A = ds->mat[DS_MAT_A];
216:   const char     *side,*back;

219:   PetscBLASIntCast(ds->n,&n);
220:   PetscBLASIntCast(ds->ld,&ld);
221:   if (left) {
222:     X = NULL;
223:     Y = ds->mat[DS_MAT_Y];
224:     side = "L";
225:   } else {
226:     X = ds->mat[DS_MAT_X];
227:     Y = NULL;
228:     side = "R";
229:   }
230:   if (ds->state>=DS_STATE_CONDENSED) {
231:     /* DSSolve() has been called, backtransform with matrix Q */
232:     back = "B";
233:     PetscMemcpy(left?Y:X,ds->mat[DS_MAT_Q],ld*ld*sizeof(PetscScalar));
234:   } else back = "A";
235: #if !defined(PETSC_USE_COMPLEX)
236:   DSAllocateWork_Private(ds,3*ld,0,0);
237:   PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,&info));
238: #else
239:   DSAllocateWork_Private(ds,2*ld,ld,0);
240:   PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,ds->rwork,&info));
241: #endif
242:   if (info) SETERRQ1(PetscObjectComm((PetscObject)ds),PETSC_ERR_LIB,"Error in Lapack xTREVC %i",info);
243:   return(0);
244: #endif
245: }

249: PetscErrorCode DSVectors_NHEP(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
250: {

254:   switch (mat) {
255:     case DS_MAT_X:
256:       if (ds->refined) {
257:         if (!ds->extrarow) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Refined vectors require activating the extra row");
258:         if (j) {
259:           DSVectors_NHEP_Refined_Some(ds,j,rnorm,PETSC_FALSE);
260:         } else {
261:           DSVectors_NHEP_Refined_All(ds,PETSC_FALSE);
262:         }
263:       } else {
264:         if (j) {
265:           DSVectors_NHEP_Eigen_Some(ds,j,rnorm,PETSC_FALSE);
266:         } else {
267:           DSVectors_NHEP_Eigen_All(ds,PETSC_FALSE);
268:         }
269:       }
270:       break;
271:     case DS_MAT_Y:
272:       if (ds->refined) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
273:       if (j) {
274:         DSVectors_NHEP_Eigen_Some(ds,j,rnorm,PETSC_TRUE);
275:       } else {
276:         DSVectors_NHEP_Eigen_All(ds,PETSC_TRUE);
277:       }
278:       break;
279:     case DS_MAT_U:
280:     case DS_MAT_VT:
281:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
282:       break;
283:     default:
284:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
285:   }
286:   if (ds->state < DS_STATE_CONDENSED) {
287:     DSSetState(ds,DS_STATE_CONDENSED);
288:   }
289:   return(0);
290: }

294: PetscErrorCode DSNormalize_NHEP(DS ds,DSMatType mat,PetscInt col)
295: {
297:   PetscInt       i,i0,i1;
298:   PetscBLASInt   ld,n,one = 1;
299:   PetscScalar    *A = ds->mat[DS_MAT_A],norm,*x;
300: #if !defined(PETSC_USE_COMPLEX)
301:   PetscScalar    norm0;
302: #endif

305:   switch (mat) {
306:     case DS_MAT_X:
307:     case DS_MAT_Y:
308:     case DS_MAT_Q:
309:       /* Supported matrices */
310:       break;
311:     case DS_MAT_U:
312:     case DS_MAT_VT:
313:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
314:       break;
315:     default:
316:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
317:   }

319:   PetscBLASIntCast(ds->n,&n);
320:   PetscBLASIntCast(ds->ld,&ld);
321:   DSGetArray(ds,mat,&x);
322:   if (col < 0) {
323:     i0 = 0; i1 = ds->n;
324:   } else if (col>0 && A[ds->ld*(col-1)+col] != 0.0) {
325:     i0 = col-1; i1 = col+1;
326:   } else {
327:     i0 = col; i1 = col+1;
328:   }
329:   for (i=i0;i<i1;i++) {
330: #if !defined(PETSC_USE_COMPLEX)
331:     if (i<n-1 && A[ds->ld*i+i+1] != 0.0) {
332:       norm = BLASnrm2_(&n,&x[ld*i],&one);
333:       norm0 = BLASnrm2_(&n,&x[ld*(i+1)],&one);
334:       norm = 1.0/SlepcAbsEigenvalue(norm,norm0);
335:       PetscStackCallBLAS("BLASscal",BLASscal_(&n,&norm,&x[ld*i],&one));
336:       PetscStackCallBLAS("BLASscal",BLASscal_(&n,&norm,&x[ld*(i+1)],&one));
337:       i++;
338:     } else
339: #endif
340:     {
341:       norm = BLASnrm2_(&n,&x[ld*i],&one);
342:       norm = 1.0/norm;
343:       PetscStackCallBLAS("BLASscal",BLASscal_(&n,&norm,&x[ld*i],&one));
344:     }
345:   }
346:   return(0);
347: }

351: PetscErrorCode DSSort_NHEP_Arbitrary(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
352: {
353: #if defined(SLEPC_MISSING_LAPACK_TRSEN)
355:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TRSEN - Lapack routine is unavailable");
356: #else
358:   PetscInt       i;
359:   PetscBLASInt   info,n,ld,mout,lwork,*selection;
360:   PetscScalar    *T = ds->mat[DS_MAT_A],*Q = ds->mat[DS_MAT_Q],*work;
361: #if !defined(PETSC_USE_COMPLEX)
362:   PetscBLASInt   *iwork,liwork;
363: #endif

366:   if (!k) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must supply argument k");
367:   PetscBLASIntCast(ds->n,&n);
368:   PetscBLASIntCast(ds->ld,&ld);
369: #if !defined(PETSC_USE_COMPLEX)
370:   lwork = n;
371:   liwork = 1;
372:   DSAllocateWork_Private(ds,lwork,0,liwork+n);
373:   work = ds->work;
374:   lwork = ds->lwork;
375:   selection = ds->iwork;
376:   iwork = ds->iwork + n;
377:   liwork = ds->liwork - n;
378: #else
379:   lwork = 1;
380:   DSAllocateWork_Private(ds,lwork,0,n);
381:   work = ds->work;
382:   selection = ds->iwork;
383: #endif
384:   /* Compute the selected eigenvalue to be in the leading position */
385:   DSSortEigenvalues_Private(ds,rr,ri,ds->perm,PETSC_FALSE);
386:   PetscMemzero(selection,n*sizeof(PetscBLASInt));
387:   for (i=0;i<*k;i++) selection[ds->perm[i]] = 1;
388: #if !defined(PETSC_USE_COMPLEX)
389:   PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,wi,&mout,NULL,NULL,work,&lwork,iwork,&liwork,&info));
390: #else
391:   PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,&mout,NULL,NULL,work,&lwork,&info));
392: #endif
393:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTRSEN %d",info);
394:   *k = mout;
395:   return(0);
396: #endif
397: }

401: PetscErrorCode DSSort_NHEP_Total(DS ds,PetscScalar *wr,PetscScalar *wi)
402: {
403: #if defined(SLEPC_MISSING_LAPACK_TREXC)
405:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREXC - Lapack routine is unavailable");
406: #else
408:   PetscScalar    re;
409:   PetscInt       i,j,pos,result;
410:   PetscBLASInt   ifst,ilst,info,n,ld;
411:   PetscScalar    *T = ds->mat[DS_MAT_A];
412:   PetscScalar    *Q = ds->mat[DS_MAT_Q];
413: #if !defined(PETSC_USE_COMPLEX)
414:   PetscScalar    *work,im;
415: #endif

418:   PetscBLASIntCast(ds->n,&n);
419:   PetscBLASIntCast(ds->ld,&ld);
420: #if !defined(PETSC_USE_COMPLEX)
421:   DSAllocateWork_Private(ds,ld,0,0);
422:   work = ds->work;
423: #endif
424:   /* selection sort */
425:   for (i=ds->l;i<n-1;i++) {
426:     re = wr[i];
427: #if !defined(PETSC_USE_COMPLEX)
428:     im = wi[i];
429: #endif
430:     pos = 0;
431:     j=i+1; /* j points to the next eigenvalue */
432: #if !defined(PETSC_USE_COMPLEX)
433:     if (im != 0) j=i+2;
434: #endif
435:     /* find minimum eigenvalue */
436:     for (;j<n;j++) {
437: #if !defined(PETSC_USE_COMPLEX)
438:       SlepcSCCompare(ds->sc,re,im,wr[j],wi[j],&result);
439: #else
440:       SlepcSCCompare(ds->sc,re,0.0,wr[j],0.0,&result);
441: #endif
442:       if (result > 0) {
443:         re = wr[j];
444: #if !defined(PETSC_USE_COMPLEX)
445:         im = wi[j];
446: #endif
447:         pos = j;
448:       }
449: #if !defined(PETSC_USE_COMPLEX)
450:       if (wi[j] != 0) j++;
451: #endif
452:     }
453:     if (pos) {
454:       /* interchange blocks */
455:       PetscBLASIntCast(pos+1,&ifst);
456:       PetscBLASIntCast(i+1,&ilst);
457: #if !defined(PETSC_USE_COMPLEX)
458:       PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,work,&info));
459: #else
460:       PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,&info));
461: #endif
462:       if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTREXC %d",info);
463:       /* recover original eigenvalues from T matrix */
464:       for (j=i;j<n;j++) {
465:         wr[j] = T[j+j*ld];
466: #if !defined(PETSC_USE_COMPLEX)
467:         if (j<n-1 && T[j+1+j*ld] != 0.0) {
468:           /* complex conjugate eigenvalue */
469:           wi[j] = PetscSqrtReal(PetscAbsReal(T[j+1+j*ld])) *
470:                   PetscSqrtReal(PetscAbsReal(T[j+(j+1)*ld]));
471:           wr[j+1] = wr[j];
472:           wi[j+1] = -wi[j];
473:           j++;
474:         } else {
475:           wi[j] = 0.0;
476:         }
477: #endif
478:       }
479:     }
480: #if !defined(PETSC_USE_COMPLEX)
481:     if (wi[i] != 0) i++;
482: #endif
483:   }
484:   return(0);
485: #endif
486: }

490: PetscErrorCode DSSort_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
491: {

495:   if (!rr || wr == rr) {
496:     DSSort_NHEP_Total(ds,wr,wi);
497:   } else {
498:     DSSort_NHEP_Arbitrary(ds,wr,wi,rr,ri,k);
499:   }
500:   return(0);
501: }

505: PetscErrorCode DSUpdateExtraRow_NHEP(DS ds)
506: {
508:   PetscInt       i;
509:   PetscBLASInt   n,ld,incx=1;
510:   PetscScalar    *A,*Q,*x,*y,one=1.0,zero=0.0;

513:   PetscBLASIntCast(ds->n,&n);
514:   PetscBLASIntCast(ds->ld,&ld);
515:   A  = ds->mat[DS_MAT_A];
516:   Q  = ds->mat[DS_MAT_Q];
517:   DSAllocateWork_Private(ds,2*ld,0,0);
518:   x = ds->work;
519:   y = ds->work+ld;
520:   for (i=0;i<n;i++) x[i] = A[n+i*ld];
521:   PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
522:   for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
523:   ds->k = n;
524:   return(0);
525: }

529: PetscErrorCode DSSolve_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi)
530: {
531: #if defined(SLEPC_MISSING_LAPACK_GEHRD) || defined(SLEPC_MISSING_LAPACK_ORGHR) || defined(PETSC_MISSING_LAPACK_HSEQR)
533:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEHRD/ORGHR/HSEQR - Lapack routines are unavailable");
534: #else
536:   PetscScalar    *work,*tau;
537:   PetscInt       i,j;
538:   PetscBLASInt   ilo,lwork,info,n,ld;
539:   PetscScalar    *A = ds->mat[DS_MAT_A];
540:   PetscScalar    *Q = ds->mat[DS_MAT_Q];

543: #if !defined(PETSC_USE_COMPLEX)
545: #endif
546:   PetscBLASIntCast(ds->n,&n);
547:   PetscBLASIntCast(ds->ld,&ld);
548:   PetscBLASIntCast(ds->l+1,&ilo);
549:   DSAllocateWork_Private(ds,ld+ld*ld,0,0);
550:   tau  = ds->work;
551:   work = ds->work+ld;
552:   lwork = ld*ld;

554:   /* initialize orthogonal matrix */
555:   PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
556:   for (i=0;i<n;i++)
557:     Q[i+i*ld] = 1.0;
558:   if (n==1) { /* quick return */
559:     wr[0] = A[0];
560:     wi[0] = 0.0;
561:     return(0);
562:   }

564:   /* reduce to upper Hessenberg form */
565:   if (ds->state<DS_STATE_INTERMEDIATE) {
566:     PetscStackCallBLAS("LAPACKgehrd",LAPACKgehrd_(&n,&ilo,&n,A,&ld,tau,work,&lwork,&info));
567:     if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEHRD %d",info);
568:     for (j=0;j<n-1;j++) {
569:       for (i=j+2;i<n;i++) {
570:         Q[i+j*ld] = A[i+j*ld];
571:         A[i+j*ld] = 0.0;
572:       }
573:     }
574:     PetscStackCallBLAS("LAPACKorghr",LAPACKorghr_(&n,&ilo,&n,Q,&ld,tau,work,&lwork,&info));
575:     if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGHR %d",info);
576:   }

578:   /* compute the (real) Schur form */
579: #if !defined(PETSC_USE_COMPLEX)
580:   PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,wi,Q,&ld,work,&lwork,&info));
581:   for (j=0;j<ds->l;j++) {
582:     if (j==n-1 || A[j+1+j*ld] == 0.0) {
583:       /* real eigenvalue */
584:       wr[j] = A[j+j*ld];
585:       wi[j] = 0.0;
586:     } else {
587:       /* complex eigenvalue */
588:       wr[j] = A[j+j*ld];
589:       wr[j+1] = A[j+j*ld];
590:       wi[j] = PetscSqrtReal(PetscAbsReal(A[j+1+j*ld])) *
591:               PetscSqrtReal(PetscAbsReal(A[j+(j+1)*ld]));
592:       wi[j+1] = -wi[j];
593:       j++;
594:     }
595:   }
596: #else
597:   PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,Q,&ld,work,&lwork,&info));
598:   if (wi) for (i=ds->l;i<n;i++) wi[i] = 0.0;
599: #endif
600:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xHSEQR %d",info);
601:   return(0);
602: #endif
603: }

607: PetscErrorCode DSTruncate_NHEP(DS ds,PetscInt n)
608: {
609:   PetscInt       i,newn,ld=ds->ld,l=ds->l;
610:   PetscScalar    *A;

613:   if (ds->state==DS_STATE_CONDENSED) ds->t = ds->n;
614:   A = ds->mat[DS_MAT_A];
615:   /* be careful not to break a diagonal 2x2 block */
616:   if (A[n+(n-1)*ld]==0.0) newn = n;
617:   else {
618:     if (n<ds->n-1) newn = n+1;
619:     else newn = n-1;
620:   }
621:   if (ds->extrarow && ds->k==ds->n) {
622:     /* copy entries of extra row to the new position, then clean last row */
623:     for (i=l;i<newn;i++) A[newn+i*ld] = A[ds->n+i*ld];
624:     for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
625:   }
626:   ds->k = 0;
627:   ds->n = newn;
628:   return(0);
629: }

633: PetscErrorCode DSCond_NHEP(DS ds,PetscReal *cond)
634: {
635: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(SLEPC_MISSING_LAPACK_GETRI) || defined(SLEPC_MISSING_LAPACK_LANGE) || defined(SLEPC_MISSING_LAPACK_LANHS)
637:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRI/LANGE/LANHS - Lapack routines are unavailable");
638: #else
640:   PetscScalar    *work;
641:   PetscReal      *rwork;
642:   PetscBLASInt   *ipiv;
643:   PetscBLASInt   lwork,info,n,ld;
644:   PetscReal      hn,hin;
645:   PetscScalar    *A;

648:   PetscBLASIntCast(ds->n,&n);
649:   PetscBLASIntCast(ds->ld,&ld);
650:   lwork = 8*ld;
651:   DSAllocateWork_Private(ds,lwork,ld,ld);
652:   work  = ds->work;
653:   rwork = ds->rwork;
654:   ipiv  = ds->iwork;

656:   /* use workspace matrix W to avoid overwriting A */
657:   DSAllocateMat_Private(ds,DS_MAT_W);
658:   A = ds->mat[DS_MAT_W];
659:   PetscMemcpy(A,ds->mat[DS_MAT_A],sizeof(PetscScalar)*ds->ld*ds->ld);

661:   /* norm of A */
662:   if (ds->state<DS_STATE_INTERMEDIATE) hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);
663:   else hn = LAPACKlanhs_("I",&n,A,&ld,rwork);

665:   /* norm of inv(A) */
666:   PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
667:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGETRF %d",info);
668:   PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
669:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGETRI %d",info);
670:   hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);

672:   *cond = hn*hin;
673:   return(0);
674: #endif
675: }

679: PetscErrorCode DSTranslateHarmonic_NHEP(DS ds,PetscScalar tau,PetscReal beta,PetscBool recover,PetscScalar *gin,PetscReal *gamma)
680: {
681: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRS)
683:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRS - Lapack routines are unavailable");
684: #else
686:   PetscInt       i,j;
687:   PetscBLASInt   *ipiv,info,n,ld,one=1,ncol;
688:   PetscScalar    *A,*B,*Q,*g=gin,*ghat;
689:   PetscScalar    done=1.0,dmone=-1.0,dzero=0.0;
690:   PetscReal      gnorm;

693:   PetscBLASIntCast(ds->n,&n);
694:   PetscBLASIntCast(ds->ld,&ld);
695:   A  = ds->mat[DS_MAT_A];

697:   if (!recover) {

699:     DSAllocateWork_Private(ds,0,0,ld);
700:     ipiv = ds->iwork;
701:     if (!g) {
702:       DSAllocateWork_Private(ds,ld,0,0);
703:       g = ds->work;
704:     }
705:     /* use workspace matrix W to factor A-tau*eye(n) */
706:     DSAllocateMat_Private(ds,DS_MAT_W);
707:     B = ds->mat[DS_MAT_W];
708:     PetscMemcpy(B,A,sizeof(PetscScalar)*ld*ld);

710:     /* Vector g initialy stores b = beta*e_n^T */
711:     PetscMemzero(g,n*sizeof(PetscScalar));
712:     g[n-1] = beta;

714:     /* g = (A-tau*eye(n))'\b */
715:     for (i=0;i<n;i++)
716:       B[i+i*ld] -= tau;
717:     PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,B,&ld,ipiv,&info));
718:     if (info<0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Bad argument to LU factorization");
719:     if (info>0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Bad LU factorization");
720:     PetscLogFlops(2.0*n*n*n/3.0);
721:     PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("C",&n,&one,B,&ld,ipiv,g,&ld,&info));
722:     if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve");
723:     PetscLogFlops(2.0*n*n-n);

725:     /* A = A + g*b' */
726:     for (i=0;i<n;i++)
727:       A[i+(n-1)*ld] += g[i]*beta;

729:   } else { /* recover */

732:     DSAllocateWork_Private(ds,ld,0,0);
733:     ghat = ds->work;
734:     Q    = ds->mat[DS_MAT_Q];

736:     /* g^ = -Q(:,idx)'*g */
737:     PetscBLASIntCast(ds->l+ds->k,&ncol);
738:     PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&ncol,&dmone,Q,&ld,g,&one,&dzero,ghat,&one));

740:     /* A = A + g^*b' */
741:     for (i=0;i<ds->l+ds->k;i++)
742:       for (j=ds->l;j<ds->l+ds->k;j++)
743:         A[i+j*ld] += ghat[i]*Q[n-1+j*ld]*beta;

745:     /* g~ = (I-Q(:,idx)*Q(:,idx)')*g = g+Q(:,idx)*g^ */
746:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&ncol,&done,Q,&ld,ghat,&one,&done,g,&one));
747:   }

749:   /* Compute gamma factor */
750:   if (gamma) {
751:     gnorm = 0.0;
752:     for (i=0;i<n;i++)
753:       gnorm = gnorm + PetscRealPart(g[i]*PetscConj(g[i]));
754:     *gamma = PetscSqrtReal(1.0+gnorm);
755:   }
756:   return(0);
757: #endif
758: }

760: #define MAX_PADE 6

764: PetscErrorCode DSFunction_EXP_NHEP_PADE(DS ds)
765: {
766: #if defined(PETSC_MISSING_LAPACK_GESV) || defined(SLEPC_MISSING_LAPACK_LANGE)
768:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESV/LANGE - Lapack routines are unavailable");
769: #else
771:   PetscBLASInt   n,ld,ld2,*ipiv,info,inc=1;
772:   PetscInt       j,k,odd;
773:   const PetscInt p=MAX_PADE;
774:   PetscReal      c[MAX_PADE+1],s;
775:   PetscScalar    scale,mone=-1.0,one=1.0,two=2.0,zero=0.0;
776:   PetscScalar    *A,*A2,*Q,*P,*W,*aux;

779:   PetscBLASIntCast(ds->n,&n);
780:   PetscBLASIntCast(ds->ld,&ld);
781:   ld2 = ld*ld;
782:   DSAllocateWork_Private(ds,0,ld,ld);
783:   ipiv = ds->iwork;
784:   if (!ds->mat[DS_MAT_W]) { DSAllocateMat_Private(ds,DS_MAT_W); }
785:   if (!ds->mat[DS_MAT_Z]) { DSAllocateMat_Private(ds,DS_MAT_Z); }
786:   A  = ds->mat[DS_MAT_A];
787:   A2 = ds->mat[DS_MAT_Z];
788:   Q  = ds->mat[DS_MAT_Q];
789:   P  = ds->mat[DS_MAT_F];
790:   W  = ds->mat[DS_MAT_W];

792:   /* Pade' coefficients */
793:   c[0] = 1.0;
794:   for (k=1;k<=p;k++) {
795:     c[k] = c[k-1]*(p+1-k)/(k*(2*p+1-k));
796:   }

798:   /* Scaling */
799:   s = LAPACKlange_("I",&n,&n,A,&ld,ds->rwork);
800:   if (s>0.5) {
801:     s = PetscMax(0,(int)(PetscLogReal(s)/PetscLogReal(2.0)) + 2);
802:     scale = PetscPowScalar(2,(-1)*s);
803:     PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&scale,A,&inc));
804:   }

806:   /* Horner evaluation */
807:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,A,&ld,A,&ld,&zero,A2,&ld));
808:   PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
809:   PetscMemzero(P,ld*ld*sizeof(PetscScalar));
810:   for (j=0;j<n;j++) {
811:     Q[j+j*ld] = c[p];
812:     P[j+j*ld] = c[p-1];
813:   }

815:   odd = 1;
816:   for (k=p-1;k>0;k--) {
817:     if (odd==1) {
818:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,Q,&ld,A2,&ld,&zero,W,&ld));
819:       aux = Q;
820:       Q = W;
821:       W = aux;
822:       for (j=0;j<n;j++)
823:         Q[j+j*ld] = Q[j+j*ld] + c[k-1];
824:     } else {
825:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,A2,&ld,&zero,W,&ld));
826:       aux = P;
827:       P = W;
828:       W = aux;
829:       for (j=0;j<n;j++)
830:         P[j+j*ld] = P[j+j*ld] + c[k-1];
831:     }
832:     odd = 1-odd;
833:   }
834:   if (odd==1) {
835:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,Q,&ld,A,&ld,&zero,W,&ld));
836:     aux = Q;
837:     Q = W;
838:     W = aux;
839:     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&ld2,&mone,P,&inc,Q,&inc));
840:     PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Q,&ld,ipiv,P,&ld,&info));
841:     PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&two,P,&inc));
842:     for (j=0;j<n;j++)
843:       P[j+j*ld] = P[j+j*ld] + 1.0;
844:     PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&mone,P,&inc));
845:   } else {
846:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,A,&ld,&zero,W,&ld));
847:     aux = P;
848:     P = W;
849:     W = aux;
850:     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&ld2,&mone,P,&inc,Q,&inc));
851:     PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Q,&ld,ipiv,P,&ld,&info));
852:     PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&two,P,&inc));
853:     for (j=0;j<n;j++)
854:       P[j+j*ld] = P[j+j*ld] + 1.0;
855:   }

857:   for (k=1;k<=s;k++) {
858:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,P,&ld,&zero,W,&ld));
859:     PetscMemcpy(P,W,ld2*sizeof(PetscScalar));
860:   }
861:   if (P!=ds->mat[DS_MAT_F]) {
862:     PetscMemcpy(ds->mat[DS_MAT_F],P,ld2*sizeof(PetscScalar));
863:   }
864:   return(0);
865: #endif
866: }

870: PETSC_EXTERN PetscErrorCode DSCreate_NHEP(DS ds)
871: {
873:   ds->ops->allocate      = DSAllocate_NHEP;
874:   ds->ops->view          = DSView_NHEP;
875:   ds->ops->vectors       = DSVectors_NHEP;
876:   ds->ops->solve[0]      = DSSolve_NHEP;
877:   ds->ops->sort          = DSSort_NHEP;
878:   ds->ops->truncate      = DSTruncate_NHEP;
879:   ds->ops->update        = DSUpdateExtraRow_NHEP;
880:   ds->ops->cond          = DSCond_NHEP;
881:   ds->ops->transharm     = DSTranslateHarmonic_NHEP;
882:   ds->ops->normalize     = DSNormalize_NHEP;

884:   ds->ops->computefun[SLEPC_FUNCTION_EXP][0] = DSFunction_EXP_NHEP_PADE;
885:   return(0);
886: }