Actual source code: contiguous.c

  1: /*
  2:    Subroutines related to special Vecs that share a common contiguous storage.

  4:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  5:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  6:    Copyright (c) 2002-2011, Universitat Politecnica de Valencia, Spain

  8:    This file is part of SLEPc.
  9:       
 10:    SLEPc is free software: you can redistribute it and/or modify it under  the
 11:    terms of version 3 of the GNU Lesser General Public License as published by
 12:    the Free Software Foundation.

 14:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY 
 15:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS 
 16:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for 
 17:    more details.

 19:    You  should have received a copy of the GNU Lesser General  Public  License
 20:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 21:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 22: */

 24: #include <private/vecimplslepc.h>            /*I "slepcvec.h" I*/
 25: #include <petscblaslapack.h>

 27: PetscLogEvent SLEPC_UpdateVectors = 0,SLEPC_VecMAXPBY = 0;

 31: /*
 32:   Frees the array of the contiguous vectors when all vectors have been destroyed.
 33: */
 34: static PetscErrorCode Vecs_ContiguousDestroy(void *ctx)
 35: {
 36:   PetscErrorCode  ierr;
 37:   Vecs_Contiguous *vc = (Vecs_Contiguous*)ctx;

 40:   PetscFree(vc->array);
 41:   PetscFree(vc);
 42:   return(0);
 43: }

 47: /*
 48:   Version of VecDuplicateVecs that sets contiguous storage.
 49: */
 50: static PetscErrorCode VecDuplicateVecs_Contiguous(Vec v,PetscInt m,Vec *V[])
 51: {
 52:   PetscErrorCode  ierr;
 53:   PetscInt        i,nloc;
 54:   PetscScalar     *pV;
 55:   PetscContainer  container;
 56:   Vecs_Contiguous *vc;

 59:   /* Allocate array */
 60:   VecGetLocalSize(v,&nloc);
 61:   PetscMalloc(m*nloc*sizeof(PetscScalar),&pV);
 62:   /* Create container */
 63:   PetscNew(Vecs_Contiguous,&vc);
 64:   vc->nvecs = m;
 65:   vc->array = pV;
 66:   PetscContainerCreate(((PetscObject)v)->comm,&container);
 67:   PetscContainerSetPointer(container,vc);
 68:   PetscContainerSetUserDestroy(container,Vecs_ContiguousDestroy);
 69:   /* Create vectors */
 70:   PetscMalloc(m*sizeof(Vec),V);
 71:   for (i=0;i<m;i++) {
 72:     VecCreateMPIWithArray(((PetscObject)v)->comm,nloc,PETSC_DECIDE,pV+i*nloc,*V+i);
 73:     PetscObjectCompose((PetscObject)*(*V+i),"contiguous",(PetscObject)container);
 74:   }
 75:   PetscContainerDestroy(&container);
 76:   return(0);
 77: }

 81: /*@
 82:    SlepcVecSetTemplate - Sets a vector as a template for contiguous storage.

 84:    Collective on Vec

 86:    Input Parameters:
 87: .  v - the vector

 89:    Note:
 90:    Once this function is called, subsequent calls to VecDuplicateVecs()
 91:    with this vector will use a special version that generates vectors with
 92:    contiguous storage, that is, the array of values of V[1] immediately
 93:    follows the array of V[0], and so on.

 95:    Level: developer
 96: @*/
 97: PetscErrorCode SlepcVecSetTemplate(Vec v)
 98: {
100:   PetscBool      flg;

104:   PetscTypeCompareAny((PetscObject)v,&flg,VECSEQ,VECMPI,"");
105:   if (!flg) SETERRQ(((PetscObject)v)->comm,PETSC_ERR_SUP,"Only available for standard vectors (VECSEQ or VECMPI)");
106:   v->ops->duplicatevecs = VecDuplicateVecs_Contiguous;
107:   return(0);
108: }

112: /*@
113:    SlepcMatGetVecsTemplate - Get vectors compatible with a matrix,
114:    i.e. with the same parallel layout, and mark them as templates
115:    for contiguous storage.
116:    
117:    Collective on Mat

119:    Input Parameter:
120: .  mat - the matrix

122:    Output Parameters:
123: +  right - (optional) vector that the matrix can be multiplied against
124: -  left  - (optional) vector that the matrix vector product can be stored in

126:    Options Database Keys:
127: .  -slepc_non_contiguous - Disable contiguous vector storage

129:    Notes:
130:    Use -slepc_non_contiguous to disable contiguous storage throughout SLEPc.
131:    Contiguous storage is currently also disabled in AIJCUSP matrices.

133:    Level: developer

135: .seealso: SlepcVecSetTemplate()
136: @*/
137: PetscErrorCode SlepcMatGetVecsTemplate(Mat mat,Vec *right,Vec *left)
138: {
140:   PetscBool      flg;
141:   Vec            v;

146:   MatGetVecs(mat,right,left);
147:   v = right? *right: *left;
148:   PetscTypeCompareAny((PetscObject)v,&flg,VECSEQ,VECMPI,"");
149:   if (!flg) return(0);
150:   PetscOptionsHasName(PETSC_NULL,"-slepc_non_contiguous",&flg);
151:   if (!flg) {
152:     if (right) { SlepcVecSetTemplate(*right); }
153:     if (left) { SlepcVecSetTemplate(*left); }
154:   }
155:   return(0);
156: }

160: /*
161:    SlepcUpdateVectors_Noncontiguous_Inplace - V = V*Q for regular vectors
162:    (non-contiguous).
163: */
164: static PetscErrorCode SlepcUpdateVectors_Noncontiguous_Inplace(PetscInt m_,Vec *V,const PetscScalar *Q,PetscInt ldq_,PetscBool qtrans)
165: {
166:   PetscInt       l;
167:   PetscBLASInt   j,ls,bs=64,m,k,ldq;
168:   PetscScalar    *pv,*pq=(PetscScalar*)Q,*work,*out,one=1.0,zero=0.0;

172:   PetscLogEventBegin(SLEPC_UpdateVectors,0,0,0,0);
173:   VecGetLocalSize(V[0],&l);
174:   ls = PetscBLASIntCast(l);
175:   m = PetscBLASIntCast(m_);
176:   ldq = PetscBLASIntCast(ldq_);
177:   PetscMalloc(sizeof(PetscScalar)*2*bs*m,&work);
178:   out = work+m*bs;
179:   k = ls % bs;
180:   if (k) {
181:     for (j=0;j<m;j++) {
182:       VecGetArray(V[j],&pv);
183:       PetscMemcpy(work+j*bs,pv,k*sizeof(PetscScalar));
184:       VecRestoreArray(V[j],&pv);
185:     }
186:     BLASgemm_("N",qtrans?"C":"N",&k,&m,&m,&one,work,&bs,pq,&ldq,&zero,out,&bs);
187:     for (j=0;j<m;j++) {
188:       VecGetArray(V[j],&pv);
189:       PetscMemcpy(pv,out+j*bs,k*sizeof(PetscScalar));
190:       VecRestoreArray(V[j],&pv);
191:     }
192:   }
193:   for (;k<ls;k+=bs) {
194:     for (j=0;j<m;j++) {
195:       VecGetArray(V[j],&pv);
196:       PetscMemcpy(work+j*bs,pv+k,bs*sizeof(PetscScalar));
197:       VecRestoreArray(V[j],&pv);
198:     }
199:     BLASgemm_("N",qtrans?"C":"N",&bs,&m,&m,&one,work,&bs,pq,&ldq,&zero,out,&bs);
200:     for (j=0;j<m;j++) {
201:       VecGetArray(V[j],&pv);
202:       PetscMemcpy(pv+k,out+j*bs,bs*sizeof(PetscScalar));
203:       VecRestoreArray(V[j],&pv);
204:     }
205:   }
206:   PetscFree(work);
207:   PetscLogFlops(m*m*2.0*ls);
208:   PetscLogEventEnd(SLEPC_UpdateVectors,0,0,0,0);
209:   return(0);
210: }

214: /*
215:    SlepcUpdateVectors_Noncontiguous - V(:,s:e-1) = V*Q(:,s:e-1) for 
216:    regular vectors (non-contiguous).

218:    Writing V = [ V1 V2 V3 ] and Q = [ Q1 Q2 Q3 ], where the V2 and Q2
219:    correspond to the columns s:e-1, the computation is done as
220:                   V2 := V2*Q2 + V1*Q1 + V3*Q3
221:    (the first term is computed with SlepcUpdateVectors_Noncontiguous_Inplace).
222: */
223: static PetscErrorCode SlepcUpdateVectors_Noncontiguous(PetscInt n,Vec *V,PetscInt s,PetscInt e,const PetscScalar *Q,PetscInt ldq,PetscBool qtrans)
224: {
225:   PetscInt       i,j,m,ln;
226:   PetscScalar    *pq,qt[100];
227:   PetscBool      allocated = PETSC_FALSE;

231:   m = e-s;
232:   if (qtrans) {
233:     ln = PetscMax(s,n-e);
234:     if (ln<=100) pq = qt;
235:     else {
236:       PetscMalloc(ln*sizeof(PetscScalar),&pq);
237:       allocated = PETSC_TRUE;
238:     }
239:   }
240:   /* V2 */
241:   SlepcUpdateVectors_Noncontiguous_Inplace(m,V+s,Q+s*ldq+s,ldq,qtrans);
242:   /* V1 */
243:   if (s>0) {
244:     for (i=s;i<e;i++) {
245:       if (qtrans) {
246:         for (j=0;j<s;j++) pq[j] = Q[i+j*ldq];
247:       } else pq = (PetscScalar*)Q+i*ldq;
248:       VecMAXPY(V[i],s,pq,V);
249:     }
250:   }
251:   /* V3 */
252:   if (n>e) {
253:     for (i=s;i<e;i++) {
254:       if (qtrans) {
255:         for (j=0;j<n-e;j++) pq[j] = Q[i+(j+e)*ldq];
256:       } else pq = (PetscScalar*)Q+i*ldq+e;
257:       VecMAXPY(V[i],n-e,pq,V+e);
258:     }
259:   }
260:   if (allocated) { PetscFree(pq); }
261:   return(0);
262: }

266: /*@
267:    SlepcUpdateVectors - Update a set of vectors V as V(:,s:e-1) = V*Q(:,s:e-1).

269:    Not Collective

271:    Input parameters:
272: +  n      - number of vectors in V
273: .  s      - first column of V to be overwritten
274: .  e      - first column of V not to be overwritten
275: .  Q      - matrix containing the coefficients of the update
276: .  ldq    - leading dimension of Q
277: -  qtrans - flag indicating if Q is to be transposed

279:    Input/Output parameter:
280: .  V      - set of vectors

282:    Notes: 
283:    This function computes V(:,s:e-1) = V*Q(:,s:e-1), that is, given a set of
284:    vectors V, columns from s to e-1 are overwritten with columns from s to
285:    e-1 of the matrix-matrix product V*Q.

287:    Matrix V is represented as an array of Vec, whereas Q is represented as
288:    a column-major dense array of leading dimension ldq. Only columns s to e-1
289:    of Q are referenced.

291:    If qtrans=PETSC_TRUE, the operation is V*Q'.

293:    This routine is implemented with a call to BLAS, therefore V is an array 
294:    of Vec which have the data stored contiguously in memory as a Fortran matrix.
295:    PETSc does not create such arrays by default.

297:    Level: developer

299: @*/
300: PetscErrorCode SlepcUpdateVectors(PetscInt n,Vec *V,PetscInt s,PetscInt e,const PetscScalar *Q,PetscInt ldq,PetscBool qtrans)
301: {
302:   PetscContainer container;

306:   if (n<0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of vectors (given %D) cannot be negative",n);
307:   if (n==0 || s>=e) return(0);
312:   PetscObjectQuery((PetscObject)(V[0]),"contiguous",(PetscObject*)&container);
313:   if (container) {
314:     /* contiguous Vecs, use BLAS calls */
315:     SlepcUpdateStrideVectors(n,V,s,1,e,Q,ldq,qtrans);
316:   } else {
317:     /* use regular Vec operations */
318:     SlepcUpdateVectors_Noncontiguous(n,V,s,e,Q,ldq,qtrans);
319:   }
320:   return(0);
321: }

325: /*@
326:    SlepcUpdateStrideVectors - Update a set of vectors V as
327:    V(:,s:d:e-1) = V*Q(:,s:e-1).

329:    Not Collective

331:    Input parameters:
332: +  n      - number of vectors in V
333: .  s      - first column of V to be overwritten
334: .  d      - stride
335: .  e      - first column of V not to be overwritten
336: .  Q      - matrix containing the coefficients of the update
337: .  ldq    - leading dimension of Q
338: -  qtrans - flag indicating if Q is to be transposed

340:    Input/Output parameter:
341: .  V      - set of vectors

343:    Notes: 
344:    This function computes V(:,s:d:e-1) = V*Q(:,s:e-1), that is, given a set
345:    of vectors V, columns from s to e-1 are overwritten with columns from s to
346:    e-1 of the matrix-matrix product V*Q.

348:    Matrix V is represented as an array of Vec, whereas Q is represented as
349:    a column-major dense array of leading dimension ldq. Only columns s to e-1
350:    of Q are referenced.

352:    If qtrans=PETSC_TRUE, the operation is V*Q'.

354:    This routine is implemented with a call to BLAS, therefore V is an array 
355:    of Vec which have the data stored contiguously in memory as a Fortran matrix.
356:    PETSc does not create such arrays by default.

358:    Level: developer

360: @*/
361: PetscErrorCode SlepcUpdateStrideVectors(PetscInt n_,Vec *V,PetscInt s,PetscInt d,PetscInt e,const PetscScalar *Q,PetscInt ldq_,PetscBool qtrans)
362: {
364:   PetscInt       l;
365:   PetscBLASInt   i,j,k,bs=64,m,n,ldq,ls,ld;
366:   PetscScalar    *pv,*pw,*pq,*work,*pwork,one=1.0,zero=0.0;
367:   const char     *qt;

370:   n = PetscBLASIntCast(n_/d);
371:   ldq = PetscBLASIntCast(ldq_);
372:   m = (e-s)/d;
373:   if (m==0) return(0);
375:   if (m<0 || n<0 || s<0 || m>n) SETERRQ(((PetscObject)*V)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Index argument out of range");
376:   PetscLogEventBegin(SLEPC_UpdateVectors,0,0,0,0);
377:   VecGetLocalSize(V[0],&l);
378:   ls = PetscBLASIntCast(l);
379:   ld = ls*PetscBLASIntCast(d);
380:   VecGetArray(V[0],&pv);
381:   if (qtrans) {
382:     pq = (PetscScalar*)Q+s;
383:     qt = "C";
384:   } else {
385:     pq = (PetscScalar*)Q+s*ldq;
386:     qt = "N";
387:   }
388:   PetscMalloc(sizeof(PetscScalar)*bs*m,&work);
389:   k = ls % bs;
390:   if (k) {
391:     BLASgemm_("N",qt,&k,&m,&n,&one,pv,&ld,pq,&ldq,&zero,work,&k);
392:     for (j=0;j<m;j++) {
393:       pw = pv+(s+j)*ld;
394:       pwork = work+j*k;
395:       for (i=0;i<k;i++) {
396:         *pw++ = *pwork++;
397:       }
398:     }
399:   }
400:   for (;k<ls;k+=bs) {
401:     BLASgemm_("N",qt,&bs,&m,&n,&one,pv+k,&ld,pq,&ldq,&zero,work,&bs);
402:     for (j=0;j<m;j++) {
403:       pw = pv+(s+j)*ld+k;
404:       pwork = work+j*bs;
405:       for (i=0;i<bs;i++) {
406:         *pw++ = *pwork++;
407:       }
408:     }
409:   }
410:   VecRestoreArray(V[0],&pv);
411:   PetscFree(work);
412:   PetscLogFlops(m*n*2.0*ls);
413:   PetscLogEventEnd(SLEPC_UpdateVectors,0,0,0,0);
414:   return(0);
415: }

419: /*@
420:    SlepcVecMAXPBY - Computes y = beta*y + sum alpha*a[j]*x[j]

422:    Logically Collective on Vec

424:    Input parameters:
425: +  beta   - scalar beta
426: .  alpha  - scalar alpha
427: .  nv     - number of vectors in x and scalars in a
428: .  a      - array of scalars
429: -  x      - set of vectors

431:    Input/Output parameter:
432: .  y      - the vector to update

434:    Notes:
435:    If x are Vec's with contiguous storage, then the operation is done
436:    through a call to BLAS. Otherwise, VecMAXPY() is called.

438:    Level: developer

440: .seealso: SlepcVecSetTemplate()
441: @*/
442: PetscErrorCode SlepcVecMAXPBY(Vec y,PetscScalar beta,PetscScalar alpha,PetscInt nv,PetscScalar a[],Vec x[])
443: {
444:   PetscErrorCode    ierr;
445:   PetscBLASInt      i,n,m,one=1;
446:   PetscScalar       *py;
447:   const PetscScalar *px;
448:   PetscContainer    container;
449:   Vec               z;

453:   if (!nv || !(y)->map->n) return(0);
454:   if (nv < 0) SETERRQ1(((PetscObject)y)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of vectors (given %D) cannot be negative",nv);
464:   if ((*x)->map->N != (y)->map->N) SETERRQ(((PetscObject)y)->comm,PETSC_ERR_ARG_INCOMP,"Incompatible vector global lengths");
465:   if ((*x)->map->n != (y)->map->n) SETERRQ(((PetscObject)y)->comm,PETSC_ERR_ARG_INCOMP,"Incompatible vector local lengths");

467:   PetscObjectQuery((PetscObject)(x[0]),"contiguous",(PetscObject*)&container);
468:   if (container) {
469:     /* assume x Vecs are contiguous, use BLAS calls */
470:     PetscLogEventBegin(SLEPC_VecMAXPBY,*x,y,0,0);
471:     VecGetArray(y,&py);
472:     VecGetArrayRead(*x,&px);
473:     n = PetscBLASIntCast(nv);
474:     m = PetscBLASIntCast((y)->map->n);
475:     BLASgemv_("N",&m,&n,&alpha,px,&m,a,&one,&beta,py,&one);
476:     VecRestoreArray(y,&py);
477:     VecRestoreArrayRead(*x,&px);
478:     PetscLogFlops(nv*2*(y)->map->n);
479:     PetscLogEventEnd(SLEPC_VecMAXPBY,*x,y,0,0);
480:   } else {
481:     /* use regular Vec operations */
482:     if (alpha==-beta) {
483:       for (i=0;i<nv;i++) a[i] = -a[i];
484:       VecMAXPY(y,nv,a,x);
485:       for (i=0;i<nv;i++) a[i] = -a[i];
486:       VecScale(y,beta);
487:     } else {
488:       VecDuplicate(y,&z);
489:       VecCopy(y,z);
490:       VecMAXPY(y,nv,a,x);
491:       VecAXPBY(y,beta-alpha,alpha,z);
492:       VecDestroy(&z);
493:     }
494:   }
495:   return(0);
496: }