Actual source code: contiguous.c
1: /*
2: Subroutines related to special Vecs that share a common contiguous storage.
4: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5: SLEPc - Scalable Library for Eigenvalue Problem Computations
6: Copyright (c) 2002-2011, Universitat Politecnica de Valencia, Spain
8: This file is part of SLEPc.
9:
10: SLEPc is free software: you can redistribute it and/or modify it under the
11: terms of version 3 of the GNU Lesser General Public License as published by
12: the Free Software Foundation.
14: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
15: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
17: more details.
19: You should have received a copy of the GNU Lesser General Public License
20: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
21: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
22: */
24: #include <private/vecimplslepc.h> /*I "slepcvec.h" I*/
25: #include <petscblaslapack.h>
27: PetscLogEvent SLEPC_UpdateVectors = 0,SLEPC_VecMAXPBY = 0;
31: /*
32: Frees the array of the contiguous vectors when all vectors have been destroyed.
33: */
34: static PetscErrorCode Vecs_ContiguousDestroy(void *ctx)
35: {
36: PetscErrorCode ierr;
37: Vecs_Contiguous *vc = (Vecs_Contiguous*)ctx;
40: PetscFree(vc->array);
41: PetscFree(vc);
42: return(0);
43: }
47: /*
48: Version of VecDuplicateVecs that sets contiguous storage.
49: */
50: static PetscErrorCode VecDuplicateVecs_Contiguous(Vec v,PetscInt m,Vec *V[])
51: {
52: PetscErrorCode ierr;
53: PetscInt i,nloc;
54: PetscScalar *pV;
55: PetscContainer container;
56: Vecs_Contiguous *vc;
59: /* Allocate array */
60: VecGetLocalSize(v,&nloc);
61: PetscMalloc(m*nloc*sizeof(PetscScalar),&pV);
62: /* Create container */
63: PetscNew(Vecs_Contiguous,&vc);
64: vc->nvecs = m;
65: vc->array = pV;
66: PetscContainerCreate(((PetscObject)v)->comm,&container);
67: PetscContainerSetPointer(container,vc);
68: PetscContainerSetUserDestroy(container,Vecs_ContiguousDestroy);
69: /* Create vectors */
70: PetscMalloc(m*sizeof(Vec),V);
71: for (i=0;i<m;i++) {
72: VecCreateMPIWithArray(((PetscObject)v)->comm,nloc,PETSC_DECIDE,pV+i*nloc,*V+i);
73: PetscObjectCompose((PetscObject)*(*V+i),"contiguous",(PetscObject)container);
74: }
75: PetscContainerDestroy(&container);
76: return(0);
77: }
81: /*@
82: SlepcVecSetTemplate - Sets a vector as a template for contiguous storage.
84: Collective on Vec
86: Input Parameters:
87: . v - the vector
89: Note:
90: Once this function is called, subsequent calls to VecDuplicateVecs()
91: with this vector will use a special version that generates vectors with
92: contiguous storage, that is, the array of values of V[1] immediately
93: follows the array of V[0], and so on.
95: Level: developer
96: @*/
97: PetscErrorCode SlepcVecSetTemplate(Vec v)
98: {
100: PetscBool flg;
104: PetscTypeCompareAny((PetscObject)v,&flg,VECSEQ,VECMPI,"");
105: if (!flg) SETERRQ(((PetscObject)v)->comm,PETSC_ERR_SUP,"Only available for standard vectors (VECSEQ or VECMPI)");
106: v->ops->duplicatevecs = VecDuplicateVecs_Contiguous;
107: return(0);
108: }
112: /*@
113: SlepcMatGetVecsTemplate - Get vectors compatible with a matrix,
114: i.e. with the same parallel layout, and mark them as templates
115: for contiguous storage.
116:
117: Collective on Mat
119: Input Parameter:
120: . mat - the matrix
122: Output Parameters:
123: + right - (optional) vector that the matrix can be multiplied against
124: - left - (optional) vector that the matrix vector product can be stored in
126: Options Database Keys:
127: . -slepc_non_contiguous - Disable contiguous vector storage
129: Notes:
130: Use -slepc_non_contiguous to disable contiguous storage throughout SLEPc.
131: Contiguous storage is currently also disabled in AIJCUSP matrices.
133: Level: developer
135: .seealso: SlepcVecSetTemplate()
136: @*/
137: PetscErrorCode SlepcMatGetVecsTemplate(Mat mat,Vec *right,Vec *left)
138: {
140: PetscBool flg;
141: Vec v;
146: MatGetVecs(mat,right,left);
147: v = right? *right: *left;
148: PetscTypeCompareAny((PetscObject)v,&flg,VECSEQ,VECMPI,"");
149: if (!flg) return(0);
150: PetscOptionsHasName(PETSC_NULL,"-slepc_non_contiguous",&flg);
151: if (!flg) {
152: if (right) { SlepcVecSetTemplate(*right); }
153: if (left) { SlepcVecSetTemplate(*left); }
154: }
155: return(0);
156: }
160: /*
161: SlepcUpdateVectors_Noncontiguous_Inplace - V = V*Q for regular vectors
162: (non-contiguous).
163: */
164: static PetscErrorCode SlepcUpdateVectors_Noncontiguous_Inplace(PetscInt m_,Vec *V,const PetscScalar *Q,PetscInt ldq_,PetscBool qtrans)
165: {
166: PetscInt l;
167: PetscBLASInt j,ls,bs=64,m,k,ldq;
168: PetscScalar *pv,*pq=(PetscScalar*)Q,*work,*out,one=1.0,zero=0.0;
172: PetscLogEventBegin(SLEPC_UpdateVectors,0,0,0,0);
173: VecGetLocalSize(V[0],&l);
174: ls = PetscBLASIntCast(l);
175: m = PetscBLASIntCast(m_);
176: ldq = PetscBLASIntCast(ldq_);
177: PetscMalloc(sizeof(PetscScalar)*2*bs*m,&work);
178: out = work+m*bs;
179: k = ls % bs;
180: if (k) {
181: for (j=0;j<m;j++) {
182: VecGetArray(V[j],&pv);
183: PetscMemcpy(work+j*bs,pv,k*sizeof(PetscScalar));
184: VecRestoreArray(V[j],&pv);
185: }
186: BLASgemm_("N",qtrans?"C":"N",&k,&m,&m,&one,work,&bs,pq,&ldq,&zero,out,&bs);
187: for (j=0;j<m;j++) {
188: VecGetArray(V[j],&pv);
189: PetscMemcpy(pv,out+j*bs,k*sizeof(PetscScalar));
190: VecRestoreArray(V[j],&pv);
191: }
192: }
193: for (;k<ls;k+=bs) {
194: for (j=0;j<m;j++) {
195: VecGetArray(V[j],&pv);
196: PetscMemcpy(work+j*bs,pv+k,bs*sizeof(PetscScalar));
197: VecRestoreArray(V[j],&pv);
198: }
199: BLASgemm_("N",qtrans?"C":"N",&bs,&m,&m,&one,work,&bs,pq,&ldq,&zero,out,&bs);
200: for (j=0;j<m;j++) {
201: VecGetArray(V[j],&pv);
202: PetscMemcpy(pv+k,out+j*bs,bs*sizeof(PetscScalar));
203: VecRestoreArray(V[j],&pv);
204: }
205: }
206: PetscFree(work);
207: PetscLogFlops(m*m*2.0*ls);
208: PetscLogEventEnd(SLEPC_UpdateVectors,0,0,0,0);
209: return(0);
210: }
214: /*
215: SlepcUpdateVectors_Noncontiguous - V(:,s:e-1) = V*Q(:,s:e-1) for
216: regular vectors (non-contiguous).
218: Writing V = [ V1 V2 V3 ] and Q = [ Q1 Q2 Q3 ], where the V2 and Q2
219: correspond to the columns s:e-1, the computation is done as
220: V2 := V2*Q2 + V1*Q1 + V3*Q3
221: (the first term is computed with SlepcUpdateVectors_Noncontiguous_Inplace).
222: */
223: static PetscErrorCode SlepcUpdateVectors_Noncontiguous(PetscInt n,Vec *V,PetscInt s,PetscInt e,const PetscScalar *Q,PetscInt ldq,PetscBool qtrans)
224: {
225: PetscInt i,j,m,ln;
226: PetscScalar *pq,qt[100];
227: PetscBool allocated = PETSC_FALSE;
231: m = e-s;
232: if (qtrans) {
233: ln = PetscMax(s,n-e);
234: if (ln<=100) pq = qt;
235: else {
236: PetscMalloc(ln*sizeof(PetscScalar),&pq);
237: allocated = PETSC_TRUE;
238: }
239: }
240: /* V2 */
241: SlepcUpdateVectors_Noncontiguous_Inplace(m,V+s,Q+s*ldq+s,ldq,qtrans);
242: /* V1 */
243: if (s>0) {
244: for (i=s;i<e;i++) {
245: if (qtrans) {
246: for (j=0;j<s;j++) pq[j] = Q[i+j*ldq];
247: } else pq = (PetscScalar*)Q+i*ldq;
248: VecMAXPY(V[i],s,pq,V);
249: }
250: }
251: /* V3 */
252: if (n>e) {
253: for (i=s;i<e;i++) {
254: if (qtrans) {
255: for (j=0;j<n-e;j++) pq[j] = Q[i+(j+e)*ldq];
256: } else pq = (PetscScalar*)Q+i*ldq+e;
257: VecMAXPY(V[i],n-e,pq,V+e);
258: }
259: }
260: if (allocated) { PetscFree(pq); }
261: return(0);
262: }
266: /*@
267: SlepcUpdateVectors - Update a set of vectors V as V(:,s:e-1) = V*Q(:,s:e-1).
269: Not Collective
271: Input parameters:
272: + n - number of vectors in V
273: . s - first column of V to be overwritten
274: . e - first column of V not to be overwritten
275: . Q - matrix containing the coefficients of the update
276: . ldq - leading dimension of Q
277: - qtrans - flag indicating if Q is to be transposed
279: Input/Output parameter:
280: . V - set of vectors
282: Notes:
283: This function computes V(:,s:e-1) = V*Q(:,s:e-1), that is, given a set of
284: vectors V, columns from s to e-1 are overwritten with columns from s to
285: e-1 of the matrix-matrix product V*Q.
287: Matrix V is represented as an array of Vec, whereas Q is represented as
288: a column-major dense array of leading dimension ldq. Only columns s to e-1
289: of Q are referenced.
291: If qtrans=PETSC_TRUE, the operation is V*Q'.
293: This routine is implemented with a call to BLAS, therefore V is an array
294: of Vec which have the data stored contiguously in memory as a Fortran matrix.
295: PETSc does not create such arrays by default.
297: Level: developer
299: @*/
300: PetscErrorCode SlepcUpdateVectors(PetscInt n,Vec *V,PetscInt s,PetscInt e,const PetscScalar *Q,PetscInt ldq,PetscBool qtrans)
301: {
302: PetscContainer container;
306: if (n<0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of vectors (given %D) cannot be negative",n);
307: if (n==0 || s>=e) return(0);
312: PetscObjectQuery((PetscObject)(V[0]),"contiguous",(PetscObject*)&container);
313: if (container) {
314: /* contiguous Vecs, use BLAS calls */
315: SlepcUpdateStrideVectors(n,V,s,1,e,Q,ldq,qtrans);
316: } else {
317: /* use regular Vec operations */
318: SlepcUpdateVectors_Noncontiguous(n,V,s,e,Q,ldq,qtrans);
319: }
320: return(0);
321: }
325: /*@
326: SlepcUpdateStrideVectors - Update a set of vectors V as
327: V(:,s:d:e-1) = V*Q(:,s:e-1).
329: Not Collective
331: Input parameters:
332: + n - number of vectors in V
333: . s - first column of V to be overwritten
334: . d - stride
335: . e - first column of V not to be overwritten
336: . Q - matrix containing the coefficients of the update
337: . ldq - leading dimension of Q
338: - qtrans - flag indicating if Q is to be transposed
340: Input/Output parameter:
341: . V - set of vectors
343: Notes:
344: This function computes V(:,s:d:e-1) = V*Q(:,s:e-1), that is, given a set
345: of vectors V, columns from s to e-1 are overwritten with columns from s to
346: e-1 of the matrix-matrix product V*Q.
348: Matrix V is represented as an array of Vec, whereas Q is represented as
349: a column-major dense array of leading dimension ldq. Only columns s to e-1
350: of Q are referenced.
352: If qtrans=PETSC_TRUE, the operation is V*Q'.
354: This routine is implemented with a call to BLAS, therefore V is an array
355: of Vec which have the data stored contiguously in memory as a Fortran matrix.
356: PETSc does not create such arrays by default.
358: Level: developer
360: @*/
361: PetscErrorCode SlepcUpdateStrideVectors(PetscInt n_,Vec *V,PetscInt s,PetscInt d,PetscInt e,const PetscScalar *Q,PetscInt ldq_,PetscBool qtrans)
362: {
364: PetscInt l;
365: PetscBLASInt i,j,k,bs=64,m,n,ldq,ls,ld;
366: PetscScalar *pv,*pw,*pq,*work,*pwork,one=1.0,zero=0.0;
367: const char *qt;
370: n = PetscBLASIntCast(n_/d);
371: ldq = PetscBLASIntCast(ldq_);
372: m = (e-s)/d;
373: if (m==0) return(0);
375: if (m<0 || n<0 || s<0 || m>n) SETERRQ(((PetscObject)*V)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Index argument out of range");
376: PetscLogEventBegin(SLEPC_UpdateVectors,0,0,0,0);
377: VecGetLocalSize(V[0],&l);
378: ls = PetscBLASIntCast(l);
379: ld = ls*PetscBLASIntCast(d);
380: VecGetArray(V[0],&pv);
381: if (qtrans) {
382: pq = (PetscScalar*)Q+s;
383: qt = "C";
384: } else {
385: pq = (PetscScalar*)Q+s*ldq;
386: qt = "N";
387: }
388: PetscMalloc(sizeof(PetscScalar)*bs*m,&work);
389: k = ls % bs;
390: if (k) {
391: BLASgemm_("N",qt,&k,&m,&n,&one,pv,&ld,pq,&ldq,&zero,work,&k);
392: for (j=0;j<m;j++) {
393: pw = pv+(s+j)*ld;
394: pwork = work+j*k;
395: for (i=0;i<k;i++) {
396: *pw++ = *pwork++;
397: }
398: }
399: }
400: for (;k<ls;k+=bs) {
401: BLASgemm_("N",qt,&bs,&m,&n,&one,pv+k,&ld,pq,&ldq,&zero,work,&bs);
402: for (j=0;j<m;j++) {
403: pw = pv+(s+j)*ld+k;
404: pwork = work+j*bs;
405: for (i=0;i<bs;i++) {
406: *pw++ = *pwork++;
407: }
408: }
409: }
410: VecRestoreArray(V[0],&pv);
411: PetscFree(work);
412: PetscLogFlops(m*n*2.0*ls);
413: PetscLogEventEnd(SLEPC_UpdateVectors,0,0,0,0);
414: return(0);
415: }
419: /*@
420: SlepcVecMAXPBY - Computes y = beta*y + sum alpha*a[j]*x[j]
422: Logically Collective on Vec
424: Input parameters:
425: + beta - scalar beta
426: . alpha - scalar alpha
427: . nv - number of vectors in x and scalars in a
428: . a - array of scalars
429: - x - set of vectors
431: Input/Output parameter:
432: . y - the vector to update
434: Notes:
435: If x are Vec's with contiguous storage, then the operation is done
436: through a call to BLAS. Otherwise, VecMAXPY() is called.
438: Level: developer
440: .seealso: SlepcVecSetTemplate()
441: @*/
442: PetscErrorCode SlepcVecMAXPBY(Vec y,PetscScalar beta,PetscScalar alpha,PetscInt nv,PetscScalar a[],Vec x[])
443: {
444: PetscErrorCode ierr;
445: PetscBLASInt i,n,m,one=1;
446: PetscScalar *py;
447: const PetscScalar *px;
448: PetscContainer container;
449: Vec z;
453: if (!nv || !(y)->map->n) return(0);
454: if (nv < 0) SETERRQ1(((PetscObject)y)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of vectors (given %D) cannot be negative",nv);
464: if ((*x)->map->N != (y)->map->N) SETERRQ(((PetscObject)y)->comm,PETSC_ERR_ARG_INCOMP,"Incompatible vector global lengths");
465: if ((*x)->map->n != (y)->map->n) SETERRQ(((PetscObject)y)->comm,PETSC_ERR_ARG_INCOMP,"Incompatible vector local lengths");
467: PetscObjectQuery((PetscObject)(x[0]),"contiguous",(PetscObject*)&container);
468: if (container) {
469: /* assume x Vecs are contiguous, use BLAS calls */
470: PetscLogEventBegin(SLEPC_VecMAXPBY,*x,y,0,0);
471: VecGetArray(y,&py);
472: VecGetArrayRead(*x,&px);
473: n = PetscBLASIntCast(nv);
474: m = PetscBLASIntCast((y)->map->n);
475: BLASgemv_("N",&m,&n,&alpha,px,&m,a,&one,&beta,py,&one);
476: VecRestoreArray(y,&py);
477: VecRestoreArrayRead(*x,&px);
478: PetscLogFlops(nv*2*(y)->map->n);
479: PetscLogEventEnd(SLEPC_VecMAXPBY,*x,y,0,0);
480: } else {
481: /* use regular Vec operations */
482: if (alpha==-beta) {
483: for (i=0;i<nv;i++) a[i] = -a[i];
484: VecMAXPY(y,nv,a,x);
485: for (i=0;i<nv;i++) a[i] = -a[i];
486: VecScale(y,beta);
487: } else {
488: VecDuplicate(y,&z);
489: VecCopy(y,z);
490: VecMAXPY(y,nv,a,x);
491: VecAXPBY(y,beta-alpha,alpha,z);
492: VecDestroy(&z);
493: }
494: }
495: return(0);
496: }