Actual source code: veccomp0.h

  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2011, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:       
  8:    SLEPc is free software: you can redistribute it and/or modify it under  the
  9:    terms of version 3 of the GNU Lesser General Public License as published by
 10:    the Free Software Foundation.

 12:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY 
 13:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS 
 14:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for 
 15:    more details.

 17:    You  should have received a copy of the GNU Lesser General  Public  License
 18:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 19:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 20: */

 22: #include <private/vecimpl.h>     /*I  "petsvec.h"  I*/

 24: #ifdef __WITH_MPI__
 26: #else
 28: #endif


 36: PetscErrorCode __SUF__(VecDot_Comp)(Vec a,Vec b,PetscScalar *z)
 37: {
 38:   PetscScalar    sum = 0.0,work;
 39:   PetscInt       i;
 41:   Vec_Comp       *as = (Vec_Comp*)a->data,*bs = (Vec_Comp*)b->data;

 46:   if (as->x[0]->ops->dot_local) {
 47:     for (i=0,sum=0.0;i<as->n->n;i++) {
 48:       as->x[i]->ops->dot_local(as->x[i],bs->x[i],&work);
 49:       sum += work;
 50:     }
 51: #ifdef __WITH_MPI__
 52:     work = sum;
 53:     MPI_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,((PetscObject)a)->comm);
 54: #endif
 55:   } else {
 56:     for(i=0,sum=0.0; i<as->n->n; i++) {
 57:       VecDot(as->x[i],bs->x[i],&work);
 58:       sum += work;
 59:     }
 60:   }
 61:   *z = sum;
 62:   return(0);
 63: }

 67: PetscErrorCode __SUF__(VecMDot_Comp)(Vec a,PetscInt n,const Vec b[],PetscScalar *z)
 68: {
 69:   PetscScalar    *work,*work0,*r;
 71:   Vec_Comp       *as = (Vec_Comp*)a->data;
 72:   Vec            *bx;
 73:   PetscInt       i,j;


 79:   if (as->n->n == 0) {
 80:     *z = 0;
 81:     return(0);
 82:   }

 84:   PetscMalloc(sizeof(PetscScalar)*n,&work0);
 85:   PetscMalloc(sizeof(Vec)*n,&bx);

 87: #ifdef __WITH_MPI__
 88:   if (as->x[0]->ops->mdot_local) {
 89:     r = work0; work = z;
 90:   } else
 91: #endif
 92:   {
 93:     r = z; work = work0;
 94:   }

 96:   /* z[i] <- a.x' * b[i].x */
 97:   for (i=0;i<n;i++) bx[i] = ((Vec_Comp*)b[i]->data)->x[0];
 98:   if (as->x[0]->ops->mdot_local) {
 99:     as->x[0]->ops->mdot_local(as->x[0],n,bx,r);
100:   } else {
101:     VecMDot(as->x[0],n,bx,r);
102:   }
103:   for (j=0;j<as->n->n;j++) {
104:     for (i=0;i<n;i++) bx[i] = ((Vec_Comp*)b[i]->data)->x[j];
105:     if (as->x[0]->ops->mdot_local) {
106:       as->x[j]->ops->mdot_local(as->x[j],n,bx,work);
107:     } else {
108:       VecMDot(as->x[j],n,bx,work);
109:     }
110:     for (i=0;i<n;i++) r[i] += work[i];
111:   }

113:   /* If def(__WITH_MPI__) and exists mdot_local */
114: #ifdef __WITH_MPI__
115:   if (as->x[0]->ops->mdot_local) {
116:     /* z[i] <- Allreduce(work[i]) */
117:     MPI_Allreduce(r,z,n,MPIU_SCALAR,MPIU_SUM,((PetscObject)a)->comm);
118:   }
119: #endif

121:   PetscFree(work0);
122:   PetscFree(bx);
123:   return(0);
124: }

126: #ifndef __VEC_NORM2_FUNCS_
128: PETSC_STATIC_INLINE void SumNorm2(PetscReal *ssq0,PetscReal *scale0,PetscReal *ssq1,PetscReal *scale1)
129: {
130:   PetscReal q;
131:   if (*scale0 > *scale1) { q = *scale1/(*scale0); *ssq1 = *ssq0 + q*q*(*ssq1); *scale1 = *scale0; }
132:   else                   { q = *scale0/(*scale1); *ssq1+=         q*q*(*ssq0); }
133: }

135: PETSC_STATIC_INLINE PetscReal GetNorm2(PetscReal ssq,PetscReal scale)
136: {
137:   return scale*PetscSqrtReal(ssq);
138: }

140: PETSC_STATIC_INLINE void AddNorm2(PetscReal *ssq,PetscReal *scale,PetscReal x)
141: {
142:   if (x != 0.0) {
143:     PetscReal absx = PetscAbs(x), q;
144:     if (*scale < absx) { q = *scale/absx; *ssq = 1.0 + *ssq*q*q; *scale = absx; }
145:     else               { q = absx/(*scale); *ssq+= q*q; }
146:   }
147: }

149: MPI_Datatype MPIU_NORM2=0, MPIU_NORM1_AND_2=0;
150: MPI_Op MPIU_NORM2_SUM=0;

155: void SlepcSumNorm2_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
156: {
157:   PetscInt       i,count = *cnt;

160:   if (*datatype == MPIU_NORM2) {
161:     PetscReal *xin = (PetscReal*)in,*xout = (PetscReal*)out;
162:     for (i=0; i<count; i++) {
163:       SumNorm2(&xin[i*2],&xin[i*2+1],&xout[i*2],&xout[i*2+1]);
164:     }
165:   } else if (*datatype == MPIU_NORM1_AND_2) {
166:     PetscReal *xin = (PetscReal*)in,*xout = (PetscReal*)out;
167:     for (i=0; i<count; i++) {
168:       xout[i*3]+= xin[i*3];
169:       SumNorm2(&xin[i*3+1],&xin[i*3+2],&xout[i*3+1],&xout[i*3+2]);
170:     }
171:   } else {
172:     (*PetscErrorPrintf)("Can only handle MPIU_NORM* data types");
173:     MPI_Abort(MPI_COMM_WORLD,1);
174:   }
175:   PetscFunctionReturnVoid();
176: }

180: PetscErrorCode VecNormCompEnd()
181: {

185:   MPI_Type_free(&MPIU_NORM2);
186:   MPI_Type_free(&MPIU_NORM1_AND_2);
187:   MPI_Op_free(&MPIU_NORM2_SUM);
188: 
189:   return(0);
190: }

195: PetscErrorCode VecNormCompInit()
196: {

200:   MPI_Type_contiguous(sizeof(PetscReal)*2,MPI_BYTE,&MPIU_NORM2);
201:   MPI_Type_commit(&MPIU_NORM2);
202:   MPI_Type_contiguous(sizeof(PetscReal)*3,MPI_BYTE,&MPIU_NORM1_AND_2);
203:   MPI_Type_commit(&MPIU_NORM1_AND_2);
204:   MPI_Op_create(SlepcSumNorm2_Local,1,&MPIU_NORM2_SUM);
205:   PetscRegisterFinalize(VecNormCompEnd);
206: 
207:   return(0);
208: }
209: #endif

213: PetscErrorCode __SUF__(VecNorm_Comp)(Vec a,NormType t,PetscReal *norm)
214: {
215:   PetscReal      work[3],s=0.0;
217:   Vec_Comp       *as = (Vec_Comp*)a->data;
218:   PetscInt       i;

222:   /* Initialize norm */
223:   switch(t) {
224:   case NORM_1: case NORM_INFINITY: *norm = 0.0; break;
225:   case NORM_2: case NORM_FROBENIUS: *norm = 1.0; s = 0.0; break;
226:   case NORM_1_AND_2: norm[0] = 0.0; norm[1] = 1.0; s = 0.0; break;
227:   }
228:   for (i=0;i<as->n->n;i++) {
229:     if (as->x[0]->ops->norm_local) {
230:       as->x[0]->ops->norm_local(as->x[i],t,work);
231:     } else {
232:       VecNorm(as->x[i],t,work);
233:     }
234:     /* norm+= work */
235:     switch(t) {
236:     case NORM_1: *norm+= *work; break;
237:     case NORM_2: case NORM_FROBENIUS: AddNorm2(norm,&s,*work); break;
238:     case NORM_1_AND_2: norm[0]+= work[0]; AddNorm2(&norm[1],&s,work[1]); break;
239:     case NORM_INFINITY: *norm = PetscMax(*norm,*work); break;
240:     }
241:   }

243:   /* If def(__WITH_MPI__) and exists norm_local */
244: #ifdef __WITH_MPI__
245:   if (as->x[0]->ops->norm_local) {
246:     PetscReal work0[3];
247:     /* norm <- Allreduce(work) */
248:     switch(t) {
249:     case NORM_1:
250:       work[0] = *norm;
251:       MPI_Allreduce(work,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)a)->comm);
252:       break;
253:     case NORM_2: case NORM_FROBENIUS:
254:       work[0] = *norm; work[1] = s;
255:       MPI_Allreduce(work,work0,1,MPIU_NORM2,MPIU_NORM2_SUM,((PetscObject)a)->comm);
256:       *norm = GetNorm2(work0[0],work0[1]);
257:       break;
258:     case NORM_1_AND_2:
259:       work[0] = norm[0]; work[1] = norm[1]; work[2] = s;
260:       MPI_Allreduce(work,work0,1,MPIU_NORM1_AND_2,MPIU_NORM2_SUM,((PetscObject)a)->comm);
261:       norm[0] = work0[0];
262:       norm[1] = GetNorm2(work0[1],work0[2]);
263:       break;
264:     case NORM_INFINITY:
265:       work[0] = *norm;
266:       MPI_Allreduce(work,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)a)->comm);
267:       break;
268:     }
269:   }
270: #else
271:   /* Norm correction */
272:   switch(t) {
273:   case NORM_2: case NORM_FROBENIUS: *norm = GetNorm2(*norm,s); break;
274:   case NORM_1_AND_2: norm[1] = GetNorm2(norm[1],s); break;
275:   default: ;
276:   }
277: #endif

279:   return(0);
280: }

284: PetscErrorCode __SUF__(VecDotNorm2_Comp)(Vec v,Vec w,PetscScalar *dp,PetscScalar *nm)
285: {
286:   PetscScalar    *vx,*wx,dp0,nm0,dp1,nm1;
288:   Vec_Comp       *vs = (Vec_Comp*)v->data,*ws = (Vec_Comp*)w->data;
289:   PetscInt       i,n;
290:   PetscBool      t0,t1;
291: #ifdef __WITH_MPI__
292:   PetscScalar    work[4];
293: #endif

296:   /* Compute recursively the local part */
297:   dp0 = nm0 = 0.0;
298:   PetscTypeCompare((PetscObject)v,VECCOMP,&t0);
299:   PetscTypeCompare((PetscObject)w,VECCOMP,&t1);
300:   if (t0 == PETSC_TRUE && t1 == PETSC_TRUE) {
303:     for (i=0;i<vs->n->n;i++) {
304:       VecDotNorm2_Comp_Seq(vs->x[i],ws->x[i],&dp1,&nm1);
305:       dp0 += dp1;
306:       nm0 += nm1;
307:     }
308:   } else if (t0 == PETSC_FALSE && t1 == PETSC_FALSE) {
309:     VecGetLocalSize(v,&n);
310:     VecGetArray(v,&vx);
311:     VecGetArray(w,&wx);
312:     for (i=0;i<n;i++) {
313:       dp0 += vx[i]*PetscConj(wx[i]);
314:       nm0 += wx[i]*PetscConj(wx[i]);
315:     }
316:     VecRestoreArray(v,&vx);
317:     VecRestoreArray(w,&wx);
318:   } else
319:     SETERRQ(((PetscObject)v)->comm,PETSC_ERR_ARG_INCOMP,"Incompatible vector types");

321: #ifdef __WITH_MPI__
322:     /* [dp, nm] <- Allreduce([dp0, nm0]) */
323:     work[0] = dp0; work[1] = nm0;
324:     MPI_Allreduce(&work,&work[2],2,MPIU_SCALAR,MPIU_SUM,((PetscObject)v)->comm);
325:     *dp = work[2]; *nm = work[3];
326: #else
327:     *dp = dp0; *nm = nm0;
328: #endif
329:   return(0);
330: }