Actual source code: dvd_improvex.c

  1: /*
  2:   SLEPc eigensolver: "davidson"

  4:   Step: improve the eigenvectors X

  6:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  7:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  8:    Copyright (c) 2002-2013, Universitat Politecnica de Valencia, Spain

 10:    This file is part of SLEPc.

 12:    SLEPc is free software: you can redistribute it and/or modify it under  the
 13:    terms of version 3 of the GNU Lesser General Public License as published by
 14:    the Free Software Foundation.

 16:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
 17:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
 18:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
 19:    more details.

 21:    You  should have received a copy of the GNU Lesser General  Public  License
 22:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 23:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 24: */

 26:  #include davidson.h
 27: #include <slepc-private/vecimplslepc.h>         /*I "slepcvec.h" I*/

 29: PetscErrorCode dvd_improvex_PfuncV(dvdDashboard *d,void *funcV,Vec *D,PetscInt max_size_D,PetscInt r_s,PetscInt r_e,Vec *auxV,PetscScalar *auxS);
 30: PetscErrorCode PCApplyBA_dvd(PC pc,PCSide side,Vec in,Vec out,Vec w);
 31: PetscErrorCode PCApply_dvd(PC pc,Vec in,Vec out);
 32: PetscErrorCode PCApplyTranspose_dvd(PC pc,Vec in,Vec out);
 33: PetscErrorCode MatMult_dvd_jd(Mat A,Vec in,Vec out);
 34: PetscErrorCode MatMultTranspose_dvd_jd(Mat A,Vec in,Vec out);
 35: PetscErrorCode MatGetVecs_dvd_jd(Mat A,Vec *right,Vec *left);
 36: PetscErrorCode dvd_improvex_jd_d(dvdDashboard *d);
 37: PetscErrorCode dvd_improvex_jd_start(dvdDashboard *d);
 38: PetscErrorCode dvd_improvex_jd_end(dvdDashboard *d);
 39: PetscErrorCode dvd_improvex_jd_gen(dvdDashboard *d,Vec *D,PetscInt max_size_D,PetscInt r_s,PetscInt r_e,PetscInt *size_D);
 40: PetscErrorCode dvd_improvex_jd_proj_cuv(dvdDashboard *d,PetscInt i_s,PetscInt i_e,Vec **u,Vec **v,Vec *kr,Vec **auxV,PetscScalar **auxS,PetscScalar *theta,PetscScalar *thetai,PetscScalar *pX,PetscScalar *pY,PetscInt ld);
 41: PetscErrorCode dvd_improvex_jd_proj_uv_KXX(dvdDashboard *d,PetscInt i_s,PetscInt i_e,Vec *u,Vec *v,Vec *kr,Vec *auxV,PetscScalar *theta,PetscScalar *thetai,PetscScalar *pX,PetscScalar *pY,PetscInt ld);
 42: PetscErrorCode dvd_improvex_jd_proj_uv_KZX(dvdDashboard *d,PetscInt i_s,PetscInt i_e,Vec *u,Vec *v,Vec *kr,Vec *auxV,PetscScalar *theta,PetscScalar *thetai,PetscScalar *pX,PetscScalar *pY,PetscInt ld);
 43: PetscErrorCode dvd_improvex_jd_lit_const_0(dvdDashboard *d,PetscInt i,PetscScalar* theta,PetscScalar* thetai,PetscInt *maxits,PetscReal *tol);
 44: PetscErrorCode dvd_improvex_apply_proj(dvdDashboard *d,Vec *V,PetscInt cV,PetscScalar *auxS);
 45: PetscErrorCode dvd_improvex_applytrans_proj(dvdDashboard *d,Vec *V,PetscInt cV,PetscScalar *auxS);

 47: #define size_Z (64*4)

 49: /**** JD update step (I - Kfg'/(g'Kf)) K(A - sB) (I - Kfg'/(g'Kf)) t = (I - Kfg'/(g'Kf))r  ****/

 51: typedef struct {
 52:   PetscInt size_X;
 53:   void
 54:     *old_improveX_data;   /* old improveX_data */
 55:   improveX_type
 56:     old_improveX;         /* old improveX */
 57:   KSP ksp;                /* correction equation solver */
 58:   Vec
 59:     friends,              /* reference vector for composite vectors */
 60:     *auxV;                /* auxiliar vectors */
 61:   PetscScalar *auxS,      /* auxiliar scalars */
 62:     *theta,
 63:     *thetai;              /* the shifts used in the correction eq. */
 64:   PetscInt maxits,        /* maximum number of iterations */
 65:     r_s, r_e,             /* the selected eigenpairs to improve */
 66:     ksp_max_size;         /* the ksp maximum subvectors size */
 67:   PetscReal tol,          /* the maximum solution tolerance */
 68:     lastTol,              /* last tol for dynamic stopping criterion */
 69:     fix;                  /* tolerance for using the approx. eigenvalue */
 70:   PetscBool
 71:     dynamic;              /* if the dynamic stopping criterion is applied */
 72:   dvdDashboard
 73:     *d;                   /* the currect dvdDashboard reference */
 74:   PC old_pc;              /* old pc in ksp */
 75:   Vec
 76:     *u,                   /* new X vectors */
 77:     *real_KZ,             /* original KZ */
 78:     *KZ;                  /* KZ vecs for the projector KZ*inv(X'*KZ)*X' */
 79:   PetscScalar
 80:    *XKZ,                  /* X'*KZ */
 81:    *iXKZ;                 /* inverse of XKZ */
 82:   PetscInt
 83:     ldXKZ,                /* leading dimension of XKZ */
 84:     size_iXKZ,            /* size of iXKZ */
 85:     ldiXKZ,               /* leading dimension of iXKZ */
 86:     size_KZ,              /* size of converged KZ */
 87:     size_real_KZ,         /* original size of KZ */
 88:     size_cX,              /* last value of d->size_cX */
 89:     old_size_X;           /* last number of improved vectors */
 90:   PetscBLASInt
 91:     *iXKZPivots;          /* array of pivots */
 92: } dvdImprovex_jd;

 94: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmult(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV);
 95: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmulttrans(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV);

 99: PetscErrorCode dvd_improvex_jd(dvdDashboard *d,dvdBlackboard *b,KSP ksp,PetscInt max_bs,PetscInt cX_impr,PetscBool dynamic)
100: {
101:   PetscErrorCode  ierr;
102:   dvdImprovex_jd  *data;
103:   PetscBool       useGD,her_probl,std_probl;
104:   PC              pc;
105:   PetscInt        size_P,s=1;

108:   std_probl = DVD_IS(d->sEP,DVD_EP_STD)?PETSC_TRUE:PETSC_FALSE;
109:   her_probl = DVD_IS(d->sEP,DVD_EP_HERMITIAN)?PETSC_TRUE:PETSC_FALSE;

111:   /* Setting configuration constrains */
112:   PetscObjectTypeCompare((PetscObject)ksp,KSPPREONLY,&useGD);

114:   /* If the arithmetic is real and the problem is not Hermitian, then
115:      the block size is incremented in one */
116: #if !defined(PETSC_USE_COMPLEX)
117:   if (!her_probl) {
118:     max_bs++;
119:     b->max_size_P = PetscMax(b->max_size_P,2);
120:     s = 2;
121:   } else
122: #endif
123:     b->max_size_P = PetscMax(b->max_size_P,1);
124:   b->max_size_X = PetscMax(b->max_size_X,max_bs);
125:   size_P = b->max_size_P+cX_impr;
126:   b->max_size_auxV = PetscMax(b->max_size_auxV,
127:      b->max_size_X*3+(useGD?0:2)+ /* u, kr, auxV(max_size_X+2?) */
128:      ((her_probl || !d->eps->trueres)?1:PetscMax(s*2,b->max_size_cX_proj+b->max_size_X))); /* testConv */

130:   b->own_scalars+= size_P*size_P; /* XKZ */
131:   b->max_size_auxS = PetscMax(b->max_size_auxS,
132:     b->max_size_X*3 + /* theta, thetai */
133:     size_P*size_P + /* iXKZ */
134:     FromIntToScalar(size_P) + /* iXkZPivots */
135:     PetscMax(PetscMax(
136:       3*b->max_size_proj*b->max_size_X, /* dvd_improvex_apply_proj */
137:       8*cX_impr*b->max_size_X), /* dvd_improvex_jd_proj_cuv_KZX */
138:       (her_probl || !d->eps->trueres)?0:b->max_nev*b->max_nev+PetscMax(b->max_nev*6,(b->max_nev+b->max_size_proj)*s+b->max_nev*(b->max_size_X+b->max_size_cX_proj)*(std_probl?2:4)+64))); /* preTestConv */
139:   b->own_vecs+= size_P; /* KZ */

141:   /* Setup the preconditioner */
142:   if (ksp) {
143:     KSPGetPC(ksp,&pc);
144:     dvd_static_precond_PC(d,b,pc);
145:   } else {
146:     dvd_static_precond_PC(d,b,0);
147:   }

149:   /* Setup the step */
150:   if (b->state >= DVD_STATE_CONF) {
151:     PetscMalloc(sizeof(dvdImprovex_jd),&data);
152:     PetscLogObjectMemory(d->eps,sizeof(dvdImprovex_jd));
153:     data->dynamic = dynamic;
154:     data->size_real_KZ = size_P;
155:     data->real_KZ = b->free_vecs; b->free_vecs+= data->size_real_KZ;
156:     d->max_cX_in_impr = cX_impr;
157:     data->XKZ = b->free_scalars; b->free_scalars+= size_P*size_P;
158:     data->ldXKZ = size_P;
159:     data->size_X = b->max_size_X;
160:     data->old_improveX_data = d->improveX_data;
161:     d->improveX_data = data;
162:     data->old_improveX = d->improveX;
163:     data->ksp = useGD?NULL:ksp;
164:     data->d = d;
165:     d->improveX = dvd_improvex_jd_gen;
166:     data->ksp_max_size = max_bs;

168:     DVD_FL_ADD(d->startList,dvd_improvex_jd_start);
169:     DVD_FL_ADD(d->endList,dvd_improvex_jd_end);
170:     DVD_FL_ADD(d->destroyList,dvd_improvex_jd_d);
171:   }
172:   return(0);
173: }

177: PetscErrorCode dvd_improvex_jd_start(dvdDashboard *d)
178: {
179:   PetscErrorCode  ierr;
180:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;
181:   PetscInt        rA, cA, rlA, clA;
182:   Mat             A;
183:   PetscBool       t;
184:   PC              pc;

187:   data->KZ = data->real_KZ;
188:   data->size_KZ = data->size_cX = data->old_size_X = 0;
189:   data->lastTol = data->dynamic?0.5:0.0;

191:   /* Setup the ksp */
192:   if (data->ksp) {
193:     /* Create the reference vector */
194:     VecCreateCompWithVecs(d->V,data->ksp_max_size,NULL,&data->friends);
195:     PetscLogObjectParent(d->eps,data->friends);

197:     /* Save the current pc and set a PCNONE */
198:     KSPGetPC(data->ksp, &data->old_pc);
199:     PetscObjectTypeCompare((PetscObject)data->old_pc,PCNONE,&t);
200:     data->lastTol = 0.5;
201:     if (t) {
202:       data->old_pc = 0;
203:     } else {
204:       PetscObjectReference((PetscObject)data->old_pc);
205:       PCCreate(PetscObjectComm((PetscObject)d->eps),&pc);
206:       PCSetType(pc,PCSHELL);
207:       PCSetOperators(pc, d->A, d->A, SAME_PRECONDITIONER);
208:       PCShellSetApply(pc,PCApply_dvd);
209:       PCShellSetApplyBA(pc,PCApplyBA_dvd);
210:       PCShellSetApplyTranspose(pc,PCApplyTranspose_dvd);
211:       KSPSetPC(data->ksp,pc);
212:       PCDestroy(&pc);
213:     }

215:     /* Create the (I-v*u')*K*(A-s*B) matrix */
216:     MatGetSize(d->A, &rA, &cA);
217:     MatGetLocalSize(d->A, &rlA, &clA);
218:     MatCreateShell(PetscObjectComm((PetscObject)d->A), rlA*data->ksp_max_size,
219:                           clA*data->ksp_max_size, rA*data->ksp_max_size,
220:                           cA*data->ksp_max_size, data, &A);
221:     MatShellSetOperation(A, MATOP_MULT,(void(*)(void))MatMult_dvd_jd);
222:     MatShellSetOperation(A, MATOP_MULT_TRANSPOSE,(void(*)(void))MatMultTranspose_dvd_jd);
223:     MatShellSetOperation(A, MATOP_GET_VECS,(void(*)(void))MatGetVecs_dvd_jd);

225:     /* Try to avoid KSPReset */
226:     KSPGetOperatorsSet(data->ksp,&t,NULL);
227:     if (t) {
228:       Mat M;
229:       PetscInt rM;
230:       KSPGetOperators(data->ksp,&M,NULL,NULL);
231:       MatGetSize(M,&rM,NULL);
232:       if (rM != rA*data->ksp_max_size) { KSPReset(data->ksp); }
233:     }
234:     KSPSetOperators(data->ksp, A, A, SAME_PRECONDITIONER);
235:     KSPSetUp(data->ksp);
236:     MatDestroy(&A);
237:   } else {
238:     data->old_pc = 0;
239:     data->friends = NULL;
240:   }
241:   return(0);
242: }

246: PetscErrorCode dvd_improvex_jd_end(dvdDashboard *d)
247: {
248:   PetscErrorCode  ierr;
249:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;

252:   if (data->friends) { VecDestroy(&data->friends); }

254:   /* Restore the pc of ksp */
255:   if (data->old_pc) {
256:     KSPSetPC(data->ksp, data->old_pc);
257:     PCDestroy(&data->old_pc);
258:   }
259:   return(0);
260: }

264: PetscErrorCode dvd_improvex_jd_d(dvdDashboard *d)
265: {
266:   PetscErrorCode  ierr;
267:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;

270:   /* Restore changes in dvdDashboard */
271:   d->improveX_data = data->old_improveX_data;

273:   /* Free local data and objects */
274:   PetscFree(data);
275:   return(0);
276: }

280: PetscErrorCode dvd_improvex_jd_gen(dvdDashboard *d,Vec *D,PetscInt max_size_D,PetscInt r_s,PetscInt r_e,PetscInt *size_D)
281: {
282:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;
283:   PetscErrorCode  ierr;
284:   PetscInt        i,j,n,maxits,maxits0,lits,s,ld,k;
285:   PetscScalar     *pX,*pY,*auxS = d->auxS,*auxS0;
286:   PetscReal       tol,tol0;
287:   Vec             *u,*v,*kr,kr_comp,D_comp;
288:   PetscBool       odd_situation = PETSC_FALSE;

291:   /* Quick exit */
292:   if ((max_size_D == 0) || r_e-r_s <= 0) {
293:    *size_D = 0;
294:    /* Callback old improveX */
295:     if (data->old_improveX) {
296:       d->improveX_data = data->old_improveX_data;
297:       data->old_improveX(d, NULL, 0, 0, 0, NULL);
298:       d->improveX_data = data;
299:     }
300:     return(0);
301:   }

303:   n = PetscMin(PetscMin(data->size_X, max_size_D), r_e-r_s);
304:   if (n == 0) SETERRQ(PETSC_COMM_SELF,1, "n == 0");
305:   if (data->size_X < r_e-r_s) SETERRQ(PETSC_COMM_SELF,1, "size_X < r_e-r_s");

307:   DSGetLeadingDimension(d->ps,&ld);

309:   /* Restart lastTol if a new pair converged */
310:   if (data->dynamic && data->size_cX < d->size_cX)
311:     data->lastTol = 0.5;

313:   for (i=0,s=0,auxS0=auxS;i<n;i+=s) {
314:     /* If the selected eigenvalue is complex, but the arithmetic is real... */
315: #if !defined(PETSC_USE_COMPLEX)
316:     if (d->eigi[i] != 0.0) {
317:       if (i+2 <= max_size_D) s=2;
318:       else break;
319:     } else
320: #endif
321:       s=1;

323:     data->auxV = d->auxV;
324:     data->r_s = r_s+i; data->r_e = r_s+i+s;
325:     auxS = auxS0;
326:     data->theta = auxS; auxS+= 2*s;
327:     data->thetai = auxS; auxS+= s;
328:     kr = data->auxV; data->auxV+= s;

330:     /* Compute theta, maximum iterations and tolerance */
331:     maxits = 0; tol = 1;
332:     for (j=0;j<s;j++) {
333:       d->improvex_jd_lit(d, r_s+i+j, &data->theta[2*j],
334:                                 &data->thetai[j], &maxits0, &tol0);
335:       maxits+= maxits0; tol*= tol0;
336:     }
337:     maxits/= s; tol = data->dynamic?data->lastTol:exp(log(tol)/s);

339:     /* Compute u, v and kr */
340:     k = r_s+i+d->cX_in_H;
341:     DSVectors(d->ps,DS_MAT_X,&k,NULL);
342:     DSNormalize(d->ps,DS_MAT_X,r_s+i+d->cX_in_H);
343:     k = r_s+i+d->cX_in_H;
344:     DSVectors(d->ps,DS_MAT_Y,&k,NULL);
345:     DSNormalize(d->ps,DS_MAT_Y,r_s+i+d->cX_in_H);
346:     DSGetArray(d->ps,DS_MAT_X,&pX);
347:     DSGetArray(d->ps,DS_MAT_Y,&pY);
348:     dvd_improvex_jd_proj_cuv(d,r_s+i,r_s+i+s,&u,&v,kr,&data->auxV,&auxS,data->theta,data->thetai,pX,pY,ld);
349:     DSRestoreArray(d->ps,DS_MAT_X,&pX);
350:     DSRestoreArray(d->ps,DS_MAT_Y,&pY);
351:     data->u = u;

353:     /* Check if the first eigenpairs are converged */
354:     if (i == 0) {
355:       PetscInt n_auxV = data->auxV-d->auxV+s, n_auxS = auxS - d->auxS;
356:       d->auxV+= n_auxV; d->size_auxV-= n_auxV;
357:       d->auxS+= n_auxS; d->size_auxS-= n_auxS;
358:       d->preTestConv(d,0,s,s,d->auxV-s,NULL,&d->npreconv);
359:       d->auxV-= n_auxV; d->size_auxV+= n_auxV;
360:       d->auxS-= n_auxS; d->size_auxS+= n_auxS;
361:       if (d->npreconv > 0) break;
362:     }

364:     /* Test the odd situation of solving Ax=b with A=I */
365: #if !defined(PETSC_USE_COMPLEX)
366:     odd_situation = (data->ksp && data->theta[0] == 1. && data->theta[1] == 0. && data->thetai[0] == 0. && d->B == NULL)?PETSC_TRUE:PETSC_FALSE;
367: #else
368:     odd_situation = (data->ksp && data->theta[0] == 1. && data->theta[1] == 0. && d->B == NULL)?PETSC_TRUE:PETSC_FALSE;
369: #endif
370:     /* If JD */
371:     if (data->ksp && !odd_situation) {
372:       data->auxS = auxS;

374:       /* kr <- -kr */
375:       for (j=0;j<s;j++) {
376:         VecScale(kr[j],-1.0);
377:       }

379:       /* Compose kr and D */
380:       VecCreateCompWithVecs(kr,data->ksp_max_size,data->friends,&kr_comp);
381:       VecCreateCompWithVecs(&D[i],data->ksp_max_size,data->friends,&D_comp);
382:       VecCompSetSubVecs(data->friends,s,NULL);

384:       /* Solve the correction equation */
385:       KSPSetTolerances(data->ksp,tol,PETSC_DEFAULT,PETSC_DEFAULT,maxits);
386:       KSPSolve(data->ksp,kr_comp,D_comp);
387:       KSPGetIterationNumber(data->ksp,&lits);
388:       d->eps->st->lineariterations+= lits;

390:       /* Destroy the composed ks and D */
391:       VecDestroy(&kr_comp);
392:       VecDestroy(&D_comp);

394:     /* If GD */
395:     } else {
396:       for (j=0;j<s;j++) {
397:         d->improvex_precond(d, r_s+i+j, kr[j], D[i+j]);
398:       }
399:       dvd_improvex_apply_proj(d, &D[i], s, auxS);
400:     }
401:     /* Prevent that short vectors are discarded in the orthogonalization */
402:     if (i == 0 && d->eps->errest[d->nconv+r_s] > PETSC_MACHINE_EPSILON && d->eps->errest[d->nconv+r_s] < PETSC_MAX_REAL) {
403:       for (j=0;j<s;j++) {
404:         VecScale(D[j],1.0/d->eps->errest[d->nconv+r_s]);
405:       }
406:     }
407:   }
408:   *size_D = i;
409:   if (data->dynamic) data->lastTol = PetscMax(data->lastTol/2.0,PETSC_MACHINE_EPSILON*10.0);

411:   /* Callback old improveX */
412:   if (data->old_improveX) {
413:     d->improveX_data = data->old_improveX_data;
414:     data->old_improveX(d, NULL, 0, 0, 0, NULL);
415:     d->improveX_data = data;
416:   }
417:   return(0);
418: }

420: /* y <- theta[1]A*x - theta[0]*B*x
421:    auxV, two auxiliary vectors */
424: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmult(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV)
425: {
426:   PetscErrorCode  ierr;
427:   PetscInt        n,i;
428:   const Vec       *Bx;

431:   n = data->r_e - data->r_s;
432:   for (i=0;i<n;i++) {
433:     MatMult(data->d->A,x[i],y[i]);
434:   }

436:   for (i=0;i<n;i++) {
437: #if !defined(PETSC_USE_COMPLEX)
438:     if (data->d->eigi[data->r_s+i] != 0.0) {
439:       if (data->d->B) {
440:         MatMult(data->d->B,x[i],auxV[0]);
441:         MatMult(data->d->B,x[i+1],auxV[1]);
442:         Bx = auxV;
443:       } else Bx = &x[i];

445:       /* y_i   <- [ t_2i+1*A*x_i   - t_2i*Bx_i + ti_i*Bx_i+1;
446:          y_i+1      t_2i+1*A*x_i+1 - ti_i*Bx_i - t_2i*Bx_i+1  ] */
447:       VecAXPBYPCZ(y[i],-data->theta[2*i],data->thetai[i],data->theta[2*i+1],Bx[0],Bx[1]);
448:       VecAXPBYPCZ(y[i+1],-data->thetai[i],-data->theta[2*i],data->theta[2*i+1],Bx[0],Bx[1]);
449:       i++;
450:     } else
451: #endif
452:     {
453:       if (data->d->B) {
454:         MatMult(data->d->B,x[i],auxV[0]);
455:         Bx = auxV;
456:       } else Bx = &x[i];
457:       VecAXPBY(y[i],-data->theta[i*2],data->theta[i*2+1],Bx[0]);
458:     }
459:   }
460:   return(0);
461: }

463: /* y <- theta[1]'*A'*x - theta[0]'*B'*x
464:    auxV, two auxiliary vectors */
467: PETSC_STATIC_INLINE PetscErrorCode dvd_aux_matmulttrans(dvdImprovex_jd *data,const Vec *x,const Vec *y,const Vec *auxV)
468: {
469:   PetscErrorCode  ierr;
470:   PetscInt        n,i;
471:   const Vec       *Bx;

474:   n = data->r_e - data->r_s;
475:   for (i=0;i<n;i++) {
476:     MatMultTranspose(data->d->A,x[i],y[i]);
477:   }

479:   for (i=0;i<n;i++) {
480: #if !defined(PETSC_USE_COMPLEX)
481:     if (data->d->eigi[data->r_s+i] != 0.0) {
482:       if (data->d->B) {
483:         MatMultTranspose(data->d->B,x[i],auxV[0]);
484:         MatMultTranspose(data->d->B,x[i+1],auxV[1]);
485:         Bx = auxV;
486:       } else Bx = &x[i];

488:       /* y_i   <- [ t_2i+1*A*x_i   - t_2i*Bx_i - ti_i*Bx_i+1;
489:          y_i+1      t_2i+1*A*x_i+1 + ti_i*Bx_i - t_2i*Bx_i+1  ] */
490:       VecAXPBYPCZ(y[i],-data->theta[2*i],-data->thetai[i],data->theta[2*i+1],Bx[0],Bx[1]);
491:       VecAXPBYPCZ(y[i+1],data->thetai[i],-data->theta[2*i],data->theta[2*i+1],Bx[0],Bx[1]);
492:       i++;
493:     } else
494: #endif
495:     {
496:       if (data->d->B) {
497:         MatMultTranspose(data->d->B,x[i],auxV[0]);
498:         Bx = auxV;
499:       } else Bx = &x[i];
500:       VecAXPBY(y[i],PetscConj(-data->theta[i*2]),PetscConj(data->theta[i*2+1]),Bx[0]);
501:     }
502:   }
503:   return(0);
504: }

508: PetscErrorCode PCApplyBA_dvd(PC pc,PCSide side,Vec in,Vec out,Vec w)
509: {
510:   PetscErrorCode  ierr;
511:   dvdImprovex_jd  *data;
512:   PetscInt        n,i;
513:   const Vec       *inx,*outx,*wx;
514:   Mat             A;

517:   PCGetOperators(pc,&A,NULL,NULL);
518:   MatShellGetContext(A,(void**)&data);
519:   VecCompGetSubVecs(in,NULL,&inx);
520:   VecCompGetSubVecs(out,NULL,&outx);
521:   VecCompGetSubVecs(w,NULL,&wx);
522:   n = data->r_e - data->r_s;

524:   /* Check auxiliary vectors */
525:   if (&data->auxV[n] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");

527:   switch (side) {
528:   case PC_LEFT:
529:     /* aux <- theta[1]A*in - theta[0]*B*in */
530:     dvd_aux_matmult(data,inx,data->auxV,outx);

532:     /* out <- K * aux */
533:     for (i=0;i<n;i++) {
534:       data->d->improvex_precond(data->d,data->r_s+i,data->auxV[i],outx[i]);
535:     }
536:     break;
537:   case PC_RIGHT:
538:     /* aux <- K * in */
539:     for (i=0;i<n;i++) {
540:       data->d->improvex_precond(data->d,data->r_s+i,inx[i],data->auxV[i]);
541:     }

543:     /* out <- theta[1]A*auxV - theta[0]*B*auxV */
544:     dvd_aux_matmult(data,data->auxV,outx,wx);
545:     break;
546:   case PC_SYMMETRIC:
547:     /* aux <- K^{1/2} * in */
548:     for (i=0;i<n;i++) {
549:       PCApplySymmetricRight(data->old_pc,inx[i],data->auxV[i]);
550:     }

552:     /* out <- theta[1]A*auxV - theta[0]*B*auxV */
553:     dvd_aux_matmult(data,data->auxV,wx,outx);

555:     /* aux <- K^{1/2} * in */
556:     for (i=0;i<n;i++) {
557:       PCApplySymmetricLeft(data->old_pc,wx[i],outx[i]);
558:     }
559:     break;
560:   default:
561:     SETERRQ(PETSC_COMM_SELF,1, "Unsupported KSP side");
562:   }

564:   /* out <- out - v*(u'*out) */
565:   dvd_improvex_apply_proj(data->d,(Vec*)outx,n,data->auxS);
566:   return(0);
567: }

571: PetscErrorCode PCApply_dvd(PC pc,Vec in,Vec out)
572: {
573:   PetscErrorCode  ierr;
574:   dvdImprovex_jd  *data;
575:   PetscInt        n,i;
576:   const Vec       *inx, *outx;
577:   Mat             A;

580:   PCGetOperators(pc,&A,NULL,NULL);
581:   MatShellGetContext(A,(void**)&data);
582:   VecCompGetSubVecs(in,NULL,&inx);
583:   VecCompGetSubVecs(out,NULL,&outx);
584:   n = data->r_e - data->r_s;

586:   /* out <- K * in */
587:   for (i=0;i<n;i++) {
588:     data->d->improvex_precond(data->d,data->r_s+i,inx[i],outx[i]);
589:   }

591:   /* out <- out - v*(u'*out) */
592:   dvd_improvex_apply_proj(data->d,(Vec*)outx,n,data->auxS);
593:   return(0);
594: }

598: PetscErrorCode PCApplyTranspose_dvd(PC pc,Vec in,Vec out)
599: {
600:   PetscErrorCode  ierr;
601:   dvdImprovex_jd  *data;
602:   PetscInt        n,i;
603:   const Vec       *inx, *outx;
604:   Mat             A;

607:   PCGetOperators(pc,&A,NULL,NULL);
608:   MatShellGetContext(A,(void**)&data);
609:   VecCompGetSubVecs(in,NULL,&inx);
610:   VecCompGetSubVecs(out,NULL,&outx);
611:   n = data->r_e - data->r_s;

613:   /* Check auxiliary vectors */
614:   if (&data->auxV[n] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");

616:   /* auxV <- in */
617:   for (i=0;i<n;i++) {
618:     VecCopy(inx[i],data->auxV[i]);
619:   }

621:   /* auxV <- auxV - u*(v'*auxV) */
622:   dvd_improvex_applytrans_proj(data->d,data->auxV,n,data->auxS);

624:   /* out <- K' * aux */
625:   for (i=0;i<n;i++) {
626:     PCApplyTranspose(data->old_pc,data->auxV[i],outx[i]);
627:   }
628:   return(0);
629: }

633: PetscErrorCode MatMult_dvd_jd(Mat A,Vec in,Vec out)
634: {
635:   PetscErrorCode  ierr;
636:   dvdImprovex_jd  *data;
637:   PetscInt        n;
638:   const Vec       *inx, *outx;
639:   PCSide          side;

642:   MatShellGetContext(A,(void**)&data);
643:   VecCompGetSubVecs(in,NULL,&inx);
644:   VecCompGetSubVecs(out,NULL,&outx);
645:   n = data->r_e - data->r_s;

647:   /* Check auxiliary vectors */
648:   if (&data->auxV[2] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");

650:   /* out <- theta[1]A*in - theta[0]*B*in */
651:   dvd_aux_matmult(data,inx,outx,data->auxV);

653:   KSPGetPCSide(data->ksp,&side);
654:   if (side == PC_RIGHT) {
655:     /* out <- out - v*(u'*out) */
656:     dvd_improvex_apply_proj(data->d,(Vec*)outx,n,data->auxS);
657:   }
658:   return(0);
659: }

663: PetscErrorCode MatMultTranspose_dvd_jd(Mat A,Vec in,Vec out)
664: {
665:   PetscErrorCode  ierr;
666:   dvdImprovex_jd  *data;
667:   PetscInt        n,i;
668:   const Vec       *inx,*outx,*r,*auxV;
669:   PCSide          side;

672:   MatShellGetContext(A,(void**)&data);
673:   VecCompGetSubVecs(in,NULL,&inx);
674:   VecCompGetSubVecs(out,NULL,&outx);
675:   n = data->r_e - data->r_s;

677:   /* Check auxiliary vectors */
678:   if (&data->auxV[n+2] > data->d->auxV+data->d->size_auxV) SETERRQ(PETSC_COMM_SELF,1, "Insufficient auxiliary vectors");

680:   KSPGetPCSide(data->ksp,&side);
681:   if (side == PC_RIGHT) {
682:     /* auxV <- in */
683:     for (i=0;i<n;i++) {
684:       VecCopy(inx[i],data->auxV[i]);
685:     }

687:     /* auxV <- auxV - v*(u'*auxV) */
688:     dvd_improvex_applytrans_proj(data->d,data->auxV,n,data->auxS);
689:     r = data->auxV;
690:     auxV = data->auxV+n;
691:   } else {
692:     r = inx;
693:     auxV = data->auxV;
694:   }

696:   /* out <- theta[1]A*r - theta[0]*B*r */
697:   dvd_aux_matmulttrans(data,r,outx,auxV);
698:   return(0);
699: }

703: PetscErrorCode MatGetVecs_dvd_jd(Mat A,Vec *right,Vec *left)
704: {
705:   PetscErrorCode  ierr;
706:   Vec             *r, *l;
707:   dvdImprovex_jd  *data;
708:   PetscInt        n, i;

711:   MatShellGetContext(A, (void**)&data);
712:   n = data->ksp_max_size;
713:   if (right) {
714:     PetscMalloc(sizeof(Vec)*n, &r);
715:   }
716:   if (left) {
717:     PetscMalloc(sizeof(Vec)*n, &l);
718:   }
719:   for (i=0; i<n; i++) {
720:     MatGetVecs(data->d->A, right?&r[i]:NULL,left?&l[i]:NULL);
721:   }
722:   if (right) {
723:     VecCreateCompWithVecs(r, n, data->friends, right);
724:     for (i=0; i<n; i++) {
725:       VecDestroy(&r[i]);
726:     }
727:   }
728:   if (left) {
729:     VecCreateCompWithVecs(l, n, data->friends, left);
730:     for (i=0; i<n; i++) {
731:       VecDestroy(&l[i]);
732:     }
733:   }

735:   if (right) {
736:     PetscFree(r);
737:   }
738:   if (left) {
739:     PetscFree(l);
740:   }
741:   return(0);
742: }

746: PetscErrorCode dvd_improvex_jd_proj_uv(dvdDashboard *d,dvdBlackboard *b,ProjType_t p)
747: {
749:   /* Setup the step */
750:   if (b->state >= DVD_STATE_CONF) {
751:     switch (p) {
752:     case DVD_PROJ_KXX:
753:       d->improvex_jd_proj_uv = dvd_improvex_jd_proj_uv_KXX; break;
754:     case DVD_PROJ_KZX:
755:       d->improvex_jd_proj_uv = dvd_improvex_jd_proj_uv_KZX; break;
756:     }
757:   }
758:   return(0);
759: }

763: /*
764:   Compute: u <- X, v <- K*(theta[0]*A+theta[1]*B)*X,
765:   kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1], Y <- W*pY[i_s..i_e-1]
766:   where
767:   auxV, 4*(i_e-i_s) auxiliar global vectors
768:   pX,pY, the right and left eigenvectors of the projected system
769:   ld, the leading dimension of pX and pY
770:   auxS: max(8*bs*max_cX_in_proj,size_V*size_V)
771: */
772: PetscErrorCode dvd_improvex_jd_proj_cuv(dvdDashboard *d,PetscInt i_s,PetscInt i_e,Vec **u,Vec **v,Vec *kr,Vec **auxV,PetscScalar **auxS,PetscScalar *theta,PetscScalar *thetai,PetscScalar *pX,PetscScalar *pY,PetscInt ld)
773: {
774: #if defined(PETSC_MISSING_LAPACK_GETRF)
776:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable");
777: #else
778:   PetscErrorCode    ierr;
779:   PetscInt          n = i_e - i_s, size_KZ, V_new, rm, i, size_in;
780:   dvdImprovex_jd    *data = (dvdImprovex_jd*)d->improveX_data;
781:   PetscBLASInt      s, ldXKZ, info;
782:   DvdReduction      r;
783:   DvdReductionChunk ops[2];
784:   DvdMult_copy_func sr[2];

787:   /* Check consistency */
788:   V_new = d->size_cX - data->size_cX;
789:   if (V_new > data->old_size_X) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
790:   data->old_size_X = n;

792:   /* KZ <- KZ(rm:rm+max_cX-1) */
793:   rm = PetscMax(V_new+data->size_KZ-d->max_cX_in_impr, 0);
794:   for (i=0; i<d->max_cX_in_impr; i++) {
795:     VecCopy(data->KZ[i+rm], data->KZ[i]);
796:   }
797:   data->size_cX = d->size_cX;

799:   /* XKZ <- XKZ(rm:rm+max_cX-1,rm:rm+max_cX-1) */
800:   for (i=0; i<d->max_cX_in_impr; i++) {
801:     SlepcDenseCopy(&data->XKZ[i*data->ldXKZ+i], data->ldXKZ, &data->XKZ[(i+rm)*data->ldXKZ+i+rm], data->ldXKZ, data->size_KZ, 1);
802:   }
803:   data->size_KZ = PetscMin(d->max_cX_in_impr, data->size_KZ+V_new);

805:   /* Compute X, KZ and KR */
806:   *u = *auxV; *auxV+= n;
807:   *v = &data->KZ[data->size_KZ];
808:   d->improvex_jd_proj_uv(d, i_s, i_e, *u, *v, kr, *auxV, theta, thetai,
809:                                 pX, pY, ld);

811:   /* XKZ <- X'*KZ */
812:   size_KZ = data->size_KZ+n;
813:   size_in = 2*n*data->size_KZ+n*n;
814:   SlepcAllReduceSumBegin(ops,2,*auxS,*auxS+size_in,size_in,&r,PetscObjectComm((PetscObject)d->V[0]));
815:   VecsMultS(data->XKZ,0,data->ldXKZ,d->V-data->size_KZ,0,data->size_KZ,data->KZ,data->size_KZ,size_KZ,&r,&sr[0]);
816:   VecsMultS(&data->XKZ[data->size_KZ],0,data->ldXKZ,*u,0,n,data->KZ,0,size_KZ,&r,&sr[1]);
817:   SlepcAllReduceSumEnd(&r);

819:   /* iXKZ <- inv(XKZ) */
820:   PetscBLASIntCast(size_KZ,&s);
821:   data->ldiXKZ = data->size_iXKZ = size_KZ;
822:   data->iXKZ = *auxS; *auxS+= size_KZ*size_KZ;
823:   data->iXKZPivots = (PetscBLASInt*)*auxS;
824:   *auxS += FromIntToScalar(size_KZ);
825:   SlepcDenseCopy(data->iXKZ,data->ldiXKZ,data->XKZ,data->ldXKZ,size_KZ,size_KZ);
826:   PetscBLASIntCast(data->ldiXKZ,&ldXKZ);
827:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
828:   PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&s, &s, data->iXKZ, &ldXKZ, data->iXKZPivots, &info));
829:   PetscFPTrapPop();
830:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack XGETRF %d", info);
831:   return(0);
832: #endif
833: }

835: #define DVD_COMPLEX_RAYLEIGH_QUOTIENT(ur,ui,Axr,Axi,Bxr,Bxi,eigr,eigi,b,ierr)\
836: { \
837:   VecDot((Axr), (ur), &(b)[0]); /* r*A*r */ \
838:   VecDot((Axr), (ui), &(b)[1]); /* i*A*r */ \
839:   VecDot((Axi), (ur), &(b)[2]); /* r*A*i */ \
840:   VecDot((Axi), (ui), &(b)[3]); /* i*A*i */ \
841:   VecDot((Bxr), (ur), &(b)[4]); /* r*B*r */ \
842:   VecDot((Bxr), (ui), &(b)[5]); /* i*B*r */ \
843:   VecDot((Bxi), (ur), &(b)[6]); /* r*B*i */ \
844:   VecDot((Bxi), (ui), &(b)[7]); /* i*B*i */ \
845:   (b)[0]  = (b)[0]+(b)[3]; /* rAr+iAi */ \
846:   (b)[2] =  (b)[2]-(b)[1]; /* rAi-iAr */ \
847:   (b)[4] = (b)[4]+(b)[7]; /* rBr+iBi */ \
848:   (b)[6] = (b)[6]-(b)[5]; /* rBi-iBr */ \
849:   (b)[7] = (b)[4]*(b)[4] + (b)[6]*(b)[6]; /* k */ \
850:   *(eigr) = ((b)[0]*(b)[4] + (b)[2]*(b)[6]) / (b)[7]; /* eig_r */ \
851:   *(eigi) = ((b)[2]*(b)[4] - (b)[0]*(b)[6]) / (b)[7]; /* eig_i */ \
852: }

854: #if !defined(PETSC_USE_COMPLEX)
855: #define DVD_COMPUTE_N_RR(eps,i,i_s,n,eigr,eigi,u,Ax,Bx,b,ierr) \
856:   for ((i)=0; (i)<(n); (i)++) { \
857:     if ((eigi)[(i_s)+(i)] != 0.0) { \
858:       /* eig_r = [(rAr+iAi)*(rBr+iBi) + (rAi-iAr)*(rBi-iBr)]/k \
859:          eig_i = [(rAi-iAr)*(rBr+iBi) - (rAr+iAi)*(rBi-iBr)]/k \
860:          k     =  (rBr+iBi)*(rBr+iBi) + (rBi-iBr)*(rBi-iBr)    */ \
861:       DVD_COMPLEX_RAYLEIGH_QUOTIENT((u)[(i)], (u)[(i)+1], (Ax)[(i)], \
862:         (Ax)[(i)+1], (Bx)[(i)], (Bx)[(i)+1], &(b)[8], &(b)[9], (b), (ierr)); \
863:       if (PetscAbsScalar((eigr)[(i_s)+(i)] - (b)[8])/ \
864:             PetscAbsScalar((eigr)[(i_s)+(i)]) > 1e-10    || \
865:           PetscAbsScalar((eigi)[(i_s)+(i)] - (b)[9])/ \
866:             PetscAbsScalar((eigi)[(i_s)+(i)]) > 1e-10) { \
867:         (ierr) = PetscInfo4((eps), "The eigenvalue %G+%G is far from its "\
868:                             "Rayleigh quotient value %G+%G\n", \
869:                             (eigr)[(i_s)+(i)], \
870:                             (eigi)[(i_s)+(i)], (b)[8], (b)[9]); \
871:       } \
872:       (i)++; \
873:     } \
874:   }
875: #else
876: #define DVD_COMPUTE_N_RR(eps,i,i_s,n,eigr,eigi,u,Ax,Bx,b,ierr) \
877:   for ((i)=0; (i)<(n); (i)++) { \
878:       (ierr) = VecDot((Ax)[(i)], (u)[(i)], &(b)[0]); \
879:       (ierr) = VecDot((Bx)[(i)], (u)[(i)], &(b)[1]); \
880:       (b)[0] = (b)[0]/(b)[1]; \
881:       if (PetscAbsScalar((eigr)[(i_s)+(i)] - (b)[0])/ \
882:             PetscAbsScalar((eigr)[(i_s)+(i)]) > 1e-10) { \
883:         (ierr) = PetscInfo4((eps), "The eigenvalue %G+%G is far from its " \
884:                "Rayleigh quotient value %G+%G\n", \
885:                PetscRealPart((eigr)[(i_s)+(i)]), \
886:                PetscImaginaryPart((eigr)[(i_s)+(i)]), PetscRealPart((b)[0]), \
887:                PetscImaginaryPart((b)[0])); \
888:       } \
889:     }
890: #endif

894: /*
895:   Compute: u <- X, v <- K*(theta[0]*A+theta[1]*B)*X,
896:   kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1], Y <- W*pY[i_s..i_e-1]
897:   where
898:   auxV, 4*(i_e-i_s) auxiliar global vectors
899:   pX,pY, the right and left eigenvectors of the projected system
900:   ld, the leading dimension of pX and pY
901: */
902: PetscErrorCode dvd_improvex_jd_proj_uv_KZX(dvdDashboard *d,PetscInt i_s,PetscInt i_e,Vec *u,Vec *v,Vec *kr,Vec *auxV,PetscScalar *theta,PetscScalar *thetai,PetscScalar *pX,PetscScalar *pY,PetscInt ld)
903: {
904:   PetscErrorCode  ierr;
905:   PetscInt        n = i_e - i_s, i;
906:   PetscScalar     b[16];
907:   Vec             *Ax, *Bx, *r=auxV, X[4];
908:   /* The memory manager doen't allow to call a subroutines */
909:   PetscScalar     Z[size_Z];

912:   /* u <- X(i) */
913:   dvd_improvex_compute_X(d,i_s,i_e,u,pX,ld);

915:   /* v <- theta[0]A*u + theta[1]*B*u */

917:   /* Bx <- B*X(i) */
918:   Bx = kr;
919:   if (d->BV) {
920:     SlepcUpdateVectorsZ(Bx, 0.0, 1.0, d->BV-d->cX_in_H, d->size_BV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);
921:   } else {
922:     for (i=0;i<n;i++) {
923:       if (d->B) {
924:         MatMult(d->B, u[i], Bx[i]);
925:       } else {
926:         VecCopy(u[i], Bx[i]);
927:       }
928:     }
929:   }

931:   /* Ax <- A*X(i) */
932:   Ax = r;
933:   SlepcUpdateVectorsZ(Ax, 0.0, 1.0, d->AV-d->cX_in_H, d->size_AV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);

935:   /* v <- Y(i) */
936:   SlepcUpdateVectorsZ(v, 0.0, 1.0, (d->W?d->W:d->V)-d->cX_in_H, d->size_V+d->cX_in_H, &pY[ld*i_s], ld, d->size_H, n);

938:   /* Recompute the eigenvalue */
939:   DVD_COMPUTE_N_RR(d->eps, i, i_s, n, d->eigr, d->eigi, v, Ax, Bx, b, ierr);

941:   for (i=0;i<n;i++) {
942: #if !defined(PETSC_USE_COMPLEX)
943:     if (d->eigi[i_s+i] != 0.0) {
944:       /* [r_i r_i+1 kr_i kr_i+1]*= [ theta_2i'    0            1        0
945:                                        0         theta_2i'     0        1
946:                                      theta_2i+1 -thetai_i   -eigr_i -eigi_i
947:                                      thetai_i    theta_2i+1  eigi_i -eigr_i ] */
948:       b[0] = b[5] = PetscConj(theta[2*i]);
949:       b[2] = b[7] = -theta[2*i+1];
950:       b[6] = -(b[3] = thetai[i]);
951:       b[1] = b[4] = 0.0;
952:       b[8] = b[13] = 1.0/d->nX[i_s+i];
953:       b[10] = b[15] = -d->eigr[i_s+i]/d->nX[i_s+i];
954:       b[14] = -(b[11] = d->eigi[i_s+i]/d->nX[i_s+i]);
955:       b[9] = b[12] = 0.0;
956:       X[0] = Ax[i]; X[1] = Ax[i+1]; X[2] = Bx[i]; X[3] = Bx[i+1];
957:       SlepcUpdateVectorsD(X, 4, 1.0, b, 4, 4, 4, Z, size_Z);
958:       i++;
959:     } else
960: #endif
961:     {
962:       /* [Ax_i Bx_i]*= [ theta_2i'    1/nX_i
963:                         theta_2i+1  -eig_i/nX_i ] */
964:       b[0] = PetscConj(theta[i*2]);
965:       b[1] = theta[i*2+1];
966:       b[2] = 1.0/d->nX[i_s+i];
967:       b[3] = -d->eigr[i_s+i]/d->nX[i_s+i];
968:       X[0] = Ax[i]; X[1] = Bx[i];
969:       SlepcUpdateVectorsD(X, 2, 1.0, b, 2, 2, 2, Z, size_Z);
970:     }
971:   }
972:   for (i=0; i<n; i++) d->nX[i_s+i] = 1.0;

974:   /* v <- K^{-1} r = K^{-1}(theta_2i'*Ax + theta_2i+1*Bx) */
975:   for (i=0;i<n;i++) {
976:     d->improvex_precond(d, i_s+i, r[i], v[i]);
977:   }

979:   /* kr <- P*(Ax - eig_i*Bx) */
980:   d->calcpairs_proj_res(d, i_s, i_e, kr);
981:   return(0);
982: }

986: /*
987:   Compute: u <- K^{-1}*X, v <- X,
988:   kr <- K^{-1}*(A-eig*B)*X, being X <- V*pX[i_s..i_e-1]
989:   where
990:   auxV, 4*(i_e-i_s) auxiliar global vectors
991:   pX,pY, the right and left eigenvectors of the projected system
992:   ld, the leading dimension of pX and pY
993: */
994: PetscErrorCode dvd_improvex_jd_proj_uv_KXX(dvdDashboard *d,PetscInt i_s,PetscInt i_e,Vec *u,Vec *v,Vec *kr,Vec *auxV,PetscScalar *theta,PetscScalar *thetai,PetscScalar *pX,PetscScalar *pY,PetscInt ld)
995: {
996:   PetscErrorCode  ierr;
997:   PetscInt        n = i_e - i_s, i;
998:   PetscScalar     b[16];
999:   Vec             *Ax, *Bx, *r = auxV, X[4];
1000:   /* The memory manager doen't allow to call a subroutines */
1001:   PetscScalar     Z[size_Z];

1004:   /* [v u] <- X(i) Y(i) */
1005:   dvd_improvex_compute_X(d,i_s,i_e,v,pX,ld);
1006:   SlepcUpdateVectorsZ(u, 0.0, 1.0, (d->W?d->W:d->V)-d->cX_in_H, d->size_V+d->cX_in_H, &pY[ld*i_s], ld, d->size_H, n);

1008:   /* Bx <- B*X(i) */
1009:   Bx = r;
1010:   if (d->BV) {
1011:     SlepcUpdateVectorsZ(Bx, 0.0, 1.0, d->BV-d->cX_in_H, d->size_BV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);
1012:   } else {
1013:     if (d->B) {
1014:       for (i=0;i<n;i++) {
1015:         MatMult(d->B, v[i], Bx[i]);
1016:       }
1017:     } else Bx = v;
1018:   }

1020:   /* Ax <- A*X(i) */
1021:   Ax = kr;
1022:   SlepcUpdateVectorsZ(Ax, 0.0, 1.0, d->AV-d->cX_in_H, d->size_AV+d->cX_in_H, &pX[ld*i_s], ld, d->size_H, n);

1024:   /* Recompute the eigenvalue */
1025:   DVD_COMPUTE_N_RR(d->eps, i, i_s, n, d->eigr, d->eigi, u, Ax, Bx, b, ierr);

1027:   for (i=0;i<n;i++) {
1028:     if (d->eigi[i_s+i] == 0.0) {
1029:       /* kr <- Ax -eig*Bx */
1030:       VecAXPBY(kr[i], -d->eigr[i_s+i]/d->nX[i_s+i], 1.0/d->nX[i_s+i], Bx[i]);
1031:     } else {
1032:       /* [kr_i kr_i+1 r_i r_i+1]*= [   1        0
1033:                                        0        1
1034:                                     -eigr_i -eigi_i
1035:                                      eigi_i -eigr_i] */
1036:       b[0] = b[5] = 1.0/d->nX[i_s+i];
1037:       b[2] = b[7] = -d->eigr[i_s+i]/d->nX[i_s+i];
1038:       b[6] = -(b[3] = d->eigi[i_s+i]/d->nX[i_s+i]);
1039:       b[1] = b[4] = 0.0;
1040:       X[0] = kr[i]; X[1] = kr[i+1]; X[2] = r[i]; X[3] = r[i+1];
1041:       SlepcUpdateVectorsD(X, 4, 1.0, b, 4, 4, 2, Z, size_Z);
1042:       i++;
1043:     }
1044:   }
1045:   for (i=0; i<n; i++) d->nX[i_s+i] = 1.0;

1047:   /* kr <- P*kr */
1048:   d->calcpairs_proj_res(d, i_s, i_e, r);

1050:   /* u <- K^{-1} X(i) */
1051:   for (i=0;i<n;i++) {
1052:     d->improvex_precond(d, i_s+i, v[i], u[i]);
1053:   }
1054:   return(0);
1055: }

1059: PetscErrorCode dvd_improvex_jd_lit_const(dvdDashboard *d,dvdBlackboard *b,PetscInt maxits,PetscReal tol,PetscReal fix)
1060: {
1061:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;

1064:   /* Setup the step */
1065:   if (b->state >= DVD_STATE_CONF) {
1066:     data->maxits = maxits;
1067:     data->tol = tol;
1068:     data->fix = fix;
1069:     d->improvex_jd_lit = dvd_improvex_jd_lit_const_0;
1070:   }
1071:   return(0);
1072: }

1076: PetscErrorCode dvd_improvex_jd_lit_const_0(dvdDashboard *d,PetscInt i,PetscScalar* theta,PetscScalar* thetai,PetscInt *maxits,PetscReal *tol)
1077: {
1078:   dvdImprovex_jd  *data = (dvdImprovex_jd*)d->improveX_data;
1079:   PetscReal       a;

1082:   a = SlepcAbsEigenvalue(d->eigr[i],d->eigi[i]);

1084:   if (d->nR[i]/a < data->fix) {
1085:     theta[0] = d->eigr[i];
1086:     theta[1] = 1.0;
1087: #if !defined(PETSC_USE_COMPLEX)
1088:     *thetai = d->eigi[i];
1089: #endif
1090:   } else {
1091:     theta[0] = d->target[0];
1092:     theta[1] = d->target[1];
1093: #if !defined(PETSC_USE_COMPLEX)
1094:     *thetai = 0.0;
1095: #endif
1096: }

1098: #if defined(PETSC_USE_COMPLEX)
1099:   if (thetai) *thetai = 0.0;
1100: #endif
1101:   *maxits = data->maxits;
1102:   *tol = data->tol;
1103:   return(0);
1104: }

1106: /**** Patterns implementation *************************************************/

1108: typedef PetscInt (*funcV0_t)(dvdDashboard*,PetscInt,PetscInt,Vec*);
1109: typedef PetscInt (*funcV1_t)(dvdDashboard*,PetscInt,PetscInt,Vec*,PetscScalar*,Vec);

1113: /* Compute D <- K^{-1} * funcV[r_s..r_e] */
1114: PetscErrorCode dvd_improvex_PfuncV(dvdDashboard *d,void *funcV,Vec *D,PetscInt max_size_D,PetscInt r_s,PetscInt r_e,Vec *auxV,PetscScalar *auxS)
1115: {
1116:   PetscErrorCode  ierr;
1117:   PetscInt        i;

1120:   if (max_size_D >= r_e-r_s+1) {
1121:     /* The optimized version needs one vector extra of D */
1122:     /* D(1:r.size) = R(r_s:r_e-1) */
1123:     if (auxS) ((funcV1_t)funcV)(d, r_s, r_e, D+1, auxS, auxV[0]);
1124:     else      ((funcV0_t)funcV)(d, r_s, r_e, D+1);

1126:     /* D = K^{-1} * R */
1127:     for (i=0; i<r_e-r_s; i++) {
1128:       d->improvex_precond(d, i+r_s, D[i+1], D[i]);
1129:     }
1130:   } else if (max_size_D == r_e-r_s) {
1131:     /* Non-optimized version */
1132:     /* auxV <- R[r_e-1] */
1133:     if (auxS) ((funcV1_t)funcV)(d, r_e-1, r_e, auxV, auxS, auxV[1]);
1134:     else      ((funcV0_t)funcV)(d, r_e-1, r_e, auxV);

1136:     /* D(1:r.size-1) = R(r_s:r_e-2) */
1137:     if (auxS) ((funcV1_t)funcV)(d, r_s, r_e-1, D+1, auxS, auxV[1]);
1138:     else      ((funcV0_t)funcV)(d, r_s, r_e-1, D+1);

1140:     /* D = K^{-1} * R */
1141:     for (i=0; i<r_e-r_s-1; i++) {
1142:       d->improvex_precond(d, i+r_s, D[i+1], D[i]);
1143:     }
1144:     d->improvex_precond(d, r_e-1, auxV[0], D[r_e-r_s-1]);
1145:   } else SETERRQ(PETSC_COMM_SELF,1, "Problem: r_e-r_s > max_size_D");
1146:   return(0);
1147: }

1151: /* Compute (I - KZ*iXKZ*X')*V where,
1152:    V, the vectors to apply the projector,
1153:    cV, the number of vectors in V,
1154:    auxS, auxiliar vector of size length 3*size_iXKZ*cV
1155: */
1156: PetscErrorCode dvd_improvex_apply_proj(dvdDashboard *d,Vec *V,PetscInt cV,PetscScalar *auxS)
1157: {
1158: #if defined(PETSC_MISSING_LAPACK_GETRS)
1160:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routines are unavailable");
1161: #else
1162:   PetscErrorCode    ierr;
1163:   dvdImprovex_jd    *data = (dvdImprovex_jd*)d->improveX_data;
1164:   PetscInt          size_in = data->size_iXKZ*cV, i, ldh;
1165:   PetscScalar       *h, *in, *out;
1166:   PetscBLASInt      cV_, n, info, ld;
1167:   DvdReduction      r;
1168:   DvdReductionChunk ops[4];
1169:   DvdMult_copy_func sr[4];

1172:   if (cV > 2) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");

1174:   /* h <- X'*V */
1175:   h = auxS; in = h+size_in; out = in+size_in; ldh = data->size_iXKZ;
1176:   SlepcAllReduceSumBegin(ops, 4, in, out, size_in, &r,
1177:                                 PetscObjectComm((PetscObject)d->V[0]));
1178:   for (i=0; i<cV; i++) {
1179:     VecsMultS(&h[i*ldh],0,ldh,d->V-data->size_KZ,0,data->size_KZ,V+i,0,1,&r,&sr[i*2]);
1180:     VecsMultS(&h[i*ldh+data->size_KZ],0,ldh,data->u,0,data->size_iXKZ-data->size_KZ,V+i,0,1,&r,&sr[i*2+1]);
1181:   }
1182:   SlepcAllReduceSumEnd(&r);

1184:   /* h <- iXKZ\h */
1185:   PetscBLASIntCast(cV,&cV_);
1186:   PetscBLASIntCast(data->size_iXKZ,&n);
1187:   PetscBLASIntCast(data->ldiXKZ,&ld);
1189:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1190:   PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N", &n, &cV_, data->iXKZ, &ld, data->iXKZPivots, h, &n, &info));
1191:   PetscFPTrapPop();
1192:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack XGETRS %d", info);

1194:   /* V <- V - KZ*h */
1195:   for (i=0; i<cV; i++) {
1196:     SlepcUpdateVectorsZ(V+i,1.0,-1.0,data->KZ,data->size_iXKZ,&h[ldh*i],ldh,data->size_iXKZ,1);
1197:   }
1198:   return(0);
1199: #endif
1200: }

1204: /* Compute (I - X*iXKZ*KZ')*V where,
1205:    V, the vectors to apply the projector,
1206:    cV, the number of vectors in V,
1207:    auxS, auxiliar vector of size length 3*size_iXKZ*cV
1208: */
1209: PetscErrorCode dvd_improvex_applytrans_proj(dvdDashboard *d,Vec *V,PetscInt cV,PetscScalar *auxS)
1210: {
1211: #if defined(PETSC_MISSING_LAPACK_GETRS)
1213:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routines are unavailable");
1214: #else
1215:   PetscErrorCode    ierr;
1216:   dvdImprovex_jd    *data = (dvdImprovex_jd*)d->improveX_data;
1217:   PetscInt          size_in = data->size_iXKZ*cV, i, ldh;
1218:   PetscScalar       *h, *in, *out;
1219:   PetscBLASInt      cV_, n, info, ld;
1220:   DvdReduction      r;
1221:   DvdReductionChunk ops[2];
1222:   DvdMult_copy_func sr[2];

1225:   if (cV > 2) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");

1227:   /* h <- KZ'*V */
1228:   h = auxS; in = h+size_in; out = in+size_in; ldh = data->size_iXKZ;
1229:   SlepcAllReduceSumBegin(ops, 2, in, out, size_in, &r,
1230:                                 PetscObjectComm((PetscObject)d->V[0]));
1231:   for (i=0; i<cV; i++) {
1232:     VecsMultS(&h[i*ldh],0,ldh,data->KZ,0,data->size_KZ,V+i,0,1,&r,&sr[i]);
1233:   }
1234:   SlepcAllReduceSumEnd(&r);

1236:   /* h <- iXKZ\h */
1237:   PetscBLASIntCast(cV,&cV_);
1238:   PetscBLASIntCast(data->size_iXKZ,&n);
1239:   PetscBLASIntCast(data->ldiXKZ,&ld);
1241:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1242:   PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("C", &n, &cV_, data->iXKZ, &ld, data->iXKZPivots, h, &n, &info));
1243:   PetscFPTrapPop();
1244:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack XGETRS %d", info);

1246:   /* V <- V - X*h */
1247:   for (i=0; i<cV; i++) {
1248:     SlepcUpdateVectorsZ(V+i,1.0,-1.0,d->V-data->size_KZ,data->size_KZ,&h[ldh*i],ldh,data->size_KZ,1);
1249:     SlepcUpdateVectorsZ(V+i,1.0,-1.0,data->u,data->size_iXKZ-data->size_KZ,&h[ldh*i+data->size_KZ],ldh,data->size_iXKZ-data->size_KZ,1);
1250:   }
1251:   return(0);
1252: #endif
1253: }