AcouSTO  version 2.0

◆ gmres()

void gmres ( int  icounter)
50  {
51 
52 
53  int i,k,l,nr,mm,it;
54 
55  COMPLEX* r0;
56  COMPLEX* b0;
57  COMPLEX* v;
58  COMPLEX* w;
59  COMPLEX* H;
60  COMPLEX* y;
61  DOUBLE eps;
62  COMPLEX* cs;
63  COMPLEX* sn;
64  COMPLEX sum;
65 
66 
67  COMPLEX gamma;
68  DOUBLE beta;
69  COMPLEX* bhat;
70  COMPLEX* xn;
71 
72  DOUBLE bnrm;
73 
74  DOUBLE error;
75 
76  int gRows,gCols;
77  int locRows,locCols;
78  int izero = 0;
79  int ione = 1;
80  int itemp;
81  int descY[9];
82  int descRHS[9];
83  int descsol[9];
84  int descr[9];
85  int descv[9];
86  int descw[9];
87  int info;
88  int n;
89  long m,NIT,cit;
90  COMPLEX cone = 1.0 + I*0.0;
91  COMPLEX mcone = -1.0 + I*0.0;
92  COMPLEX czero = 0.0 + I*0.0;
93  DOUBLE one = 1.0;
94  COMPLEX tmp;
95  DOUBLE dtmp;
96  double t1,t2;
97  int nconv;
98 
99 
100  DOUBLE cr,max_cr,min_cr,normr;
101  int mmax,mmin,mi,d;
102 
103  char fname[255];
104  FILE *f;
105 
106 
107  gRows = modgeominfo->ncntr + modgeominfo->nchief;
108  gCols = modgeominfo->ncntr;
109  n = modgeominfo->ncntr;
110  m = n;
111  if(modsolinfo->restart != 0){
112  m = modsolinfo->restart;
113  }
114 
115 
116  locRows = numroc_(&gRows ,&runinfo->row_block_size,&runinfo->myrow,&izero,&runinfo->nprows);
117  locCols = numroc_(&gCols ,&runinfo->col_block_size,&runinfo->mycol,&izero,&runinfo->npcols);
118 
119  eps = modsolinfo->tolerance;
120  H = calloc((m+1)*(m+2),sizeof(COMPLEX));
121  cs = calloc(m+1,sizeof(COMPLEX));
122  sn = calloc(m+1,sizeof(COMPLEX));
123 
124 
125  b0 = calloc(locCols,sizeof(COMPLEX));
126  r0 = calloc(locRows,sizeof(COMPLEX));
127  xn = calloc(locRows,sizeof(COMPLEX));
128 
129  v = calloc(locRows*(m+1),sizeof(COMPLEX));
130  w = calloc(locRows ,sizeof(COMPLEX));
131 
132 
133  y = calloc(m+1,sizeof(COMPLEX));
134  bhat = calloc(m+1,sizeof(COMPLEX));
135 
136 
137  for(i =0; i<m*m;i++){
138  H[i] = czero;
139  }
140  for(i =0; i<m;i++){
141  bhat[i] = czero;
142  cs[i] = czero;
143  sn[i] = czero;
144  }
145 
146  itemp = max(locRows,locCols);
147  descinit_(descY ,&gRows,&gCols,&runinfo->row_block_size,&runinfo->col_block_size ,&izero,&izero,&runinfo->ctxt,&locRows,&info);
148  descinit_(descRHS ,&gRows,&ione ,&runinfo->row_block_size,&ione ,&izero,&izero,&runinfo->ctxt,&locRows,&info);
149 
150 
151  descinit_(descsol ,&gRows ,&ione ,&gRows ,&ione ,&izero,&izero,&runinfo->ctxt,&gRows ,&info);
152  descinit_(descr ,&gRows ,&ione ,&runinfo->row_block_size,&ione ,&izero,&izero,&runinfo->ctxt,&locRows,&info);
153 
154 
155  descinit_(descw ,&gRows ,&ione ,&runinfo->row_block_size,&ione ,&izero,&izero,&runinfo->ctxt,&locRows,&info);
156  descinit_(descv ,&gRows ,&ione ,&runinfo->row_block_size,&ione ,&izero,&izero,&runinfo->ctxt,&locRows,&info);
157 
158 
159 
160  pdznrm2_(&gCols,&bnrm,solution->rhs,&ione,&ione,descRHS,&ione); // beta = vec_norm2(r0,n);
161  //r0 = b
162  /*
163  // r0 = Ax - r0
164  pzgemv_("N",&gRows,&gCols,
165  &cone, solution->matY ,&ione,&ione,descY,
166  solution->sol ,&ione,&ione,descsol,&ione,
167  &cone,r0 ,&ione,&ione,descr,&ione);
168  */
169 
170  /*
171  printf("Ag=[");
172  for(j=0;j<gRows;j++){
173  for(i=0;i<gCols;i++){
174  printf("%f+ %%i*%f,",creal(solution->matY[j*n+i]),cimag(solution->matY[j*n+i]));
175  }
176  printf(";\n");
177  }
178  printf("];\n");
179  printf("bg=[");
180  for(i=0;i<gRows;i++){
181  printf("%f + %%i*%f ,\n",creal(solution->rhs[i]),cimag(solution->rhs[i]));
182  }
183  printf("];\n");
184  */
185 
186 
187  NIT = 1;
188  if(modsolinfo->maxiterations != 0){
189  NIT = modsolinfo->maxiterations;
190  }
191 
192  if(rank == 0 ){
193  get_filename(fname,"%s-gmres-res.out",rundetails->title);
194  f = fopen(fname,"w+");
195  }
196 
197  t1 = MPI_Wtime();
198  logger(LOG_INFO,"Starting GMRES iterations: maxit=%d, restart=%d\n",NIT,m);
199  cit=0;
200 
201  max_cr = .99;
202  min_cr = .4;
203  cr = 1.0;
204  d = 3;
205  mmax = modsolinfo->restart;
206  mmin = 3;
207  mi = m;
208 
209  // STARTING OUTER ITERATION
210  for(it=0;it<NIT;it++){
211 
212  VecZeroEntries(y,(m+1));
213  VecZeroEntries(v,locRows*(m+1));
214  VecZeroEntries(w,locRows);
215  VecZeroEntries(r0,locRows);
216  VecZeroEntries(H,(m+1)*(m+2));
217  // r0 = Ax - r0
218  pzcopy_(&gRows,solution->rhs,&ione,&ione,descRHS,&ione,r0,&ione,&ione,descr,&ione);
219  pzgemv_("N",&gRows,&gCols,
220  &mcone, solution->matY ,&ione,&ione,descY,
221  solution->sol ,&ione,&ione,descsol,&ione,
222  &cone, r0 ,&ione,&ione,descr,&ione);
223 
224  //pzaxpy_(&gRows,&mcone,solution->rhs,&ione,&ione,descRHS,&ione,
225 
226 
227  pdznrm2_(&gCols,&beta,r0,&ione,&ione,descr,&ione); // beta = vec_norm2(r0,n);
228 
229 
230  /*
231  // Trying A. H. Baker, E. R. Jessup, Tz. V. Kolev "A simple strategy for varying the restart parameter in GMRES(m)"
232  // Journal of Computational and Applied Mathematics
233  // October 2007
234  if(it > 0){
235  dtmp = normr;
236  pdznrm2_(&gCols,&normr,r0,&ione,&ione,descr,&ione); // beta = vec_norm2(r0,n);
237  cr = normr/dtmp;
238  }
239 
240 
241 
242  if(cr > max_cr || it == 0){
243  logger(LOG_INFO," (cr > max_cr, %f) changing m: %d -> %d\n",cr, m,mmax);
244  m = mmax;
245  }else if (cr < min_cr){
246  m = m;
247  }else{
248  if(mi-d >=mmin){
249  logger(LOG_INFO," (%f) changing m: %d -> %d\n",cr, m,m-d);
250  m = m- d;
251  }else{
252  logger(LOG_INFO," (%f) changing m: %d -> %d\n",cr, m,mmax);
253  m =mmax;
254  }
255  }
256  */
257 
258 
259 
260 
261  pzcopy_(&gRows,r0,&ione,&ione,descr,&ione, v,&ione,&ione,descv,&ione);
262  tmp = one/beta;
263  pzscal_(&gRows,&tmp,v,&ione,&ione,descv,&ione);// vec_mult_scal(v,r0,ONE/beta,n); v = r/beta
264 
265  bhat[0] = beta;
266  mm = m;
267 
268  normr = beta;
269 
270  // GMRES ITERATION
271  for(i =0; i<m;i++){
272  cit++;
273  if(0 == rank){
274  logger(LOG_DEBUG,"Iteration #%ld/#%d\n",cit,m*NIT);
275  }
276 
277  pzgemv_("N",&gRows,&gCols,
278  &cone, solution->matY ,&ione,&ione,descY,
279  &(v[i*locRows]) ,&ione,&ione,descv,&ione,
280  &czero,w ,&ione,&ione,descw,&ione); // matmult(w,A,&v[i*n],n); // w = A*v
281 
282 
283 
284  for(k=0;k<=i;k++){
285  // H[k,i] = <w,v[k]>
286  // w = w - H[k,i]v[k]
287  // H[k*m + i] = vec_scalar_tr(w,&v[k*n],n); // H(k,i) = w*v(k)
288  // vec_sum2(w,w,&v[k*n],-H[k*m + i],n); // w = w- H(k,i)*v(k)
289 
290  pzdotc_(&gRows,&tmp,w ,&ione,&ione,descw,&ione,
291  &v[k*locRows],&ione,&ione,descv,&ione);
292  H[k*m + i] = tmp;
293  tmp = -tmp;
294  pzaxpy_(&gRows,&tmp,&v[k*locRows],&ione,&ione,descv,&ione,w,&ione ,&ione,descw,&ione);
295 
296  }
297 
298 
299  pdznrm2_(&gRows,&dtmp,w,&ione,&ione,descw,&ione);// H[(i+1)*m + i] = vec_norm2(w,n); // H(i+1,i) = ||w||
300  H[(i+1)*m+i] = dtmp;
301 
302  pzcopy_(&gRows,w,&ione,&ione,descw,&ione, &v[(i+1)*locRows],&ione,&ione,descv,&ione);
303  tmp = cone/(H[(i+1)*m+i]);
304  pzscal_(&gRows,&tmp,&v[(i+1)*locRows],&ione,&ione,descv,&ione); // vec_mult_scal(&v[(i+1)*n],w,ONE/(H[(i+1)*m+i]),n); // v(i+1) = w/H(i+1,i)
305 // MPI_Barrier(MPI_COMM_WORLD);
306 
307  // Givens Rotation
308  for(k=0;k<i;k++){
309  gamma = sn[k]*H[(k+1)*m+i] + cs[k]*H[k*m+i]; // gamma = sn(k)*H(k+1,i) + cs(k)*H(k,i)
310  H[(k+1)*m +i] = -sn[k]*H[(k )*m+i] + cs[k]*H[(k+1)*m+i]; // H(k+1,i) = -sn(k)H(k,i)+cs(k)H(k+1,i)
311  H[(k)*m +i] = gamma; // H(k,i) = gamma
312  }
313  rotmat(&cs[i], &sn[i], H[i*m+i], H[(i+1)*m+i]);
314 
315 
316  H[i*m+i] = cs[i]*H[i*m+i] + sn[i]*H[(i+1)*m+i]; // H(i,i) = cs(i)H(i,i) + sn(i)*H(i+1,i)
317  H[(i+1)*m+i] = 0.0; // H(i+1,i) = 0.0
318 
319  //for(j =0; j<locRows;j++) { printf("%f %f\n",creal(v[locRows + j]),cimag(v[locRows + j])); }
320  bhat[i+1] = -sn[i] *bhat[i];
321  bhat[i] = cs[i] *bhat[i];
322 
323  error = cabs(bhat[i+1])/bnrm;
324  MPI_Bcast(&error,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
325  if(rank == 0 ){
326  fprintf(f,"%ld %d %d %10.8E\n",cit,it,i,error);
327  }
328 // MPI_Barrier(MPI_COMM_WORLD);
329  if(error< eps){
330  nr = i;
331  nconv = cit;
332  if(rank == 0){
333  logger(LOG_INFO,"Reached convergence: error=%+10.8E at iteration %ld\n",error,cit);
334  }
335  goto SOL;
336  }
337 
338 
339  }
340  --i;
341  nr = m-1;
342  y[nr] = bhat[nr]/H[nr*m+nr];
343 
344  SOL:
345 
346 
347  for(k=i;k>=0;--k){
348  sum = 0.0;
349  for(l=k+1;l<=i;++l){
350  sum += H[k*m+l]*y[l];
351  }
352  y[k] = (bhat[k] - sum) / H[k*m+k];
353  }
354 
355  /*
356  dtmp=0.0;
357  for(l=0;l<=nr;l++){
358  dtmp+=cabs(y[l])*cabs(y[l]);
359  }
360  printf("%f\n",sqrt(dtmp));
361  */
362 
363  for(l=0;l<=nr;l++){
364  pzaxpy_(&gRows,&y[l],&v[l*locRows],&ione, &ione, descv , &ione,
365  solution->sol ,&ione, &ione, descsol, &ione); //vec_sum2(x,x,&v[l*n],y[l],n); // x = x + y[l]*v
366  }
367 
368 
369  if(error < eps){
370  break;
371  }
372  // r0 = Ax - r0
373  /*
374  // Trying A. H. Baker, E. R. Jessup, Tz. V. Kolev "A simple strategy for varying the restart parameter in GMRES(m)"
375  // Journal of Computational and Applied Mathematics
376  // October 2007
377  pzcopy_(&gRows,solution->rhs,&ione,&ione,descRHS,&ione,r0,&ione,&ione,descr,&ione);
378  pzgemv_("N",&gRows,&gCols,
379  &mcone, solution->matY ,&ione,&ione,descY,
380  solution->sol ,&ione,&ione,descsol,&ione,
381  &cone, r0 ,&ione,&ione,descr,&ione);
382  dtmp = normr;
383  pdznrm2_(&gCols,&normr,r0,&ione,&ione,descr,&ione); // beta = vec_norm2(r0,n);
384  cr = normr/dtmp;
385  */
386  }
387 
388 
389 
390  MPI_Barrier(MPI_COMM_WORLD);
391  t2 = MPI_Wtime();
392  logger(LOG_DEBUG," GMRES iteration took %f s\n",(t2-t1));
393 
394  logger(LOG_INFO," GMRES: %d iterations, residual:%f\n",cit,error);
395  //END CHECK
396  logger(LOG_INFO,"Broadcasting solution (%d complex values)\n",gRows);
397  MPI_Bcast(solution->sol,gRows,MPI_ACOUSTO_COMPLEX,0,MPI_COMM_WORLD);
398  logger(LOG_INFO," Solution sent to all nodes\n");
399 
400  if(rank == 0 ){
401  fclose(f);
402  }
403 
404  free(H);
405  free(cs);
406  free(sn);
407 
408 
409  free(b0);
410  free(r0);
411  free(xn);
412  free(v);
413  free(w);
414  free(y);
415  free(bhat);
416 
417 }
struct run_info * runinfo
Definition: globals.h:34
int npcols
Definition: structs.h:113
void pzgemv_(char *trans, int *m, int *n, COMPLEX *alpha, COMPLEX *A, int *ia, int *ja, int *desca, COMPLEX *x, int *ix, int *jx, int *descx, int *mx, COMPLEX *beta, COMPLEX *y, int *iy, int *jy, int *descy, int *my)
const char * title
Definition: structs.h:135
int ncntr
Definition: structs.h:299
COMPLEX * matY
Definition: structs.h:603
#define MPI_ACOUSTO_COMPLEX
Definition: types.h:56
int myrow
Definition: structs.h:115
int nprows
Definition: structs.h:111
void pzaxpy_(int *n, COMPLEX *alpha, COMPLEX *x, int *ix, int *jx, int *descx, int *incx, COMPLEX *y, int *iy, int *jy, int *descy, int *incy)
void get_filename(char *filename, const char *format,...)
Definition: utils.c:59
int nchief
Definition: structs.h:301
COMPLEX * sol
Definition: structs.h:607
DOUBLE tolerance
Definition: structs.h:510
#define LOG_INFO
Definition: logger.h:26
void pzscal_(int *n, COMPLEX *alpha, COMPLEX *x, int *ix, int *jx, int *descx, int *incx)
int ctxt
Definition: structs.h:109
void descinit_(int *desc, int *m, int *n, int *mb, int *nb, int *ia, int *ja, int *ictxt, int *llda, int *info)
struct solution_struct * solution
Definition: globals.h:52
#define COMPLEX
Definition: types.h:48
void pdznrm2_(int *n, DOUBLE *norm2, COMPLEX *X, int *ix, int *jx, int *descx, int *incx)
struct modsol_info * modsolinfo
Definition: globals.h:44
#define LOG_DEBUG
Definition: logger.h:27
int mycol
Definition: structs.h:117
void pzdotc_(int *n, COMPLEX *dotc, COMPLEX *x, int *ix, int *jx, int *descx, int *incx, COMPLEX *y, int *iy, int *jy, int *descy, int *incy)
double DOUBLE
Definition: types.h:44
int numroc_(int *m, int *mb, int *p, int *ia, int *npr)
int rank
Definition: globals.h:79
int col_block_size
Definition: structs.h:121
long restart
Definition: structs.h:516
struct run_details * rundetails
Definition: globals.h:36
void pzcopy_(int *n, COMPLEX *x, int *ix, int *jx, int *descx, int *incx, COMPLEX *y, int *iy, int *jy, int *descy, int *incy)
static int max(int a, int b)
Definition: ac_gmres.c:26
void rotmat(COMPLEX *c, COMPLEX *s, const COMPLEX a, const COMPLEX b)
Definition: ac_gmres.c:29
COMPLEX * rhs
Definition: structs.h:605
int row_block_size
Definition: structs.h:119
long maxiterations
Definition: structs.h:513
void logger(int level, char *msg,...)
Definition: logger.c:56
#define calloc(n, size)
Definition: allocation.h:37
struct modgeom_info * modgeominfo
Definition: globals.h:38
#define VecZeroEntries(V, n)
Definition: allocation.h:99
DOUBLE y
Definition: structs.h:33