Actual source code: gmreig.c
 
   petsc-3.12.4 2020-02-04
   
  2:  #include <../src/ksp/ksp/impls/gmres/gmresimpl.h>
  3:  #include <petscblaslapack.h>
  5: PetscErrorCode KSPComputeExtremeSingularValues_GMRES(KSP ksp,PetscReal *emax,PetscReal *emin)
  6: {
  7: #if defined(PETSC_MISSING_LAPACK_GESVD)
  9:   /*
 10:       The Cray math libraries on T3D/T3E, and early versions of Intel Math Kernel Libraries (MKL)
 11:       for PCs do not seem to have the DGESVD() lapack routines
 12:   */
 13:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable\nNot able to provide singular value estimates.");
 14: #else
 15:   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
 17:   PetscInt       n = gmres->it + 1,i,N = gmres->max_k + 2;
 18:   PetscBLASInt   bn, bN,lwork, idummy,lierr;
 19:   PetscScalar    *R        = gmres->Rsvd,*work = R + N*N,sdummy = 0;
 20:   PetscReal      *realpart = gmres->Dsvd;
 23:   PetscBLASIntCast(n,&bn);
 24:   PetscBLASIntCast(N,&bN);
 25:   PetscBLASIntCast(5*N,&lwork);
 26:   PetscBLASIntCast(N,&idummy);
 27:   if (n <= 0) {
 28:     *emax = *emin = 1.0;
 29:     return(0);
 30:   }
 31:   /* copy R matrix to work space */
 32:   PetscArraycpy(R,gmres->hh_origin,(gmres->max_k+2)*(gmres->max_k+1));
 34:   /* zero below diagonal garbage */
 35:   for (i=0; i<n; i++) R[i*N+i+1] = 0.0;
 37:   /* compute Singular Values */
 38:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 39: #if !defined(PETSC_USE_COMPLEX)
 40:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr));
 41: #else
 42:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,realpart+N,&lierr));
 43: #endif
 44:   if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr);
 45:   PetscFPTrapPop();
 47:   *emin = realpart[n-1];
 48:   *emax = realpart[0];
 49: #endif
 50:   return(0);
 51: }
 53: /* ------------------------------------------------------------------------ */
 54: /* ESSL has a different calling sequence for dgeev() and zgeev() than standard LAPACK */
 55: PetscErrorCode KSPComputeEigenvalues_GMRES(KSP ksp,PetscInt nmax,PetscReal *r,PetscReal *c,PetscInt *neig)
 56: {
 57: #if defined(PETSC_HAVE_ESSL)
 58:   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
 60:   PetscInt       n = gmres->it + 1,N = gmres->max_k + 1;
 61:   PetscInt       i,*perm;
 62:   PetscScalar    *R     = gmres->Rsvd;
 63:   PetscScalar    *cwork = R + N*N,sdummy = 0;
 64:   PetscReal      *work,*realpart = gmres->Dsvd;
 65:   PetscBLASInt   zero = 0,bn,bN,idummy = -1,lwork;
 68:   PetscBLASIntCast(n,&bn);
 69:   PetscBLASIntCast(N,&bN);
 70:   PetscBLASIntCast(5*N,&lwork);
 71:   if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
 72:   *neig = n;
 74:   if (!n) return(0);
 76:   /* copy R matrix to work space */
 77:   PetscArraycpy(R,gmres->hes_origin,N*N);
 79:   /* compute eigenvalues */
 81:   /* for ESSL version need really cwork of length N (complex), 2N
 82:      (real); already at least 5N of space has been allocated */
 84:   PetscMalloc1(lwork,&work);
 85:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 86:   PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_(&zero,R,&bN,cwork,&sdummy,&idummy,&idummy,&bn,work,&lwork));
 87:   PetscFPTrapPop();
 88:   PetscFree(work);
 90:   /* For now we stick with the convention of storing the real and imaginary
 91:      components of evalues separately.  But is this what we really want? */
 92:   PetscMalloc1(n,&perm);
 94: #if !defined(PETSC_USE_COMPLEX)
 95:   for (i=0; i<n; i++) {
 96:     realpart[i] = cwork[2*i];
 97:     perm[i]     = i;
 98:   }
 99:   PetscSortRealWithPermutation(n,realpart,perm);
100:   for (i=0; i<n; i++) {
101:     r[i] = cwork[2*perm[i]];
102:     c[i] = cwork[2*perm[i]+1];
103:   }
104: #else
105:   for (i=0; i<n; i++) {
106:     realpart[i] = PetscRealPart(cwork[i]);
107:     perm[i]     = i;
108:   }
109:   PetscSortRealWithPermutation(n,realpart,perm);
110:   for (i=0; i<n; i++) {
111:     r[i] = PetscRealPart(cwork[perm[i]]);
112:     c[i] = PetscImaginaryPart(cwork[perm[i]]);
113:   }
114: #endif
115:   PetscFree(perm);
116: #elif defined(PETSC_MISSING_LAPACK_GEEV)
118:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values.");
119: #elif !defined(PETSC_USE_COMPLEX)
120:   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
122:   PetscInt       n = gmres->it + 1,N = gmres->max_k + 1,i,*perm;
123:   PetscBLASInt   bn, bN, lwork, idummy, l-1;
124:   PetscScalar    *R        = gmres->Rsvd,*work = R + N*N;
125:   PetscScalar    *realpart = gmres->Dsvd,*imagpart = realpart + N,sdummy = 0;
128:   PetscBLASIntCast(n,&bn);
129:   PetscBLASIntCast(N,&bN);
130:   PetscBLASIntCast(5*N,&lwork);
131:   PetscBLASIntCast(N,&idummy);
132:   if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
133:   *neig = n;
135:   if (!n) return(0);
137:   /* copy R matrix to work space */
138:   PetscArraycpy(R,gmres->hes_origin,N*N);
140:   /* compute eigenvalues */
141:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
142:   PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,realpart,imagpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr));
143:   if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine %d",(int)lierr);
144:   PetscFPTrapPop();
145:   PetscMalloc1(n,&perm);
146:   for (i=0; i<n; i++) perm[i] = i;
147:   PetscSortRealWithPermutation(n,realpart,perm);
148:   for (i=0; i<n; i++) {
149:     r[i] = realpart[perm[i]];
150:     c[i] = imagpart[perm[i]];
151:   }
152:   PetscFree(perm);
153: #else
154:   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
156:   PetscInt       n  = gmres->it + 1,N = gmres->max_k + 1,i,*perm;
157:   PetscScalar    *R = gmres->Rsvd,*work = R + N*N,*eigs = work + 5*N,sdummy;
158:   PetscBLASInt   bn,bN,lwork,idummy,l-1;
161:   PetscBLASIntCast(n,&bn);
162:   PetscBLASIntCast(N,&bN);
163:   PetscBLASIntCast(5*N,&lwork);
164:   PetscBLASIntCast(N,&idummy);
165:   if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
166:   *neig = n;
168:   if (!n) return(0);
170:   /* copy R matrix to work space */
171:   PetscArraycpy(R,gmres->hes_origin,N*N);
173:   /* compute eigenvalues */
174:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
175:   PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,eigs,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,gmres->Dsvd,&lierr));
176:   if (lierr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine");
177:   PetscFPTrapPop();
178:   PetscMalloc1(n,&perm);
179:   for (i=0; i<n; i++) perm[i] = i;
180:   for (i=0; i<n; i++) r[i] = PetscRealPart(eigs[i]);
181:   PetscSortRealWithPermutation(n,r,perm);
182:   for (i=0; i<n; i++) {
183:     r[i] = PetscRealPart(eigs[perm[i]]);
184:     c[i] = PetscImaginaryPart(eigs[perm[i]]);
185:   }
186:   PetscFree(perm);
187: #endif
188:   return(0);
189: }
191: #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_HAVE_ESSL)
192: PetscErrorCode KSPComputeRitz_GMRES(KSP ksp,PetscBool ritz,PetscBool small,PetscInt *nrit,Vec S[],PetscReal *tetar,PetscReal *tetai)
193: {
194:   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
196:   PetscInt       n = gmres->it + 1,N = gmres->max_k + 1,NbrRitz,nb=0;
197:   PetscInt       i,j,*perm;
198:   PetscReal      *H,*Q,*Ht;              /* H Hessenberg Matrix and Q matrix of eigenvectors of H*/
199:   PetscReal      *wr,*wi,*modul;       /* Real and imaginary part and modul of the Ritz values*/
200:   PetscReal      *SR,*work;
201:   PetscBLASInt   bn,bN,lwork,idummy;
202:   PetscScalar    *t,sdummy = 0;
205:   /* n: size of the Hessenberg matrix */
206:   if (gmres->fullcycle) n = N-1;
207:   /* NbrRitz: number of (harmonic) Ritz pairs to extract */
208:   NbrRitz = PetscMin(*nrit,n);
210:   /* Definition of PetscBLASInt for lapack routines*/
211:   PetscBLASIntCast(n,&bn);
212:   PetscBLASIntCast(N,&bN);
213:   PetscBLASIntCast(N,&idummy);
214:   PetscBLASIntCast(5*N,&lwork);
215:   /* Memory allocation */
216:   PetscMalloc1(bN*bN,&H);
217:   PetscMalloc1(bn*bn,&Q);
218:   PetscMalloc1(lwork,&work);
219:   PetscMalloc1(n,&wr);
220:   PetscMalloc1(n,&wi);
222:   /* copy H matrix to work space */
223:   if (gmres->fullcycle) {
224:     PetscArraycpy(H,gmres->hes_ritz,bN*bN);
225:   } else {
226:     PetscArraycpy(H,gmres->hes_origin,bN*bN);
227:   }
229:   /* Modify H to compute Harmonic Ritz pairs H = H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
230:   if (!ritz) {
231:     /* Transpose the Hessenberg matrix => Ht */
232:     PetscMalloc1(bn*bn,&Ht);
233:     for (i=0; i<bn; i++) {
234:       for (j=0; j<bn; j++) {
235:         Ht[i*bn+j] = H[j*bN+i];
236:       }
237:     }
238:     /* Solve the system H^T*t = h^2_{m+1,m}e_m */
239:     PetscCalloc1(bn,&t);
240:     /* t = h^2_{m+1,m}e_m */
241:     if (gmres->fullcycle) {
242:       t[bn-1] = PetscSqr(gmres->hes_ritz[(bn-1)*bN+bn]);
243:     } else {
244:       t[bn-1] = PetscSqr(gmres->hes_origin[(bn-1)*bN+bn]);
245:     }
246:     /* Call the LAPACK routine dgesv to compute t = H^{-T}*t */
247: #if   defined(PETSC_MISSING_LAPACK_GESV)
248:     SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GESV - Lapack routine is unavailable.");
249: #else
250:     {
251:       PetscBLASInt info;
252:       PetscBLASInt nrhs = 1;
253:       PetscBLASInt *ipiv;
254:       PetscMalloc1(bn,&ipiv);
255:       PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn,&nrhs,Ht,&bn,ipiv,t,&bn,&info));
256:       if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB,"Error while calling the Lapack routine DGESV");
257:       PetscFree(ipiv);
258:       PetscFree(Ht);
259:     }
260: #endif
261:     /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
262:     for (i=0; i<bn; i++) H[(bn-1)*bn+i] += t[i];
263:     PetscFree(t);
264:   }
266:   /* Compute (harmonic) Ritz pairs */
267: #if defined(PETSC_MISSING_LAPACK_HSEQR)
268:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values.");
269: #else
270:   {
271:     PetscBLASInt info;
272:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
273:     PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","V",&bn,H,&bN,wr,wi,&sdummy,&idummy,Q,&bn,work,&lwork,&info));
274:     if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine");
275:   }
276: #endif
277:   /* sort the (harmonic) Ritz values */
278:   PetscMalloc1(n,&modul);
279:   PetscMalloc1(n,&perm);
280:   for (i=0; i<n; i++) modul[i] = PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
281:   for (i=0; i<n; i++) perm[i] = i;
282:   PetscSortRealWithPermutation(n,modul,perm);
283:   /* count the number of extracted Ritz or Harmonic Ritz pairs (with complex conjugates) */
284:   if (small) {
285:     while (nb < NbrRitz) {
286:       if (!wi[perm[nb]]) nb += 1;
287:       else nb += 2;
288:     }
289:     PetscMalloc1(nb*n,&SR);
290:     for (i=0; i<nb; i++) {
291:       tetar[i] = wr[perm[i]];
292:       tetai[i] = wi[perm[i]];
293:       PetscArraycpy(&SR[i*n],&(Q[perm[i]*bn]),n);
294:     }
295:   } else {
296:     while (nb < NbrRitz) {
297:       if (wi[perm[n-nb-1]] == 0) nb += 1;
298:       else nb += 2;
299:     }
300:     PetscMalloc1(nb*n,&SR);
301:     for (i=0; i<nb; i++) {
302:       tetar[i] = wr[perm[n-nb+i]];
303:       tetai[i] = wi[perm[n-nb+i]];
304:       PetscArraycpy(&SR[i*n], &(Q[perm[n-nb+i]*bn]), n);
305:     }
306:   }
307:   PetscFree(modul);
308:   PetscFree(perm);
310:   /* Form the Ritz or Harmonic Ritz vectors S=VV*Sr, 
311:     where the columns of VV correspond to the basis of the Krylov subspace */
312:   if (gmres->fullcycle) {
313:     for (j=0; j<nb; j++) {
314:       VecZeroEntries(S[j]);
315:       VecMAXPY(S[j],n,&SR[j*n],gmres->vecb);
316:     }
317:   } else {
318:     for (j=0; j<nb; j++) {
319:       VecZeroEntries(S[j]);
320:       VecMAXPY(S[j],n,&SR[j*n],&VEC_VV(0));
321:     }
322:   }
323:   *nrit = nb;
324:   PetscFree(H);
325:   PetscFree(Q);
326:   PetscFree(SR);
327:   PetscFree(wr);
328:   PetscFree(wi);
329:   return(0);
330: }
331: #endif