Actual source code: sell.c
 
   petsc-3.12.4 2020-02-04
   
  2: /*
  3:   Defines the basic matrix operations for the SELL matrix storage format.
  4: */
  5:  #include <../src/mat/impls/sell/seq/sell.h>
  6:  #include <petscblaslapack.h>
  7:  #include <petsc/private/kernels/blocktranspose.h>
  8: #if defined(PETSC_HAVE_IMMINTRIN_H) && (defined(__AVX512F__) || (defined(__AVX2__) && defined(__FMA__)) || defined(__AVX__)) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
 10:   #include <immintrin.h>
 12:   #if !defined(_MM_SCALE_8)
 13:   #define _MM_SCALE_8    8
 14:   #endif
 16:   #if defined(__AVX512F__)
 17:   /* these do not work
 18:    vec_idx  = _mm512_loadunpackhi_epi32(vec_idx,acolidx);
 19:    vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval);
 20:   */
 21:     #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \
 22:     /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \
 23:     vec_idx  = _mm256_loadu_si256((__m256i const*)acolidx); \
 24:     vec_vals = _mm512_loadu_pd(aval); \
 25:     vec_x    = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \
 26:     vec_y    = _mm512_fmadd_pd(vec_x,vec_vals,vec_y)
 27:   #elif defined(__AVX2__) && defined(__FMA__)
 28:     #define AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \
 29:     vec_vals = _mm256_loadu_pd(aval); \
 30:     vec_idx  = _mm_loadu_si128((__m128i const*)acolidx); /* SSE2 */ \
 31:     vec_x    = _mm256_i32gather_pd(x,vec_idx,_MM_SCALE_8); \
 32:     vec_y    = _mm256_fmadd_pd(vec_x,vec_vals,vec_y)
 33:   #endif
 34: #endif  /* PETSC_HAVE_IMMINTRIN_H */
 36: /*@C
 37:  MatSeqSELLSetPreallocation - For good matrix assembly performance
 38:  the user should preallocate the matrix storage by setting the parameter nz
 39:  (or the array nnz).  By setting these parameters accurately, performance
 40:  during matrix assembly can be increased significantly.
 42:  Collective
 44:  Input Parameters:
 45:  +  B - The matrix
 46:  .  nz - number of nonzeros per row (same for all rows)
 47:  -  nnz - array containing the number of nonzeros in the various rows
 48:  (possibly different for each row) or NULL
 50:  Notes:
 51:  If nnz is given then nz is ignored.
 53:  Specify the preallocated storage with either nz or nnz (not both).
 54:  Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
 55:  allocation.  For large problems you MUST preallocate memory or you
 56:  will get TERRIBLE performance, see the users' manual chapter on matrices.
 58:  You can call MatGetInfo() to get information on how effective the preallocation was;
 59:  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
 60:  You can also run with the option -info and look for messages with the string
 61:  malloc in them to see if additional memory allocation was needed.
 63:  Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix
 64:  entries or columns indices.
 66:  The maximum number of nonzeos in any row should be as accurate as possible.
 67:  If it is underestimated, you will get bad performance due to reallocation
 68:  (MatSeqXSELLReallocateSELL).
 70:  Level: intermediate
 72:  .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo()
 74:  @*/
 75: PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[])
 76: {
 82:   PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));
 83:   return(0);
 84: }
 86: PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[])
 87: {
 88:   Mat_SeqSELL    *b;
 89:   PetscInt       i,j,totalslices;
 90:   PetscBool      skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE;
 94:   if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE;
 95:   if (maxallocrow == MAT_SKIP_ALLOCATION) {
 96:     skipallocation = PETSC_TRUE;
 97:     maxallocrow    = 0;
 98:   }
100:   PetscLayoutSetUp(B->rmap);
101:   PetscLayoutSetUp(B->cmap);
103:   /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */
104:   if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5;
105:   if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %D",maxallocrow);
106:   if (rlen) {
107:     for (i=0; i<B->rmap->n; i++) {
108:       if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %D value %D",i,rlen[i]);
109:       if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %D value %D rowlength %D",i,rlen[i],B->cmap->n);
110:     }
111:   }
113:   B->preallocated = PETSC_TRUE;
115:   b = (Mat_SeqSELL*)B->data;
117:   totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */
118:   b->totalslices = totalslices;
119:   if (!skipallocation) {
120:     if (B->rmap->n & 0x07) {PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %D)\n",B->rmap->n);}
122:     if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */
123:       PetscMalloc1(totalslices+1,&b->sliidx);
124:       PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));
125:     }
126:     if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */
127:       if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10;
128:       else if (maxallocrow < 0) maxallocrow = 1;
129:       for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow;
130:     } else {
131:       maxallocrow = 0;
132:       b->sliidx[0] = 0;
133:       for (i=1; i<totalslices; i++) {
134:         b->sliidx[i] = 0;
135:         for (j=0;j<8;j++) {
136:           b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]);
137:         }
138:         maxallocrow = PetscMax(b->sliidx[i],maxallocrow);
139:         PetscIntSumError(b->sliidx[i-1],8*b->sliidx[i],&b->sliidx[i]);
140:       }
141:       /* last slice */
142:       b->sliidx[totalslices] = 0;
143:       for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]);
144:       maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow);
145:       b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices];
146:     }
148:     /* allocate space for val, colidx, rlen */
149:     /* FIXME: should B's old memory be unlogged? */
150:     MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);
151:     /* FIXME: assuming an element of the bit array takes 8 bits */
152:     PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);
153:     PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));
154:     /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */
155:     PetscCalloc1(8*totalslices,&b->rlen);
156:     PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));
158:     b->singlemalloc = PETSC_TRUE;
159:     b->free_val     = PETSC_TRUE;
160:     b->free_colidx  = PETSC_TRUE;
161:   } else {
162:     b->free_val    = PETSC_FALSE;
163:     b->free_colidx = PETSC_FALSE;
164:   }
166:   b->nz               = 0;
167:   b->maxallocrow      = maxallocrow;
168:   b->rlenmax          = maxallocrow;
169:   b->maxallocmat      = b->sliidx[totalslices];
170:   B->info.nz_unneeded = (double)b->maxallocmat;
171:   if (realalloc) {
172:     MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
173:   }
174:   return(0);
175: }
177: PetscErrorCode MatGetRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
178: {
179:   Mat_SeqSELL *a = (Mat_SeqSELL*)A->data;
180:   PetscInt    shift;
183:   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
184:   if (nz) *nz = a->rlen[row];
185:   shift = a->sliidx[row>>3]+(row&0x07);
186:   if (!a->getrowcols) {
189:     PetscMalloc2(a->rlenmax,&a->getrowcols,a->rlenmax,&a->getrowvals);
190:   }
191:   if (idx) {
192:     PetscInt j;
193:     for (j=0; j<a->rlen[row]; j++) a->getrowcols[j] = a->colidx[shift+8*j];
194:     *idx = a->getrowcols;
195:   }
196:   if (v) {
197:     PetscInt j;
198:     for (j=0; j<a->rlen[row]; j++) a->getrowvals[j] = a->val[shift+8*j];
199:     *v = a->getrowvals;
200:   }
201:   return(0);
202: }
204: PetscErrorCode MatRestoreRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
205: {
207:   return(0);
208: }
210: PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
211: {
212:   Mat            B;
213:   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
214:   PetscInt       i;
218:   if (reuse == MAT_REUSE_MATRIX) {
219:     B    = *newmat;
220:     MatZeroEntries(B);
221:   } else {
222:     MatCreate(PetscObjectComm((PetscObject)A),&B);
223:     MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
224:     MatSetType(B,MATSEQAIJ);
225:     MatSeqAIJSetPreallocation(B,0,a->rlen);
226:   }
228:   for (i=0; i<A->rmap->n; i++) {
229:     PetscInt    nz = 0,*cols = NULL;
230:     PetscScalar *vals = NULL;
232:     MatGetRow_SeqSELL(A,i,&nz,&cols,&vals);
233:     MatSetValues(B,1,&i,nz,cols,vals,INSERT_VALUES);
234:     MatRestoreRow_SeqSELL(A,i,&nz,&cols,&vals);
235:   }
237:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
238:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
239:   B->rmap->bs = A->rmap->bs;
241:   if (reuse == MAT_INPLACE_MATRIX) {
242:     MatHeaderReplace(A,&B);
243:   } else {
244:     *newmat = B;
245:   }
246:   return(0);
247: }
249:  #include <../src/mat/impls/aij/seq/aij.h>
251: PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
252: {
253:   Mat               B;
254:   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)A->data;
255:   PetscInt          *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols;
256:   const PetscInt    *cols;
257:   const PetscScalar *vals;
258:   PetscErrorCode    ierr;
262:   if (reuse == MAT_REUSE_MATRIX) {
263:     B = *newmat;
264:   } else {
265:     /* Can we just use ilen? */
266:     PetscMalloc1(m,&rowlengths);
267:     for (i=0; i<m; i++) {
268:       rowlengths[i] = ai[i+1] - ai[i];
269:     }
271:     MatCreate(PetscObjectComm((PetscObject)A),&B);
272:     MatSetSizes(B,m,n,m,n);
273:     MatSetType(B,MATSEQSELL);
274:     MatSeqSELLSetPreallocation(B,0,rowlengths);
275:     PetscFree(rowlengths);
276:   }
278:   for (row=0; row<m; row++) {
279:     MatGetRow(A,row,&ncols,&cols,&vals);
280:     MatSetValues(B,1,&row,ncols,cols,vals,INSERT_VALUES);
281:     MatRestoreRow(A,row,&ncols,&cols,&vals);
282:   }
283:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
284:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
285:   B->rmap->bs = A->rmap->bs;
287:   if (reuse == MAT_INPLACE_MATRIX) {
288:     MatHeaderReplace(A,&B);
289:   } else {
290:     *newmat = B;
291:   }
292:   return(0);
293: }
295: PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy)
296: {
297:   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
298:   PetscScalar       *y;
299:   const PetscScalar *x;
300:   const MatScalar   *aval=a->val;
301:   PetscInt          totalslices=a->totalslices;
302:   const PetscInt    *acolidx=a->colidx;
303:   PetscInt          i,j;
304:   PetscErrorCode    ierr;
305: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
306:   __m512d           vec_x,vec_y,vec_vals;
307:   __m256i           vec_idx;
308:   __mmask8          mask;
309:   __m512d           vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4;
310:   __m256i           vec_idx2,vec_idx3,vec_idx4;
311: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
312:   __m128i           vec_idx;
313:   __m256d           vec_x,vec_y,vec_y2,vec_vals;
314:   MatScalar         yval;
315:   PetscInt          r,rows_left,row,nnz_in_row;
316: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
317:   __m128d           vec_x_tmp;
318:   __m256d           vec_x,vec_y,vec_y2,vec_vals;
319:   MatScalar         yval;
320:   PetscInt          r,rows_left,row,nnz_in_row;
321: #else
322:   PetscScalar       sum[8];
323: #endif
325: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
326: #pragma disjoint(*x,*y,*aval)
327: #endif
330:   VecGetArrayRead(xx,&x);
331:   VecGetArray(yy,&y);
332: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
333:   for (i=0; i<totalslices; i++) { /* loop over slices */
334:     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
335:     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
337:     vec_y  = _mm512_setzero_pd();
338:     vec_y2 = _mm512_setzero_pd();
339:     vec_y3 = _mm512_setzero_pd();
340:     vec_y4 = _mm512_setzero_pd();
342:     j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */
343:     switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) {
344:     case 3:
345:       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
346:       acolidx += 8; aval += 8;
347:       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
348:       acolidx += 8; aval += 8;
349:       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
350:       acolidx += 8; aval += 8;
351:       j += 3;
352:       break;
353:     case 2:
354:       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
355:       acolidx += 8; aval += 8;
356:       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
357:       acolidx += 8; aval += 8;
358:       j += 2;
359:       break;
360:     case 1:
361:       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
362:       acolidx += 8; aval += 8;
363:       j += 1;
364:       break;
365:     }
366:     #pragma novector
367:     for (; j<(a->sliidx[i+1]>>3); j+=4) {
368:       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
369:       acolidx += 8; aval += 8;
370:       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
371:       acolidx += 8; aval += 8;
372:       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
373:       acolidx += 8; aval += 8;
374:       AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4);
375:       acolidx += 8; aval += 8;
376:     }
378:     vec_y = _mm512_add_pd(vec_y,vec_y2);
379:     vec_y = _mm512_add_pd(vec_y,vec_y3);
380:     vec_y = _mm512_add_pd(vec_y,vec_y4);
381:     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
382:       mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07)));
383:       _mm512_mask_storeu_pd(&y[8*i],mask,vec_y);
384:     } else {
385:       _mm512_storeu_pd(&y[8*i],vec_y);
386:     }
387:   }
388: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
389:   for (i=0; i<totalslices; i++) { /* loop over full slices */
390:     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
391:     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
393:     /* last slice may have padding rows. Don't use vectorization. */
394:     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
395:       rows_left = A->rmap->n - 8*i;
396:       for (r=0; r<rows_left; ++r) {
397:         yval = (MatScalar)0;
398:         row = 8*i + r;
399:         nnz_in_row = a->rlen[row];
400:         for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]];
401:         y[row] = yval;
402:       }
403:       break;
404:     }
406:     vec_y  = _mm256_setzero_pd();
407:     vec_y2 = _mm256_setzero_pd();
409:     /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
410:     #pragma novector
411:     #pragma unroll(2)
412:     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
413:       AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
414:       aval += 4; acolidx += 4;
415:       AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y2);
416:       aval += 4; acolidx += 4;
417:     }
419:     _mm256_storeu_pd(y+i*8,vec_y);
420:     _mm256_storeu_pd(y+i*8+4,vec_y2);
421:   }
422: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
423:   for (i=0; i<totalslices; i++) { /* loop over full slices */
424:     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
425:     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
427:     vec_y  = _mm256_setzero_pd();
428:     vec_y2 = _mm256_setzero_pd();
430:     /* last slice may have padding rows. Don't use vectorization. */
431:     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
432:       rows_left = A->rmap->n - 8*i;
433:       for (r=0; r<rows_left; ++r) {
434:         yval = (MatScalar)0;
435:         row = 8*i + r;
436:         nnz_in_row = a->rlen[row];
437:         for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]];
438:         y[row] = yval;
439:       }
440:       break;
441:     }
443:     /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
444:     #pragma novector
445:     #pragma unroll(2)
446:     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
447:       vec_vals  = _mm256_loadu_pd(aval);
448:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
449:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
450:       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
451:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
452:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
453:       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
454:       vec_y     = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y);
455:       aval     += 4;
457:       vec_vals  = _mm256_loadu_pd(aval);
458:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
459:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
460:       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
461:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
462:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
463:       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
464:       vec_y2    = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2);
465:       aval     += 4;
466:     }
468:     _mm256_storeu_pd(y + i*8,     vec_y);
469:     _mm256_storeu_pd(y + i*8 + 4, vec_y2);
470:   }
471: #else
472:   for (i=0; i<totalslices; i++) { /* loop over slices */
473:     for (j=0; j<8; j++) sum[j] = 0.0;
474:     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
475:       sum[0] += aval[j] * x[acolidx[j]];
476:       sum[1] += aval[j+1] * x[acolidx[j+1]];
477:       sum[2] += aval[j+2] * x[acolidx[j+2]];
478:       sum[3] += aval[j+3] * x[acolidx[j+3]];
479:       sum[4] += aval[j+4] * x[acolidx[j+4]];
480:       sum[5] += aval[j+5] * x[acolidx[j+5]];
481:       sum[6] += aval[j+6] * x[acolidx[j+6]];
482:       sum[7] += aval[j+7] * x[acolidx[j+7]];
483:     }
484:     if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
485:       for(j=0; j<(A->rmap->n & 0x07); j++) y[8*i+j] = sum[j];
486:     } else {
487:       for(j=0; j<8; j++) y[8*i+j] = sum[j];
488:     }
489:   }
490: #endif
492:   PetscLogFlops(2.0*a->nz-a->nonzerorowcnt); /* theoretical minimal FLOPs */
493:   VecRestoreArrayRead(xx,&x);
494:   VecRestoreArray(yy,&y);
495:   return(0);
496: }
498: #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h>
499: PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz)
500: {
501:   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
502:   PetscScalar       *y,*z;
503:   const PetscScalar *x;
504:   const MatScalar   *aval=a->val;
505:   PetscInt          totalslices=a->totalslices;
506:   const PetscInt    *acolidx=a->colidx;
507:   PetscInt          i,j;
508:   PetscErrorCode    ierr;
509: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
510:   __m512d           vec_x,vec_y,vec_vals;
511:   __m256i           vec_idx;
512:   __mmask8          mask;
513:   __m512d           vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4;
514:   __m256i           vec_idx2,vec_idx3,vec_idx4;
515: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
516:   __m128d           vec_x_tmp;
517:   __m256d           vec_x,vec_y,vec_y2,vec_vals;
518:   MatScalar         yval;
519:   PetscInt          r,row,nnz_in_row;
520: #else
521:   PetscScalar       sum[8];
522: #endif
524: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
525: #pragma disjoint(*x,*y,*aval)
526: #endif
529:   VecGetArrayRead(xx,&x);
530:   VecGetArrayPair(yy,zz,&y,&z);
531: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
532:   for (i=0; i<totalslices; i++) { /* loop over slices */
533:     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
534:     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
536:     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
537:       mask   = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07)));
538:       vec_y  = _mm512_mask_loadu_pd(vec_y,mask,&y[8*i]);
539:     } else {
540:       vec_y  = _mm512_loadu_pd(&y[8*i]);
541:     }
542:     vec_y2 = _mm512_setzero_pd();
543:     vec_y3 = _mm512_setzero_pd();
544:     vec_y4 = _mm512_setzero_pd();
546:     j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */
547:     switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) {
548:     case 3:
549:       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
550:       acolidx += 8; aval += 8;
551:       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
552:       acolidx += 8; aval += 8;
553:       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
554:       acolidx += 8; aval += 8;
555:       j += 3;
556:       break;
557:     case 2:
558:       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
559:       acolidx += 8; aval += 8;
560:       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
561:       acolidx += 8; aval += 8;
562:       j += 2;
563:       break;
564:     case 1:
565:       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
566:       acolidx += 8; aval += 8;
567:       j += 1;
568:       break;
569:     }
570:     #pragma novector
571:     for (; j<(a->sliidx[i+1]>>3); j+=4) {
572:       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
573:       acolidx += 8; aval += 8;
574:       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
575:       acolidx += 8; aval += 8;
576:       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
577:       acolidx += 8; aval += 8;
578:       AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4);
579:       acolidx += 8; aval += 8;
580:     }
582:     vec_y = _mm512_add_pd(vec_y,vec_y2);
583:     vec_y = _mm512_add_pd(vec_y,vec_y3);
584:     vec_y = _mm512_add_pd(vec_y,vec_y4);
585:     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
586:       _mm512_mask_storeu_pd(&z[8*i],mask,vec_y);
587:     } else {
588:       _mm512_storeu_pd(&z[8*i],vec_y);
589:     }
590:   }
591: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
592:   for (i=0; i<totalslices; i++) { /* loop over full slices */
593:     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
594:     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
596:     /* last slice may have padding rows. Don't use vectorization. */
597:     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
598:       for (r=0; r<(A->rmap->n & 0x07); ++r) {
599:         row        = 8*i + r;
600:         yval       = (MatScalar)0.0;
601:         nnz_in_row = a->rlen[row];
602:         for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]];
603:         z[row] = y[row] + yval;
604:       }
605:       break;
606:     }
608:     vec_y  = _mm256_loadu_pd(y+8*i);
609:     vec_y2 = _mm256_loadu_pd(y+8*i+4);
611:     /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
612:     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
613:       vec_vals  = _mm256_loadu_pd(aval);
614:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
615:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
616:       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
617:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
618:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
619:       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
620:       vec_y     = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y);
621:       aval     += 4;
623:       vec_vals  = _mm256_loadu_pd(aval);
624:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
625:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
626:       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
627:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
628:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
629:       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
630:       vec_y2    = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2);
631:       aval     += 4;
632:     }
634:     _mm256_storeu_pd(z+i*8,vec_y);
635:     _mm256_storeu_pd(z+i*8+4,vec_y2);
636:   }
637: #else
638:   for (i=0; i<totalslices; i++) { /* loop over slices */
639:     for (j=0; j<8; j++) sum[j] = 0.0;
640:     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
641:       sum[0] += aval[j] * x[acolidx[j]];
642:       sum[1] += aval[j+1] * x[acolidx[j+1]];
643:       sum[2] += aval[j+2] * x[acolidx[j+2]];
644:       sum[3] += aval[j+3] * x[acolidx[j+3]];
645:       sum[4] += aval[j+4] * x[acolidx[j+4]];
646:       sum[5] += aval[j+5] * x[acolidx[j+5]];
647:       sum[6] += aval[j+6] * x[acolidx[j+6]];
648:       sum[7] += aval[j+7] * x[acolidx[j+7]];
649:     }
650:     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
651:       for (j=0; j<(A->rmap->n & 0x07); j++) z[8*i+j] = y[8*i+j] + sum[j];
652:     } else {
653:       for (j=0; j<8; j++) z[8*i+j] = y[8*i+j] + sum[j];
654:     }
655:   }
656: #endif
658:   PetscLogFlops(2.0*a->nz);
659:   VecRestoreArrayRead(xx,&x);
660:   VecRestoreArrayPair(yy,zz,&y,&z);
661:   return(0);
662: }
664: PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy)
665: {
666:   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
667:   PetscScalar       *y;
668:   const PetscScalar *x;
669:   const MatScalar   *aval=a->val;
670:   const PetscInt    *acolidx=a->colidx;
671:   PetscInt          i,j,r,row,nnz_in_row,totalslices=a->totalslices;
672:   PetscErrorCode    ierr;
674: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
675: #pragma disjoint(*x,*y,*aval)
676: #endif
679:   if (A->symmetric) {
680:     MatMultAdd_SeqSELL(A,xx,zz,yy);
681:     return(0);
682:   }
683:   if (zz != yy) { VecCopy(zz,yy); }
684:   VecGetArrayRead(xx,&x);
685:   VecGetArray(yy,&y);
686:   for (i=0; i<a->totalslices; i++) { /* loop over slices */
687:     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
688:       for (r=0; r<(A->rmap->n & 0x07); ++r) {
689:         row        = 8*i + r;
690:         nnz_in_row = a->rlen[row];
691:         for (j=0; j<nnz_in_row; ++j) y[acolidx[8*j+r]] += aval[8*j+r] * x[row];
692:       }
693:       break;
694:     }
695:     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
696:       y[acolidx[j]]   += aval[j] * x[8*i];
697:       y[acolidx[j+1]] += aval[j+1] * x[8*i+1];
698:       y[acolidx[j+2]] += aval[j+2] * x[8*i+2];
699:       y[acolidx[j+3]] += aval[j+3] * x[8*i+3];
700:       y[acolidx[j+4]] += aval[j+4] * x[8*i+4];
701:       y[acolidx[j+5]] += aval[j+5] * x[8*i+5];
702:       y[acolidx[j+6]] += aval[j+6] * x[8*i+6];
703:       y[acolidx[j+7]] += aval[j+7] * x[8*i+7];
704:     }
705:   }
706:   PetscLogFlops(2.0*a->sliidx[a->totalslices]);
707:   VecRestoreArrayRead(xx,&x);
708:   VecRestoreArray(yy,&y);
709:   return(0);
710: }
712: PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy)
713: {
717:   if (A->symmetric) {
718:     MatMult_SeqSELL(A,xx,yy);
719:   } else {
720:     VecSet(yy,0.0);
721:     MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);
722:   }
723:   return(0);
724: }
726: /*
727:      Checks for missing diagonals
728: */
729: PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool  *missing,PetscInt *d)
730: {
731:   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
732:   PetscInt       *diag,i;
736:   *missing = PETSC_FALSE;
737:   if (A->rmap->n > 0 && !(a->colidx)) {
738:     *missing = PETSC_TRUE;
739:     if (d) *d = 0;
740:     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
741:   } else {
742:     diag = a->diag;
743:     for (i=0; i<A->rmap->n; i++) {
744:       if (diag[i] == -1) {
745:         *missing = PETSC_TRUE;
746:         if (d) *d = i;
747:         PetscInfo1(A,"Matrix is missing diagonal number %D\n",i);
748:         break;
749:       }
750:     }
751:   }
752:   return(0);
753: }
755: PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A)
756: {
757:   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
758:   PetscInt       i,j,m=A->rmap->n,shift;
762:   if (!a->diag) {
763:     PetscMalloc1(m,&a->diag);
764:     PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));
765:     a->free_diag = PETSC_TRUE;
766:   }
767:   for (i=0; i<m; i++) { /* loop over rows */
768:     shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
769:     a->diag[i] = -1;
770:     for (j=0; j<a->rlen[i]; j++) {
771:       if (a->colidx[shift+j*8] == i) {
772:         a->diag[i] = shift+j*8;
773:         break;
774:       }
775:     }
776:   }
777:   return(0);
778: }
780: /*
781:   Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways
782: */
783: PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift)
784: {
785:   Mat_SeqSELL    *a=(Mat_SeqSELL*) A->data;
786:   PetscInt       i,*diag,m = A->rmap->n;
787:   MatScalar      *val = a->val;
788:   PetscScalar    *idiag,*mdiag;
792:   if (a->idiagvalid) return(0);
793:   MatMarkDiagonal_SeqSELL(A);
794:   diag = a->diag;
795:   if (!a->idiag) {
796:     PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);
797:     PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));
798:     val  = a->val;
799:   }
800:   mdiag = a->mdiag;
801:   idiag = a->idiag;
803:   if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) {
804:     for (i=0; i<m; i++) {
805:       mdiag[i] = val[diag[i]];
806:       if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */
807:         if (PetscRealPart(fshift)) {
808:           PetscInfo1(A,"Zero diagonal on row %D\n",i);
809:           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
810:           A->factorerror_zeropivot_value = 0.0;
811:           A->factorerror_zeropivot_row   = i;
812:         } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i);
813:       }
814:       idiag[i] = 1.0/val[diag[i]];
815:     }
816:     PetscLogFlops(m);
817:   } else {
818:     for (i=0; i<m; i++) {
819:       mdiag[i] = val[diag[i]];
820:       idiag[i] = omega/(fshift + val[diag[i]]);
821:     }
822:     PetscLogFlops(2.0*m);
823:   }
824:   a->idiagvalid = PETSC_TRUE;
825:   return(0);
826: }
828: PetscErrorCode MatZeroEntries_SeqSELL(Mat A)
829: {
830:   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
834:   PetscArrayzero(a->val,a->sliidx[a->totalslices]);
835:   MatSeqSELLInvalidateDiagonal(A);
836:   return(0);
837: }
839: PetscErrorCode MatDestroy_SeqSELL(Mat A)
840: {
841:   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
845: #if defined(PETSC_USE_LOG)
846:   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz);
847: #endif
848:   MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);
849:   ISDestroy(&a->row);
850:   ISDestroy(&a->col);
851:   PetscFree(a->diag);
852:   PetscFree(a->rlen);
853:   PetscFree(a->sliidx);
854:   PetscFree3(a->idiag,a->mdiag,a->ssor_work);
855:   PetscFree(a->solve_work);
856:   ISDestroy(&a->icol);
857:   PetscFree(a->saved_values);
858:   PetscFree2(a->getrowcols,a->getrowvals);
860:   PetscFree(A->data);
862:   PetscObjectChangeTypeName((PetscObject)A,0);
863:   PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);
864:   PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);
865: #if defined(PETSC_HAVE_ELEMENTAL)
866: #endif
867:   PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);
868:   return(0);
869: }
871: PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg)
872: {
873:   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
877:   switch (op) {
878:   case MAT_ROW_ORIENTED:
879:     a->roworiented = flg;
880:     break;
881:   case MAT_KEEP_NONZERO_PATTERN:
882:     a->keepnonzeropattern = flg;
883:     break;
884:   case MAT_NEW_NONZERO_LOCATIONS:
885:     a->nonew = (flg ? 0 : 1);
886:     break;
887:   case MAT_NEW_NONZERO_LOCATION_ERR:
888:     a->nonew = (flg ? -1 : 0);
889:     break;
890:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
891:     a->nonew = (flg ? -2 : 0);
892:     break;
893:   case MAT_UNUSED_NONZERO_LOCATION_ERR:
894:     a->nounused = (flg ? -1 : 0);
895:     break;
896:   case MAT_NEW_DIAGONALS:
897:   case MAT_IGNORE_OFF_PROC_ENTRIES:
898:   case MAT_USE_HASH_TABLE:
899:   case MAT_SORTED_FULL:
900:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
901:     break;
902:   case MAT_SPD:
903:   case MAT_SYMMETRIC:
904:   case MAT_STRUCTURALLY_SYMMETRIC:
905:   case MAT_HERMITIAN:
906:   case MAT_SYMMETRY_ETERNAL:
907:     /* These options are handled directly by MatSetOption() */
908:     break;
909:   default:
910:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
911:   }
912:   return(0);
913: }
915: PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v)
916: {
917:   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
918:   PetscInt       i,j,n,shift;
919:   PetscScalar    *x,zero=0.0;
923:   VecGetLocalSize(v,&n);
924:   if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
926:   if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) {
927:     PetscInt *diag=a->diag;
928:     VecGetArray(v,&x);
929:     for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]];
930:     VecRestoreArray(v,&x);
931:     return(0);
932:   }
934:   VecSet(v,zero);
935:   VecGetArray(v,&x);
936:   for (i=0; i<n; i++) { /* loop over rows */
937:     shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
938:     x[i] = 0;
939:     for (j=0; j<a->rlen[i]; j++) {
940:       if (a->colidx[shift+j*8] == i) {
941:         x[i] = a->val[shift+j*8];
942:         break;
943:       }
944:     }
945:   }
946:   VecRestoreArray(v,&x);
947:   return(0);
948: }
950: PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr)
951: {
952:   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
953:   const PetscScalar *l,*r;
954:   PetscInt          i,j,m,n,row;
955:   PetscErrorCode    ierr;
958:   if (ll) {
959:     /* The local size is used so that VecMPI can be passed to this routine
960:        by MatDiagonalScale_MPISELL */
961:     VecGetLocalSize(ll,&m);
962:     if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
963:     VecGetArrayRead(ll,&l);
964:     for (i=0; i<a->totalslices; i++) { /* loop over slices */
965:       if (i == a->totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
966:         for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
967:           if (row < (A->rmap->n & 0x07)) a->val[j] *= l[8*i+row];
968:         }
969:       } else {
970:         for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
971:           a->val[j] *= l[8*i+row];
972:         }
973:       }
974:     }
975:     VecRestoreArrayRead(ll,&l);
976:     PetscLogFlops(a->nz);
977:   }
978:   if (rr) {
979:     VecGetLocalSize(rr,&n);
980:     if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
981:     VecGetArrayRead(rr,&r);
982:     for (i=0; i<a->totalslices; i++) { /* loop over slices */
983:       if (i == a->totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
984:         for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
985:           if (row < (A->rmap->n & 0x07)) a->val[j] *= r[a->colidx[j]];
986:         }
987:       } else {
988:         for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) {
989:           a->val[j] *= r[a->colidx[j]];
990:         }
991:       }
992:     }
993:     VecRestoreArrayRead(rr,&r);
994:     PetscLogFlops(a->nz);
995:   }
996:   MatSeqSELLInvalidateDiagonal(A);
997:   return(0);
998: }
1000: extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1002: PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1003: {
1004:   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1005:   PetscInt    *cp,i,k,low,high,t,row,col,l;
1006:   PetscInt    shift;
1007:   MatScalar   *vp;
1010:   for (k=0; k<m; k++) { /* loop over requested rows */
1011:     row = im[k];
1012:     if (row<0) continue;
1013: #if defined(PETSC_USE_DEBUG)
1014:     if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
1015: #endif
1016:     shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */
1017:     cp = a->colidx+shift; /* pointer to the row */
1018:     vp = a->val+shift; /* pointer to the row */
1019:     for (l=0; l<n; l++) { /* loop over requested columns */
1020:       col = in[l];
1021:       if (col<0) continue;
1022: #if defined(PETSC_USE_DEBUG)
1023:       if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %D max %D",col,A->cmap->n-1);
1024: #endif
1025:       high = a->rlen[row]; low = 0; /* assume unsorted */
1026:       while (high-low > 5) {
1027:         t = (low+high)/2;
1028:         if (*(cp+t*8) > col) high = t;
1029:         else low = t;
1030:       }
1031:       for (i=low; i<high; i++) {
1032:         if (*(cp+8*i) > col) break;
1033:         if (*(cp+8*i) == col) {
1034:           *v++ = *(vp+8*i);
1035:           goto finished;
1036:         }
1037:       }
1038:       *v++ = 0.0;
1039:     finished:;
1040:     }
1041:   }
1042:   return(0);
1043: }
1045: PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer)
1046: {
1047:   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
1048:   PetscInt          i,j,m=A->rmap->n,shift;
1049:   const char        *name;
1050:   PetscViewerFormat format;
1051:   PetscErrorCode    ierr;
1054:   PetscViewerGetFormat(viewer,&format);
1055:   if (format == PETSC_VIEWER_ASCII_MATLAB) {
1056:     PetscInt nofinalvalue = 0;
1057:     /*
1058:     if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) {
1059:       nofinalvalue = 1;
1060:     }
1061:     */
1062:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1063:     PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);
1064:     PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);
1065: #if defined(PETSC_USE_COMPLEX)
1066:     PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);
1067: #else
1068:     PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);
1069: #endif
1070:     PetscViewerASCIIPrintf(viewer,"zzz = [\n");
1072:     for (i=0; i<m; i++) {
1073:       shift = a->sliidx[i>>3]+(i&0x07);
1074:       for (j=0; j<a->rlen[i]; j++) {
1075: #if defined(PETSC_USE_COMPLEX)
1076:         PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));
1077: #else
1078:         PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);
1079: #endif
1080:       }
1081:     }
1082:     /*
1083:     if (nofinalvalue) {
1084: #if defined(PETSC_USE_COMPLEX)
1085:       PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e %18.16e\n",m,A->cmap->n,0.,0.);
1086: #else
1087:       PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",m,A->cmap->n,0.0);
1088: #endif
1089:     }
1090:     */
1091:     PetscObjectGetName((PetscObject)A,&name);
1092:     PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);
1093:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1094:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
1095:     return(0);
1096:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1097:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1098:     for (i=0; i<m; i++) {
1099:       PetscViewerASCIIPrintf(viewer,"row %D:",i);
1100:       shift = a->sliidx[i>>3]+(i&0x07);
1101:       for (j=0; j<a->rlen[i]; j++) {
1102: #if defined(PETSC_USE_COMPLEX)
1103:         if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) {
1104:           PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));
1105:         } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) {
1106:           PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));
1107:         } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) {
1108:           PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));
1109:         }
1110: #else
1111:         if (a->val[shift+8*j] != 0.0) {PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);}
1112: #endif
1113:       }
1114:       PetscViewerASCIIPrintf(viewer,"\n");
1115:     }
1116:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1117:   } else if (format == PETSC_VIEWER_ASCII_DENSE) {
1118:     PetscInt    cnt=0,jcnt;
1119:     PetscScalar value;
1120: #if defined(PETSC_USE_COMPLEX)
1121:     PetscBool   realonly=PETSC_TRUE;
1122:     for (i=0; i<a->sliidx[a->totalslices]; i++) {
1123:       if (PetscImaginaryPart(a->val[i]) != 0.0) {
1124:         realonly = PETSC_FALSE;
1125:         break;
1126:       }
1127:     }
1128: #endif
1130:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1131:     for (i=0; i<m; i++) {
1132:       jcnt = 0;
1133:       shift = a->sliidx[i>>3]+(i&0x07);
1134:       for (j=0; j<A->cmap->n; j++) {
1135:         if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) {
1136:           value = a->val[cnt++];
1137:           jcnt++;
1138:         } else {
1139:           value = 0.0;
1140:         }
1141: #if defined(PETSC_USE_COMPLEX)
1142:         if (realonly) {
1143:           PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));
1144:         } else {
1145:           PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));
1146:         }
1147: #else
1148:         PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);
1149: #endif
1150:       }
1151:       PetscViewerASCIIPrintf(viewer,"\n");
1152:     }
1153:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1154:   } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) {
1155:     PetscInt fshift=1;
1156:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1157: #if defined(PETSC_USE_COMPLEX)
1158:     PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");
1159: #else
1160:     PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");
1161: #endif
1162:     PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);
1163:     for (i=0; i<m; i++) {
1164:       shift = a->sliidx[i>>3]+(i&0x07);
1165:       for (j=0; j<a->rlen[i]; j++) {
1166: #if defined(PETSC_USE_COMPLEX)
1167:         PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));
1168: #else
1169:         PetscViewerASCIIPrintf(viewer,"%D %D %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);
1170: #endif
1171:       }
1172:     }
1173:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1174:   } else if (format == PETSC_VIEWER_NATIVE) {
1175:     for (i=0; i<a->totalslices; i++) { /* loop over slices */
1176:       PetscInt row;
1177:       PetscViewerASCIIPrintf(viewer,"slice %D: %D %D\n",i,a->sliidx[i],a->sliidx[i+1]);
1178:       for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
1179: #if defined(PETSC_USE_COMPLEX)
1180:         if (PetscImaginaryPart(a->val[j]) > 0.0) {
1181:           PetscViewerASCIIPrintf(viewer,"  %D %D %g + %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));
1182:         } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1183:           PetscViewerASCIIPrintf(viewer,"  %D %D %g - %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),-(double)PetscImaginaryPart(a->val[j]));
1184:         } else {
1185:           PetscViewerASCIIPrintf(viewer,"  %D %D %g\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]));
1186:         }
1187: #else
1188:         PetscViewerASCIIPrintf(viewer,"  %D %D %g\n",8*i+row,a->colidx[j],(double)a->val[j]);
1189: #endif
1190:       }
1191:     }
1192:   } else {
1193:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1194:     if (A->factortype) {
1195:       for (i=0; i<m; i++) {
1196:         shift = a->sliidx[i>>3]+(i&0x07);
1197:         PetscViewerASCIIPrintf(viewer,"row %D:",i);
1198:         /* L part */
1199:         for (j=shift; j<a->diag[i]; j+=8) {
1200: #if defined(PETSC_USE_COMPLEX)
1201:           if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) {
1202:             PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));
1203:           } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) {
1204:             PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));
1205:           } else {
1206:             PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));
1207:           }
1208: #else
1209:           PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);
1210: #endif
1211:         }
1212:         /* diagonal */
1213:         j = a->diag[i];
1214: #if defined(PETSC_USE_COMPLEX)
1215:         if (PetscImaginaryPart(a->val[j]) > 0.0) {
1216:           PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));
1217:         } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1218:           PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));
1219:         } else {
1220:           PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));
1221:         }
1222: #else
1223:         PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)(1.0/a->val[j]));
1224: #endif
1226:         /* U part */
1227:         for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) {
1228: #if defined(PETSC_USE_COMPLEX)
1229:           if (PetscImaginaryPart(a->val[j]) > 0.0) {
1230:             PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));
1231:           } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1232:             PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));
1233:           } else {
1234:             PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));
1235:           }
1236: #else
1237:           PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);
1238: #endif
1239:         }
1240:         PetscViewerASCIIPrintf(viewer,"\n");
1241:       }
1242:     } else {
1243:       for (i=0; i<m; i++) {
1244:         shift = a->sliidx[i>>3]+(i&0x07);
1245:         PetscViewerASCIIPrintf(viewer,"row %D:",i);
1246:         for (j=0; j<a->rlen[i]; j++) {
1247: #if defined(PETSC_USE_COMPLEX)
1248:           if (PetscImaginaryPart(a->val[j]) > 0.0) {
1249:             PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));
1250:           } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1251:             PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));
1252:           } else {
1253:             PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));
1254:           }
1255: #else
1256:           PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);
1257: #endif
1258:         }
1259:         PetscViewerASCIIPrintf(viewer,"\n");
1260:       }
1261:     }
1262:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1263:   }
1264:   PetscViewerFlush(viewer);
1265:   return(0);
1266: }
1268:  #include <petscdraw.h>
1269: PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa)
1270: {
1271:   Mat               A=(Mat)Aa;
1272:   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
1273:   PetscInt          i,j,m=A->rmap->n,shift;
1274:   int               color;
1275:   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1276:   PetscViewer       viewer;
1277:   PetscViewerFormat format;
1278:   PetscErrorCode    ierr;
1281:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1282:   PetscViewerGetFormat(viewer,&format);
1283:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
1285:   /* loop over matrix elements drawing boxes */
1287:   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1288:     PetscDrawCollectiveBegin(draw);
1289:     /* Blue for negative, Cyan for zero and  Red for positive */
1290:     color = PETSC_DRAW_BLUE;
1291:     for (i=0; i<m; i++) {
1292:       shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1293:       y_l = m - i - 1.0; y_r = y_l + 1.0;
1294:       for (j=0; j<a->rlen[i]; j++) {
1295:         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1296:         if (PetscRealPart(a->val[shift+8*j]) >=  0.) continue;
1297:         PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);
1298:       }
1299:     }
1300:     color = PETSC_DRAW_CYAN;
1301:     for (i=0; i<m; i++) {
1302:       shift = a->sliidx[i>>3]+(i&0x07);
1303:       y_l = m - i - 1.0; y_r = y_l + 1.0;
1304:       for (j=0; j<a->rlen[i]; j++) {
1305:         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1306:         if (a->val[shift+8*j] !=  0.) continue;
1307:         PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);
1308:       }
1309:     }
1310:     color = PETSC_DRAW_RED;
1311:     for (i=0; i<m; i++) {
1312:       shift = a->sliidx[i>>3]+(i&0x07);
1313:       y_l = m - i - 1.0; y_r = y_l + 1.0;
1314:       for (j=0; j<a->rlen[i]; j++) {
1315:         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1316:         if (PetscRealPart(a->val[shift+8*j]) <=  0.) continue;
1317:         PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);
1318:       }
1319:     }
1320:     PetscDrawCollectiveEnd(draw);
1321:   } else {
1322:     /* use contour shading to indicate magnitude of values */
1323:     /* first determine max of all nonzero values */
1324:     PetscReal minv=0.0,maxv=0.0;
1325:     PetscInt  count=0;
1326:     PetscDraw popup;
1327:     for (i=0; i<a->sliidx[a->totalslices]; i++) {
1328:       if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]);
1329:     }
1330:     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1331:     PetscDrawGetPopup(draw,&popup);
1332:     PetscDrawScalePopup(popup,minv,maxv);
1334:     PetscDrawCollectiveBegin(draw);
1335:     for (i=0; i<m; i++) {
1336:       shift = a->sliidx[i>>3]+(i&0x07);
1337:       y_l = m - i - 1.0;
1338:       y_r = y_l + 1.0;
1339:       for (j=0; j<a->rlen[i]; j++) {
1340:         x_l = a->colidx[shift+j*8];
1341:         x_r = x_l + 1.0;
1342:         color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv);
1343:         PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);
1344:         count++;
1345:       }
1346:     }
1347:     PetscDrawCollectiveEnd(draw);
1348:   }
1349:   return(0);
1350: }
1352:  #include <petscdraw.h>
1353: PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer)
1354: {
1355:   PetscDraw      draw;
1356:   PetscReal      xr,yr,xl,yl,h,w;
1357:   PetscBool      isnull;
1361:   PetscViewerDrawGetDraw(viewer,0,&draw);
1362:   PetscDrawIsNull(draw,&isnull);
1363:   if (isnull) return(0);
1365:   xr   = A->cmap->n; yr  = A->rmap->n; h = yr/10.0; w = xr/10.0;
1366:   xr  += w;          yr += h;         xl = -w;     yl = -h;
1367:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1368:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1369:   PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);
1370:   PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);
1371:   PetscDrawSave(draw);
1372:   return(0);
1373: }
1375: PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer)
1376: {
1377:   PetscBool      iascii,isbinary,isdraw;
1381:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1382:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1383:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1384:   if (iascii) {
1385:     MatView_SeqSELL_ASCII(A,viewer);
1386:   } else if (isbinary) {
1387:     /* MatView_SeqSELL_Binary(A,viewer); */
1388:   } else if (isdraw) {
1389:     MatView_SeqSELL_Draw(A,viewer);
1390:   }
1391:   return(0);
1392: }
1394: PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode)
1395: {
1396:   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1397:   PetscInt       i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k;
1398:   MatScalar      *vp;
1402:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);
1403:   /* To do: compress out the unused elements */
1404:   MatMarkDiagonal_SeqSELL(A);
1405:   PetscInfo6(A,"Matrix size: %D X %D; storage space: %D allocated %D used (%D nonzeros+%D paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);
1406:   PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);
1407:   PetscInfo1(A,"Maximum nonzeros in any row is %D\n",a->rlenmax);
1408:   /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */
1409:   for (i=0; i<a->totalslices; ++i) {
1410:     shift = a->sliidx[i];    /* starting index of the slice */
1411:     cp    = a->colidx+shift; /* pointer to the column indices of the slice */
1412:     vp    = a->val+shift;    /* pointer to the nonzero values of the slice */
1413:     for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */
1414:       row  = 8*i + row_in_slice;
1415:       nrow = a->rlen[row]; /* number of nonzeros in row */
1416:       /*
1417:         Search for the nearest nonzero. Normally setting the index to zero may cause extra communication.
1418:         But if the entire slice are empty, it is fine to use 0 since the index will not be loaded.
1419:       */
1420:       lastcol = 0;
1421:       if (nrow>0) { /* nonempty row */
1422:         lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */
1423:       } else if (!row_in_slice) { /* first row of the currect slice is empty */
1424:         for (j=1;j<8;j++) {
1425:           if (a->rlen[8*i+j]) {
1426:             lastcol = cp[j];
1427:             break;
1428:           }
1429:         }
1430:       } else {
1431:         if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */
1432:       }
1434:       for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) {
1435:         cp[8*k+row_in_slice] = lastcol;
1436:         vp[8*k+row_in_slice] = (MatScalar)0;
1437:       }
1438:     }
1439:   }
1441:   A->info.mallocs += a->reallocs;
1442:   a->reallocs      = 0;
1444:   MatSeqSELLInvalidateDiagonal(A);
1445:   return(0);
1446: }
1448: PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info)
1449: {
1450:   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1453:   info->block_size   = 1.0;
1454:   info->nz_allocated = a->maxallocmat;
1455:   info->nz_used      = a->sliidx[a->totalslices]; /* include padding zeros */
1456:   info->nz_unneeded  = (a->maxallocmat-a->sliidx[a->totalslices]);
1457:   info->assemblies   = A->num_ass;
1458:   info->mallocs      = A->info.mallocs;
1459:   info->memory       = ((PetscObject)A)->mem;
1460:   if (A->factortype) {
1461:     info->fill_ratio_given  = A->info.fill_ratio_given;
1462:     info->fill_ratio_needed = A->info.fill_ratio_needed;
1463:     info->factor_mallocs    = A->info.factor_mallocs;
1464:   } else {
1465:     info->fill_ratio_given  = 0;
1466:     info->fill_ratio_needed = 0;
1467:     info->factor_mallocs    = 0;
1468:   }
1469:   return(0);
1470: }
1472: PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1473: {
1474:   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1475:   PetscInt       shift,i,k,l,low,high,t,ii,row,col,nrow;
1476:   PetscInt       *cp,nonew=a->nonew,lastcol=-1;
1477:   MatScalar      *vp,value;
1481:   for (k=0; k<m; k++) { /* loop over added rows */
1482:     row = im[k];
1483:     if (row < 0) continue;
1484: #if defined(PETSC_USE_DEBUG)
1485:     if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
1486: #endif
1487:     shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */
1488:     cp    = a->colidx+shift; /* pointer to the row */
1489:     vp    = a->val+shift; /* pointer to the row */
1490:     nrow  = a->rlen[row];
1491:     low   = 0;
1492:     high  = nrow;
1494:     for (l=0; l<n; l++) { /* loop over added columns */
1495:       col = in[l];
1496:       if (col<0) continue;
1497: #if defined(PETSC_USE_DEBUG)
1498:       if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %D max %D",col,A->cmap->n-1);
1499: #endif
1500:       if (a->roworiented) {
1501:         value = v[l+k*n];
1502:       } else {
1503:         value = v[k+l*m];
1504:       }
1505:       if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue;
1507:       /* search in this row for the specified colmun, i indicates the column to be set */
1508:       if (col <= lastcol) low = 0;
1509:       else high = nrow;
1510:       lastcol = col;
1511:       while (high-low > 5) {
1512:         t = (low+high)/2;
1513:         if (*(cp+t*8) > col) high = t;
1514:         else low = t;
1515:       }
1516:       for (i=low; i<high; i++) {
1517:         if (*(cp+i*8) > col) break;
1518:         if (*(cp+i*8) == col) {
1519:           if (is == ADD_VALUES) *(vp+i*8) += value;
1520:           else *(vp+i*8) = value;
1521:           low = i + 1;
1522:           goto noinsert;
1523:         }
1524:       }
1525:       if (value == 0.0 && a->ignorezeroentries) goto noinsert;
1526:       if (nonew == 1) goto noinsert;
1527:       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1528:       /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */
1529:       MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar);
1530:       /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */
1531:       for (ii=nrow-1; ii>=i; ii--) {
1532:         *(cp+(ii+1)*8) = *(cp+ii*8);
1533:         *(vp+(ii+1)*8) = *(vp+ii*8);
1534:       }
1535:       a->rlen[row]++;
1536:       *(cp+i*8) = col;
1537:       *(vp+i*8) = value;
1538:       a->nz++;
1539:       A->nonzerostate++;
1540:       low = i+1; high++; nrow++;
1541: noinsert:;
1542:     }
1543:     a->rlen[row] = nrow;
1544:   }
1545:   return(0);
1546: }
1548: PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str)
1549: {
1553:   /* If the two matrices have the same copy implementation, use fast copy. */
1554:   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1555:     Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1556:     Mat_SeqSELL *b=(Mat_SeqSELL*)B->data;
1558:     if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1559:     PetscArraycpy(b->val,a->val,a->sliidx[a->totalslices]);
1560:   } else {
1561:     MatCopy_Basic(A,B,str);
1562:   }
1563:   return(0);
1564: }
1566: PetscErrorCode MatSetUp_SeqSELL(Mat A)
1567: {
1571:   MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,0);
1572:   return(0);
1573: }
1575: PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[])
1576: {
1577:   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1580:   *array = a->val;
1581:   return(0);
1582: }
1584: PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[])
1585: {
1587:   return(0);
1588: }
1590: PetscErrorCode MatRealPart_SeqSELL(Mat A)
1591: {
1592:   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1593:   PetscInt    i;
1594:   MatScalar   *aval=a->val;
1597:   for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]);
1598:   return(0);
1599: }
1601: PetscErrorCode MatImaginaryPart_SeqSELL(Mat A)
1602: {
1603:   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1604:   PetscInt       i;
1605:   MatScalar      *aval=a->val;
1609:   for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]);
1610:   MatSeqSELLInvalidateDiagonal(A);
1611:   return(0);
1612: }
1614: PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha)
1615: {
1616:   Mat_SeqSELL    *a=(Mat_SeqSELL*)inA->data;
1617:   MatScalar      *aval=a->val;
1618:   PetscScalar    oalpha=alpha;
1619:   PetscBLASInt   one=1,size;
1623:   PetscBLASIntCast(a->sliidx[a->totalslices],&size);
1624:   PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one));
1625:   PetscLogFlops(a->nz);
1626:   MatSeqSELLInvalidateDiagonal(inA);
1627:   return(0);
1628: }
1630: PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a)
1631: {
1632:   Mat_SeqSELL    *y=(Mat_SeqSELL*)Y->data;
1636:   if (!Y->preallocated || !y->nz) {
1637:     MatSeqSELLSetPreallocation(Y,1,NULL);
1638:   }
1639:   MatShift_Basic(Y,a);
1640:   return(0);
1641: }
1643: PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1644: {
1645:   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
1646:   PetscScalar       *x,sum,*t;
1647:   const MatScalar   *idiag=0,*mdiag;
1648:   const PetscScalar *b,*xb;
1649:   PetscInt          n,m=A->rmap->n,i,j,shift;
1650:   const PetscInt    *diag;
1651:   PetscErrorCode    ierr;
1654:   its = its*lits;
1656:   if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */
1657:   if (!a->idiagvalid) {MatInvertDiagonal_SeqSELL(A,omega,fshift);}
1658:   a->fshift = fshift;
1659:   a->omega  = omega;
1661:   diag  = a->diag;
1662:   t     = a->ssor_work;
1663:   idiag = a->idiag;
1664:   mdiag = a->mdiag;
1666:   VecGetArray(xx,&x);
1667:   VecGetArrayRead(bb,&b);
1668:   /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */
1669:   if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented");
1670:   if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented");
1671:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
1673:   if (flag & SOR_ZERO_INITIAL_GUESS) {
1674:     if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1675:       for (i=0; i<m; i++) {
1676:         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1677:         sum   = b[i];
1678:         n     = (diag[i]-shift)/8;
1679:         for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1680:         t[i]  = sum;
1681:         x[i]  = sum*idiag[i];
1682:       }
1683:       xb   = t;
1684:       PetscLogFlops(a->nz);
1685:     } else xb = b;
1686:     if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1687:       for (i=m-1; i>=0; i--) {
1688:         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1689:         sum   = xb[i];
1690:         n     = a->rlen[i]-(diag[i]-shift)/8-1;
1691:         for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1692:         if (xb == b) {
1693:           x[i] = sum*idiag[i];
1694:         } else {
1695:           x[i] = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1696:         }
1697:       }
1698:       PetscLogFlops(a->nz); /* assumes 1/2 in upper */
1699:     }
1700:     its--;
1701:   }
1702:   while (its--) {
1703:     if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1704:       for (i=0; i<m; i++) {
1705:         /* lower */
1706:         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1707:         sum   = b[i];
1708:         n     = (diag[i]-shift)/8;
1709:         for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1710:         t[i]  = sum;             /* save application of the lower-triangular part */
1711:         /* upper */
1712:         n     = a->rlen[i]-(diag[i]-shift)/8-1;
1713:         for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1714:         x[i]  = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1715:       }
1716:       xb   = t;
1717:       PetscLogFlops(2.0*a->nz);
1718:     } else xb = b;
1719:     if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1720:       for (i=m-1; i>=0; i--) {
1721:         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1722:         sum = xb[i];
1723:         if (xb == b) {
1724:           /* whole matrix (no checkpointing available) */
1725:           n     = a->rlen[i];
1726:           for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1727:           x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i];
1728:         } else { /* lower-triangular part has been saved, so only apply upper-triangular */
1729:           n     = a->rlen[i]-(diag[i]-shift)/8-1;
1730:           for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1731:           x[i]  = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1732:         }
1733:       }
1734:       if (xb == b) {
1735:         PetscLogFlops(2.0*a->nz);
1736:       } else {
1737:         PetscLogFlops(a->nz); /* assumes 1/2 in upper */
1738:       }
1739:     }
1740:   }
1741:   VecRestoreArray(xx,&x);
1742:   VecRestoreArrayRead(bb,&b);
1743:   return(0);
1744: }
1746: /* -------------------------------------------------------------------*/
1747: static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL,
1748:                                        MatGetRow_SeqSELL,
1749:                                        MatRestoreRow_SeqSELL,
1750:                                        MatMult_SeqSELL,
1751:                                /* 4*/  MatMultAdd_SeqSELL,
1752:                                        MatMultTranspose_SeqSELL,
1753:                                        MatMultTransposeAdd_SeqSELL,
1754:                                        0,
1755:                                        0,
1756:                                        0,
1757:                                /* 10*/ 0,
1758:                                        0,
1759:                                        0,
1760:                                        MatSOR_SeqSELL,
1761:                                        0,
1762:                                /* 15*/ MatGetInfo_SeqSELL,
1763:                                        MatEqual_SeqSELL,
1764:                                        MatGetDiagonal_SeqSELL,
1765:                                        MatDiagonalScale_SeqSELL,
1766:                                        0,
1767:                                /* 20*/ 0,
1768:                                        MatAssemblyEnd_SeqSELL,
1769:                                        MatSetOption_SeqSELL,
1770:                                        MatZeroEntries_SeqSELL,
1771:                                /* 24*/ 0,
1772:                                        0,
1773:                                        0,
1774:                                        0,
1775:                                        0,
1776:                                /* 29*/ MatSetUp_SeqSELL,
1777:                                        0,
1778:                                        0,
1779:                                        0,
1780:                                        0,
1781:                                /* 34*/ MatDuplicate_SeqSELL,
1782:                                        0,
1783:                                        0,
1784:                                        0,
1785:                                        0,
1786:                                /* 39*/ 0,
1787:                                        0,
1788:                                        0,
1789:                                        MatGetValues_SeqSELL,
1790:                                        MatCopy_SeqSELL,
1791:                                /* 44*/ 0,
1792:                                        MatScale_SeqSELL,
1793:                                        MatShift_SeqSELL,
1794:                                        0,
1795:                                        0,
1796:                                /* 49*/ 0,
1797:                                        0,
1798:                                        0,
1799:                                        0,
1800:                                        0,
1801:                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
1802:                                        0,
1803:                                        0,
1804:                                        0,
1805:                                        0,
1806:                                /* 59*/ 0,
1807:                                        MatDestroy_SeqSELL,
1808:                                        MatView_SeqSELL,
1809:                                        0,
1810:                                        0,
1811:                                /* 64*/ 0,
1812:                                        0,
1813:                                        0,
1814:                                        0,
1815:                                        0,
1816:                                /* 69*/ 0,
1817:                                        0,
1818:                                        0,
1819:                                        0,
1820:                                        0,
1821:                                /* 74*/ 0,
1822:                                        MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */
1823:                                        0,
1824:                                        0,
1825:                                        0,
1826:                                /* 79*/ 0,
1827:                                        0,
1828:                                        0,
1829:                                        0,
1830:                                        0,
1831:                                /* 84*/ 0,
1832:                                        0,
1833:                                        0,
1834:                                        0,
1835:                                        0,
1836:                                /* 89*/ 0,
1837:                                        0,
1838:                                        0,
1839:                                        0,
1840:                                        0,
1841:                                /* 94*/ 0,
1842:                                        0,
1843:                                        0,
1844:                                        0,
1845:                                        0,
1846:                                /* 99*/ 0,
1847:                                        0,
1848:                                        0,
1849:                                        MatConjugate_SeqSELL,
1850:                                        0,
1851:                                /*104*/ 0,
1852:                                        0,
1853:                                        0,
1854:                                        0,
1855:                                        0,
1856:                                /*109*/ 0,
1857:                                        0,
1858:                                        0,
1859:                                        0,
1860:                                        MatMissingDiagonal_SeqSELL,
1861:                                /*114*/ 0,
1862:                                        0,
1863:                                        0,
1864:                                        0,
1865:                                        0,
1866:                                /*119*/ 0,
1867:                                        0,
1868:                                        0,
1869:                                        0,
1870:                                        0,
1871:                                /*124*/ 0,
1872:                                        0,
1873:                                        0,
1874:                                        0,
1875:                                        0,
1876:                                /*129*/ 0,
1877:                                        0,
1878:                                        0,
1879:                                        0,
1880:                                        0,
1881:                                /*134*/ 0,
1882:                                        0,
1883:                                        0,
1884:                                        0,
1885:                                        0,
1886:                                /*139*/ 0,
1887:                                        0,
1888:                                        0,
1889:                                        MatFDColoringSetUp_SeqXAIJ,
1890:                                        0,
1891:                                 /*144*/0
1892: };
1894: PetscErrorCode MatStoreValues_SeqSELL(Mat mat)
1895: {
1896:   Mat_SeqSELL    *a=(Mat_SeqSELL*)mat->data;
1900:   if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1902:   /* allocate space for values if not already there */
1903:   if (!a->saved_values) {
1904:     PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);
1905:     PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));
1906:   }
1908:   /* copy values over */
1909:   PetscArraycpy(a->saved_values,a->val,a->sliidx[a->totalslices]);
1910:   return(0);
1911: }
1913: PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat)
1914: {
1915:   Mat_SeqSELL    *a=(Mat_SeqSELL*)mat->data;
1919:   if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1920:   if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1921:   PetscArraycpy(a->val,a->saved_values,a->sliidx[a->totalslices]);
1922:   return(0);
1923: }
1925: /*@C
1926:  MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray()
1928:  Not Collective
1930:  Input Parameters:
1931:  .  mat - a MATSEQSELL matrix
1932:  .  array - pointer to the data
1934:  Level: intermediate
1936:  .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90()
1937:  @*/
1938: PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array)
1939: {
1943:   PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));
1944:   return(0);
1945: }
1947: PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B)
1948: {
1949:   Mat_SeqSELL    *b;
1950:   PetscMPIInt    size;
1954:   MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
1955:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1");
1957:   PetscNewLog(B,&b);
1959:   B->data = (void*)b;
1961:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1963:   b->row                = 0;
1964:   b->col                = 0;
1965:   b->icol               = 0;
1966:   b->reallocs           = 0;
1967:   b->ignorezeroentries  = PETSC_FALSE;
1968:   b->roworiented        = PETSC_TRUE;
1969:   b->nonew              = 0;
1970:   b->diag               = 0;
1971:   b->solve_work         = 0;
1972:   B->spptr              = 0;
1973:   b->saved_values       = 0;
1974:   b->idiag              = 0;
1975:   b->mdiag              = 0;
1976:   b->ssor_work          = 0;
1977:   b->omega              = 1.0;
1978:   b->fshift             = 0.0;
1979:   b->idiagvalid         = PETSC_FALSE;
1980:   b->keepnonzeropattern = PETSC_FALSE;
1982:   PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);
1983:   PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);
1984:   PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);
1985:   PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);
1986:   PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);
1987:   PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);
1988:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);
1989:   return(0);
1990: }
1992: /*
1993:  Given a matrix generated with MatGetFactor() duplicates all the information in A into B
1994:  */
1995: PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
1996: {
1997:   Mat_SeqSELL    *c,*a=(Mat_SeqSELL*)A->data;
1998:   PetscInt       i,m=A->rmap->n;
1999:   PetscInt       totalslices=a->totalslices;
2003:   c = (Mat_SeqSELL*)C->data;
2005:   C->factortype = A->factortype;
2006:   c->row        = 0;
2007:   c->col        = 0;
2008:   c->icol       = 0;
2009:   c->reallocs   = 0;
2011:   C->assembled = PETSC_TRUE;
2013:   PetscLayoutReference(A->rmap,&C->rmap);
2014:   PetscLayoutReference(A->cmap,&C->cmap);
2016:   PetscMalloc1(8*totalslices,&c->rlen);
2017:   PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));
2018:   PetscMalloc1(totalslices+1,&c->sliidx);
2019:   PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));
2021:   for (i=0; i<m; i++) c->rlen[i] = a->rlen[i];
2022:   for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i];
2024:   /* allocate the matrix space */
2025:   if (mallocmatspace) {
2026:     PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);
2027:     PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));
2029:     c->singlemalloc = PETSC_TRUE;
2031:     if (m > 0) {
2032:       PetscArraycpy(c->colidx,a->colidx,a->maxallocmat);
2033:       if (cpvalues == MAT_COPY_VALUES) {
2034:         PetscArraycpy(c->val,a->val,a->maxallocmat);
2035:       } else {
2036:         PetscArrayzero(c->val,a->maxallocmat);
2037:       }
2038:     }
2039:   }
2041:   c->ignorezeroentries = a->ignorezeroentries;
2042:   c->roworiented       = a->roworiented;
2043:   c->nonew             = a->nonew;
2044:   if (a->diag) {
2045:     PetscMalloc1(m,&c->diag);
2046:     PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));
2047:     for (i=0; i<m; i++) {
2048:       c->diag[i] = a->diag[i];
2049:     }
2050:   } else c->diag = 0;
2052:   c->solve_work         = 0;
2053:   c->saved_values       = 0;
2054:   c->idiag              = 0;
2055:   c->ssor_work          = 0;
2056:   c->keepnonzeropattern = a->keepnonzeropattern;
2057:   c->free_val           = PETSC_TRUE;
2058:   c->free_colidx        = PETSC_TRUE;
2060:   c->maxallocmat  = a->maxallocmat;
2061:   c->maxallocrow  = a->maxallocrow;
2062:   c->rlenmax      = a->rlenmax;
2063:   c->nz           = a->nz;
2064:   C->preallocated = PETSC_TRUE;
2066:   c->nonzerorowcnt = a->nonzerorowcnt;
2067:   C->nonzerostate  = A->nonzerostate;
2069:   PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
2070:   return(0);
2071: }
2073: PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B)
2074: {
2078:   MatCreate(PetscObjectComm((PetscObject)A),B);
2079:   MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);
2080:   if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) {
2081:     MatSetBlockSizesFromMats(*B,A,A);
2082:   }
2083:   MatSetType(*B,((PetscObject)A)->type_name);
2084:   MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);
2085:   return(0);
2086: }
2088: /*@C
2089:  MatCreateSeqSELL - Creates a sparse matrix in SELL format.
2091:  Collective
2093:  Input Parameters:
2094:  +  comm - MPI communicator, set to PETSC_COMM_SELF
2095:  .  m - number of rows
2096:  .  n - number of columns
2097:  .  rlenmax - maximum number of nonzeros in a row
2098:  -  rlen - array containing the number of nonzeros in the various rows
2099:  (possibly different for each row) or NULL
2101:  Output Parameter:
2102:  .  A - the matrix
2104:  It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2105:  MatXXXXSetPreallocation() paradigm instead of this routine directly.
2106:  [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation]
2108:  Notes:
2109:  If nnz is given then nz is ignored
2111:  Specify the preallocated storage with either rlenmax or rlen (not both).
2112:  Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory
2113:  allocation.  For large problems you MUST preallocate memory or you
2114:  will get TERRIBLE performance, see the users' manual chapter on matrices.
2116:  Level: intermediate
2118:  .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatCreateSeqSELLWithArrays()
2120:  @*/
2121: PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A)
2122: {
2126:   MatCreate(comm,A);
2127:   MatSetSizes(*A,m,n,m,n);
2128:   MatSetType(*A,MATSEQSELL);
2129:   MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);
2130:   return(0);
2131: }
2133: PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg)
2134: {
2135:   Mat_SeqSELL     *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data;
2136:   PetscInt       totalslices=a->totalslices;
2140:   /* If the  matrix dimensions are not equal,or no of nonzeros */
2141:   if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) {
2142:     *flg = PETSC_FALSE;
2143:     return(0);
2144:   }
2145:   /* if the a->colidx are the same */
2146:   PetscArraycmp(a->colidx,b->colidx,a->sliidx[totalslices],flg);
2147:   if (!*flg) return(0);
2148:   /* if a->val are the same */
2149:   PetscArraycmp(a->val,b->val,a->sliidx[totalslices],flg);
2150:   return(0);
2151: }
2153: PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A)
2154: {
2155:   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
2158:   a->idiagvalid  = PETSC_FALSE;
2159:   return(0);
2160: }
2162: PetscErrorCode MatConjugate_SeqSELL(Mat A)
2163: {
2164: #if defined(PETSC_USE_COMPLEX)
2165:   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
2166:   PetscInt    i;
2167:   PetscScalar *val = a->val;
2170:   for (i=0; i<a->sliidx[a->totalslices]; i++) {
2171:     val[i] = PetscConj(val[i]);
2172:   }
2173: #else
2175: #endif
2176:   return(0);
2177: }