Actual source code: vecviennacl.cxx
 
   petsc-3.12.4 2020-02-04
   
  1: /*
  2:    Implements the sequential ViennaCL vectors.
  3: */
  5: #include <petscconf.h>
  6:  #include <petsc/private/vecimpl.h>
  7:  #include <../src/vec/vec/impls/dvecimpl.h>
  8:  #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>
 10: #include <vector>
 12: #include "viennacl/linalg/inner_prod.hpp"
 13: #include "viennacl/linalg/norm_1.hpp"
 14: #include "viennacl/linalg/norm_2.hpp"
 15: #include "viennacl/linalg/norm_inf.hpp"
 17: #ifdef VIENNACL_WITH_OPENCL
 18: #include "viennacl/ocl/backend.hpp"
 19: #endif
 22: PETSC_EXTERN PetscErrorCode VecViennaCLGetArray(Vec v, ViennaCLVector **a)
 23: {
 28:   *a   = 0;
 29:   VecViennaCLCopyToGPU(v);
 30:   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
 31:   ViennaCLWaitForGPU();
 32:   return(0);
 33: }
 35: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArray(Vec v, ViennaCLVector **a)
 36: {
 41:   v->offloadmask = PETSC_OFFLOAD_GPU;
 43:   PetscObjectStateIncrease((PetscObject)v);
 44:   return(0);
 45: }
 47: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
 48: {
 53:   *a   = 0;
 54:   VecViennaCLCopyToGPU(v);
 55:   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
 56:   ViennaCLWaitForGPU();
 57:   return(0);
 58: }
 60: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
 61: {
 64:   return(0);
 65: }
 67: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
 68: {
 73:   *a   = 0;
 74:   VecViennaCLAllocateCheck(v);
 75:   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
 76:   ViennaCLWaitForGPU();
 77:   return(0);
 78: }
 80: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
 81: {
 86:   v->offloadmask = PETSC_OFFLOAD_GPU;
 88:   PetscObjectStateIncrease((PetscObject)v);
 89:   return(0);
 90: }
 94: PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
 95: {
 96:   PetscErrorCode       ierr;
 97:   char                 string[20];
 98:   PetscBool            flg,flg_cuda,flg_opencl,flg_openmp;
101:   /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
102:   PetscOptionsGetString(NULL,NULL,"-viennacl_backend",string,12,&flg);
103:   if (flg) {
104:     try {
105:       PetscStrcasecmp(string,"cuda",&flg_cuda);
106:       PetscStrcasecmp(string,"opencl",&flg_opencl);
107:       PetscStrcasecmp(string,"openmp",&flg_openmp);
109:       /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
110:       if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
111: #if defined(PETSC_HAVE_CUDA)
112:       else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
113: #endif
114: #if defined(PETSC_HAVE_OPENCL)
115:       else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
116: #endif
117:       else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.\n", string);
118:     } catch (std::exception const & ex) {
119:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
120:     }
121:   }
123: #if defined(PETSC_HAVE_OPENCL)
124:   /* ViennaCL OpenCL device type configuration */
125:   PetscOptionsGetString(NULL,NULL,"-viennacl_opencl_device_type",string,12,&flg);
126:   if (flg) {
127:     try {
128:       PetscStrcasecmp(string,"cpu",&flg);
129:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);
131:       PetscStrcasecmp(string,"gpu",&flg);
132:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);
134:       PetscStrcasecmp(string,"accelerator",&flg);
135:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
136:     } catch (std::exception const & ex) {
137:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
138:     }
139:   }
140: #endif
142:   /* Print available backends */
143:   PetscOptionsHasName(NULL,NULL,"-viennacl_view",&flg);
144:   if (flg) {
145:     PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: ");
146: #if defined(PETSC_HAVE_CUDA)
147:     PetscPrintf(PETSC_COMM_WORLD, "CUDA, ");
148: #endif
149: #if defined(PETSC_HAVE_OPENCL)
150:     PetscPrintf(PETSC_COMM_WORLD, "OpenCL, ");
151: #endif
152: #if defined(PETSC_HAVE_OPENMP)
153:     PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
154: #else
155:     PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) ");
156: #endif
157:     PetscPrintf(PETSC_COMM_WORLD, "\n");
159:     /* Print selected backends */
160:     PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend  selected: ");
161: #if defined(PETSC_HAVE_CUDA)
162:     if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) {
163:       PetscPrintf(PETSC_COMM_WORLD, "CUDA ");
164:     }
165: #endif
166: #if defined(PETSC_HAVE_OPENCL)
167:     if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) {
168:       PetscPrintf(PETSC_COMM_WORLD, "OpenCL ");
169:     }
170: #endif
171: #if defined(PETSC_HAVE_OPENMP)
172:     if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
173:       PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
174:     }
175: #else
176:     if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
177:       PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) ");
178:     }
179: #endif
180:     PetscPrintf(PETSC_COMM_WORLD, "\n");
181:   }
182:   return(0);
183: }
185: /*
186:     Allocates space for the vector array on the Host if it does not exist.
187:     Does NOT change the PetscViennaCLFlag for the vector
188:     Does NOT zero the ViennaCL array
189:  */
190: PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
191: {
193:   PetscScalar    *array;
194:   Vec_Seq        *s;
195:   PetscInt       n = v->map->n;
198:   s    = (Vec_Seq*)v->data;
199:   VecViennaCLAllocateCheck(v);
200:   if (s->array == 0) {
201:     PetscMalloc1(n,&array);
202:     PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));
203:     s->array           = array;
204:     s->array_allocated = array;
205:   }
206:   return(0);
207: }
210: /*
211:     Allocates space for the vector array on the GPU if it does not exist.
212:     Does NOT change the PetscViennaCLFlag for the vector
213:     Does NOT zero the ViennaCL array
215:  */
216: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
217: {
219:   int            rank;
222:   MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
223:   // First allocate memory on the GPU if needed
224:   if (!v->spptr) {
225:     try {
226:       v->spptr                            = new Vec_ViennaCL;
227:       ((Vec_ViennaCL*)v->spptr)->GPUarray = new ViennaCLVector((PetscBLASInt)v->map->n);
229:     } catch(std::exception const & ex) {
230:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
231:     }
232:   }
233:   return(0);
234: }
237: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
238: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
239: {
244:   VecViennaCLAllocateCheck(v);
245:   if (v->map->n > 0) {
246:     if (v->offloadmask == PETSC_OFFLOAD_CPU) {
247:       PetscLogEventBegin(VEC_ViennaCLCopyToGPU,v,0,0,0);
248:       try {
249:         ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
250:         viennacl::fast_copy(*(PetscScalar**)v->data, *(PetscScalar**)v->data + v->map->n, vec->begin());
251:         ViennaCLWaitForGPU();
252:       } catch(std::exception const & ex) {
253:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
254:       }
255:       PetscLogCpuToGpu((v->map->n)*sizeof(PetscScalar));
256:       PetscLogEventEnd(VEC_ViennaCLCopyToGPU,v,0,0,0);
257:       v->offloadmask = PETSC_OFFLOAD_BOTH;
258:     }
259:   }
260:   return(0);
261: }
265: /*
266:      VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
267: */
268: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
269: {
274:   VecViennaCLAllocateCheckHost(v);
275:   if (v->offloadmask == PETSC_OFFLOAD_GPU) {
276:     PetscLogEventBegin(VEC_ViennaCLCopyFromGPU,v,0,0,0);
277:     try {
278:       ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
279:       viennacl::fast_copy(vec->begin(),vec->end(),*(PetscScalar**)v->data);
280:       ViennaCLWaitForGPU();
281:     } catch(std::exception const & ex) {
282:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
283:     }
284:     PetscLogGpuToCpu((v->map->n)*sizeof(PetscScalar));
285:     PetscLogEventEnd(VEC_ViennaCLCopyFromGPU,v,0,0,0);
286:     v->offloadmask = PETSC_OFFLOAD_BOTH;
287:   }
288:   return(0);
289: }
292: /* Copy on CPU */
293: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin,Vec yin)
294: {
295:   PetscScalar       *ya;
296:   const PetscScalar *xa;
297:   PetscErrorCode    ierr;
300:   VecViennaCLAllocateCheckHost(xin);
301:   VecViennaCLAllocateCheckHost(yin);
302:   if (xin != yin) {
303:     VecGetArrayRead(xin,&xa);
304:     VecGetArray(yin,&ya);
305:     PetscArraycpy(ya,xa,xin->map->n);
306:     VecRestoreArrayRead(xin,&xa);
307:     VecRestoreArray(yin,&ya);
308:   }
309:   return(0);
310: }
312: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin,PetscRandom r)
313: {
315:   PetscInt       n = xin->map->n,i;
316:   PetscScalar    *xx;
319:   VecGetArray(xin,&xx);
320:   for (i=0; i<n; i++) {PetscRandomGetValue(r,&xx[i]);}
321:   VecRestoreArray(xin,&xx);
322:   return(0);
323: }
325: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
326: {
327:   Vec_Seq        *vs = (Vec_Seq*)v->data;
331:   PetscObjectSAWsViewOff(v);
332: #if defined(PETSC_USE_LOG)
333:   PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
334: #endif
335:   if (vs->array_allocated) PetscFree(vs->array_allocated);
336:   PetscFree(vs);
337:   return(0);
338: }
340: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
341: {
342:   Vec_Seq *v = (Vec_Seq*)vin->data;
345:   v->array         = v->unplacedarray;
346:   v->unplacedarray = 0;
347:   return(0);
348: }
351: /*MC
352:    VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL
354:    Options Database Keys:
355: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()
357:   Level: beginner
359: .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateSeqWithArray(), VECMPI, VecType, VecCreateMPI(), VecCreateSeq()
360: M*/
363: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
364: {
365:   const ViennaCLVector  *xgpu;
366:   ViennaCLVector        *ygpu;
367:   PetscErrorCode        ierr;
370:   VecViennaCLGetArrayRead(xin,&xgpu);
371:   VecViennaCLGetArray(yin,&ygpu);
372:   PetscLogGpuTimeBegin();
373:   try {
374:     if (alpha != 0.0 && xin->map->n > 0) {
375:       *ygpu = *xgpu + alpha * *ygpu;
376:       PetscLogGpuFlops(2.0*yin->map->n);
377:     } else {
378:       *ygpu = *xgpu;
379:     }
380:     ViennaCLWaitForGPU();
381:   } catch(std::exception const & ex) {
382:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
383:   }
384:   PetscLogGpuTimeEnd();
385:   VecViennaCLRestoreArrayRead(xin,&xgpu);
386:   VecViennaCLRestoreArray(yin,&ygpu);
387:   return(0);
388: }
391: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)
392: {
393:   const ViennaCLVector  *xgpu;
394:   ViennaCLVector        *ygpu;
395:   PetscErrorCode        ierr;
398:   if (alpha != 0.0 && xin->map->n > 0) {
399:     VecViennaCLGetArrayRead(xin,&xgpu);
400:     VecViennaCLGetArray(yin,&ygpu);
401:     PetscLogGpuTimeBegin();
402:     try {
403:       *ygpu += alpha * *xgpu;
404:       ViennaCLWaitForGPU();
405:     } catch(std::exception const & ex) {
406:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
407:     }
408:     PetscLogGpuTimeEnd();
409:     VecViennaCLRestoreArrayRead(xin,&xgpu);
410:     VecViennaCLRestoreArray(yin,&ygpu);
411:     PetscLogGpuFlops(2.0*yin->map->n);
412:   }
413:   return(0);
414: }
417: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
418: {
419:   const ViennaCLVector  *xgpu,*ygpu;
420:   ViennaCLVector        *wgpu;
421:   PetscErrorCode        ierr;
424:   if (xin->map->n > 0) {
425:     VecViennaCLGetArrayRead(xin,&xgpu);
426:     VecViennaCLGetArrayRead(yin,&ygpu);
427:     VecViennaCLGetArrayWrite(win,&wgpu);
428:     PetscLogGpuTimeBegin();
429:     try {
430:       *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
431:       ViennaCLWaitForGPU();
432:     } catch(std::exception const & ex) {
433:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
434:     }
435:     PetscLogGpuTimeEnd();
436:     PetscLogGpuFlops(win->map->n);
437:     VecViennaCLRestoreArrayRead(xin,&xgpu);
438:     VecViennaCLRestoreArrayRead(yin,&ygpu);
439:     VecViennaCLRestoreArrayWrite(win,&wgpu);
440:   }
441:   return(0);
442: }
445: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win,PetscScalar alpha,Vec xin, Vec yin)
446: {
447:   const ViennaCLVector  *xgpu,*ygpu;
448:   ViennaCLVector        *wgpu;
449:   PetscErrorCode        ierr;
452:   if (alpha == 0.0 && xin->map->n > 0) {
453:     VecCopy_SeqViennaCL(yin,win);
454:   } else {
455:     VecViennaCLGetArrayRead(xin,&xgpu);
456:     VecViennaCLGetArrayRead(yin,&ygpu);
457:     VecViennaCLGetArrayWrite(win,&wgpu);
458:     PetscLogGpuTimeBegin();
459:     if (alpha == 1.0) {
460:       try {
461:         *wgpu = *ygpu + *xgpu;
462:       } catch(std::exception const & ex) {
463:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
464:       }
465:       PetscLogGpuFlops(win->map->n);
466:     } else if (alpha == -1.0) {
467:       try {
468:         *wgpu = *ygpu - *xgpu;
469:       } catch(std::exception const & ex) {
470:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
471:       }
472:       PetscLogGpuFlops(win->map->n);
473:     } else {
474:       try {
475:         *wgpu = *ygpu + alpha * *xgpu;
476:       } catch(std::exception const & ex) {
477:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
478:       }
479:       PetscLogGpuFlops(2*win->map->n);
480:     }
481:     ViennaCLWaitForGPU();
482:     PetscLogGpuTimeEnd();
483:     VecViennaCLRestoreArrayRead(xin,&xgpu);
484:     VecViennaCLRestoreArrayRead(yin,&ygpu);
485:     VecViennaCLRestoreArrayWrite(win,&wgpu);
486:   }
487:   return(0);
488: }
491: /*
492:  * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
493:  *
494:  * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
495:  * hence there is an iterated application of these until the final result is obtained
496:  */
497: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
498: {
500:   PetscInt       j;
503:   for (j = 0; j < nv; ++j) {
504:     if (j+1 < nv) {
505:       VecAXPBYPCZ_SeqViennaCL(xin,alpha[j],alpha[j+1],1.0,y[j],y[j+1]);
506:       ++j;
507:     } else {
508:       VecAXPY_SeqViennaCL(xin,alpha[j],y[j]);
509:     }
510:   }
511:   ViennaCLWaitForGPU();
512:   return(0);
513: }
516: PetscErrorCode VecDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
517: {
518:   const ViennaCLVector  *xgpu,*ygpu;
519:   PetscErrorCode        ierr;
522:   if (xin->map->n > 0) {
523:     VecViennaCLGetArrayRead(xin,&xgpu);
524:     VecViennaCLGetArrayRead(yin,&ygpu);
525:     PetscLogGpuTimeBegin();
526:     try {
527:       *z = viennacl::linalg::inner_prod(*xgpu,*ygpu);
528:     } catch(std::exception const & ex) {
529:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
530:     }
531:     ViennaCLWaitForGPU();
532:     PetscLogGpuTimeEnd();
533:     if (xin->map->n >0) {
534:       PetscLogGpuFlops(2.0*xin->map->n-1);
535:     }
536:     VecViennaCLRestoreArrayRead(xin,&xgpu);
537:     VecViennaCLRestoreArrayRead(yin,&ygpu);
538:   } else *z = 0.0;
539:   return(0);
540: }
544: /*
545:  * Operation z[j] = dot(x, y[j])
546:  *
547:  * We use an iterated application of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
548:  */
549: PetscErrorCode VecMDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
550: {
551:   PetscErrorCode       ierr;
552:   PetscInt             n = xin->map->n,i;
553:   const ViennaCLVector *xgpu,*ygpu;
554:   Vec                  *yyin = (Vec*)yin;
555:   std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);
558:   if (xin->map->n > 0) {
559:     VecViennaCLGetArrayRead(xin,&xgpu);
560:     for (i=0; i<nv; i++) {
561:       VecViennaCLGetArrayRead(yyin[i],&ygpu);
562:       ygpu_array[i] = ygpu;
563:     }
564:     PetscLogGpuTimeBegin();
565:     viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
566:     ViennaCLVector result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
567:     viennacl::copy(result.begin(), result.end(), z);
568:     for (i=0; i<nv; i++) {
569:       VecViennaCLRestoreArrayRead(yyin[i],&ygpu);
570:     }
571:     ViennaCLWaitForGPU();
572:     PetscLogGpuTimeEnd();
573:     VecViennaCLRestoreArrayRead(xin,&xgpu);
574:     PetscLogGpuFlops(PetscMax(nv*(2.0*n-1),0.0));
575:   } else {
576:     for (i=0; i<nv; i++) z[i] = 0.0;
577:   }
578:   return(0);
579: }
581: PetscErrorCode VecMTDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
582: {
586:   /* Since complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
587:   VecMDot_SeqViennaCL(xin,nv,yin,z);
588:   ViennaCLWaitForGPU();
589:   return(0);
590: }
593: PetscErrorCode VecSet_SeqViennaCL(Vec xin,PetscScalar alpha)
594: {
595:   ViennaCLVector *xgpu;
599:   if (xin->map->n > 0) {
600:     VecViennaCLGetArrayWrite(xin,&xgpu);
601:     PetscLogGpuTimeBegin();
602:     try {
603:       *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
604:       ViennaCLWaitForGPU();
605:     } catch(std::exception const & ex) {
606:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
607:     }
608:     PetscLogGpuTimeEnd();
609:     VecViennaCLRestoreArrayWrite(xin,&xgpu);
610:   }
611:   return(0);
612: }
614: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
615: {
616:   ViennaCLVector *xgpu;
620:   if (alpha == 0.0 && xin->map->n > 0) {
621:     VecSet_SeqViennaCL(xin,alpha);
622:     PetscLogGpuFlops(xin->map->n);
623:   } else if (alpha != 1.0 && xin->map->n > 0) {
624:     VecViennaCLGetArray(xin,&xgpu);
625:     PetscLogGpuTimeBegin();
626:     try {
627:       *xgpu *= alpha;
628:       ViennaCLWaitForGPU();
629:     } catch(std::exception const & ex) {
630:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
631:     }
632:     PetscLogGpuTimeEnd();
633:     VecViennaCLRestoreArray(xin,&xgpu);
634:     PetscLogGpuFlops(xin->map->n);
635:   }
636:   return(0);
637: }
640: PetscErrorCode VecTDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
641: {
645:   /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
646:   VecDot_SeqViennaCL(xin, yin, z);
647:   ViennaCLWaitForGPU();
648:   return(0);
649: }
652: PetscErrorCode VecCopy_SeqViennaCL(Vec xin,Vec yin)
653: {
654:   const ViennaCLVector *xgpu;
655:   ViennaCLVector       *ygpu;
656:   PetscErrorCode       ierr;
659:   if (xin != yin && xin->map->n > 0) {
660:     if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
661:       VecViennaCLGetArrayRead(xin,&xgpu);
662:       VecViennaCLGetArrayWrite(yin,&ygpu);
663:       PetscLogGpuTimeBegin();
664:       try {
665:         *ygpu = *xgpu;
666:         ViennaCLWaitForGPU();
667:       } catch(std::exception const & ex) {
668:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
669:       }
670:       PetscLogGpuTimeEnd();
671:       VecViennaCLRestoreArrayRead(xin,&xgpu);
672:       VecViennaCLRestoreArrayWrite(yin,&ygpu);
674:     } else if (xin->offloadmask == PETSC_OFFLOAD_CPU) {
675:       /* copy in CPU if we are on the CPU*/
676:       VecCopy_SeqViennaCL_Private(xin,yin);
677:       ViennaCLWaitForGPU();
678:     } else if (xin->offloadmask == PETSC_OFFLOAD_BOTH) {
679:       /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
680:       if (yin->offloadmask == PETSC_OFFLOAD_CPU) {
681:         /* copy in CPU */
682:         VecCopy_SeqViennaCL_Private(xin,yin);
683:         ViennaCLWaitForGPU();
684:       } else if (yin->offloadmask == PETSC_OFFLOAD_GPU) {
685:         /* copy in GPU */
686:         VecViennaCLGetArrayRead(xin,&xgpu);
687:         VecViennaCLGetArrayWrite(yin,&ygpu);
688:         PetscLogGpuTimeBegin();
689:         try {
690:           *ygpu = *xgpu;
691:           ViennaCLWaitForGPU();
692:         } catch(std::exception const & ex) {
693:           SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
694:         }
695:         PetscLogGpuTimeEnd();
696:         VecViennaCLRestoreArrayRead(xin,&xgpu);
697:         VecViennaCLRestoreArrayWrite(yin,&ygpu);
698:       } else if (yin->offloadmask == PETSC_OFFLOAD_BOTH) {
699:         /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck
700:            default to copy in GPU (this is an arbitrary choice) */
701:         VecViennaCLGetArrayRead(xin,&xgpu);
702:         VecViennaCLGetArrayWrite(yin,&ygpu);
703:         PetscLogGpuTimeBegin();
704:         try {
705:           *ygpu = *xgpu;
706:           ViennaCLWaitForGPU();
707:         } catch(std::exception const & ex) {
708:           SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
709:         }
710:         PetscLogGpuTimeEnd();
711:         VecViennaCLRestoreArrayRead(xin,&xgpu);
712:         VecViennaCLRestoreArrayWrite(yin,&ygpu);
713:       } else {
714:         VecCopy_SeqViennaCL_Private(xin,yin);
715:         ViennaCLWaitForGPU();
716:       }
717:     }
718:   }
719:   return(0);
720: }
723: PetscErrorCode VecSwap_SeqViennaCL(Vec xin,Vec yin)
724: {
726:   ViennaCLVector *xgpu,*ygpu;
729:   if (xin != yin && xin->map->n > 0) {
730:     VecViennaCLGetArray(xin,&xgpu);
731:     VecViennaCLGetArray(yin,&ygpu);
732:     PetscLogGpuTimeBegin();
733:     try {
734:       viennacl::swap(*xgpu, *ygpu);
735:       ViennaCLWaitForGPU();
736:     } catch(std::exception const & ex) {
737:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
738:     }
739:     PetscLogGpuTimeEnd();
740:     VecViennaCLRestoreArray(xin,&xgpu);
741:     VecViennaCLRestoreArray(yin,&ygpu);
742:   }
743:   return(0);
744: }
747: // y = alpha * x + beta * y
748: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
749: {
750:   PetscErrorCode       ierr;
751:   PetscScalar          a = alpha,b = beta;
752:   const ViennaCLVector *xgpu;
753:   ViennaCLVector       *ygpu;
756:   if (a == 0.0 && xin->map->n > 0) {
757:     VecScale_SeqViennaCL(yin,beta);
758:   } else if (b == 1.0 && xin->map->n > 0) {
759:     VecAXPY_SeqViennaCL(yin,alpha,xin);
760:   } else if (a == 1.0 && xin->map->n > 0) {
761:     VecAYPX_SeqViennaCL(yin,beta,xin);
762:   } else if (b == 0.0 && xin->map->n > 0) {
763:     VecViennaCLGetArrayRead(xin,&xgpu);
764:     VecViennaCLGetArray(yin,&ygpu);
765:     PetscLogGpuTimeBegin();
766:     try {
767:       *ygpu = *xgpu * alpha;
768:       ViennaCLWaitForGPU();
769:     } catch(std::exception const & ex) {
770:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
771:     }
772:     PetscLogGpuTimeEnd();
773:     PetscLogGpuFlops(xin->map->n);
774:     VecViennaCLRestoreArrayRead(xin,&xgpu);
775:     VecViennaCLRestoreArray(yin,&ygpu);
776:   } else if (xin->map->n > 0) {
777:     VecViennaCLGetArrayRead(xin,&xgpu);
778:     VecViennaCLGetArray(yin,&ygpu);
779:     PetscLogGpuTimeBegin();
780:     try {
781:       *ygpu = *xgpu * alpha + *ygpu * beta;
782:       ViennaCLWaitForGPU();
783:     } catch(std::exception const & ex) {
784:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
785:     }
786:     PetscLogGpuTimeEnd();
787:     VecViennaCLRestoreArrayRead(xin,&xgpu);
788:     VecViennaCLRestoreArray(yin,&ygpu);
789:     PetscLogGpuFlops(3.0*xin->map->n);
790:   }
791:   return(0);
792: }
795: /* operation  z = alpha * x + beta *y + gamma *z*/
796: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
797: {
798:   PetscErrorCode       ierr;
799:   PetscInt             n = zin->map->n;
800:   const ViennaCLVector *xgpu,*ygpu;
801:   ViennaCLVector       *zgpu;
804:   VecViennaCLGetArrayRead(xin,&xgpu);
805:   VecViennaCLGetArrayRead(yin,&ygpu);
806:   VecViennaCLGetArray(zin,&zgpu);
807:   if (alpha == 0.0 && xin->map->n > 0) {
808:     PetscLogGpuTimeBegin();
809:     try {
810:       if (beta == 0.0) {
811:         *zgpu = gamma * *zgpu;
812:         ViennaCLWaitForGPU();
813:         PetscLogGpuFlops(1.0*n);
814:       } else if (gamma == 0.0) {
815:         *zgpu = beta * *ygpu;
816:         ViennaCLWaitForGPU();
817:         PetscLogGpuFlops(1.0*n);
818:       } else {
819:         *zgpu = beta * *ygpu + gamma * *zgpu;
820:         ViennaCLWaitForGPU();
821:         PetscLogGpuFlops(3.0*n);
822:       }
823:     } catch(std::exception const & ex) {
824:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
825:     }
826:     PetscLogGpuTimeEnd();
827:     PetscLogGpuFlops(3.0*n);
828:   } else if (beta == 0.0 && xin->map->n > 0) {
829:     PetscLogGpuTimeBegin();
830:     try {
831:       if (gamma == 0.0) {
832:         *zgpu = alpha * *xgpu;
833:         ViennaCLWaitForGPU();
834:         PetscLogGpuFlops(1.0*n);
835:       } else {
836:         *zgpu = alpha * *xgpu + gamma * *zgpu;
837:         ViennaCLWaitForGPU();
838:         PetscLogGpuFlops(3.0*n);
839:       }
840:     } catch(std::exception const & ex) {
841:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
842:     }
843:     PetscLogGpuTimeEnd();
844:   } else if (gamma == 0.0 && xin->map->n > 0) {
845:     PetscLogGpuTimeBegin();
846:     try {
847:       *zgpu = alpha * *xgpu + beta * *ygpu;
848:       ViennaCLWaitForGPU();
849:     } catch(std::exception const & ex) {
850:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
851:     }
852:     PetscLogGpuTimeEnd();
853:     PetscLogGpuFlops(3.0*n);
854:   } else if (xin->map->n > 0) {
855:     PetscLogGpuTimeBegin();
856:     try {
857:       /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
858:       if (gamma != 1.0)
859:         *zgpu *= gamma;
860:       *zgpu += alpha * *xgpu + beta * *ygpu;
861:       ViennaCLWaitForGPU();
862:     } catch(std::exception const & ex) {
863:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
864:     }
865:     PetscLogGpuTimeEnd();
866:     VecViennaCLRestoreArray(zin,&zgpu);
867:     VecViennaCLRestoreArrayRead(xin,&xgpu);
868:     VecViennaCLRestoreArrayRead(yin,&ygpu);
869:     PetscLogGpuFlops(5.0*n);
870:   }
871:   return(0);
872: }
874: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win,Vec xin,Vec yin)
875: {
876:   PetscErrorCode       ierr;
877:   PetscInt             n = win->map->n;
878:   const ViennaCLVector *xgpu,*ygpu;
879:   ViennaCLVector       *wgpu;
882:   if (xin->map->n > 0) {
883:     VecViennaCLGetArrayRead(xin,&xgpu);
884:     VecViennaCLGetArrayRead(yin,&ygpu);
885:     VecViennaCLGetArray(win,&wgpu);
886:     PetscLogGpuTimeBegin();
887:     try {
888:       *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
889:       ViennaCLWaitForGPU();
890:     } catch(std::exception const & ex) {
891:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
892:     }
893:     PetscLogGpuTimeEnd();
894:     VecViennaCLRestoreArrayRead(xin,&xgpu);
895:     VecViennaCLRestoreArrayRead(yin,&ygpu);
896:     VecViennaCLRestoreArray(win,&wgpu);
897:     PetscLogGpuFlops(n);
898:   }
899:   return(0);
900: }
903: PetscErrorCode VecNorm_SeqViennaCL(Vec xin,NormType type,PetscReal *z)
904: {
905:   PetscErrorCode       ierr;
906:   PetscInt             n = xin->map->n;
907:   PetscBLASInt         bn;
908:   const ViennaCLVector *xgpu;
911:   if (xin->map->n > 0) {
912:     PetscBLASIntCast(n,&bn);
913:     VecViennaCLGetArrayRead(xin,&xgpu);
914:     if (type == NORM_2 || type == NORM_FROBENIUS) {
915:       PetscLogGpuTimeBegin();
916:       try {
917:         *z = viennacl::linalg::norm_2(*xgpu);
918:         ViennaCLWaitForGPU();
919:       } catch(std::exception const & ex) {
920:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
921:       }
922:       PetscLogGpuTimeEnd();
923:       PetscLogGpuFlops(PetscMax(2.0*n-1,0.0));
924:     } else if (type == NORM_INFINITY) {
925:       VecViennaCLGetArrayRead(xin,&xgpu);
926:       PetscLogGpuTimeBegin();
927:       try {
928:         *z = viennacl::linalg::norm_inf(*xgpu);
929:         ViennaCLWaitForGPU();
930:       } catch(std::exception const & ex) {
931:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
932:       }
933:       PetscLogGpuTimeEnd();
934:       VecViennaCLRestoreArrayRead(xin,&xgpu);
935:     } else if (type == NORM_1) {
936:       PetscLogGpuTimeBegin();
937:       try {
938:         *z = viennacl::linalg::norm_1(*xgpu);
939:         ViennaCLWaitForGPU();
940:       } catch(std::exception const & ex) {
941:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
942:       }
943:       PetscLogGpuTimeEnd();
944:       PetscLogGpuFlops(PetscMax(n-1.0,0.0));
945:     } else if (type == NORM_1_AND_2) {
946:       PetscLogGpuTimeBegin();
947:       try {
948:         *z     = viennacl::linalg::norm_1(*xgpu);
949:         *(z+1) = viennacl::linalg::norm_2(*xgpu);
950:         ViennaCLWaitForGPU();
951:       } catch(std::exception const & ex) {
952:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
953:       }
954:       PetscLogGpuTimeEnd();
955:       PetscLogGpuFlops(PetscMax(2.0*n-1,0.0));
956:       PetscLogGpuFlops(PetscMax(n-1.0,0.0));
957:     }
958:     VecViennaCLRestoreArrayRead(xin,&xgpu);
959:   } else if (type == NORM_1_AND_2) {
960:     *z      = 0.0;
961:     *(z+1)  = 0.0;
962:   } else *z = 0.0;
963:   return(0);
964: }
967: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin,PetscRandom r)
968: {
972:   VecSetRandom_SeqViennaCL_Private(xin,r);
973:   xin->offloadmask = PETSC_OFFLOAD_CPU;
974:   return(0);
975: }
977: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
978: {
983:   VecViennaCLCopyFromGPU(vin);
984:   VecResetArray_SeqViennaCL_Private(vin);
985:   vin->offloadmask = PETSC_OFFLOAD_CPU;
986:   return(0);
987: }
989: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
990: {
995:   VecViennaCLCopyFromGPU(vin);
996:   VecPlaceArray_Seq(vin,a);
997:   vin->offloadmask = PETSC_OFFLOAD_CPU;
998:   return(0);
999: }
1001: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
1002: {
1007:   VecViennaCLCopyFromGPU(vin);
1008:   VecReplaceArray_Seq(vin,a);
1009:   vin->offloadmask = PETSC_OFFLOAD_CPU;
1010:   return(0);
1011: }
1014: /*@
1015:    VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.
1017:    Collective
1019:    Input Parameter:
1020: +  comm - the communicator, should be PETSC_COMM_SELF
1021: -  n - the vector length
1023:    Output Parameter:
1024: .  V - the vector
1026:    Notes:
1027:    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
1028:    same type as an existing vector.
1030:    Level: intermediate
1032: .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
1033: @*/
1034: PetscErrorCode  VecCreateSeqViennaCL(MPI_Comm comm,PetscInt n,Vec *v)
1035: {
1039:   VecCreate(comm,v);
1040:   VecSetSizes(*v,n,n);
1041:   VecSetType(*v,VECSEQVIENNACL);
1042:   return(0);
1043: }
1046: /*  VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1047:  *
1048:  *  Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
1049:  */
1050: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1051: {
1052:   PetscErrorCode                         ierr;
1055:   VecDot_SeqViennaCL(s,t,dp);
1056:   VecNorm_SeqViennaCL(t,NORM_2,nm);
1057:   *nm *= *nm; //squared norm required
1058:   return(0);
1059: }
1061: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win,Vec *V)
1062: {
1066:   VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win),win->map->n,V);
1067:   PetscLayoutReference(win->map,&(*V)->map);
1068:   PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);
1069:   PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);
1070:   (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1071:   return(0);
1072: }
1074: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1075: {
1079:   try {
1080:     if (v->spptr) {
1081:       delete ((Vec_ViennaCL*)v->spptr)->GPUarray;
1082:       delete (Vec_ViennaCL*) v->spptr;
1083:     }
1084:   } catch(char *ex) {
1085:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
1086:   }
1087:   VecDestroy_SeqViennaCL_Private(v);
1088:   return(0);
1089: }
1091: static PetscErrorCode VecPinToCPU_SeqAIJViennaCL(Vec V,PetscBool flg)
1092: {
1096:   V->pinnedtocpu = flg;
1097:   if (flg) {
1098:     VecViennaCLCopyFromGPU(V);
1099:     V->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
1100:     V->ops->dot             = VecDot_Seq;
1101:     V->ops->norm            = VecNorm_Seq;
1102:     V->ops->tdot            = VecTDot_Seq;
1103:     V->ops->scale           = VecScale_Seq;
1104:     V->ops->copy            = VecCopy_Seq;
1105:     V->ops->set             = VecSet_Seq;
1106:     V->ops->swap            = VecSwap_Seq;
1107:     V->ops->axpy            = VecAXPY_Seq;
1108:     V->ops->axpby           = VecAXPBY_Seq;
1109:     V->ops->axpbypcz        = VecAXPBYPCZ_Seq;
1110:     V->ops->pointwisemult   = VecPointwiseMult_Seq;
1111:     V->ops->pointwisedivide = VecPointwiseDivide_Seq;
1112:     V->ops->setrandom       = VecSetRandom_Seq;
1113:     V->ops->dot_local       = VecDot_Seq;
1114:     V->ops->tdot_local      = VecTDot_Seq;
1115:     V->ops->norm_local      = VecNorm_Seq;
1116:     V->ops->mdot_local      = VecMDot_Seq;
1117:     V->ops->mtdot_local     = VecMTDot_Seq;
1118:     V->ops->maxpy           = VecMAXPY_Seq;
1119:     V->ops->mdot            = VecMDot_Seq;
1120:     V->ops->mtdot           = VecMTDot_Seq;
1121:     V->ops->aypx            = VecAYPX_Seq;
1122:     V->ops->waxpy           = VecWAXPY_Seq;
1123:     V->ops->dotnorm2        = NULL;
1124:     V->ops->placearray      = VecPlaceArray_Seq;
1125:     V->ops->replacearray    = VecReplaceArray_Seq;
1126:     V->ops->resetarray      = VecResetArray_Seq;
1127:     V->ops->duplicate       = VecDuplicate_Seq;
1128:   } else {
1129:     V->ops->dot             = VecDot_SeqViennaCL;
1130:     V->ops->norm            = VecNorm_SeqViennaCL;
1131:     V->ops->tdot            = VecTDot_SeqViennaCL;
1132:     V->ops->scale           = VecScale_SeqViennaCL;
1133:     V->ops->copy            = VecCopy_SeqViennaCL;
1134:     V->ops->set             = VecSet_SeqViennaCL;
1135:     V->ops->swap            = VecSwap_SeqViennaCL;
1136:     V->ops->axpy            = VecAXPY_SeqViennaCL;
1137:     V->ops->axpby           = VecAXPBY_SeqViennaCL;
1138:     V->ops->axpbypcz        = VecAXPBYPCZ_SeqViennaCL;
1139:     V->ops->pointwisemult   = VecPointwiseMult_SeqViennaCL;
1140:     V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1141:     V->ops->setrandom       = VecSetRandom_SeqViennaCL;
1142:     V->ops->dot_local       = VecDot_SeqViennaCL;
1143:     V->ops->tdot_local      = VecTDot_SeqViennaCL;
1144:     V->ops->norm_local      = VecNorm_SeqViennaCL;
1145:     V->ops->mdot_local      = VecMDot_SeqViennaCL;
1146:     V->ops->mtdot_local     = VecMTDot_SeqViennaCL;
1147:     V->ops->maxpy           = VecMAXPY_SeqViennaCL;
1148:     V->ops->mdot            = VecMDot_SeqViennaCL;
1149:     V->ops->mtdot           = VecMTDot_SeqViennaCL;
1150:     V->ops->aypx            = VecAYPX_SeqViennaCL;
1151:     V->ops->waxpy           = VecWAXPY_SeqViennaCL;
1152:     V->ops->dotnorm2        = VecDotNorm2_SeqViennaCL;
1153:     V->ops->placearray      = VecPlaceArray_SeqViennaCL;
1154:     V->ops->replacearray    = VecReplaceArray_SeqViennaCL;
1155:     V->ops->resetarray      = VecResetArray_SeqViennaCL;
1156:     V->ops->destroy         = VecDestroy_SeqViennaCL;
1157:     V->ops->duplicate       = VecDuplicate_SeqViennaCL;
1158:   }
1159:   return(0);
1160: }
1162: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1163: {
1165:   PetscMPIInt    size;
1168:   MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
1169:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQVIENNACL on more than one process");
1170:   VecCreate_Seq_Private(V,0);
1171:   PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);
1173:   VecPinToCPU_SeqAIJViennaCL(V,PETSC_FALSE);
1174:   V->ops->pintocpu = VecPinToCPU_SeqAIJViennaCL;
1176:   VecViennaCLAllocateCheck(V);
1177:   VecViennaCLAllocateCheckHost(V);
1178:   VecSet(V,0.0);
1179:   VecSet_Seq(V,0.0);
1180:   V->offloadmask = PETSC_OFFLOAD_BOTH;
1181:   return(0);
1182: }