Actual source code: vecimpl.h
/*
   This private file should not be included in users' code.
   Defines the fields shared by all vector implementations.
*/
#ifndef __VECIMPL_H
#define __VECIMPL_H

#include <petscvec.h>
/*S
  PetscLayout - defines layout of vectors and matrices across processes (which rows are owned by which processes)

  Level: developer

.seealso: PetscLayoutCreate(), PetscLayoutDestroy()
S*/
typedef struct _n_PetscLayout* PetscLayout;
struct _n_PetscLayout {
  MPI_Comm               comm;
  PetscInt               n,N;         /* local, global vector size */
  PetscInt               rstart,rend; /* local start, local end + 1 */
  PetscInt               *range;      /* the offset of each processor */
  PetscInt               bs;          /* number of elements in each block (generally for multi-component problems); do NOT multiply the numbers above by bs */
  PetscInt               refcnt;      /* MPI Vecs obtained with VecDuplicate() and from MatGetVecs() reuse the map of the input object */
  ISLocalToGlobalMapping mapping;     /* mapping used in Vec/MatSetValuesLocal() */
  ISLocalToGlobalMapping bmapping;    /* mapping used in Vec/MatSetValuesBlockedLocal() */
};
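/*
   A minimal sketch (not part of the original header) of how the PetscLayout
   fields relate: range[] holds the prefix sums of the local sizes, so process
   r owns the half-open interval [range[r], range[r+1]).  The helper name is
   hypothetical and assumes the layout has already been set up.
*/
#if 0
PETSC_STATIC_INLINE PetscErrorCode PetscLayoutSketchPrint(PetscLayout map)
{
  PetscMPIInt rank;
  MPI_Comm_rank(map->comm,&rank);
  /* invariants: map->rstart == map->range[rank], map->rend == map->range[rank+1],
     map->n == map->rend - map->rstart, map->N == map->range[size]           */
  PetscPrintf(PETSC_COMM_SELF,"[%d] owns rows %D..%D of %D\n",(int)rank,map->rstart,map->rend-1,map->N);
  return 0;
}
#endif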
PetscPolymorphicFunction(PetscLayoutGetLocalSize,(PetscLayout m),(m,&s),PetscInt,s)
PetscPolymorphicFunction(PetscLayoutGetSize,(PetscLayout m),(m,&s),PetscInt,s)
/*@C
   PetscLayoutFindOwner - Find the owning rank for a global index

   Not Collective

   Input Parameters:
+  map - the layout
-  idx - global index to find the owner of

   Output Parameter:
.  owner - the owning rank

   Level: developer

   Fortran Notes:
   Not available from Fortran

@*/
PETSC_STATIC_INLINE PetscErrorCode PetscLayoutFindOwner(PetscLayout map,PetscInt idx,PetscInt *owner)
{
  PetscMPIInt lo = 0,hi,t;

  if (!((map->n >= 0) && (map->N >= 0) && (map->range))) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PetscLayoutSetUp() must be called first");
  if (idx < 0 || idx > map->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D is out of range",idx);
  MPI_Comm_size(map->comm,&hi);
  while (hi - lo > 1) {
    t = lo + (hi - lo) / 2;
    if (idx < map->range[t]) hi = t;
    else lo = t;
  }
  *owner = lo;
  return(0);
}
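/*
   Example use of PetscLayoutFindOwner (a sketch, not in the original header):
   deciding whether a global index is local or must be communicated.  The
   function name and variable names are illustrative only.
*/
#if 0
PETSC_STATIC_INLINE PetscErrorCode VecImplOwnerSketch(PetscLayout map,PetscInt gidx)
{
  PetscInt    owner;
  PetscMPIInt rank;
  MPI_Comm_rank(map->comm,&rank);
  PetscLayoutFindOwner(map,gidx,&owner);   /* binary search over map->range */
  if (owner == (PetscInt)rank) {
    /* gidx is local: gidx - map->rstart indexes the local array */
  } else {
    /* gidx lives on process "owner" and must be communicated */
  }
  return 0;
}
#endif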
/* ----------------------------------------------------------------------------*/

typedef struct _n_PetscUniformSection *PetscUniformSection;
struct _n_PetscUniformSection {
  MPI_Comm comm;
  PetscInt pStart, pEnd; /* The chart: all points are contained in [pStart, pEnd) */
  PetscInt numDof;       /* Describes layout of storage, point --> (constant # of values, (p - pStart)*constant # of values) */
};
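/*
   Since every point in a PetscUniformSection has the same number of values,
   the offset comes from the closed formula named in the comment above.  A
   hypothetical accessor sketch (not part of the original header):
*/
#if 0
PETSC_STATIC_INLINE PetscInt PetscUniformSectionOffsetSketch(PetscUniformSection s,PetscInt p)
{
  /* the values for point p (pStart <= p < pEnd) start at (p - pStart)*numDof */
  return (p - s->pStart) * s->numDof;
}
#endif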
#if 0
// Should I protect these for C++?
#endif
/*S
  PetscSection - This is a mapping from DMMESH points to sets of values, which is
  our presentation of a fibre bundle.

  Level: developer

.seealso: PetscSectionCreate(), PetscSectionDestroy()
S*/
typedef struct _n_PetscSection *PetscSection;
struct _n_PetscSection {
  struct _n_PetscUniformSection atlasLayout; /* Layout for the atlas */
  PetscInt                     *atlasDof;    /* Describes layout of storage, point --> # of values */
  PetscInt                     *atlasOff;    /* Describes layout of storage, point --> offset into storage */
  PetscSection                  bc;          /* Describes constraints, point --> # local dofs which are constrained */
  PetscInt                     *bcIndices;   /* Local indices for constrained dofs */
  PetscInt                      refcnt;      /* Vecs obtained with VecDuplicate() and from MatGetVecs() reuse map of input object */
};
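/*
   Sketch (not in the original header) of walking a PetscSection: the atlas
   maps each point in [pStart, pEnd) to a count in atlasDof[] and an offset in
   atlasOff[], both indexed by p - pStart.  Function name is illustrative.
*/
#if 0
static void PetscSectionWalkSketch(PetscSection s)
{
  PetscInt p;
  for (p = s->atlasLayout.pStart; p < s->atlasLayout.pEnd; p++) {
    PetscInt dof = s->atlasDof[p - s->atlasLayout.pStart]; /* # of values at point p */
    PetscInt off = s->atlasOff[p - s->atlasLayout.pStart]; /* where they start */
    /* the values for point p occupy storage[off .. off+dof-1] */
  }
}
#endif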
/* ----------------------------------------------------------------------------*/
typedef struct _VecOps *VecOps;
struct _VecOps {
  PetscErrorCode (*duplicate)(Vec,Vec*);                          /* get single vector */
  PetscErrorCode (*duplicatevecs)(Vec,PetscInt,Vec**);            /* get array of vectors */
  PetscErrorCode (*destroyvecs)(PetscInt,Vec[]);                  /* free array of vectors */
  PetscErrorCode (*dot)(Vec,Vec,PetscScalar*);                    /* z = x^H * y */
  PetscErrorCode (*mdot)(Vec,PetscInt,const Vec[],PetscScalar*);  /* z[j] = x dot y[j] */
  PetscErrorCode (*norm)(Vec,NormType,PetscReal*);                /* z = sqrt(x^H * x) */
  PetscErrorCode (*tdot)(Vec,Vec,PetscScalar*);                   /* x'*y */
  PetscErrorCode (*mtdot)(Vec,PetscInt,const Vec[],PetscScalar*); /* z[j] = x dot y[j] */
  PetscErrorCode (*scale)(Vec,PetscScalar);                       /* x = alpha * x */
  PetscErrorCode (*copy)(Vec,Vec);                                /* y = x */
  PetscErrorCode (*set)(Vec,PetscScalar);                         /* y = alpha */
  PetscErrorCode (*swap)(Vec,Vec);                                /* exchange x and y */
  PetscErrorCode (*axpy)(Vec,PetscScalar,Vec);                    /* y = y + alpha * x */
  PetscErrorCode (*axpby)(Vec,PetscScalar,PetscScalar,Vec);       /* y = alpha * x + beta * y */
  PetscErrorCode (*maxpy)(Vec,PetscInt,const PetscScalar*,Vec*);  /* y = y + alpha[j] x[j] */
  PetscErrorCode (*aypx)(Vec,PetscScalar,Vec);                    /* y = x + alpha * y */
  PetscErrorCode (*waxpy)(Vec,PetscScalar,Vec,Vec);               /* w = y + alpha * x */
  PetscErrorCode (*axpbypcz)(Vec,PetscScalar,PetscScalar,PetscScalar,Vec,Vec); /* z = alpha * x + beta * y + gamma * z */
  PetscErrorCode (*pointwisemult)(Vec,Vec,Vec);                   /* w = x .* y */
  PetscErrorCode (*pointwisedivide)(Vec,Vec,Vec);                 /* w = x ./ y */
  PetscErrorCode (*setvalues)(Vec,PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
  PetscErrorCode (*assemblybegin)(Vec);                           /* start global assembly */
  PetscErrorCode (*assemblyend)(Vec);                             /* end global assembly */
  PetscErrorCode (*getarray)(Vec,PetscScalar**);                  /* get data array */
  PetscErrorCode (*getsize)(Vec,PetscInt*);
  PetscErrorCode (*getlocalsize)(Vec,PetscInt*);
  PetscErrorCode (*restorearray)(Vec,PetscScalar**);              /* restore data array */
  PetscErrorCode (*max)(Vec,PetscInt*,PetscReal*);                /* z = max(x); idx = index of max(x) */
  PetscErrorCode (*min)(Vec,PetscInt*,PetscReal*);                /* z = min(x); idx = index of min(x) */
  PetscErrorCode (*setrandom)(Vec,PetscRandom);                   /* set y[j] = random numbers */
  PetscErrorCode (*setoption)(Vec,VecOption,PetscBool);
  PetscErrorCode (*setvaluesblocked)(Vec,PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
  PetscErrorCode (*destroy)(Vec);
  PetscErrorCode (*view)(Vec,PetscViewer);
  PetscErrorCode (*placearray)(Vec,const PetscScalar*);           /* place data array */
  PetscErrorCode (*replacearray)(Vec,const PetscScalar*);         /* replace data array */
  PetscErrorCode (*dot_local)(Vec,Vec,PetscScalar*);
  PetscErrorCode (*tdot_local)(Vec,Vec,PetscScalar*);
  PetscErrorCode (*norm_local)(Vec,NormType,PetscReal*);
  PetscErrorCode (*mdot_local)(Vec,PetscInt,const Vec[],PetscScalar*);
  PetscErrorCode (*mtdot_local)(Vec,PetscInt,const Vec[],PetscScalar*);
  PetscErrorCode (*load)(Vec,PetscViewer);
  PetscErrorCode (*reciprocal)(Vec);
  PetscErrorCode (*conjugate)(Vec);
  PetscErrorCode (*setlocaltoglobalmapping)(Vec,ISLocalToGlobalMapping);
  PetscErrorCode (*setvalueslocal)(Vec,PetscInt,const PetscInt*,const PetscScalar*,InsertMode);
  PetscErrorCode (*resetarray)(Vec);                              /* vector points to its original array, i.e. undoes any VecPlaceArray() */
  PetscErrorCode (*setfromoptions)(Vec);
  PetscErrorCode (*maxpointwisedivide)(Vec,Vec,PetscReal*);       /* m = max abs(x ./ y) */
  PetscErrorCode (*pointwisemax)(Vec,Vec,Vec);
  PetscErrorCode (*pointwisemaxabs)(Vec,Vec,Vec);
  PetscErrorCode (*pointwisemin)(Vec,Vec,Vec);
  PetscErrorCode (*getvalues)(Vec,PetscInt,const PetscInt[],PetscScalar[]);
  PetscErrorCode (*sqrt)(Vec);
  PetscErrorCode (*abs)(Vec);
  PetscErrorCode (*exp)(Vec);
  PetscErrorCode (*log)(Vec);
  PetscErrorCode (*shift)(Vec);
  PetscErrorCode (*create)(Vec);
  PetscErrorCode (*stridegather)(Vec,PetscInt,Vec,InsertMode);
  PetscErrorCode (*stridescatter)(Vec,PetscInt,Vec,InsertMode);
  PetscErrorCode (*dotnorm2)(Vec,Vec,PetscScalar*,PetscScalar*);
  PetscErrorCode (*getsubvector)(Vec,IS,Vec*);
  PetscErrorCode (*restoresubvector)(Vec,IS,Vec*);
};
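/*
   Each Vec implementation fills in a _VecOps table, and the public API
   dispatches through it (the same function-pointer style the inline array
   accessors below use for ops->getarray).  A sketch with an illustrative
   name, not the actual body of VecDot():
*/
#if 0
static PetscErrorCode VecDotSketch(Vec x,Vec y,PetscScalar *z)
{
  return (*x->ops->dot)(x,y,z);   /* e.g. resolves to VecDot_Seq or VecDot_MPI */
}
#endif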
/*
   The stash is used to temporarily store inserted vec values that
   belong to another processor. During the assembly phase the stashed
   values are moved to the correct processor and then inserted or
   added into the vector.
*/
typedef struct {
  PetscInt      nmax;              /* maximum stash size */
  PetscInt      umax;              /* max stash size user wants */
  PetscInt      oldnmax;           /* the nmax value used previously */
  PetscInt      n;                 /* stash size */
  PetscInt      bs;                /* block size of the stash */
  PetscInt      reallocs;          /* preserves the number of mallocs invoked */
  PetscInt      *idx;              /* global row numbers in stash */
  PetscScalar   *array;            /* array to hold stashed values */
  /* The following variables are used for communication */
  MPI_Comm      comm;
  PetscMPIInt   size,rank;
  PetscMPIInt   tag1,tag2;
  MPI_Request   *send_waits;       /* array of send requests */
  MPI_Request   *recv_waits;       /* array of receive requests */
  MPI_Status    *send_status;      /* array of send status */
  PetscInt      nsends,nrecvs;     /* numbers of sends and receives */
  PetscScalar   *svalues,*rvalues; /* sending and receiving data */
  PetscInt      *sindices,*rindices;
  PetscInt      rmax;              /* maximum message length */
  PetscInt      *nprocs;           /* tmp data used both during scatterbegin and end */
  PetscInt      nprocessed;        /* number of messages already processed */
  PetscBool     donotstash;
  PetscBool     ignorenegidx;      /* ignore negative indices passed into VecSetValues/VecGetValues */
  InsertMode    insertmode;
  PetscInt      *bowners;
} VecStash;
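/*
   Sketch of how the stash participates in assembly (an assumption drawn from
   the fields above, not code from this header): values destined for rows owned
   by other processes are stashed during VecSetValues() and shipped to their
   owners during VecAssemblyBegin()/VecAssemblyEnd().  Names are illustrative.
*/
#if 0
static PetscErrorCode VecSetValueFlowSketch(Vec x,PetscInt row,PetscScalar value)
{
  if (row >= x->map->rstart && row < x->map->rend) {
    /* local row: write straight into the local array */
  } else if (!x->stash.donotstash) {
    /* off-process row: hold it in x->stash until assembly moves it */
    VecStashValue_Private(&x->stash,row,value); /* defined later in this file */
  }
  return 0;
}
#endif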
#if defined(PETSC_HAVE_CUSP)
/* Defines the flag structure that the CUSP arch uses. */
typedef enum {PETSC_CUSP_UNALLOCATED,PETSC_CUSP_GPU,PETSC_CUSP_CPU,PETSC_CUSP_BOTH} PetscCUSPFlag;
#endif
struct _p_Vec {
  PETSCHEADER(struct _VecOps);
  PetscLayout   map;
  void          *data;        /* implementation-specific data */
  PetscBool     array_gotten;
  VecStash      stash,bstash; /* used for storing off-proc values during assembly */
  PetscBool     petscnative;  /* means the ->data starts with VECHEADER and can use VecGetArrayFast() */
#if defined(PETSC_HAVE_CUSP)
  PetscCUSPFlag valid_GPU_array; /* indicates where the most recently modified vector data is (GPU or CPU) */
  void          *spptr;          /* if we're using CUSP, then this is the special pointer to the array on the GPU */
#endif
};
#if defined(PETSC_HAVE_CUSP)
extern PetscErrorCode VecCUSPCopyFromGPU(Vec); /* declared here so the inline accessors below can call it */
#endif
PETSC_STATIC_INLINE PetscErrorCode VecGetArrayRead(Vec x,const PetscScalar *a[])
{
  if (x->petscnative) {
#if defined(PETSC_HAVE_CUSP)
    if (x->valid_GPU_array == PETSC_CUSP_GPU || !*((PetscScalar**)x->data)) {
      VecCUSPCopyFromGPU(x);
    }
#endif
    *a = *((PetscScalar **)x->data);
  } else {
    (*x->ops->getarray)(x,(PetscScalar**)a);
  }
  return(0);
}

PETSC_STATIC_INLINE PetscErrorCode VecRestoreArrayRead(Vec x,const PetscScalar *a[])
{
  if (x->petscnative) {
#if defined(PETSC_HAVE_CUSP)
    if (x->valid_GPU_array != PETSC_CUSP_UNALLOCATED) {
      x->valid_GPU_array = PETSC_CUSP_BOTH;
    }
#endif
  } else {
    (*x->ops->restorearray)(x,(PetscScalar**)a);
  }
  if (a) *a = PETSC_NULL;
  return(0);
}

PETSC_STATIC_INLINE PetscErrorCode VecGetArray(Vec x,PetscScalar *a[])
{
  if (x->petscnative) {
#if defined(PETSC_HAVE_CUSP)
    if (x->valid_GPU_array == PETSC_CUSP_GPU || !*((PetscScalar**)x->data)) {
      VecCUSPCopyFromGPU(x);
    }
#endif
    *a = *((PetscScalar **)x->data);
  } else {
    (*x->ops->getarray)(x,a);
  }
  return(0);
}

PETSC_STATIC_INLINE PetscErrorCode VecRestoreArray(Vec x,PetscScalar *a[])
{
  if (x->petscnative) {
#if defined(PETSC_HAVE_CUSP)
    if (x->valid_GPU_array != PETSC_CUSP_UNALLOCATED) {
      x->valid_GPU_array = PETSC_CUSP_CPU;
    }
#endif
  } else {
    (*x->ops->restorearray)(x,a);
  }
  PetscObjectStateIncrease((PetscObject)x);
  if (a) *a = PETSC_NULL;
  return(0);
}
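/*
   Typical calling pattern for the inline accessors above (a usage sketch, not
   part of the original header; error checking omitted).  Note that
   VecRestoreArray() bumps the object state so PETSc knows the values may have
   changed, while the read-only pair does not.
*/
#if 0
static PetscErrorCode VecDoubleEntriesSketch(Vec x)
{
  PetscScalar *a;
  PetscInt    i,n;
  VecGetLocalSize(x,&n);
  VecGetArray(x,&a);        /* may trigger a GPU->CPU copy under CUSP */
  for (i=0; i<n; i++) a[i] = 2.0*a[i];
  VecRestoreArray(x,&a);    /* a is set to PETSC_NULL; object state increased */
  return 0;
}
#endif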
/*
   Common header shared by array based vectors,
   currently Vec_Seq and Vec_MPI
*/
#define VECHEADER                     \
  PetscScalar *array;                 \
  PetscScalar *array_allocated;       /* if the array was allocated by PETSc this is its pointer */ \
  PetscScalar *unplacedarray;         /* if one called VecPlaceArray(), this is where it stashed the original */
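/*
   Sketch of how an implementation-specific data struct uses VECHEADER (PETSc's
   Vec_Seq is built from this macro in the same way; the name below is
   illustrative only):
*/
#if 0
typedef struct {
  VECHEADER          /* array, array_allocated, unplacedarray */
} Vec_Seq_Sketch;    /* a Vec with petscnative == PETSC_TRUE points ->data at such a struct */
#endif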
/* Default obtain and release vectors; can be used by any implementation */

/* --------------------------------------------------------------------*/
/*                                                                      */
/* Defines the data structures used in the Vec Scatter operations       */

typedef enum { VEC_SCATTER_SEQ_GENERAL,VEC_SCATTER_SEQ_STRIDE,
               VEC_SCATTER_MPI_GENERAL,VEC_SCATTER_MPI_TOALL,
               VEC_SCATTER_MPI_TOONE} VecScatterType;
/*
   These scatters are for the purely local case.
*/
typedef struct {
  VecScatterType type;
  PetscInt       n;       /* number of components to scatter */
  PetscInt       *vslots; /* locations of components */
  /*
     The next three fields are used in parallel scatters; they contain an
     optimization for the special case that the "to" vector and the "from"
     vector are the same, so one only needs to copy the components that
     actually differ, skipping the no-op copies y[idx[i]] = y[jdx[i]] where
     idx[i] == jdx[i].
  */
  PetscBool      nonmatching_computed;
  PetscInt       n_nonmatching;      /* number of "from"s != "to"s */
  PetscInt       *slots_nonmatching; /* locations of "from"s != "to"s */
  PetscBool      is_copy;
  PetscInt       copy_start;         /* local scatter is a copy starting at copy_start */
  PetscInt       copy_length;
} VecScatter_Seq_General;
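/*
   A sketch of one common form of the sequential general scatter using the
   fields above (a simplification: the real code pairs a "from" slot list with
   a "to" slot list and handles InsertMode; function name is illustrative):
*/
#if 0
static void VecScatterSeqGeneralSketch(VecScatter_Seq_General *gen,const PetscScalar *x,PetscScalar *y)
{
  PetscInt i;
  for (i=0; i<gen->n; i++) y[gen->vslots[i]] = x[i]; /* scatter by explicit slot list */
}
#endif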
typedef struct {
  VecScatterType type;
  PetscInt       n;
  PetscInt       first;
  PetscInt       step;
} VecScatter_Seq_Stride;
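/*
   The strided case needs no index list; a sketch of the equivalent gather
   loop (illustrative, not part of the original header):
*/
#if 0
static void VecScatterSeqStrideSketch(VecScatter_Seq_Stride *stride,const PetscScalar *x,PetscScalar *y)
{
  PetscInt i;
  for (i=0; i<stride->n; i++) y[i] = x[stride->first + i*stride->step];
}
#endif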
/*
   This scatter is for a global vector copied (completely) to each processor (or all to one)
*/
typedef struct {
  VecScatterType type;
  PetscMPIInt    *count;  /* elements of vector on each processor */
  PetscMPIInt    *displx;
  PetscScalar    *work1;
  PetscScalar    *work2;
} VecScatter_MPI_ToAll;
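/*
   Sketch of the to-all case: count[] and displx[] line up with the recvcounts
   and displacements arguments of MPI_Allgatherv() (an assumption based on the
   field names; MPIU_SCALAR is PETSc's MPI datatype for PetscScalar):
*/
#if 0
static PetscErrorCode VecScatterToAllSketch(VecScatter_MPI_ToAll *toall,PetscScalar *xlocal,PetscMPIInt nlocal,PetscScalar *yall,MPI_Comm comm)
{
  /* every process receives the whole vector, laid out by count/displx */
  MPI_Allgatherv(xlocal,nlocal,MPIU_SCALAR,yall,toall->count,toall->displx,MPIU_SCALAR,comm);
  return 0;
}
#endif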
/*
   This is the general parallel scatter
*/
typedef struct {
  VecScatterType         type;
  PetscInt               n;        /* number of processors to send/receive */
  PetscInt               *starts;  /* starting point in indices and values for each proc */
  PetscInt               *indices; /* list of all components sent or received */
  PetscMPIInt            *procs;   /* processors we are communicating with in scatter */
  MPI_Request            *requests,*rev_requests;
  PetscScalar            *values;  /* buffer for all sends or receives */
  VecScatter_Seq_General local;    /* any part that happens to be local */
  MPI_Status             *sstatus,*rstatus;
  PetscBool              use_readyreceiver;
  PetscInt               bs;
  PetscBool              sendfirst;
  PetscBool              contiq;
  /* for MPI_Alltoallv() approach */
  PetscBool              use_alltoallv;
  PetscMPIInt            *counts,*displs;
  /* for MPI_Alltoallw() approach */
  PetscBool              use_alltoallw;
#if defined(PETSC_HAVE_MPI_ALLTOALLW)
  PetscMPIInt            *wcounts,*wdispls;
  MPI_Datatype           *types;
#endif
  PetscBool              use_window;
#if defined(PETSC_HAVE_MPI_WIN_CREATE)
  MPI_Win                window;
  PetscInt               *winstarts; /* displacements in the processes I am putting to */
#endif
} VecScatter_MPI_General;
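/*
   Sketch of the send-side packing the general parallel scatter performs with
   the fields above (a simplification under stated assumptions: the real code
   also handles block sizes, ready receives, tags, and the alltoall/window
   variants; function name is illustrative):
*/
#if 0
static PetscErrorCode VecScatterPackSketch(VecScatter_MPI_General *gen,const PetscScalar *x,MPI_Comm comm)
{
  PetscInt i,j;
  for (i=0; i<gen->n; i++) {                 /* one message per neighbor process */
    PetscScalar *buf = gen->values + gen->starts[i];
    PetscMPIInt len  = (PetscMPIInt)(gen->starts[i+1] - gen->starts[i]);
    /* gather the components destined for procs[i] into a contiguous buffer */
    for (j=gen->starts[i]; j<gen->starts[i+1]; j++) buf[j - gen->starts[i]] = x[gen->indices[j]];
    MPI_Isend(buf,len,MPIU_SCALAR,gen->procs[i],0,comm,&gen->requests[i]);
  }
  return 0;
}
#endif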
struct _p_VecScatter {
  PETSCHEADER(int);
  PetscInt       to_n,from_n;
  PetscBool      inuse;               /* prevents corruption from mixing two scatters */
  PetscBool      beginandendtogether; /* indicates that the scatter begin and end functions are called together,
                                         VecScatterEnd() is then treated as a no-op */
  PetscBool      packtogether;        /* packs all the messages before sending; same with receives */
  PetscBool      reproduce;           /* always receive the ghost points in the same order of processes */
  PetscErrorCode (*begin)(VecScatter,Vec,Vec,InsertMode,ScatterMode);
  PetscErrorCode (*end)(VecScatter,Vec,Vec,InsertMode,ScatterMode);
  PetscErrorCode (*copy)(VecScatter,VecScatter);
  PetscErrorCode (*destroy)(VecScatter);
  PetscErrorCode (*view)(VecScatter,PetscViewer);
  void           *fromdata,*todata;
  void           *spptr;
};
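/*
   The public entry points dispatch through the begin/end members above; a
   usage sketch (not part of the original header).  When beginandendtogether
   is set, both phases run inside Begin and End becomes a no-op.
*/
#if 0
static PetscErrorCode VecScatterUseSketch(VecScatter ctx,Vec x,Vec y)
{
  VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD); /* calls (*ctx->begin)() */
  /* ... unrelated local computation can overlap the communication here ... */
  VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);   /* calls (*ctx->end)() */
  return 0;
}
#endif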
/*
   VecStashValue_Private - inserts a single value into the stash.

   Input Parameters:
   stash - the stash
   row   - the global index of the inserted value
   value - the value inserted
*/
PETSC_STATIC_INLINE PetscErrorCode VecStashValue_Private(VecStash *stash,PetscInt row,PetscScalar value)
{
  /* Check and see if we have sufficient memory */
  if (((stash)->n + 1) > (stash)->nmax) {
    VecStashExpand_Private(stash,1);
  }
  (stash)->idx[(stash)->n]   = row;
  (stash)->array[(stash)->n] = value;
  (stash)->n++;
  return 0;
}
/*
   VecStashValuesBlocked_Private - inserts one block of values into the stash.

   Input Parameters:
   stash  - the stash
   row    - the global block index
   values - the values inserted
*/
PETSC_STATIC_INLINE PetscErrorCode VecStashValuesBlocked_Private(VecStash *stash,PetscInt row,PetscScalar *values)
{
  PetscInt    jj,stash_bs=(stash)->bs;
  PetscScalar *array;

  if (((stash)->n+1) > (stash)->nmax) {
    VecStashExpand_Private(stash,1);
  }
  array = (stash)->array + stash_bs*(stash)->n;
  (stash)->idx[(stash)->n] = row;
  for (jj=0; jj<stash_bs; jj++) array[jj] = values[jj];
  (stash)->n++;
  return 0;
}
#if defined(PETSC_HAVE_MATLAB_ENGINE)
#endif

/* Reset __FUNCT__ in case the user does not define it themselves */
#undef __FUNCT__
#define __FUNCT__ "User provided function"

#endif