Sacado Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
Sacado_DynamicArrayTraits.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Sacado Package
5// Copyright (2006) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// This library is free software; you can redistribute it and/or modify
11// it under the terms of the GNU Lesser General Public License as
12// published by the Free Software Foundation; either version 2.1 of the
13// License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful, but
16// WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23// USA
24// Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
25// (etphipp@sandia.gov).
26//
27// ***********************************************************************
28// @HEADER
29
30#ifndef SACADO_DYNAMICARRAYTRAITS_HPP
31#define SACADO_DYNAMICARRAYTRAITS_HPP
32
33#include <new>
34#include <cstring>
35#include <stdint.h>
36
37#include "Sacado_Traits.hpp"
38#if defined(HAVE_SACADO_KOKKOSCORE)
39#include "Kokkos_Core.hpp"
40#endif
41
42namespace Sacado {
43
44 template <typename ExecSpace>
45 void createGlobalMemoryPool(const ExecSpace& space
46 , const size_t min_total_alloc_size
47 , const uint32_t min_block_alloc_size
48 , const uint32_t max_block_alloc_size
49 , const uint32_t min_superblock_size
50 ) {}
51
52 template <typename ExecSpace>
53 void destroyGlobalMemoryPool(const ExecSpace& space) {}
54
55#if 0 && defined(HAVE_SACADO_KOKKOSCORE) && defined(KOKKOS_ENABLE_OPENMP)
56 namespace Impl {
57 extern const Kokkos::MemoryPool<Kokkos::OpenMP>* global_sacado_openmp_memory_pool;
58 }
59
60 inline void
61 createGlobalMemoryPool(const ExecSpace& space
62 , const size_t min_total_alloc_size
63 , const uint32_t min_block_alloc_size
64 , const uint32_t max_block_alloc_size
65 , const uint32_t min_superblock_size
66 )
67 {
68 typedef Kokkos::MemoryPool<Kokkos::OpenMP> pool_t;
69 Impl::global_sacado_openmp_memory_pool =
70 new pool_t(typename Kokkos::OpenMP::memory_space(),
71 min_total_alloc_size,
72 min_block_alloc_size,
73 max_block_alloc_size,
74 min_superblock_size);
75 }
76
77 inline void destroyGlobalMemoryPool(const Kokkos::OpenMP& space)
78 {
79 delete Impl::global_sacado_openmp_memory_pool;
80 }
81#endif
82
83#if defined(HAVE_SACADO_KOKKOSCORE) && defined(SACADO_KOKKOS_USE_MEMORY_POOL) && !defined(SACADO_DISABLE_CUDA_IN_KOKKOS) && defined(KOKKOS_ENABLE_CUDA) && defined(__CUDACC__)
84
85 namespace Impl {
86
87 extern const Kokkos::MemoryPool<Kokkos::Cuda>* global_sacado_cuda_memory_pool_host;
88 extern const Kokkos::MemoryPool<Kokkos::Cuda>* global_sacado_cuda_memory_pool_device;
89#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
90 extern __device__ const Kokkos::MemoryPool<Kokkos::Cuda>* global_sacado_cuda_memory_pool_on_device;
91#else
92 __device__ const Kokkos::MemoryPool<Kokkos::Cuda>* global_sacado_cuda_memory_pool_on_device = 0;
93#endif
94
95 struct SetMemoryPoolPtr {
96 Kokkos::MemoryPool<Kokkos::Cuda>* pool_device;
97 __device__ inline void operator()(int) const {
98 global_sacado_cuda_memory_pool_on_device = pool_device;
99 };
100 };
101
102 }
103
104 // For some reason we get memory errors if these functions are defined in
105 // Sacado_DynamicArrayTraits.cpp
106 inline void
107 createGlobalMemoryPool(const Kokkos::Cuda& space
108 , const size_t min_total_alloc_size
109 , const uint32_t min_block_alloc_size
110 , const uint32_t max_block_alloc_size
111 , const uint32_t min_superblock_size
112 )
113 {
114 typedef Kokkos::MemoryPool<Kokkos::Cuda> pool_t;
115 pool_t* pool =
116 new pool_t(typename Kokkos::Cuda::memory_space(),
117 min_total_alloc_size,
118 min_block_alloc_size,
119 max_block_alloc_size,
120 min_superblock_size);
121 Impl::SetMemoryPoolPtr f;
122 KOKKOS_IMPL_CUDA_SAFE_CALL( cudaMalloc( &f.pool_device, sizeof(pool_t) ) );
123 KOKKOS_IMPL_CUDA_SAFE_CALL( cudaMemcpy( f.pool_device, pool,
124 sizeof(pool_t),
125 cudaMemcpyHostToDevice ) );
126 Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::Cuda>(0,1),f);
127 Impl::global_sacado_cuda_memory_pool_host = pool;
128 Impl::global_sacado_cuda_memory_pool_device = f.pool_device;
129 }
130
131 inline void destroyGlobalMemoryPool(const Kokkos::Cuda& space)
132 {
133 KOKKOS_IMPL_CUDA_SAFE_CALL( cudaFree( (void*) Impl::global_sacado_cuda_memory_pool_device ) );
134 delete Impl::global_sacado_cuda_memory_pool_host;
135 }
136
137#endif
138
139#if !defined(SACADO_DISABLE_CUDA_IN_KOKKOS) && defined(KOKKOS_ENABLE_CUDA) && defined(__CUDACC__)
140
141 namespace Impl {
142
143 // Compute warp lane/thread index
144 __device__ inline int warpLane(const int warp_size = 32) {
145 return ( threadIdx.x + (threadIdx.y + threadIdx.z*blockDim.y)*blockDim.x ) % warp_size;
146 }
147
148 // Reduce y across the warp and broadcast to all lanes
149 template <typename T>
150 __device__ inline T warpReduce(T y, const int warp_size = 32) {
151 for (int i=1; i<warp_size; i*=2) {
152 y += Kokkos::shfl_down(y, i, warp_size);
153 }
154 y = Kokkos::shfl(y, 0, warp_size);
155 return y;
156 }
157
158 // Non-inclusive plus-scan up the warp, replacing the first entry with 0
159 template <typename T>
160 __device__ inline int warpScan(T y, const int warp_size = 32) {
161 const int lane = warpLane();
162 y = Kokkos::shfl_up(y, 1, warp_size);
163 if (lane == 0)
164 y = T(0);
165 for (int i=1; i<warp_size; i*=2) {
166 T t = Kokkos::shfl_up(y, i, warp_size);
167 if (lane > i)
168 y += t;
169 }
170 return y;
171 }
172
173 template <typename T>
174 __device__ inline T warpBcast(T y, int id, const int warp_size = 32) {
175 return Kokkos::shfl(y, id, warp_size);
176 }
177
178 }
179
180#endif
181
182 namespace Impl {
183
184 template <typename T>
186 static T* ds_alloc(const int sz) {
187#if defined( CUDA_VERSION ) && ( 6000 <= CUDA_VERSION ) && defined(KOKKOS_ENABLE_CUDA_UVM) && !defined( __CUDA_ARCH__ )
188 T* m = 0;
189 if (sz > 0)
190 KOKKOS_IMPL_CUDA_SAFE_CALL( cudaMallocManaged( (void**) &m, sz*sizeof(T), cudaMemAttachGlobal ) );
191#elif defined(HAVE_SACADO_KOKKOSCORE) && defined(SACADO_KOKKOS_USE_MEMORY_POOL) && !defined(SACADO_DISABLE_CUDA_IN_KOKKOS) && defined(__CUDA_ARCH__)
192 // This code assumes all threads enter ds_alloc, even those with sz == 0
193 T* m = 0;
194 const int total_sz = warpReduce(sz);
195 const int lane = warpLane();
196 if (total_sz > 0 && lane == 0) {
197 m = static_cast<T*>(global_sacado_cuda_memory_pool_on_device->allocate(total_sz*sizeof(T)));
198 if (m == 0)
199 Kokkos::abort("Allocation failed. Kokkos memory pool is out of memory");
200 }
201 m = warpBcast(m,0);
202 m += warpScan(sz);
203#elif 0 && defined(HAVE_SACADO_KOKKOSCORE) && defined(SACADO_KOKKOS_USE_MEMORY_POOL) && defined(KOKKOS_ENABLE_OPENMP)
204 T* m = 0;
205 if (sz > 0) {
206 if (global_sacado_openmp_memory_pool != 0) {
207 m = static_cast<T*>(global_sacado_openmp_memory_pool->allocate(sz*sizeof(T)));
208 if (m == 0)
209 Kokkos::abort("Allocation failed. Kokkos memory pool is out of memory");
210 }
211 else
212 m = static_cast<T* >(operator new(sz*sizeof(T)));
213 }
214#else
215 T* m = 0;
216 if (sz > 0) {
217 m = static_cast<T* >(operator new(sz*sizeof(T)));
218#if defined(HAVE_SACADO_KOKKOSCORE)
219 if (m == 0)
220 Kokkos::abort("Allocation failed.");
221#endif
222 }
223#endif
224 return m;
225 }
226
227 template <typename T>
229 static void ds_free(T* m, int sz) {
230#if defined( CUDA_VERSION ) && ( 6000 <= CUDA_VERSION ) && defined(KOKKOS_ENABLE_CUDA_UVM) && !defined( __CUDA_ARCH__ )
231 if (sz > 0)
232 KOKKOS_IMPL_CUDA_SAFE_CALL( cudaFree(m) );
233#elif defined(HAVE_SACADO_KOKKOSCORE) && defined(SACADO_KOKKOS_USE_MEMORY_POOL) && !defined(SACADO_DISABLE_CUDA_IN_KOKKOS) && defined(__CUDA_ARCH__)
234 const int total_sz = warpReduce(sz);
235 const int lane = warpLane();
236 if (total_sz > 0 && lane == 0) {
237 global_sacado_cuda_memory_pool_on_device->deallocate((void*) m, total_sz*sizeof(T));
238 }
239#elif 0 && defined(HAVE_SACADO_KOKKOSCORE) && defined(SACADO_KOKKOS_USE_MEMORY_POOL) && defined(KOKKOS_ENABLE_OPENMP)
240 if (sz > 0) {
241 if (global_sacado_openmp_memory_pool != 0)
242 global_sacado_openmp_memory_pool->deallocate((void*) m, sz*sizeof(T));
243 else
244 operator delete((void*) m);
245 }
246#else
247 if (sz > 0)
248 operator delete((void*) m);
249#endif
250 }
251
252 }
253
257 template <typename T, bool isScalar = IsScalarType<T>::value>
258 struct ds_array {
259
262 static T* get(int sz) {
263 T* m = Impl::ds_alloc<T>(sz);
264 T* p = m;
265 for (int i=0; i<sz; ++i)
266 new (p++) T();
267 return m;
268 }
269
272 static T* get_and_fill(int sz) {
273 T* m = Impl::ds_alloc<T>(sz);
274 T* p = m;
275 for (int i=0; i<sz; ++i)
276 new (p++) T(0.0);
277 return m;
278 }
279
285 static T* get_and_fill(const T* src, int sz) {
286 T* m = Impl::ds_alloc<T>(sz);
287 T* p = m;
288 for (int i=0; i<sz; ++i)
289 new (p++) T(*(src++));
290 return m;
291 }
292
298 static T* strided_get_and_fill(const T* src, int stride, int sz) {
299 T* m = Impl::ds_alloc<T>(sz);
300 T* p = m;
301 for (int i=0; i<sz; ++i) {
302 new (p++) T(*(src));
303 src += stride;
304 }
305 return m;
306 }
307
310 static void copy(const T* src, T* dest, int sz) {
311 for (int i=0; i<sz; ++i)
312 *(dest++) = *(src++);
313 }
314
317 static void strided_copy(const T* src, int src_stride,
318 T* dest, int dest_stride, int sz) {
319 for (int i=0; i<sz; ++i) {
320 *(dest) = *(src);
321 dest += dest_stride;
322 src += src_stride;
323 }
324 }
325
328 static void zero(T* dest, int sz) {
329 for (int i=0; i<sz; ++i)
330 *(dest++) = T(0.);
331 }
332
335 static void strided_zero(T* dest, int stride, int sz) {
336 for (int i=0; i<sz; ++i) {
337 *(dest) = T(0.);
338 dest += stride;
339 }
340 }
341
344 static void destroy_and_release(T* m, int sz) {
345 T* e = m+sz;
346 for (T* b = m; b!=e; b++)
347 b->~T();
348 Impl::ds_free(m, sz);
349 }
350 };
351
352#if defined(SACADO_VIEW_CUDA_HIERARCHICAL_DFAD) && !defined(SACADO_DISABLE_CUDA_IN_KOKKOS) && defined(__CUDA_ARCH__)
353
354 namespace Impl {
355
356 template <typename T>
358 static T* ds_strided_alloc(const int sz) {
359 T* m = 0;
360 // Only do strided memory allocations when we are doing hierarchical
361 // parallelism with a vector dimension of 32. The limitation on the
362 // memory pool allowing only a single thread in a warp to allocate
363 // makes it too difficult to do otherwise.
364 if (blockDim.x == 32) {
365 //const int lane = warpLane();
366 const int lane = threadIdx.x;
367 if (sz > 0 && lane == 0) {
368#if defined(HAVE_SACADO_KOKKOSCORE) && defined(SACADO_KOKKOS_USE_MEMORY_POOL)
369 m = static_cast<T*>(global_sacado_cuda_memory_pool_on_device->allocate(sz*sizeof(T)));
370 if (m == 0)
371 Kokkos::abort("Allocation failed. Kokkos memory pool is out of memory");
372#else
373 m = static_cast<T* >(operator new(sz*sizeof(T)));
374#if defined(HAVE_SACADO_KOKKOSCORE)
375 if (m == 0)
376 Kokkos::abort("Allocation failed.");
377#endif
378#endif
379 }
380 m = warpBcast(m,0,blockDim.x);
381 }
382 else {
383 if (sz > 0) {
384 m = static_cast<T* >(operator new(sz*sizeof(T)));
385#if defined(HAVE_SACADO_KOKKOSCORE)
386 if (m == 0)
387 Kokkos::abort("Allocation failed.");
388#endif
389 }
390 }
391
392 return m;
393 }
394
395 template <typename T>
397 static void ds_strided_free(T* m, int sz) {
398 if (blockDim.x == 32) {
399 // const int lane = warpLane();
400 const int lane = threadIdx.x;
401 if (sz > 0 && lane == 0) {
402#if defined(HAVE_SACADO_KOKKOSCORE) && defined(SACADO_KOKKOS_USE_MEMORY_POOL)
403 global_sacado_cuda_memory_pool_on_device->deallocate((void*) m, sz*sizeof(T));
404#else
405 operator delete((void*) m);
406#endif
407 }
408 }
409 else {
410 if (sz > 0)
411 operator delete((void*) m);
412 }
413
414 }
415
416 }
417
422 template <typename T>
423 struct ds_array<T,true> {
424
427 static T* get(int sz) {
428 T* m = Impl::ds_strided_alloc<T>(sz);
429 return m;
430 }
431
434 static T* get_and_fill(int sz) {
435 T* m = Impl::ds_strided_alloc<T>(sz);
436 for (int i=threadIdx.x; i<sz; i+=blockDim.x)
437 m[i] = 0.0;
438 return m;
439 }
440
446 static T* get_and_fill(const T* src, int sz) {
447 T* m = Impl::ds_strided_alloc<T>(sz);
448 for (int i=threadIdx.x; i<sz; i+=blockDim.x)
449 m[i] = src[i];
450 return m;
451 }
452
458 static T* strided_get_and_fill(const T* src, int stride, int sz) {
459 T* m = Impl::ds_strided_alloc<T>(sz);
460 for (int i=threadIdx.x; i<sz; i+=blockDim.x)
461 m[i] = src[i*stride];
462 return m;
463 }
464
467 static void copy(const T* src, T* dest, int sz) {
468 for (int i=threadIdx.x; i<sz; i+=blockDim.x)
469 dest[i] = src[i];
470 }
471
474 static void strided_copy(const T* src, int src_stride,
475 T* dest, int dest_stride, int sz) {
476 for (int i=threadIdx.x; i<sz; i+=blockDim.x) {
477 dest[i*dest_stride] = src[i*src_stride];
478 }
479 }
480
483 static void zero(T* dest, int sz) {
484 for (int i=threadIdx.x; i<sz; i+=blockDim.x)
485 dest[i] = T(0.);
486 }
487
490 static void strided_zero(T* dest, int stride, int sz) {
491 for (int i=threadIdx.x; i<sz; i+=blockDim.x) {
492 dest[i*stride] = T(0.);
493 }
494 }
495
498 static void destroy_and_release(T* m, int sz) {
499 Impl::ds_strided_free(m, sz);
500 }
501 };
502
503#elif defined(SACADO_VIEW_CUDA_HIERARCHICAL_DFAD_STRIDED) && !defined(SACADO_DISABLE_CUDA_IN_KOKKOS) && defined(__CUDA_ARCH__)
504
505 namespace Impl {
506
507 template <typename T>
509 static T* ds_strided_alloc(const int sz) {
510 T* m = 0;
511 // Only do strided memory allocations when we are doing hierarchical
512 // parallelism with a vector dimension of 32. The limitation on the
513 // memory pool allowing only a single thread in a warp to allocate
514 // makes it too difficult to do otherwise.
515 if (blockDim.x == 32) {
516 // const int total_sz = warpReduce(sz);
517 // const int lane = warpLane();
518 const int total_sz = warpReduce(sz, blockDim.x);
519 const int lane = threadIdx.x;
520 if (total_sz > 0 && lane == 0) {
521#if defined(HAVE_SACADO_KOKKOSCORE) && defined(SACADO_KOKKOS_USE_MEMORY_POOL)
522 m = static_cast<T*>(global_sacado_cuda_memory_pool_on_device->allocate(total_sz*sizeof(T)));
523 if (m == 0)
524 Kokkos::abort("Allocation failed. Kokkos memory pool is out of memory");
525#else
526 m = static_cast<T* >(operator new(total_sz*sizeof(T)));
527#if defined(HAVE_SACADO_KOKKOSCORE)
528 if (m == 0)
529 Kokkos::abort("Allocation failed.");
530#endif
531#endif
532 }
533 m = warpBcast(m,0,blockDim.x);
534 m += lane;
535 }
536 else {
537 if (sz > 0) {
538 m = static_cast<T* >(operator new(sz*sizeof(T)));
539#if defined(HAVE_SACADO_KOKKOSCORE)
540 if (m == 0)
541 Kokkos::abort("Allocation failed.");
542#endif
543 }
544 }
545
546 return m;
547 }
548
549 template <typename T>
551 static void ds_strided_free(T* m, int sz) {
552 if (blockDim.x == 32) {
553 // const int total_sz = warpReduce(sz);
554 // const int lane = warpLane();
555 const int total_sz = warpReduce(sz, blockDim.x);
556 const int lane = threadIdx.x;
557 if (total_sz > 0 && lane == 0) {
558#if defined(HAVE_SACADO_KOKKOSCORE) && defined(SACADO_KOKKOS_USE_MEMORY_POOL)
559 global_sacado_cuda_memory_pool_on_device->deallocate((void*) m, total_sz*sizeof(T));
560#else
561 operator delete((void*) m);
562#endif
563 }
564 }
565 else {
566 if (sz > 0)
567 operator delete((void*) m);
568 }
569 }
570 }
571
576 template <typename T>
577 struct ds_array<T,true> {
578
581 static T* get(int sz) {
582 T* m = Impl::ds_strided_alloc<T>(sz);
583 return m;
584 }
585
588 static T* get_and_fill(int sz) {
589 T* m = Impl::ds_strided_alloc<T>(sz);
590 for (int i=0; i<sz; ++i)
591 m[i*blockDim.x] = 0.0;
592 return m;
593 }
594
600 static T* get_and_fill(const T* src, int sz) {
601 T* m = Impl::ds_strided_alloc<T>(sz);
602 for (int i=0; i<sz; ++i)
603 m[i*blockDim.x] = src[i*blockDim.x];
604 return m;
605 }
606
612 static T* strided_get_and_fill(const T* src, int stride, int sz) {
613 T* m = Impl::ds_strided_alloc<T>(sz);
614 for (int i=0; i<sz; ++i)
615 m[i*blockDim.x] = src[i*stride];
616 return m;
617 }
618
621 static void copy(const T* src, T* dest, int sz) {
622 for (int i=0; i<sz; ++i)
623 dest[i*blockDim.x] = src[i*blockDim.x];
624 }
625
628 static void strided_copy(const T* src, int src_stride,
629 T* dest, int dest_stride, int sz) {
630 for (int i=0; i<sz; ++i) {
631 *(dest) = *(src);
632 dest += dest_stride;
633 src += src_stride;
634 }
635 }
636
639 static void zero(T* dest, int sz) {
640 for (int i=0; i<sz; ++i)
641 dest[i*blockDim.x] = T(0.);
642 }
643
646 static void strided_zero(T* dest, int stride, int sz) {
647 for (int i=0; i<sz; ++i) {
648 *(dest) = T(0.);
649 dest += stride;
650 }
651 }
652
655 static void destroy_and_release(T* m, int sz) {
656 Impl::ds_strided_free(m, sz);
657 }
658 };
659
660#else
661
666 template <typename T>
667 struct ds_array<T,true> {
668
671 static T* get(int sz) {
672 T* m = Impl::ds_alloc<T>(sz);
673 return m;
674 }
675
678 static T* get_and_fill(int sz) {
679 T* m = Impl::ds_alloc<T>(sz);
680#if defined(__CUDACC__ ) || defined(__HIPCC__ )
681 for (int i=0; i<sz; ++i)
682 m[i] = 0.0;
683#else
684 if (sz > 0)
685 std::memset(m,0,sz*sizeof(T));
686#endif
687 return m;
688 }
689
695 static T* get_and_fill(const T* src, int sz) {
696 T* m = Impl::ds_alloc<T>(sz);
697 for (int i=0; i<sz; ++i)
698 m[i] = src[i];
699 return m;
700 }
701
707 static T* strided_get_and_fill(const T* src, int stride, int sz) {
708 T* m = Impl::ds_alloc<T>(sz);
709 for (int i=0; i<sz; ++i)
710 m[i] = src[i*stride];
711 return m;
712 }
713
716 static void copy(const T* src, T* dest, int sz) {
717 if (sz > 0 && dest != NULL && src != NULL)
718#if defined( __CUDACC__) || defined(__HIPCC__ )
719 for (int i=0; i<sz; ++i)
720 dest[i] = src[i];
721#else
722 std::memcpy(dest,src,sz*sizeof(T));
723#endif
724 }
725
728 static void strided_copy(const T* src, int src_stride,
729 T* dest, int dest_stride, int sz) {
730 for (int i=0; i<sz; ++i) {
731 *(dest) = *(src);
732 dest += dest_stride;
733 src += src_stride;
734 }
735 }
736
739 static void zero(T* dest, int sz) {
740 if (sz > 0 && dest != NULL)
741#if defined(__CUDACC__ ) || defined(__HIPCC__ )
742 for (int i=0; i<sz; ++i)
743 dest[i] = T(0.);
744#else
745 std::memset(dest,0,sz*sizeof(T));
746#endif
747 }
748
751 static void strided_zero(T* dest, int stride, int sz) {
752 for (int i=0; i<sz; ++i) {
753 *(dest) = T(0.);
754 dest += stride;
755 }
756 }
757
760 static void destroy_and_release(T* m, int sz) {
761 Impl::ds_free(m, sz);
762 }
763 };
764
765#endif
766
767} // namespace Sacado
768
769#endif // SACADO_DYNAMICARRAY_HPP
#define SACADO_INLINE_FUNCTION
expr true
#define T
const double y
const char * p
static SACADO_INLINE_FUNCTION T * ds_alloc(const int sz)
static SACADO_INLINE_FUNCTION void ds_free(T *m, int sz)
void createGlobalMemoryPool(const ExecSpace &space, const size_t min_total_alloc_size, const uint32_t min_block_alloc_size, const uint32_t max_block_alloc_size, const uint32_t min_superblock_size)
void destroyGlobalMemoryPool(const ExecSpace &space)
static SACADO_INLINE_FUNCTION T * get_and_fill(const T *src, int sz)
Get memory for new array of length sz and fill with entries from src.
static SACADO_INLINE_FUNCTION T * strided_get_and_fill(const T *src, int stride, int sz)
Get memory for new array of length sz and fill with entries from src.
static SACADO_INLINE_FUNCTION void destroy_and_release(T *m, int sz)
Destroy array elements and release memory.
static SACADO_INLINE_FUNCTION T * get(int sz)
Get memory for new array of length sz.
static SACADO_INLINE_FUNCTION T * get_and_fill(int sz)
Get memory for new array of length sz and fill with zeros.
static SACADO_INLINE_FUNCTION void zero(T *dest, int sz)
Zero out array dest of length sz.
static SACADO_INLINE_FUNCTION void copy(const T *src, T *dest, int sz)
Copy array from src to dest of length sz.
static SACADO_INLINE_FUNCTION void strided_zero(T *dest, int stride, int sz)
Zero out array dest of length sz.
static SACADO_INLINE_FUNCTION void strided_copy(const T *src, int src_stride, T *dest, int dest_stride, int sz)
Copy array from src to dest of length sz.
Dynamic array allocation class that works for any type.
static SACADO_INLINE_FUNCTION T * get(int sz)
Get memory for new array of length sz.
static SACADO_INLINE_FUNCTION T * strided_get_and_fill(const T *src, int stride, int sz)
Get memory for new array of length sz and fill with entries from src.
static SACADO_INLINE_FUNCTION void strided_copy(const T *src, int src_stride, T *dest, int dest_stride, int sz)
Copy array from src to dest of length sz.
static SACADO_INLINE_FUNCTION T * get_and_fill(int sz)
Get memory for new array of length sz and fill with zeros.
static SACADO_INLINE_FUNCTION void destroy_and_release(T *m, int sz)
Destroy array elements and release memory.
static SACADO_INLINE_FUNCTION void zero(T *dest, int sz)
Zero out array dest of length sz.
static SACADO_INLINE_FUNCTION void strided_zero(T *dest, int stride, int sz)
Zero out array dest of length sz.
static SACADO_INLINE_FUNCTION T * get_and_fill(const T *src, int sz)
Get memory for new array of length sz and fill with entries from src.
static SACADO_INLINE_FUNCTION void copy(const T *src, T *dest, int sz)
Copy array from src to dest of length sz.