Kokkos Core Kernels Package Version of the Day
Loading...
Searching...
No Matches
Kokkos_CudaSpace.hpp
1//@HEADER
2// ************************************************************************
3//
4// Kokkos v. 4.0
5// Copyright (2022) National Technology & Engineering
6// Solutions of Sandia, LLC (NTESS).
7//
8// Under the terms of Contract DE-NA0003525 with NTESS,
9// the U.S. Government retains certain rights in this software.
10//
11// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12// See https://kokkos.org/LICENSE for license information.
13// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14//
15//@HEADER
16
17#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18#include <Kokkos_Macros.hpp>
19static_assert(false,
20 "Including non-public Kokkos header files is not allowed.");
21#endif
22#ifndef KOKKOS_CUDASPACE_HPP
23#define KOKKOS_CUDASPACE_HPP
24
25#include <Kokkos_Macros.hpp>
26#if defined(KOKKOS_ENABLE_CUDA)
27
28#include <Kokkos_Core_fwd.hpp>
29
30#include <iosfwd>
31#include <typeinfo>
32#include <string>
33#include <memory>
34
35#include <Kokkos_HostSpace.hpp>
36#include <impl/Kokkos_SharedAlloc.hpp>
37
38#include <impl/Kokkos_Profiling_Interface.hpp>
39
40#include <Cuda/Kokkos_Cuda_abort.hpp>
41
#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
// C-linkage getter/setter pair for the "pin UVM to host" debug switch. They
// are declared only when KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST is defined;
// their definitions are not part of this header.
extern "C" bool kokkos_impl_cuda_pin_uvm_to_host();
extern "C" void kokkos_impl_cuda_set_pin_uvm_to_host(bool);
#endif
46
47/*--------------------------------------------------------------------------*/
48
49namespace Kokkos {
50namespace Impl {
51
/// Trait that identifies the CUDA memory spaces declared in this header.
///
/// The primary template derives from std::false_type for every type; the
/// CUDA memory spaces opt in through explicit specializations that derive
/// from std::true_type.
template <typename T>
struct is_cuda_type_space : public std::false_type {};
54
55} // namespace Impl
56
59class CudaSpace {
60 public:
62 using memory_space = CudaSpace;
63 using execution_space = Kokkos::Cuda;
65
66 using size_type = unsigned int;
67
68 /*--------------------------------*/
69
70 CudaSpace();
71 CudaSpace(CudaSpace&& rhs) = default;
72 CudaSpace(const CudaSpace& rhs) = default;
73 CudaSpace& operator=(CudaSpace&& rhs) = default;
74 CudaSpace& operator=(const CudaSpace& rhs) = default;
75 ~CudaSpace() = default;
76
78 void* allocate(const Cuda& exec_space, const size_t arg_alloc_size) const;
79 void* allocate(const Cuda& exec_space, const char* arg_label,
80 const size_t arg_alloc_size,
81 const size_t arg_logical_size = 0) const;
82 void* allocate(const size_t arg_alloc_size) const;
83 void* allocate(const char* arg_label, const size_t arg_alloc_size,
84 const size_t arg_logical_size = 0) const;
85
87 void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;
88 void deallocate(const char* arg_label, void* const arg_alloc_ptr,
89 const size_t arg_alloc_size,
90 const size_t arg_logical_size = 0) const;
91
92 private:
93 template <class, class, class, class>
95 void* impl_allocate(const Cuda& exec_space, const char* arg_label,
96 const size_t arg_alloc_size,
97 const size_t arg_logical_size = 0,
98 const Kokkos::Tools::SpaceHandle =
99 Kokkos::Tools::make_space_handle(name())) const;
100 void* impl_allocate(const char* arg_label, const size_t arg_alloc_size,
101 const size_t arg_logical_size = 0,
102 const Kokkos::Tools::SpaceHandle =
103 Kokkos::Tools::make_space_handle(name())) const;
104 void impl_deallocate(const char* arg_label, void* const arg_alloc_ptr,
105 const size_t arg_alloc_size,
106 const size_t arg_logical_size = 0,
107 const Kokkos::Tools::SpaceHandle =
108 Kokkos::Tools::make_space_handle(name())) const;
109
110 public:
112 static constexpr const char* name() { return m_name; }
113
114 private:
115 int m_device;
116
117 static constexpr const char* m_name = "Cuda";
118 friend class Kokkos::Impl::SharedAllocationRecord<Kokkos::CudaSpace, void>;
119};
120
121template <>
122struct Impl::is_cuda_type_space<CudaSpace> : public std::true_type {};
123
124} // namespace Kokkos
125
126/*--------------------------------------------------------------------------*/
127/*--------------------------------------------------------------------------*/
128
129namespace Kokkos {
130
134class CudaUVMSpace {
135 public:
137 using memory_space = CudaUVMSpace;
138 using execution_space = Cuda;
140 using size_type = unsigned int;
141
142#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
144 KOKKOS_DEPRECATED static bool available();
145#endif
146
147 /*--------------------------------*/
148
149 /*--------------------------------*/
150
151 CudaUVMSpace();
152 CudaUVMSpace(CudaUVMSpace&& rhs) = default;
153 CudaUVMSpace(const CudaUVMSpace& rhs) = default;
154 CudaUVMSpace& operator=(CudaUVMSpace&& rhs) = default;
155 CudaUVMSpace& operator=(const CudaUVMSpace& rhs) = default;
156 ~CudaUVMSpace() = default;
157
159 void* allocate(const size_t arg_alloc_size) const;
160 void* allocate(const char* arg_label, const size_t arg_alloc_size,
161 const size_t arg_logical_size = 0) const;
162
164 void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;
165 void deallocate(const char* arg_label, void* const arg_alloc_ptr,
166 const size_t arg_alloc_size,
167 const size_t arg_logical_size = 0) const;
168
169 private:
170 template <class, class, class, class>
172 void* impl_allocate(const char* arg_label, const size_t arg_alloc_size,
173 const size_t arg_logical_size = 0,
174 const Kokkos::Tools::SpaceHandle =
175 Kokkos::Tools::make_space_handle(name())) const;
176 void impl_deallocate(const char* arg_label, void* const arg_alloc_ptr,
177 const size_t arg_alloc_size,
178 const size_t arg_logical_size = 0,
179 const Kokkos::Tools::SpaceHandle =
180 Kokkos::Tools::make_space_handle(name())) const;
181
182 public:
184 static constexpr const char* name() { return m_name; }
185
186#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
187 static bool cuda_pin_uvm_to_host();
188 static void cuda_set_pin_uvm_to_host(bool val);
189#endif
190 /*--------------------------------*/
191
192 private:
193 int m_device;
194
195#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
196 static bool kokkos_impl_cuda_pin_uvm_to_host_v;
197#endif
198 static constexpr const char* m_name = "CudaUVM";
199};
200
201template <>
202struct Impl::is_cuda_type_space<CudaUVMSpace> : public std::true_type {};
203
204} // namespace Kokkos
205
206/*--------------------------------------------------------------------------*/
207/*--------------------------------------------------------------------------*/
208
209namespace Kokkos {
210
214class CudaHostPinnedSpace {
215 public:
217
218 using execution_space = HostSpace::execution_space;
219 using memory_space = CudaHostPinnedSpace;
221 using size_type = unsigned int;
222
223 /*--------------------------------*/
224
225 CudaHostPinnedSpace();
226 CudaHostPinnedSpace(CudaHostPinnedSpace&& rhs) = default;
227 CudaHostPinnedSpace(const CudaHostPinnedSpace& rhs) = default;
228 CudaHostPinnedSpace& operator=(CudaHostPinnedSpace&& rhs) = default;
229 CudaHostPinnedSpace& operator=(const CudaHostPinnedSpace& rhs) = default;
230 ~CudaHostPinnedSpace() = default;
231
233 void* allocate(const size_t arg_alloc_size) const;
234 void* allocate(const char* arg_label, const size_t arg_alloc_size,
235 const size_t arg_logical_size = 0) const;
236
238 void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;
239 void deallocate(const char* arg_label, void* const arg_alloc_ptr,
240 const size_t arg_alloc_size,
241 const size_t arg_logical_size = 0) const;
242
243 private:
244 template <class, class, class, class>
246 void* impl_allocate(const char* arg_label, const size_t arg_alloc_size,
247 const size_t arg_logical_size = 0,
248 const Kokkos::Tools::SpaceHandle =
249 Kokkos::Tools::make_space_handle(name())) const;
250 void impl_deallocate(const char* arg_label, void* const arg_alloc_ptr,
251 const size_t arg_alloc_size,
252 const size_t arg_logical_size = 0,
253 const Kokkos::Tools::SpaceHandle =
254 Kokkos::Tools::make_space_handle(name())) const;
255
256 public:
258 static constexpr const char* name() { return m_name; }
259
260 private:
261 static constexpr const char* m_name = "CudaHostPinned";
262
263 /*--------------------------------*/
264};
265
266template <>
267struct Impl::is_cuda_type_space<CudaHostPinnedSpace> : public std::true_type {};
268
269} // namespace Kokkos
270
271/*--------------------------------------------------------------------------*/
272/*--------------------------------------------------------------------------*/
273
274namespace Kokkos {
275namespace Impl {
276
277cudaStream_t cuda_get_deep_copy_stream();
278
279const std::unique_ptr<Kokkos::Cuda>& cuda_get_deep_copy_space(
280 bool initialize = true);
281
283 Kokkos::CudaSpace>::assignable,
284 "");
286 Kokkos::CudaUVMSpace>::assignable,
287 "");
288static_assert(
290 Kokkos::CudaHostPinnedSpace>::assignable,
291 "");
292
293//----------------------------------------
294
295template <>
296struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaSpace> {
297 enum : bool { assignable = false };
298 enum : bool { accessible = false };
299 enum : bool { deepcopy = true };
300};
301
302template <>
303struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaUVMSpace> {
304 // HostSpace::execution_space != CudaUVMSpace::execution_space
305 enum : bool { assignable = false };
306 enum : bool { accessible = true };
307 enum : bool { deepcopy = true };
308};
309
310template <>
311struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaHostPinnedSpace> {
312 // HostSpace::execution_space == CudaHostPinnedSpace::execution_space
313 enum : bool { assignable = true };
314 enum : bool { accessible = true };
315 enum : bool { deepcopy = true };
316};
317
318//----------------------------------------
319
320template <>
321struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::HostSpace> {
322 enum : bool { assignable = false };
323 enum : bool { accessible = false };
324 enum : bool { deepcopy = true };
325};
326
327template <>
328struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::CudaUVMSpace> {
329 // CudaSpace::execution_space == CudaUVMSpace::execution_space
330 enum : bool { assignable = true };
331 enum : bool { accessible = true };
332 enum : bool { deepcopy = true };
333};
334
335template <>
336struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::CudaHostPinnedSpace> {
337 // CudaSpace::execution_space != CudaHostPinnedSpace::execution_space
338 enum : bool { assignable = false };
339 enum : bool { accessible = true }; // CudaSpace::execution_space
340 enum : bool { deepcopy = true };
341};
342
343//----------------------------------------
344// CudaUVMSpace::execution_space == Cuda
345// CudaUVMSpace accessible to both Cuda and Host
346
347template <>
348struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::HostSpace> {
349 enum : bool { assignable = false };
350 enum : bool { accessible = false }; // Cuda cannot access HostSpace
351 enum : bool { deepcopy = true };
352};
353
354template <>
355struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::CudaSpace> {
356 // CudaUVMSpace::execution_space == CudaSpace::execution_space
357 // Can access CudaUVMSpace from Host but cannot access CudaSpace from Host
358 enum : bool { assignable = false };
359
360 // CudaUVMSpace::execution_space can access CudaSpace
361 enum : bool { accessible = true };
362 enum : bool { deepcopy = true };
363};
364
365template <>
366struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::CudaHostPinnedSpace> {
367 // CudaUVMSpace::execution_space != CudaHostPinnedSpace::execution_space
368 enum : bool { assignable = false };
369 enum : bool { accessible = true }; // CudaUVMSpace::execution_space
370 enum : bool { deepcopy = true };
371};
372
373//----------------------------------------
374// CudaHostPinnedSpace::execution_space == HostSpace::execution_space
375// CudaHostPinnedSpace accessible to both Cuda and Host
376
377template <>
378struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::HostSpace> {
379 enum : bool { assignable = false }; // Cannot access from Cuda
380 enum : bool { accessible = true }; // CudaHostPinnedSpace::execution_space
381 enum : bool { deepcopy = true };
382};
383
384template <>
385struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::CudaSpace> {
386 enum : bool { assignable = false }; // Cannot access from Host
387 enum : bool { accessible = false };
388 enum : bool { deepcopy = true };
389};
390
391template <>
392struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::CudaUVMSpace> {
393 enum : bool { assignable = false }; // different execution_space
394 enum : bool { accessible = true }; // same accessibility
395 enum : bool { deepcopy = true };
396};
397
398//----------------------------------------
399
400} // namespace Impl
401} // namespace Kokkos
402
403/*--------------------------------------------------------------------------*/
404/*--------------------------------------------------------------------------*/
405
406namespace Kokkos {
407namespace Impl {
408
409void DeepCopyCuda(void* dst, const void* src, size_t n);
410void DeepCopyAsyncCuda(const Cuda& instance, void* dst, const void* src,
411 size_t n);
412void DeepCopyAsyncCuda(void* dst, const void* src, size_t n);
413
414template <class MemSpace>
415struct DeepCopy<MemSpace, HostSpace, Cuda,
416 std::enable_if_t<is_cuda_type_space<MemSpace>::value>> {
417 DeepCopy(void* dst, const void* src, size_t n) { DeepCopyCuda(dst, src, n); }
418 DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
419 DeepCopyAsyncCuda(instance, dst, src, n);
420 }
421};
422
423template <class MemSpace>
424struct DeepCopy<HostSpace, MemSpace, Cuda,
425 std::enable_if_t<is_cuda_type_space<MemSpace>::value>> {
426 DeepCopy(void* dst, const void* src, size_t n) { DeepCopyCuda(dst, src, n); }
427 DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
428 DeepCopyAsyncCuda(instance, dst, src, n);
429 }
430};
431
432template <class MemSpace1, class MemSpace2>
433struct DeepCopy<MemSpace1, MemSpace2, Cuda,
434 std::enable_if_t<is_cuda_type_space<MemSpace1>::value &&
435 is_cuda_type_space<MemSpace2>::value>> {
436 DeepCopy(void* dst, const void* src, size_t n) { DeepCopyCuda(dst, src, n); }
437 DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
438 DeepCopyAsyncCuda(instance, dst, src, n);
439 }
440};
441
442template <class MemSpace1, class MemSpace2, class ExecutionSpace>
443struct DeepCopy<MemSpace1, MemSpace2, ExecutionSpace,
444 std::enable_if_t<is_cuda_type_space<MemSpace1>::value &&
445 is_cuda_type_space<MemSpace2>::value &&
446 !std::is_same<ExecutionSpace, Cuda>::value>> {
447 inline DeepCopy(void* dst, const void* src, size_t n) {
448 DeepCopyCuda(dst, src, n);
449 }
450
451 inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
452 size_t n) {
453 exec.fence(fence_string());
454 DeepCopyAsyncCuda(dst, src, n);
455 }
456
457 private:
458 static const std::string& fence_string() {
459 static const std::string string =
460 std::string("Kokkos::Impl::DeepCopy<") + MemSpace1::name() + "Space, " +
461 MemSpace2::name() +
462 "Space, ExecutionSpace>::DeepCopy: fence before copy";
463 return string;
464 }
465};
466
467template <class MemSpace, class ExecutionSpace>
468struct DeepCopy<MemSpace, HostSpace, ExecutionSpace,
469 std::enable_if_t<is_cuda_type_space<MemSpace>::value &&
470 !std::is_same<ExecutionSpace, Cuda>::value>> {
471 inline DeepCopy(void* dst, const void* src, size_t n) {
472 DeepCopyCuda(dst, src, n);
473 }
474
475 inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
476 size_t n) {
477 exec.fence(fence_string());
478 DeepCopyAsyncCuda(dst, src, n);
479 }
480
481 private:
482 static const std::string& fence_string() {
483 static const std::string string =
484 std::string("Kokkos::Impl::DeepCopy<") + MemSpace::name() +
485 "Space, HostSpace, ExecutionSpace>::DeepCopy: fence before copy";
486 return string;
487 }
488};
489
490template <class MemSpace, class ExecutionSpace>
491struct DeepCopy<HostSpace, MemSpace, ExecutionSpace,
492 std::enable_if_t<is_cuda_type_space<MemSpace>::value &&
493 !std::is_same<ExecutionSpace, Cuda>::value>> {
494 inline DeepCopy(void* dst, const void* src, size_t n) {
495 DeepCopyCuda(dst, src, n);
496 }
497
498 inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
499 size_t n) {
500 exec.fence(fence_string());
501 DeepCopyAsyncCuda(dst, src, n);
502 }
503
504 private:
505 static const std::string& fence_string() {
506 static const std::string string =
507 std::string("Kokkos::Impl::DeepCopy<HostSpace, ") + MemSpace::name() +
508 "Space, ExecutionSpace>::DeepCopy: fence before copy";
509 return string;
510 }
511};
512
513} // namespace Impl
514} // namespace Kokkos
515
516//----------------------------------------------------------------------------
517//----------------------------------------------------------------------------
518
519namespace Kokkos {
520namespace Impl {
521
522template <>
523class SharedAllocationRecord<Kokkos::CudaSpace, void>
524 : public HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace> {
525 private:
526 friend class SharedAllocationRecord<Kokkos::CudaUVMSpace, void>;
527 friend class SharedAllocationRecordCommon<Kokkos::CudaSpace>;
528 friend class HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace>;
529
530 using RecordBase = SharedAllocationRecord<void, void>;
531 using base_t =
532 HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace>;
533
534 SharedAllocationRecord(const SharedAllocationRecord&) = delete;
535 SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;
536
537 static ::cudaTextureObject_t attach_texture_object(
538 const unsigned sizeof_alias, void* const alloc_ptr,
539 const size_t alloc_size);
540
541#ifdef KOKKOS_ENABLE_DEBUG
542 static RecordBase s_root_record;
543#endif
544
545 ::cudaTextureObject_t m_tex_obj = 0;
546 const Kokkos::CudaSpace m_space;
547
548 protected:
549 ~SharedAllocationRecord();
550 SharedAllocationRecord() = default;
551
552 // This constructor does not forward to the one without exec_space arg
553 // in order to work around https://github.com/kokkos/kokkos/issues/5258
554 // This constructor is templated so I can't just put it into the cpp file
555 // like the other constructor.
556 template <typename ExecutionSpace>
557 SharedAllocationRecord(
558 const ExecutionSpace& /*exec_space*/, const Kokkos::CudaSpace& arg_space,
559 const std::string& arg_label, const size_t arg_alloc_size,
560 const RecordBase::function_type arg_dealloc = &base_t::deallocate)
561 : base_t(
562#ifdef KOKKOS_ENABLE_DEBUG
563 &SharedAllocationRecord<Kokkos::CudaSpace, void>::s_root_record,
564#endif
565 Impl::checked_allocation_with_header(arg_space, arg_label,
566 arg_alloc_size),
567 sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
568 arg_label),
569 m_tex_obj(0),
570 m_space(arg_space) {
571
572 SharedAllocationHeader header;
573
574 this->base_t::_fill_host_accessible_header_info(header, arg_label);
575
576 // Copy to device memory
577 // workaround for issue with NVCC and MSVC
578 // https://github.com/kokkos/kokkos/issues/5258
579 deep_copy_header_no_exec(RecordBase::m_alloc_ptr, &header);
580 }
581
582 SharedAllocationRecord(
583 const Kokkos::Cuda& exec_space, const Kokkos::CudaSpace& arg_space,
584 const std::string& arg_label, const size_t arg_alloc_size,
585 const RecordBase::function_type arg_dealloc = &base_t::deallocate);
586
587 SharedAllocationRecord(
588 const Kokkos::CudaSpace& arg_space, const std::string& arg_label,
589 const size_t arg_alloc_size,
590 const RecordBase::function_type arg_dealloc = &base_t::deallocate);
591
592 // helper function to work around MSVC+NVCC issue
593 // https://github.com/kokkos/kokkos/issues/5258
594 static void deep_copy_header_no_exec(void*, const void*);
595
596 public:
597 template <typename AliasType>
598 inline ::cudaTextureObject_t attach_texture_object() {
599 static_assert((std::is_same<AliasType, int>::value ||
600 std::is_same<AliasType, ::int2>::value ||
601 std::is_same<AliasType, ::int4>::value),
602 "Cuda texture fetch only supported for alias types of int, "
603 "::int2, or ::int4");
604
605 if (m_tex_obj == 0) {
606 m_tex_obj = attach_texture_object(sizeof(AliasType),
607 (void*)RecordBase::m_alloc_ptr,
608 RecordBase::m_alloc_size);
609 }
610
611 return m_tex_obj;
612 }
613
614 template <typename AliasType>
615 inline int attach_texture_object_offset(const AliasType* const ptr) {
616 // Texture object is attached to the entire allocation range
617 return ptr - reinterpret_cast<AliasType*>(RecordBase::m_alloc_ptr);
618 }
619};
620
621template <>
622class SharedAllocationRecord<Kokkos::CudaUVMSpace, void>
623 : public SharedAllocationRecordCommon<Kokkos::CudaUVMSpace> {
624 private:
625 friend class SharedAllocationRecordCommon<Kokkos::CudaUVMSpace>;
626
627 using base_t = SharedAllocationRecordCommon<Kokkos::CudaUVMSpace>;
628 using RecordBase = SharedAllocationRecord<void, void>;
629
630 SharedAllocationRecord(const SharedAllocationRecord&) = delete;
631 SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;
632
633 static RecordBase s_root_record;
634
635 ::cudaTextureObject_t m_tex_obj = 0;
636 const Kokkos::CudaUVMSpace m_space;
637
638 protected:
639 ~SharedAllocationRecord();
640 SharedAllocationRecord() = default;
641
642 // This constructor does not forward to the one without exec_space arg
643 // in order to work around https://github.com/kokkos/kokkos/issues/5258
644 // This constructor is templated so I can't just put it into the cpp file
645 // like the other constructor.
646 template <typename ExecutionSpace>
647 SharedAllocationRecord(
648 const ExecutionSpace& /*exec_space*/,
649 const Kokkos::CudaUVMSpace& arg_space, const std::string& arg_label,
650 const size_t arg_alloc_size,
651 const RecordBase::function_type arg_dealloc = &base_t::deallocate)
652 : base_t(
653#ifdef KOKKOS_ENABLE_DEBUG
654 &SharedAllocationRecord<Kokkos::CudaUVMSpace, void>::s_root_record,
655#endif
656 Impl::checked_allocation_with_header(arg_space, arg_label,
657 arg_alloc_size),
658 sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
659 arg_label),
660 m_tex_obj(0),
661 m_space(arg_space) {
662 this->base_t::_fill_host_accessible_header_info(*base_t::m_alloc_ptr,
663 arg_label);
664 }
665
666 SharedAllocationRecord(
667 const Kokkos::CudaUVMSpace& arg_space, const std::string& arg_label,
668 const size_t arg_alloc_size,
669 const RecordBase::function_type arg_dealloc = &base_t::deallocate);
670
671 public:
672 template <typename AliasType>
673 inline ::cudaTextureObject_t attach_texture_object() {
674 static_assert((std::is_same<AliasType, int>::value ||
675 std::is_same<AliasType, ::int2>::value ||
676 std::is_same<AliasType, ::int4>::value),
677 "Cuda texture fetch only supported for alias types of int, "
678 "::int2, or ::int4");
679
680 if (m_tex_obj == 0) {
681 m_tex_obj = SharedAllocationRecord<Kokkos::CudaSpace, void>::
682 attach_texture_object(sizeof(AliasType),
683 (void*)RecordBase::m_alloc_ptr,
684 RecordBase::m_alloc_size);
685 }
686
687 return m_tex_obj;
688 }
689
690 template <typename AliasType>
691 inline int attach_texture_object_offset(const AliasType* const ptr) {
692 // Texture object is attached to the entire allocation range
693 return ptr - reinterpret_cast<AliasType*>(RecordBase::m_alloc_ptr);
694 }
695};
696
697template <>
698class SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>
699 : public SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace> {
700 private:
701 friend class SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace>;
702
703 using RecordBase = SharedAllocationRecord<void, void>;
704 using base_t = SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace>;
705
706 SharedAllocationRecord(const SharedAllocationRecord&) = delete;
707 SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;
708
709 static RecordBase s_root_record;
710
711 const Kokkos::CudaHostPinnedSpace m_space;
712
713 protected:
714 ~SharedAllocationRecord();
715 SharedAllocationRecord() = default;
716
717 // This constructor does not forward to the one without exec_space arg
718 // in order to work around https://github.com/kokkos/kokkos/issues/5258
719 // This constructor is templated so I can't just put it into the cpp file
720 // like the other constructor.
721 template <typename ExecutionSpace>
722 SharedAllocationRecord(
723 const ExecutionSpace& /*exec_space*/,
724 const Kokkos::CudaHostPinnedSpace& arg_space,
725 const std::string& arg_label, const size_t arg_alloc_size,
726 const RecordBase::function_type arg_dealloc = &base_t::deallocate)
727 : base_t(
728#ifdef KOKKOS_ENABLE_DEBUG
729 &SharedAllocationRecord<Kokkos::CudaHostPinnedSpace,
730 void>::s_root_record,
731#endif
732 Impl::checked_allocation_with_header(arg_space, arg_label,
733 arg_alloc_size),
734 sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
735 arg_label),
736 m_space(arg_space) {
737 this->base_t::_fill_host_accessible_header_info(*base_t::m_alloc_ptr,
738 arg_label);
739 }
740
741 SharedAllocationRecord(
742 const Kokkos::CudaHostPinnedSpace& arg_space,
743 const std::string& arg_label, const size_t arg_alloc_size,
744 const RecordBase::function_type arg_dealloc = &base_t::deallocate);
745};
746
747} // namespace Impl
748} // namespace Kokkos
749
750//----------------------------------------------------------------------------
751//----------------------------------------------------------------------------
752
753#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
754#endif /* #define KOKKOS_CUDASPACE_HPP */
A thread safe view to a bitset.
LogicalMemorySpace is a space that is identical to another space, but differentiable by name and temp...
Memory management for host memory.
DefaultHostExecutionSpace execution_space
Default execution space for this memory space.
bool available()
Query if hwloc is available.
Access relationship between DstMemorySpace and SrcMemorySpace.