Kokkos Core Kernels Package Version of the Day
Loading...
Searching...
No Matches
Kokkos_ExecPolicy.hpp
1//@HEADER
2// ************************************************************************
3//
4// Kokkos v. 4.0
5// Copyright (2022) National Technology & Engineering
6// Solutions of Sandia, LLC (NTESS).
7//
8// Under the terms of Contract DE-NA0003525 with NTESS,
9// the U.S. Government retains certain rights in this software.
10//
11// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12// See https://kokkos.org/LICENSE for license information.
13// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14//
15//@HEADER
16
17#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18#include <Kokkos_Macros.hpp>
19static_assert(false,
20 "Including non-public Kokkos header files is not allowed.");
21#endif
22#ifndef KOKKOS_EXECPOLICY_HPP
23#define KOKKOS_EXECPOLICY_HPP
24
25#include <Kokkos_Core_fwd.hpp>
26#include <impl/Kokkos_Traits.hpp>
27#include <impl/Kokkos_Error.hpp>
28#include <impl/Kokkos_AnalyzePolicy.hpp>
29#include <Kokkos_Concepts.hpp>
30#include <typeinfo>
31
32//----------------------------------------------------------------------------
33
34namespace Kokkos {
35
36struct ParallelForTag {};
37struct ParallelScanTag {};
38struct ParallelReduceTag {};
39
/// Thin wrapper around an integer chunk size so that it can be recognised by
/// type when passed as an optional policy argument (see RangePolicy::set).
struct ChunkSize {
  int value;  ///< The wrapped chunk size, stored exactly as given.

  /// Wraps \p value_ without validating it; implicit conversion from int is
  /// intentionally kept.
  ChunkSize(int value_) : value{value_} {}
};
44
66template <class... Properties>
67class RangePolicy : public Impl::PolicyTraits<Properties...> {
68 public:
69 using traits = Impl::PolicyTraits<Properties...>;
70
71 private:
72 typename traits::execution_space m_space;
73 typename traits::index_type m_begin;
74 typename traits::index_type m_end;
75 typename traits::index_type m_granularity;
76 typename traits::index_type m_granularity_mask;
77
78 template <class... OtherProperties>
79 friend class RangePolicy;
80
81 public:
84 using member_type = typename traits::index_type;
85 using index_type = typename traits::index_type;
86
87 KOKKOS_INLINE_FUNCTION const typename traits::execution_space& space() const {
88 return m_space;
89 }
90 KOKKOS_INLINE_FUNCTION member_type begin() const { return m_begin; }
91 KOKKOS_INLINE_FUNCTION member_type end() const { return m_end; }
92
93 // TODO: find a better workaround for Clang's weird instantiation order.
94 // This operator is here because of an instantiation error, where the
95 // RangePolicy is inserted into FunctorValueTraits, which tries decltype on
96 // the operator. It tries to do this even though the first argument of
97 // parallel_for clearly doesn't match.
98 void operator()(const int&) const {}
99
100 template <class... OtherProperties>
101 RangePolicy(const RangePolicy<OtherProperties...>& p)
102 : traits(p), // base class may contain data such as desired occupancy
103 m_space(p.m_space),
104 m_begin(p.m_begin),
105 m_end(p.m_end),
106 m_granularity(p.m_granularity),
107 m_granularity_mask(p.m_granularity_mask) {}
108
109 inline RangePolicy()
110 : m_space(),
111 m_begin(0),
112 m_end(0),
113 m_granularity(0),
114 m_granularity_mask(0) {}
115
117 inline RangePolicy(const typename traits::execution_space& work_space,
118 const member_type work_begin, const member_type work_end)
119 : m_space(work_space),
120 m_begin(work_begin < work_end ? work_begin : 0),
121 m_end(work_begin < work_end ? work_end : 0),
122 m_granularity(0),
123 m_granularity_mask(0) {
124 set_auto_chunk_size();
125 }
126
128 inline RangePolicy(const member_type work_begin, const member_type work_end)
129 : RangePolicy(typename traits::execution_space(), work_begin, work_end) {
130 set_auto_chunk_size();
131 }
132
134 template <class... Args>
135 inline RangePolicy(const typename traits::execution_space& work_space,
136 const member_type work_begin, const member_type work_end,
137 Args... args)
138 : m_space(work_space),
139 m_begin(work_begin < work_end ? work_begin : 0),
140 m_end(work_begin < work_end ? work_end : 0),
141 m_granularity(0),
142 m_granularity_mask(0) {
143 set_auto_chunk_size();
144 set(args...);
145 }
146
148 template <class... Args>
149 inline RangePolicy(const member_type work_begin, const member_type work_end,
150 Args... args)
151 : RangePolicy(typename traits::execution_space(), work_begin, work_end) {
152 set_auto_chunk_size();
153 set(args...);
154 }
155
156 private:
157 inline void set() {}
158
159 public:
160 template <class... Args>
161 inline void set(Args...) {
162 static_assert(
163 0 == sizeof...(Args),
164 "Kokkos::RangePolicy: unhandled constructor arguments encountered.");
165 }
166
167 template <class... Args>
168 inline void set(const ChunkSize& chunksize, Args... args) {
169 m_granularity = chunksize.value;
170 m_granularity_mask = m_granularity - 1;
171 set(args...);
172 }
173
174 public:
176 inline member_type chunk_size() const { return m_granularity; }
177
180 m_granularity = chunk_size;
181 m_granularity_mask = m_granularity - 1;
182 return *this;
183 }
184
185 private:
187 inline void set_auto_chunk_size() {
188#ifdef KOKKOS_ENABLE_SYCL
189 if (std::is_same_v<typename traits::execution_space,
190 Kokkos::Experimental::SYCL>) {
191 // chunk_size <=1 lets the compiler choose the workgroup size when
192 // launching kernels
193 m_granularity = 1;
194 m_granularity_mask = 0;
195 return;
196 }
197#endif
198 auto concurrency = static_cast<int64_t>(m_space.concurrency());
199 if (concurrency == 0) concurrency = 1;
200
201 if (m_granularity > 0) {
202 if (!Impl::is_integral_power_of_two(m_granularity))
203 Kokkos::abort("RangePolicy blocking granularity must be power of two");
204 }
205
206 int64_t new_chunk_size = 1;
207 while (new_chunk_size * 100 * concurrency <
208 static_cast<int64_t>(m_end - m_begin))
209 new_chunk_size *= 2;
210 if (new_chunk_size < 128) {
211 new_chunk_size = 1;
212 while ((new_chunk_size * 40 * concurrency <
213 static_cast<int64_t>(m_end - m_begin)) &&
214 (new_chunk_size < 128))
215 new_chunk_size *= 2;
216 }
217 m_granularity = new_chunk_size;
218 m_granularity_mask = m_granularity - 1;
219 }
220
221 public:
226 struct WorkRange {
227 using work_tag = typename RangePolicy<Properties...>::work_tag;
228 using member_type = typename RangePolicy<Properties...>::member_type;
229
230 KOKKOS_INLINE_FUNCTION member_type begin() const { return m_begin; }
231 KOKKOS_INLINE_FUNCTION member_type end() const { return m_end; }
232
237 KOKKOS_INLINE_FUNCTION
239 const int part_size)
240 : m_begin(0), m_end(0) {
241 if (part_size) {
242 // Split evenly among partitions, then round up to the granularity.
243 const member_type work_part =
244 ((((range.end() - range.begin()) + (part_size - 1)) / part_size) +
245 range.m_granularity_mask) &
246 ~member_type(range.m_granularity_mask);
247
248 m_begin = range.begin() + work_part * part_rank;
249 m_end = m_begin + work_part;
250
251 if (range.end() < m_begin) m_begin = range.end();
252 if (range.end() < m_end) m_end = range.end();
253 }
254 }
255
256 private:
257 member_type m_begin;
258 member_type m_end;
259 WorkRange();
260 WorkRange& operator=(const WorkRange&);
261 };
262};
263
264} // namespace Kokkos
265
266//----------------------------------------------------------------------------
267//----------------------------------------------------------------------------
268
269namespace Kokkos {
270
271namespace Impl {
272
// Interface of the internal team policy that Kokkos::TeamPolicy derives from.
// Every member in this block is a declaration only; no definitions are
// visible here.
273template <class ExecSpace, class... Properties>
274class TeamPolicyInternal : public Impl::PolicyTraits<Properties...> {
275 private:
276 using traits = Impl::PolicyTraits<Properties...>;
277
278 public:
279 using index_type = typename traits::index_type;
280
281 //----------------------------------------
// Team-size queries for a given functor. NOTE(review): presumably the largest
// team size usable with the functor — the original comment is not visible.
292 template <class FunctorType>
293 static int team_size_max(const FunctorType&);
294
// NOTE(review): presumably a suggested team size for the functor — confirm
// against the upstream documentation.
305 template <class FunctorType>
306 static int team_size_recommended(const FunctorType&);
307
308 template <class FunctorType>
309 static int team_size_recommended(const FunctorType&, const int&);
310
// Non-static variant taking the functor and a vector length.
311 template <class FunctorType>
312 int team_size_recommended(const FunctorType& functor,
313 const int vector_length);
314
315 //----------------------------------------
// Constructors taking an execution space instance, a league size, either an
// explicit team size or Kokkos::AUTO_t, and a vector length defaulting to 1.
317 TeamPolicyInternal(const typename traits::execution_space&,
318 int league_size_request, int team_size_request,
319 int vector_length_request = 1);
320
321 TeamPolicyInternal(const typename traits::execution_space&,
322 int league_size_request, const Kokkos::AUTO_t&,
323 int vector_length_request = 1);
324
// Same two forms without an execution space argument.
327 TeamPolicyInternal(int league_size_request, int team_size_request,
328 int vector_length_request = 1);
329
330 TeamPolicyInternal(int league_size_request, const Kokkos::AUTO_t&,
331 int vector_length_request = 1);
332
333 /* TeamPolicyInternal( int league_size_request , int team_size_request );
334
335 TeamPolicyInternal( int league_size_request , const Kokkos::AUTO_t & );*/
336
// Policy-level accessors. NOTE(review): semantics presumably mirror the
// member_type accessors of the same name below — confirm.
342 KOKKOS_INLINE_FUNCTION int league_size() const;
343
349 KOKKOS_INLINE_FUNCTION int team_size() const;
350
353 inline bool impl_auto_team_size() const;
356 inline bool impl_auto_vector_length() const;
357
358 static int vector_length_max();
359
360 KOKKOS_INLINE_FUNCTION int impl_vector_length() const;
361
362 inline typename traits::index_type chunk_size() const;
363
// Returns a reference to the policy itself so that calls can be chained.
364 inline TeamPolicyInternal& set_chunk_size(int chunk_size);
365
// Parallel execution of a functor calls the functor once with each member of
// the execution policy.
369 struct member_type {
// Handle to the currently executing team shared scratch memory.
371 KOKKOS_INLINE_FUNCTION
372 typename traits::execution_space::scratch_memory_space team_shmem() const;
373
// Rank of this team within the league of teams.
375 KOKKOS_INLINE_FUNCTION int league_rank() const;
376
// Number of teams in the league.
378 KOKKOS_INLINE_FUNCTION int league_size() const;
379
// Rank of this thread within this team.
381 KOKKOS_INLINE_FUNCTION int team_rank() const;
382
// Number of threads in this team.
384 KOKKOS_INLINE_FUNCTION int team_size() const;
385
// Barrier among the threads of this team.
387 KOKKOS_INLINE_FUNCTION void team_barrier() const;
388
// Intra-team reduction. Returns join of all values of the team members.
391 template <class JoinOp>
392 KOKKOS_INLINE_FUNCTION typename JoinOp::value_type team_reduce(
393 const typename JoinOp::value_type, const JoinOp&) const;
394
// Intra-team exclusive prefix sum with team_rank() ordering.
400 template <typename Type>
401 KOKKOS_INLINE_FUNCTION Type team_scan(const Type& value) const;
402
// Intra-team exclusive prefix sum with team_rank() ordering; this overload
// additionally takes a pointer `global_accum`.
// NOTE(review): the exact use of `global_accum` is not visible here.
412 template <typename Type>
413 KOKKOS_INLINE_FUNCTION Type team_scan(const Type& value,
414 Type* const global_accum) const;
415 };
416};
417
418struct PerTeamValue {
419 size_t value;
420 PerTeamValue(size_t arg);
421};
422
423struct PerThreadValue {
424 size_t value;
425 PerThreadValue(size_t arg);
426};
427
/// Extracts a vector length from the first of a list of arguments.
///
/// `value(val, rest...)` returns `val` (typed `iType`) when `iType` is an
/// integral type and the `int` 1 otherwise; the trailing arguments are
/// ignored.
///
/// This is written as a single member using `if constexpr`. Guarding two
/// overloads with `std::enable_if_t` on the *class* parameter `iType` is not
/// SFINAE: whichever guard is false makes the member declaration ill-formed
/// as soon as the class template is instantiated, so the struct could not be
/// used with any `iType`.
template <class iType, class... Args>
struct ExtractVectorLength {
  static inline std::conditional_t<std::is_integral<iType>::value, iType, int>
  value(iType val, Args...) {
    if constexpr (std::is_integral<iType>::value) {
      return val;
    } else {
      (void)val;  // not a length: fall back to a vector length of one
      return 1;
    }
  }
};
439
/// Returns the leading argument when its type is integral, and the `int` 1
/// for any other leading type. All further arguments are ignored.
///
/// The return type is `iType` in the integral case and `int` otherwise.
template <class iType, class... Args>
inline std::conditional_t<std::is_integral_v<iType>, iType, int>
extract_vector_length(iType val, Args...) {
  if constexpr (std::is_integral_v<iType>) {
    return val;
  } else {
    (void)val;  // leading argument is not a length
    return 1;
  }
}
451
452} // namespace Impl
453
454Impl::PerTeamValue PerTeam(const size_t& arg);
455Impl::PerThreadValue PerThread(const size_t& arg);
456
457struct ScratchRequest {
458 int level;
459
460 size_t per_team;
461 size_t per_thread;
462
463 inline ScratchRequest(const int& level_,
464 const Impl::PerTeamValue& team_value) {
465 level = level_;
466 per_team = team_value.value;
467 per_thread = 0;
468 }
469
470 inline ScratchRequest(const int& level_,
471 const Impl::PerThreadValue& thread_value) {
472 level = level_;
473 per_team = 0;
474 per_thread = thread_value.value;
475 }
476
477 inline ScratchRequest(const int& level_, const Impl::PerTeamValue& team_value,
478 const Impl::PerThreadValue& thread_value) {
479 level = level_;
480 per_team = team_value.value;
481 per_thread = thread_value.value;
482 }
483
484 inline ScratchRequest(const int& level_,
485 const Impl::PerThreadValue& thread_value,
486 const Impl::PerTeamValue& team_value) {
487 level = level_;
488 per_team = team_value.value;
489 per_thread = thread_value.value;
490 }
491};
492
493// Throws a runtime exception if level is not `0` or `1`
494void team_policy_check_valid_storage_level_argument(int level);
495
522template <class... Properties>
524 : public Impl::TeamPolicyInternal<
525 typename Impl::PolicyTraits<Properties...>::execution_space,
526 Properties...> {
527 using internal_policy = Impl::TeamPolicyInternal<
528 typename Impl::PolicyTraits<Properties...>::execution_space,
529 Properties...>;
530
531 template <class... OtherProperties>
532 friend class TeamPolicy;
533
534 public:
535 using traits = Impl::PolicyTraits<Properties...>;
536
538
539 TeamPolicy() : internal_policy(0, AUTO) {}
540
542 TeamPolicy(const typename traits::execution_space& space_,
544 int vector_length_request = 1)
545 : internal_policy(space_, league_size_request, team_size_request,
547
548 TeamPolicy(const typename traits::execution_space& space_,
549 int league_size_request, const Kokkos::AUTO_t&,
550 int vector_length_request = 1)
551 : internal_policy(space_, league_size_request, Kokkos::AUTO(),
553
554 TeamPolicy(const typename traits::execution_space& space_,
555 int league_size_request, const Kokkos::AUTO_t&,
556 const Kokkos::AUTO_t&)
557 : internal_policy(space_, league_size_request, Kokkos::AUTO(),
558 Kokkos::AUTO()) {}
559 TeamPolicy(const typename traits::execution_space& space_,
560 int league_size_request, const int team_size_request,
561 const Kokkos::AUTO_t&)
562 : internal_policy(space_, league_size_request, team_size_request,
563 Kokkos::AUTO()) {}
570
571 TeamPolicy(int league_size_request, const Kokkos::AUTO_t&,
572 int vector_length_request = 1)
573 : internal_policy(league_size_request, Kokkos::AUTO(),
575
576 TeamPolicy(int league_size_request, const Kokkos::AUTO_t&,
577 const Kokkos::AUTO_t&)
578 : internal_policy(league_size_request, Kokkos::AUTO(), Kokkos::AUTO()) {}
579 TeamPolicy(int league_size_request, const int team_size_request,
580 const Kokkos::AUTO_t&)
581 : internal_policy(league_size_request, team_size_request,
582 Kokkos::AUTO()) {}
583
584 template <class... OtherProperties>
585 TeamPolicy(const TeamPolicy<OtherProperties...> p) : internal_policy(p) {
586 // Cannot call converting constructor in the member initializer list because
587 // it is not a direct base.
588 internal_policy::traits::operator=(p);
589 }
590
591 private:
592 TeamPolicy(const internal_policy& p) : internal_policy(p) {}
593
594 public:
595 inline TeamPolicy& set_chunk_size(int chunk) {
596 static_assert(std::is_same<decltype(internal_policy::set_chunk_size(chunk)),
597 internal_policy&>::value,
598 "internal set_chunk_size should return a reference");
599 return static_cast<TeamPolicy&>(internal_policy::set_chunk_size(chunk));
600 }
601
602 inline TeamPolicy& set_scratch_size(const int& level,
603 const Impl::PerTeamValue& per_team) {
604 static_assert(std::is_same<decltype(internal_policy::set_scratch_size(
605 level, per_team)),
606 internal_policy&>::value,
607 "internal set_chunk_size should return a reference");
608
609 team_policy_check_valid_storage_level_argument(level);
610 return static_cast<TeamPolicy&>(
611 internal_policy::set_scratch_size(level, per_team));
612 }
613 inline TeamPolicy& set_scratch_size(const int& level,
614 const Impl::PerThreadValue& per_thread) {
615 team_policy_check_valid_storage_level_argument(level);
616 return static_cast<TeamPolicy&>(
617 internal_policy::set_scratch_size(level, per_thread));
618 }
619 inline TeamPolicy& set_scratch_size(const int& level,
620 const Impl::PerTeamValue& per_team,
621 const Impl::PerThreadValue& per_thread) {
622 team_policy_check_valid_storage_level_argument(level);
623 return static_cast<TeamPolicy&>(
624 internal_policy::set_scratch_size(level, per_team, per_thread));
625 }
626 inline TeamPolicy& set_scratch_size(const int& level,
627 const Impl::PerThreadValue& per_thread,
628 const Impl::PerTeamValue& per_team) {
629 team_policy_check_valid_storage_level_argument(level);
630 return static_cast<TeamPolicy&>(
631 internal_policy::set_scratch_size(level, per_team, per_thread));
632 }
633};
634
635namespace Impl {
636
637template <typename iType, class TeamMemberType>
638struct TeamThreadRangeBoundariesStruct {
639 private:
640 KOKKOS_INLINE_FUNCTION static iType ibegin(const iType& arg_begin,
641 const iType& arg_end,
642 const iType& arg_rank,
643 const iType& arg_size) {
644 return arg_begin +
645 ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
646 }
647
648 KOKKOS_INLINE_FUNCTION static iType iend(const iType& arg_begin,
649 const iType& arg_end,
650 const iType& arg_rank,
651 const iType& arg_size) {
652 const iType end_ =
653 arg_begin +
654 ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
655 return end_ < arg_end ? end_ : arg_end;
656 }
657
658 public:
659 using index_type = iType;
660 const iType start;
661 const iType end;
662 enum { increment = 1 };
663 const TeamMemberType& thread;
664
665 KOKKOS_INLINE_FUNCTION
666 TeamThreadRangeBoundariesStruct(const TeamMemberType& arg_thread,
667 const iType& arg_end)
668 : start(
669 ibegin(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
670 end(iend(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
671 thread(arg_thread) {}
672
673 KOKKOS_INLINE_FUNCTION
674 TeamThreadRangeBoundariesStruct(const TeamMemberType& arg_thread,
675 const iType& arg_begin, const iType& arg_end)
676 : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
677 arg_thread.team_size())),
678 end(iend(arg_begin, arg_end, arg_thread.team_rank(),
679 arg_thread.team_size())),
680 thread(arg_thread) {}
681};
682
683template <typename iType, class TeamMemberType>
684struct TeamVectorRangeBoundariesStruct {
685 private:
686 KOKKOS_INLINE_FUNCTION static iType ibegin(const iType& arg_begin,
687 const iType& arg_end,
688 const iType& arg_rank,
689 const iType& arg_size) {
690 return arg_begin +
691 ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
692 }
693
694 KOKKOS_INLINE_FUNCTION static iType iend(const iType& arg_begin,
695 const iType& arg_end,
696 const iType& arg_rank,
697 const iType& arg_size) {
698 const iType end_ =
699 arg_begin +
700 ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
701 return end_ < arg_end ? end_ : arg_end;
702 }
703
704 public:
705 using index_type = iType;
706 const iType start;
707 const iType end;
708 enum { increment = 1 };
709 const TeamMemberType& thread;
710
711 KOKKOS_INLINE_FUNCTION
712 TeamVectorRangeBoundariesStruct(const TeamMemberType& arg_thread,
713 const iType& arg_end)
714 : start(
715 ibegin(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
716 end(iend(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
717 thread(arg_thread) {}
718
719 KOKKOS_INLINE_FUNCTION
720 TeamVectorRangeBoundariesStruct(const TeamMemberType& arg_thread,
721 const iType& arg_begin, const iType& arg_end)
722 : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
723 arg_thread.team_size())),
724 end(iend(arg_begin, arg_end, arg_thread.team_rank(),
725 arg_thread.team_size())),
726 thread(arg_thread) {}
727};
728
729template <typename iType, class TeamMemberType>
730struct ThreadVectorRangeBoundariesStruct {
731 using index_type = iType;
732 const index_type start;
733 const index_type end;
734 enum { increment = 1 };
735
736 KOKKOS_INLINE_FUNCTION
737 constexpr ThreadVectorRangeBoundariesStruct(const TeamMemberType,
738 const index_type& count) noexcept
739 : start(static_cast<index_type>(0)), end(count) {}
740
741 KOKKOS_INLINE_FUNCTION
742 constexpr ThreadVectorRangeBoundariesStruct(const index_type& count) noexcept
743 : start(static_cast<index_type>(0)), end(count) {}
744
745 KOKKOS_INLINE_FUNCTION
746 constexpr ThreadVectorRangeBoundariesStruct(
747 const TeamMemberType, const index_type& arg_begin,
748 const index_type& arg_end) noexcept
749 : start(static_cast<index_type>(arg_begin)), end(arg_end) {}
750
751 KOKKOS_INLINE_FUNCTION
752 constexpr ThreadVectorRangeBoundariesStruct(
753 const index_type& arg_begin, const index_type& arg_end) noexcept
754 : start(static_cast<index_type>(arg_begin)), end(arg_end) {}
755};
756
757template <class TeamMemberType>
758struct ThreadSingleStruct {
759 const TeamMemberType& team_member;
760 KOKKOS_INLINE_FUNCTION
761 ThreadSingleStruct(const TeamMemberType& team_member_)
762 : team_member(team_member_) {}
763};
764
765template <class TeamMemberType>
766struct VectorSingleStruct {
767 const TeamMemberType& team_member;
768 KOKKOS_INLINE_FUNCTION
769 VectorSingleStruct(const TeamMemberType& team_member_)
770 : team_member(team_member_) {}
771};
772
773} // namespace Impl
774
782template <typename iType, class TeamMemberType, class _never_use_this_overload>
783KOKKOS_INLINE_FUNCTION_DELETED
784 Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
785 TeamThreadRange(const TeamMemberType&, const iType& count) = delete;
786
794template <typename iType1, typename iType2, class TeamMemberType,
795 class _never_use_this_overload>
796KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
797 std::common_type_t<iType1, iType2>, TeamMemberType>
798TeamThreadRange(const TeamMemberType&, const iType1& begin,
799 const iType2& end) = delete;
800
808template <typename iType, class TeamMemberType, class _never_use_this_overload>
809KOKKOS_INLINE_FUNCTION_DELETED
810 Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
811 TeamVectorRange(const TeamMemberType&, const iType& count) = delete;
812
820template <typename iType1, typename iType2, class TeamMemberType,
821 class _never_use_this_overload>
822KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
823 std::common_type_t<iType1, iType2>, TeamMemberType>
824TeamVectorRange(const TeamMemberType&, const iType1& begin,
825 const iType2& end) = delete;
826
834template <typename iType, class TeamMemberType, class _never_use_this_overload>
835KOKKOS_INLINE_FUNCTION_DELETED
836 Impl::ThreadVectorRangeBoundariesStruct<iType, TeamMemberType>
837 ThreadVectorRange(const TeamMemberType&, const iType& count) = delete;
838
839template <typename iType1, typename iType2, class TeamMemberType,
840 class _never_use_this_overload>
841KOKKOS_INLINE_FUNCTION_DELETED Impl::ThreadVectorRangeBoundariesStruct<
842 std::common_type_t<iType1, iType2>, TeamMemberType>
843ThreadVectorRange(const TeamMemberType&, const iType1& arg_begin,
844 const iType2& arg_end) = delete;
845
846namespace Impl {
847
848enum class TeamMDRangeLastNestLevel : bool { NotLastNestLevel, LastNestLevel };
849enum class TeamMDRangeParThread : bool { NotParThread, ParThread };
850enum class TeamMDRangeParVector : bool { NotParVector, ParVector };
851enum class TeamMDRangeThreadAndVector : bool { NotBoth, Both };
852
853template <typename Rank, TeamMDRangeThreadAndVector ThreadAndVector>
854struct HostBasedNestLevel;
855
856template <typename Rank, TeamMDRangeThreadAndVector ThreadAndVector>
857struct AcceleratorBasedNestLevel;
858
859// ThreadAndVectorNestLevel determines on which nested level parallelization
860// happens.
861// - Rank is Kokkos::Rank<TotalNestLevel, Iter>
862// - TotalNestLevel is the total number of loop nests
863// - Iter is whether to go forward or backward through ranks (i.e. the
864// iteration order for MDRangePolicy)
865// - ThreadAndVector determines whether both vector and thread parallelism is
866// in use
867template <typename Rank, typename ExecSpace,
868 TeamMDRangeThreadAndVector ThreadAndVector>
869struct ThreadAndVectorNestLevel;
870
871struct NoReductionTag {};
872
873template <typename Rank, typename TeamMDPolicy, typename Lambda,
874 typename ReductionValueType>
875KOKKOS_INLINE_FUNCTION void md_parallel_impl(TeamMDPolicy const& policy,
876 Lambda const& lambda,
877 ReductionValueType&& val);
878} // namespace Impl
879
880template <typename Rank, typename TeamHandle>
881struct TeamThreadMDRange;
882
883template <unsigned N, Iterate OuterDir, Iterate InnerDir, typename TeamHandle>
884struct TeamThreadMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
885 using NestLevelType = int;
886 using BoundaryType = int;
887 using TeamHandleType = TeamHandle;
888 using ExecutionSpace = typename TeamHandleType::execution_space;
889 using ArrayLayout = typename ExecutionSpace::array_layout;
890
891 static constexpr NestLevelType total_nest_level =
892 Rank<N, OuterDir, InnerDir>::rank;
893 static constexpr Iterate iter = OuterDir;
894 static constexpr auto par_thread = Impl::TeamMDRangeParThread::ParThread;
895 static constexpr auto par_vector = Impl::TeamMDRangeParVector::NotParVector;
896
897 static constexpr Iterate direction =
898 OuterDir == Iterate::Default
899 ? layout_iterate_type_selector<ArrayLayout>::outer_iteration_pattern
900 : iter;
901
902 template <class... Args>
903 KOKKOS_FUNCTION TeamThreadMDRange(TeamHandleType const& team_, Args&&... args)
904 : team(team_), boundaries{static_cast<BoundaryType>(args)...} {
905 static_assert(sizeof...(Args) == total_nest_level);
906 }
907
908 TeamHandleType const& team;
909 BoundaryType boundaries[total_nest_level];
910};
911
912template <typename TeamHandle, typename... Args>
913TeamThreadMDRange(TeamHandle const&, Args&&...)
914 ->TeamThreadMDRange<Rank<sizeof...(Args), Iterate::Default>, TeamHandle>;
915
916template <typename Rank, typename TeamHandle>
917struct ThreadVectorMDRange;
918
919template <unsigned N, Iterate OuterDir, Iterate InnerDir, typename TeamHandle>
920struct ThreadVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
921 using NestLevelType = int;
922 using BoundaryType = int;
923 using TeamHandleType = TeamHandle;
924 using ExecutionSpace = typename TeamHandleType::execution_space;
925 using ArrayLayout = typename ExecutionSpace::array_layout;
926
927 static constexpr NestLevelType total_nest_level =
928 Rank<N, OuterDir, InnerDir>::rank;
929 static constexpr Iterate iter = OuterDir;
930 static constexpr auto par_thread = Impl::TeamMDRangeParThread::NotParThread;
931 static constexpr auto par_vector = Impl::TeamMDRangeParVector::ParVector;
932
933 static constexpr Iterate direction =
934 OuterDir == Iterate::Default
935 ? layout_iterate_type_selector<ArrayLayout>::outer_iteration_pattern
936 : iter;
937
938 template <class... Args>
939 KOKKOS_INLINE_FUNCTION ThreadVectorMDRange(TeamHandleType const& team_,
940 Args&&... args)
941 : team(team_), boundaries{static_cast<BoundaryType>(args)...} {
942 static_assert(sizeof...(Args) == total_nest_level);
943 }
944
945 TeamHandleType const& team;
946 BoundaryType boundaries[total_nest_level];
947};
948
949template <typename TeamHandle, typename... Args>
950ThreadVectorMDRange(TeamHandle const&, Args&&...)
951 ->ThreadVectorMDRange<Rank<sizeof...(Args), Iterate::Default>, TeamHandle>;
952
953template <typename Rank, typename TeamHandle>
954struct TeamVectorMDRange;
955
956template <unsigned N, Iterate OuterDir, Iterate InnerDir, typename TeamHandle>
957struct TeamVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
958 using NestLevelType = int;
959 using BoundaryType = int;
960 using TeamHandleType = TeamHandle;
961 using ExecutionSpace = typename TeamHandleType::execution_space;
962 using ArrayLayout = typename ExecutionSpace::array_layout;
963
964 static constexpr NestLevelType total_nest_level =
965 Rank<N, OuterDir, InnerDir>::rank;
966 static constexpr Iterate iter = OuterDir;
967 static constexpr auto par_thread = Impl::TeamMDRangeParThread::ParThread;
968 static constexpr auto par_vector = Impl::TeamMDRangeParVector::ParVector;
969
970 static constexpr Iterate direction =
971 iter == Iterate::Default
972 ? layout_iterate_type_selector<ArrayLayout>::outer_iteration_pattern
973 : iter;
974
975 template <class... Args>
976 KOKKOS_INLINE_FUNCTION TeamVectorMDRange(TeamHandleType const& team_,
977 Args&&... args)
978 : team(team_), boundaries{static_cast<BoundaryType>(args)...} {
979 static_assert(sizeof...(Args) == total_nest_level);
980 }
981
982 TeamHandleType const& team;
983 BoundaryType boundaries[total_nest_level];
984};
985
986template <typename TeamHandle, typename... Args>
987TeamVectorMDRange(TeamHandle const&, Args&&...)
988 ->TeamVectorMDRange<Rank<sizeof...(Args), Iterate::Default>, TeamHandle>;
989
990template <typename Rank, typename TeamHandle, typename Lambda,
991 typename ReducerValueType>
992KOKKOS_INLINE_FUNCTION void parallel_reduce(
993 TeamThreadMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda,
994 ReducerValueType& val) {
995 Impl::md_parallel_impl<Rank>(policy, lambda, val);
996}
997
998template <typename Rank, typename TeamHandle, typename Lambda>
999KOKKOS_INLINE_FUNCTION void parallel_for(
1000 TeamThreadMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda) {
1001 Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
1002}
1003
1004template <typename Rank, typename TeamHandle, typename Lambda,
1005 typename ReducerValueType>
1006KOKKOS_INLINE_FUNCTION void parallel_reduce(
1007 ThreadVectorMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda,
1008 ReducerValueType& val) {
1009 Impl::md_parallel_impl<Rank>(policy, lambda, val);
1010}
1011
1012template <typename Rank, typename TeamHandle, typename Lambda>
1013KOKKOS_INLINE_FUNCTION void parallel_for(
1014 ThreadVectorMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda) {
1015 Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
1016}
1017
1018template <typename Rank, typename TeamHandle, typename Lambda,
1019 typename ReducerValueType>
1020KOKKOS_INLINE_FUNCTION void parallel_reduce(
1021 TeamVectorMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda,
1022 ReducerValueType& val) {
1023 Impl::md_parallel_impl<Rank>(policy, lambda, val);
1024}
1025
1026template <typename Rank, typename TeamHandle, typename Lambda>
1027KOKKOS_INLINE_FUNCTION void parallel_for(
1028 TeamVectorMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda) {
1029 Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
1030}
1031
1032namespace Impl {
1033
/// Resolves the name of a parallel construct: the user-supplied label if it
/// is non-empty, otherwise a default built from `typeid(...).name()` of the
/// functor type (and of the tag type, separated by "/", when a tag is used).
///
/// `HasTag` selects the specialization and defaults to "TagType is not void".
///
/// Lifetime: the label is kept by reference (`label_ref`), not copied, so the
/// string passed to the constructor must outlive this object.
template <typename FunctorType, typename TagType,
          bool HasTag = !std::is_void<TagType>::value>
struct ParallelConstructName;

/// Specialization for a non-void tag: default name is "<functor>/<tag>".
template <typename FunctorType, typename TagType>
struct ParallelConstructName<FunctorType, TagType, true> {
  /// Builds the default name only when \p label is empty.
  ParallelConstructName(std::string const& label) : label_ref(label) {
    if (label.empty()) {
      default_name = std::string(typeid(FunctorType).name()) + "/" +
                     typeid(TagType).name();
    }
  }
  /// Returns the label, or the default name when the label is empty.
  /// Const-qualified: it only reads state, so it is callable on const objects.
  std::string const& get() const {
    return (label_ref.empty()) ? default_name : label_ref;
  }
  std::string const& label_ref;
  std::string default_name;
};

/// Specialization without a tag: default name is "<functor>".
template <typename FunctorType, typename TagType>
struct ParallelConstructName<FunctorType, TagType, false> {
  /// Builds the default name only when \p label is empty.
  ParallelConstructName(std::string const& label) : label_ref(label) {
    if (label.empty()) {
      default_name = std::string(typeid(FunctorType).name());
    }
  }
  /// Returns the label, or the default name when the label is empty.
  /// Const-qualified: it only reads state, so it is callable on const objects.
  std::string const& get() const {
    return (label_ref.empty()) ? default_name : label_ref;
  }
  std::string const& label_ref;
  std::string default_name;
};
1066
1067} // namespace Impl
1068
1069} // namespace Kokkos
1070
1071namespace Kokkos {
1072
1073namespace Impl {
1074
1075template <class PatternTag, class... Args>
1076struct PatternImplSpecializationFromTag;
1077
1078template <class... Args>
1079struct PatternImplSpecializationFromTag<Kokkos::ParallelForTag, Args...>
1080 : type_identity<ParallelFor<Args...>> {};
1081
1082template <class... Args>
1083struct PatternImplSpecializationFromTag<Kokkos::ParallelReduceTag, Args...>
1084 : type_identity<ParallelReduce<Args...>> {};
1085
1086template <class... Args>
1087struct PatternImplSpecializationFromTag<Kokkos::ParallelScanTag, Args...>
1088 : type_identity<ParallelScan<Args...>> {};
1089
1090template <class PatternImpl>
1091struct PatternTagFromImplSpecialization;
1092
1093template <class... Args>
1094struct PatternTagFromImplSpecialization<ParallelFor<Args...>>
1095 : type_identity<ParallelForTag> {};
1096
1097template <class... Args>
1098struct PatternTagFromImplSpecialization<ParallelReduce<Args...>>
1099 : type_identity<ParallelReduceTag> {};
1100
1101template <class... Args>
1102struct PatternTagFromImplSpecialization<ParallelScan<Args...>>
1103 : type_identity<ParallelScanTag> {};
1104
1105} // end namespace Impl
1106
1107} // namespace Kokkos
1108#endif /* #define KOKKOS_EXECPOLICY_HPP */
A thread safe view to a bitset.
Execution policy for work over a range of an integral type.
RangePolicy(const member_type work_begin, const member_type work_end)
Total range.
RangePolicy & set_chunk_size(int chunk_size)
set chunk_size to a discrete value
member_type chunk_size() const
return chunk_size
RangePolicy(const typename traits::execution_space &work_space, const member_type work_begin, const member_type work_end, Args... args)
Total range.
RangePolicy(const member_type work_begin, const member_type work_end, Args... args)
Total range.
RangePolicy(const typename traits::execution_space &work_space, const member_type work_begin, const member_type work_end)
Total range.
Execution policy for parallel work over a league of teams of threads.
TeamPolicy(int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the default instance of the execution space.
TeamPolicy(const typename traits::execution_space &space_, int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the given instance of the execution space.
Parallel execution of a functor calls the functor once with each member of the execution policy.
KOKKOS_INLINE_FUNCTION JoinOp::value_type team_reduce(const typename JoinOp::value_type, const JoinOp &) const
Intra-team reduction. Returns join of all values of the team members.
KOKKOS_INLINE_FUNCTION Type team_scan(const Type &value, Type *const global_accum) const
Intra-team exclusive prefix sum with team_rank() ordering with intra-team non-deterministic ordering ...
KOKKOS_INLINE_FUNCTION int team_size() const
Number of threads in this team.
KOKKOS_INLINE_FUNCTION traits::execution_space::scratch_memory_space team_shmem() const
Handle to the currently executing team shared scratch memory.
KOKKOS_INLINE_FUNCTION int team_rank() const
Rank of this thread within this team.
KOKKOS_INLINE_FUNCTION int league_size() const
Number of teams in the league.
KOKKOS_INLINE_FUNCTION int league_rank() const
Rank of this team within the league of teams.
KOKKOS_INLINE_FUNCTION void team_barrier() const
Barrier among the threads of this team.
KOKKOS_INLINE_FUNCTION Type team_scan(const Type &value) const
Intra-team exclusive prefix sum with team_rank() ordering.
Subrange for a partition's rank and size.
KOKKOS_INLINE_FUNCTION WorkRange(const RangePolicy &range, const int part_rank, const int part_size)
Subrange for a partition's rank and size.