Skip to content

Commit 3667cf3

Browse files
[NFC][SYCL][Reduction] Use different workaround than #7484 (#7500)
This reverts #7484 and uses a less intrusive workaround for the gcc bug.
1 parent: ccd1639 · commit: 3667cf3

File tree

1 file changed: +11 −9 lines changed

sycl/include/sycl/reduction.hpp

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -2314,15 +2314,17 @@ void reduction_parallel_for(handler &CGH, range<Dims> Range,
23142314

23152315
// Before running the kernels, check that device has enough local memory
23162316
// to hold local arrays required for the tree-reduction algorithm.
2317-
size_t OneElemSize;
2318-
if constexpr (NumArgs == 2) {
2319-
using Reduction = std::tuple_element_t<0, decltype(ReduTuple)>;
2320-
constexpr bool IsTreeReduction =
2321-
!Reduction::has_fast_reduce && !Reduction::has_fast_atomics;
2322-
OneElemSize = IsTreeReduction ? sizeof(typename Reduction::result_type) : 0;
2323-
} else {
2324-
OneElemSize = reduGetMemPerWorkItem(ReduTuple, ReduIndices);
2325-
}
2317+
size_t OneElemSize = [&]() {
2318+
// Can't use outlined NumArgs due to a bug in gcc 8.4.
2319+
if constexpr (sizeof...(RestT) == 2) {
2320+
using Reduction = std::tuple_element_t<0, decltype(ReduTuple)>;
2321+
constexpr bool IsTreeReduction =
2322+
!Reduction::has_fast_reduce && !Reduction::has_fast_atomics;
2323+
return IsTreeReduction ? sizeof(typename Reduction::result_type) : 0;
2324+
} else {
2325+
return reduGetMemPerWorkItem(ReduTuple, ReduIndices);
2326+
}
2327+
}();
23262328

23272329
uint32_t NumConcurrentWorkGroups =
23282330
#ifdef __SYCL_REDUCTION_NUM_CONCURRENT_WORKGROUPS

0 commit comments

Comments (0)