Skip to content

Commit 009cc86

Browse files
Unify caches
1 parent 1e32c2a commit 009cc86

File tree

2 files changed

+124
-124
lines changed

2 files changed

+124
-124
lines changed

sycl/source/detail/device_impl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ device_impl::device_impl(ur_device_handle_t Device, platform_impl &Platform,
2929
? nullptr
3030
: get_info_impl<UR_DEVICE_INFO_PARENT_DEVICE>()),
3131
// TODO catch an exception and put it to list of asynchronous exceptions:
32-
MURInfoCache{*this}, MInfoCache{*this} {
32+
MCache{*this} {
3333
// Interoperability Constructor already calls DeviceRetain in
3434
// urDeviceCreateWithNativeHandle.
3535
getAdapter()->call<UrApiKind::urDeviceRetain>(MDevice);

sycl/source/detail/device_impl.hpp

Lines changed: 123 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -146,8 +146,8 @@ class device_impl : public std::enable_shared_from_this<device_impl> {
146146

147147
template <ur_device_info_t Desc, bool InitializingCache = false>
148148
decltype(auto) get_info_impl() const {
149-
if constexpr (decltype(MURInfoCache)::has<Desc>() && !InitializingCache) {
150-
return MURInfoCache.get<Desc>();
149+
if constexpr (decltype(MCache)::has<Desc>() && !InitializingCache) {
150+
return MCache.get<Desc>();
151151
} else {
152152
using ur_ret_t = ur_ret_type<Desc>;
153153
if constexpr (std::is_same_v<ur_ret_t, std::string>) {
@@ -172,149 +172,148 @@ class device_impl : public std::enable_shared_from_this<device_impl> {
172172
}
173173
}
174174

175-
template <ur_device_info_t Desc> struct UREagerCached {
176-
const ur_ret_type<Desc> value;
175+
// Define some helpers to cache properties. We use the same template
176+
// implementation for both SYCL information descriptors and raw calls to
177+
// `urDeviceGetInfo` by wrapping the latter's `ur_device_info_t Desc` into a
178+
// wrapper class (to go from values to types, as we don't have universal
179+
// template parameters yet).
180+
//
181+
// Note that some modifications are also done in `get_info` and
182+
// `get_info_impl` so this caching is a part of device_impl implementation and
183+
// all the infrastructure should legitimately be a class member.
184+
//
185+
// See `MCache` data member below for instructions on how to make a property
186+
// cached.
187+
188+
// Eager - initialize the value right in the device_impl's ctor.
189+
template <typename Desc> struct EagerCached {
190+
const typename Desc::return_type value;
177191
};
178192

179-
template <ur_device_info_t... Descs>
180-
struct UREagerCache : public UREagerCached<Descs>... {
181-
UREagerCache(device_impl &device)
182-
: UREagerCached<Descs>{
183-
device.get_info_impl<Descs, true /* InitializingCache */>()}... {}
184-
template <ur_device_info_t Desc> static constexpr bool has() {
185-
return (((Desc == Descs) || ...));
193+
template <typename Initializer, typename... Descs>
194+
struct EagerCache : EagerCached<Descs>... {
195+
EagerCache(device_impl &device)
196+
: EagerCached<Descs>{[&]() {
197+
// We optimize `init` signature so that it could be immediately
198+
// passed to `std::call_once` in the `CallOnceCached` below with an
199+
// expectation that it's easier to inline this lambda than outline a
200+
// creation of lambda in `CallOnceCached` if we'd be forced to have
201+
// one if `init` returned by value.
202+
typename Descs::return_type res;
203+
Initializer::template init<Descs>(device, res);
204+
return res;
205+
}()}... {}
206+
207+
template <typename Desc> static constexpr bool has() {
208+
return ((std::is_same_v<Desc, Descs> || ...));
186209
}
187210

188-
template <ur_device_info_t Desc, typename = std::enable_if_t<has<Desc>()>>
189-
auto &get() const {
190-
return static_cast<const UREagerCached<Desc> *>(this)->value;
211+
template <typename Desc> decltype(auto) get() const {
212+
// Extra parentheses to return as reference (see `decltype(auto)`).
213+
return (static_cast<const EagerCached<Desc> *>(this)->value);
191214
}
192215
};
193216

194-
template <ur_device_info_t Desc> struct URCallOnceCached {
217+
// CallOnce - initialize on first query, but exactly once so that we could
218+
// return cached values by reference. Important for `std::vector` /
219+
// `std::string` values where returning cached values by value would cause
220+
// heap allocations.
221+
template <typename Desc> struct CallOnceCached {
195222
std::once_flag flag;
196-
ur_ret_type<Desc> value;
223+
typename Desc::return_type value;
197224
};
198225

199-
template <ur_device_info_t... Descs>
200-
struct URCallOnceCache : public URCallOnceCached<Descs>... {
226+
template <typename Initializer, typename... Descs>
227+
struct CallOnceCache : public CallOnceCached<Descs>... {
201228
device_impl &device;
202229

203-
URCallOnceCache(device_impl &device) : device(device) {}
204-
template <ur_device_info_t Desc> static constexpr bool has() {
205-
return (((Desc == Descs) || ...));
230+
CallOnceCache(device_impl &device) : device(device) {}
231+
232+
template <typename Desc> static constexpr bool has() {
233+
return ((std::is_same_v<Desc, Descs> || ...));
206234
}
207235

208-
template <ur_device_info_t Desc, typename = std::enable_if_t<has<Desc>()>>
209-
auto &get() const {
210-
auto &Entry = *static_cast<URCallOnceCached<Desc> *>(
211-
const_cast<URCallOnceCache *>(this));
212-
std::call_once(Entry.flag, [&]() {
213-
Entry.value =
214-
device.get_info_impl<Desc, true /* InitializingCache */>();
215-
});
216-
return Entry.value;
236+
template <typename Desc> decltype(auto) get() {
237+
auto &Entry = *static_cast<CallOnceCached<Desc> *>(this);
238+
std::call_once(Entry.flag, Initializer::template init<Desc>, device,
239+
Entry.value);
240+
// Extra parentheses to return as reference (see `decltype(auto)`).
241+
return (std::as_const(Entry.value));
217242
}
218243
};
219244

220-
template <typename EagerCache, typename CallOnceCache>
221-
struct URInfoCache : public EagerCache, public CallOnceCache {
222-
URInfoCache(device_impl &device)
223-
: EagerCache(device), CallOnceCache(device) {}
245+
// get_info and get_info_impl need to know if a particular query is cacheable.
246+
// It's easier if all the cache instances (eager/call-once * UR/SYCL) are
247+
// merged into a single object.
248+
template <typename... Caches> struct JointCache : public Caches... {
249+
JointCache(device_impl &device) : Caches(device)... {}
224250

225-
template <ur_device_info_t Desc> static constexpr bool has() {
226-
return EagerCache::template has<Desc>() ||
227-
CallOnceCache::template has<Desc>();
251+
template <typename Desc> static constexpr bool has() {
252+
return ((Caches::template has<Desc>() || ...));
228253
}
229254

230-
template <ur_device_info_t Desc> decltype(auto) get() const {
231-
if constexpr (EagerCache::template has<Desc>()) {
232-
return EagerCache::template get<Desc>();
233-
} else if constexpr (CallOnceCache::template has<Desc>()) {
234-
return CallOnceCache::template get<Desc>();
235-
} else {
236-
static_assert(has<Desc>());
237-
}
255+
template <ur_device_info_t Desc> static constexpr bool has() {
256+
return has<URDesc<Desc>>();
257+
}
258+
259+
template <typename Desc> decltype(auto) get() {
260+
// Couldn't find a smarter way for this...
261+
constexpr auto N = sizeof...(Caches);
262+
if constexpr (N >= 1 && nth_type_t<0, Caches...>::template has<Desc>())
263+
return nth_type_t<0, Caches...>::template get<Desc>();
264+
else if constexpr (N >= 2 &&
265+
nth_type_t<1, Caches...>::template has<Desc>())
266+
return nth_type_t<1, Caches...>::template get<Desc>();
267+
else if constexpr (N >= 3 &&
268+
nth_type_t<2, Caches...>::template has<Desc>())
269+
return nth_type_t<2, Caches...>::template get<Desc>();
270+
else if constexpr (N >= 4 &&
271+
nth_type_t<3, Caches...>::template has<Desc>())
272+
return nth_type_t<3, Caches...>::template get<Desc>();
273+
else
274+
static_assert(N <= 4 && N > 0);
275+
}
276+
template <ur_device_info_t Desc> decltype(auto) get() {
277+
return get<URDesc<Desc>>();
238278
}
239279
};
240280

241-
template <typename Desc> struct InfoEagerCached {
242-
const typename Desc::return_type value;
243-
};
281+
// With generic infrastructure above finished, provide the customization
282+
// points:
244283

245-
template <typename... Descs>
246-
struct InfoEagerCache : public InfoEagerCached<Descs>... {
247-
InfoEagerCache(device_impl &device)
248-
: InfoEagerCached<Descs>{
284+
struct InfoInitializer {
285+
template <typename Desc>
286+
static void init(device_impl &device, typename Desc::return_type &value) {
287+
value = device.
249288
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
250-
device.get_info<Descs, true /* InitializingCache */>()
289+
get_info
251290
#else
252-
device.get_info_abi_workaround<Descs,
253-
true /* InitializingCache */>()
291+
get_info_abi_workaround
254292
#endif
255-
}... {
256-
}
257-
template <typename Desc> static constexpr bool has() {
258-
return ((std::is_same_v<Desc, Descs> || ...));
259-
}
260-
template <typename Desc, typename = std::enable_if_t<has<Desc>()>>
261-
auto &get() const {
262-
return static_cast<const InfoEagerCached<Desc> *>(this)->value;
293+
<Desc, true /* InitializingCache */>();
263294
}
264295
};
265296

266-
template <typename Desc> struct InfoCallOnceCached {
267-
std::once_flag flag;
268-
typename Desc::return_type value;
297+
template <ur_device_info_t Desc> struct URDesc {
298+
using return_type = ur_ret_type<Desc>;
299+
static constexpr ur_device_info_t UR_DESC = Desc;
269300
};
270301

271-
template <typename... Descs>
272-
struct InfoCallOnceCache : public InfoCallOnceCached<Descs>... {
273-
device_impl &device;
274-
275-
InfoCallOnceCache(device_impl &device) : device(device) {}
276-
277-
template <typename Desc> static constexpr bool has() {
278-
return ((std::is_same_v<Desc, Descs> || ...));
279-
}
280-
281-
template <typename Desc, typename = std::enable_if_t<has<Desc>()>>
282-
auto &get() const {
283-
auto &Entry = *static_cast<InfoCallOnceCached<Desc> *>(
284-
const_cast<InfoCallOnceCache *>(this));
285-
std::call_once(Entry.flag, [&]() {
286-
Entry.value = device.
287-
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
288-
get_info
289-
#else
290-
get_info_abi_workaround
291-
#endif
292-
<Desc, true /* InitializingCache */>();
293-
});
294-
return Entry.value;
302+
struct URInfoInitializer {
303+
template <typename Desc>
304+
static void init(device_impl &device, typename Desc::return_type &value) {
305+
value =
306+
device.get_info_impl<Desc::UR_DESC, true /* InitializingCache */>();
295307
}
296308
};
297309

298-
template <typename EagerCache, typename CallOnceCache>
299-
struct InfoCache : public EagerCache, public CallOnceCache {
300-
InfoCache(device_impl &device)
301-
: EagerCache(device), CallOnceCache(device) {}
302-
303-
template <typename Desc> static constexpr bool has() {
304-
return EagerCache::template has<Desc>() ||
305-
CallOnceCache::template has<Desc>();
306-
}
310+
template <template <typename...> typename Cache, ur_device_info_t... Descs>
311+
using URCache = Cache<URInfoInitializer, URDesc<Descs>...>;
307312

308-
template <typename Desc> decltype(auto) get() const {
309-
if constexpr (EagerCache::template has<Desc>()) {
310-
return EagerCache::template get<Desc>();
311-
} else if constexpr (CallOnceCache::template has<Desc>()) {
312-
return CallOnceCache::template get<Desc>();
313-
} else {
314-
static_assert(has<Desc>());
315-
}
316-
}
317-
};
313+
template <ur_device_info_t... Descs>
314+
using UREagerCache = URCache<EagerCache, Descs...>;
315+
template <ur_device_info_t... Descs>
316+
using URCallOnceCache = URCache<CallOnceCache, Descs...>;
318317

319318
public:
320319
/// Constructs a SYCL device instance using the provided
@@ -476,8 +475,8 @@ class device_impl : public std::enable_shared_from_this<device_impl> {
476475
#endif
477476
using execution_scope = ext::oneapi::experimental::execution_scope;
478477

479-
if constexpr (decltype(MInfoCache)::has<Param>() && !InitializingCache) {
480-
return MInfoCache.get<Param>();
478+
if constexpr (decltype(MCache)::has<Param>() && !InitializingCache) {
479+
return MCache.get<Param>();
481480
}
482481
#define CASE(PARAM) else if constexpr (std::is_same_v<Param, PARAM>)
483482
// device_traits.def
@@ -1750,18 +1749,19 @@ class device_impl : public std::enable_shared_from_this<device_impl> {
17501749

17511750
const ur_device_handle_t MRootDevice;
17521751

1753-
// This must come before `MInfoCache` below, so that eager initialization of
1754-
// this cache would happen before its potential use in the initialization of
1755-
// `MInfoCache`.
1756-
URInfoCache<
1757-
UREagerCache<UR_DEVICE_INFO_TYPE, UR_DEVICE_INFO_USE_NATIVE_ASSERT>,
1758-
URCallOnceCache<UR_DEVICE_INFO_NAME>>
1759-
MURInfoCache;
1760-
1761-
InfoCache<
1762-
InfoEagerCache<>,
1763-
InfoCallOnceCache<ext::oneapi::experimental::info::device::architecture>>
1764-
MInfoCache;
1752+
// Order of caches matters! UR must come before SYCL info descriptors (because
1753+
// get_info calls get_info_impl but the opposite never happens).
1754+
//
1755+
// To make an additional property cacheable, just expand one of the caches below
1756+
// with that property, no other changes should be necessary.
1757+
mutable JointCache<
1758+
UREagerCache<UR_DEVICE_INFO_TYPE, UR_DEVICE_INFO_USE_NATIVE_ASSERT>, //
1759+
URCallOnceCache<UR_DEVICE_INFO_NAME>, //
1760+
EagerCache<InfoInitializer>, //
1761+
CallOnceCache<InfoInitializer,
1762+
ext::oneapi::experimental::info::device::architecture>>
1763+
MCache;
1764+
17651765
}; // class device_impl
17661766

17671767
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES

0 commit comments

Comments
 (0)