@@ -146,8 +146,8 @@ class device_impl : public std::enable_shared_from_this<device_impl> {
146
146
147
147
template <ur_device_info_t Desc, bool InitializingCache = false >
148
148
decltype (auto ) get_info_impl() const {
149
- if constexpr (decltype (MURInfoCache )::has<Desc>() && !InitializingCache) {
150
- return MURInfoCache .get <Desc>();
149
+ if constexpr (decltype (MCache )::has<Desc>() && !InitializingCache) {
150
+ return MCache .get <Desc>();
151
151
} else {
152
152
using ur_ret_t = ur_ret_type<Desc>;
153
153
if constexpr (std::is_same_v<ur_ret_t , std::string>) {
@@ -172,149 +172,148 @@ class device_impl : public std::enable_shared_from_this<device_impl> {
172
172
}
173
173
}
174
174
175
- template <ur_device_info_t Desc> struct UREagerCached {
176
- const ur_ret_type<Desc> value;
175
+ // Define some helpers to cache properties. We use the same template
176
+ // implementation for both SYCL information descriptors and raw calls to
177
+ // `urDeviceGetInfo` by wrapping the latter's `ur_device_info_t Desc` into a
178
+ // wrapper class (to go from values to types, as we don't have universal
179
+ // template parameters yet).
180
+ //
181
+ // Note that some modifications are also done in `get_info` and
182
+ // `get_info_impl` so this caching is a part of device_impl implementation and
183
+ // all the infrastructure should legitimately be a class member.
184
+ //
185
+ // See `MCache` data member below for instructions on how to make a property
186
+ // cached.
187
+
188
+ // Eager - initialize the value right in the device_impl's ctor.
189
+ template <typename Desc> struct EagerCached {
190
+ const typename Desc::return_type value;
177
191
};
178
192
179
- template <ur_device_info_t ... Descs>
180
- struct UREagerCache : public UREagerCached <Descs>... {
181
- UREagerCache (device_impl &device)
182
- : UREagerCached<Descs>{
183
- device.get_info_impl <Descs, true /* InitializingCache */ >()}... {}
184
- template <ur_device_info_t Desc> static constexpr bool has () {
185
- return (((Desc == Descs) || ...));
193
+ template <typename Initializer, typename ... Descs>
194
+ struct EagerCache : EagerCached<Descs>... {
195
+ EagerCache (device_impl &device)
196
+ : EagerCached<Descs>{[&]() {
197
+ // We shape the `init` signature so that it can be passed directly to
198
+ // `std::call_once` in `CallOnceCache` below. The expectation is that
199
+ // inlining this lambda is easier for the compiler than outlining the
200
+ // creation of a lambda in `CallOnceCache`, which we would be forced to
201
+ // have if `init` returned by value.
202
+ typename Descs::return_type res;
203
+ Initializer::template init<Descs>(device, res);
204
+ return res;
205
+ }()}... {}
206
+
207
+ template <typename Desc> static constexpr bool has () {
208
+ return ((std::is_same_v<Desc, Descs> || ...));
186
209
}
187
210
188
- template <ur_device_info_t Desc, typename = std:: enable_if_t <has< Desc>()>>
189
- auto & get () const {
190
- return static_cast <const UREagerCached <Desc> *>(this )->value ;
211
+ template <typename Desc> decltype ( auto ) get() const {
212
+ // Extra parentheses to return as reference (see `decltype(auto)`).
213
+ return ( static_cast <const EagerCached <Desc> *>(this )->value ) ;
191
214
}
192
215
};
193
216
194
- template <ur_device_info_t Desc> struct URCallOnceCached {
217
+ // CallOnce - initialize on first query, but exactly once so that we could
218
+ // return cached values by reference. Important for `std::vector` /
219
+ // `std::string` values where returning cached values by value would cause
220
+ // heap allocations.
221
+ template <typename Desc> struct CallOnceCached {
195
222
std::once_flag flag;
196
- ur_ret_type< Desc> value;
223
+ typename Desc::return_type value;
197
224
};
198
225
199
- template <ur_device_info_t ... Descs>
200
- struct URCallOnceCache : public URCallOnceCached <Descs>... {
226
+ template <typename Initializer, typename ... Descs>
227
+ struct CallOnceCache : public CallOnceCached <Descs>... {
201
228
device_impl &device;
202
229
203
- URCallOnceCache (device_impl &device) : device(device) {}
204
- template <ur_device_info_t Desc> static constexpr bool has () {
205
- return (((Desc == Descs) || ...));
230
+ CallOnceCache (device_impl &device) : device(device) {}
231
+
232
+ template <typename Desc> static constexpr bool has () {
233
+ return ((std::is_same_v<Desc, Descs> || ...));
206
234
}
207
235
208
- template <ur_device_info_t Desc, typename = std::enable_if_t <has<Desc>()>>
209
- auto &get () const {
210
- auto &Entry = *static_cast <URCallOnceCached<Desc> *>(
211
- const_cast <URCallOnceCache *>(this ));
212
- std::call_once (Entry.flag , [&]() {
213
- Entry.value =
214
- device.get_info_impl <Desc, true /* InitializingCache */ >();
215
- });
216
- return Entry.value ;
236
+ template <typename Desc> decltype (auto ) get() {
237
+ auto &Entry = *static_cast <CallOnceCached<Desc> *>(this );
238
+ std::call_once (Entry.flag , Initializer::template init<Desc>, device,
239
+ Entry.value );
240
+ // Extra parentheses to return as reference (see `decltype(auto)`).
241
+ return (std::as_const (Entry.value ));
217
242
}
218
243
};
219
244
220
- template <typename EagerCache, typename CallOnceCache>
221
- struct URInfoCache : public EagerCache , public CallOnceCache {
222
- URInfoCache (device_impl &device)
223
- : EagerCache(device), CallOnceCache(device) {}
245
+ // get_info and get_info_impl need to know if a particular query is cacheable.
246
+ // It's easier if all the cache instances (eager/call-once * UR/SYCL) are
247
+ // merged into a single object.
248
+ template <typename ... Caches> struct JointCache : public Caches ... {
249
+ JointCache (device_impl &device) : Caches(device)... {}
224
250
225
- template <ur_device_info_t Desc> static constexpr bool has () {
226
- return EagerCache::template has<Desc>() ||
227
- CallOnceCache::template has<Desc>();
251
+ template <typename Desc> static constexpr bool has () {
252
+ return ((Caches::template has<Desc>() || ...));
228
253
}
229
254
230
- template <ur_device_info_t Desc> decltype (auto ) get() const {
231
- if constexpr (EagerCache::template has<Desc>()) {
232
- return EagerCache::template get<Desc>();
233
- } else if constexpr (CallOnceCache::template has<Desc>()) {
234
- return CallOnceCache::template get<Desc>();
235
- } else {
236
- static_assert (has<Desc>());
237
- }
255
+ template <ur_device_info_t Desc> static constexpr bool has () {
256
+ return has<URDesc<Desc>>();
257
+ }
258
+
259
+ template <typename Desc> decltype (auto ) get() {
260
+ // Couldn't find a smarter way for this...
261
+ constexpr auto N = sizeof ...(Caches);
262
+ if constexpr (N >= 1 && nth_type_t <0 , Caches...>::template has<Desc>())
263
+ return nth_type_t <0 , Caches...>::template get<Desc>();
264
+ else if constexpr (N >= 2 &&
265
+ nth_type_t <1 , Caches...>::template has<Desc>())
266
+ return nth_type_t <1 , Caches...>::template get<Desc>();
267
+ else if constexpr (N >= 3 &&
268
+ nth_type_t <2 , Caches...>::template has<Desc>())
269
+ return nth_type_t <2 , Caches...>::template get<Desc>();
270
+ else if constexpr (N >= 4 &&
271
+ nth_type_t <3 , Caches...>::template has<Desc>())
272
+ return nth_type_t <3 , Caches...>::template get<Desc>();
273
+ else
274
+ static_assert (N <= 4 && N > 0 );
275
+ }
276
+ template <ur_device_info_t Desc> decltype (auto ) get() {
277
+ return get<URDesc<Desc>>();
238
278
}
239
279
};
240
280
241
- template <typename Desc> struct InfoEagerCached {
242
- const typename Desc::return_type value;
243
- };
281
+ // With generic infrastructure above finished, provide the customization
282
+ // points:
244
283
245
- template < typename ... Descs>
246
- struct InfoEagerCache : public InfoEagerCached <Descs>... {
247
- InfoEagerCache (device_impl &device)
248
- : InfoEagerCached<Descs>{
284
+ struct InfoInitializer {
285
+ template < typename Desc>
286
+ static void init (device_impl &device, typename Desc::return_type &value) {
287
+ value = device.
249
288
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
250
- device. get_info <Descs, true /* InitializingCache */ >()
289
+ get_info
251
290
#else
252
- device.get_info_abi_workaround <Descs,
253
- true /* InitializingCache */ >()
291
+ get_info_abi_workaround
254
292
#endif
255
- }... {
256
- }
257
- template <typename Desc> static constexpr bool has () {
258
- return ((std::is_same_v<Desc, Descs> || ...));
259
- }
260
- template <typename Desc, typename = std::enable_if_t <has<Desc>()>>
261
- auto &get () const {
262
- return static_cast <const InfoEagerCached<Desc> *>(this )->value ;
293
+ <Desc, true /* InitializingCache */ >();
263
294
}
264
295
};
265
296
266
- template <typename Desc> struct InfoCallOnceCached {
267
- std::once_flag flag ;
268
- typename Desc::return_type value ;
297
+ template <ur_device_info_t Desc> struct URDesc {
298
+ using return_type = ur_ret_type<Desc> ;
299
+ static constexpr ur_device_info_t UR_DESC = Desc ;
269
300
};
270
301
271
- template <typename ... Descs>
272
- struct InfoCallOnceCache : public InfoCallOnceCached <Descs>... {
273
- device_impl &device;
274
-
275
- InfoCallOnceCache (device_impl &device) : device(device) {}
276
-
277
- template <typename Desc> static constexpr bool has () {
278
- return ((std::is_same_v<Desc, Descs> || ...));
279
- }
280
-
281
- template <typename Desc, typename = std::enable_if_t <has<Desc>()>>
282
- auto &get () const {
283
- auto &Entry = *static_cast <InfoCallOnceCached<Desc> *>(
284
- const_cast <InfoCallOnceCache *>(this ));
285
- std::call_once (Entry.flag , [&]() {
286
- Entry.value = device.
287
- #ifdef __INTEL_PREVIEW_BREAKING_CHANGES
288
- get_info
289
- #else
290
- get_info_abi_workaround
291
- #endif
292
- <Desc, true /* InitializingCache */ >();
293
- });
294
- return Entry.value ;
302
+ struct URInfoInitializer {
303
+ template <typename Desc>
304
+ static void init (device_impl &device, typename Desc::return_type &value) {
305
+ value =
306
+ device.get_info_impl <Desc::UR_DESC, true /* InitializingCache */ >();
295
307
}
296
308
};
297
309
298
- template <typename EagerCache, typename CallOnceCache>
299
- struct InfoCache : public EagerCache , public CallOnceCache {
300
- InfoCache (device_impl &device)
301
- : EagerCache(device), CallOnceCache(device) {}
302
-
303
- template <typename Desc> static constexpr bool has () {
304
- return EagerCache::template has<Desc>() ||
305
- CallOnceCache::template has<Desc>();
306
- }
310
+ template <template <typename ...> typename Cache, ur_device_info_t ... Descs>
311
+ using URCache = Cache<URInfoInitializer, URDesc<Descs>...>;
307
312
308
- template <typename Desc> decltype (auto ) get() const {
309
- if constexpr (EagerCache::template has<Desc>()) {
310
- return EagerCache::template get<Desc>();
311
- } else if constexpr (CallOnceCache::template has<Desc>()) {
312
- return CallOnceCache::template get<Desc>();
313
- } else {
314
- static_assert (has<Desc>());
315
- }
316
- }
317
- };
313
+ template <ur_device_info_t ... Descs>
314
+ using UREagerCache = URCache<EagerCache, Descs...>;
315
+ template <ur_device_info_t ... Descs>
316
+ using URCallOnceCache = URCache<CallOnceCache, Descs...>;
318
317
319
318
public:
320
319
// / Constructs a SYCL device instance using the provided
@@ -476,8 +475,8 @@ class device_impl : public std::enable_shared_from_this<device_impl> {
476
475
#endif
477
476
using execution_scope = ext::oneapi::experimental::execution_scope;
478
477
479
- if constexpr (decltype (MInfoCache )::has<Param>() && !InitializingCache) {
480
- return MInfoCache .get <Param>();
478
+ if constexpr (decltype (MCache )::has<Param>() && !InitializingCache) {
479
+ return MCache .get <Param>();
481
480
}
482
481
#define CASE (PARAM ) else if constexpr (std::is_same_v<Param, PARAM>)
483
482
// device_traits.def
@@ -1750,18 +1749,19 @@ class device_impl : public std::enable_shared_from_this<device_impl> {
1750
1749
1751
1750
const ur_device_handle_t MRootDevice;
1752
1751
1753
- // This must come before `MInfoCache` below, so that eager initialization of
1754
- // this cache would happen before its potential use in the initialization of
1755
- // `MInfoCache`.
1756
- URInfoCache<
1757
- UREagerCache<UR_DEVICE_INFO_TYPE, UR_DEVICE_INFO_USE_NATIVE_ASSERT>,
1758
- URCallOnceCache<UR_DEVICE_INFO_NAME>>
1759
- MURInfoCache;
1760
-
1761
- InfoCache<
1762
- InfoEagerCache<>,
1763
- InfoCallOnceCache<ext::oneapi::experimental::info::device::architecture>>
1764
- MInfoCache;
1752
+ // Order of caches matters! UR must come before SYCL info descriptors (because
1753
+ // get_info calls get_info_impl but the opposite never happens).
1754
+ //
1755
+ // To make an additional property cacheable just expand one of the caches below
1756
+ // with that property, no other changes should be necessary.
1757
+ mutable JointCache<
1758
+ UREagerCache<UR_DEVICE_INFO_TYPE, UR_DEVICE_INFO_USE_NATIVE_ASSERT>, //
1759
+ URCallOnceCache<UR_DEVICE_INFO_NAME>, //
1760
+ EagerCache<InfoInitializer>, //
1761
+ CallOnceCache<InfoInitializer,
1762
+ ext::oneapi::experimental::info::device::architecture>>
1763
+ MCache;
1764
+
1765
1765
}; // class device_impl
1766
1766
1767
1767
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
0 commit comments