From 59d664680ad8b0dd8441e689415c8fc92e63969f Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Sat, 3 May 2025 12:58:10 -0700 Subject: [PATCH 01/23] Implement dpctl.SyclDevice peer access --- dpctl/_backend.pxd | 11 ++ dpctl/_sycl_device.pyx | 106 ++++++++++++++++++ .../helper/include/dpctl_utils_helper.h | 27 +++++ .../helper/source/dpctl_utils_helper.cpp | 26 +++++ .../dpctl_sycl_device_interface.h | 36 ++++++ .../syclinterface/dpctl_sycl_enum_types.h | 10 ++ .../source/dpctl_sycl_device_interface.cpp | 48 ++++++++ 7 files changed, 264 insertions(+) diff --git a/dpctl/_backend.pxd b/dpctl/_backend.pxd index ca9e9ccb9f..93d9b5ef97 100644 --- a/dpctl/_backend.pxd +++ b/dpctl/_backend.pxd @@ -112,6 +112,10 @@ cdef extern from "syclinterface/dpctl_sycl_enum_types.h": _L1_cache "L1_cache", _next_partitionable "next_partitionable", + ctypedef enum _peer_access "DPCTLPeerAccessType": + _access_supported "access_supported", + _atomics_supported "atomics_supported", + ctypedef enum _event_status_type "DPCTLSyclEventStatusType": _UNKNOWN_STATUS "DPCTL_UNKNOWN_STATUS" _SUBMITTED "DPCTL_SUBMITTED" @@ -278,7 +282,14 @@ cdef extern from "syclinterface/dpctl_sycl_device_interface.h": cdef DPCTLDeviceVectorRef DPCTLDevice_GetComponentDevices( const DPCTLSyclDeviceRef DRef ) + cdef bool DPCTLDevice_CanAccessPeer(const DPCTLSyclDeviceRef DRef, + const DPCTLSyclDeviceRef PDRef, + _peer_access PT) + cdef void DPCTLDevice_EnablePeerAccess(const DPCTLSyclDeviceRef DRef, + const DPCTLSyclDeviceRef PDRef) + cdef void DPCTLDevice_DisablePeerAccess(const DPCTLSyclDeviceRef DRef, + const DPCTLSyclDeviceRef PDRef) cdef extern from "syclinterface/dpctl_sycl_device_manager.h": cdef DPCTLDeviceVectorRef DPCTLDeviceVector_CreateFromArray( diff --git a/dpctl/_sycl_device.pyx b/dpctl/_sycl_device.pyx index 5b43ffed1a..c5f9eff124 100644 --- a/dpctl/_sycl_device.pyx +++ b/dpctl/_sycl_device.pyx @@ -25,12 +25,15 @@ from ._backend cimport ( # noqa: E211 DPCTLCString_Delete, DPCTLDefaultSelector_Create, DPCTLDevice_AreEq, + DPCTLDevice_CanAccessPeer, DPCTLDevice_Copy, DPCTLDevice_CreateFromSelector, DPCTLDevice_CreateSubDevicesByAffinity, DPCTLDevice_CreateSubDevicesByCounts, DPCTLDevice_CreateSubDevicesEqually, DPCTLDevice_Delete, + DPCTLDevice_DisablePeerAccess, + DPCTLDevice_EnablePeerAccess, DPCTLDevice_GetBackend, DPCTLDevice_GetComponentDevices, DPCTLDevice_GetCompositeDevice, @@ -103,6 +106,7 @@ from ._backend cimport ( # noqa: E211 _device_type, _global_mem_cache_type, _partition_affinity_domain_type, + _peer_access, ) from .enum_types import backend_type, device_type, global_mem_cache_type @@ -1792,6 +1796,108 @@ cdef class SyclDevice(_SyclDevice): raise ValueError("Internal error: NULL device vector encountered") return _get_devices(cDVRef) + def can_access_peer(self, peer): + """ Returns ``True`` if `self` can enable peer access + to `peer`, ``False`` otherwise. + + Args: + peer (dpctl.SyclDevice): + The :class:`dpctl.SyclDevice` instance to + check. + + Returns: + bool: + ``True`` if `self` can enable peer access + to `peer`, otherwise ``False``. + """ + cdef SyclDevice p_dev + if not isinstance(peer, SyclDevice): + raise TypeError( + "second argument must be a `dpctl.SyclDevice`, got " + f"{type(peer)}" + ) + p_dev = peer + return DPCTLDevice_CanAccessPeer( + self._device_ref, + p_dev.get_device_ref(), + _peer_access._access_supported + ) + + def can_access_peer_atomics_supported(self, peer): + """ Returns ``True`` if `self` can enable peer access + to and can atomically modify memory on `peer`, ``False`` otherwise. + + Args: + peer (dpctl.SyclDevice): + The :class:`dpctl.SyclDevice` instance to + check. + + Returns: + bool: + ``True`` if `self` can enable peer access + to and can atomically modify memory on `peer`, + otherwise ``False``. + """ + cdef SyclDevice p_dev + if not isinstance(peer, SyclDevice): + raise TypeError( + "second argument must be a `dpctl.SyclDevice`, got " + f"{type(peer)}" + ) + p_dev = peer + return DPCTLDevice_CanAccessPeer( + self._device_ref, + p_dev.get_device_ref(), + _peer_access._atomics_supported + ) + + def enable_peer_access(self, peer): + """ Enables this device (`self`) to access USM device allocations + located on `peer`. + + Args: + peer (dpctl.SyclDevice): + The :class:`dpctl.SyclDevice` instance to + enable peer access to. + + Raises: + ValueError: + If the ``DPCTLDevice_GetComponentDevices`` call returned + ``NULL`` instead of a ``DPCTLDeviceVectorRef`` object. + """ + cdef SyclDevice p_dev + if not isinstance(peer, SyclDevice): + raise TypeError( + "second argument must be a `dpctl.SyclDevice`, got " + f"{type(peer)}" + ) + p_dev = peer + DPCTLDevice_EnablePeerAccess(self._device_ref, p_dev.get_device_ref()) + return + + def disable_peer_access(self, peer): + """ Disables peer access to `peer` from `self`. + + Args: + peer (dpctl.SyclDevice): + The :class:`dpctl.SyclDevice` instance to + disable peer access to. + + Raises: + ValueError: + If the ``DPCTLDevice_GetComponentDevices`` call returned + ``NULL`` instead of a ``DPCTLDeviceVectorRef`` object. + """ + cdef SyclDevice p_dev + if not isinstance(peer, SyclDevice): + raise TypeError( + "second argument must be a `dpctl.SyclDevice`, got " + f"{type(peer)}" + ) + p_dev = peer + DPCTLDevice_DisablePeerAccess(self._device_ref, p_dev.get_device_ref()) + return + @property def profiling_timer_resolution(self): """ Profiling timer resolution. diff --git a/libsyclinterface/helper/include/dpctl_utils_helper.h b/libsyclinterface/helper/include/dpctl_utils_helper.h index e1cb186d0e..9d9401b807 100644 --- a/libsyclinterface/helper/include/dpctl_utils_helper.h +++ b/libsyclinterface/helper/include/dpctl_utils_helper.h @@ -179,6 +179,33 @@ DPCTL_API DPCTLPartitionAffinityDomainType DPCTL_SyclPartitionAffinityDomainToDPCTLType( sycl::info::partition_affinity_domain PartitionAffinityDomain); +/*! + * @brief Converts a DPCTLPeerAccessType enum value to its corresponding + * sycl::ext::oneapi::peer_access enum value. + * + * @param PeerAccessTy A DPCTLPeerAccessType enum value + * @return A sycl::ext::oneapi::peer_access enum value for the input + * DPCTLPeerAccessType enum value. + * @throws runtime_error + */ +DPCTL_API +sycl::ext::oneapi::peer_access +DPCTL_DPCTLPeerAccessTypeToSycl(DPCTLPeerAccessType PeerAccessTy); + +/*! + * @brief Converts a sycl::ext::oneapi::peer_access enum value to + * corresponding DPCTLPeerAccessType enum value. + * + * @param PeerAccess sycl::ext::oneapi::peer_access to be + * converted to DPCTLPeerAccessType enum. + * @return A DPCTLPeerAccessType enum value for the input + * sycl::ext::oneapi::peer_access enum value. + * @throws runtime_error + */ +DPCTL_API +DPCTLPeerAccessType +DPCTL_SyclPeerAccessToDPCTLType(sycl::ext::oneapi::peer_access PeerAccess); + /*! * @brief Gives the index of the given device with respective to all the other * devices of the same type in the device's platform. diff --git a/libsyclinterface/helper/source/dpctl_utils_helper.cpp b/libsyclinterface/helper/source/dpctl_utils_helper.cpp index 88c1385a26..866e7e828e 100644 --- a/libsyclinterface/helper/source/dpctl_utils_helper.cpp +++ b/libsyclinterface/helper/source/dpctl_utils_helper.cpp @@ -452,6 +452,32 @@ DPCTLPartitionAffinityDomainType DPCTL_SyclPartitionAffinityDomainToDPCTLType( } } +ext::oneapi::peer_access +DPCTL_DPCTLPeerAccessTypeToSycl(DPCTLPeerAccessType PeerAccessTy) +{ + switch (PeerAccessTy) { + case DPCTLPeerAccessType::access_supported: + return ext::oneapi::peer_access::access_supported; + case DPCTLPeerAccessType::atomics_supported: + return ext::oneapi::peer_access::atomics_supported; + default: + throw std::runtime_error("Unsupported peer_access type"); + } +} + +DPCTLPeerAccessType +DPCTL_SyclPeerAccessToDPCTLType(ext::oneapi::peer_access PeerAccess) +{ + switch (PeerAccess) { + case ext::oneapi::peer_access::access_supported: + return DPCTLPeerAccessType::access_supported; + case ext::oneapi::peer_access::atomics_supported: + return DPCTLPeerAccessType::atomics_supported; + default: + throw std::runtime_error("Unsupported peer_access type"); + } +} + int64_t DPCTL_GetRelativeDeviceId(const device &Device) { auto relid = -1; diff --git a/libsyclinterface/include/syclinterface/dpctl_sycl_device_interface.h b/libsyclinterface/include/syclinterface/dpctl_sycl_device_interface.h index 6fddb2967f..f54f9ab136 100644 --- a/libsyclinterface/include/syclinterface/dpctl_sycl_device_interface.h +++ b/libsyclinterface/include/syclinterface/dpctl_sycl_device_interface.h @@ -792,4 +792,40 @@ DPCTL_API __dpctl_give DPCTLDeviceVectorRef DPCTLDevice_GetComponentDevices(__dpctl_keep const DPCTLSyclDeviceRef DRef); +/*! + * @brief Checks if device supports peer access to another device. + * + * @param DRef Opaque pointer to a ``sycl::device`` + * @param PDRef Opaque pointer to a ``sycl::device`` + * @param PT DPCTLPeerAccessType of ``ext::oneapi::peer_access``. + * @return True if sycl::device supports the kind of peer access, else false. + * @ingroup DeviceInterface + */ +DPCTL_API +bool DPCTLDevice_CanAccessPeer(__dpctl_keep const DPCTLSyclDeviceRef DRef, + __dpctl_keep const DPCTLSyclDeviceRef PDRef, + DPCTLPeerAccessType PT); + +/*! + * @brief Checks if device supports peer access to another device. + * + * @param DRef Opaque pointer to a ``sycl::device`` + * @param PDRef Opaque pointer to a ``sycl::device`` + * @ingroup DeviceInterface + */ +DPCTL_API +void DPCTLDevice_EnablePeerAccess(__dpctl_keep const DPCTLSyclDeviceRef DRef, + __dpctl_keep const DPCTLSyclDeviceRef PDRef); + +/*! + * @brief Checks if device supports peer access to another device. + * + * @param DRef Opaque pointer to a ``sycl::device`` + * @param PDRef Opaque pointer to a ``sycl::device`` + * @ingroup DeviceInterface + */ +DPCTL_API +void DPCTLDevice_DisablePeerAccess(__dpctl_keep const DPCTLSyclDeviceRef DRef, + __dpctl_keep const DPCTLSyclDeviceRef PDRef); + DPCTL_C_EXTERN_C_END diff --git a/libsyclinterface/include/syclinterface/dpctl_sycl_enum_types.h b/libsyclinterface/include/syclinterface/dpctl_sycl_enum_types.h index 799f9d1484..76311daa7b 100644 --- a/libsyclinterface/include/syclinterface/dpctl_sycl_enum_types.h +++ b/libsyclinterface/include/syclinterface/dpctl_sycl_enum_types.h @@ -151,6 +151,16 @@ typedef enum next_partitionable } DPCTLPartitionAffinityDomainType; +/*! + * @brief DPCTL analogue of ``sycl::ext::oneapi::peer_access`` enum. + * + */ +typedef enum +{ + access_supported, + atomics_supported +} DPCTLPeerAccessType; + /*! * @brief Enums to depict the properties that can be passed to a sycl::queue * constructor. diff --git a/libsyclinterface/source/dpctl_sycl_device_interface.cpp b/libsyclinterface/source/dpctl_sycl_device_interface.cpp index 7b1e900b58..b3179ce47f 100644 --- a/libsyclinterface/source/dpctl_sycl_device_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_device_interface.cpp @@ -903,3 +903,51 @@ DPCTLDevice_GetCompositeDevice(__dpctl_keep const DPCTLSyclDeviceRef DRef) else return nullptr; } + +bool DPCTLDevice_CanAccessPeer(__dpctl_keep const DPCTLSyclDeviceRef DRef, + __dpctl_keep const DPCTLSyclDeviceRef PDRef, + DPCTLPeerAccessType PT) +{ + bool canAccess = false; + auto D = unwrap(DRef); + auto PD = unwrap(PDRef); + if (D && PD) { + try { + canAccess = D->ext_oneapi_can_access_peer( + *PD, DPCTL_DPCTLPeerAccessTypeToSycl(PT)); + } catch (std::exception const &e) { + error_handler(e, __FILE__, __func__, __LINE__); + } + } + return canAccess; +} + +void DPCTLDevice_EnablePeerAccess(__dpctl_keep const DPCTLSyclDeviceRef DRef, + __dpctl_keep const DPCTLSyclDeviceRef PDRef) +{ + auto D = unwrap(DRef); + auto PD = unwrap(PDRef); + if (D && PD) { + try { + D->ext_oneapi_enable_peer_access(*PD); + } catch (std::exception const &e) { + error_handler(e, __FILE__, __func__, __LINE__); + } + } + return; +} + +void DPCTLDevice_DisablePeerAccess(__dpctl_keep const DPCTLSyclDeviceRef DRef, + __dpctl_keep const DPCTLSyclDeviceRef PDRef) +{ + auto D = unwrap(DRef); + auto PD = unwrap(PDRef); + if (D && PD) { + try { + D->ext_oneapi_disable_peer_access(*PD); + } catch (std::exception const &e) { + error_handler(e, __FILE__, __func__, __LINE__); + } + } + return; +} From 7d2d20f6b6b60e2bae81b8ba0bcf5e69454db155 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Sat, 3 May 2025 18:05:15 -0700 Subject: [PATCH 02/23] Add backend validation for peer access only implemented for backends HIP, CUDA, Level Zero. Validation prevents crashes --- dpctl/_sycl_device.pyx | 126 +++++++++++++++++- .../source/dpctl_sycl_device_interface.cpp | 66 +++++++++ 2 files changed, 187 insertions(+), 5 deletions(-) diff --git a/dpctl/_sycl_device.pyx b/dpctl/_sycl_device.pyx index c5f9eff124..2dcd416d3a 100644 --- a/dpctl/_sycl_device.pyx +++ b/dpctl/_sycl_device.pyx @@ -1796,7 +1796,7 @@ cdef class SyclDevice(_SyclDevice): raise ValueError("Internal error: NULL device vector encountered") return _get_devices(cDVRef) - def can_access_peer(self, peer): + def can_access_peer_access_supported(self, peer): """ Returns ``True`` if `self` can enable peer access to `peer`, ``False`` otherwise. @@ -1809,14 +1809,45 @@ cdef class SyclDevice(_SyclDevice): bool: ``True`` if `self` can enable peer access to `peer`, otherwise ``False``. + + Raises: + TypeError: + If `peer` is not `dpctl.SyclDevice`. + ValueError: + If the backend associated with `self` or `peer` does not + support peer access. """ cdef SyclDevice p_dev + cdef _backend_type BTy1 + cdef _backend_type BTy2 + if not isinstance(peer, SyclDevice): raise TypeError( "second argument must be a `dpctl.SyclDevice`, got " f"{type(peer)}" ) p_dev = peer + BTy1 = DPCTLDevice_GetBackend(self._device_ref) + if ( + BTy1 != _backend_type._CUDA and + BTy1 != _backend_type._HIP and + BTy1 != _backend_type._LEVEL_ZERO + ): + raise ValueError( + "Peer access not supported for backend " + f"{_backend_type_to_filter_string_part(BTy1)}" + ) + BTy2 = DPCTLDevice_GetBackend(p_dev.get_device_ref()) + if ( + BTy2 != _backend_type._CUDA and + BTy2 != _backend_type._HIP and + BTy2 != _backend_type._LEVEL_ZERO + ): + raise ValueError( + "Peer access not supported for backend " + f"{_backend_type_to_filter_string_part(BTy2)}" + ) + return DPCTLDevice_CanAccessPeer( self._device_ref, p_dev.get_device_ref(), @@ -1837,14 +1868,45 @@ cdef class SyclDevice(_SyclDevice): ``True`` if `self` can enable peer access to and can atomically modify memory on `peer`, otherwise ``False``. + + Raises: + TypeError: + If `peer` is not `dpctl.SyclDevice`. + ValueError: + If the backend associated with `self` or `peer` does not + support peer access. """ cdef SyclDevice p_dev + cdef _backend_type BTy1 + cdef _backend_type BTy2 + if not isinstance(peer, SyclDevice): raise TypeError( "second argument must be a `dpctl.SyclDevice`, got " f"{type(peer)}" ) p_dev = peer + BTy1 = DPCTLDevice_GetBackend(self._device_ref) + if ( + BTy1 != _backend_type._CUDA and + BTy1 != _backend_type._HIP and + BTy1 != _backend_type._LEVEL_ZERO + ): + raise ValueError( + "Peer access not supported for backend " + f"{_backend_type_to_filter_string_part(BTy1)}" + ) + BTy2 = DPCTLDevice_GetBackend(p_dev.get_device_ref()) + if ( + BTy2 != _backend_type._CUDA and + BTy2 != _backend_type._HIP and + BTy2 != _backend_type._LEVEL_ZERO + ): + raise ValueError( + "Peer access not supported for backend " + f"{_backend_type_to_filter_string_part(BTy2)}" + ) + return DPCTLDevice_CanAccessPeer( self._device_ref, p_dev.get_device_ref(), @@ -1861,17 +1923,45 @@ cdef class SyclDevice(_SyclDevice): enable peer access to. Raises: + TypeError: + If `peer` is not `dpctl.SyclDevice`. ValueError: - If the ``DPCTLDevice_GetComponentDevices`` call returned - ``NULL`` instead of a ``DPCTLDeviceVectorRef`` object. + If the backend associated with `self` or `peer` does not + support peer access. """ cdef SyclDevice p_dev + cdef _backend_type BTy1 + cdef _backend_type BTy2 + if not isinstance(peer, SyclDevice): raise TypeError( "second argument must be a `dpctl.SyclDevice`, got " f"{type(peer)}" ) p_dev = peer + BTy1 = ( + DPCTLDevice_GetBackend(self._device_ref) + ) + if ( + BTy1 != _backend_type._CUDA and + BTy1 != _backend_type._HIP and + BTy1 != _backend_type._LEVEL_ZERO + ): + raise ValueError( + "Peer access not supported for backend " + f"{_backend_type_to_filter_string_part(BTy1)}" + ) + BTy2 = DPCTLDevice_GetBackend(p_dev.get_device_ref()) + if ( + BTy2 != _backend_type._CUDA and + BTy2 != _backend_type._HIP and + BTy2 != _backend_type._LEVEL_ZERO + ): + raise ValueError( + "Peer access not supported for backend " + f"{_backend_type_to_filter_string_part(BTy2)}" + ) + DPCTLDevice_EnablePeerAccess(self._device_ref, p_dev.get_device_ref()) return @@ -1884,17 +1974,43 @@ cdef class SyclDevice(_SyclDevice): disable peer access to. Raises: + TypeError: + If `peer` is not `dpctl.SyclDevice`. ValueError: - If the ``DPCTLDevice_GetComponentDevices`` call returned - ``NULL`` instead of a ``DPCTLDeviceVectorRef`` object. + If the backend associated with `self` or `peer` does not + support peer access. """ cdef SyclDevice p_dev + cdef _backend_type BTy1 + cdef _backend_type BTy2 + if not isinstance(peer, SyclDevice): raise TypeError( "second argument must be a `dpctl.SyclDevice`, got " f"{type(peer)}" ) p_dev = peer + BTy1 = DPCTLDevice_GetBackend(self._device_ref) + if ( + BTy1 != _backend_type._CUDA and + BTy1 != _backend_type._HIP and + BTy1 != _backend_type._LEVEL_ZERO + ): + raise ValueError( + "Peer access not supported for backend " + f"{_backend_type_to_filter_string_part(BTy1)}" + ) + BTy2 = DPCTLDevice_GetBackend(p_dev.get_device_ref()) + if ( + BTy2 != _backend_type._CUDA and + BTy2 != _backend_type._HIP and + BTy2 != _backend_type._LEVEL_ZERO + ): + raise ValueError( + "Peer access not supported for backend " + f"{_backend_type_to_filter_string_part(BTy2)}" + ) + DPCTLDevice_DisablePeerAccess(self._device_ref, p_dev.get_device_ref()) return diff --git a/libsyclinterface/source/dpctl_sycl_device_interface.cpp b/libsyclinterface/source/dpctl_sycl_device_interface.cpp index b3179ce47f..72e67d788c 100644 --- a/libsyclinterface/source/dpctl_sycl_device_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_device_interface.cpp @@ -912,6 +912,28 @@ bool DPCTLDevice_CanAccessPeer(__dpctl_keep const DPCTLSyclDeviceRef DRef, auto D = unwrap(DRef); auto PD = unwrap(PDRef); if (D && PD) { + auto BE1 = D->get_backend(); + auto BE2 = PD->get_backend(); + + if (BE1 != sycl::backend::ext_oneapi_level_zero && + BE1 != sycl::backend::ext_oneapi_cuda && + BE1 != sycl::backend::ext_oneapi_hip) + { + error_handler("Backend " + std::to_string(static_cast(BE1)) + + " does not support peer access", + __FILE__, __func__, __LINE__); + return false; + } + + if (BE2 != sycl::backend::ext_oneapi_level_zero && + BE2 != sycl::backend::ext_oneapi_cuda && + BE2 != sycl::backend::ext_oneapi_hip) + { + error_handler("Backend " + std::to_string(static_cast(BE2)) + + " does not support peer access", + __FILE__, __func__, __LINE__); + return false; + } try { canAccess = D->ext_oneapi_can_access_peer( *PD, DPCTL_DPCTLPeerAccessTypeToSycl(PT)); @@ -928,6 +950,28 @@ void DPCTLDevice_EnablePeerAccess(__dpctl_keep const DPCTLSyclDeviceRef DRef, auto D = unwrap(DRef); auto PD = unwrap(PDRef); if (D && PD) { + auto BE1 = D->get_backend(); + auto BE2 = PD->get_backend(); + + if (BE1 != sycl::backend::ext_oneapi_level_zero && + BE1 != sycl::backend::ext_oneapi_cuda && + BE1 != sycl::backend::ext_oneapi_hip) + { + error_handler("Backend " + std::to_string(static_cast(BE1)) + + " does not support peer access", + __FILE__, __func__, __LINE__); + return; + } + + if (BE2 != sycl::backend::ext_oneapi_level_zero && + BE2 != sycl::backend::ext_oneapi_cuda && + BE2 != sycl::backend::ext_oneapi_hip) + { + error_handler("Backend " + std::to_string(static_cast(BE2)) + + " does not support peer access", + __FILE__, __func__, __LINE__); + return; + } try { D->ext_oneapi_enable_peer_access(*PD); } catch (std::exception const &e) { @@ -943,6 +987,28 @@ void DPCTLDevice_DisablePeerAccess(__dpctl_keep const DPCTLSyclDeviceRef DRef, auto D = unwrap(DRef); auto PD = unwrap(PDRef); if (D && PD) { + auto BE1 = D->get_backend(); + auto BE2 = PD->get_backend(); + + if (BE1 != sycl::backend::ext_oneapi_level_zero && + BE1 != sycl::backend::ext_oneapi_cuda && + BE1 != sycl::backend::ext_oneapi_hip) + { + error_handler("Backend " + std::to_string(static_cast(BE1)) + + " does not support peer access", + __FILE__, __func__, __LINE__); + return; + } + + if (BE2 != sycl::backend::ext_oneapi_level_zero && + BE2 != sycl::backend::ext_oneapi_cuda && + BE2 != sycl::backend::ext_oneapi_hip) + { + error_handler("Backend " + std::to_string(static_cast(BE2)) + + " does not support peer access", + __FILE__, __func__, __LINE__); + return; + } try { D->ext_oneapi_disable_peer_access(*PD); } catch (std::exception const &e) { From fc50e3b764b775987584fd867de8f4c6f673f8bb Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 5 May 2025 14:02:14 -0700 Subject: [PATCH 03/23] Directly use `get_backend()` from sycl_device --- libsyclinterface/source/dpctl_sycl_device_interface.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libsyclinterface/source/dpctl_sycl_device_interface.cpp b/libsyclinterface/source/dpctl_sycl_device_interface.cpp index 72e67d788c..98714d1f09 100644 --- a/libsyclinterface/source/dpctl_sycl_device_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_device_interface.cpp @@ -184,8 +184,7 @@ DPCTLDevice_GetBackend(__dpctl_keep const DPCTLSyclDeviceRef DRef) DPCTLSyclBackendType BTy = DPCTLSyclBackendType::DPCTL_UNKNOWN_BACKEND; auto D = unwrap(DRef); if (D) { - BTy = DPCTL_SyclBackendToDPCTLBackendType( - D->get_platform().get_backend()); + BTy = DPCTL_SyclBackendToDPCTLBackendType(D->get_backend()); } return BTy; } From e08c2c96a269d93364ee3726cd466692b5c8c178 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 6 May 2025 13:38:20 -0700 Subject: [PATCH 04/23] Tweak docstrings Peer device docstrings link to related methods and docs on peer access extension Slips in fixes to other docstrings for SyclDevice methods --- docs/doc_sources/urls.json | 1 + dpctl/_sycl_device.pyx | 83 ++++++++++++++++++++++++-------------- 2 files changed, 53 insertions(+), 31 deletions(-) diff --git a/docs/doc_sources/urls.json b/docs/doc_sources/urls.json index 647bbaea99..34801db920 100644 --- a/docs/doc_sources/urls.json +++ b/docs/doc_sources/urls.json @@ -5,6 +5,7 @@ "oneapi_filter_selection": "https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/supported/sycl_ext_oneapi_filter_selector.asciidoc", "oneapi_default_context": "https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/supported/sycl_ext_oneapi_default_context.asciidoc", "oneapi_enqueue_barrier": "https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/supported/sycl_ext_oneapi_enqueue_barrier.asciidoc", + "oneapi_peer_access": "https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/supported/sycl_ext_oneapi_peer_access.asciidoc", "sycl_aspects": "https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#table.device.aspect", "sycl_context": "https://sycl.readthedocs.io/en/latest/iface/context.html", "sycl_device": "https://sycl.readthedocs.io/en/latest/iface/device.html", diff --git a/dpctl/_sycl_device.pyx b/dpctl/_sycl_device.pyx index 2dcd416d3a..fb40ba7907 100644 --- a/dpctl/_sycl_device.pyx +++ b/dpctl/_sycl_device.pyx @@ -224,7 +224,7 @@ def _cached_filter_string(d : SyclDevice): and cached with `functools.cache`. Args: - d (dpctl.SyclDevice): + d (:class:`dpctl.SyclDevice`): A device for which to compute the filter string. Returns: out(str): @@ -1797,24 +1797,30 @@ cdef class SyclDevice(_SyclDevice): return _get_devices(cDVRef) def can_access_peer_access_supported(self, peer): - """ Returns ``True`` if `self` can enable peer access - to `peer`, ``False`` otherwise. + """ Returns ``True`` if this device (``self``) can enable peer access + to USM device memory on ``peer``, ``False`` otherwise. + + If peer access is supported, it may be enabled by calling + :meth:`.enable_peer_access`. + + For details, see + :oneapi_peer_access:`DPC++ peer access SYCL extension <>`. Args: - peer (dpctl.SyclDevice): - The :class:`dpctl.SyclDevice` instance to - check. + peer (:class:`dpctl.SyclDevice`): + The :class:`dpctl.SyclDevice` instance to check for peer access + by this device. Returns: bool: - ``True`` if `self` can enable peer access - to `peer`, otherwise ``False``. + ``True`` if this device may access USM device memory on + ``peer`` when peer access is enabled, otherwise ``False``. Raises: TypeError: - If `peer` is not `dpctl.SyclDevice`. + If ``peer`` is not :class:`dpctl.SyclDevice`. ValueError: - If the backend associated with `self` or `peer` does not + If the backend associated with this device or ``peer`` does not support peer access. """ cdef SyclDevice p_dev @@ -1855,25 +1861,32 @@ cdef class SyclDevice(_SyclDevice): ) def can_access_peer_atomics_supported(self, peer): - """ Returns ``True`` if `self` can enable peer access - to and can atomically modify memory on `peer`, ``False`` otherwise. + """ Returns ``True`` if this device (``self``) can concurrently access + and modify USM device memory on ``peer`` when peer access is enabled, + ``False`` otherwise. + + If peer access is supported, it may be enabled by calling + :meth:`.enable_peer_access`. + + For details, see + :oneapi_peer_access:`DPC++ peer access SYCL extension <>`. Args: - peer (dpctl.SyclDevice): - The :class:`dpctl.SyclDevice` instance to - check. + peer (:class:`dpctl.SyclDevice`): + The :class:`dpctl.SyclDevice` instance to check for concurrent + peer access and modification by this device. Returns: bool: - ``True`` if `self` can enable peer access - to and can atomically modify memory on `peer`, + ``True`` if this device may concurrently access and modify USM + device memory on ``peer`` when peer access is enabled, otherwise ``False``. Raises: TypeError: - If `peer` is not `dpctl.SyclDevice`. + If ``peer`` is not :class:`dpctl.SyclDevice`. ValueError: - If the backend associated with `self` or `peer` does not + If the backend associated with this device or ``peer`` does not support peer access. """ cdef SyclDevice p_dev @@ -1914,19 +1927,24 @@ cdef class SyclDevice(_SyclDevice): ) def enable_peer_access(self, peer): - """ Enables this device (`self`) to access USM device allocations - located on `peer`. + """ Enables this device (``self``) to access USM device allocations + located on ``peer``. + + Peer access may be disabled by calling :meth:`.disable_peer_access`. + + For details, see + :oneapi_peer_access:`DPC++ peer access SYCL extension <>`. Args: - peer (dpctl.SyclDevice): - The :class:`dpctl.SyclDevice` instance to - enable peer access to. + peer (:class:`dpctl.SyclDevice`): + The :class:`dpctl.SyclDevice` instance to enable peer access + to. Raises: TypeError: - If `peer` is not `dpctl.SyclDevice`. + If ``peer`` is not :class:`dpctl.SyclDevice`. ValueError: - If the backend associated with `self` or `peer` does not + If the backend associated with this device or ``peer`` does not support peer access. """ cdef SyclDevice p_dev @@ -1966,18 +1984,21 @@ cdef class SyclDevice(_SyclDevice): return def disable_peer_access(self, peer): - """ Disables peer access to `peer` from `self`. + """ Disables peer access to ``peer`` from this device (``self``). + + For details, see + :oneapi_peer_access:`DPC++ peer access SYCL extension <>`. Args: - peer (dpctl.SyclDevice): + peer (:class:`dpctl.SyclDevice`): The :class:`dpctl.SyclDevice` instance to disable peer access to. Raises: TypeError: - If `peer` is not `dpctl.SyclDevice`. + If ``peer`` is not :class:`dpctl.SyclDevice`. ValueError: - If the backend associated with `self` or `peer` does not + If the backend associated with this device or ``peer`` does not support peer access. """ cdef SyclDevice p_dev @@ -2134,7 +2155,7 @@ cdef class SyclDevice(_SyclDevice): same _device_ref as this SyclDevice. Args: - other (dpctl.SyclDevice): + other (:class:`dpctl.SyclDevice`): A :class:`dpctl.SyclDevice` instance to compare against. From 3af91a8f83b9b1eb32fe2c4e89aee57bd31d6271 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 6 May 2025 13:52:54 -0700 Subject: [PATCH 05/23] Simplify logic checking backends for peer access --- dpctl/_sycl_device.pyx | 80 +++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 44 deletions(-) diff --git a/dpctl/_sycl_device.pyx b/dpctl/_sycl_device.pyx index fb40ba7907..00a5baaa6c 100644 --- a/dpctl/_sycl_device.pyx +++ b/dpctl/_sycl_device.pyx @@ -1829,26 +1829,24 @@ cdef class SyclDevice(_SyclDevice): if not isinstance(peer, SyclDevice): raise TypeError( - "second argument must be a `dpctl.SyclDevice`, got " + "peer device must be a `dpctl.SyclDevice`, got " f"{type(peer)}" ) p_dev = peer + + _peer_access_backends = [ + _backend_type._CUDA, + _backend_type._HIP, + _backend_type._LEVEL_ZERO + ] BTy1 = DPCTLDevice_GetBackend(self._device_ref) - if ( - BTy1 != _backend_type._CUDA and - BTy1 != _backend_type._HIP and - BTy1 != _backend_type._LEVEL_ZERO - ): + if BTy1 not in _peer_access_backends: raise ValueError( "Peer access not supported for backend " f"{_backend_type_to_filter_string_part(BTy1)}" ) BTy2 = DPCTLDevice_GetBackend(p_dev.get_device_ref()) - if ( - BTy2 != _backend_type._CUDA and - BTy2 != _backend_type._HIP and - BTy2 != _backend_type._LEVEL_ZERO - ): + if BTy2 not in _peer_access_backends: raise ValueError( "Peer access not supported for backend " f"{_backend_type_to_filter_string_part(BTy2)}" @@ -1895,26 +1893,24 @@ cdef class SyclDevice(_SyclDevice): if not isinstance(peer, SyclDevice): raise TypeError( - "second argument must be a `dpctl.SyclDevice`, got " + "peer device must be a `dpctl.SyclDevice`, got " f"{type(peer)}" ) p_dev = peer + + _peer_access_backends = [ + _backend_type._CUDA, + _backend_type._HIP, + _backend_type._LEVEL_ZERO + ] BTy1 = DPCTLDevice_GetBackend(self._device_ref) - if ( - BTy1 != _backend_type._CUDA and - BTy1 != _backend_type._HIP and - BTy1 != _backend_type._LEVEL_ZERO - ): + if BTy1 not in _peer_access_backends: raise ValueError( "Peer access not supported for backend " f"{_backend_type_to_filter_string_part(BTy1)}" ) BTy2 = DPCTLDevice_GetBackend(p_dev.get_device_ref()) - if ( - BTy2 != _backend_type._CUDA and - BTy2 != _backend_type._HIP and - BTy2 != _backend_type._LEVEL_ZERO - ): + if BTy2 not in _peer_access_backends: raise ValueError( "Peer access not supported for backend " f"{_backend_type_to_filter_string_part(BTy2)}" @@ -1953,28 +1949,26 @@ cdef class SyclDevice(_SyclDevice): if not isinstance(peer, SyclDevice): raise TypeError( - "second argument must be a `dpctl.SyclDevice`, got " + "peer device must be a `dpctl.SyclDevice`, got " f"{type(peer)}" ) p_dev = peer + + _peer_access_backends = [ + _backend_type._CUDA, + _backend_type._HIP, + _backend_type._LEVEL_ZERO + ] BTy1 = ( DPCTLDevice_GetBackend(self._device_ref) ) - if ( - BTy1 != _backend_type._CUDA and - BTy1 != _backend_type._HIP and - BTy1 != _backend_type._LEVEL_ZERO - ): + if BTy1 not in _peer_access_backends: raise ValueError( "Peer access not supported for backend " f"{_backend_type_to_filter_string_part(BTy1)}" ) BTy2 = DPCTLDevice_GetBackend(p_dev.get_device_ref()) - if ( - BTy2 != _backend_type._CUDA and - BTy2 != _backend_type._HIP and - BTy2 != _backend_type._LEVEL_ZERO - ): + if BTy2 not in _peer_access_backends: raise ValueError( "Peer access not supported for backend " f"{_backend_type_to_filter_string_part(BTy2)}" @@ -2007,26 +2001,24 @@ cdef class SyclDevice(_SyclDevice): if not isinstance(peer, SyclDevice): raise TypeError( - "second argument must be a `dpctl.SyclDevice`, got " + "peer device must be a `dpctl.SyclDevice`, got " f"{type(peer)}" ) p_dev = peer + + _peer_access_backends = [ + _backend_type._CUDA, + _backend_type._HIP, + _backend_type._LEVEL_ZERO + ] BTy1 = DPCTLDevice_GetBackend(self._device_ref) - if ( - BTy1 != _backend_type._CUDA and - BTy1 != _backend_type._HIP and - BTy1 != _backend_type._LEVEL_ZERO - ): + if BTy1 not in _peer_access_backends: raise ValueError( "Peer access not supported for backend " f"{_backend_type_to_filter_string_part(BTy1)}" ) BTy2 = DPCTLDevice_GetBackend(p_dev.get_device_ref()) - if ( - BTy2 != _backend_type._CUDA and - BTy2 != _backend_type._HIP and - BTy2 != _backend_type._LEVEL_ZERO - ): + if BTy2 not in _peer_access_backends: raise ValueError( "Peer access not supported for backend " f"{_backend_type_to_filter_string_part(BTy2)}" From dfca71328c6049e4121e605c80cb58fc356b29a7 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 6 May 2025 13:53:41 -0700 Subject: [PATCH 06/23] link method for disabling peer access to enabling method --- dpctl/_sycl_device.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dpctl/_sycl_device.pyx b/dpctl/_sycl_device.pyx index 00a5baaa6c..6c2183adcb 100644 --- a/dpctl/_sycl_device.pyx +++ b/dpctl/_sycl_device.pyx @@ -1980,6 +1980,8 @@ cdef class SyclDevice(_SyclDevice): def disable_peer_access(self, peer): """ Disables peer access to ``peer`` from this device (``self``). + Peer access may be enabled by calling :meth:`.enable_peer_access`. + For details, see :oneapi_peer_access:`DPC++ peer access SYCL extension <>`. From bdfee1bd5fb295f91f8c669fc09530aa99332380 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 6 May 2025 14:04:30 -0700 Subject: [PATCH 07/23] Fix copy paste errors in peer device interface header --- .../include/syclinterface/dpctl_sycl_device_interface.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libsyclinterface/include/syclinterface/dpctl_sycl_device_interface.h b/libsyclinterface/include/syclinterface/dpctl_sycl_device_interface.h index f54f9ab136..72b0261e1f 100644 --- a/libsyclinterface/include/syclinterface/dpctl_sycl_device_interface.h +++ b/libsyclinterface/include/syclinterface/dpctl_sycl_device_interface.h @@ -807,7 +807,7 @@ bool DPCTLDevice_CanAccessPeer(__dpctl_keep const DPCTLSyclDeviceRef DRef, DPCTLPeerAccessType PT); /*! - * @brief Checks if device supports peer access to another device. + * @brief Enables peer access to another device. * * @param DRef Opaque pointer to a ``sycl::device`` * @param PDRef Opaque pointer to a ``sycl::device`` @@ -818,7 +818,7 @@ void DPCTLDevice_EnablePeerAccess(__dpctl_keep const DPCTLSyclDeviceRef DRef, __dpctl_keep const DPCTLSyclDeviceRef PDRef); /*! - * @brief Checks if device supports peer access to another device. + * @brief Disables peer access to another device. * * @param DRef Opaque pointer to a ``sycl::device`` * @param PDRef Opaque pointer to a ``sycl::device`` From 4b3e9f183f5647ca16ca8741917a259fb2e36711 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 6 May 2025 14:04:52 -0700 Subject: [PATCH 08/23] Clarify which device has invalid backend in peer_access methods --- dpctl/_sycl_device.pyx | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/dpctl/_sycl_device.pyx b/dpctl/_sycl_device.pyx index 6c2183adcb..1551673664 100644 --- a/dpctl/_sycl_device.pyx +++ b/dpctl/_sycl_device.pyx @@ -1842,13 +1842,13 @@ cdef class SyclDevice(_SyclDevice): BTy1 = DPCTLDevice_GetBackend(self._device_ref) if BTy1 not in _peer_access_backends: raise ValueError( - "Peer access not supported for backend " + "Peer access not supported for this device backend " f"{_backend_type_to_filter_string_part(BTy1)}" ) BTy2 = DPCTLDevice_GetBackend(p_dev.get_device_ref()) if BTy2 not in _peer_access_backends: raise ValueError( - "Peer access not supported for backend " + "Peer access not supported for peer device backend " f"{_backend_type_to_filter_string_part(BTy2)}" ) @@ -1906,13 +1906,13 @@ cdef class SyclDevice(_SyclDevice): BTy1 = DPCTLDevice_GetBackend(self._device_ref) if BTy1 not in _peer_access_backends: raise ValueError( - "Peer access not supported for backend " + "Peer access not supported for this device backend " f"{_backend_type_to_filter_string_part(BTy1)}" ) BTy2 = DPCTLDevice_GetBackend(p_dev.get_device_ref()) if BTy2 not in _peer_access_backends: raise ValueError( - "Peer access not supported for backend " + "Peer access not supported for peer device backend " f"{_backend_type_to_filter_string_part(BTy2)}" ) @@ -1964,13 +1964,13 @@ cdef class SyclDevice(_SyclDevice): ) if BTy1 not in _peer_access_backends: raise ValueError( - "Peer access not supported for backend " + "Peer access not supported for this device backend " f"{_backend_type_to_filter_string_part(BTy1)}" ) BTy2 = DPCTLDevice_GetBackend(p_dev.get_device_ref()) if BTy2 not in _peer_access_backends: raise ValueError( - "Peer access not supported for backend " + "Peer access not supported for peer device backend " f"{_backend_type_to_filter_string_part(BTy2)}" ) @@ -2016,13 +2016,13 @@ cdef class SyclDevice(_SyclDevice): BTy1 = DPCTLDevice_GetBackend(self._device_ref) if BTy1 not in _peer_access_backends: raise ValueError( - "Peer access not supported for backend " + "Peer access not supported for this device backend " f"{_backend_type_to_filter_string_part(BTy1)}" ) BTy2 = DPCTLDevice_GetBackend(p_dev.get_device_ref()) if BTy2 not in _peer_access_backends: raise ValueError( - "Peer access not supported for backend " + "Peer access not supported for peer device backend " f"{_backend_type_to_filter_string_part(BTy2)}" ) From a21d585959ea388759ec7ddf81620c4428ba20cb Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 6 May 2025 15:28:37 -0700 Subject: [PATCH 09/23] Use ostringstream to output backend names instead of integers More user-readable --- .../source/dpctl_sycl_device_interface.cpp | 42 +++++++++---------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/libsyclinterface/source/dpctl_sycl_device_interface.cpp b/libsyclinterface/source/dpctl_sycl_device_interface.cpp index 98714d1f09..5a7f5e4bf3 100644 --- a/libsyclinterface/source/dpctl_sycl_device_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_device_interface.cpp @@ -33,7 +33,7 @@ #include "dpctl_sycl_type_casters.hpp" #include "dpctl_utils_helper.h" #include -#include +#include #include #include /* SYCL headers */ #include @@ -918,9 +918,9 @@ bool DPCTLDevice_CanAccessPeer(__dpctl_keep const DPCTLSyclDeviceRef DRef, BE1 != sycl::backend::ext_oneapi_cuda && BE1 != sycl::backend::ext_oneapi_hip) { - error_handler("Backend " + std::to_string(static_cast(BE1)) + - " does not support peer access", - __FILE__, __func__, __LINE__); + std::ostringstream os; + os << "Backend " << BE1 << " does not support peer access"; + error_handler(os.str(), __FILE__, __func__, __LINE__); return false; } @@ -928,9 +928,9 @@ bool DPCTLDevice_CanAccessPeer(__dpctl_keep const DPCTLSyclDeviceRef DRef, BE2 != sycl::backend::ext_oneapi_cuda && BE2 != sycl::backend::ext_oneapi_hip) { - error_handler("Backend " + std::to_string(static_cast(BE2)) + - " does not support peer access", - __FILE__, __func__, __LINE__); + std::ostringstream os; + os << "Backend " << BE2 << " does not support peer access"; + error_handler(os.str(), __FILE__, __func__, __LINE__); return false; } try { @@ -956,20 +956,18 @@ void DPCTLDevice_EnablePeerAccess(__dpctl_keep const DPCTLSyclDeviceRef DRef, BE1 != sycl::backend::ext_oneapi_cuda && BE1 != sycl::backend::ext_oneapi_hip) { - error_handler("Backend " + std::to_string(static_cast(BE1)) + - " does not support peer access", - __FILE__, __func__, __LINE__); - return; + std::ostringstream os; + os << "Backend " << BE1 << " does not support peer access"; + error_handler(os.str(), __FILE__, __func__, __LINE__); } if (BE2 != sycl::backend::ext_oneapi_level_zero && BE2 != sycl::backend::ext_oneapi_cuda && BE2 != sycl::backend::ext_oneapi_hip) { - error_handler("Backend " + std::to_string(static_cast(BE2)) + - " does not support peer access", - __FILE__, __func__, __LINE__); - return; + std::ostringstream os; + os << "Backend " << BE2 << " does not support peer access"; + error_handler(os.str(), __FILE__, __func__, __LINE__); } try { D->ext_oneapi_enable_peer_access(*PD); @@ -993,20 +991,18 @@ void DPCTLDevice_DisablePeerAccess(__dpctl_keep const DPCTLSyclDeviceRef DRef, BE1 != sycl::backend::ext_oneapi_cuda && BE1 != sycl::backend::ext_oneapi_hip) { - error_handler("Backend " + std::to_string(static_cast(BE1)) + - " does not support peer access", - __FILE__, __func__, __LINE__); - return; + std::ostringstream os; + os << "Backend " << BE1 << " does not support peer access"; + error_handler(os.str(), __FILE__, __func__, __LINE__); } if (BE2 != sycl::backend::ext_oneapi_level_zero && BE2 != sycl::backend::ext_oneapi_cuda && BE2 != sycl::backend::ext_oneapi_hip) { - error_handler("Backend " + std::to_string(static_cast(BE2)) + - " does not support peer access", - __FILE__, __func__, __LINE__); - return; + std::ostringstream os; + os << "Backend " << BE2 << " does not support peer access"; + error_handler(os.str(), __FILE__, __func__, __LINE__); } try { D->ext_oneapi_disable_peer_access(*PD); From b8c899e81cf2fb93c46ea0fa0799659bc6be72f1 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 6 May 2025 16:53:35 -0700 Subject: [PATCH 10/23] Use ostringstream in kernel bundle interface --- .../dpctl_sycl_kernel_bundle_interface.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/libsyclinterface/source/dpctl_sycl_kernel_bundle_interface.cpp b/libsyclinterface/source/dpctl_sycl_kernel_bundle_interface.cpp index c702018687..70d89fd4fb 100644 --- a/libsyclinterface/source/dpctl_sycl_kernel_bundle_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_kernel_bundle_interface.cpp @@ -620,9 +620,9 @@ DPCTLKernelBundle_CreateFromSpirv(__dpctl_keep const DPCTLSyclContextRef CtxRef, break; #endif default: - error_handler("Backend " + std::to_string(static_cast(BE)) + - " is not supported", - __FILE__, __func__, __LINE__); + std::ostringstream os; + os << "Backend " << BE << " is not supported"; + error_handler(os.str(), __FILE__, __func__, __LINE__); break; } return KBRef; @@ -700,9 +700,9 @@ DPCTLKernelBundle_GetKernel(__dpctl_keep DPCTLSyclKernelBundleRef KBRef, return _GetKernel_ze_impl(*SyclKB, KernelName); #endif default: - error_handler("Backend " + std::to_string(static_cast(be)) + - " is not supported.", - __FILE__, __func__, __LINE__); + std::ostringstream os; + os << "Backend " << be << " is not supported"; + error_handler(os.str(), __FILE__, __func__, __LINE__); return nullptr; } } @@ -730,9 +730,9 @@ bool DPCTLKernelBundle_HasKernel(__dpctl_keep DPCTLSyclKernelBundleRef KBRef, return _HasKernel_ze_impl(*SyclKB, KernelName); #endif default: - error_handler("Backend " + std::to_string(static_cast(be)) + - " is not supported.", - __FILE__, __func__, __LINE__); + std::ostringstream os; + os << "Backend " << be << " is not supported"; + error_handler(os.str(), __FILE__, __func__, __LINE__); return false; } } From bb1f1d92776567e3897209da571c0df0a6e2aac8 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 6 May 2025 21:55:52 -0700 Subject: [PATCH 11/23] Adds Python tests for peer_access methods --- dpctl/tests/test_sycl_device.py | 66 +++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/dpctl/tests/test_sycl_device.py b/dpctl/tests/test_sycl_device.py index 0d8025c060..3df600804d 100644 --- a/dpctl/tests/test_sycl_device.py +++ b/dpctl/tests/test_sycl_device.py @@ -341,3 +341,69 @@ def test_get_component_devices_from_composite(): assert d.has_aspect_is_component # component devices are root devices assert d in devices + + +@pytest.mark.parametrize("platform_name", ["level_zero", "cuda", "hip"]) +def test_can_access_peer(platform_name): + """ + Test checks for peer access. + """ + try: + platform = dpctl.SyclPlatform(platform_name) + except ValueError as e: + pytest.skip(f"{str(e)} {platform_name}") + devices = platform.get_devices() + if len(devices) < 2: + pytest.skip( + f"Platform {platform_name} does not have enough devices to " + "test peer access" + ) + dev0 = devices[0] + dev1 = devices[1] + assert isinstance(dev0.can_access_peer_access_supported(dev1), bool) + assert isinstance(dev0.can_access_peer_atomics_supported(dev1), bool) + + +@pytest.mark.parametrize("platform_name", ["level_zero", "cuda", "hip"]) +def test_enable_disable_peer(platform_name): + """ + Test that peer access can be enabled and disabled. + """ + try: + platform = dpctl.SyclPlatform(platform_name) + except ValueError as e: + pytest.skip(f"{str(e)} {platform_name}") + devices = platform.get_devices() + if len(devices) < 2: + pytest.skip( + f"Platform {platform_name} does not have enough devices to " + "test peer access" + ) + dev0 = devices[0] + dev1 = devices[1] + if dev0.can_access_peer_access_supported(dev1): + dev0.enable_peer_access(dev1) + dev0.disable_peer_access(dev1) + else: + pytest.skip( + f"Provided {platform_name} devices do not support peer access" + ) + + +def test_peer_device_arg_validation(): + """ + Test for validation of arguments to peer access related methods. + """ + try: + dev = dpctl.SyclDevice() + except dpctl.SyclDeviceCreationError: + pytest.skip("No default device available") + bad_dev = dict() + with pytest.raises(TypeError): + dev.can_access_peer_access_supported(bad_dev) + with pytest.raises(TypeError): + dev.can_access_peer_atomics_supported(bad_dev) + with pytest.raises(TypeError): + dev.enable_peer_access(bad_dev) + with pytest.raises(TypeError): + dev.disable_peer_access(bad_dev) From 71068457e901b25295ec6695c49f846fa448ea49 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Wed, 7 May 2025 11:49:00 -0700 Subject: [PATCH 12/23] parametrize test_peer_device_arg_validation --- dpctl/tests/test_sycl_device.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/dpctl/tests/test_sycl_device.py b/dpctl/tests/test_sycl_device.py index 3df600804d..d7b2896571 100644 --- a/dpctl/tests/test_sycl_device.py +++ b/dpctl/tests/test_sycl_device.py @@ -390,7 +390,16 @@ def test_enable_disable_peer(platform_name): ) -def test_peer_device_arg_validation(): +@pytest.mark.parametrize( + "method", + [ + "can_access_peer_access_supported", + "can_access_peer_atomics_supported", + "enable_peer_access", + "disable_peer_access", + ], +) +def test_peer_device_arg_validation(method): """ Test for validation of arguments to peer access related methods. """ @@ -399,11 +408,6 @@ def test_peer_device_arg_validation(): except dpctl.SyclDeviceCreationError: pytest.skip("No default device available") bad_dev = dict() + callable = getattr(dev, method) with pytest.raises(TypeError): - dev.can_access_peer_access_supported(bad_dev) - with pytest.raises(TypeError): - dev.can_access_peer_atomics_supported(bad_dev) - with pytest.raises(TypeError): - dev.enable_peer_access(bad_dev) - with pytest.raises(TypeError): - dev.disable_peer_access(bad_dev) + callable(bad_dev) From 8563cb08389e81af8cd69ed923cc253357c4b6e1 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Sun, 11 May 2025 21:17:19 -0700 Subject: [PATCH 13/23] Factor out peer access validation also make peer access invalid when the device and peer device are the same --- dpctl/_sycl_device.pyx | 142 +++++++++++++---------------------------- 1 file changed, 46 insertions(+), 96 deletions(-) diff --git a/dpctl/_sycl_device.pyx b/dpctl/_sycl_device.pyx index 1551673664..f00b7e4b32 100644 --- a/dpctl/_sycl_device.pyx +++ b/dpctl/_sycl_device.pyx @@ -217,6 +217,28 @@ cdef void _init_helper(_SyclDevice device, DPCTLSyclDeviceRef DRef) except *: raise RuntimeError("Descriptor 'max_work_item_sizes3d' not available") +cdef bint _check_peer_access(SyclDevice dev, SyclDevice peer) except *: + """ + Check peer access ahead of time to avoid errors from unified runtime or + compiler implementation. + """ + cdef list _peer_access_backends = [ + _backend_type._CUDA, + _backend_type._HIP, + _backend_type._LEVEL_ZERO + ] + cdef _backend_type BTy1 = DPCTLDevice_GetBackend(dev._device_ref) + cdef _backend_type BTy2 = DPCTLDevice_GetBackend(peer.get_device_ref()) + if ( + BTy1 == BTy2 and + BTy1 in _peer_access_backends and + BTy2 in _peer_access_backends and + dev != peer + ): + return True + return False + + @functools.lru_cache(maxsize=None) def _cached_filter_string(d : SyclDevice): """ @@ -1819,13 +1841,8 @@ cdef class SyclDevice(_SyclDevice): Raises: TypeError: If ``peer`` is not :class:`dpctl.SyclDevice`. - ValueError: - If the backend associated with this device or ``peer`` does not - support peer access. """ cdef SyclDevice p_dev - cdef _backend_type BTy1 - cdef _backend_type BTy2 if not isinstance(peer, SyclDevice): raise TypeError( @@ -1834,29 +1851,13 @@ cdef class SyclDevice(_SyclDevice): ) p_dev = peer - _peer_access_backends = [ - _backend_type._CUDA, - _backend_type._HIP, - _backend_type._LEVEL_ZERO - ] - BTy1 = DPCTLDevice_GetBackend(self._device_ref) - if BTy1 not in _peer_access_backends: - raise ValueError( - "Peer access not supported for this device backend " - f"{_backend_type_to_filter_string_part(BTy1)}" + if _check_peer_access(self, p_dev): + return DPCTLDevice_CanAccessPeer( + self._device_ref, + p_dev.get_device_ref(), + _peer_access._access_supported ) - BTy2 = DPCTLDevice_GetBackend(p_dev.get_device_ref()) - if BTy2 not in _peer_access_backends: - raise ValueError( - "Peer access not supported for peer device backend " - f"{_backend_type_to_filter_string_part(BTy2)}" - ) - - return DPCTLDevice_CanAccessPeer( - self._device_ref, - p_dev.get_device_ref(), - _peer_access._access_supported - ) + return False def can_access_peer_atomics_supported(self, peer): """ Returns ``True`` if this device (``self``) can concurrently access @@ -1883,13 +1884,8 @@ cdef class SyclDevice(_SyclDevice): Raises: TypeError: If ``peer`` is not :class:`dpctl.SyclDevice`. - ValueError: - If the backend associated with this device or ``peer`` does not - support peer access. """ cdef SyclDevice p_dev - cdef _backend_type BTy1 - cdef _backend_type BTy2 if not isinstance(peer, SyclDevice): raise TypeError( @@ -1898,29 +1894,13 @@ cdef class SyclDevice(_SyclDevice): ) p_dev = peer - _peer_access_backends = [ - _backend_type._CUDA, - _backend_type._HIP, - _backend_type._LEVEL_ZERO - ] - BTy1 = DPCTLDevice_GetBackend(self._device_ref) - if BTy1 not in _peer_access_backends: - raise ValueError( - "Peer access not supported for this device backend " - f"{_backend_type_to_filter_string_part(BTy1)}" + if _check_peer_access(self, p_dev): + return DPCTLDevice_CanAccessPeer( + self._device_ref, + p_dev.get_device_ref(), + _peer_access._atomics_supported ) - BTy2 = DPCTLDevice_GetBackend(p_dev.get_device_ref()) - if BTy2 not in _peer_access_backends: - raise ValueError( - "Peer access not supported for peer device backend " - f"{_backend_type_to_filter_string_part(BTy2)}" - ) - - return DPCTLDevice_CanAccessPeer( - self._device_ref, - p_dev.get_device_ref(), - _peer_access._atomics_supported - ) + return False def enable_peer_access(self, peer): """ Enables this device (``self``) to access USM device allocations @@ -1944,8 +1924,6 @@ cdef class SyclDevice(_SyclDevice): support peer access. """ cdef SyclDevice p_dev - cdef _backend_type BTy1 - cdef _backend_type BTy2 if not isinstance(peer, SyclDevice): raise TypeError( @@ -1954,27 +1932,13 @@ cdef class SyclDevice(_SyclDevice): ) p_dev = peer - _peer_access_backends = [ - _backend_type._CUDA, - _backend_type._HIP, - _backend_type._LEVEL_ZERO - ] - BTy1 = ( - DPCTLDevice_GetBackend(self._device_ref) - ) - if BTy1 not in _peer_access_backends: - raise ValueError( - "Peer access not supported for this device backend " - f"{_backend_type_to_filter_string_part(BTy1)}" + if _check_peer_access(self, p_dev): + DPCTLDevice_EnablePeerAccess( + self._device_ref, + p_dev.get_device_ref() ) - BTy2 = DPCTLDevice_GetBackend(p_dev.get_device_ref()) - if BTy2 not in _peer_access_backends: - raise ValueError( - "Peer access not supported for peer device backend " - f"{_backend_type_to_filter_string_part(BTy2)}" - ) - - DPCTLDevice_EnablePeerAccess(self._device_ref, p_dev.get_device_ref()) + else: + raise ValueError("Peer access cannot be enabled for these devices") return def disable_peer_access(self, peer): @@ -1998,8 +1962,6 @@ cdef class SyclDevice(_SyclDevice): support peer access. """ cdef SyclDevice p_dev - cdef _backend_type BTy1 - cdef _backend_type BTy2 if not isinstance(peer, SyclDevice): raise TypeError( @@ -2008,25 +1970,13 @@ cdef class SyclDevice(_SyclDevice): ) p_dev = peer - _peer_access_backends = [ - _backend_type._CUDA, - _backend_type._HIP, - _backend_type._LEVEL_ZERO - ] - BTy1 = DPCTLDevice_GetBackend(self._device_ref) - if BTy1 not in _peer_access_backends: - raise ValueError( - "Peer access not supported for this device backend " - f"{_backend_type_to_filter_string_part(BTy1)}" + if _check_peer_access(self, p_dev): + DPCTLDevice_DisablePeerAccess( + self._device_ref, + p_dev.get_device_ref() ) - BTy2 = DPCTLDevice_GetBackend(p_dev.get_device_ref()) - if BTy2 not in _peer_access_backends: - raise ValueError( - "Peer access not supported for peer device backend " - f"{_backend_type_to_filter_string_part(BTy2)}" - ) - - DPCTLDevice_DisablePeerAccess(self._device_ref, p_dev.get_device_ref()) + else: + raise ValueError("Peer access cannot be enabled for these devices") return @property From 9e7031ee1cbec9b875f067c928e3cb277f93247a Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 12 May 2025 10:22:09 -0700 Subject: [PATCH 14/23] Factor common code out of C-API peer access functions --- .../source/dpctl_sycl_device_interface.cpp | 110 +++++++----------- 1 file changed, 41 insertions(+), 69 deletions(-) diff --git a/libsyclinterface/source/dpctl_sycl_device_interface.cpp b/libsyclinterface/source/dpctl_sycl_device_interface.cpp index 5a7f5e4bf3..31830d43fb 100644 --- a/libsyclinterface/source/dpctl_sycl_device_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_device_interface.cpp @@ -903,6 +903,24 @@ DPCTLDevice_GetCompositeDevice(__dpctl_keep const DPCTLSyclDeviceRef DRef) return nullptr; } +bool _CallPeerAccess(device dev, device peer) +{ + auto BE1 = dev.get_backend(); + auto BE2 = peer.get_backend(); + + if ((BE1 != sycl::backend::ext_oneapi_level_zero && + BE1 != sycl::backend::ext_oneapi_cuda && + BE1 != sycl::backend::ext_oneapi_hip) || + (BE2 != sycl::backend::ext_oneapi_level_zero && + BE2 != sycl::backend::ext_oneapi_cuda && + BE2 != sycl::backend::ext_oneapi_hip) || + (dev == peer)) + { + return false; + } + return true; +} + bool DPCTLDevice_CanAccessPeer(__dpctl_keep const DPCTLSyclDeviceRef DRef, __dpctl_keep const DPCTLSyclDeviceRef PDRef, DPCTLPeerAccessType PT) @@ -911,33 +929,13 @@ bool DPCTLDevice_CanAccessPeer(__dpctl_keep const DPCTLSyclDeviceRef DRef, auto D = unwrap(DRef); auto PD = unwrap(PDRef); if (D && PD) { - auto BE1 = D->get_backend(); - auto BE2 = PD->get_backend(); - - if (BE1 != sycl::backend::ext_oneapi_level_zero && - BE1 != sycl::backend::ext_oneapi_cuda && - BE1 != sycl::backend::ext_oneapi_hip) - { - std::ostringstream os; - os << "Backend " << BE1 << " does not support peer access"; - error_handler(os.str(), __FILE__, __func__, __LINE__); - return false; - } - - if (BE2 != sycl::backend::ext_oneapi_level_zero && - BE2 != sycl::backend::ext_oneapi_cuda && - BE2 != sycl::backend::ext_oneapi_hip) - { - std::ostringstream os; - os << "Backend " << BE2 << " does not support peer access"; - error_handler(os.str(), __FILE__, __func__, __LINE__); - return false; - } - try { - canAccess = D->ext_oneapi_can_access_peer( - *PD, DPCTL_DPCTLPeerAccessTypeToSycl(PT)); - } catch (std::exception const &e) { - error_handler(e, __FILE__, __func__, __LINE__); + if (_CallPeerAccess(*D, *PD)) { + try { + canAccess = D->ext_oneapi_can_access_peer( + *PD, DPCTL_DPCTLPeerAccessTypeToSycl(PT)); + } catch (std::exception const &e) { + error_handler(e, __FILE__, __func__, __LINE__); + } } } return canAccess; @@ -949,31 +947,18 @@ void DPCTLDevice_EnablePeerAccess(__dpctl_keep const DPCTLSyclDeviceRef DRef, auto D = unwrap(DRef); auto PD = unwrap(PDRef); if (D && PD) { - auto BE1 = D->get_backend(); - auto BE2 = PD->get_backend(); - - if (BE1 != sycl::backend::ext_oneapi_level_zero && - BE1 != sycl::backend::ext_oneapi_cuda && - BE1 != sycl::backend::ext_oneapi_hip) - { - std::ostringstream os; - os << "Backend " << BE1 << " does not support peer access"; - error_handler(os.str(), __FILE__, __func__, __LINE__); + if (_CallPeerAccess(*D, *PD)) { + try { + D->ext_oneapi_enable_peer_access(*PD); + } catch (std::exception const &e) { + error_handler(e, __FILE__, __func__, __LINE__); + } } - - if (BE2 != sycl::backend::ext_oneapi_level_zero && - BE2 != sycl::backend::ext_oneapi_cuda && - BE2 != sycl::backend::ext_oneapi_hip) - { + else { std::ostringstream os; - os << "Backend " << BE2 << " does not support peer access"; + os << "Given devices do not support peer access"; error_handler(os.str(), __FILE__, __func__, __LINE__); } - try { - D->ext_oneapi_enable_peer_access(*PD); - } catch (std::exception const &e) { - error_handler(e, __FILE__, __func__, __LINE__); - } } return; } @@ -984,31 +969,18 @@ void DPCTLDevice_DisablePeerAccess(__dpctl_keep const DPCTLSyclDeviceRef DRef, auto D = unwrap(DRef); auto PD = unwrap(PDRef); if (D && PD) { - auto BE1 = D->get_backend(); - auto BE2 = PD->get_backend(); - - if (BE1 != sycl::backend::ext_oneapi_level_zero && - BE1 != sycl::backend::ext_oneapi_cuda && - BE1 != sycl::backend::ext_oneapi_hip) - { - std::ostringstream os; - os << "Backend " << BE1 << " does not support peer access"; - error_handler(os.str(), __FILE__, __func__, __LINE__); + if (_CallPeerAccess(*D, *PD)) { + try { + D->ext_oneapi_disable_peer_access(*PD); + } catch (std::exception const &e) { + error_handler(e, __FILE__, __func__, __LINE__); + } } - - if (BE2 != sycl::backend::ext_oneapi_level_zero && - BE2 != sycl::backend::ext_oneapi_cuda && - BE2 != sycl::backend::ext_oneapi_hip) - { + else { std::ostringstream os; - os << "Backend " << BE2 << " does not support peer access"; + os << "Given devices do not support peer access"; error_handler(os.str(), __FILE__, __func__, __LINE__); } - try { - D->ext_oneapi_disable_peer_access(*PD); - } catch (std::exception const &e) { - error_handler(e, __FILE__, __func__, __LINE__); - } } return; } From da45cdb6bc084a4c0ac55b92a2e8c543a5a6472b Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 12 May 2025 15:12:45 -0700 Subject: [PATCH 15/23] Add C-API tests for peer access functions --- libsyclinterface/tests/CMakeLists.txt | 1 + .../tests/test_sycl_peer_access.cpp | 139 ++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 libsyclinterface/tests/test_sycl_peer_access.cpp diff --git a/libsyclinterface/tests/CMakeLists.txt b/libsyclinterface/tests/CMakeLists.txt index 424340260e..dc4465dc2e 100644 --- a/libsyclinterface/tests/CMakeLists.txt +++ b/libsyclinterface/tests/CMakeLists.txt @@ -47,6 +47,7 @@ add_sycl_to_target( ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_device_selector_interface.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_device_aspects.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_event_interface.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_peer_access.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_platform_interface.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_kernel_interface.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_kernel_bundle_interface.cpp diff --git a/libsyclinterface/tests/test_sycl_peer_access.cpp b/libsyclinterface/tests/test_sycl_peer_access.cpp new file mode 100644 index 0000000000..c06f717bfd --- /dev/null +++ b/libsyclinterface/tests/test_sycl_peer_access.cpp @@ -0,0 +1,139 @@ +//===--- test_sycl_peer_access.cpp - Test cases for device peer access ===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file has unit test cases for peer access functions defined in +/// dpctl_sycl_device_interface.h. +/// +//===----------------------------------------------------------------------===// + +#include "dpctl_sycl_device_interface.h" +#include "dpctl_sycl_device_selector_interface.h" +#include "dpctl_sycl_platform_interface.h" +#include "dpctl_utils_helper.h" + +#include +#include + +struct TestDPCTLPeerAccess : public ::testing::TestWithParam +{ + DPCTLSyclPlatformRef P = nullptr; + DPCTLDeviceVectorRef DV = nullptr; + + TestDPCTLPeerAccess() + { + auto DS = DPCTLFilterSelector_Create(GetParam()); + if (DS) { + EXPECT_NO_FATAL_FAILURE(P = DPCTLPlatform_CreateFromSelector(DS)); + } + DPCTLDeviceSelector_Delete(DS); + if (P) { + DV = DPCTLPlatform_GetDevices(P, DPCTLSyclDeviceType::DPCTL_ALL); + } + } + + void SetUp() + { + if (!P || !DV) { + auto message = "Skipping as no devices of type " + + std::string(GetParam()) + "."; + GTEST_SKIP_(message.c_str()); + } + + if (DPCTLDeviceVector_Size(DV) < 2) { + GTEST_SKIP_("Peer access tests require more than one device."); + } + } + + ~TestDPCTLPeerAccess() + { + DPCTLDeviceVector_Delete(DV); + DPCTLPlatform_Delete(P); + } +}; + +TEST_P(TestDPCTLPeerAccess, ChkAccessSupported) +{ + auto D0 = DPCTLDeviceVector_GetAt(DV, 0); + auto D1 = DPCTLDeviceVector_GetAt(DV, 1); + ASSERT_TRUE(D0 != nullptr); + ASSERT_TRUE(D1 != nullptr); + EXPECT_NO_FATAL_FAILURE(DPCTLDevice_CanAccessPeer( + D0, D1, DPCTLPeerAccessType::access_supported)); +} + +TEST_P(TestDPCTLPeerAccess, ChkAtomicsSupported) +{ + auto D0 = DPCTLDeviceVector_GetAt(DV, 0); + auto D1 = DPCTLDeviceVector_GetAt(DV, 1); + ASSERT_TRUE(D0 != nullptr); + ASSERT_TRUE(D1 != nullptr); + EXPECT_NO_FATAL_FAILURE(DPCTLDevice_CanAccessPeer( + D0, D1, DPCTLPeerAccessType::atomics_supported)); +} + +TEST_P(TestDPCTLPeerAccess, ChkPeerAccess) +{ + auto D0 = DPCTLDeviceVector_GetAt(DV, 0); + auto D1 = DPCTLDeviceVector_GetAt(DV, 1); + ASSERT_TRUE(D0 != nullptr); + ASSERT_TRUE(D1 != nullptr); + bool canEnable = false; + EXPECT_NO_FATAL_FAILURE(canEnable = DPCTLDevice_CanAccessPeer( + D0, D1, DPCTLPeerAccessType::access_supported)); + if (canEnable) { + EXPECT_NO_FATAL_FAILURE(DPCTLDevice_EnablePeerAccess(D0, D1)); + EXPECT_NO_FATAL_FAILURE(DPCTLDevice_DisablePeerAccess(D0, D1)); + } +} + +INSTANTIATE_TEST_SUITE_P(DPCTLDeviceFns, + TestDPCTLPeerAccess, + ::testing::Values("level_zero", "cuda", "hip")); + +struct TestDPCTLPeerAccessNullArgs : public ::testing::Test +{ + DPCTLSyclDeviceRef Null_DR0 = nullptr; + DPCTLSyclDeviceRef Null_DR1 = nullptr; +}; + +TEST_F(TestDPCTLPeerAccessNullArgs, ChkAccessSupported) +{ + bool accessSupported = true; + EXPECT_NO_FATAL_FAILURE( + accessSupported = DPCTLDevice_CanAccessPeer( + Null_DR0, Null_DR1, DPCTLPeerAccessType::access_supported)); + ASSERT_FALSE(accessSupported); +} + +TEST_F(TestDPCTLPeerAccessNullArgs, ChkAtomicsSupported) +{ + bool accessSupported = true; + EXPECT_NO_FATAL_FAILURE( + accessSupported = DPCTLDevice_CanAccessPeer( + Null_DR0, Null_DR1, DPCTLPeerAccessType::atomics_supported)); + ASSERT_FALSE(accessSupported); +} + +TEST_F(TestDPCTLPeerAccessNullArgs, ChkPeerAccess) +{ + EXPECT_NO_FATAL_FAILURE(DPCTLDevice_EnablePeerAccess(Null_DR0, Null_DR1)); + EXPECT_NO_FATAL_FAILURE(DPCTLDevice_DisablePeerAccess(Null_DR0, Null_DR1)); +} From ba7222f4ca95f7eead071cb8d7cdf3a406ca4835 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 12 May 2025 15:16:31 -0700 Subject: [PATCH 16/23] Add check that backend types are the same in _CallPeerAccess --- .../source/dpctl_sycl_device_interface.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/libsyclinterface/source/dpctl_sycl_device_interface.cpp b/libsyclinterface/source/dpctl_sycl_device_interface.cpp index 31830d43fb..dcf7125986 100644 --- a/libsyclinterface/source/dpctl_sycl_device_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_device_interface.cpp @@ -908,17 +908,18 @@ bool _CallPeerAccess(device dev, device peer) auto BE1 = dev.get_backend(); auto BE2 = peer.get_backend(); - if ((BE1 != sycl::backend::ext_oneapi_level_zero && - BE1 != sycl::backend::ext_oneapi_cuda && - BE1 != sycl::backend::ext_oneapi_hip) || - (BE2 != sycl::backend::ext_oneapi_level_zero && - BE2 != sycl::backend::ext_oneapi_cuda && - BE2 != sycl::backend::ext_oneapi_hip) || - (dev == peer)) + if ((BE1 == BE2) && + (BE1 == sycl::backend::ext_oneapi_level_zero || + BE1 == sycl::backend::ext_oneapi_cuda || + BE1 == sycl::backend::ext_oneapi_hip) && + (BE2 == sycl::backend::ext_oneapi_level_zero || + BE2 == sycl::backend::ext_oneapi_cuda || + BE2 == sycl::backend::ext_oneapi_hip) && + (dev != peer)) { - return false; + return true; } - return true; + return false; } bool DPCTLDevice_CanAccessPeer(__dpctl_keep const DPCTLSyclDeviceRef DRef, From 417889db2c62bddaede684b3fe1f6f9c3b7a6e83 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 13 May 2025 14:34:47 -0700 Subject: [PATCH 17/23] Inline _CallPeerAccess helper function --- libsyclinterface/source/dpctl_sycl_device_interface.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libsyclinterface/source/dpctl_sycl_device_interface.cpp b/libsyclinterface/source/dpctl_sycl_device_interface.cpp index dcf7125986..a99ee93829 100644 --- a/libsyclinterface/source/dpctl_sycl_device_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_device_interface.cpp @@ -903,7 +903,7 @@ DPCTLDevice_GetCompositeDevice(__dpctl_keep const DPCTLSyclDeviceRef DRef) return nullptr; } -bool _CallPeerAccess(device dev, device peer) +static inline bool _CallPeerAccess(device dev, device peer) { auto BE1 = dev.get_backend(); auto BE2 = peer.get_backend(); From 95e7478360fa4b63ac954233e6457f69f13ff08d Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 13 May 2025 14:35:32 -0700 Subject: [PATCH 18/23] In;ine _check_peer_access helper function --- dpctl/_sycl_device.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpctl/_sycl_device.pyx b/dpctl/_sycl_device.pyx index f00b7e4b32..39ccd36fc6 100644 --- a/dpctl/_sycl_device.pyx +++ b/dpctl/_sycl_device.pyx @@ -217,7 +217,7 @@ cdef void _init_helper(_SyclDevice device, DPCTLSyclDeviceRef DRef) except *: raise RuntimeError("Descriptor 'max_work_item_sizes3d' not available") -cdef bint _check_peer_access(SyclDevice dev, SyclDevice peer) except *: +cdef inline bint _check_peer_access(SyclDevice dev, SyclDevice peer) except *: """ Check peer access ahead of time to avoid errors from unified runtime or compiler implementation. From 26f27d139deac86fa39056e2a583ec37e4d1dace Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 13 May 2025 23:33:28 -0700 Subject: [PATCH 19/23] Clean up peer access functions in libsyclinterface Discard unused header for sstream --- .../source/dpctl_sycl_device_interface.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/libsyclinterface/source/dpctl_sycl_device_interface.cpp b/libsyclinterface/source/dpctl_sycl_device_interface.cpp index a99ee93829..af1058c6bb 100644 --- a/libsyclinterface/source/dpctl_sycl_device_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_device_interface.cpp @@ -33,7 +33,6 @@ #include "dpctl_sycl_type_casters.hpp" #include "dpctl_utils_helper.h" #include -#include #include #include /* SYCL headers */ #include @@ -950,15 +949,15 @@ void DPCTLDevice_EnablePeerAccess(__dpctl_keep const DPCTLSyclDeviceRef DRef, if (D && PD) { if (_CallPeerAccess(*D, *PD)) { try { + throw std::invalid_argument("test"); D->ext_oneapi_enable_peer_access(*PD); } catch (std::exception const &e) { error_handler(e, __FILE__, __func__, __LINE__); } } else { - std::ostringstream os; - os << "Given devices do not support peer access"; - error_handler(os.str(), __FILE__, __func__, __LINE__); + error_handler("Devices do not support peer access", __FILE__, + __func__, __LINE__); } } return; @@ -978,9 +977,8 @@ void DPCTLDevice_DisablePeerAccess(__dpctl_keep const DPCTLSyclDeviceRef DRef, } } else { - std::ostringstream os; - os << "Given devices do not support peer access"; - error_handler(os.str(), __FILE__, __func__, __LINE__); + error_handler("Devices do not support peer access", __FILE__, + __func__, __LINE__); } } return; From a3df074a0844b05388d89c8c3a6836db6c4bfbe8 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Wed, 14 May 2025 01:43:04 -0700 Subject: [PATCH 20/23] Add helper for raising more specific errors in peer access methods --- dpctl/_sycl_device.pyx | 56 ++++++++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 13 deletions(-) diff --git a/dpctl/_sycl_device.pyx b/dpctl/_sycl_device.pyx index 39ccd36fc6..8638b46cf0 100644 --- a/dpctl/_sycl_device.pyx +++ b/dpctl/_sycl_device.pyx @@ -239,6 +239,43 @@ cdef inline bint _check_peer_access(SyclDevice dev, SyclDevice peer) except *: return False +cdef inline void _raise_invalid_peer_access( + SyclDevice dev, + SyclDevice peer, +) except *: + """ + Check peer access ahead of time and raise errors for invalid cases. + """ + cdef list _peer_access_backends = [ + _backend_type._CUDA, + _backend_type._HIP, + _backend_type._LEVEL_ZERO + ] + cdef _backend_type BTy1 = DPCTLDevice_GetBackend(dev._device_ref) + cdef _backend_type BTy2 = DPCTLDevice_GetBackend(peer.get_device_ref()) + if (BTy1 != BTy2): + raise ValueError( + f"Device with backend {_backend_type_to_filter_string_part(BTy1)} " + "cannot peer access device with backend " + f"{_backend_type_to_filter_string_part(BTy2)}" + ) + if (BTy1 not in _peer_access_backends): + raise ValueError( + "Peer access not supported for backend " + f"{_backend_type_to_filter_string_part(BTy1)}" + ) + if (BTy2 not in _peer_access_backends): + raise ValueError( + "Peer access not supported for backend " + f"{_backend_type_to_filter_string_part(BTy2)}" + ) + if (dev == peer): + raise ValueError( + "Peer access cannot be enabled between a device and itself" + ) + return + + @functools.lru_cache(maxsize=None) def _cached_filter_string(d : SyclDevice): """ @@ -1850,7 +1887,6 @@ cdef class SyclDevice(_SyclDevice): f"{type(peer)}" ) p_dev = peer - if _check_peer_access(self, p_dev): return DPCTLDevice_CanAccessPeer( self._device_ref, @@ -1893,7 +1929,6 @@ cdef class SyclDevice(_SyclDevice): f"{type(peer)}" ) p_dev = peer - if _check_peer_access(self, p_dev): return DPCTLDevice_CanAccessPeer( self._device_ref, @@ -1931,14 +1966,11 @@ cdef class SyclDevice(_SyclDevice): f"{type(peer)}" ) p_dev = peer - - if _check_peer_access(self, p_dev): - DPCTLDevice_EnablePeerAccess( - self._device_ref, - p_dev.get_device_ref() - ) - else: - raise ValueError("Peer access cannot be enabled for these devices") + _raise_invalid_peer_access(self, p_dev) + DPCTLDevice_EnablePeerAccess( + self._device_ref, + p_dev.get_device_ref() + ) return def disable_peer_access(self, peer): @@ -1969,14 +2001,12 @@ cdef class SyclDevice(_SyclDevice): f"{type(peer)}" ) p_dev = peer - + _raise_invalid_peer_access(self, p_dev) if _check_peer_access(self, p_dev): DPCTLDevice_DisablePeerAccess( self._device_ref, p_dev.get_device_ref() ) - else: - raise ValueError("Peer access cannot be enabled for these devices") return @property From 27162a8a16092428ff2b69824338a63d2b6d0bc1 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Wed, 14 May 2025 01:59:42 -0700 Subject: [PATCH 21/23] Add tests for dpctl peer access enum helper utilities --- libsyclinterface/tests/test_helper.cpp | 28 ++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/libsyclinterface/tests/test_helper.cpp b/libsyclinterface/tests/test_helper.cpp index 8743d82dfd..37ca33c41c 100644 --- a/libsyclinterface/tests/test_helper.cpp +++ b/libsyclinterface/tests/test_helper.cpp @@ -184,3 +184,31 @@ TEST_F(TestHelperFns, SyclDeviceTypeToDPCTLDeviceType) sycl::info::device_type::custom)); ASSERT_TRUE(DTy == DPCTLSyclDeviceType::DPCTL_CUSTOM); } + +TEST_F(TestHelperFns, ChkDPCTLPeerAccessTypeToSycl) +{ + sycl::ext::oneapi::peer_access peer_type; + + EXPECT_NO_FATAL_FAILURE(peer_type = DPCTL_DPCTLPeerAccessTypeToSycl( + DPCTLPeerAccessType::access_supported)); + ASSERT_TRUE(peer_type == sycl::ext::oneapi::peer_access::access_supported); + + EXPECT_NO_FATAL_FAILURE(peer_type = DPCTL_DPCTLPeerAccessTypeToSycl( + DPCTLPeerAccessType::atomics_supported)); + ASSERT_TRUE(peer_type == sycl::ext::oneapi::peer_access::atomics_supported); +} + +TEST_F(TestHelperFns, ChkSyclPeerAccessToDPCTLType) +{ + DPCTLPeerAccessType PTy; + + EXPECT_NO_FATAL_FAILURE( + PTy = DPCTL_SyclPeerAccessToDPCTLType( + sycl::ext::oneapi::peer_access::access_supported)); + ASSERT_TRUE(PTy == DPCTLPeerAccessType::access_supported); + + EXPECT_NO_FATAL_FAILURE( + PTy = DPCTL_SyclPeerAccessToDPCTLType( + sycl::ext::oneapi::peer_access::atomics_supported)); + ASSERT_TRUE(PTy == DPCTLPeerAccessType::atomics_supported); +} From 3814d56898d17fd69e9d722a944e84b5daa80a9f Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Wed, 14 May 2025 02:06:37 -0700 Subject: [PATCH 22/23] Add an additional libsyclinterface test for peer access to self --- libsyclinterface/tests/test_sycl_peer_access.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/libsyclinterface/tests/test_sycl_peer_access.cpp b/libsyclinterface/tests/test_sycl_peer_access.cpp index c06f717bfd..37f3f7057a 100644 --- a/libsyclinterface/tests/test_sycl_peer_access.cpp +++ b/libsyclinterface/tests/test_sycl_peer_access.cpp @@ -104,6 +104,17 @@ TEST_P(TestDPCTLPeerAccess, ChkPeerAccess) } } +TEST_P(TestDPCTLPeerAccess, ChkPeerAccessToSelf) +{ + auto D0 = DPCTLDeviceVector_GetAt(DV, 0); + ASSERT_TRUE(D1 != nullptr); + bool canEnable = false; + EXPECT_NO_FATAL_FAILURE(canEnable = DPCTLDevice_CanAccessPeer( + D0, D0, DPCTLPeerAccessType::access_supported)); + EXPECT_NO_FATAL_FAILURE(DPCTLDevice_EnablePeerAccess(D0, D0)); + EXPECT_NO_FATAL_FAILURE(DPCTLDevice_DisablePeerAccess(D0, D0)); +} + INSTANTIATE_TEST_SUITE_P(DPCTLDeviceFns, TestDPCTLPeerAccess, ::testing::Values("level_zero", "cuda", "hip")); From 031cff17b9a12a3053c1357d78ed2d48a7505cc3 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Wed, 14 May 2025 02:22:55 -0700 Subject: [PATCH 23/23] Adds a Python test for peer access to self raising ValueError --- dpctl/tests/test_sycl_device.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/dpctl/tests/test_sycl_device.py b/dpctl/tests/test_sycl_device.py index d7b2896571..2e20a7dd5d 100644 --- a/dpctl/tests/test_sycl_device.py +++ b/dpctl/tests/test_sycl_device.py @@ -365,7 +365,7 @@ def test_can_access_peer(platform_name): @pytest.mark.parametrize("platform_name", ["level_zero", "cuda", "hip"]) -def test_enable_disable_peer(platform_name): +def test_enable_disable_peer_access(platform_name): """ Test that peer access can be enabled and disabled. """ @@ -411,3 +411,19 @@ def test_peer_device_arg_validation(method): callable = getattr(dev, method) with pytest.raises(TypeError): callable(bad_dev) + + +@pytest.mark.parametrize("platform_name", ["level_zero", "cuda", "hip"]) +def test_peer_access_to_self(platform_name): + """ + Test for validation of arguments to peer access related methods. + """ + try: + platform = dpctl.SyclPlatform(platform_name) + except ValueError as e: + pytest.skip(f"{str(e)} {platform_name}") + dev = platform.get_devices()[0] + with pytest.raises(ValueError): + dev.enable_peer_access(dev) + with pytest.raises(ValueError): + dev.enable_peer_access(dev)