From e69da3e866f1828388a81700e26d2aa762c9c2d1 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sat, 28 Aug 2021 14:02:29 -0500 Subject: [PATCH 1/2] DPCTLQueue_Memcpy, _Prefetch, _Memadvise are not asynchronous The return value of these functions changed. Declaration, documentation, tests and implementations updated in dpctl-capi/. dpctl/_backend.pxd updated for changed return type. Usages updated accordingly to get the event and wait on it to preserve the behavior for now. --- .../include/dpctl_sycl_queue_interface.h | 39 +++++----- .../source/dpctl_sycl_queue_interface.cpp | 71 ++++++++++++++----- dpctl-capi/tests/test_sycl_usm_interface.cpp | 19 +++-- dpctl/_backend.pxd | 6 +- dpctl/_sycl_queue.pyx | 29 +++++++- dpctl/memory/_memory.pyx | 28 ++++++-- 6 files changed, 141 insertions(+), 51 deletions(-) diff --git a/dpctl-capi/include/dpctl_sycl_queue_interface.h b/dpctl-capi/include/dpctl_sycl_queue_interface.h index 104c3ed5d5..9f46a83e50 100644 --- a/dpctl-capi/include/dpctl_sycl_queue_interface.h +++ b/dpctl-capi/include/dpctl_sycl_queue_interface.h @@ -266,7 +266,7 @@ DPCTLQueue_SubmitNDRange(__dpctl_keep const DPCTLSyclKernelRef KRef, size_t NDepEvents); /*! - * @brief Calls the ``sycl::queue.submit`` function to do a blocking wait on + * @brief Calls the ``sycl::queue::submit`` function to do a blocking wait on * all enqueued tasks in the queue. * * @param QRef Opaque pointer to a ``sycl::queue``. @@ -276,38 +276,39 @@ DPCTL_API void DPCTLQueue_Wait(__dpctl_keep const DPCTLSyclQueueRef QRef); /*! - * @brief C-API wrapper for ``sycl::queue::memcpy``, the function waits on an - * event till the memcpy operation completes. + * @brief C-API wrapper for ``sycl::queue::memcpy``. * * @param QRef An opaque pointer to the ``sycl::queue``. * @param Dest An USM pointer to the destination memory. * @param Src An USM pointer to the source memory. * @param Count A number of bytes to copy. + * @return An opaque pointer to the ``sycl::event`` returned by the + * ``sycl::queue::memcpy`` function. * @ingroup QueueInterface */ DPCTL_API -void DPCTLQueue_Memcpy(__dpctl_keep const DPCTLSyclQueueRef QRef, - void *Dest, - const void *Src, - size_t Count); +DPCTLSyclEventRef DPCTLQueue_Memcpy(__dpctl_keep const DPCTLSyclQueueRef QRef, + void *Dest, + const void *Src, + size_t Count); /*! - * @brief C-API wrapper for ``sycl::queue::prefetch``, the function waits on an - * event till the prefetch operation completes. + * @brief C-API wrapper for ``sycl::queue::prefetch``. * * @param QRef An opaque pointer to the ``sycl::queue``. * @param Ptr An USM pointer to memory. * @param Count A number of bytes to prefetch. + * @return An opaque pointer to the ``sycl::event`` returned by the + * ``sycl::queue::prefetch`` function. * @ingroup QueueInterface */ DPCTL_API -void DPCTLQueue_Prefetch(__dpctl_keep DPCTLSyclQueueRef QRef, - const void *Ptr, - size_t Count); +DPCTLSyclEventRef DPCTLQueue_Prefetch(__dpctl_keep DPCTLSyclQueueRef QRef, + const void *Ptr, + size_t Count); /*! - * @brief C-API wrapper for sycl::queue::mem_advise, the function waits on an - * event till the operation completes. + * @brief C-API wrapper for ``sycl::queue::mem_advise``. * * @param QRef An opaque pointer to the ``sycl::queue``. * @param Ptr An USM pointer to memory. @@ -315,13 +316,15 @@ void DPCTLQueue_Prefetch(__dpctl_keep DPCTLSyclQueueRef QRef, * @param Advice Device-defined advice for the specified allocation. * A value of 0 reverts the advice for Ptr to the * default behavior. + * @return An opaque pointer to the ``sycl::event`` returned by the + * ``sycl::queue::mem_advise`` function. * @ingroup QueueInterface */ DPCTL_API -void DPCTLQueue_MemAdvise(__dpctl_keep DPCTLSyclQueueRef QRef, - const void *Ptr, - size_t Count, - int Advice); +DPCTLSyclEventRef DPCTLQueue_MemAdvise(__dpctl_keep DPCTLSyclQueueRef QRef, + const void *Ptr, + size_t Count, + int Advice); /*! * @brief C-API wrapper for sycl::queue::is_in_order that indicates whether diff --git a/dpctl-capi/source/dpctl_sycl_queue_interface.cpp b/dpctl-capi/source/dpctl_sycl_queue_interface.cpp index adb4cc2f90..bba692c8e5 100644 --- a/dpctl-capi/source/dpctl_sycl_queue_interface.cpp +++ b/dpctl-capi/source/dpctl_sycl_queue_interface.cpp @@ -484,39 +484,74 @@ void DPCTLQueue_Wait(__dpctl_keep DPCTLSyclQueueRef QRef) } } -void DPCTLQueue_Memcpy(__dpctl_keep const DPCTLSyclQueueRef QRef, - void *Dest, - const void *Src, - size_t Count) +DPCTLSyclEventRef DPCTLQueue_Memcpy(__dpctl_keep const DPCTLSyclQueueRef QRef, + void *Dest, + const void *Src, + size_t Count) { auto Q = unwrap(QRef); if (Q) { - auto event = Q->memcpy(Dest, Src, Count); - event.wait(); + sycl::event ev; + try { + ev = Q->memcpy(Dest, Src, Count); + } catch (const sycl::runtime_error &re) { + // todo: log error + std::cerr << re.what() << '\n'; + return nullptr; + } + return wrap(new event(ev)); + } + else { + // todo: log error + std::cerr << "QRef passed to memcpy was NULL" << '\n'; + return nullptr; } } -void DPCTLQueue_Prefetch(__dpctl_keep DPCTLSyclQueueRef QRef, - const void *Ptr, - size_t Count) +DPCTLSyclEventRef DPCTLQueue_Prefetch(__dpctl_keep DPCTLSyclQueueRef QRef, + const void *Ptr, + size_t Count) { auto Q = unwrap(QRef); if (Q) { - auto event = Q->prefetch(Ptr, Count); - event.wait(); + sycl::event ev; + try { + ev = Q->prefetch(Ptr, Count); + } catch (sycl::runtime_error &re) { + // todo: log error + std::cerr << re.what() << '\n'; + return nullptr; + } + return wrap(new event(ev)); + } + else { + // todo: log error + std::cerr << "QRef passed to prefetch was NULL" << '\n'; + return nullptr; } } -void DPCTLQueue_MemAdvise(__dpctl_keep DPCTLSyclQueueRef QRef, - const void *Ptr, - size_t Count, - int Advice) +DPCTLSyclEventRef DPCTLQueue_MemAdvise(__dpctl_keep DPCTLSyclQueueRef QRef, + const void *Ptr, + size_t Count, + int Advice) { auto Q = unwrap(QRef); if (Q) { - auto event = - Q->mem_advise(Ptr, Count, static_cast(Advice)); - event.wait(); + sycl::event ev; + try { + ev = Q->mem_advise(Ptr, Count, static_cast(Advice)); + } catch (const sycl::runtime_error &re) { + // todo: log error + std::cerr << re.what() << '\n'; + return nullptr; + } + return wrap(new event(ev)); + } + else { + // todo: log error + std::cerr << "QRef passed to prefetch was NULL" << '\n'; + return nullptr; } } diff --git a/dpctl-capi/tests/test_sycl_usm_interface.cpp b/dpctl-capi/tests/test_sycl_usm_interface.cpp index c646a6b7cc..414c37a565 100644 --- a/dpctl-capi/tests/test_sycl_usm_interface.cpp +++ b/dpctl-capi/tests/test_sycl_usm_interface.cpp @@ -56,12 +56,23 @@ void common_test_body(size_t nbytes, auto QueueDev = DPCTLQueue_GetDevice(Q); EXPECT_TRUE(DPCTLDevice_AreEq(Dev, QueueDev)); - EXPECT_NO_FATAL_FAILURE(DPCTLQueue_Prefetch(Q, Ptr, nbytes)); - EXPECT_NO_FATAL_FAILURE(DPCTLQueue_MemAdvise(Q, Ptr, nbytes, 0)); - + DPCTLSyclEventRef E1Ref = nullptr, E2Ref = nullptr, E3Ref = nullptr; + EXPECT_NO_FATAL_FAILURE(E1Ref = DPCTLQueue_Prefetch(Q, Ptr, nbytes)); + EXPECT_TRUE(E1Ref != nullptr); + EXPECT_NO_FATAL_FAILURE(E2Ref = DPCTLQueue_MemAdvise(Q, Ptr, nbytes, 0)); + EXPECT_TRUE(E2Ref != nullptr); + + EXPECT_NO_FATAL_FAILURE(DPCTLEvent_Wait(E1Ref)); + DPCTLEvent_Delete(E1Ref); + EXPECT_NO_FATAL_FAILURE(DPCTLEvent_Wait(E2Ref)); + DPCTLEvent_Delete(E2Ref); try { unsigned short *host_ptr = new unsigned short[nbytes]; - EXPECT_NO_FATAL_FAILURE(DPCTLQueue_Memcpy(Q, host_ptr, Ptr, nbytes)); + EXPECT_NO_FATAL_FAILURE( + E3Ref = DPCTLQueue_Memcpy(Q, host_ptr, Ptr, nbytes)); + EXPECT_TRUE(E3Ref != nullptr); + EXPECT_NO_FATAL_FAILURE(DPCTLEvent_Wait(E3Ref)); + DPCTLEvent_Delete(E3Ref); delete[] host_ptr; } catch (std::bad_alloc const &ba) { // pass diff --git a/dpctl/_backend.pxd b/dpctl/_backend.pxd index 4af83deb4e..aa29740d82 100644 --- a/dpctl/_backend.pxd +++ b/dpctl/_backend.pxd @@ -355,16 +355,16 @@ cdef extern from "dpctl_sycl_queue_interface.h": const DPCTLSyclEventRef *DepEvents, size_t NDepEvents) cdef void DPCTLQueue_Wait(const DPCTLSyclQueueRef QRef) - cdef void DPCTLQueue_Memcpy( + cdef DPCTLSyclEventRef DPCTLQueue_Memcpy( const DPCTLSyclQueueRef Q, void *Dest, const void *Src, size_t Count) - cdef void DPCTLQueue_Prefetch( + cdef DPCTLSyclEventRef DPCTLQueue_Prefetch( const DPCTLSyclQueueRef Q, const void *Src, size_t Count) - cdef void DPCTLQueue_MemAdvise( + cdef DPCTLSyclEventRef DPCTLQueue_MemAdvise( const DPCTLSyclQueueRef Q, const void *Src, size_t Count, diff --git a/dpctl/_sycl_queue.pyx b/dpctl/_sycl_queue.pyx index d8ff0141ad..d76032f996 100644 --- a/dpctl/_sycl_queue.pyx +++ b/dpctl/_sycl_queue.pyx @@ -30,6 +30,8 @@ from ._backend cimport ( # noqa: E211 DPCTLDevice_Delete, DPCTLDeviceMgr_GetCachedContext, DPCTLDeviceSelector_Delete, + DPCTLEvent_Delete, + DPCTLEvent_Wait, DPCTLFilterSelector_Create, DPCTLQueue_AreEq, DPCTLQueue_Copy, @@ -812,6 +814,7 @@ cdef class SyclQueue(_SyclQueue): cpdef memcpy(self, dest, src, size_t count): cdef void *c_dest cdef void *c_src + cdef DPCTLSyclEventRef ERef = NULL if isinstance(dest, _Memory): c_dest = (<_Memory>dest).memory_ptr @@ -823,10 +826,17 @@ cdef class SyclQueue(_SyclQueue): else: raise TypeError("Parameter `src` should have type _Memory.") - DPCTLQueue_Memcpy(self._queue_ref, c_dest, c_src, count) + ERef = DPCTLQueue_Memcpy(self._queue_ref, c_dest, c_src, count) + if (ERef is NULL): + raise RuntimeError( + "SyclQueue.memcpy operation encountered an error" + ) + DPCTLEvent_Wait(ERef) + DPCTLEvent_Delete(ERef) cpdef prefetch(self, mem, size_t count=0): cdef void *ptr + cdef DPCTLSyclEventRef ERef = NULL if isinstance(mem, _Memory): ptr = (<_Memory>mem).memory_ptr @@ -836,10 +846,17 @@ cdef class SyclQueue(_SyclQueue): if (count <=0 or count > self.nbytes): count = self.nbytes - DPCTLQueue_Prefetch(self._queue_ref, ptr, count) + ERef = DPCTLQueue_Prefetch(self._queue_ref, ptr, count) + if (ERef is NULL): + raise RuntimeError( + "SyclQueue.prefetch encountered an error" + ) + DPCTLEvent_Wait(ERef) + DPCTLEvent_Delete(ERef) cpdef mem_advise(self, mem, size_t count, int advice): cdef void *ptr + cdef DPCTLSyclEventRef ERef = NULL if isinstance(mem, _Memory): ptr = (<_Memory>mem).memory_ptr @@ -849,7 +866,13 @@ cdef class SyclQueue(_SyclQueue): if (count <=0 or count > self.nbytes): count = self.nbytes - DPCTLQueue_MemAdvise(self._queue_ref, ptr, count, advice) + ERef = DPCTLQueue_MemAdvise(self._queue_ref, ptr, count, advice) + if (ERef is NULL): + raise RuntimeError( + "SyclQueue.mem_advise operation encountered an error" + ) + DPCTLEvent_Wait(ERef) + DPCTLEvent_Delete(ERef) @property def is_in_order(self): diff --git a/dpctl/memory/_memory.pyx b/dpctl/memory/_memory.pyx index a0164d235c..beacd34dc6 100644 --- a/dpctl/memory/_memory.pyx +++ b/dpctl/memory/_memory.pyx @@ -34,6 +34,8 @@ from dpctl._backend cimport ( # noqa: E211 DPCTLaligned_alloc_host, DPCTLaligned_alloc_shared, DPCTLContext_Delete, + DPCTLEvent_Delete, + DPCTLEvent_Wait, DPCTLfree_with_queue, DPCTLmalloc_device, DPCTLmalloc_host, @@ -45,6 +47,7 @@ from dpctl._backend cimport ( # noqa: E211 DPCTLQueue_Memcpy, DPCTLSyclContextRef, DPCTLSyclDeviceRef, + DPCTLSyclEventRef, DPCTLSyclUSMRef, DPCTLUSM_GetPointerDevice, DPCTLUSM_GetPointerType, @@ -79,20 +82,26 @@ cdef void copy_via_host(void *dest_ptr, SyclQueue dest_queue, """ # could also have used bytearray(nbytes) cdef unsigned char[::1] host_buf = np.empty((nbytes,), dtype="|u1") + cdef DPCTLSyclEventRef E1Ref = NULL + cdef DPCTLSyclEventRef E2Ref = NULL - DPCTLQueue_Memcpy( + E1Ref = DPCTLQueue_Memcpy( src_queue.get_queue_ref(), &host_buf[0], src_ptr, nbytes ) + DPCTLEvent_Wait(E1Ref) - DPCTLQueue_Memcpy( + E2Ref = DPCTLQueue_Memcpy( dest_queue.get_queue_ref(), dest_ptr, &host_buf[0], nbytes ) + DPCTLEvent_Wait(E2Ref) + DPCTLEvent_Delete(E1Ref) + DPCTLEvent_Delete(E2Ref) def _to_memory(unsigned char[::1] b, str usm_kind): @@ -356,6 +365,7 @@ cdef class _Memory: """ # Cython does the right thing here cdef unsigned char[::1] host_buf = obj + cdef DPCTLSyclEventRef ERef = NULL if (host_buf is None): # Python object did not have buffer interface @@ -368,12 +378,14 @@ cdef class _Memory: .format(self.nbytes) ) # call kernel to copy from - DPCTLQueue_Memcpy( + ERef = DPCTLQueue_Memcpy( self.queue.get_queue_ref(), &host_buf[0], # destination self.memory_ptr, # source self.nbytes ) + DPCTLEvent_Wait(ERef) + DPCTLEvent_Delete(ERef) return obj @@ -383,6 +395,7 @@ cdef class _Memory: """ cdef const unsigned char[::1] host_buf = obj cdef Py_ssize_t buf_len = len(host_buf) + cdef DPCTLSyclEventRef ERef = NULL if (buf_len > self.nbytes): raise ValueError( @@ -390,12 +403,14 @@ cdef class _Memory: "buffer".format(self.nbytes) ) # call kernel to copy from - DPCTLQueue_Memcpy( + ERef = DPCTLQueue_Memcpy( self.queue.get_queue_ref(), self.memory_ptr, # destination &host_buf[0], # source buf_len ) + DPCTLEvent_Wait(ERef) + DPCTLEvent_Delete(ERef) cpdef copy_from_device(self, object sycl_usm_ary): """ @@ -404,6 +419,7 @@ cdef class _Memory: """ cdef _USMBufferData src_buf cdef const char* kind + cdef DPCTLSyclEventRef ERef = NULL if not hasattr(sycl_usm_ary, '__sycl_usm_array_interface__'): raise ValueError( @@ -428,12 +444,14 @@ cdef class _Memory: src_buf.nbytes ) else: - DPCTLQueue_Memcpy( + ERef = DPCTLQueue_Memcpy( self.queue.get_queue_ref(), self.memory_ptr, src_buf.p, src_buf.nbytes ) + DPCTLEvent_Wait(ERef) + DPCTLEvent_Delete(ERef) else: raise TypeError From 583fbf7d84086c9691afd2b350b1800cf06cfb71 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 31 Aug 2021 14:36:42 -0500 Subject: [PATCH 2/2] Tests for Memcpy, Prefetch, MemAdivse with null QRef --- .../tests/test_sycl_queue_interface.cpp | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/dpctl-capi/tests/test_sycl_queue_interface.cpp b/dpctl-capi/tests/test_sycl_queue_interface.cpp index 2c2d246a70..a75262eb20 100644 --- a/dpctl-capi/tests/test_sycl_queue_interface.cpp +++ b/dpctl-capi/tests/test_sycl_queue_interface.cpp @@ -327,6 +327,24 @@ TEST(TestDPCTLSyclQueueInterface, CheckPropertyHandling) EXPECT_NO_FATAL_FAILURE(DPCTLDeviceSelector_Delete(DSRef)); } +TEST(TestDPCTLSyclQueueInterface, CheckMemOpsZeroQRef) +{ + DPCTLSyclQueueRef QRef = nullptr; + void *p1 = nullptr; + void *p2 = nullptr; + size_t n_bytes = 0; + DPCTLSyclEventRef ERef = nullptr; + + ASSERT_NO_FATAL_FAILURE(ERef = DPCTLQueue_Memcpy(QRef, p1, p2, n_bytes)); + ASSERT_FALSE(bool(ERef)); + + ASSERT_NO_FATAL_FAILURE(ERef = DPCTLQueue_Prefetch(QRef, p1, n_bytes)); + ASSERT_FALSE(bool(ERef)); + + ASSERT_NO_FATAL_FAILURE(ERef = DPCTLQueue_MemAdvise(QRef, p1, n_bytes, 0)); + ASSERT_FALSE(bool(ERef)); +} + TEST_P(TestDPCTLQueueMemberFunctions, CheckGetBackend) { auto q = unwrap(QRef); @@ -364,6 +382,31 @@ TEST_P(TestDPCTLQueueMemberFunctions, CheckGetDevice) EXPECT_NO_FATAL_FAILURE(DPCTLDevice_Delete(D)); } +TEST_P(TestDPCTLQueueMemberFunctions, CheckMemOpsNullPtr) +{ + void *p1 = nullptr; + void *p2 = nullptr; + size_t n_bytes = 256; + DPCTLSyclEventRef ERef = nullptr; + + ASSERT_NO_FATAL_FAILURE(ERef = DPCTLQueue_Memcpy(QRef, p1, p2, n_bytes)); + ASSERT_FALSE(bool(ERef)); + + ASSERT_NO_FATAL_FAILURE(ERef = DPCTLQueue_Prefetch(QRef, p1, n_bytes)); + if (ERef) { + ASSERT_NO_FATAL_FAILURE(DPCTLEvent_Wait(ERef)); + ASSERT_NO_FATAL_FAILURE(DPCTLEvent_Delete(ERef)); + ERef = nullptr; + } + + ASSERT_NO_FATAL_FAILURE(ERef = DPCTLQueue_MemAdvise(QRef, p1, n_bytes, 0)); + if (ERef) { + ASSERT_NO_FATAL_FAILURE(DPCTLEvent_Wait(ERef)); + ASSERT_NO_FATAL_FAILURE(DPCTLEvent_Delete(ERef)); + ERef = nullptr; + } +} + INSTANTIATE_TEST_SUITE_P( DPCTLQueueMemberFuncTests, TestDPCTLQueueMemberFunctions,