Skip to content

Commit 5a9d816

Browse files
Merge pull request #1354 from IntelPython/master
Development milestone 0.14.6dev4
2 parents a53a7c1 + 96293fd commit 5a9d816

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+1513
-910
lines changed

.github/workflows/os-llvm-sycl-build.yml

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ on:
77
jobs:
88
install-compiler:
99
name: Build with nightly build of DPC++ toolchain
10-
runs-on: ubuntu-20.04
10+
runs-on: ubuntu-22.04
1111

1212
env:
1313
DOWNLOAD_URL_PREFIX: https://github.com/intel/llvm/releases/download
@@ -38,16 +38,17 @@ jobs:
3838
3939
- name: Download and install nightly and components
4040
env:
41-
USE_LATEST_SYCLOS: 0
41+
ARTIFACT_NAME: sycl_linux
42+
USE_LATEST_SYCLOS: 1
4243
shell: bash -l {0}
4344
run: |
4445
cd /home/runner/work
4546
mkdir -p sycl_bundle
4647
cd sycl_bundle
4748
if [[ "${USE_LATEST_SYCLOS:-0}" -eq "1" ]]; then
48-
# get list of shas and tags from remote, filter sycl-nightly tags and reverse order
49+
# get list of shas and tags from remote, filter nightly tags and reverse order
4950
export LLVM_TAGS=$(git -c 'versionsort.suffix=-' ls-remote --tags --sort='v:refname' https://github.com/intel/llvm.git | \
50-
grep sycl-nightly | awk '{a[i++]=$0} END {for (j=i-1; j>=0;) print a[j--] }')
51+
grep 'refs/tags/nightly-' | awk '{a[i++]=$0} END {for (j=i-1; j>=0;) print a[j--] }')
5152
# initialize
5253
unset DEPLOY_NIGHTLY_TAG
5354
unset DEPLOY_NIGHTLY_TAG_SHA
@@ -57,7 +58,7 @@ jobs:
5758
export NEXT_LLVM_TAG_SHA=$(echo ${NEXT_LLVM_TAG} | awk '{print $1}')
5859
export NEXT_NIGHTLY_TAG=$(python3 -c "import sys, urllib.parse as ul; print (ul.quote_plus(sys.argv[1]))" \
5960
$(echo ${NEXT_LLVM_TAG} | awk '{gsub(/^refs\/tags\//, "", $2)} {print $2}'))
60-
if [[ `wget -S --spider ${DOWNLOAD_URL_PREFIX}/${NEXT_NIGHTLY_TAG}/dpcpp-compiler.tar.gz 2>&1 | grep 'HTTP/1.1 200 OK'` ]];
61+
if [[ `wget -S --spider ${DOWNLOAD_URL_PREFIX}/${NEXT_NIGHTLY_TAG}/${ARTIFACT_NAME}.tar.gz 2>&1 | grep 'HTTP/1.1 200 OK'` ]];
6162
then
6263
export DEPLOY_NIGHTLY_TAG=${NEXT_NIGHTLY_TAG}
6364
export DEPLOY_LLVM_TAG_SHA=${NEXT_LLVM_TAG_SHA}
@@ -77,21 +78,20 @@ jobs:
7778
if [[ -f bundle_id.txt && ( "$(cat bundle_id.txt)" == "${DEPLOY_LLVM_TAG_SHA}" ) ]]; then
7879
echo "Using cached download of ${DEPLOY_LLVM_TAG_SHA}"
7980
else
80-
rm -rf dpcpp-compiler.tar.gz
81-
wget ${DOWNLOAD_URL_PREFIX}/${DEPLOY_NIGHTLY_TAG}/dpcpp-compiler.tar.gz && echo ${DEPLOY_LLVM_TAG_SHA} > bundle_id.txt || rm -rf bundle_id.txt
81+
rm -rf ${ARTIFACT_NAME}.tar.gz
82+
wget ${DOWNLOAD_URL_PREFIX}/${DEPLOY_NIGHTLY_TAG}/${ARTIFACT_NAME}.tar.gz && echo ${DEPLOY_LLVM_TAG_SHA} > bundle_id.txt || rm -rf bundle_id.txt
8283
[ -f ${OCLCPUEXP_FN} ] || wget ${DOWNLOAD_URL_PREFIX}/${DRIVER_PATH}/${OCLCPUEXP_FN} || rm -rf bundle_id.txt
8384
[ -f ${FPGAEMU_FN} ] || wget ${DOWNLOAD_URL_PREFIX}/${DRIVER_PATH}/${FPGAEMU_FN} || rm -rf bundle_id.txt
8485
[ -f ${TBB_FN} ] || wget ${TBB_URL}/${TBB_FN} || rm -rf bundle_id.txt
8586
rm -rf dpcpp_compiler
86-
tar xf dpcpp-compiler.tar.gz
87+
mkdir -p dpcpp_compiler
88+
tar xf ${ARTIFACT_NAME}.tar.gz -C dpcpp_compiler
8789
mkdir -p oclcpuexp
8890
mkdir -p fpgaemu
8991
[ -d oclcpuexp/x64 ] || tar xf ${OCLCPUEXP_FN} -C oclcpuexp
9092
[ -d fpgaemu/x64 ] || tar xf ${FPGAEMU_FN} -C fpgaemu
9193
[ -d ${TBB_INSTALL_DIR}/lib ] || tar xf ${TBB_FN}
92-
mkdir -p dpcpp_compiler/lib
93-
mkdir -p dpcpp_compiler/lib/oclfpga
94-
touch dpcpp_compiler/lib/oclfpga/fpgavars.sh
94+
cp oclcpuexp/x64/libOpenCL.so* dpcpp_compiler/lib/
9595
fi
9696
9797
- name: Install system components
@@ -121,7 +121,8 @@ jobs:
121121
cat << 'EOF' > set_allvars.sh
122122
#!/usr/bin/bash
123123
export SYCL_BUNDLE_FOLDER=/home/runner/work/sycl_bundle
124-
source ${SYCL_BUNDLE_FOLDER}/dpcpp_compiler/startup.sh
124+
export PATH=${SYCL_BUNDLE_FOLDER}/dpcpp_compiler/bin:${PATH}
125+
export LD_LIBRARY_PATH=${SYCL_BUNDLE_FOLDER}/dpcpp_compiler/lib:${LD_LIBRARY_PATH}
125126
export LD_LIBRARY_PATH=${SYCL_BUNDLE_FOLDER}/oclcpuexp/x64:${LD_LIBRARY_PATH}
126127
export LD_LIBRARY_PATH=${SYCL_BUNDLE_FOLDER}/fpgaemu/x64:${LD_LIBRARY_PATH}
127128
export LD_LIBRARY_PATH=${SYCL_BUNDLE_FOLDER}/${TBB_INSTALL_DIR}/lib/intel64/gcc4.8:${LD_LIBRARY_PATH}

README.md

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ to program on XPUs.
4343

4444
# Installing
4545

46-
You can install the library with [conda](https://anaconda.org/intel/dpctl) and
47-
[pip](https://pypi.org/project/dpctl/). It is also available in the [Intel(R)
46+
You can install the library using [conda](https://anaconda.org/intel/dpctl) or
47+
[pip](https://pypi.org/project/dpctl/) package managers. It is also available in the [Intel(R)
4848
Distribution for
4949
Python](https://www.intel.com/content/www/us/en/developer/tools/oneapi/distribution-for-python.html)
5050
(IDP).
@@ -70,12 +70,13 @@ cloud, use the following command:
7070
conda install dpctl -c intel
7171
```
7272

73-
## PyPi
73+
## Pip
7474

75-
To install `dpctl` from PyPi, run the following command:
75+
`dpctl` can be installed with `pip`, which can obtain wheel packages either from PyPI or from the Intel(R) channel on Anaconda.
76+
To install the `dpctl` wheel package from the Intel(R) channel on Anaconda, run the following command:
7677

7778
```bash
78-
pip3 install dpctl
79+
python -m pip install --index-url https://pypi.anaconda.org/intel/simple dpctl
7980
```
8081

8182
Installing the bleeding edge

conda-recipe/meta.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ requirements:
3333

3434
test:
3535
requires:
36+
- {{ compiler('c') }}
37+
- {{ compiler('cxx') }}
3638
- cython
3739
- setuptools
3840
- pytest

dpctl/tensor/CMakeLists.txt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,15 @@ set_source_files_properties(
5858
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/linear_sequences.cpp
5959
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions.cpp
6060
PROPERTIES COMPILE_OPTIONS "${_clang_prefix}-fno-fast-math")
61+
if (UNIX)
62+
set_source_files_properties(
63+
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions.cpp
64+
PROPERTIES COMPILE_DEFINITIONS "USE_STD_ABS_FOR_COMPLEX_TYPES;USE_STD_SQRT_FOR_COMPLEX_TYPES")
65+
endif()
6166
target_compile_options(${python_module_name} PRIVATE -fno-sycl-id-queries-fit-in-int)
6267
target_link_options(${python_module_name} PRIVATE -fsycl-device-code-split=per_kernel)
6368
if(UNIX)
64-
# this option is support on Linux only
69+
# this option is supported on Linux only
6570
target_link_options(${python_module_name} PRIVATE -fsycl-link-huge-device-code)
6671
endif()
6772
target_include_directories(${python_module_name}

dpctl/tensor/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@
158158
trunc,
159159
)
160160
from ._reduction import sum
161+
from ._testing import allclose
161162

162163
__all__ = [
163164
"Device",
@@ -301,4 +302,5 @@
301302
"tan",
302303
"tanh",
303304
"trunc",
305+
"allclose",
304306
]

dpctl/tensor/_copy_utils.py

Lines changed: 80 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
import dpctl.tensor as dpt
2525
import dpctl.tensor._tensor_impl as ti
2626
import dpctl.utils
27-
from dpctl.tensor._ctors import _get_dtype
27+
from dpctl.tensor._data_types import _get_dtype
2828
from dpctl.tensor._device import normalize_queue_device
2929

3030
__doc__ = (
@@ -351,14 +351,16 @@ def _empty_like_orderK(X, dt, usm_type=None, dev=None):
351351
)
352352
st = list(X.strides)
353353
perm = sorted(
354-
range(X.ndim), key=lambda d: builtins.abs(st[d]), reverse=True
354+
range(X.ndim),
355+
key=lambda d: builtins.abs(st[d]) if X.shape[d] > 1 else 0,
356+
reverse=True,
355357
)
356358
inv_perm = sorted(range(X.ndim), key=lambda i: perm[i])
357-
st_sorted = [st[i] for i in perm]
358359
sh = X.shape
359360
sh_sorted = tuple(sh[i] for i in perm)
360361
R = dpt.empty(sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C")
361-
if min(st_sorted) < 0:
362+
if min(st) < 0:
363+
st_sorted = [st[i] for i in perm]
362364
sl = tuple(
363365
slice(None, None, -1)
364366
if st_sorted[i] < 0
@@ -395,9 +397,14 @@ def _empty_like_pair_orderK(X1, X2, dt, res_shape, usm_type, dev):
395397
max_ndim = max(nd1, nd2)
396398
st1 += [0] * (max_ndim - len(st1))
397399
st2 += [0] * (max_ndim - len(st2))
400+
sh1 = list(X1.shape) + [0] * (max_ndim - nd1)
401+
sh2 = list(X2.shape) + [0] * (max_ndim - nd2)
398402
perm = sorted(
399403
range(max_ndim),
400-
key=lambda d: (builtins.abs(st1[d]), builtins.abs(st2[d])),
404+
key=lambda d: (
405+
builtins.abs(st1[d]) if sh1[d] > 1 else 0,
406+
builtins.abs(st2[d]) if sh2[d] > 1 else 0,
407+
),
401408
reverse=True,
402409
)
403410
inv_perm = sorted(range(max_ndim), key=lambda i: perm[i])
@@ -417,6 +424,74 @@ def _empty_like_pair_orderK(X1, X2, dt, res_shape, usm_type, dev):
417424
return dpt.permute_dims(R, inv_perm)
418425

419426

427+
def _empty_like_triple_orderK(X1, X2, X3, dt, res_shape, usm_type, dev):
    """
    Allocate an uninitialized array of shape ``res_shape`` whose memory
    layout is derived from the layouts of three input arrays.

    Args:
        X1, X2, X3: input arrays; each must be a ``dpt.usm_ndarray``.
        dt: data type passed to ``dpt.empty`` for the result.
        res_shape: shape of the array to allocate.
        usm_type: USM allocation type passed to ``dpt.empty``.
        dev: device/queue specification passed to ``dpt.empty``.

    Returns:
        A new array created with ``dpt.empty`` (possibly re-sliced and
        with permuted dimensions), or the result of
        ``_empty_like_pair_orderK`` when one input can be ignored.

    Raises:
        TypeError: if any of ``X1``, ``X2``, ``X3`` is not a
            ``dpt.usm_ndarray``.
    """
    for arg in (X1, X2, X3):
        if not isinstance(arg, dpt.usm_ndarray):
            raise TypeError(f"Expected usm_ndarray, got {type(arg)}")
    nd1 = X1.ndim
    nd2 = X2.ndim
    nd3 = X3.ndim
    # When two inputs already have the result shape and the remaining one
    # has fewer dimensions, only the two full-shape inputs drive the layout.
    if X1.shape == res_shape and X2.shape == res_shape and len(res_shape) > nd3:
        return _empty_like_pair_orderK(X1, X2, dt, res_shape, usm_type, dev)
    elif (
        X2.shape == res_shape and X3.shape == res_shape and len(res_shape) > nd1
    ):
        return _empty_like_pair_orderK(X2, X3, dt, res_shape, usm_type, dev)
    elif (
        X1.shape == res_shape and X3.shape == res_shape and len(res_shape) > nd2
    ):
        return _empty_like_pair_orderK(X1, X3, dt, res_shape, usm_type, dev)
    fl1 = X1.flags
    fl2 = X2.flags
    fl3 = X3.flags
    # Any input with the "C" flag set selects a C-ordered result; an
    # F-ordered result requires the "F" flag on all three inputs.
    if fl1["C"] or fl2["C"] or fl3["C"]:
        return dpt.empty(
            res_shape, dtype=dt, usm_type=usm_type, device=dev, order="C"
        )
    if fl1["F"] and fl2["F"] and fl3["F"]:
        return dpt.empty(
            res_shape, dtype=dt, usm_type=usm_type, device=dev, order="F"
        )
    max_ndim = max(nd1, nd2, nd3)
    # Pad strides and shapes with zeros so all lists have max_ndim entries.
    st1 = list(X1.strides) + [0] * (max_ndim - nd1)
    st2 = list(X2.strides) + [0] * (max_ndim - nd2)
    st3 = list(X3.strides) + [0] * (max_ndim - nd3)
    sh1 = list(X1.shape) + [0] * (max_ndim - nd1)
    sh2 = list(X2.shape) + [0] * (max_ndim - nd2)
    sh3 = list(X3.shape) + [0] * (max_ndim - nd3)
    # Order axes by decreasing absolute stride (X1 first, then X2, then X3
    # as tie-breakers). Axes of extent <= 1 contribute a key of 0 so their
    # stride value does not influence the ordering.
    perm = sorted(
        range(max_ndim),
        key=lambda d: (
            builtins.abs(st1[d]) if sh1[d] > 1 else 0,
            builtins.abs(st2[d]) if sh2[d] > 1 else 0,
            builtins.abs(st3[d]) if sh3[d] > 1 else 0,
        ),
        reverse=True,
    )
    inv_perm = sorted(range(max_ndim), key=lambda i: perm[i])
    st1_sorted = [st1[i] for i in perm]
    st2_sorted = [st2[i] for i in perm]
    st3_sorted = [st3[i] for i in perm]
    sh = res_shape
    sh_sorted = tuple(sh[i] for i in perm)
    R = dpt.empty(sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C")
    if max(min(st1_sorted), min(st2_sorted), min(st3_sorted)) < 0:
        # Reverse every sorted axis along which all three inputs have a
        # negative stride. The tuple spans all max_ndim sorted axes (not
        # just the first nd1), so no qualifying axis is left unreversed.
        sl = tuple(
            slice(None, None, -1)
            if (s1 < 0 and s2 < 0 and s3 < 0)
            else slice(None, None, None)
            for s1, s2, s3 in zip(st1_sorted, st2_sorted, st3_sorted)
        )
        R = R[sl]
    return dpt.permute_dims(R, inv_perm)
493+
494+
420495
def copy(usm_ary, order="K"):
421496
"""copy(ary, order="K")
422497

dpctl/tensor/_ctors.py

Lines changed: 3 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
import dpctl.tensor as dpt
2424
import dpctl.tensor._tensor_impl as ti
2525
import dpctl.utils
26+
from dpctl.tensor._copy_utils import _empty_like_orderK
27+
from dpctl.tensor._data_types import _get_dtype
2628
from dpctl.tensor._device import normalize_queue_device
2729
from dpctl.tensor._usmarray import _is_object_with_buffer_protocol
2830

@@ -32,24 +34,6 @@
3234
_host_set = frozenset([None])
3335

3436

35-
def _get_dtype(dtype, sycl_obj, ref_type=None):
36-
if dtype is None:
37-
if ref_type in [None, float] or np.issubdtype(ref_type, np.floating):
38-
dtype = ti.default_device_fp_type(sycl_obj)
39-
return dpt.dtype(dtype)
40-
if ref_type in [bool, np.bool_]:
41-
dtype = ti.default_device_bool_type(sycl_obj)
42-
return dpt.dtype(dtype)
43-
if ref_type is int or np.issubdtype(ref_type, np.integer):
44-
dtype = ti.default_device_int_type(sycl_obj)
45-
return dpt.dtype(dtype)
46-
if ref_type is complex or np.issubdtype(ref_type, np.complexfloating):
47-
dtype = ti.default_device_complex_type(sycl_obj)
48-
return dpt.dtype(dtype)
49-
raise TypeError(f"Reference type {ref_type} not recognized.")
50-
return dpt.dtype(dtype)
51-
52-
5337
def _array_info_dispatch(obj):
5438
if isinstance(obj, dpt.usm_ndarray):
5539
return obj.shape, obj.dtype, frozenset([obj.sycl_queue])
@@ -162,28 +146,7 @@ def _asarray_from_usm_ndarray(
162146
order = "C" if c_contig else "F"
163147
if order == "K":
164148
_ensure_native_dtype_device_support(dtype, copy_q.sycl_device)
165-
# new USM allocation
166-
res = dpt.usm_ndarray(
167-
usm_ndary.shape,
168-
dtype=dtype,
169-
buffer=usm_type,
170-
order="C",
171-
buffer_ctor_kwargs={"queue": copy_q},
172-
)
173-
original_strides = usm_ndary.strides
174-
ind = sorted(
175-
range(usm_ndary.ndim),
176-
key=lambda i: abs(original_strides[i]),
177-
reverse=True,
178-
)
179-
new_strides = tuple(res.strides[ind[i]] for i in ind)
180-
# reuse previously made USM allocation
181-
res = dpt.usm_ndarray(
182-
usm_ndary.shape,
183-
dtype=res.dtype,
184-
buffer=res.usm_data,
185-
strides=new_strides,
186-
)
149+
res = _empty_like_orderK(usm_ndary, dtype, usm_type, copy_q)
187150
else:
188151
_ensure_native_dtype_device_support(dtype, copy_q.sycl_device)
189152
res = dpt.usm_ndarray(

dpctl/tensor/_data_types.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,25 @@
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
1616

17+
from numpy import bool_ as np_bool_
18+
from numpy import complexfloating as np_complexfloating
1719
from numpy import dtype
20+
from numpy import floating as np_floating
21+
from numpy import integer as np_integer
22+
from numpy import issubdtype as np_issubdtype
23+
24+
from dpctl.tensor._tensor_impl import (
25+
default_device_bool_type as ti_default_device_bool_type,
26+
)
27+
from dpctl.tensor._tensor_impl import (
28+
default_device_complex_type as ti_default_device_complex_type,
29+
)
30+
from dpctl.tensor._tensor_impl import (
31+
default_device_fp_type as ti_default_device_fp_type,
32+
)
33+
from dpctl.tensor._tensor_impl import (
34+
default_device_int_type as ti_default_device_int_type,
35+
)
1836

1937
bool = dtype("bool")
2038
int8 = dtype("int8")
@@ -74,6 +92,32 @@ def isdtype(dtype_, kind):
7492
raise TypeError(f"Unsupported data type kind: {kind}")
7593

7694

95+
def _get_dtype(inp_dt, sycl_obj, ref_type=None):
96+
"""
97+
Type inference utility to construct data type
98+
object with defaults based on reference type.
99+
100+
_get_dtype is used by dpctl.tensor.asarray
101+
to infer data type of the output array from the
102+
input sequence.
103+
"""
104+
if inp_dt is None:
105+
if ref_type in [None, float] or np_issubdtype(ref_type, np_floating):
106+
fp_dt = ti_default_device_fp_type(sycl_obj)
107+
return dtype(fp_dt)
108+
if ref_type in [bool, np_bool_]:
109+
bool_dt = ti_default_device_bool_type(sycl_obj)
110+
return dtype(bool_dt)
111+
if ref_type is int or np_issubdtype(ref_type, np_integer):
112+
int_dt = ti_default_device_int_type(sycl_obj)
113+
return dtype(int_dt)
114+
if ref_type is complex or np_issubdtype(ref_type, np_complexfloating):
115+
cfp_dt = ti_default_device_complex_type(sycl_obj)
116+
return dtype(cfp_dt)
117+
raise TypeError(f"Reference type {ref_type} not recognized.")
118+
return dtype(inp_dt)
119+
120+
77121
__all__ = [
78122
"dtype",
79123
"isdtype",

0 commit comments

Comments
 (0)