
Commit 651f27f

bolliger32 authored and TomNicholas committed
Allow incomplete hypercubes in combine_by_coords (#3649)
* allow incomplete hypercubes in combine_by_coords
* If fill_value=None then still requires complete hypercube
* Closes #3648
1 parent aaf3738 commit 651f27f
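
For context, a minimal usage sketch of the behaviour this commit enables (the datasets mirror the new test added below; the names x1/x2/x3 are illustrative only):

    import xarray as xr

    # three 1x1 tiles covering only three corners of a 2x2 grid
    x1 = xr.Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [0]})
    x2 = xr.Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [1], "x": [0]})
    x3 = xr.Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [1]})

    # with the default fill_value, the missing corner is filled with NaN
    combined = xr.combine_by_coords([x1, x2, x3])

    # with fill_value=None, an incomplete hypercube is still an error
    try:
        xr.combine_by_coords([x1, x2, x3], fill_value=None)
    except ValueError as err:
        print(err)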

File tree

3 files changed: +48 -7 lines changed


doc/whats-new.rst

Lines changed: 3 additions & 0 deletions
@@ -41,6 +41,9 @@ New Features
 
 Bug fixes
 ~~~~~~~~~
+- Fix :py:meth:`xarray.combine_by_coords` to allow for combining incomplete
+  hypercubes of Datasets (:issue:`3648`). By `Ian Bolliger
+  <https://github.com/bolliger32>`_.
 - Fix :py:meth:`xarray.combine_by_coords` when combining cftime coordinates
   which span long time intervals (:issue:`3535`). By `Spencer Clark
   <https://github.com/spencerkclark>`_.

xarray/core/combine.py

Lines changed: 29 additions & 7 deletions
@@ -115,11 +115,12 @@ def _infer_concat_order_from_coords(datasets):
     return combined_ids, concat_dims
 
 
-def _check_shape_tile_ids(combined_tile_ids):
+def _check_dimension_depth_tile_ids(combined_tile_ids):
+    """
+    Check all tuples are the same length, i.e. check that all lists are
+    nested to the same depth.
+    """
     tile_ids = combined_tile_ids.keys()
-
-    # Check all tuples are the same length
-    # i.e. check that all lists are nested to the same depth
     nesting_depths = [len(tile_id) for tile_id in tile_ids]
     if not nesting_depths:
         nesting_depths = [0]
@@ -128,8 +129,13 @@ def _check_shape_tile_ids(combined_tile_ids):
             "The supplied objects do not form a hypercube because"
             " sub-lists do not have consistent depths"
         )
+    # return these just to be reused in _check_shape_tile_ids
+    return tile_ids, nesting_depths
 
-    # Check all lists along one dimension are same length
+
+def _check_shape_tile_ids(combined_tile_ids):
+    """Check all lists along one dimension are same length."""
+    tile_ids, nesting_depths = _check_dimension_depth_tile_ids(combined_tile_ids)
     for dim in range(nesting_depths[0]):
         indices_along_dim = [tile_id[dim] for tile_id in tile_ids]
         occurrences = Counter(indices_along_dim)
@@ -536,7 +542,8 @@ def combine_by_coords(
     coords : {'minimal', 'different', 'all' or list of str}, optional
         As per the 'data_vars' kwarg, but for coordinate variables.
     fill_value : scalar, optional
-        Value to use for newly missing values
+        Value to use for newly missing values. If None, raises a ValueError if
+        the passed Datasets do not create a complete hypercube.
     join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
         String indicating how to combine differing indexes
         (excluding concat_dim) in objects
@@ -653,6 +660,15 @@ def combine_by_coords(
         temperature    (y, x) float64 1.654 10.63 7.015 2.543 ... 12.46 2.22 15.96
         precipitation  (y, x) float64 0.2136 0.9974 0.7603 ... 0.6125 0.4654 0.5953
 
+    >>> xr.combine_by_coords([x1, x2, x3])
+    <xarray.Dataset>
+    Dimensions:        (x: 6, y: 4)
+    Coordinates:
+      * x              (x) int64 10 20 30 40 50 60
+      * y              (y) int64 0 1 2 3
+    Data variables:
+        temperature    (y, x) float64 1.654 10.63 7.015 nan ... 12.46 2.22 15.96
+        precipitation  (y, x) float64 0.2136 0.9974 0.7603 ... 0.6125 0.4654 0.5953
     """
 
     # Group by data vars
@@ -667,7 +683,13 @@ def combine_by_coords(
             list(datasets_with_same_vars)
         )
 
-        _check_shape_tile_ids(combined_ids)
+        if fill_value is None:
+            # check that datasets form complete hypercube
+            _check_shape_tile_ids(combined_ids)
+        else:
+            # check only that all datasets have same dimension depth for these
+            # vars
+            _check_dimension_depth_tile_ids(combined_ids)
 
         # Concatenate along all of concat_dims one by one to create single ds
         concatenated = _combine_nd(
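
To make the split between the two helpers concrete, here is a simplified, standalone illustration of what each check accepts and rejects. This re-implements the logic above on plain tile-id tuples; it is a sketch, not the actual xarray code:

    from collections import Counter

    def check_depths(tile_ids):
        # every tile id must have the same length, i.e. the same nesting depth
        nesting_depths = [len(tile_id) for tile_id in tile_ids] or [0]
        if len(set(nesting_depths)) != 1:
            raise ValueError("sub-lists do not have consistent depths")
        return nesting_depths

    def check_shape(tile_ids):
        # additionally, every index along each dimension must occur equally
        # often, i.e. the tile ids must fill a complete hypercube
        nesting_depths = check_depths(tile_ids)
        for dim in range(nesting_depths[0]):
            occurrences = Counter(tile_id[dim] for tile_id in tile_ids)
            if len(set(occurrences.values())) != 1:
                raise ValueError("sub-lists do not have consistent lengths")

    incomplete = [(0, 0), (0, 1), (1, 0)]  # one corner of a 2x2 grid is missing
    check_depths(incomplete)  # passes: the only check run when fill_value is not None
    check_shape(incomplete)   # raises ValueError, as combine_by_coords does when fill_value=None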

xarray/tests/test_combine.py

Lines changed: 16 additions & 0 deletions
@@ -711,6 +711,22 @@ def test_check_for_impossible_ordering(self):
         ):
             combine_by_coords([ds1, ds0])
 
+    def test_combine_by_coords_incomplete_hypercube(self):
+        # test that this succeeds with default fill_value
+        x1 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [0]})
+        x2 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [1], "x": [0]})
+        x3 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [1]})
+        actual = combine_by_coords([x1, x2, x3])
+        expected = Dataset(
+            {"a": (("y", "x"), [[1, 1], [1, np.nan]])},
+            coords={"y": [0, 1], "x": [0, 1]},
+        )
+        assert_identical(expected, actual)
+
+        # test that this fails if fill_value is None
+        with pytest.raises(ValueError):
+            combine_by_coords([x1, x2, x3], fill_value=None)
+
 
 @pytest.mark.filterwarnings(
     "ignore:In xarray version 0.15 `auto_combine` " "will be deprecated"
