Skip to content

Commit eece079

Browse files
andersy005 authored and max-sixty committed
Harmonize FillValue and missing_value during encoding and decoding steps (#3502)
* Replace `equivalent()` with `allclose_or_equiv()`
* Ensure _FillValue & missing_value are cast to same dtype as data's
* Use Numpy scalar during type casting
* Update ValueError message
* Formatting only
* Update whats-new.rst
1 parent 810345c commit eece079

File tree

3 files changed

+29
-4
lines changed

3 files changed

+29
-4
lines changed

doc/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ New Features
7979

8080
Bug fixes
8181
~~~~~~~~~
82+
- Harmonize `_FillValue`, `missing_value` during encoding and decoding steps. (:pull:`3502`)
83+
By `Anderson Banihirwe <https://github.com/andersy005>`_.
8284
- Fix regression introduced in v0.14.0 that would cause a crash if dask is installed
8385
but cloudpickle isn't (:issue:`3401`) by `Rhys Doyle <https://github.com/rdoyle45>`_
8486
- Fix grouping over variables with NaNs. (:issue:`2383`, :pull:`3406`).

xarray/coding/variables.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
from ..core import dtypes, duck_array_ops, indexing
1010
from ..core.pycompat import dask_array_type
11-
from ..core.utils import equivalent
1211
from ..core.variable import Variable
1312

1413

@@ -152,18 +151,25 @@ def encode(self, variable, name=None):
152151
fv = encoding.get("_FillValue")
153152
mv = encoding.get("missing_value")
154153

155-
if fv is not None and mv is not None and not equivalent(fv, mv):
154+
if (
155+
fv is not None
156+
and mv is not None
157+
and not duck_array_ops.allclose_or_equiv(fv, mv)
158+
):
156159
raise ValueError(
157-
"Variable {!r} has multiple fill values {}. "
158-
"Cannot encode data. ".format(name, [fv, mv])
160+
f"Variable {name!r} has conflicting _FillValue ({fv}) and missing_value ({mv}). Cannot encode data."
159161
)
160162

161163
if fv is not None:
164+
# Ensure _FillValue is cast to same dtype as data's
165+
encoding["_FillValue"] = data.dtype.type(fv)
162166
fill_value = pop_to(encoding, attrs, "_FillValue", name=name)
163167
if not pd.isnull(fill_value):
164168
data = duck_array_ops.fillna(data, fill_value)
165169

166170
if mv is not None:
171+
# Ensure missing_value is cast to same dtype as data's
172+
encoding["missing_value"] = data.dtype.type(mv)
167173
fill_value = pop_to(encoding, attrs, "missing_value", name=name)
168174
if not pd.isnull(fill_value) and fv is None:
169175
data = duck_array_ops.fillna(data, fill_value)

xarray/tests/test_coding.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,23 @@ def test_CFMaskCoder_decode():
2020
assert_identical(expected, encoded)
2121

2222

23+
def test_CFMaskCoder_encode_missing_fill_values_conflict():
24+
original = xr.Variable(
25+
("x",),
26+
[0.0, -1.0, 1.0],
27+
encoding={"_FillValue": np.float32(1e20), "missing_value": np.float64(1e20)},
28+
)
29+
coder = variables.CFMaskCoder()
30+
encoded = coder.encode(original)
31+
32+
assert encoded.dtype == encoded.attrs["missing_value"].dtype
33+
assert encoded.dtype == encoded.attrs["_FillValue"].dtype
34+
35+
with pytest.warns(variables.SerializationWarning):
36+
roundtripped = coder.decode(coder.encode(original))
37+
assert_identical(roundtripped, original)
38+
39+
2340
def test_CFMaskCoder_missing_value():
2441
expected = xr.DataArray(
2542
np.array([[26915, 27755, -9999, 27705], [25595, -9999, 28315, -9999]]),

0 commit comments

Comments
 (0)