Skip to content

Commit c80aa82

Browse files
robintwshoyer
authored andcommitted
Added validation of attrs before saving to netCDF files (#991)
* Added validation of attrs before saving to netCDF files This allows us to give nice errors if users try to save a Dataset with attr values that can't be written to a netCDF file. Fixes #911 * Used ds.variables, improved docs and refactored tests * Added decorator for tests to ensure netCDF4 is installed * Removed unused collections.abc import * Fixed minor issues with tests and updated whats new
1 parent 3ecfa66 commit c80aa82

File tree

3 files changed

+141
-1
lines changed

3 files changed

+141
-1
lines changed

doc/whats-new.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ Deprecations
3939

4040
Enhancements
4141
~~~~~~~~~~~~
42+
- Add checking of ``attr`` names and values when saving to netCDF, raising useful
43+
error messages if they are invalid. (:issue:`911`).
44+
By `Robin Wilson <https://github.com/robintw>`_.
4245

4346
Bug fixes
4447
~~~~~~~~~

xarray/backends/api.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
from distutils.version import StrictVersion
66
from glob import glob
77
from io import BytesIO
8+
from numbers import Number
9+
10+
import numpy as np
811

912
from .. import backends, conventions
1013
from .common import ArrayWriter
@@ -79,6 +82,37 @@ def check_name(name):
7982
check_name(k)
8083

8184

85+
def _validate_attrs(dataset):
86+
"""`attrs` must have a string key and a value which is either: a number
87+
a string, an ndarray or a list/tuple of numbers/strings.
88+
"""
89+
def check_attr(name, value):
90+
if isinstance(name, basestring):
91+
if not name:
92+
raise ValueError('Invalid name for attr: string must be length '
93+
'1 or greater for serialization to netCDF '
94+
'files')
95+
else:
96+
raise TypeError("Invalid name for attr: {} must be a string for "
97+
"serialization to netCDF files".format(name))
98+
99+
if not isinstance(value, (basestring, Number, np.ndarray, np.number,
100+
list, tuple)):
101+
raise TypeError('Invalid value for attr: {} must be a number '
102+
'string, ndarray or a list/tuple of numbers/strings '
103+
'for serialization to netCDF '
104+
'files'.format(value))
105+
106+
# Check attrs on the dataset itself
107+
for k, v in dataset.attrs.items():
108+
check_attr(k, v)
109+
110+
# Check attrs on each variable within the dataset
111+
for variable in dataset.variables.values():
112+
for k, v in variable.attrs.items():
113+
check_attr(k, v)
114+
115+
82116
def open_dataset(filename_or_obj, group=None, decode_cf=True,
83117
mask_and_scale=True, decode_times=True,
84118
concat_characters=True, decode_coords=True, engine=None,
@@ -335,8 +369,9 @@ def to_netcdf(dataset, path=None, mode='w', format=None, group=None,
335369
elif engine is None:
336370
engine = _get_default_engine(path)
337371

338-
# validate Dataset keys and DataArray names
372+
# validate Dataset keys, DataArray names, and attr keys/values
339373
_validate_dataset_names(dataset)
374+
_validate_attrs(dataset)
340375

341376
try:
342377
store_cls = WRITEABLE_STORES[engine]

xarray/test/test_backends.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1070,3 +1070,105 @@ def test_extract_h5nc_encoding(self):
10701070
{'least_sigificant_digit': 2})
10711071
with self.assertRaisesRegexp(ValueError, 'unexpected encoding'):
10721072
_extract_nc4_encoding(var, raise_on_invalid=True)
1073+
1074+
1075+
class MiscObject:
1076+
pass
1077+
1078+
@requires_netCDF4
1079+
class TestValidateAttrs(TestCase):
1080+
def test_validating_attrs(self):
1081+
def new_dataset():
1082+
return Dataset({'data': ('y', np.arange(10.0))})
1083+
1084+
def new_dataset_and_dataset_attrs():
1085+
ds = new_dataset()
1086+
return ds, ds.attrs
1087+
1088+
def new_dataset_and_data_attrs():
1089+
ds = new_dataset()
1090+
return ds, ds.data.attrs
1091+
1092+
def new_dataset_and_coord_attrs():
1093+
ds = new_dataset()
1094+
return ds, ds.coords['y'].attrs
1095+
1096+
for new_dataset_and_attrs in [new_dataset_and_dataset_attrs,
1097+
new_dataset_and_data_attrs,
1098+
new_dataset_and_coord_attrs]:
1099+
ds, attrs = new_dataset_and_attrs()
1100+
1101+
attrs[123] = 'test'
1102+
with self.assertRaisesRegexp(TypeError, 'Invalid name for attr'):
1103+
ds.to_netcdf('test.nc')
1104+
1105+
ds, attrs = new_dataset_and_attrs()
1106+
attrs[MiscObject()] = 'test'
1107+
with self.assertRaisesRegexp(TypeError, 'Invalid name for attr'):
1108+
ds.to_netcdf('test.nc')
1109+
1110+
ds, attrs = new_dataset_and_attrs()
1111+
attrs[''] = 'test'
1112+
with self.assertRaisesRegexp(ValueError, 'Invalid name for attr'):
1113+
ds.to_netcdf('test.nc')
1114+
1115+
# This one should work
1116+
ds, attrs = new_dataset_and_attrs()
1117+
attrs['test'] = 'test'
1118+
with create_tmp_file() as tmp_file:
1119+
ds.to_netcdf(tmp_file)
1120+
1121+
ds, attrs = new_dataset_and_attrs()
1122+
attrs['test'] = {'a': 5}
1123+
with self.assertRaisesRegexp(TypeError, 'Invalid value for attr'):
1124+
ds.to_netcdf('test.nc')
1125+
1126+
ds, attrs = new_dataset_and_attrs()
1127+
attrs['test'] = MiscObject()
1128+
with self.assertRaisesRegexp(TypeError, 'Invalid value for attr'):
1129+
ds.to_netcdf('test.nc')
1130+
1131+
ds, attrs = new_dataset_and_attrs()
1132+
attrs['test'] = 5
1133+
with create_tmp_file() as tmp_file:
1134+
ds.to_netcdf(tmp_file)
1135+
1136+
ds, attrs = new_dataset_and_attrs()
1137+
attrs['test'] = 3.14
1138+
with create_tmp_file() as tmp_file:
1139+
ds.to_netcdf(tmp_file)
1140+
1141+
ds, attrs = new_dataset_and_attrs()
1142+
attrs['test'] = [1, 2, 3, 4]
1143+
with create_tmp_file() as tmp_file:
1144+
ds.to_netcdf(tmp_file)
1145+
1146+
ds, attrs = new_dataset_and_attrs()
1147+
attrs['test'] = (1.9, 2.5)
1148+
with create_tmp_file() as tmp_file:
1149+
ds.to_netcdf(tmp_file)
1150+
1151+
ds, attrs = new_dataset_and_attrs()
1152+
attrs['test'] = np.arange(5)
1153+
with create_tmp_file() as tmp_file:
1154+
ds.to_netcdf(tmp_file)
1155+
1156+
ds, attrs = new_dataset_and_attrs()
1157+
attrs['test'] = np.arange(12).reshape(3, 4)
1158+
with create_tmp_file() as tmp_file:
1159+
ds.to_netcdf(tmp_file)
1160+
1161+
ds, attrs = new_dataset_and_attrs()
1162+
attrs['test'] = 'This is a string'
1163+
with create_tmp_file() as tmp_file:
1164+
ds.to_netcdf(tmp_file)
1165+
1166+
ds, attrs = new_dataset_and_attrs()
1167+
attrs['test'] = ''
1168+
with create_tmp_file() as tmp_file:
1169+
ds.to_netcdf(tmp_file)
1170+
1171+
ds, attrs = new_dataset_and_attrs()
1172+
attrs['test'] = np.arange(12).reshape(3, 4)
1173+
with create_tmp_file() as tmp_file:
1174+
ds.to_netcdf(tmp_file)

0 commit comments

Comments
 (0)