Skip to content

Commit 5c97641

Browse files
kmuehlbauer and shoyer
authored and committed
ENH: enable `H5NetCDFStore` to work with already open h5netcdf.File and h5netcdf.Group objects (#3618)
* ENH: enable `H5NetCDFStore` to work with already open h5netcdf.File and h5netcdf.Group objects, add test
* FIX: add `.open` method for file-like objects
* FIX: reformat using black
* WIP: add item to whats-new.rst
* FIX: temporary fix to tackle issue #3680
* FIX: do not use private API, use find_root_and_group instead
* FIX: reformat using black
1 parent c32e58b commit 5c97641

File tree

5 files changed

+77
-20
lines changed

5 files changed

+77
-20
lines changed

doc/whats-new.rst

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,11 @@ Breaking changes
3030

3131
New Features
3232
~~~~~~~~~~~~
33+
- Support using an existing, opened h5netcdf ``File`` with
34+
:py:class:`~xarray.backends.H5NetCDFStore`. This permits creating an
35+
:py:class:`~xarray.Dataset` from a h5netcdf ``File`` that has been opened
36+
using other means (:issue:`3618`).
37+
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
3338
- Implement :py:func:`median` and :py:func:`nanmedian` for dask arrays. This works by rechunking
3439
to a single chunk along all reduction axes. (:issue:`2999`).
3540
By `Deepak Cherian <https://github.com/dcherian>`_.

xarray/backends/api.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -503,7 +503,7 @@ def maybe_decode_store(store, lock=False):
503503
elif engine == "pydap":
504504
store = backends.PydapDataStore.open(filename_or_obj, **backend_kwargs)
505505
elif engine == "h5netcdf":
506-
store = backends.H5NetCDFStore(
506+
store = backends.H5NetCDFStore.open(
507507
filename_or_obj, group=group, lock=lock, **backend_kwargs
508508
)
509509
elif engine == "pynio":
@@ -527,7 +527,7 @@ def maybe_decode_store(store, lock=False):
527527
if engine == "scipy":
528528
store = backends.ScipyDataStore(filename_or_obj, **backend_kwargs)
529529
elif engine == "h5netcdf":
530-
store = backends.H5NetCDFStore(
530+
store = backends.H5NetCDFStore.open(
531531
filename_or_obj, group=group, lock=lock, **backend_kwargs
532532
)
533533

@@ -981,7 +981,7 @@ def open_mfdataset(
981981
WRITEABLE_STORES: Dict[str, Callable] = {
982982
"netcdf4": backends.NetCDF4DataStore.open,
983983
"scipy": backends.ScipyDataStore,
984-
"h5netcdf": backends.H5NetCDFStore,
984+
"h5netcdf": backends.H5NetCDFStore.open,
985985
}
986986

987987

xarray/backends/common.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ def find_root_and_group(ds):
3434
"""Find the root and group name of a netCDF4/h5netcdf dataset."""
3535
hierarchy = ()
3636
while ds.parent is not None:
37-
hierarchy = (ds.name,) + hierarchy
37+
hierarchy = (ds.name.split("/")[-1],) + hierarchy
3838
ds = ds.parent
3939
group = "/" + "/".join(hierarchy)
4040
return ds, group

xarray/backends/h5netcdf_.py

Lines changed: 46 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -4,9 +4,9 @@
44

55
from .. import Variable
66
from ..core import indexing
7-
from ..core.utils import FrozenDict
8-
from .common import WritableCFDataStore
9-
from .file_manager import CachingFileManager
7+
from ..core.utils import FrozenDict, is_remote_uri
8+
from .common import WritableCFDataStore, find_root_and_group
9+
from .file_manager import CachingFileManager, DummyFileManager
1010
from .locks import HDF5_LOCK, combine_locks, ensure_lock, get_write_lock
1111
from .netCDF4_ import (
1212
BaseNetCDF4Array,
@@ -69,8 +69,47 @@ class H5NetCDFStore(WritableCFDataStore):
6969
"""Store for reading and writing data via h5netcdf
7070
"""
7171

72-
def __init__(
73-
self,
72+
__slots__ = (
73+
"autoclose",
74+
"format",
75+
"is_remote",
76+
"lock",
77+
"_filename",
78+
"_group",
79+
"_manager",
80+
"_mode",
81+
)
82+
83+
def __init__(self, manager, group=None, mode=None, lock=HDF5_LOCK, autoclose=False):
84+
85+
import h5netcdf
86+
87+
if isinstance(manager, (h5netcdf.File, h5netcdf.Group)):
88+
if group is None:
89+
root, group = find_root_and_group(manager)
90+
else:
91+
if not type(manager) is h5netcdf.File:
92+
raise ValueError(
93+
"must supply a h5netcdf.File if the group "
94+
"argument is provided"
95+
)
96+
root = manager
97+
manager = DummyFileManager(root)
98+
99+
self._manager = manager
100+
self._group = group
101+
self._mode = mode
102+
self.format = None
103+
# todo: utilizing find_root_and_group seems a bit clunky
104+
# making filename available on h5netcdf.Group seems better
105+
self._filename = find_root_and_group(self.ds)[0].filename
106+
self.is_remote = is_remote_uri(self._filename)
107+
self.lock = ensure_lock(lock)
108+
self.autoclose = autoclose
109+
110+
@classmethod
111+
def open(
112+
cls,
74113
filename,
75114
mode="r",
76115
format=None,
@@ -86,22 +125,14 @@ def __init__(
86125

87126
kwargs = {"invalid_netcdf": invalid_netcdf}
88127

89-
self._manager = CachingFileManager(
90-
h5netcdf.File, filename, mode=mode, kwargs=kwargs
91-
)
92-
93128
if lock is None:
94129
if mode == "r":
95130
lock = HDF5_LOCK
96131
else:
97132
lock = combine_locks([HDF5_LOCK, get_write_lock(filename)])
98133

99-
self._group = group
100-
self.format = format
101-
self._filename = filename
102-
self._mode = mode
103-
self.lock = ensure_lock(lock)
104-
self.autoclose = autoclose
134+
manager = CachingFileManager(h5netcdf.File, filename, mode=mode, kwargs=kwargs)
135+
return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)
105136

106137
def _acquire(self, needs_lock=True):
107138
with self._manager.acquire_context(needs_lock) as root:

xarray/tests/test_backends.py

Lines changed: 22 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2182,7 +2182,7 @@ class TestH5NetCDFData(NetCDF4Base):
21822182
@contextlib.contextmanager
21832183
def create_store(self):
21842184
with create_tmp_file() as tmp_file:
2185-
yield backends.H5NetCDFStore(tmp_file, "w")
2185+
yield backends.H5NetCDFStore.open(tmp_file, "w")
21862186

21872187
@pytest.mark.filterwarnings("ignore:complex dtypes are supported by h5py")
21882188
@pytest.mark.parametrize(
@@ -2345,6 +2345,27 @@ def test_dump_encodings_h5py(self):
23452345
assert actual.x.encoding["compression"] == "lzf"
23462346
assert actual.x.encoding["compression_opts"] is None
23472347

2348+
def test_already_open_dataset_group(self):
2349+
import h5netcdf
2350+
2351+
with create_tmp_file() as tmp_file:
2352+
with nc4.Dataset(tmp_file, mode="w") as nc:
2353+
group = nc.createGroup("g")
2354+
v = group.createVariable("x", "int")
2355+
v[...] = 42
2356+
2357+
h5 = h5netcdf.File(tmp_file, mode="r")
2358+
store = backends.H5NetCDFStore(h5["g"])
2359+
with open_dataset(store) as ds:
2360+
expected = Dataset({"x": ((), 42)})
2361+
assert_identical(expected, ds)
2362+
2363+
h5 = h5netcdf.File(tmp_file, mode="r")
2364+
store = backends.H5NetCDFStore(h5, group="g")
2365+
with open_dataset(store) as ds:
2366+
expected = Dataset({"x": ((), 42)})
2367+
assert_identical(expected, ds)
2368+
23482369

23492370
@requires_h5netcdf
23502371
class TestH5NetCDFFileObject(TestH5NetCDFData):

0 commit comments

Comments (0)