Skip to content

Commit 387b507

Browse files
author
Alan D. Snow
committed
ENH: Preserve attrs in to_dataframe()
1 parent 49aa235 commit 387b507

File tree

5 files changed

+41
-1
lines changed

5 files changed

+41
-1
lines changed

doc/whats-new.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ New Features
3333
By `Jimmy Westling <https://github.com/illviljan>`_.
3434
- Raise more informative error when decoding time variables with invalid reference dates.
3535
(:issue:`5199`, :pull:`5288`). By `Giacomo Caria <https://github.com/gcaria>`_.
36+
- Preserve attrs in ``to_dataframe()`` (:issue:`5327`). By `Alan Snow <https://github.com/snowman2>`_.
3637

3738
Breaking changes
3839
~~~~~~~~~~~~~~~~

xarray/core/dataarray.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2714,7 +2714,9 @@ def to_dataframe(
27142714
ordered_dims = ds._normalize_dim_order(dim_order=dim_order)
27152715

27162716
df = ds._to_dataframe(ordered_dims)
2717+
attrs = df[unique_name].attrs
27172718
df.columns = [name if c == unique_name else c for c in df.columns]
2719+
df[name].attrs = attrs
27182720
return df
27192721

27202722
def to_series(self) -> pd.Series:

xarray/core/dataset.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5182,7 +5182,12 @@ def _to_dataframe(self, ordered_dims: Mapping[Hashable, int]):
51825182
for k in columns
51835183
]
51845184
index = self.coords.to_index([*ordered_dims])
5185-
return pd.DataFrame(dict(zip(columns, data)), index=index)
5185+
pdf = pd.DataFrame(dict(zip(columns, data)), index=index)
5186+
# add attributes to dataframe
5187+
pdf.attrs = self.attrs
5188+
for column in columns:
5189+
pdf[column].attrs = self[column].attrs
5190+
return pdf
51865191

51875192
def to_dataframe(self, dim_order: List[Hashable] = None) -> pd.DataFrame:
51885193
"""Convert this dataset into a pandas.DataFrame.

xarray/tests/test_dataarray.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3681,6 +3681,20 @@ def test_to_dataframe_0length(self):
36813681
assert len(actual) == 0
36823682
assert_array_equal(actual.index.names, list("ABC"))
36833683

3684+
def test_to_dataframe__attrs(self):
3685+
arr = DataArray(
3686+
np.zeros((5, 5)),
3687+
name="test",
3688+
dims=("y", "x"),
3689+
coords={"y": np.arange(1, 6), "x": np.arange(2, 7)},
3690+
attrs={"long_name": "Description of data array", "_FillValue": -1},
3691+
)
3692+
df = arr.to_dataframe()
3693+
assert df[df.columns[0]].attrs == {
3694+
"long_name": "Description of data array",
3695+
"_FillValue": -1,
3696+
}
3697+
36843698
def test_to_pandas_name_matches_coordinate(self):
36853699
# coordinate with same name as array
36863700
arr = DataArray([1, 2, 3], dims="x", name="x")

xarray/tests/test_dataset.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4205,6 +4205,24 @@ def test_to_and_from_dataframe(self):
42054205
expected = pd.DataFrame([[]], index=idx)
42064206
assert expected.equals(actual), (expected, actual)
42074207

4208+
def test_to_dataframe__attrs(self):
4209+
ds = Dataset(
4210+
{
4211+
"a": (
4212+
"t",
4213+
[1],
4214+
{"long_name": "Description of data array", "_FillValue": -1},
4215+
)
4216+
},
4217+
attrs={"test": "test"},
4218+
)
4219+
df = ds.to_dataframe()
4220+
assert df.attrs == {"test": "test"}
4221+
assert df[df.columns[0]].attrs == {
4222+
"long_name": "Description of data array",
4223+
"_FillValue": -1,
4224+
}
4225+
42084226
def test_from_dataframe_categorical(self):
42094227
cat = pd.CategoricalDtype(
42104228
categories=["foo", "bar", "baz", "qux", "quux", "corge"]

0 commit comments

Comments
 (0)