Skip to content

Commit 239309f

Browse files
Convert 360_day calendars by choosing random dates to drop or add (#8603)
* Convert 360 calendar randomly * add note to whats new * add pull number to whats new entry * run pre-commit * Change test to use recommended freq * Apply suggestions from code review Co-authored-by: Spencer Clark <[email protected]> * Fix merge - remove rng arg --------- Co-authored-by: Spencer Clark <[email protected]>
1 parent 2b2de81 commit 239309f

File tree

3 files changed

+86
-8
lines changed

3 files changed

+86
-8
lines changed

doc/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ v2024.04.0 (unreleased)
2222

2323
New Features
2424
~~~~~~~~~~~~
25+
- New "random" method for converting to and from 360_day calendars (:pull:`8603`).
26+
By `Pascal Bourgault <https://github.com/aulemahal>`_.
2527

2628

2729
Breaking changes

xarray/coding/calendar_ops.py

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def convert_calendar(
6464
The target calendar name.
6565
dim : str
6666
Name of the time coordinate in the input DataArray or Dataset.
67-
align_on : {None, 'date', 'year'}
67+
align_on : {None, 'date', 'year', 'random'}
6868
Must be specified when either the source or target is a `"360_day"`
6969
calendar; ignored otherwise. See Notes.
7070
missing : any, optional
@@ -143,6 +143,16 @@ def convert_calendar(
143143
will be dropped as there are no equivalent dates in a standard calendar.
144144
145145
This option is best used with data on a frequency coarser than daily.
146+
147+
"random"
148+
Similar to "year", each day of year of the source is mapped to another day of year
149+
of the target. However, instead of always having the same missing days according
150+
to the source and target years, here 5 days are chosen randomly, one for each fifth
151+
of the year. However, February 29th is always missing when converting to a leap year,
152+
or its value is dropped when converting from a leap year. This is similar to the method
153+
used in the LOCA dataset (see Pierce, Cayan, and Thrasher (2014). doi:10.1175/JHM-D-14-0082.1).
154+
155+
This option is best used on daily data.
146156
"""
147157
from xarray.core.dataarray import DataArray
148158

@@ -174,14 +184,20 @@ def convert_calendar(
174184

175185
out = obj.copy()
176186

177-
if align_on == "year":
187+
if align_on in ["year", "random"]:
178188
# Special case for conversion involving 360_day calendar
179-
# Instead of translating dates directly, this tries to keep the position within a year similar.
180-
181-
new_doy = time.groupby(f"{dim}.year").map(
182-
_interpolate_day_of_year, target_calendar=calendar, use_cftime=use_cftime
183-
)
184-
189+
if align_on == "year":
190+
# Instead of translating dates directly, this tries to keep the position within a year similar.
191+
new_doy = time.groupby(f"{dim}.year").map(
192+
_interpolate_day_of_year,
193+
target_calendar=calendar,
194+
use_cftime=use_cftime,
195+
)
196+
elif align_on == "random":
197+
# The 5 days to remove are randomly chosen, one for each of the five 72-day periods of the year.
198+
new_doy = time.groupby(f"{dim}.year").map(
199+
_random_day_of_year, target_calendar=calendar, use_cftime=use_cftime
200+
)
185201
# Convert the source datetimes, but override the day of year with our new day of years.
186202
out[dim] = DataArray(
187203
[
@@ -229,6 +245,27 @@ def _interpolate_day_of_year(time, target_calendar, use_cftime):
229245
).astype(int)
230246

231247

248+
def _random_day_of_year(time, target_calendar, use_cftime):
    """Return a day of year in the new calendar.

    Removes Feb 29th and five other days chosen randomly within five sections of 72 days.

    Parameters
    ----------
    time
        Time coordinate covering a single year. Only ``time.dt.year``,
        ``time.dt.calendar`` and ``time.dt.dayofyear`` are read.
    target_calendar : str
        Name of the calendar being converted to.
    use_cftime
        Passed through unchanged to ``_days_in_year``.

    Returns
    -------
    The new day-of-year value for every element of ``time``, looked up by the
    element's source day of year. When the target is ``"360_day"``, source days
    that must be dropped are marked with ``-1``.
    """
    year = int(time.dt.year[0])
    source_calendar = time.dt.calendar
    # Start from the day-of-year values of a 360-day year: 1..360.
    new_doy = np.arange(360) + 1
    # One 0-based position drawn independently inside each consecutive
    # 72-day section: [0, 72), [72, 144), ..., [288, 360).
    rm_idx = np.random.default_rng().integers(0, 72, 5) + 72 * np.arange(5)
    if source_calendar == "360_day":
        # Expanding: every day after a drawn position is pushed one day later,
        # leaving five day-of-year values of the target year unassigned.
        for idx in rm_idx:
            new_doy[idx + 1 :] += 1
        if _days_in_year(year, target_calendar, use_cftime) == 366:
            # Day of year 60 is Feb 29th in a leap year: skip over it.
            new_doy[new_doy >= 60] += 1
    elif target_calendar == "360_day":
        # Shrinking: insert five -1 markers. The offsets compensate for the
        # shift caused by the earlier insertions, so the markers end up exactly
        # at positions ``rm_idx`` of the 365-long result.
        new_doy = np.insert(new_doy, rm_idx - np.arange(5), -1)
        if _days_in_year(year, source_calendar, use_cftime) == 366:
            # Feb 29th is day of year 60, i.e. 0-based index 59, because the
            # lookup below uses ``dayofyear - 1``. Inserting at 60 would mark
            # March 1st instead.
            new_doy = np.insert(new_doy, 59, -1)
    # If neither calendar is "360_day", the 1..360 mapping is left untouched.
    return new_doy[time.dt.dayofyear - 1]
267+
268+
232269
def _convert_to_new_calendar_with_new_day_of_year(
233270
date, day_of_year, calendar, use_cftime
234271
):

xarray/tests/test_calendar_ops.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,45 @@ def test_convert_calendar_360_days(source, target, freq, align_on):
106106
assert conv.size == 359 if freq == "D" else 359 * 4
107107

108108

109+
def test_convert_calendar_360_days_random():
    """Check ``align_on="random"`` conversions to and from a 360_day calendar.

    Covers three things: two conversions of the same input differ, Feb 29th is
    absent after converting a 360_day year to a standard leap year, and the
    days added when converting to "noleap" fall one in each fifth of the year.
    """
    # One leap year of daily data on a standard (numpy datetime64) calendar.
    da_std = DataArray(
        np.linspace(0, 1, 366),
        dims=("time",),
        coords={
            "time": date_range(
                "2004-01-01",
                "2004-12-31",
                freq="D",
                calendar="standard",
                use_cftime=False,
            )
        },
    )
    # The same year on a 360_day calendar.
    da_360 = DataArray(
        np.linspace(0, 1, 360),
        dims=("time",),
        coords={
            "time": date_range("2004-01-01", "2004-12-30", freq="D", calendar="360_day")
        },
    )

    # Two independent conversions are expected to differ somewhere.
    conv = convert_calendar(da_std, "360_day", align_on="random")
    conv2 = convert_calendar(da_std, "360_day", align_on="random")
    assert (conv != conv2).any()

    conv = convert_calendar(da_360, "standard", use_cftime=False, align_on="random")
    assert np.datetime64("2004-02-29") not in conv.time
    conv2 = convert_calendar(da_360, "standard", use_cftime=False, align_on="random")
    assert (conv2 != conv).any()

    # Ensure that added days are evenly distributed in the 5 fifths of each year
    # ``np.nan`` is used because the ``np.NaN`` alias was removed in NumPy 2.0.
    conv = convert_calendar(da_360, "noleap", align_on="random", missing=np.nan)
    # Keep only the filled-in (null) entries, i.e. the days that were added.
    conv = conv.where(conv.isnull(), drop=True)
    nandoys = conv.time.dt.dayofyear[:366]
    assert all(nandoys < np.array([74, 147, 220, 293, 366]))
    assert all(nandoys > np.array([0, 73, 146, 219, 292]))
146+
147+
109148
@requires_cftime
110149
@pytest.mark.parametrize(
111150
"source,target,freq",

0 commit comments

Comments
 (0)