@@ -179,10 +179,10 @@ step_climate <-
179
179
arg_is_lgl_scalar(skip )
180
180
181
181
time_aggr <- switch (time_type ,
182
- epiweek = lubridate :: epiweek ,
183
- week = lubridate :: isoweek ,
182
+ epiweek = epiweek_leap ,
183
+ week = isoweek_leap ,
184
184
month = lubridate :: month ,
185
- day = lubridate :: yday
185
+ day = yday_leap
186
186
)
187
187
188
188
recipes :: add_step(
@@ -258,22 +258,32 @@ prep.step_climate <- function(x, training, info = NULL, ...) {
258
258
wts <- wts %|| % rep(1 , nrow(training ))
259
259
260
260
modulus <- switch (x $ time_type ,
261
- epiweek = 53L ,
262
- week = 53L ,
261
+ epiweek = 52L , # only sometimes true
262
+ week = 52L ,
263
263
month = 12L ,
264
- day = 365L
264
+ day = 365L # only sometimes true
265
265
)
266
266
267
267
fn <- switch (x $ center_method ,
268
268
mean = function (x , w ) stats :: weighted.mean(x , w , na.rm = TRUE ),
269
269
median = function (x , w ) median(x , na.rm = TRUE )
270
270
)
271
-
272
- climate_table <- training %> %
271
+ # suppose it's week 52, and there is no week 53 this year; then
272
+ # as originally written for 1 week ahead this grabs from week 52+1 %% 53
273
+ # which will be week 53, not week 1.
274
+ ahead_period <- switch (x $ time_type ,
275
+ epiweek = lubridate :: weeks(x $ forecast_ahead ),
276
+ week = lubridate :: weeks(x $ forecast_ahead ),
277
+ month = months(x $ forecast_ahead ),
278
+ day = lubridate :: days(x $ forecast_ahead ),
279
+ )
280
+ climate_table <-
281
+ training %> %
273
282
mutate(
274
- .idx = x $ time_aggr(time_value ), .weights = wts ,
275
- .idx = (.idx - x $ forecast_ahead ) %% modulus ,
276
- .idx = dplyr :: case_when(.idx == 0 ~ modulus , TRUE ~ .idx )
283
+ # subtracts a month w/o rollover (usual behavior on weeks/days)
284
+ .idx = time_value %m - % ahead_period ,
285
+ .idx = x $ time_aggr(.idx ),
286
+ .weights = wts
277
287
) %> %
278
288
select(.idx , .weights , all_of(c(col_names , x $ epi_keys ))) %> %
279
289
tidyr :: pivot_longer(all_of(unname(col_names ))) %> %
@@ -335,18 +345,75 @@ print.step_climate <- function(x, width = max(20, options()$width - 30), ...) {
335
345
# ' @param window_size the number of .idx entries before and after to include in
336
346
# ' the aggregation
337
347
# ' @param modulus the maximum value of `.idx`
348
+ # ' @importFrom lubridate %m-%
338
349
roll_modular_multivec <- function (col , .idx , weights , aggr , window_size , modulus ) {
339
350
tib <- tibble(col = col , weights = weights , .idx = .idx ) | >
340
351
arrange(.idx ) | >
341
352
tidyr :: nest(data = c(col , weights ), .by = .idx )
342
- out <- double(nrow( tib ) )
353
+ out <- double(modulus + 1 )
343
354
for (iter in seq_along(out )) {
355
+ # +1 from 1-indexing
344
356
entries <- (iter - window_size ): (iter + window_size ) %% modulus
345
357
entries [entries == 0 ] <- modulus
358
+ # note that because we are 1-indexing, we're looking for indices that are 1
359
+ # larger than the actual day/week in the year
360
+ if (modulus == 365 ) {
361
+ # we need to grab just the window around the leap day on the leap day
362
+ if (iter == 366 ) {
363
+ # there's an extra data point in front of the leap day
364
+ entries <- (59 - window_size ): (59 + window_size - 1 ) %% modulus
365
+ entries [entries == 0 ] <- modulus
366
+ # adding in the leap day itself
367
+ entries <- c(entries , 999 )
368
+ } else if ((59 %in% entries ) || (60 %in% entries )) {
369
+ # if we're on the Feb/March boundary for daily data, we need to add in the
370
+ # leap day data
371
+ entries <- c(entries , 999 )
372
+ }
373
+ } else if (modulus == 52 ) {
374
+ # we need to grab just the window around the leap week on the leap week
375
+ if (iter == 53 ) {
376
+ entries <- (53 - window_size ): (53 + window_size - 1 ) %% 52
377
+ entries [entries == 0 ] <- 52
378
+ entries <- c(entries , 999 )
379
+ } else if ((52 %in% entries ) || (1 %in% entries )) {
380
+ # if we're on the year boundary for weekly data, we need to add in the
381
+ # leap week data (which is the extra week at the end)
382
+ entries <- c(entries , 999 )
383
+ }
384
+ }
346
385
out [iter ] <- with(
347
- purrr :: list_rbind(tib $ data [ entries ] ),
386
+ purrr :: list_rbind(tib % > % filter( .idx %in% entries ) % > % pull( data ) ),
348
387
aggr(col , weights )
349
388
)
350
389
}
351
- tibble(.idx = unique(tib $ .idx ), climate_pred = out )
390
+ tibble(.idx = unique(tib $ .idx ), climate_pred = out [seq_len(nrow(tib ))])
391
+ }
392
+
393
+
394
+ # ' a function that assigns Feb 29th to 999, and aligns all other dates the same
395
+ # ' number in the year, regardless of whether it's a leap year
396
+ # ' @keywords internal
397
+ # ' @importFrom lubridate yday month leap_year
398
+ yday_leap <- function (time_value ) {
399
+ dplyr :: case_when(
400
+ ! leap_year(time_value ) ~ yday(time_value ),
401
+ leap_day(time_value ) ~ 999 ,
402
+ TRUE ~ yday(time_value ) - as.numeric(month(time_value ) > 2L )
403
+ )
404
+ }
405
+ leap_day <- function (x ) lubridate :: month(x ) == 2 & lubridate :: day(x ) == 29
406
+ # ' epiweek, but it assigns week 53 the value of 999 instead so it mirrors the assignments in yday_leap
407
+ # ' @keywords internal
408
+ epiweek_leap <- function (time_value ) {
409
+ week_values <- lubridate :: epiweek(time_value )
410
+ week_values [week_values == 53 ] <- 999
411
+ week_values
412
+ }
413
+ # ' isoweek, but it assigns week 53 the value of 999 instead so it mirrors the assignments in yday_leap
414
+ # ' @keywords internal
415
+ isoweek_leap <- function (time_value ) {
416
+ week_values <- lubridate :: isoweek(time_value )
417
+ week_values [week_values == 53 ] <- 999
418
+ week_values
352
419
}
0 commit comments