Skip to content

Commit 7e10b52

Browse files
authored
Merge pull request #445 from cmu-delphi/leap_time_handling
Leap time handling
2 parents 08fcd01 + 7c9d1c6 commit 7e10b52

7 files changed

+855
-642
lines changed

NAMESPACE

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,10 @@ importFrom(glue,glue)
279279
importFrom(hardhat,extract_recipe)
280280
importFrom(hardhat,refresh_blueprint)
281281
importFrom(hardhat,run_mold)
282+
importFrom(lubridate,"%m-%")
283+
importFrom(lubridate,leap_year)
284+
importFrom(lubridate,month)
285+
importFrom(lubridate,yday)
282286
importFrom(magrittr,"%>%")
283287
importFrom(magrittr,extract2)
284288
importFrom(recipes,bake)

R/climatological_forecaster.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,15 +100,15 @@ climatological_forecaster <- function(epi_data,
100100
select(all_of(c(key_colnames(epi_data), outcome)))
101101
if (time_type %in% c("week", "epiweek")) {
102102
ttype_dur <- lubridate::weeks
103-
time_aggr <- ifelse(time_type == "week", lubridate::epiweek, lubridate::isoweek)
104-
modulus <- 53L
103+
time_aggr <- ifelse(time_type == "week", epiweek_leap, isoweek_leap)
104+
modulus <- 52L
105105
} else if (time_type == "month") {
106106
ttype_dur <- function(x) lubridate::period(month = x)
107107
time_aggr <- lubridate::month
108108
modulus <- 12L
109109
} else if (time_type == "day") {
110110
ttype_dur <- lubridate::days
111-
time_aggr <- lubridate::yday
111+
time_aggr <- yday_leap
112112
modulus <- 365L
113113
}
114114
center_fn <- switch(args_list$center_method,

R/step_climate.R

Lines changed: 81 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -179,10 +179,10 @@ step_climate <-
179179
arg_is_lgl_scalar(skip)
180180

181181
time_aggr <- switch(time_type,
182-
epiweek = lubridate::epiweek,
183-
week = lubridate::isoweek,
182+
epiweek = epiweek_leap,
183+
week = isoweek_leap,
184184
month = lubridate::month,
185-
day = lubridate::yday
185+
day = yday_leap
186186
)
187187

188188
recipes::add_step(
@@ -258,22 +258,32 @@ prep.step_climate <- function(x, training, info = NULL, ...) {
258258
wts <- wts %||% rep(1, nrow(training))
259259

260260
modulus <- switch(x$time_type,
261-
epiweek = 53L,
262-
week = 53L,
261+
epiweek = 52L, # only sometimes true
262+
week = 52L,
263263
month = 12L,
264-
day = 365L
264+
day = 365L # only sometimes true
265265
)
266266

267267
fn <- switch(x$center_method,
268268
mean = function(x, w) stats::weighted.mean(x, w, na.rm = TRUE),
269269
median = function(x, w) median(x, na.rm = TRUE)
270270
)
271-
272-
climate_table <- training %>%
271+
# suppose it's week 52, and there is no week 53 this year; then
272+
# as originally written for 1 week ahead this grabs from week 52+1 %% 53
273+
# which will be week 53, not week 1.
274+
ahead_period <- switch(x$time_type,
275+
epiweek = lubridate::weeks(x$forecast_ahead),
276+
week = lubridate::weeks(x$forecast_ahead),
277+
month = months(x$forecast_ahead),
278+
day = lubridate::days(x$forecast_ahead),
279+
)
280+
climate_table <-
281+
training %>%
273282
mutate(
274-
.idx = x$time_aggr(time_value), .weights = wts,
275-
.idx = (.idx - x$forecast_ahead) %% modulus,
276-
.idx = dplyr::case_when(.idx == 0 ~ modulus, TRUE ~ .idx)
283+
# subtracts a month w/o rollover (usual behavior on weeks/days)
284+
.idx = time_value %m-% ahead_period,
285+
.idx = x$time_aggr(.idx),
286+
.weights = wts
277287
) %>%
278288
select(.idx, .weights, all_of(c(col_names, x$epi_keys))) %>%
279289
tidyr::pivot_longer(all_of(unname(col_names))) %>%
@@ -335,18 +345,75 @@ print.step_climate <- function(x, width = max(20, options()$width - 30), ...) {
335345
#' @param window_size the number of .idx entries before and after to include in
336346
#' the aggregation
337347
#' @param modulus the maximum value of `.idx`
348+
#' @importFrom lubridate %m-%
338349
roll_modular_multivec <- function(col, .idx, weights, aggr, window_size, modulus) {
339350
tib <- tibble(col = col, weights = weights, .idx = .idx) |>
340351
arrange(.idx) |>
341352
tidyr::nest(data = c(col, weights), .by = .idx)
342-
out <- double(nrow(tib))
353+
out <- double(modulus + 1)
343354
for (iter in seq_along(out)) {
355+
# +1 from 1-indexing
344356
entries <- (iter - window_size):(iter + window_size) %% modulus
345357
entries[entries == 0] <- modulus
358+
# note that because we are 1-indexing, we're looking for indices that are 1
359+
# larger than the actual day/week in the year
360+
if (modulus == 365) {
361+
# we need to grab just the window around the leap day on the leap day
362+
if (iter == 366) {
363+
# there's an extra data point in front of the leap day
364+
entries <- (59 - window_size):(59 + window_size - 1) %% modulus
365+
entries[entries == 0] <- modulus
366+
# adding in the leap day itself
367+
entries <- c(entries, 999)
368+
} else if ((59 %in% entries) || (60 %in% entries)) {
369+
# if we're on the Feb/March boundary for daily data, we need to add in the
370+
# leap day data
371+
entries <- c(entries, 999)
372+
}
373+
} else if (modulus == 52) {
374+
# we need to grab just the window around the leap week on the leap week
375+
if (iter == 53) {
376+
entries <- (53 - window_size):(53 + window_size - 1) %% 52
377+
entries[entries == 0] <- 52
378+
entries <- c(entries, 999)
379+
} else if ((52 %in% entries) || (1 %in% entries)) {
380+
# if we're on the year boundary for weekly data, we need to add in the
381+
# leap week data (which is the extra week at the end)
382+
entries <- c(entries, 999)
383+
}
384+
}
346385
out[iter] <- with(
347-
purrr::list_rbind(tib$data[entries]),
386+
purrr::list_rbind(tib %>% filter(.idx %in% entries) %>% pull(data)),
348387
aggr(col, weights)
349388
)
350389
}
351-
tibble(.idx = unique(tib$.idx), climate_pred = out)
390+
tibble(.idx = unique(tib$.idx), climate_pred = out[seq_len(nrow(tib))])
391+
}
392+
393+
394+
#' a function that assigns Feb 29th to 999, and aligns all other dates the same
395+
#' number in the year, regardless of whether it's a leap year
396+
#' @keywords internal
397+
#' @importFrom lubridate yday month leap_year
398+
yday_leap <- function(time_value) {
399+
dplyr::case_when(
400+
!leap_year(time_value) ~ yday(time_value),
401+
leap_day(time_value) ~ 999,
402+
TRUE ~ yday(time_value) - as.numeric(month(time_value) > 2L)
403+
)
404+
}
405+
leap_day <- function(x) lubridate::month(x) == 2 & lubridate::day(x) == 29
406+
#' epiweek, but it assigns week 53 the value of 999 instead so it mirrors the assignments in yday_leap
407+
#' @keywords internal
408+
epiweek_leap <- function(time_value) {
409+
week_values <- lubridate::epiweek(time_value)
410+
week_values[week_values == 53] <- 999
411+
week_values
412+
}
413+
#' isoweek, but it assigns week 53 the value of 999 instead so it mirrors the assignments in yday_leap
414+
#' @keywords internal
415+
isoweek_leap <- function(time_value) {
416+
week_values <- lubridate::isoweek(time_value)
417+
week_values[week_values == 53] <- 999
418+
week_values
352419
}

man/step_adjust_latency.Rd

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)