Skip to content

Leap time handling #445

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Mar 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,10 @@ importFrom(glue,glue)
importFrom(hardhat,extract_recipe)
importFrom(hardhat,refresh_blueprint)
importFrom(hardhat,run_mold)
importFrom(lubridate,"%m-%")
importFrom(lubridate,leap_year)
importFrom(lubridate,month)
importFrom(lubridate,yday)
importFrom(magrittr,"%>%")
importFrom(magrittr,extract2)
importFrom(recipes,bake)
Expand Down
6 changes: 3 additions & 3 deletions R/climatological_forecaster.R
Original file line number Diff line number Diff line change
Expand Up @@ -100,15 +100,15 @@ climatological_forecaster <- function(epi_data,
select(all_of(c(key_colnames(epi_data), outcome)))
if (time_type %in% c("week", "epiweek")) {
ttype_dur <- lubridate::weeks
time_aggr <- ifelse(time_type == "week", lubridate::epiweek, lubridate::isoweek)
modulus <- 53L
time_aggr <- ifelse(time_type == "week", epiweek_leap, isoweek_leap)
modulus <- 52L
} else if (time_type == "month") {
ttype_dur <- function(x) lubridate::period(month = x)
time_aggr <- lubridate::month
modulus <- 12L
} else if (time_type == "day") {
ttype_dur <- lubridate::days
time_aggr <- lubridate::yday
time_aggr <- yday_leap
modulus <- 365L
}
center_fn <- switch(args_list$center_method,
Expand Down
95 changes: 81 additions & 14 deletions R/step_climate.R
Original file line number Diff line number Diff line change
Expand Up @@ -179,10 +179,10 @@ step_climate <-
arg_is_lgl_scalar(skip)

time_aggr <- switch(time_type,
epiweek = lubridate::epiweek,
week = lubridate::isoweek,
epiweek = epiweek_leap,
week = isoweek_leap,
month = lubridate::month,
day = lubridate::yday
day = yday_leap
)

recipes::add_step(
Expand Down Expand Up @@ -258,22 +258,32 @@ prep.step_climate <- function(x, training, info = NULL, ...) {
wts <- wts %||% rep(1, nrow(training))

modulus <- switch(x$time_type,
epiweek = 53L,
week = 53L,
epiweek = 52L, # only sometimes true
week = 52L,
month = 12L,
day = 365L
day = 365L # only sometimes true
)

fn <- switch(x$center_method,
mean = function(x, w) stats::weighted.mean(x, w, na.rm = TRUE),
median = function(x, w) median(x, na.rm = TRUE)
)

climate_table <- training %>%
# suppose it's week 52, and there is no week 53 this year; then
# as originally written for 1 week ahead this grabs from week 52+1 %% 53
# which will be week 53, not week 1.
ahead_period <- switch(x$time_type,
epiweek = lubridate::weeks(x$forecast_ahead),
week = lubridate::weeks(x$forecast_ahead),
month = months(x$forecast_ahead),
day = lubridate::days(x$forecast_ahead),
)
climate_table <-
training %>%
mutate(
.idx = x$time_aggr(time_value), .weights = wts,
.idx = (.idx - x$forecast_ahead) %% modulus,
.idx = dplyr::case_when(.idx == 0 ~ modulus, TRUE ~ .idx)
# subtracts a month w/o rollover (usual behavior on weeks/days)
.idx = time_value %m-% ahead_period,
.idx = x$time_aggr(.idx),
.weights = wts
) %>%
select(.idx, .weights, all_of(c(col_names, x$epi_keys))) %>%
tidyr::pivot_longer(all_of(unname(col_names))) %>%
Expand Down Expand Up @@ -335,18 +345,75 @@ print.step_climate <- function(x, width = max(20, options()$width - 30), ...) {
#' @param window_size the number of .idx entries before and after to include in
#' the aggregation
#' @param modulus the maximum value of `.idx`
#' @importFrom lubridate %m-%
roll_modular_multivec <- function(col, .idx, weights, aggr, window_size, modulus) {
tib <- tibble(col = col, weights = weights, .idx = .idx) |>
arrange(.idx) |>
tidyr::nest(data = c(col, weights), .by = .idx)
out <- double(nrow(tib))
out <- double(modulus + 1)
for (iter in seq_along(out)) {
# +1 from 1-indexing
entries <- (iter - window_size):(iter + window_size) %% modulus
entries[entries == 0] <- modulus
# note that because we are 1-indexing, we're looking for indices that are 1
# larger than the actual day/week in the year
if (modulus == 365) {
# we need to grab just the window around the leap day on the leap day
if (iter == 366) {
# there's an extra data point in front of the leap day
entries <- (59 - window_size):(59 + window_size - 1) %% modulus
entries[entries == 0] <- modulus
# adding in the leap day itself
entries <- c(entries, 999)
} else if ((59 %in% entries) || (60 %in% entries)) {
# if we're on the Feb/March boundary for daily data, we need to add in the
# leap day data
entries <- c(entries, 999)
}
} else if (modulus == 52) {
# we need to grab just the window around the leap week on the leap week
if (iter == 53) {
entries <- (53 - window_size):(53 + window_size - 1) %% 52
entries[entries == 0] <- 52
entries <- c(entries, 999)
} else if ((52 %in% entries) || (1 %in% entries)) {
# if we're on the year boundary for weekly data, we need to add in the
# leap week data (which is the extra week at the end)
entries <- c(entries, 999)
}
}
out[iter] <- with(
purrr::list_rbind(tib$data[entries]),
purrr::list_rbind(tib %>% filter(.idx %in% entries) %>% pull(data)),
aggr(col, weights)
)
}
tibble(.idx = unique(tib$.idx), climate_pred = out)
tibble(.idx = unique(tib$.idx), climate_pred = out[seq_len(nrow(tib))])
}


#' a function that assigns Feb 29th to 999, and aligns all other dates the same
#' number in the year, regardless of whether it's a leap year
#' @keywords internal
#' @importFrom lubridate yday month leap_year
yday_leap <- function(time_value) {
dplyr::case_when(
!leap_year(time_value) ~ yday(time_value),
leap_day(time_value) ~ 999,
TRUE ~ yday(time_value) - as.numeric(month(time_value) > 2L)
)
}
leap_day <- function(x) lubridate::month(x) == 2 & lubridate::day(x) == 29
#' epiweek, but it assigns week 53 the value of 999 instead so it mirrors the assignments in yday_leap
#' @keywords internal
epiweek_leap <- function(time_value) {
week_values <- lubridate::epiweek(time_value)
week_values[week_values == 53] <- 999
week_values
}
#' isoweek, but it assigns week 53 the value of 999 instead so it mirrors the assignments in yday_leap
#' @keywords internal
isoweek_leap <- function(time_value) {
week_values <- lubridate::isoweek(time_value)
week_values[week_values == 53] <- 999
week_values
}
4 changes: 2 additions & 2 deletions man/step_adjust_latency.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading