diff --git a/NAMESPACE b/NAMESPACE index b632f8922..4a678f56f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,5 @@ # Generated by roxygen2: do not edit by hand -S3method(bake,step_epi_ahead) S3method(bake,step_epi_lag) S3method(epi_keys,default) S3method(epi_keys,epi_df) @@ -8,9 +7,7 @@ S3method(epi_keys,recipe) S3method(epi_recipe,default) S3method(epi_recipe,epi_df) S3method(epi_recipe,formula) -S3method(prep,step_epi_ahead) S3method(prep,step_epi_lag) -S3method(print,step_epi_ahead) S3method(print,step_epi_lag) export("%>%") export(arx_args_list) @@ -27,7 +24,6 @@ export(knnarx_args_list) export(knnarx_forecaster) export(smooth_arx_args_list) export(smooth_arx_forecaster) -export(step_epi_ahead) export(step_epi_lag) import(recipes) importFrom(magrittr,"%>%") diff --git a/R/epi_ahead.R b/R/epi_ahead.R deleted file mode 100644 index 3434b7d93..000000000 --- a/R/epi_ahead.R +++ /dev/null @@ -1,163 +0,0 @@ -#' Create a leading outcome -#' -#' `step_epi_ahead` creates a *specification* of a recipe step that -#' will add new columns of leading data. Leading data will -#' by default include NA values where the lag was induced. -#' These can be removed with [step_naomit()], or you may -#' specify an alternative filler value with the `default` -#' argument. -#' -#' @param recipe A recipe object. The step will be added to the -#' sequence of operations for this recipe. -#' @param ... One or more selector functions to choose variables -#' for this step. See [selections()] for more details. -#' @param role For model terms created by this step, what analysis role should -#' they be assigned? -#' @param trained A logical to indicate if the quantities for -#' preprocessing have been estimated. -#' @param ahead A vector of positive integers. Each specified column will be -#' lead for each value in the vector. -#' @param prefix A prefix for generated column names, default to "ahead_". -#' @param default Determines what fills empty rows -#' left by leading/lagging (defaults to NA). -#' @param keys A character vector of the keys in an epi_df -#' @param columns A character string of variable names that will -#' be populated (eventually) by the `terms` argument. -#' @param skip A logical. Should the step be skipped when the -#' recipe is baked by [bake()]? While all operations are baked -#' when [prep()] is run, some operations may not be able to be -#' conducted on new data (e.g. processing the outcome variable(s)). -#' Care should be taken when using `skip = TRUE` as it may affect -#' the computations for subsequent operations. -#' @param id A character string that is unique to this step to identify it. -#' @template step-return -#' -#' @details The step assumes that the data are already _in the proper sequential -#' order_ for leading. -#' -#' @family row operation steps -#' @export -#' -#' @examples -#' tib <- tibble::tibble( -#' x = 1:5, y = 1:5, -#' time_value = seq(as.Date("2020-01-01"), by = 1, length.out = 5), -#' geo_value = "ca" -#' ) %>% epiprocess::as_epi_df() -#' -#' library(recipes) -#' epi_recipe(y ~ x, data = tib) %>% -#' step_epi_lag(x, lag = 2:3) %>% -#' step_epi_ahead(y, ahead = 1) %>% -#' prep(tib) %>% -#' bake(tib) -step_epi_ahead <- - function(recipe, - ..., - role = "outcome", - trained = FALSE, - ahead = 1, - prefix = "ahead_", - default = NA, - keys = epi_keys(recipe), - columns = NULL, - skip = FALSE, - id = rand_id("epi_ahead")) { - add_step( - recipe, - step_epi_ahead_new( - terms = dplyr::enquos(...), - role = role, - trained = trained, - ahead = ahead, - prefix = prefix, - default = default, - keys = keys, - columns = columns, - skip = skip, - id = id - ) - ) - } - -step_epi_ahead_new <- - function(terms, role, trained, ahead, prefix, default, keys, - columns, skip, id) { - step( - subclass = "epi_ahead", - terms = terms, - role = role, - trained = trained, - ahead = ahead, - prefix = prefix, - default = default, - keys = keys, - columns = columns, - skip = skip, - id = id - ) - } - -#' @export -prep.step_epi_ahead <- function(x, training, info = NULL, ...) { - step_epi_ahead_new( - terms = x$terms, - role = x$role, - trained = TRUE, - ahead = x$ahead, - prefix = x$prefix, - default = x$default, - keys = x$keys, - columns = recipes_eval_select(x$terms, training, info), - skip = x$skip, - id = x$id - ) -} - -#' @export -bake.step_epi_ahead <- function(object, new_data, ...) { - if (!all(object$ahead == as.integer(object$ahead))) { - rlang::abort("step_epi_ahead requires 'ahead' argument to be integer valued.") - } - - grid <- tidyr::expand_grid( - col = object$columns, lag_val = -object$ahead) %>% - dplyr::mutate( - ahead_val = -lag_val, - newname = glue::glue("{object$prefix}{ahead_val}_{col}") - ) %>% - dplyr::select(-ahead_val) - - ## ensure no name clashes - new_data_names <- colnames(new_data) - intersection <- new_data_names %in% grid$newname - if (any(intersection)) { - rlang::abort( - paste0("Name collision occured in `", class(object)[1], - "`. The following variable names already exists: ", - paste0(new_data_names[intersection], collapse = ", "), - ".")) - } - - ok <- object$keys - lagged <- purrr::reduce( - purrr::pmap(grid, epi_shift_single, x = new_data, key_cols = ok), - dplyr::full_join, - by = ok - ) - - dplyr::full_join(new_data, lagged, by = ok) %>% - dplyr::group_by(dplyr::across(dplyr::all_of(ok[-1]))) %>% - dplyr::arrange(time_value) %>% - dplyr::ungroup() - -} - -#' @export -print.step_epi_ahead <- - function(x, width = max(20, options()$width - 30), ...) { - ## TODO add printing of the lags - title <- "Leading " - recipes::print_step(x$columns, x$terms, x$trained, title, width) - invisible(x) - } diff --git a/R/epi_lag.R b/R/epi_lag.R index b7dc28b2a..46c93f105 100644 --- a/R/epi_lag.R +++ b/R/epi_lag.R @@ -16,13 +16,13 @@ #' #' @family row operation steps #' @export -#' @rdname step_epi_ahead +#' @rdname step_epi_lag step_epi_lag <- function(recipe, ..., role = "predictor", trained = FALSE, - lag = 1, + lag = 1, # negative for ahead prefix = "lag_", default = NA, keys = epi_keys(recipe), @@ -117,7 +117,7 @@ bake.step_epi_lag <- function(object, new_data, ...) { print.step_epi_lag <- function(x, width = max(20, options()$width - 30), ...) { ## TODO add printing of the lags - title <- "Lagging " + title <- ifelse(x$lag >= 0, "Lagging", "Leading") recipes::print_step(x$columns, x$terms, x$trained, title, width) invisible(x) } diff --git a/man/step_epi_ahead.Rd b/man/step_epi_ahead.Rd deleted file mode 100644 index 006c224c3..000000000 --- a/man/step_epi_ahead.Rd +++ /dev/null @@ -1,117 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/epi_ahead.R, R/epi_lag.R -\name{step_epi_ahead} -\alias{step_epi_ahead} -\alias{step_epi_lag} -\title{Create a leading outcome} -\usage{ -step_epi_ahead( - recipe, - ..., - role = "outcome", - trained = FALSE, - ahead = 1, - prefix = "ahead_", - default = NA, - keys = epi_keys(recipe), - columns = NULL, - skip = FALSE, - id = rand_id("epi_ahead") -) - -step_epi_lag( - recipe, - ..., - role = "predictor", - trained = FALSE, - lag = 1, - prefix = "lag_", - default = NA, - keys = epi_keys(recipe), - columns = NULL, - skip = FALSE, - id = rand_id("epi_lag") -) -} -\arguments{ -\item{recipe}{A recipe object. The step will be added to the -sequence of operations for this recipe.} - -\item{...}{One or more selector functions to choose variables -for this step. See \code{\link[=selections]{selections()}} for more details.} - -\item{role}{For model terms created by this step, what analysis role should -they be assigned?} - -\item{trained}{A logical to indicate if the quantities for -preprocessing have been estimated.} - -\item{ahead}{A vector of positive integers. Each specified column will be -lead for each value in the vector.} - -\item{prefix}{A prefix for generated column names, default to "ahead_".} - -\item{default}{Determines what fills empty rows -left by leading/lagging (defaults to NA).} - -\item{keys}{A character vector of the keys in an epi_df} - -\item{columns}{A character string of variable names that will -be populated (eventually) by the \code{terms} argument.} - -\item{skip}{A logical. Should the step be skipped when the -recipe is baked by \code{\link[=bake]{bake()}}? While all operations are baked -when \code{\link[=prep]{prep()}} is run, some operations may not be able to be -conducted on new data (e.g. processing the outcome variable(s)). -Care should be taken when using \code{skip = TRUE} as it may affect -the computations for subsequent operations.} - -\item{id}{A character string that is unique to this step to identify it.} - -\item{lag}{A vector of positive integers. Each specified column will be -lagged for each value in the vector.} -} -\value{ -An updated version of \code{recipe} with the new step added to the -sequence of any existing operations. - -An updated version of \code{recipe} with the new step added to the -sequence of any existing operations. -} -\description{ -\code{step_epi_ahead} creates a \emph{specification} of a recipe step that -will add new columns of leading data. Leading data will -by default include NA values where the lag was induced. -These can be removed with \code{\link[=step_naomit]{step_naomit()}}, or you may -specify an alternative filler value with the \code{default} -argument. - -\code{step_epi_lag} creates a \emph{specification} of a recipe step that -will add new columns of lagged data. Lagged data will -by default include NA values where the lag was induced. -These can be removed with \code{\link[=step_naomit]{step_naomit()}}, or you may -specify an alternative filler value with the \code{default} -argument. -} -\details{ -The step assumes that the data are already \emph{in the proper sequential -order} for leading. - -The step assumes that the data are already \emph{in the proper sequential -order} for lagging. -} -\examples{ -tib <- tibble::tibble( - x = 1:5, y = 1:5, - time_value = seq(as.Date("2020-01-01"), by = 1, length.out = 5), - geo_value = "ca" - ) \%>\% epiprocess::as_epi_df() - -library(recipes) -epi_recipe(y ~ x, data = tib) \%>\% - step_epi_lag(x, lag = 2:3) \%>\% - step_epi_ahead(y, ahead = 1) \%>\% - prep(tib) \%>\% - bake(tib) -} -\concept{row operation steps} diff --git a/man/step_epi_lag.Rd b/man/step_epi_lag.Rd new file mode 100644 index 000000000..52361f696 --- /dev/null +++ b/man/step_epi_lag.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/epi_lag.R +\name{step_epi_lag} +\alias{step_epi_lag} +\title{Create a lagged predictor} +\usage{ +step_epi_lag( + recipe, + ..., + role = "predictor", + trained = FALSE, + lag = 1, + prefix = "lag_", + default = NA, + keys = epi_keys(recipe), + columns = NULL, + skip = FALSE, + id = rand_id("epi_lag") +) +} +\arguments{ +\item{lag}{A vector of positive integers. Each specified column will be +lagged for each value in the vector.} +} +\value{ +An updated version of \code{recipe} with the new step added to the +sequence of any existing operations. +} +\description{ +\code{step_epi_lag} creates a \emph{specification} of a recipe step that +will add new columns of lagged data. Lagged data will +by default include NA values where the lag was induced. +These can be removed with \code{\link[=step_naomit]{step_naomit()}}, or you may +specify an alternative filler value with the \code{default} +argument. +} +\details{ +The step assumes that the data are already \emph{in the proper sequential +order} for lagging. +} +\concept{row operation steps} diff --git a/musings/example-recipe.R b/musings/example-recipe.R index 61e4b6868..d10bd7e8d 100644 --- a/musings/example-recipe.R +++ b/musings/example-recipe.R @@ -36,7 +36,7 @@ xx <- x %>% filter(time_value > "2021-12-01") # Baseline AR3 r <- epi_recipe(x) %>% # if we add this as a class, maybe we get better # behaviour downstream? - step_epi_ahead(death_rate, ahead = 7) %>% + step_epi_lag(death_rate, lag = -7) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_lag(case_rate, lag = c(0, 7, 14)) %>% step_naomit(all_predictors()) %>% diff --git a/tests/testthat/test-assign_arg_list.R b/tests/testthat/test-assign_arg_list.R new file mode 100644 index 000000000..017c40935 --- /dev/null +++ b/tests/testthat/test-assign_arg_list.R @@ -0,0 +1,6 @@ +test_that("First argument must be a list",{ + expect_error(assign_arg_list(c(1,2,3))) +}) +test_that("All arguments should be named",{ + expect_error(assign_arg_list(list(1,2))) +}) diff --git a/tests/testthat/test-df_mat_mul.R b/tests/testthat/test-df_mat_mul.R new file mode 100644 index 000000000..166fdc92d --- /dev/null +++ b/tests/testthat/test-df_mat_mul.R @@ -0,0 +1,19 @@ +df <- data.frame(matrix(1:100, ncol = 5)) +mat <- matrix(1:4, ncol = 2) +df_mat_mul(df, mat, "z", dplyr::num_range("X", 2:3)) +test_that("df_mat_mul checks inputs", { + expect_error(df_mat_mul(30,mat)) + expect_error(df_mat_mul(df,20)) +}) + +test_that("Incompatible matrix multipication cannot happen", { + expect_error(df_mat_mul(df, mat, "z", dplyr::num_range("X", 1:3))) +}) + +test_that("Matrix multiplication is being handled as expected", { + X <- df[c(1,4,5)] + Z <- as.data.frame(as.matrix(df[2:3]) %*% mat) + colnames(Z) <- c("z1","z2") + output <- cbind(X,Z) + expect_identical(df_mat_mul(df,mat, "z", dplyr::num_range("X", 2:3)),output) +})