feat: add forecast method #293

dshemetov · dshemetov · commit ca9f45033795 · 2024-04-30T12:58:39.000-07:00
diff --git a/NAMESPACE b/NAMESPACE
@@ -152,6 +152,7 @@ export(flatline)
 export(flatline_args_list)
 export(flatline_forecaster)
 export(flusight_hub_formatter)
+export(forecast)
 export(frosting)
 export(get_test_data)
 export(grab_names)
diff --git a/R/epi_workflow.R b/R/epi_workflow.R
@@ -197,7 +197,11 @@ update_model.epi_workflow <- function(x, spec, ..., formula = NULL) {
 #'
 #' @export
 fit.epi_workflow <- function(object, data, ..., control = workflows::control_workflow()) {
-  object$fit$meta <- list(max_time_value = max(data$time_value), as_of = attributes(data)$metadata$as_of)
+  object$fit$meta <- list(
+    max_time_value = max(data$time_value),
+    as_of = attributes(data)$metadata$as_of
+  )
+  object$original_data <- data
 
   NextMethod()
 }
@@ -326,3 +330,40 @@ print.epi_workflow <- function(x, ...) {
   print_postprocessor(x)
   invisible(x)
 }
+
+
+#' Produce a forecast from an epi workflow
+#'
+#' @param epi_workflow A trained (fitted) epi workflow.
+#' @param fill_locf Logical. Should we use locf to fill in missing data?
+#' @param n_recent Integer or NULL. If filling missing data with
+#' `fill_locf = TRUE`, how far back are we willing to tolerate missing data?
+#' Larger values allow more filling. The default NULL is passed on to
+#' `get_test_data()` as `Inf`. For example, suppose n_recent = 3, then if the
+#' 3 most recent observations in any geo_value are all NA's, we won't be able
+#' to fill anything, and an error message will be thrown.
+#' @param forecast_date By default, this is set to the maximum time_value in
+#' the data the workflow was fit on. But if there is data latency such that
+#' recent NA's should be filled, this may be after the last available time_value.
+#'
+#' @return A forecast tibble, as returned by `predict()` on the workflow.
+#'
+#' @export
+forecast <- function(epi_workflow, fill_locf = FALSE, n_recent = NULL, forecast_date = NULL) {
+  if (!epi_workflow$trained) {
+    cli_abort(c(
+      "You cannot `forecast()` a {.cls workflow} that has not been trained.",
+      i = "Please use `fit()` before forecasting."
+    ))
+  }
+  # Rebuild the test data from the training data that `fit()` stored.
+  test_data <- get_test_data(
+    hardhat::extract_preprocessor(epi_workflow),
+    epi_workflow$original_data,
+    fill_locf = fill_locf,
+    n_recent = n_recent %||% Inf,
+    forecast_date = forecast_date %||% max(epi_workflow$original_data$time_value)
+  )
+
+  predict(epi_workflow, new_data = test_data)
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -22,21 +22,20 @@ navbar:
   type: light
 
 articles:
-- title: Get started
-  navbar: ~
-  contents:
-  - epipredict
-  - preprocessing-and-models
-  - arx-classifier
-  - articles/update
-
-- title: Advanced methods
-  contents:
-  - articles/sliding
-  - articles/smooth-qr
-  - articles/symptom-surveys
-  - panel-data
+  - title: Get started
+    navbar: ~
+    contents:
+      - epipredict
+      - preprocessing-and-models
+      - arx-classifier
+      - articles/update
 
+  - title: Advanced methods
+    contents:
+      - articles/sliding
+      - articles/smooth-qr
+      - articles/symptom-surveys
+      - panel-data
 
 repo:
   url:
@@ -47,81 +46,78 @@ repo:
 
 home:
   links:
-  - text: Introduction to Delphi's Tooling Work
-    href: https://cmu-delphi.github.io/delphi-tooling-book/
-  - text: The epiprocess R package
-    href: https://cmu-delphi.github.io/epiprocess/
-  - text: The epidatr R package
-    href: https://github.com/cmu-delphi/epidatr/
-  - text: The epidatasets R package
-    href: https://cmu-delphi.github.io/epidatasets/
-  - text: The covidcast R package
-    href: https://cmu-delphi.github.io/covidcast/covidcastR/
-
+    - text: Introduction to Delphi's Tooling Work
+      href: https://cmu-delphi.github.io/delphi-tooling-book/
+    - text: The epiprocess R package
+      href: https://cmu-delphi.github.io/epiprocess/
+    - text: The epidatr R package
+      href: https://github.com/cmu-delphi/epidatr/
+    - text: The epidatasets R package
+      href: https://cmu-delphi.github.io/epidatasets/
+    - text: The covidcast R package
+      href: https://cmu-delphi.github.io/covidcast/covidcastR/
 
 reference:
   - title: Simple forecasters
     desc: Complete forecasters that produce reasonable baselines
     contents:
-    - contains("forecaster")
-    - contains("classifier")
+      - contains("forecaster")
+      - contains("classifier")
   - title: Forecaster modifications
     desc: Constructors to modify forecaster arguments and utilities to produce `epi_workflow` objects
     contents:
-    - contains("args_list")
-    - contains("_epi_workflow")
+      - contains("args_list")
+      - contains("_epi_workflow")
   - title: Helper functions for Hub submission
     contents:
-    - flusight_hub_formatter
+      - flusight_hub_formatter
   - title: Parsnip engines
     desc: Prediction methods not available elsewhere
     contents:
-    - quantile_reg
-    - smooth_quantile_reg
+      - quantile_reg
+      - smooth_quantile_reg
   - title: Custom panel data forecasting workflows
     contents:
-    - epi_recipe
-    - epi_workflow
-    - add_epi_recipe
-    - adjust_epi_recipe
-    - add_model
-    - predict.epi_workflow
-    - fit.epi_workflow
-    - augment.epi_workflow
+      - epi_recipe
+      - epi_workflow
+      - add_epi_recipe
+      - adjust_epi_recipe
+      - add_model
+      - predict.epi_workflow
+      - fit.epi_workflow
+      - augment.epi_workflow
+      - forecast
   - title: Epi recipe preprocessing steps
     contents:
-    - starts_with("step_")
-    - contains("bake")
-    - contains("juice")
+      - starts_with("step_")
+      - contains("bake")
+      - contains("juice")
   - title: Epi recipe verification checks
     contents:
-    - check_enough_train_data
+      - check_enough_train_data
   - title: Forecast postprocessing
     desc: Create a series of postprocessing operations
     contents:
-    - frosting
-    - ends_with("_frosting")
-    - get_test_data
-    - tidy.frosting
+      - frosting
+      - ends_with("_frosting")
+      - get_test_data
+      - tidy.frosting
   - title: Frosting layers
     contents:
-    - contains("layer")
-    - contains("slather")
+      - contains("layer")
+      - contains("slather")
   - title: Automatic forecast visualization
     contents:
-    - autoplot.epi_workflow
-    - autoplot.canned_epipred
+      - autoplot.epi_workflow
+      - autoplot.canned_epipred
   - title: Utilities for quantile distribution processing
     contents:
-    - dist_quantiles
-    - extrapolate_quantiles
-    - nested_quantiles
-    - starts_with("pivot_quantiles")
+      - dist_quantiles
+      - extrapolate_quantiles
+      - nested_quantiles
+      - starts_with("pivot_quantiles")
   - title: Included datasets
     contents:
-    - case_death_rate_subset
-    - state_census
-    - grad_employ_subset
-
-
-
+      - case_death_rate_subset
+      - state_census
+      - grad_employ_subset
diff --git a/man/forecast.Rd b/man/forecast.Rd
diff --git a/tests/testthat/test-epi_workflow.R b/tests/testthat/test-epi_workflow.R
@@ -62,3 +62,43 @@ test_that("model can be added/updated/removed from epi_workflow", {
   expect_error(extract_spec_parsnip(wf))
   expect_equal(wf$fit$actions$model$spec, NULL)
 })
+
+test_that("forecast method works", {
+  jhu <- case_death_rate_subset %>%
+    filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
+  r <- epi_recipe(jhu) %>%
+    step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
+    step_epi_ahead(death_rate, ahead = 7) %>%
+    step_epi_naomit()
+  wf <- epi_workflow(r, parsnip::linear_reg()) %>% fit(jhu)
+  # Build the test data by hand from the same recipe and training data.
+  latest <- get_test_data(
+    hardhat::extract_preprocessor(wf),
+    jhu
+  )
+  # `forecast()` should match predicting on that hand-built test data.
+  expect_equal(
+    forecast(wf),
+    predict(wf, new_data = latest)
+  )
+})
+
+test_that("forecast method errors when workflow not fit", {
+  jhu <- case_death_rate_subset %>%
+    filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny"))
+  r <- epi_recipe(jhu) %>%
+    step_epi_lag(death_rate, lag = c(0, 7, 14)) %>%
+    step_epi_ahead(death_rate, ahead = 7) %>%
+    step_epi_naomit()
+  wf <- epi_workflow(r, parsnip::linear_reg())
+
+  latest <- get_test_data(
+    hardhat::extract_preprocessor(wf),
+    jhu
+  )
+  # `forecast()` aborts with "... has not been trained."; match that wording.
+  expect_error(
+    forecast(wf),
+    regexp = "that has not been trained"
+  )
+})