Skip to content

Commit 84f991d

Browse files
committed
rename, check for pre in names, dplyr::between
1 parent 1499279 commit 84f991d

7 files changed

+36
-36
lines changed

R/arx_forecaster.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,13 +172,13 @@ arx_fcast_epi_workflow <- function(
172172
r <- r %>%
173173
step_epi_naomit() %>%
174174
step_training_window(n_recent = args_list$n_training) %>%
175-
check_enough_data(all_predictors(), min_data_points = 1, skip = FALSE)
175+
check_enough_data(all_predictors(), min_observations = 1, skip = FALSE)
176176

177177
if (!is.null(args_list$check_enough_data_n)) {
178178
r <- r %>% check_enough_data(
179179
all_predictors(),
180180
all_outcomes(),
181-
min_data_points = args_list$check_enough_data_n,
181+
min_observations = args_list$check_enough_data_n,
182182
epi_keys = args_list$check_enough_data_epi_keys,
183183
drop_na = FALSE
184184
)

R/canned-epipred.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ print.canned_epipred <- function(x, name, ...) {
112112
"At forecast date{?s}: {.val {fds}},",
113113
"For target date{?s}: {.val {tds}},"
114114
))
115-
if ("actions" %in% names(x$pre) && "recipe" %in% names(x$pre$actions)) {
115+
if ("pre" %in% names(x) && "actions" %in% names(x$pre) && "recipe" %in% names(x$pre$actions)) {
116116
fit_recipe <- extract_recipe(x$epi_workflow)
117117
if (detect_step(fit_recipe, "adjust_latency")) {
118118
is_adj_latency <- map_lgl(fit_recipe$steps, function(x) inherits(x, "step_adjust_latency"))

R/check_enough_data.R

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
#' @param ... One or more selector functions to choose variables for this check.
99
#' See [selections()] for more details. You will usually want to use
1010
#' [recipes::all_predictors()] and/or [recipes::all_outcomes()] here.
11-
#' @param min_data_points The minimum number of data points required for
11+
#' @param min_observations The minimum number of data points required for
1212
#' training. If this is NULL, the number of columns selected by `...` will be used.
1313
#' @param epi_keys A character vector of column names on which to group the data
1414
#' and check threshold within each group. Useful if your forecaster trains
@@ -44,7 +44,7 @@
4444
check_enough_data <-
4545
function(recipe,
4646
...,
47-
min_data_points = NULL,
47+
min_observations = NULL,
4848
epi_keys = NULL,
4949
drop_na = TRUE,
5050
role = NA,
@@ -54,7 +54,7 @@ check_enough_data <-
5454
recipes::add_check(
5555
recipe,
5656
check_enough_data_new(
57-
min_data_points = min_data_points,
57+
min_observations = min_observations,
5858
epi_keys = epi_keys,
5959
drop_na = drop_na,
6060
terms = enquos(...),
@@ -68,12 +68,12 @@ check_enough_data <-
6868
}
6969

7070
check_enough_data_new <-
71-
function(min_data_points, epi_keys, drop_na, terms,
71+
function(min_observations, epi_keys, drop_na, terms,
7272
role, trained, columns, skip, id) {
7373
recipes::check(
7474
subclass = "enough_data",
7575
prefix = "check_",
76-
min_data_points = min_data_points,
76+
min_observations = min_observations,
7777
epi_keys = epi_keys,
7878
drop_na = drop_na,
7979
terms = terms,
@@ -88,14 +88,14 @@ check_enough_data_new <-
8888
#' @export
8989
prep.check_enough_data <- function(x, training, info = NULL, ...) {
9090
col_names <- recipes::recipes_eval_select(x$terms, training, info)
91-
if (is.null(x$min_data_points)) {
92-
x$min_data_points <- length(col_names)
91+
if (is.null(x$min_observations)) {
92+
x$min_observations <- length(col_names)
9393
}
9494

9595
check_enough_data_core(training, x, col_names, "train")
9696

9797
check_enough_data_new(
98-
min_data_points = x$min_data_points,
98+
min_observations = x$min_observations,
9999
epi_keys = x$epi_keys,
100100
drop_na = x$drop_na,
101101
terms = x$terms,
@@ -116,7 +116,7 @@ bake.check_enough_data <- function(object, new_data, ...) {
116116

117117
#' @export
118118
print.check_enough_data <- function(x, width = max(20, options()$width - 30), ...) {
119-
title <- paste0("Check enough data (n = ", x$min_data_points, ") for ")
119+
title <- paste0("Check enough data (n = ", x$min_observations, ") for ")
120120
recipes::print_step(x$columns, x$terms, x$trained, title, width)
121121
invisible(x)
122122
}
@@ -129,7 +129,7 @@ tidy.check_enough_data <- function(x, ...) {
129129
res <- tibble(terms = recipes::sel2char(x$terms))
130130
}
131131
res$id <- x$id
132-
res$min_data_points <- x$min_data_points
132+
res$min_observations <- x$min_observations
133133
res$epi_keys <- x$epi_keys
134134
res$drop_na <- x$drop_na
135135
res
@@ -142,7 +142,7 @@ check_enough_data_core <- function(epi_df, step_obj, col_names, train_or_predict
142142
any_missing_data <- epi_df %>%
143143
mutate(any_are_na = rowSums(across(any_of(.env$col_names), ~ is.na(.x))) > 0) %>%
144144
# count the number of rows where they're all not na
145-
summarise(sum(any_are_na == 0) < .env$step_obj$min_data_points, .groups = "drop")
145+
summarise(sum(any_are_na == 0) < .env$step_obj$min_observations, .groups = "drop")
146146
any_missing_data <- any_missing_data %>%
147147
summarize(across(all_of(setdiff(names(any_missing_data), step_obj$epi_keys)), any)) %>%
148148
any()
@@ -153,7 +153,7 @@ check_enough_data_core <- function(epi_df, step_obj, col_names, train_or_predict
153153
summarise(
154154
across(
155155
all_of(.env$col_names),
156-
~ sum(!is.na(.x)) < .env$step_obj$min_data_points
156+
~ sum(!is.na(.x)) < .env$step_obj$min_observations
157157
),
158158
.groups = "drop"
159159
) %>%
@@ -173,7 +173,7 @@ check_enough_data_core <- function(epi_df, step_obj, col_names, train_or_predict
173173
} else {
174174
# if we're not dropping na values, just count
175175
cols_not_enough_data <- epi_df %>%
176-
summarise(across(all_of(.env$col_names), ~ dplyr::n() < .env$step_obj$min_data_points))
176+
summarise(across(all_of(.env$col_names), ~ dplyr::n() < .env$step_obj$min_observations))
177177
any_missing_data <- cols_not_enough_data %>%
178178
summarize(across(all_of(.env$col_names), all)) %>%
179179
all()

man/check_enough_data.Rd

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/_snaps/check_enough_data.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# check_enough_data works on pooled data
22

33
Code
4-
epi_recipe(toy_epi_df) %>% check_enough_data(x, y, min_data_points = 2 * n + 1,
4+
epi_recipe(toy_epi_df) %>% check_enough_data(x, y, min_observations = 2 * n + 1,
55
drop_na = FALSE) %>% prep(toy_epi_df)
66
Condition
77
Error in `check_enough_data_core()`:
@@ -10,7 +10,7 @@
1010
---
1111

1212
Code
13-
epi_recipe(toy_epi_df) %>% check_enough_data(x, y, min_data_points = 2 * n - 1,
13+
epi_recipe(toy_epi_df) %>% check_enough_data(x, y, min_observations = 2 * n - 1,
1414
drop_na = TRUE) %>% prep(toy_epi_df)
1515
Condition
1616
Error in `check_enough_data_core()`:
@@ -19,7 +19,7 @@
1919
# check_enough_data works on unpooled data
2020

2121
Code
22-
epi_recipe(toy_epi_df) %>% check_enough_data(x, y, min_data_points = n + 1,
22+
epi_recipe(toy_epi_df) %>% check_enough_data(x, y, min_observations = n + 1,
2323
epi_keys = "geo_value", drop_na = FALSE) %>% prep(toy_epi_df)
2424
Condition
2525
Error in `check_enough_data_core()`:
@@ -28,7 +28,7 @@
2828
---
2929

3030
Code
31-
epi_recipe(toy_epi_df) %>% check_enough_data(x, y, min_data_points = 2 * n - 3,
31+
epi_recipe(toy_epi_df) %>% check_enough_data(x, y, min_observations = 2 * n - 3,
3232
epi_keys = "geo_value", drop_na = TRUE) %>% prep(toy_epi_df)
3333
Condition
3434
Error in `check_enough_data_core()`:
@@ -47,7 +47,7 @@
4747

4848
Code
4949
epi_recipe(toy_epi_df) %>% step_epi_lag(x, lag = c(1, 2)) %>% check_enough_data(
50-
all_predictors(), y, min_data_points = 2 * n - 4) %>% prep(toy_epi_df)
50+
all_predictors(), y, min_observations = 2 * n - 4) %>% prep(toy_epi_df)
5151
Condition
5252
Error in `check_enough_data_core()`:
5353
! The following columns don't have enough data to train: no single column, but the combination of lag_1_x, lag_2_x, y.

tests/testthat/test-arx_forecaster.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ test_that("warns if there's not enough data to predict", {
3232
) %>%
3333
mutate(value = seq_len(nrow(.)) + rnorm(nrow(.))) %>%
3434
# Oct to May (flu season, ish) only:
35-
filter(!between(as.POSIXlt(time_value)$mon + 1L, 6L, 9L)) %>%
35+
filter(!dplyr::between(as.POSIXlt(time_value)$mon + 1L, 6L, 9L)) %>%
3636
# and actually, pretend we're around mid-October 2022:
3737
filter(time_value <= as.Date("2022-10-12")) %>%
3838
as_epi_df(as_of = as.Date("2022-10-12"))

tests/testthat/test-check_enough_data.R

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,22 +18,22 @@ test_that("check_enough_data works on pooled data", {
1818
# Check both columns have enough data
1919
expect_no_error(
2020
epi_recipe(toy_epi_df) %>%
21-
check_enough_data(x, y, min_data_points = 2 * n, drop_na = FALSE) %>%
21+
check_enough_data(x, y, min_observations = 2 * n, drop_na = FALSE) %>%
2222
prep(toy_epi_df) %>%
2323
bake(new_data = NULL)
2424
)
2525
# Check that both columns don't have enough data
2626
expect_snapshot(
2727
error = TRUE,
2828
epi_recipe(toy_epi_df) %>%
29-
check_enough_data(x, y, min_data_points = 2 * n + 1, drop_na = FALSE) %>%
29+
check_enough_data(x, y, min_observations = 2 * n + 1, drop_na = FALSE) %>%
3030
prep(toy_epi_df)
3131
)
3232
# Check drop_na works
3333
expect_snapshot(
3434
error = TRUE,
3535
epi_recipe(toy_epi_df) %>%
36-
check_enough_data(x, y, min_data_points = 2 * n - 1, drop_na = TRUE) %>%
36+
check_enough_data(x, y, min_observations = 2 * n - 1, drop_na = TRUE) %>%
3737
prep(toy_epi_df)
3838
)
3939
})
@@ -42,30 +42,30 @@ test_that("check_enough_data works on unpooled data", {
4242
# Check both columns have enough data
4343
expect_no_error(
4444
epi_recipe(toy_epi_df) %>%
45-
check_enough_data(x, y, min_data_points = n, epi_keys = "geo_value", drop_na = FALSE) %>%
45+
check_enough_data(x, y, min_observations = n, epi_keys = "geo_value", drop_na = FALSE) %>%
4646
prep(toy_epi_df) %>%
4747
bake(new_data = NULL)
4848
)
4949
# Check that one column doesn't have enough data
5050
expect_snapshot(
5151
error = TRUE,
5252
epi_recipe(toy_epi_df) %>%
53-
check_enough_data(x, y, min_data_points = n + 1, epi_keys = "geo_value", drop_na = FALSE) %>%
53+
check_enough_data(x, y, min_observations = n + 1, epi_keys = "geo_value", drop_na = FALSE) %>%
5454
prep(toy_epi_df)
5555
)
5656
# Check drop_na works
5757
expect_snapshot(
5858
error = TRUE,
5959
epi_recipe(toy_epi_df) %>%
60-
check_enough_data(x, y, min_data_points = 2 * n - 3, epi_keys = "geo_value", drop_na = TRUE) %>%
60+
check_enough_data(x, y, min_observations = 2 * n - 3, epi_keys = "geo_value", drop_na = TRUE) %>%
6161
prep(toy_epi_df)
6262
)
6363
})
6464

6565
test_that("check_enough_data outputs the correct recipe values", {
6666
expect_no_error(
6767
p <- epi_recipe(toy_epi_df) %>%
68-
check_enough_data(x, y, min_data_points = 2 * n - 2) %>%
68+
check_enough_data(x, y, min_observations = 2 * n - 2) %>%
6969
prep(toy_epi_df) %>%
7070
bake(new_data = NULL)
7171
)
@@ -90,15 +90,15 @@ test_that("check_enough_data only checks train data when skip = FALSE", {
9090
epiprocess::as_epi_df()
9191
expect_no_error(
9292
epi_recipe(toy_epi_df) %>%
93-
check_enough_data(x, y, min_data_points = n - 2, epi_keys = "geo_value") %>%
93+
check_enough_data(x, y, min_observations = n - 2, epi_keys = "geo_value") %>%
9494
prep(toy_epi_df) %>%
9595
bake(new_data = toy_test_data)
9696
)
9797
# Making sure `skip = TRUE` is working correctly in `predict`
9898
expect_no_error(
9999
epi_recipe(toy_epi_df) %>%
100100
add_role(y, new_role = "outcome") %>%
101-
check_enough_data(x, min_data_points = n - 2, epi_keys = "geo_value") %>%
101+
check_enough_data(x, min_observations = n - 2, epi_keys = "geo_value") %>%
102102
epi_workflow(linear_reg()) %>%
103103
fit(toy_epi_df) %>%
104104
predict(new_data = toy_test_data %>% filter(time_value > "2020-01-08"))
@@ -108,7 +108,7 @@ test_that("check_enough_data only checks train data when skip = FALSE", {
108108
expect_no_error(
109109
forecaster <- epi_recipe(toy_epi_df) %>%
110110
add_role(y, new_role = "outcome") %>%
111-
check_enough_data(x, min_data_points = 1, epi_keys = "geo_value", skip = FALSE) %>%
111+
check_enough_data(x, min_observations = 1, epi_keys = "geo_value", skip = FALSE) %>%
112112
epi_workflow(linear_reg()) %>%
113113
fit(toy_epi_df)
114114
)
@@ -125,15 +125,15 @@ test_that("check_enough_data works with all_predictors() downstream of construct
125125
expect_no_error(
126126
epi_recipe(toy_epi_df) %>%
127127
step_epi_lag(x, lag = c(1, 2)) %>%
128-
check_enough_data(all_predictors(), y, min_data_points = 2 * n - 5) %>%
128+
check_enough_data(all_predictors(), y, min_observations = 2 * n - 5) %>%
129129
prep(toy_epi_df) %>%
130130
bake(new_data = NULL)
131131
)
132132
expect_snapshot(
133133
error = TRUE,
134134
epi_recipe(toy_epi_df) %>%
135135
step_epi_lag(x, lag = c(1, 2)) %>%
136-
check_enough_data(all_predictors(), y, min_data_points = 2 * n - 4) %>%
136+
check_enough_data(all_predictors(), y, min_observations = 2 * n - 4) %>%
137137
prep(toy_epi_df)
138138
)
139139
})

0 commit comments

Comments
 (0)