4
4
# ' that will generate one or more new columns of derived data by "sliding"
5
5
# ' a computation along existing data.
6
6
# '
7
- # '
8
7
# ' @inheritParams step_epi_lag
9
8
# ' @param .f A function in one of the following formats:
10
9
# ' 1. An unquoted function name with no arguments, e.g., `mean`
20
19
# ' argument must be named `.x`. A common, though very difficult to debug
21
20
# ' error is using something like `function(x) mean`. This will not work
22
21
# ' because it returns the function mean, rather than `mean(x)`
22
+ # ' @param before,after the size of the sliding window on the left and the right
23
+ # ' of the center. Usually non-negative integers for data indexed by date, but
24
+ # ' more restrictive in other cases (see [epiprocess::epi_slide()] for details).
25
+ # ' @param prefix A character string that will be prefixed to the new column.
23
26
# ' @param f_name a character string of at most 20 characters that describes
24
27
# ' the function. This will be combined with `prefix` and the columns in `...`
25
28
# ' to name the result using `{prefix}{f_name}_{column}`. By default it will be determined
26
29
# ' automatically using `clean_f_name()`.
27
- # ' @param before,after non-negative integers.
28
- # ' How far `before` and `after` each `time_value` should
29
- # ' the sliding window extend? Any value provided for either
30
- # ' argument must be a single, non-`NA`, non-negative,
31
- # ' [integer-compatible][vctrs::vec_cast] number of time steps. Endpoints of
32
- # ' the window are inclusive. Common settings:
33
- # ' * For trailing/right-aligned windows from `time_value - time_step(k)` to
34
- # ' `time_value`, use `before=k, after=0`. This is the most likely use case
35
- # ' for the purposes of forecasting.
36
- # ' * For center-aligned windows from `time_value - time_step(k)` to
37
- # ' `time_value + time_step(k)`, use `before=k, after=k`.
38
- # ' * For leading/left-aligned windows from `time_value` to
39
- # ' `time_value + time_step(k)`, use `after=k, after=0`.
40
30
# '
41
- # ' You may also pass a [lubridate::period], like `lubridate::weeks(1)` or a
42
- # ' character string that is coercible to a [lubridate::period], like
43
- # ' `"2 weeks"`.
44
31
# ' @template step-return
45
32
# '
46
33
# ' @export
@@ -69,9 +56,8 @@ step_epi_slide <-
69
56
rlang :: abort(" This recipe step can only operate on an `epi_recipe`." )
70
57
}
71
58
.f <- validate_slide_fun(.f )
72
- arg_is_scalar(before , after )
73
- before <- try_period(before )
74
- after <- try_period(after )
59
+ epiprocess ::: validate_slide_window_arg(before , attributes(recipe $ template )$ metadata $ time_type )
60
+ epiprocess ::: validate_slide_window_arg(after , attributes(recipe $ template )$ metadata $ time_type )
75
61
arg_is_chr_scalar(role , prefix , id )
76
62
arg_is_lgl_scalar(skip )
77
63
@@ -126,7 +112,6 @@ step_epi_slide_new <-
126
112
}
127
113
128
114
129
-
130
115
# ' @export
131
116
prep.step_epi_slide <- function (x , training , info = NULL , ... ) {
132
117
col_names <- recipes :: recipes_eval_select(x $ terms , data = training , info = info )
@@ -150,7 +135,6 @@ prep.step_epi_slide <- function(x, training, info = NULL, ...) {
150
135
}
151
136
152
137
153
-
154
138
# ' @export
155
139
bake.step_epi_slide <- function (object , new_data , ... ) {
156
140
recipes :: check_new_data(names(object $ columns ), object , new_data )
@@ -170,12 +154,16 @@ bake.step_epi_slide <- function(object, new_data, ...) {
170
154
class = " epipredict__step__name_collision_error"
171
155
)
172
156
}
173
- if (any(vapply(c(mean , sum ), \(x ) identical(x , object $ .f ), logical (1L )))) {
174
- cli_warn(
175
- c(" There is an optimized version of both mean and sum. See `step_epi_slide_mean`, `step_epi_slide_sum`, or `step_epi_slide_opt`." ),
176
- class = " epipredict__step_epi_slide__optimized_version"
177
- )
178
- }
157
+ # TODO: Uncomment this whenever we make the optimized versions available.
158
+ # if (any(vapply(c(mean, sum), \(x) identical(x, object$.f), logical(1L)))) {
159
+ # cli_warn(
160
+ # c(
161
+ # "There is an optimized version of both mean and sum. See `step_epi_slide_mean`, `step_epi_slide_sum`,
162
+ # or `step_epi_slide_opt`."
163
+ # ),
164
+ # class = "epipredict__step_epi_slide__optimized_version"
165
+ # )
166
+ # }
179
167
epi_slide_wrapper(
180
168
new_data ,
181
169
object $ before ,
@@ -187,48 +175,51 @@ bake.step_epi_slide <- function(object, new_data, ...) {
187
175
object $ prefix
188
176
)
189
177
}
190
- # ' wrapper to handle epi_slide particulars
178
+
179
+
180
+ # ' Wrapper to handle epi_slide particulars
181
+ # '
191
182
# ' @description
192
183
# ' This should simplify somewhat in the future when we can run `epi_slide` on
193
184
# ' columns. Surprisingly, lapply is several orders of magnitude faster than
194
185
# ' using roughly equivalent tidy select style.
186
+ # '
195
187
# ' @param fns vector of functions, even if it's length 1.
196
188
# ' @param group_keys the keys to group by. likely `epi_keys[-1]` (to remove time_value)
189
+ # '
197
190
# ' @importFrom tidyr crossing
198
191
# ' @importFrom dplyr bind_cols group_by ungroup
199
192
# ' @importFrom epiprocess epi_slide
200
193
# ' @keywords internal
201
194
epi_slide_wrapper <- function (new_data , before , after , columns , fns , fn_names , group_keys , name_prefix ) {
202
195
cols_fns <- tidyr :: crossing(col_name = columns , fn_name = fn_names , fn = fns )
196
+ # Iterate over the rows of cols_fns. For each row number, we will output a
197
+ # transformed column. The first result returns all the original columns along
198
+ # with the new column. The rest just return the new column.
203
199
seq_len(nrow(cols_fns )) %> %
204
- lapply( # iterate over the rows of cols_fns
205
- # takes in the row number, outputs the transformed column
206
- function (comp_i ) {
207
- # extract values from the row
208
- col_name <- cols_fns [[comp_i , " col_name" ]]
209
- fn_name <- cols_fns [[comp_i , " fn_name" ]]
210
- fn <- cols_fns [[comp_i , " fn" ]][[1L ]]
211
- result_name <- paste(name_prefix , fn_name , col_name , sep = " _" )
212
- result <- new_data %> %
213
- group_by(across(all_of(group_keys ))) %> %
214
- epi_slide(
215
- before = before ,
216
- after = after ,
217
- new_col_name = result_name ,
218
- f = function (slice , geo_key , ref_time_value ) {
219
- fn(slice [[col_name ]])
220
- }
221
- ) %> %
222
- ungroup()
223
- # the first result needs to include all of the original columns
224
- if (comp_i == 1L ) {
225
- result
226
- } else {
227
- # everything else just needs that column transformed
228
- result [result_name ]
229
- }
200
+ lapply(function (comp_i ) {
201
+ col_name <- cols_fns [[comp_i , " col_name" ]]
202
+ fn_name <- cols_fns [[comp_i , " fn_name" ]]
203
+ fn <- cols_fns [[comp_i , " fn" ]][[1L ]]
204
+ result_name <- paste(name_prefix , fn_name , col_name , sep = " _" )
205
+ result <- new_data %> %
206
+ group_by(across(all_of(group_keys ))) %> %
207
+ epi_slide(
208
+ before = before ,
209
+ after = after ,
210
+ new_col_name = result_name ,
211
+ f = function (slice , geo_key , ref_time_value ) {
212
+ fn(slice [[col_name ]])
213
+ }
214
+ ) %> %
215
+ ungroup()
216
+
217
+ if (comp_i == 1L ) {
218
+ result
219
+ } else {
220
+ result [result_name ]
230
221
}
231
- ) %> %
222
+ } ) %> %
232
223
bind_cols()
233
224
}
234
225
@@ -286,33 +277,11 @@ validate_slide_fun <- function(.f) {
286
277
cli_abort(" In, `step_epi_slide()`, `.f` may not be missing." )
287
278
}
288
279
if (rlang :: is_formula(.f , scoped = TRUE )) {
289
- if (! is.null(rlang :: f_lhs(.f ))) {
290
- cli_abort(" In, `step_epi_slide()`, `.f` must be a one-sided formula." )
291
- }
280
+ cli_abort(" In, `step_epi_slide()`, `.f` cannot be a formula." )
292
281
} else if (rlang :: is_character(.f )) {
293
282
.f <- rlang :: as_function(.f )
294
283
} else if (! rlang :: is_function(.f )) {
295
284
cli_abort(" In, `step_epi_slide()`, `.f` must be a function." )
296
285
}
297
286
.f
298
287
}
299
-
300
- try_period <- function (x ) {
301
- err <- is.na(x )
302
- if (! err ) {
303
- if (is.numeric(x )) {
304
- err <- ! rlang :: is_integerish(x ) || x < 0
305
- } else {
306
- x <- lubridate :: as.period(x )
307
- err <- is.na(x )
308
- }
309
- }
310
- if (err ) {
311
- cli_abort(paste(
312
- " The value supplied to `before` or `after` must be a non-negative integer" ,
313
- " a {.cls lubridate::period} or a character scalar that can be coerced" ,
314
- ' as a {.cls lubridate::period}, e.g., `"1 week"`.'
315
- ), )
316
- }
317
- x
318
- }
0 commit comments