View source: R/e_match_closest_in_range.R
e_match_closest_in_range | R Documentation |
Similar to survival::neardate,
but chooses the closest match in either direction, restricted to a (possibly asymmetric) range.
e_match_closest_in_range(
dat_to_match,
id_vars_to_match,
val_var_to_match,
dat_key,
id_vars_key,
val_var_key,
diff_lower = -Inf,
diff_upper = +Inf,
sw_criteria = c("closest", "minimum", "maximum")[1],
sw_return_key_vars = FALSE
)
dat_to_match |
data to match to the key dataset |
id_vars_to_match |
associated ID variables in data to match |
val_var_to_match |
associated value variable in data to match |
dat_key |
key dataset |
id_vars_key |
ID variables in key dataset |
val_var_key |
value variable to determine closeness in key dataset |
diff_lower |
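lower bound on the difference (value to match minus key value): a matched value can be no lower than the key value plus this amount (e.g., -2 allows matches up to 2 below the key) |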
diff_upper |
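upper bound on the difference (value to match minus key value): a matched value can be no higher than the key value plus this amount (e.g., +4 allows matches up to 4 above the key) |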
sw_criteria |
criteria for match proximity, one of "closest" (the default), "minimum", or "maximum" |
sw_return_key_vars |
TRUE/FALSE: whether to also return the key value, for use in matching when there are multiple records per ID |
Can also be used to match closest within a range of dates in the future by setting diff_lower
and diff_upper
to be positive numbers, e.g., 5 and 7.
dat_to_match restricted to only those unique observations that are closest to the key data
# Fix the RNG seed so the example data below are reproducible
set.seed(1)

# Key dataset: every combination of key1 x key2, with the rows numbered
# 1..n (in row order) as the value used to judge closeness
dat_key <- dplyr::mutate(
  tidyr::expand_grid(
    key1 = c("a", "b", "c"),
    key2 = c("x", "y")
  ),
  value = seq_len(dplyr::n())
)
# Data to match: the ID combinations only partly overlap the key data, and
# rows are resampled with replacement so an ID combination may appear
# multiple times. Intermediates live inside local() so that only
# `dat_to_match` is created in the calling environment.
dat_to_match <- local({
  id_grid <- tidyr::expand_grid(
    key1_m = c("a", "b"),      # "c" from the key data is left out
    key2_m = c("x", "y", "z")  # "z" does not occur in the key data
  )

  # 2 * 3 ID combinations; draw 4 times as many row indices with replacement
  n_ids <- 2 * 3
  row_idx <- sample.int(n = n_ids, size = 4 * n_ids, replace = TRUE)
  resampled <- dplyr::slice(id_grid, row_idx)

  # attach a value to match on plus two unrelated columns
  n_obs <- nrow(resampled)
  with_values <- dplyr::mutate(
    resampled,
    value_m = runif(n = n_obs, min = -5, max = 10),
    other1 = rnorm(n_obs),
    other2 = rnorm(n_obs)
  )

  dplyr::arrange(with_values, key1_m, key2_m)
})
# Match against the key data with infinite bounds on the allowed difference
dat_to_match_sub <- e_match_closest_in_range(
  dat_to_match     = dat_to_match,
  id_vars_to_match = c("key1_m", "key2_m"),
  val_var_to_match = "value_m",
  dat_key          = dat_key,
  id_vars_key      = c("key1", "key2"),
  val_var_key      = "value",
  diff_lower       = -Inf,
  diff_upper       = Inf
)

# Show both inputs (all rows of the data to match) and the matched subset
print(dat_key)
print(dat_to_match, n = Inf)
print(dat_to_match_sub)
# Restrict matches to differences in [-2, +4] and also return the key values
dat_to_match |>
  e_match_closest_in_range(
    id_vars_to_match   = c("key1_m", "key2_m"),
    val_var_to_match   = "value_m",
    dat_key            = dat_key,
    id_vars_key        = c("key1", "key2"),
    val_var_key        = "value",
    diff_lower         = -2,
    diff_upper         = 4,
    sw_return_key_vars = TRUE
  )
# Same [-2, +4] range, but select by the "maximum" criterion
dat_to_match |>
  e_match_closest_in_range(
    id_vars_to_match   = c("key1_m", "key2_m"),
    val_var_to_match   = "value_m",
    dat_key            = dat_key,
    id_vars_key        = c("key1", "key2"),
    val_var_key        = "value",
    diff_lower         = -2,
    diff_upper         = 4,
    sw_criteria        = "maximum",
    sw_return_key_vars = TRUE
  )
# Same [-2, +4] range, but select by the "minimum" criterion
dat_to_match |>
  e_match_closest_in_range(
    id_vars_to_match   = c("key1_m", "key2_m"),
    val_var_to_match   = "value_m",
    dat_key            = dat_key,
    id_vars_key        = c("key1", "key2"),
    val_var_key        = "value",
    diff_lower         = -2,
    diff_upper         = 4,
    sw_criteria        = "minimum",
    sw_return_key_vars = TRUE
  )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.