Nothing
#' Interlace the secondary tables by the keys and validity of the primary table
#'
#' @description
#' Using the primary table as the basis of keys and validity, the secondary tables are joined to the
#' primary table according to the shared keys and then truncated to the validity of the primary table
#'
#' @param primary (`.data`)\cr
#' The table with the keys the interlacing is done by (first "key_" column in primary).
#' The secondary tables are matched to the validity interval of the primary table before interlacing
#' @param secondary (`list`(`.data`))\cr
#' A list of tables that should be matched and joined to the primary table
#' @return An object the same type as the primary. If secondary is NULL, primary is returned.
#' If secondary is given, the output is the join of the secondary onto primary.
#' @examples
#' # Lets create some synthetic data
#' data <- dplyr::mutate(mtcars, "key_name" = rownames(mtcars))
#'
#' x <- dplyr::select(data, "key_name", "mpg", "cyl")
#' y <- dplyr::select(data, "key_name", "wt", "vs")
#' z <- dplyr::select(data, "key_name", "qsec")
#'
#' # In x, the mpg was changed on 2000-01-01 for all but the first 10 cars
#' x <- list(utils::head(x, 10) |>
#' dplyr::mutate("valid_from" = as.Date("1990-01-01"),
#' "valid_until" = as.Date(NA)),
#' utils::tail(x, base::nrow(x) - 10) |>
#' dplyr::mutate("valid_from" = as.Date("1990-01-01"),
#' "valid_until" = as.Date("2000-01-01")),
#' utils::tail(x, base::nrow(x) - 10) |>
#' dplyr::mutate("mpg" = 0.9 * mpg,
#' "valid_from" = as.Date("2000-01-01"),
#' "valid_until" = as.Date(NA))) |>
#' purrr::reduce(dplyr::union_all)
#'
#' # In y, the wt was changed on 2010-01-01 for all but the last 10 cars
#' y <- list(utils::head(y, base::nrow(y) - 10) |>
#' dplyr::mutate("valid_from" = as.Date("1990-01-01"),
#' "valid_until" = as.Date(NA)),
#' utils::tail(y, 10) |>
#' dplyr::mutate("valid_from" = as.Date("1990-01-01"),
#' "valid_until" = as.Date("2010-01-01")),
#' utils::tail(y, 10) |>
#' dplyr::mutate(wt = 1.1 * wt,
#' "valid_from" = as.Date("2010-01-01"),
#' "valid_until" = as.Date(NA))) |>
#' purrr::reduce(dplyr::union_all)
#'
#'
#' # In z, the qsec was changed on 2020-01-01 for all but the last and first 10 cars
#' z <- list(utils::head(z, base::nrow(z) - 10) |>
#' dplyr::mutate("valid_from" = as.Date("1990-01-01"),
#' "valid_until" = as.Date(NA)),
#' utils::tail(z, 10) |>
#' dplyr::mutate("valid_from" = as.Date("1990-01-01"),
#' "valid_until" = as.Date(NA)),
#' utils::head(z, base::nrow(z) - 10) |>
#' utils::tail(base::nrow(z) - 10) |>
#' dplyr::mutate("valid_from" = as.Date("1990-01-01"),
#' "valid_until" = as.Date("2020-01-01")),
#' utils::head(z, base::nrow(z) - 10) |>
#' utils::tail(base::nrow(z) - 10) |>
#' dplyr::mutate(qsec = 1.1 * qsec,
#' "valid_from" = as.Date("2020-01-01"),
#' "valid_until" = as.Date(NA))) |>
#' purrr::reduce(dplyr::union_all)
#'
#'
#' # We choose a primary interval to interlace on
#' primary <- dplyr::transmute(data, key_name,
#' valid_from = as.Date("1985-01-01"),
#' valid_until = as.Date(NA))
#'
#'
#' # Perform the truncated interlace
#' truncate_interlace(primary, secondary = list(x, y, z))
#'
#' @importFrom rlang .data
#' @noRd
truncate_interlace <- function(primary, secondary = NULL) {
# Check edge case
if (is.null(secondary) || (is.list(secondary) && length(secondary) == 0)) {
return(primary)
}
# Ensure secondary is list
if (!inherits(secondary, "list")) secondary <- list(secondary)
# Determine the keys of the primary data
primary_keys <- colnames(primary)[startsWith(colnames(primary), "key_")]
# Find all secondary information that is valid while primary information is valid
secondary_truncated <- secondary |>
purrr::map(~ {
# First we find keys that are common with current secondary table and the primary table
common_keys <- intersect(primary_keys, colnames(.x)[startsWith(colnames(.x), "key_")])
if (length(common_keys) == 0) {
stop("No common keys found to interlace by!", call. = FALSE)
}
# We then join the tables by these keys and truncate the secondary table to validity range of the primary
dplyr::left_join(x = primary, y = .x, suffix = c("", ".y"), by = common_keys) |>
dplyr::filter( # Keep secondary records that is within validity of the primary data.
(.data$valid_from < .data$valid_until.y) | is.na(.data$valid_until.y),
(.data$valid_until > .data$valid_from.y) | is.na(.data$valid_until)
) |>
dplyr::mutate(
"valid_from" = ifelse(.data$valid_from >= .data$valid_from.y, .data$valid_from, .data$valid_from.y), # nolint: ifelse_censor_linter
"valid_until" = ifelse(.data$valid_until <= .data$valid_until.y, .data$valid_until, .data$valid_until.y) # nolint: ifelse_censor_linter
) |>
dplyr::select(-tidyselect::ends_with(".y"))
})
# With the secondary data truncated, we can interlace and return
out <- SCDB::interlace(secondary_truncated, by = purrr::pluck(primary_keys, 1))
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.