R/exposed_df_helpers.R

Defines functions verify_col_names verify_get_trx_types verify_exposed_df make_date_col_names vec_cast.grouped_df.split_exposed_df vec_cast.split_exposed_df.grouped_df vec_ptype2.grouped_df.split_exposed_df vec_ptype2.split_exposed_df.grouped_df vec_cast.grouped_df.exposed_df vec_cast.exposed_df.grouped_df vec_ptype2.grouped_df.exposed_df vec_ptype2.exposed_df.grouped_df vec_cast.data.frame.split_exposed_df vec_cast.split_exposed_df.data.frame vec_ptype2.data.frame.split_exposed_df vec_ptype2.split_exposed_df.data.frame vec_cast.data.frame.exposed_df vec_cast.exposed_df.data.frame vec_ptype2.data.frame.exposed_df vec_ptype2.exposed_df.data.frame vec_cast.tbl_df.split_exposed_df vec_cast.split_exposed_df.tbl_df vec_ptype2.tbl_df.split_exposed_df vec_ptype2.split_exposed_df.tbl_df vec_cast.tbl_df.exposed_df vec_cast.exposed_df.tbl_df vec_ptype2.tbl_df.exposed_df vec_ptype2.exposed_df.tbl_df vec_cast.exposed_df.split_exposed_df vec_ptype2.exposed_df.split_exposed_df vec_cast.split_exposed_df.exposed_df vec_ptype2.split_exposed_df.exposed_df vec_cast.split_exposed_df.split_exposed_df vec_ptype2.split_exposed_df.split_exposed_df vec_cast.exposed_df.exposed_df vec_ptype2.exposed_df.exposed_df exposed_df_cast exposed_df_ptype2 has_compatible_expo `%||%` full_join.exposed_df inner_join.exposed_df right_join.exposed_df left_join.exposed_df rebuild_mutate_join relocate.exposed_df rename.exposed_df select.exposed_df rebuild_select_relocate mutate.exposed_df arrange.exposed_df filter.exposed_df ungroup.exposed_df group_by.exposed_df print.exposed_df new_exposed_df as_exposed_df is_exposed_df

Documented in as_exposed_df is_exposed_df

#' Exposed data frame helper functions
#'
#' Test for and coerce to the `exposed_df` class.
#'
#' `is_exposed_df()` will return `TRUE` if `x` is an `exposed_df` object.
#'
#' `as_exposed_df()` will coerce a data frame to an `exposed_df` object if that
#' data frame has columns for policy numbers, statuses, exposures,
#' policy periods (for policy exposures only), and exposure start / end dates.
#' Optionally, if `x` has transaction counts and amounts by type, these can
#' be specified without calling [add_transactions()].
#'
#' @param x An object. For `as_exposed_df()`, `x` must be a data frame.
#' @param trx_types Optional. Character vector containing unique transaction
#' types that have been attached to `x`. For each value in `trx_types`,
#' `as_exposed_df` requires that columns exist in `x` named `trx_n_{*}` and
#' `trx_amt_{*}` containing transaction counts and amounts, respectively. The
#' prefixes "trx_n_" and "trx_amt_" can be overridden using the `col_trx_n_`
#' and `col_trx_amt_` arguments.
#' @param col_pol_num Optional. Name of the column in `x` containing the policy
#' number. The assumed default is "pol_num".
#' @param col_status Optional. Name of the column in `x` containing the policy
#' status. The assumed default is "status".
#' @param col_exposure Optional. Name of the column in `x` containing exposures.
#' The assumed default is "exposure".
#' @param col_pol_per Optional. Name of the column in `x` containing policy
#' exposure periods. Only necessary if `cal_expo` is `FALSE`. The assumed
#' default is either "pol_yr", "pol_qtr", "pol_mth", or "pol_wk" depending on
#' the value of `expo_length`.
#' @param cols_dates Optional. Names of the columns in `x` containing exposure
#' start and end dates. Both date ranges are assumed to be exclusive. The
#' assumed default is of the form *A*_*B*. *A* is "cal" if `cal_expo` is `TRUE`
#' or "pol" otherwise. *B* is either "yr", "qtr", "mth", or "wk" depending on
#' the value of `expo_length`.
#' @param col_trx_n_ Optional. Prefix to use for columns containing transaction
#' counts.
#' @param col_trx_amt_ Optional. Prefix to use for columns containing transaction
#' amounts.
#' @inheritParams expose
#'
#' @return For `is_exposed_df()`, a length-1 logical vector. For
#' `as_exposed_df()`, an `exposed_df` object.
#'
#' @importFrom vctrs vec_ptype2 vec_cast
#' @seealso [expose()] for information on how `exposed_df` objects are typically
#' created from census data.
#'
#' @export
is_exposed_df <- function(x) {
  inherits(x, "exposed_df")
}

#' @rdname is_exposed_df
#' @export
as_exposed_df <- function(x, end_date, start_date = as.Date("1900-01-01"),
                          target_status = NULL, cal_expo = FALSE,
                          expo_length = c("year", "quarter", "month", "week"),
                          trx_types = NULL,
                          col_pol_num,
                          col_status,
                          col_exposure,
                          col_pol_per,
                          cols_dates,
                          col_trx_n_ = "trx_n_",
                          col_trx_amt_ = "trx_amt_",
                          default_status) {

  if (is_exposed_df(x)) return(x)

  expo_length <- rlang::arg_match(expo_length)

  if (!is.data.frame(x)) {
    rlang::abort("`x` must be a data frame.")
  }

  # column name alignment
  if (!missing(col_pol_num)) x <- x |> rename(pol_num = {{col_pol_num}})
  if (!missing(col_status)) x <- x |> rename(status = {{col_status}})
  if (!missing(col_exposure)) x <- x |> rename(exposure = {{col_exposure}})

  # column name alignment - policy exposure periods
  exp_col_pol_per <- if (!cal_expo) {
    abbrev <- abbr_period(expo_length)
    paste0("pol_", abbrev)
  }
  if (!missing(col_pol_per) && !cal_expo) {
    x <- x |> rename({{exp_col_pol_per}} := {{col_pol_per}})
  }

  # column name alignment - period start and end dates
  exp_cols_dates <- make_date_col_names(cal_expo, expo_length)
  if (!missing(cols_dates)) {

    if (length(cols_dates) != 2 && is.character(cols_dates)) {
      rlang::abort("`cols_dates` must be a length 2 character vector")
    }

    q_exp_cols_dates <- rlang::syms(exp_cols_dates)
    q_cols_dates <- rlang::syms(cols_dates)

    x <- x |> rename(!!q_exp_cols_dates[[1]] := !!q_cols_dates[[1]],
                     !!q_exp_cols_dates[[2]] := !!q_cols_dates[[2]])
  }

  # check transaction types
  if (!is.null(trx_types)) {

    trx_renamer <- function(x) {
      x <- gsub(paste0("^", col_trx_n_), "trx_n_", x)
      gsub(paste0("^", col_trx_amt_), "trx_amt_", x)
    }

    x <- dplyr::rename_with(x, trx_renamer)

    trx_types <- unique(trx_types)
    exp_cols_trx <- outer(c("trx_n_", "trx_amt_"), trx_types, paste0) |>
      as.character()
  } else {
    exp_cols_trx <- NULL
  }

  # check required columns
  # pol_num, status, exposure, 2 date cols, policy period (policy expo only)
  req_names <- c("pol_num", "status", "exposure",
                 exp_col_pol_per,
                 exp_cols_dates,
                 exp_cols_trx)
  verify_col_names(names(x), req_names)

  if (missing(default_status)) {
    default_status <- most_common(x$status)
  }

  new_exposed_df(x, end_date, start_date, target_status, cal_expo,
                 expo_length, trx_types, default_status)

}

# low-level class constructor
new_exposed_df <- function(x, end_date, start_date, target_status,
                           cal_expo, expo_length, trx_types = NULL,
                           default_status, split = FALSE) {


  date_cols <- make_date_col_names(cal_expo, expo_length)
  exposure_type <- if (cal_expo) {
    if (split) {
      "split"
    } else {
      "calendar"
    }
  } else {
    "policy"
  }

  new_class <- if (exposure_type == "split") {
    c("split_exposed_df", "exposed_df")
  } else {
    "exposed_df"
  }

  tibble::new_tibble(x,
                     class = new_class,
                     target_status = target_status,
                     exposure_type = paste(exposure_type, expo_length,
                                           sep = "_"),
                     start_date = start_date,
                     end_date = end_date,
                     date_cols = date_cols,
                     trx_types = trx_types,
                     default_status = as.character(default_status))

}

#' @export
print.exposed_df <- function(x, ...) {
  cat("Exposure data\n\n",
      "Exposure type:", attr(x, "exposure_type"), "\n",
      "Target status:", paste(attr(x, "target_status"), collapse = ", "), "\n",
      "Study range:", as.character(attr(x, "start_date")), "to",
      as.character(attr(x, "end_date")))

  trx_types <- attr(x, "trx_types")
  if (!is.null(trx_types)) {
    cat("\n", "Transaction types:", paste(trx_types, collapse = ", "), "\n")
  }

  cat("\n\n")
  NextMethod()
}

#' @export
group_by.exposed_df <- function(.data, ..., .add, .drop) {
  x <- NextMethod()
  if (is_split_exposed_df(.data)) {
    class(x) <- c("split_exposed_df", "exposed_df", class(x))
  } else {
    class(x) <- c("exposed_df", class(x))
  }

  x
}

#' @export
ungroup.exposed_df <- function(x, ...) {
  res <- NextMethod()
  vec_cast(res, x)
}

#' @export
filter.exposed_df <- function(.data, ..., .by = NULL, .preserve = FALSE) {
  x <- NextMethod()
  vec_cast(x, .data)
}

#' @export
arrange.exposed_df <- function(.data, ..., .by_group) {
  x <- NextMethod()
  vec_cast(x, .data)
}

#' @export
mutate.exposed_df <- function(.data, ...) {
  x <- NextMethod()
  rebuild_mutate_join(.data, x)
}

rebuild_select_relocate <- function(.data, res) {
  if (dplyr::is_grouped_df(.data)) {
    g <- groups(.data)
    vec_cast(res, ungroup(.data)[, names(res)] |> group_by(!!!g))
  } else {
    res
  }
}

#' @export
select.exposed_df <- function(.data, ...) {
  x <- NextMethod()
  rebuild_select_relocate(.data, x)
}

#' @export
slice.exposed_df <- function (.data, ..., .by = NULL, .preserve = FALSE)  {
  if (dplyr::is_grouped_df(.data)) {
    NextMethod(.by = NULL) |> vec_cast(.data)
  } else {
    NextMethod()
  }
}

#' @export
rename.exposed_df <- function(.data, ..., .by_group) {
  x <- NextMethod()
  if (dplyr::is_grouped_df(.data)) {
    g <- groups(.data)
    vec_cast(x, stats::setNames(ungroup(.data), names(x)) |> group_by(!!!g))
  } else {
    x
  }
}

#' @export
relocate.exposed_df <- function(.data, ..., .by_group) {
  x <- NextMethod()
  rebuild_select_relocate(.data, x)
}

rebuild_mutate_join <- function(.data, res) {
  if (dplyr::is_grouped_df(.data)) {
    g <- groups(.data)
    ptype <- vec_ptype2(ungroup(.data), res)
    vec_cast(res, ptype |> group_by(!!!g))
  } else {
    res
  }
}

#' @export
left_join.exposed_df <- function(x, y, by = NULL, copy = FALSE,
                                 suffix = c(".x", ".y"), ..., keep = NULL) {
  res <- NextMethod()
  rebuild_mutate_join(x, res)
}

#' @export
right_join.exposed_df <- function(x, y, by = NULL, copy = FALSE,
                                  suffix = c(".x", ".y"), ..., keep = NULL) {
  res <- NextMethod()
  rebuild_mutate_join(x, res)
}

#' @export
inner_join.exposed_df <- function(x, y, by = NULL, copy = FALSE,
                                  suffix = c(".x", ".y"), ..., keep = NULL) {
  res <- NextMethod()
  rebuild_mutate_join(x, res)
}

#' @export
full_join.exposed_df <- function(x, y, by = NULL, copy = FALSE,
                                 suffix = c(".x", ".y"), ..., keep = NULL) {
  res <- NextMethod()
  rebuild_mutate_join(x, res)
}

#' @export
semi_join.exposed_df <- function (x, y, by = NULL, copy = FALSE, ...) {
  NextMethod() |> vec_cast(x)
}

#' @export
anti_join.exposed_df <- function (x, y, by = NULL, copy = FALSE, ...) {
  NextMethod() |> vec_cast(x)
}


# NULL coalesce function
`%||%` <- function(x, y) if (is.null(x)) y else x

has_compatible_expo <- function(x, y) {
  x <- attr(x, "exposure_type")
  y <- attr(y, "exposure_type")
  x <- x %||% y
  y <- y %||% x
  x == y
}

exposed_df_ptype2 <- function(x, y, ..., x_arg = "", y_arg = "") {

  out <- vctrs::tib_ptype2(x, y, ..., x_arg = x_arg, y_arg = y_arg)

  if (!has_compatible_expo(x, y)) {
    vctrs::stop_incompatible_type(
      x,
      y,
      x_arg = x_arg,
      y_arg = y_arg,
      details = "Two different exposure period types cannot be combined."
    )
  }

  end_date <- max(as.Date(attr(x, "end_date")),
                  as.Date(attr(y, "end_date")))
  start_date <- min(as.Date(attr(x, "start_date")),
                    as.Date(attr(y, "start_date")))
  target_status <- union(attr(x, "target_status"), attr(y, "target_status"))
  trx_types <- union(attr(x, "trx_types"), attr(y, "trx_types"))

  expo_type <- attr(x, "exposure_type") %||% attr(y, "exposure_type")

  split_type <- strsplit(expo_type, "_")[[1]]
  cal_expo <- split_type[[1]] %in% c("calendar", "split")
  expo_length <- split_type[[2]]
  default_status <- attr(x, "default_status")

  new_exposed_df(out, end_date, start_date, target_status, cal_expo,
                 expo_length, trx_types, default_status,
                 is_split_exposed_df(x))
}


exposed_df_cast <- function(x, to, ..., x_arg = "", to_arg = "") {

  out <- vctrs::tib_cast(x, to, ..., x_arg = x_arg, to_arg = to_arg)

  if (!has_compatible_expo(x, to)) {
    vctrs::stop_incompatible_cast(
      x,
      to,
      x_arg = x_arg,
      to_arg = to_arg,
      details = "Two different exposure period types cannot be combined."
    )
  }

  end_date <- attr(to, "end_date")
  start_date <- attr(to, "start_date")
  target_status <- attr(to, "target_status")
  trx_types <- attr(to, "trx_types")

  expo_type <- attr(to, "exposure_type")
  split_type <- strsplit(expo_type, "_")[[1]]
  cal_expo <- split_type[[1]] %in% c("calendar", "split")
  expo_length <- split_type[[2]]
  default_status <- attr(to, "default_status")

  new_exposed_df(out, end_date, start_date, target_status, cal_expo,
                 expo_length, trx_types, default_status,
                 is_split_exposed_df(to))
}


# exposed_df | exposed_df
#' @export
vec_ptype2.exposed_df.exposed_df <- function(x, y, ...) {
  exposed_df_ptype2(x, y, ...)
}

#' @export
vec_cast.exposed_df.exposed_df <- function(x, to, ...) {
  exposed_df_cast(x, to, ...)
}

# split_exposed_df | split_exposed_df
#' @export
vec_ptype2.split_exposed_df.split_exposed_df <- function(x, y, ...) {
  exposed_df_ptype2(x, y, ...)
}

#' @export
vec_cast.split_exposed_df.split_exposed_df <- function(x, to, ...) {
  exposed_df_cast(x, to, ...)
}


# exposed_df | split_exposed_df
#' @export
vec_ptype2.split_exposed_df.exposed_df <- function(x, y, ...) {
  exposed_df_ptype2(x, y, ...)
}

#' @export
vec_cast.split_exposed_df.exposed_df <- function(x, to, ...) {
  exposed_df_cast(x, to, ...)
}

#' @export
vec_ptype2.exposed_df.split_exposed_df <- function(x, y, ...) {
  exposed_df_ptype2(x, y, ...)
}

#' @export
vec_cast.exposed_df.split_exposed_df <- function(x, to, ...) {
  exposed_df_cast(x, to, ...)
}


# exposed_df | tbl_df
#' @export
vec_ptype2.exposed_df.tbl_df <- function(x, y, ...) {
  exposed_df_ptype2(x, y, ...)
}

#' @export
vec_ptype2.tbl_df.exposed_df <- function(x, y, ...) {
  exposed_df_ptype2(x, y, ...)
}

#' @export
vec_cast.exposed_df.tbl_df <- function(x, to, ...) {
  exposed_df_cast(x, to, ...)
}

#' @export
vec_cast.tbl_df.exposed_df <- function(x, to, ...) {
  vctrs::tib_cast(x, to, ...)
}

# split_exposed_df | tbl_df
#' @export
vec_ptype2.split_exposed_df.tbl_df <- function(x, y, ...) {
  exposed_df_ptype2(x, y, ...)
}

#' @export
vec_ptype2.tbl_df.split_exposed_df <- function(x, y, ...) {
  exposed_df_ptype2(x, y, ...)
}

#' @export
vec_cast.split_exposed_df.tbl_df <- function(x, to, ...) {
  exposed_df_cast(x, to, ...)
}

#' @export
vec_cast.tbl_df.split_exposed_df <- function(x, to, ...) {
  vctrs::tib_cast(x, to, ...)
}


# exposed_df | data.frame
#' @export
vec_ptype2.exposed_df.data.frame <- function(x, y, ...) {
  exposed_df_ptype2(x, y, ...)
}

#' @export
vec_ptype2.data.frame.exposed_df <- function(x, y, ...) {
  exposed_df_ptype2(x, y, ...)
}

#' @export
vec_cast.exposed_df.data.frame <- function(x, to, ...) {
  exposed_df_cast(x, to, ...)
}

#' @export
vec_cast.data.frame.exposed_df <- function(x, to, ...) {
  vctrs::df_cast(x, to, ...)
}

# split_exposed_df | data.frame
#' @export
vec_ptype2.split_exposed_df.data.frame <- function(x, y, ...) {
  exposed_df_ptype2(x, y, ...)
}

#' @export
vec_ptype2.data.frame.split_exposed_df <- function(x, y, ...) {
  exposed_df_ptype2(x, y, ...)
}

#' @export
vec_cast.split_exposed_df.data.frame <- function(x, to, ...) {
  exposed_df_cast(x, to, ...)
}

#' @export
vec_cast.data.frame.split_exposed_df <- function(x, to, ...) {
  vctrs::df_cast(x, to, ...)
}


# exposed_df | grouped_df
#' @export
vec_ptype2.exposed_df.grouped_df <- function(x, y, ...) {
  g <- union(dplyr::group_vars(x), dplyr::group_vars(y))
  exposed_df_ptype2(x, y, ...) |>
    group_by(dplyr::across(dplyr::all_of(g)))
}

#' @export
vec_ptype2.grouped_df.exposed_df <- function(x, y, ...) {
  g <- union(dplyr::group_vars(x), dplyr::group_vars(y))
  exposed_df_ptype2(x, y, ...) |>
    group_by(dplyr::across(dplyr::all_of(g)))
}

#' @export
vec_cast.exposed_df.grouped_df <- function(x, to, ...) {
  exposed_df_cast(x, to, ...) |>
    group_by(!!!groups(to))
}

#' @export
vec_cast.grouped_df.exposed_df <- function(x, to, ...) {
  vctrs::tib_cast(x, to, ...) |>
    group_by(!!!groups(to))
}


# split_exposed_df | grouped_df
#' @export
vec_ptype2.split_exposed_df.grouped_df <- function(x, y, ...) {
  g <- union(dplyr::group_vars(x), dplyr::group_vars(y))
  exposed_df_ptype2(x, y, ...) |>
    group_by(dplyr::across(dplyr::all_of(g)))
}

#' @export
vec_ptype2.grouped_df.split_exposed_df <- function(x, y, ...) {
  g <- union(dplyr::group_vars(x), dplyr::group_vars(y))
  exposed_df_ptype2(x, y, ...) |>
    group_by(dplyr::across(dplyr::all_of(g)))
}

#' @export
vec_cast.split_exposed_df.grouped_df <- function(x, to, ...) {
  exposed_df_cast(x, to, ...) |>
    group_by(!!!groups(to))
}

#' @export
vec_cast.grouped_df.split_exposed_df <- function(x, to, ...) {
  vctrs::tib_cast(x, to, ...) |>
    group_by(!!!groups(to))
}


# helper for determining date columns
make_date_col_names <- function(cal_expo, expo_length) {
  abbrev <- abbr_period(expo_length)
  c(
    paste0(if (cal_expo) "cal_" else "pol_date_", abbrev),
    paste0(if (cal_expo) "cal_" else "pol_date_", abbrev, "_end")
  )
}

verify_exposed_df <- function(.data) {
  if (!is_exposed_df(.data)) {
    rlang::abort(c(x = glue::glue("`{deparse(substitute(.data))}` must be an `exposed_df` object."),
                   i = "Hint: Use `as_exposed_df()` to convert your data to the required format."
    ))
  }
}

# verify transactions exist and return all unique transaction types.
# If no transactions and required = TRUE, an error is thrown. Otherwise NULL
# if returned
verify_get_trx_types <- function(.data, required = TRUE) {
  trx_types <- attr(.data, "trx_types")
  if (is.null(trx_types)) {
    if (required) {
      rlang::abort(c(x = "No transactions have been attached to `.data`.",
                     i = "Add transaction data using `add_transactions()` before calling this function."))
    }
    return(NULL)
  }
  trx_types
}

# function to verify that required names exist and to send an error if not
verify_col_names <- function(x_names, required) {
  unmatched <- setdiff(required, x_names)

  if (length(unmatched) > 0) {
    rlang::abort(c(x = glue::glue("The following columns are missing: {paste(unmatched, collapse = ', ')}."),
                   i = "Hint: create these columns or use the `col_*` arguments to specify existing columns that should be mapped to these elements."))
  }
}

Try the actxps package in your browser

Any scripts or data that you put into this service are public.

actxps documentation built on June 26, 2024, 9:07 a.m.