#' Split period to months
#'
#' @description The function splits rows with a time period longer than one
#' month to multiple rows with a time period of exactly one month each. Values
#' in numeric columns (e.g. exposure or premium) are divided over the months
#' proportionately.
#'
#' @param df data.frame
#' @param begin column in `df` with begin dates
#' @param end column in `df` with end dates
#' @param ... numeric columns in `df` to split
#'
#' @return data.frame with same columns as in `df`, and one extra column
#' called `id`
#'
#' @author Martin Haringa
#'
#' @import data.table
#' @importFrom lubridate is.Date
#' @importFrom lubridate ceiling_date
#'
#' @details In insurance portfolios it is common that rows relate to periods
#' longer than one month. This is for example problematic in case exposures per
#' month are desired.
#'
#' Since insurance premiums are constant over the months, and do not depend on
#' the number of days per month, the function assumes that each month has the
#' same number of days (i.e. 30).
#'
#' @examples
#' library(lubridate)
#' portfolio <- data.frame(
#' begin1 = ymd(c("2014-01-01", "2014-01-01")),
#' end = ymd(c("2014-03-14", "2014-05-10")),
#' termination = ymd(c("2014-03-14", "2014-05-10")),
#' exposure = c(0.2025, 0.3583),
#' premium = c(125, 150))
#' period_to_months(portfolio, begin1, end, premium, exposure)
#'
#' @export
period_to_months <- function(df, begin, end, ...) {
begin00 <- deparse(substitute(begin))
end00 <- deparse(substitute(end))
df00 <- deparse(substitute(df))
cols <- vapply(substitute(list(...))[-1], deparse, FUN.VALUE = character(1))
column_names <- names(df)
if (!lubridate::is.Date(df[[begin00]]) || !lubridate::is.Date(df[[end00]])) {
stop("Columns begin and end should be Date objects. Use e.g.
lubridate::ymd() to create Date object.", call. = FALSE)
}
if (length(cols) > 0 && !all(cols %in% column_names)) {
stop("Numeric column names to split not found in ", df00, call. = FALSE)
}
# Create look up table
datum_begin <- seq(min(df[[begin00]]), max(df[[end00]]), by = "months")
datum_end <- lubridate::ceiling_date(datum_begin, unit = "months") - 1
datum_begin <- lubridate::floor_date(datum_begin, unit = "months")
lookup <- data.table::data.table(datum_begin, datum_end)
data.table::setnames(lookup, old = c("datum_begin", "datum_end"),
new = c(begin00, end00))
data.table::setkeyv(lookup, c(begin00, end00))
# due to NSE notes in R CMD check
new_end00 <- start_int <- end_int <- end_days <- begin_days <-
overlap_begin_end <- overlap_period <- overlap_total <- NULL
data.table::setDT(df)
df[, id := .I][ # Add row id
, new_end00 := get(end00) + 1] # Add one day such that day 31
# equals 0 days in new month
# Overlap periods
ans <- data.table::foverlaps(df, lookup, type = "any", which = FALSE)
# Divide periods
if (length(cols) > 0) {
ans[, start_int := data.table::fifelse(
get(begin00) < get(paste0("i.", begin00)), 0, 1)][
, end_int := data.table::fifelse(
get(end00) > get(paste0("i.", end00)), 0, 1)][
, end_days := data.table::fifelse(
end_int == 0, elapsed_days(new_end00) / 30, 0)][
, begin_days := data.table::fifelse(
start_int == 0,
(30 - elapsed_days(get(paste0("i.", begin00)))) / 30, 0)][
, overlap_begin_end := begin_days + end_days][
, overlap_period := data.table::fifelse(
overlap_begin_end == 0, 1, overlap_begin_end)][
, overlap_total := sum(overlap_period, na.rm = TRUE), by = id]
# Divide split columns
for (i in seq_len(length(cols))) {
ans[, `:=`(cols[i], get(cols[i]) * overlap_period / overlap_total)]
}
}
ans[, c("id", column_names), with = FALSE]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.