R/durCalc.R

Defines functions durCalc

Documented in durCalc

#' @title Filter Dataset by Date Duration
#' @description Calculates the duration between two dates, use it as a filter to select rows that satisfy the length criteria.
#' Returns the dataset with additional columns regarding the length of durations in different units.
#' @usage
#' durCalc(df = NULL, start, end, timeunit = "day", filterlength = NA,
#'        filterlonger = TRUE, year = 365.25, month = 30.42)
#' @param df Data frame containing start and end dates.
#' @param start Column in df for start dates or a date to use as start date.
#' @param end Column in df for end dates or a date to use as a end date.
#' @param timeunit Unit of time to be used in plots. \code{"day(s)"}, \code{"week(s)"}, \code{"month(s)"}, \code{"quarter(s)"}, \code{"semiannual"}, \code{"halfyear"},or \code{"year(s)"}.
#' @param filterlength A time length to use as filter.
#' @param filterlonger If TRUE, the function gives rows with longer durations than specified in filterlength. If FALSE, gives rows with shorter durations.
#' @param year Number of days to use as a year. Default is 365.25.
#' @param month Number of days to use as a month. Default is 30.42.
#' @details
#' Additional columns returned with the filtered rows are: length of duration in days, in specified time unit, and in calendar units, and
#' how much longer/shorter the durations are compared to filter length in calendar units.
#'
#' If no \code{filterlength} is provided, then returns all rows with length of duration in days and calendar units.
#'
#' You can use dates for \code{start} and \code{end} and provide no \code{df} to get the length of duration between the dates in calendar units. See example.
#' @return A subset of original data frame with additional columns in specified time units and calendar units.
#' @seealso \code{\link{durPlot}}, \code{\link{durSummary}}
#' @examples
#' ### Filter people who lived longer than 85 years
#' durCalc(life_exp, start = "Birth", end = "Death", timeunit = "years", filterlength = 85)
#'
#' ### How old each person would be as of January 1, 2000 if they were alive
#' durCalc(life_exp, start = "Birth", end = as.Date("2000-1-1"), timeunit = "years")
#'
#' ### Use as unit-converter between two dates
#' durCalc(start = as.Date("2010-12-1"), end = as.Date("2015-4-26"), timeunit = "weeks")
#' @author Dahee Lee
#' @export

durCalc <- function(df = NULL, start, end, timeunit = "day",
                    filterlength = NA, filterlonger = TRUE, year = 365.25, month = 30.42){

  if(is.null(start)){stop("'start' needed")}
  if(is.null(end)){stop("'end' needed")}

  # Make duration data for calculation
  if (!is.null(df)){
    if (class(end) == "Date" & class(start) == "character"){
      df$days <- as.numeric(end - df[[start]])
    } else if (class(start) == "Date" & class(end) == "character"){
      df$days <- as.numeric(df[[end]] - start)
    } else if (class(start) == "character" & class(end) == "character") {
      df$days <- as.numeric(df[[end]] - df[[start]])
    } else {
      stop("'start and 'end' have to be dates or column names in 'df'")
    }

    # Remove missing rows
    df <- df[!is.na(df$days),]

  } else {

    if (class(start) == "Date" & class(end) == "Date"){
      days <- as.numeric(end-start)
    } else {
      stop("'start and 'end' have to be dates")
    }

  }

  # Change Time unit
  unit <- if(timeunit %in% c("year","years")){
    1 / year
  } else if (timeunit %in% c("semiannual", "halfyear")){
    2 / year
  } else if (timeunit %in% c("quarter", "quarters")){
    4 / year
  } else if (timeunit %in% c("month", "months")){
    1 / month
  } else if (timeunit %in% c("week", "weeks")){
    1 / 7
  } else if (timeunit %in% c("day", "days")){
    1
  } else {
    warning("invalid 'timeunit': default 'days' used")
    1 }

  # Make diff_timeunit column
  if (!is.null(df)){
    if(!is.na(filterlength)){
      df$diff_timeunit <- df$days*unit
    }
  } else {
    diff_timeunit <- days*unit
  }

  # Duration in calendar terms
  calc_diffdays <- function(days){

    # Year
    Yr <- floor(days / year)
    outputYr <- paste0(Yr, ifelse(Yr > 1, "years ", "year "))
    remaining_days <- days %% year

    # Month
    M <- floor(remaining_days / month)
    outputM <- paste0(M, ifelse(M > 1, "months ", "month "))

    # Day
    D <- remaining_days %% month

    # Paste Y-M-D
    outputStr <- paste0(if(Yr >= 1){outputYr}, if(M >= 1){outputM},  round(D,0), ifelse(D > 1, "days", "day"))

    return(outputStr)
  }

  # Select
  if(!is.null(df)){

    if (is.na(filterlength)){
      df$diff_length <- sapply(df$days, calc_diffdays)

    } else {

      if (filterlonger == TRUE){
        df <- df[df$diff_timeunit > filterlength, ]
        df$diff_timeunit <- round(df$days * unit,2)
        df$late_days <- df$days - filterlength / unit
        df$diff_length <- sapply(df$days, calc_diffdays)
        df$longer_by <- sapply(df$late_days, calc_diffdays)
        names(df)[names(df) == "diff_timeunit"] <- paste0("diff_", timeunit)
        df$late_days <- NULL

      } else {

        df <- df[df$diff_timeunit < filterlength, ]
        df$diff_timeunit <- round(df$days * unit, 2)
        df$early_days <- filterlength / unit - df$days
        df$diff_length <- sapply(df$days, calc_diffdays)
        df$shorter_by <- sapply(df$early_days, calc_diffdays)
        names(df)[names(df) == "diff_timeunit"] <- paste0("diff_", timeunit)
        df$early_days <- NULL
      }
    }

  } else {

    diff_timeunit <- round(days * unit,2)
    diff_length <- calc_diffdays(days)
    df <- data.frame(days, diff_timeunit, diff_length)
    names(df)[names(df) == "diff_timeunit"] <- paste0("diff_", timeunit)
  }

  # Print result
  if (nrow(df) == 0){
    print("No data meeting criteria")
  } else {return(df)}
}

Try the timelineS package in your browser

Any scripts or data that you put into this service are public.

timelineS documentation built on May 2, 2019, 8:57 a.m.