R/expand_time_at_risk.R

Defines functions expand_time_at_risk

Documented in expand_time_at_risk

#' Expand time at risk
#' 
#' Using a time at risk dataframe generated by \code{create_time_at_risk},
#' generate an "expanded" version with one row for each person-day combination
#' @param time_at_risk A dataframe generated by \code{create_time_at_risk}
#' @return A \code{data.frame} with one row for each person's 
#' uninterrupted period of residency
#' @export
#' @examples
#' residency <- get_data(tab = 'residency', dbname = 'openhds')
#' individual <- get_data(tab = 'individual', dbname = 'openhds')
#' location <- get_data(tab = 'location', dbname = 'openhds')
#' time_at_risk <- create_time_at_risk(residency = residency, individual = individual, location = location)
#' #' expanded_time_at_risk <- expand_time_at_risk(time_at_risk = time_at_risk)

expand_time_at_risk <- function(time_at_risk,
                                start_date = '2002-01-01',
                                end_date = Sys.Date()){
  
  message('Expanding the time at risk data set. This will be slow.')
  
  # Packages
  require(dplyr)
  require(data.table)
  
  # Make date objects
  start_date <- as.Date(start_date)
  end_date <- as.Date(end_date)
  
  # Manually adjust dates to not take into account anything before opd
  time_at_risk$startDate[time_at_risk$startDate < start_date] <-  start_date
  
  # Manually adjust exit dates to only go to end_date
  time_at_risk$endDate[is.na(time_at_risk$endDate)] <- end_date
  time_at_risk$endDate[time_at_risk$endDate > end_date] <- end_date
  
  
  # Remove any exits prior to the study period
  time_at_risk <- time_at_risk[time_at_risk$endDate >= 
                                 time_at_risk$startDate,]
    
  # Make an expanded dataframe of all possible person-periods of residence
  # expanded <- expand.grid(individual_uuid = sort(unique(time_at_risk$individual_uuid))[1:2000],
  #                         date = seq(min(time_at_risk$startDate),
  #                                    max(time_at_risk$endDate),
  #                                    by = 1)[1:1000])
  
  # Join to expanded whether the person was resident or not
  results_list <- list()
  for (i in 1:nrow(time_at_risk)){
    this_instance <- time_at_risk[i,]
    right <- data_frame(individual_uuid = this_instance$individual_uuid,
                           date = seq(this_instance$startDate,
                                      this_instance$endDate,
                                      by = 1))
    results_list[[i]] <- right
  }
  # result <- bind_rows(results_list)
  result <- data.table::rbindlist(results_list)

  # Return result
  return(result)
}
joebrew/cism documentation built on May 19, 2019, 2:58 p.m.