R/transform-tdc.R

#' Extract time-dependent covariates from data set
#'
#' For all covariates in the data set, this functions checks if the values
#' of the covariate changes per ID or other grouping variable. Returns the names
#' of variables that change over time.
#'
#' @param event_df A data frame (potentially) containing time-dependent covariates.
#' @param id_var A character indicating the grouping variable. For each covariate
#' it will be checked if their values change within a group specified by
#' \code{id_var}.
#' @import dplyr
#' @return A character vector containing names of variables that are not constant
#' in each group (\code{id_var}).
#' @keywords internal
get_tdc <- function(event_df, id_var) {

	event_df %>% group_by_(.dots=list(id_var)) %>%
		summarize_all(.funs=funs(any(length(unique(.)) > 1))) %>%
		select_if(any) %>%
		names()

}

#' Extract unique cut points when time-dependent covariates present
#'
#' Given a data frame with one row per subject containing event times and
#' a data frame containing time points at which a time-dependent covariate changes
#' its value, returns the unique time at which either event occurs or a
#' time-dependent covariate changes its value.
#'
#' @inheritParams get_tdc
#' @param tdc_df A data frame containing information on time-dependent variables
#' in long format. Needs to contain a "time" variable indicating when the
#' TDCs change their value. Must have same name as variable indicating event
#' times in \code{event_df}.
#' @param time_var A character, specifies the column of the event or
#' censoring time in \code{event_df} and the time of measurement for
#' the time-dependent covariates in \code{tdc_df}.
#' @param status_var As \code{time_var}, but specifies column containing the
#' event indicator. Can be missing in the \code{tdc_df}.
#' @param cens_value The value that indicates censoring in the
#' \code{status_var} column.
#' @import dplyr
#' @keywords internal
combine_cut <- function(
	event_df,
	tdc_df,
	time_var,
	status_var,
	cens_value=0) {


	tdc_time   <- tdc_df %>% select(one_of(time_var)) %>% unlist() %>% unique()
	event_time <- event_df %>% select(one_of(time_var)) %>% unlist()
	event_time <- event_time[event_df[[status_var]] != cens_value] %>% unique()

	union(tdc_time, event_time) %>% sort()

}

#' Create piece-wise exponential data in case of time-dependent covariates
#'
#' Given to data frames, one containing event time information (one row per subject)
#' and one containing information on time-dependent covariates, creates
#' piece-wise exponential data (with one split per event time and time at
#' which a TDC changes its value).
#'
#' @inherit combine_cut
#' @inheritParams split_data
#' @param id_var The ID variable name, identifying subjects.
#' Needs to be the same name in both data sets.
#' @importFrom tidyr fill
#' @examples
#' data("pbc", package="survival")# loads both, pbc and pbcseq
#' pbc$status = 1*(pbc$status == 2)
#' pbcseq$time <- pbcseq$day # time of event/measurement must be equal in both data
#' pbcseq$day  <- NULL
#' pbc_ped     <- split_tdc(Surv(time, status)~., pbc, pbcseq, "id", "time", "status")
#' @export
split_tdc <- function(
	formula,
	event_df,
	tdc_df,
	id_var,
	time_var,
	status_var,
	cens_value=0) {

	# intervals must be split at each event time and time at which the TDC
	# changes its value
	utime <- combine_cut(event_df, tdc_df, time_var, status_var,
		cens_value=cens_value)
	# for joining, we remove baseline information of variables that are present
	# as TDC variables in tdc_df
	tdc <- setdiff(get_tdc(tdc_df, id_var), c(id_var, time_var, status_var))
	event_df %<>% select(-one_of(tdc))
	ped <- split_data(formula, data = event_df, cut=utime, id=id_var)

	#
 	tdc_df %<>% select(one_of(c(id_var, time_var, tdc)))

	ped %>% left_join(tdc_df, by=c(id_var, "tstart"=time_var)) %>%
		group_by_(.dots=list(id_var)) %>%
		fill(setdiff(tdc, c(id_var, time_var, status_var)))

}
adibender/pamm documentation built on May 14, 2019, 5:22 p.m.