Nothing
#' Data manipulation
#' @description The function \code{dataprep} reshapes data from a long format to a ready-to-use format to be used directly in the function \code{ciregic}.
#' @author Jun Park, \email{jun.park@alumni.iu.edu}
#' @author Giorgos Bakoyannis, \email{gbakogia@iu.edu}
#' @param data a data frame that includes the variables named in the \code{ID}, \code{time}, \code{event}, and \code{Z} arguments
#' @param ID a variable indicating individuals' ID
#' @param time a variable indicating observed time points
#' @param event a vector of event indicator. If an observation is right-censored, \code{event = 0}; otherwise, \code{event = 1} or \code{event = 2}, where \code{1} represents the first cause of failure, and \code{2} represents the second cause of failure. The current version of package only allows two causes of failure.
#' @param Z a vector of variables indicating name of covariates
#' @keywords dataprep
#' @details The function \code{dataprep} provides a ready-to-use data format that can be directly used in the function \code{ciregic}. The returned data frame consists of \code{id}, \code{v}, \code{u}, \code{c}, and covariates as columns. The \code{v} and \code{u} indicate time window with the last observation time before the event and the first observation after the event. The \code{c} represents a type of event, for example, \code{c = 1} for the first cause of failure, \code{c = 2} for the second cause of failure, and \code{c = 0} for the right-censored. For individuals having one time record with the event, the lower bound \code{v} will be replaced by zero, for example \code{(0, v]}. For individuals having one time record without the event, the upper bound \code{u} will be replaced by \code{Inf}, for example \code{(v, Inf]}. Records are sorted by \code{ID} and then by \code{time} before processing, records with a missing \code{time} are discarded with a warning, and the covariate values of each individual are taken from the individual's first record. Individuals for whom no lower bound \code{v} can be determined, and rows containing any other missing value, are omitted from the result.
#' @return a data frame with one row per individual and columns \code{id}, \code{v}, \code{u}, \code{c}, followed by the covariates in the order given in \code{Z}
#' @examples
#' library(intccr)
#' dataprep(data = longdata, ID = id, time = t, event = c, Z = c(z1, z2))
#'
#' @export
dataprep <- function(data, ID, time, event, Z) {
  # The column arguments are given as bare names; recover them as strings
  # from the unevaluated call instead of evaluating them.
  mcall <- match.call()
  ID <- deparse(mcall$ID)
  time <- deparse(mcall$time)
  event <- deparse(mcall$event)
  # For Z = c(z1, z2) the call becomes c("c", "z1", "z2"); drop the leading
  # function name. A single bare name (Z = z1) has nothing to drop.
  Z <- unlist(strsplit(as.character(mcall$Z), " "))
  if (length(Z) > 1) Z <- Z[-1]

  # Fail early on unknown names; otherwise a missing covariate would be
  # silently recycled into the covariate matrix below.
  unknown <- setdiff(c(ID, time, event, Z), colnames(data))
  if (length(unknown) > 0) {
    stop("The following variables are not found in 'data': ",
         toString(unknown), call. = FALSE)
  }

  # Sort by subject, then by visit time within subject, so that the
  # per-subject scan below sees the records in chronological order.
  data <- data[order(data[[ID]], data[[time]]), , drop = FALSE]

  missing_time <- is.na(data[[time]])
  if (any(missing_time)) {
    dropped <- paste(capture.output(data[missing_time, , drop = FALSE]),
                     collapse = "\n")
    warning("The following records have missing visit times and will be discarded:\n\n", dropped)
    data <- data[!missing_time, , drop = FALSE]
  }

  uid <- sort(unique(data[[ID]]))
  n <- length(uid)
  p <- length(Z)
  # Select covariates by name so the column order matches Z (and therefore
  # the column names assigned to X below).
  mZ <- data[, Z, drop = FALSE]
  id <- v <- u <- cause <- rep(NA, n)
  X <- matrix(data = NA, nrow = n, ncol = p)

  for (i in seq_len(n)) {
    rows <- which(data[[ID]] == uid[i])
    indt <- data[[time]][rows]
    indc <- data[[event]][rows]
    indZ <- as.matrix(mZ[rows, , drop = FALSE])
    id[i] <- uid[i]
    # Covariates are taken from the subject's first record.
    X[i, ] <- indZ[1, ]

    if (length(indt) == 1) {
      if (indc != 0) {
        # Single record with an event: interval (0, t].
        v[i] <- 0
        u[i] <- indt
      } else {
        # Single record without an event: interval (t, Inf].
        v[i] <- indt
        u[i] <- Inf
      }
      cause[i] <- indc
    } else {
      # Walk the visits in time order: every event-free visit pushes the
      # lower bound forward; the first visit with an event closes the
      # interval. If the very first visit already has an event, v stays NA
      # and the subject is omitted below.
      for (j in seq_along(indt)) {
        if (indc[j] == 0) {
          v[i] <- indt[j]
          u[i] <- Inf
          cause[i] <- 0
        } else {
          u[i] <- indt[j]
          cause[i] <- if (indc[j] == 1) 1 else 2
          break
        }
      }
    }
  }

  colnames(X) <- Z
  temp <- data.frame(id = id, v = v, u = u, c = cause, X)

  # Report the actual subject IDs (not row positions) that lack a lower bound.
  naval <- which(is.na(v))
  if (length(naval) == 1) {
    warning("subject id ", id[naval], " is omitted because its interval is (0, Inf).")
  } else if (length(naval) > 1) {
    warning("subject id ", toString(id[naval]), " are omitted because those intervals are (0, Inf).")
  }
  na.omit(temp)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.