# Automatically generated from the noweb directory
tmerge <- function(data1, data2, id, ..., tstart, tstop, options) {
Call <- match.call()
# The function wants to recognize special keywords in the
# arguments, so define a set of functions which will be used to
# mark objects
new <- new.env(parent=parent.frame())
assign("tdc", function(time, value=NULL, init=NULL) {
x <- list(time=time, value=value, default= init);
class(x) <- "tdc"; x},
envir=new)
assign("cumtdc", function(time, value=NULL, init=NULL) {
x <- list(time=time, value=value, default= init);
class(x) <-"cumtdc"; x},
envir=new)
assign("event", function(time, value=NULL, censor=NULL) {
x <- list(time=time, value=value, censor=censor);
class(x) <-"event"; x},
envir=new)
assign("cumevent", function(time, value=NULL, censor=NULL) {
x <- list(time=time, value=value, censor=censor);
class(x) <-"cumevent"; x},
envir=new)
if (missing(data1) || missing(data2) || missing(id))
stop("the data1, data2, and id arguments are required")
if (!inherits(data1, "data.frame")) stop("data1 must be a data frame")
tmerge.control <- function(idname="id", tstartname="tstart", tstopname="tstop",
delay =0, na.rm=TRUE, tdcstart=NA_real_, ...) {
extras <- list(...)
if (length(extras) > 0)
stop("unrecognized option(s):", paste(names(extras), collapse=', '))
if (length(idname) != 1 || make.names(idname) != idname)
stop("idname option must be a valid variable name")
if (!is.null(tstartname) &&
(length(tstartname) !=1 || make.names(tstartname) != tstartname))
stop("tstart option must be NULL or a valid variable name")
if (length(tstopname) != 1 || make.names(tstopname) != tstopname)
stop("tstop option must be a valid variable name")
if (length(delay) !=1 || !is.numeric(delay) || delay < 0)
stop("delay option must be a number >= 0")
if (length(na.rm) !=1 || ! is.logical(na.rm))
stop("na.rm option must be TRUE or FALSE")
if (length(tdcstart) !=1) stop("tdcstart must be a single value")
list(idname=idname, tstartname=tstartname, tstopname=tstopname,
delay=delay, na.rm=na.rm, tdcstart=tdcstart)
}
if (!inherits(data1, "tmerge") && !is.null(attr(data1, "tname"))) {
# old style object that someone saved!
tm.retain <- list(tname = attr(data1, "tname"),
tevent= list(name=attr(data1, "tevent"),
censor= attr(data1, "tcensor")),
tdcvar = attr(data1, "tdcvar"),
n = nrow(data1))
attr(data1, "tname") <- attr(data1, "tevent") <- NULL
attr(data1, "tcensor") <- attr(data1, "tdcvar") <- NULL
attr(data1, "tm.retain") <- tm.retain
class(data1) <- c("tmerge", class(data1))
}
if (inherits(data1, "tmerge")) {
tm.retain <- attr(data1, "tm.retain")
firstcall <- FALSE
# check out whether the object looks legit:
# has someone tinkered with it? This won't catch everything
tname <- tm.retain$tname
tevent <- tm.retain$tevent
tdcvar <- tm.retain$tdcvar
if (nrow(data1) != tm.retain$n)
stop("tmerge object has been modified, size")
if (any(is.null(match(unlist(tname), names(data1)))) ||
any(is.null(match(tm.retain$tcdname, names(data1)))) ||
any(is.null(match(tevent$name, names(data1)))))
stop("tmerge object has been modified, missing variables")
for (i in seq(along=tevent$name)) {
ename <- tevent$name[i]
if (is.numeric(data1[[ename]])) {
if (!is.numeric(tevent$censor[[i]]))
stop("event variable ", ename,
" no longer matches it's original class")
}
else if (is.character(data1[[ename]])) {
if (!is.character(tevent$censor[[i]]))
stop("event variable ", ename,
" no longer matches it's original class")
}
else if (is.logical(data1[[ename]])) {
if (!is.logical(tevent$censor[[i]]))
stop("event variable ", ename,
" no longer matches it's original class")
}
else if (is.factor(data1[[ename]])) {
if (levels(data1[[ename]])[1] != tevent$censor[[i]])
stop("event variable ", ename,
" has a new first level")
}
else stop("event variable ", ename, " is of an invalid class")
}
} else {
firstcall <- TRUE
tname <- tevent <- tdcvar <- NULL
if (is.name(Call[["id"]])) {
idx <- as.character(Call[["id"]])
if (missing(options)) options <-list(idname= idx)
else if (is.null(options$idname)) options$idname <- idx
}
}
if (!missing(options)) {
if (!is.list(options)) stop("options must be a list")
if (!is.null(tname)) {
# If an option name matches one already in tname, don't confuse
# the tmerge.control routine with duplicate arguments
temp <- match(names(options), names(tname), nomatch=0)
topt <- do.call(tmerge.control, c(options, tname[temp==0]))
if (any(temp >0)) {
# A variable name is changing midstream, update the
# variable names in data1
varname <- tname[c("idname", "tstartname", "tstopname")]
temp2 <- match(varname, names(data1))
names(data1)[temp2] <- varname
}
}
else topt <- do.call(tmerge.control, options)
}
else if (length(tname)) topt <- do.call(tmerge.control, tname)
else topt <- tmerge.control()
# id, tstart, tstop are found in data2
if (missing(id)) stop("the id argument is required")
if (missing(data1) || missing(data2))
stop("two data sets are required")
id <- eval(Call[["id"]], data2, enclos=emptyenv()) #don't find it elsewhere
if (is.null(id)) stop("id variable not found in data2")
if (any(is.na(id))) stop("id variable cannot have missing values")
if (firstcall) {
if (!missing(tstop)) {
tstop <- eval(Call[["tstop"]], data2)
if (length(tstop) != length(id))
stop("tstop and id must be the same length")
# The neardate routine will check for legal tstop data type
}
if (!missing(tstart)) {
tstart <- eval(Call[["tstart"]], data2)
if (length(tstart)==1) tstart <- rep(tstart, length(id))
if (length(tstart) != length(id))
stop("tstart and id must be the same length")
if (any(tstart >= tstop))
stop("tstart must be < tstop")
}
}
else {
if (!missing(tstart) || !missing(tstop))
stop("tstart and tstop arguments only apply to the first call")
}
# grab the... arguments
notdot <- c("data1", "data2", "id", "tstart", "tstop", "options")
dotarg <- Call[is.na(match(names(Call), notdot))]
dotarg[[1]] <- as.name("list") # The as-yet dotarg arguments
if (missing(data2)) args <- eval(dotarg, envir=new)
else args <- eval(dotarg, data2, enclos=new)
argclass <- sapply(args, function(x) (class(x))[1])
argname <- names(args)
if (any(argname== "")) stop("all additional argments must have a name")
check <- match(argclass, c("tdc", "cumtdc", "event", "cumevent"))
if (any(is.na(check)))
stop(paste("argument(s)", argname[is.na(check)],
"not a recognized type"))
# The tcount matrix is useful for debugging
tcount <- matrix(0L, length(argname), 9)
dimnames(tcount) <- list(argname, c("early","late", "gap", "within",
"boundary", "leading", "trailing",
"tied", "missid"))
tcens <- tevent$censor
tevent <- tevent$name
if (is.null(tcens)) tcens <- vector('list', 0)
newdata <- data1 #make a copy
if (firstcall) {
# We don't look for topt$id. What if the user had id=clinic, but their
# starting data set also had a variable named "id". We want clinic for
# this first call.
idname <- Call[["id"]]
if (!is.name(idname))
stop("on the first call 'id' must be a single variable name")
# The line below finds tstop and tstart variables in data1
indx <- match(c(topt$idname, topt$tstartname, topt$tstopname), names(data1),
nomatch=0)
if (any(indx[1:2]>0) && FALSE) { # warning currently turned off. Be chatty?
overwrite <- c(topt$tstartname, topt$tstopname)[indx[2:3]]
warning("overwriting data1 variables", paste(overwrite, collapse=' '))
}
temp <- as.character(idname)
if (!is.na(match(temp, names(data1)))) {
data1[[topt$idname]] <- data1[[temp]]
baseid <- data1[[temp]]
}
else stop("id variable not found in data1")
if (any(duplicated(baseid)))
stop("for the first call (that establishes the time range) data1 must have no duplicate identifiers")
if (missing(tstop)) {
if (length(argclass)==0 || argclass[1] != "event")
stop("neither a tstop argument nor an initial event argument was found")
# this is case 2 -- the first time value for each obs sets the range
last <- !duplicated(id)
indx2 <- match(unique(id[last]), baseid)
if (any(is.na(indx2)))
stop("setting the range, and data2 has id values not in data1")
if (any(is.na(match(baseid, id))))
stop("setting the range, and data1 has id values not in data2")
newdata <- data1[indx2,]
tstop <- (args[[1]]$time)[last]
}
else {
if (length(baseid)== length(id) && all(baseid == id)) newdata <- data1
else { # Note: 'id' is the idlist for data 2
indx2 <- match(id, baseid)
if (any(is.na(indx2)))
stop("setting the range, and data2 has id values not in data1")
if (any(is.na(match(baseid, id))))
stop("setting the range, and data1 has id values not in data2")
newdata <- data1[indx2,]
}
}
if (any(is.na(tstop)))
stop("missing time value, when that variable defines the span")
if (missing(tstart)) {
indx <- which(tstop <=0)
if (length(indx) >0) stop("found an ending time of ", tstop[indx[1]],
", the default starting time of 0 is invalid")
tstart <- rep(0, length(tstop))
}
if (any(tstart >= tstop))
stop("tstart must be < tstop")
newdata[[topt$tstartname]] <- tstart
newdata[[topt$tstopname]] <- tstop
n <- nrow(newdata)
if (any(duplicated(id))) {
# sort by time within id
indx1 <- match(id, unique(id))
newdata <- newdata[order(indx1, tstop),]
}
temp <- newdata[[topt$idname]]
if (any(tstart >= tstop)) stop("tstart must be < tstop")
if (any(newdata$tstop[-n] > newdata$tstart[-1] &
temp[-n] == temp[-1]))
stop("first call has created overlapping or duplicated time intervals")
idmiss <- 0 # the tcount table should have a zero
}
else { #not a first call
idmatch <- match(id, data1[[topt$idname]], nomatch=0)
if (any(idmatch==0)) idmiss <- sum(idmatch==0)
else idmiss <- 0
}
saveid <- id
for (ii in seq(along.with=args)) {
argi <- args[[ii]]
baseid <- newdata[[topt$idname]]
dstart <- newdata[[topt$tstartname]]
dstop <- newdata[[topt$tstopname]]
argcen <- argi$censor
# if an event time is missing then skip that obs. Also toss obs that
# whose id does not match anyone in data1
etime <- argi$time
if (idmiss ==0) keep <- rep(TRUE, length(etime))
else keep <- (idmatch > 0)
if (length(etime) != length(saveid))
stop("argument ", argname[ii], " is not the same length as id")
if (!is.null(argi$value)) {
if (length(argi$value) != length(saveid))
stop("argument ", argname[ii], " is not the same length as id")
if (topt$na.rm) keep <- keep & !(is.na(etime) | is.na(argi$value))
else keep <- keep & !is.na(etime)
if (!all(keep)) {
etime <- etime[keep]
argi$value <- argi$value[keep]
}
}
else {
keep <- keep & !is.na(etime)
etime <- etime[keep]
}
id <- saveid[keep]
# Later steps become easier if we sort the new data by id and time
# The match() is critical when baseid is not in sorted order. The
# etime part of the sort will change from one ii value to the next.
indx <- order(match(id, baseid), etime)
id <- id[indx]
etime <- etime[indx]
if (!is.null(argi$value))
yinc <- argi$value[indx]
else yinc <- NULL
# indx1 points to the closest start time in the baseline data (data1)
# that is <= etime. indx2 to the closest end time that is >=etime.
# If etime falls into a (tstart, tstop) interval, indx1 and indx2
# will match
# If the "delay" argument is set and this event is of type tdc, then
# move any etime that is after the entry time for a subject.
if (topt$delay >0 && argclass[ii] %in% c("tdc", "cumtdc")) {
mintime <- tapply(dstart, baseid, min)
index <- match(id, names(mintime))
etime <- ifelse(etime <= mintime[index], etime, etime+ topt$delay)
}
indx1 <- neardate(id, baseid, etime, dstart, best="prior")
indx2 <- neardate(id, baseid, etime, dstop, best="after")
# The event times fall into one of 5 categories
# 1. Before the first interval
# 2. After the last interval
# 3. Outside any interval but with time span, i.e, it falls into
# a gap in follow-up
# 4. Strictly inside an interval (does't touch either end)
# 5. Inside an interval, but touching.
itype <- ifelse(is.na(indx1), 1,
ifelse(is.na(indx2), 2,
ifelse(indx2 > indx1, 3,
ifelse(etime== dstart[indx1] |
etime== dstop[indx2], 5, 4))))
# Subdivide the events that touch on a boundary
# 1: intervals of (a,b] (b,d], new count at b "tied edge"
# 2: intervals of (a,b] (c,d] with c>b, new count at c, "front edge"
# 3: intervals of (a,b] (c,d] with c>b, new count at b, "back edge"
#
subtype <- ifelse(itype!=5, 0,
ifelse(indx1 == indx2+1, 1,
ifelse(etime==dstart[indx1], 2, 3)))
tcount[ii,1:7] <- table(factor(itype+subtype, levels=c(1:4, 6:8)))
# count ties. id and etime are not necessarily sorted
tcount[ii,8] <- sum(tapply(etime, id, function(x) sum(duplicated(x))))
tcount[ii,9] <- idmiss
indx4 <- which(itype==4)
n4 <- length(indx4)
if (n4 > 0) {
# we need to eliminate duplicate times within the same id, but
# do so without changing the class of etime: it might
# be a Date, an integer, a double, ...
# Using unique on a data.frame does the trick
icount <- data.frame(irow= indx1[indx4], etime=etime[indx4])
icount <- unique(icount)
# the icount data frame will be sorted by second column within first
# so rle is faster than table
n.add <- rle(icount$irow)$length # number of rows to add for each id
# expand the data
irep <- rep.int(1L, nrow(newdata))
erow <- unique(indx1[indx4]) # which rows in newdata to be expanded
irep[erow] <- 1+ n.add # number of rows in new data
jrep <- rep(1:nrow(newdata), irep) #stutter the duplicated rows
newdata <- newdata[jrep,] #expand it out
dstart <- dstart[jrep]
dstop <- dstop[jrep]
#fix up times
nfix <- length(erow)
temp <- vector("list", nfix)
iend <- (cumsum(irep))[irep >1] #end row of each duplication set
for (j in 1:nfix) temp[[j]] <- -(seq(n.add[j] -1, 0)) + iend[j]
newrows <- unlist(temp)
# this should not be necessary
# if (inherits(dstart, "Date"))
# icount$etime <- as.Date(icount$etime, origin= "1970-01-01")
dstart[newrows] <- dstop[newrows-1] <- icount$etime
newdata[[topt$tstartname]] <- dstart
newdata[[topt$tstopname]] <- dstop
for (ename in tevent) newdata[newrows-1, ename] <- tcens[[ename]]
# refresh indices
baseid <- newdata[[topt$idname]]
indx1 <- neardate(id, baseid, etime, dstart, best="prior")
indx2 <- neardate(id, baseid, etime, dstop, best="after")
subtype[itype==4] <- 1 #all the "insides" are now on a tied edge
itype[itype==4] <- 5
}
# add a tdc variable
newvar <- newdata[[argname[ii]]] # prior value (for sequential tmerge calls)
if (argclass[ii] %in% c("tdc", "cumtdc")){
if (argname[[ii]] %in% tevent)
stop("attempt to turn event variable", argname[[ii]], "into a tdc")
if (!(argname[[ii]] %in% tdcvar)){
tdcvar <- c(tdcvar, argname[[ii]])
if (!is.null(newvar) && argclass[ii] == "tdc") {
warning(paste0("replacement of variable '", argname[ii], "'"))
newvar <- NULL
}
}
}
if (argclass[ii] == "tdc") {
default <- argi$default # default value
if (is.null(default)) default <- topt$tdcstart
else if (length(default) !=1)
stop("default tdc value must be of length 1")
# id can be any data type; feed integers to the C routine
storage.mode(dstart) <- storage.mode(etime) <- "double" #if time is integer
uid <- unique(baseid)
index <- .Call(Ctmerge2, match(baseid, uid), dstart,
match(id, uid), etime)
if (is.null(newvar)) { # create new variable
if (is.null(yinc)) newvar <- ifelse(index==0, 0L, 1L) #add a 0/1 variable
else {
newvar <- yinc[pmax(1L, index)]
if (any(index==0)) {
if (is.na(default)) is.na(newvar) <- (index==0L)
else {
if (is.numeric(newvar)) newvar[index==0L] <- as.numeric(default)
else {
if (is.factor(newvar)) {
# special case: if default isn't in the set of levels,
# add it to the levels
if (is.na(match(default, levels(newvar))))
levels(newvar) <- c(levels(newvar), default)
}
newvar[index== 0L] <- default
}
}
}
}
} else if (is.null(yinc)) {
# Existing variable, no yinc, so update will be 0 or 1
# Okay only if the current variable is 0/1
if (all(is.na(newvar) | newvar==0L | newvar==1L))
newvar[index!=0L] <- 1L
else stop("tdc update does not match prior variable type: ", argname[ii])
} else {
# attempt to update an existing tdc with new values from a new variable
# We know how to handle a few special cases, but the basic strategy
# is "don't try to be clever". Remember that class() can return
# a vector of length > 1
if (inherits(newvar, "factor") && (!inherits(yinc, "factor") ||
!identical(levels(newvar), levels(yinc))))
stop("tdc update does not match prior factor: ", argname[ii])
clnew <- class(yinc)
clold <- class(newvar)
if (identical(clnew, clold) || (length(clnew)==1 && length(clold)==1 &&
class(newvar) %in% c("integer", "numeric") &&
class(yinc) %in% c("integer", "numeric")))
newvar[index != 0L] <- yinc[index]
else stop("tdc update does not match prior variable type: ", argname[ii])
}
tdcvar <- unique(c(tdcvar, argname[[ii]]))
}
# add events
if (argclass[ii] %in% c("cumtdc", "cumevent")) {
if (is.null(yinc)) yinc <- rep(1L, length(id))
else if (is.logical(yinc)) yinc <- as.numeric(yinc) # allow cumulative T/F
if (!is.numeric(yinc)) stop("invalid increment for cumtdc or cumevent")
}
if (argclass[ii] == "cumevent"){
ykeep <- (yinc !=0) # ignore the addition of a censoring event
yinc <- unlist(tapply(yinc, match(id, baseid), cumsum))
}
if (argclass[ii] %in% c("event", "cumevent")) {
if (!is.null(newvar)) {
if (!argname[ii] %in% tevent) {
#warning(paste0("non-event variable '", argname[ii], "' replaced by an event variable"))
newvar <- NULL
}
else if (!is.null(yinc)) {
if (class(newvar) != class(yinc))
stop("attempt to update an event variable with a different type")
if (is.factor(newvar) && !all(levels(yinc) %in% levels(newvar)))
stop("attemp to update an event variable and levels do not match")
}
}
if (is.null(yinc)) yinc <- rep(1L, length(id))
if (is.null(newvar)) {
if (is.numeric(yinc)) newvar <- rep(0L, nrow(newdata))
else if (is.factor(yinc))
newvar <- factor(rep(levels(yinc)[1], nrow(newdata)),
levels(yinc))
else if (is.character(yinc)) newvar <- rep('', nrow(newdata))
else if (is.logical(yinc)) newvar <- rep(FALSE, nrow(newdata))
else stop("invalid value for a status variable")
}
keep <- (subtype==1 | subtype==3) # all other events are thrown away
if (argclass[ii] == "cumevent") keep <- (keep & ykeep)
newvar[indx2[keep]] <- yinc[keep]
# add this into our list of 'this is an event type variable'
if (!(argname[ii] %in% tevent)) {
tevent <- c(tevent, argname[[ii]])
if (is.factor(yinc)) tcens <- c(tcens, list(levels(yinc)[1]))
else if (is.logical(yinc)) tcens <- c(tcens, list(FALSE))
else if (is.character(yinc)) tcens <- c(tcens, list(""))
else if (is.integer(yinc)) tcens <- c(tcens, list(0L))
else tcens <- c(tcens, list(0))
names(tcens) <- tevent
}
}
else if (argclass[ii] == "cumtdc") { # process a cumtdc variable
# I don't have a good way to catch the reverse of this user error
if (argname[[ii]] %in% tevent)
stop("attempt to turn event variable", argname[[ii]], "into a cumtdc")
keep <- itype != 2 # changes after the last interval are ignored
indx <- ifelse(subtype==1, indx1,
ifelse(subtype==3, indx2+1L, indx2))
# we want to pass the right kind of NA to the C code
default <- argi$default
if (is.null(default)) default <- as.numeric(topt$tdcstart)
else {
if (length(default) != 1) stop("tdc initial value must be of length 1")
if (!is.numeric(default)) stop("cumtdc initial value must be numeric")
}
if (is.null(newvar)) { # not overwriting a prior value
if (is.null(argi$value)) newvar <- rep(0.0, nrow(newdata))
else newvar <- rep(default, nrow(newdata))
}
# the increment must be numeric
if (!is.numeric(newvar))
stop("data and starting value do not agree on data type")
# id can be any data type; feed integers to the C routine
storage.mode(yinc) <- storage.mode(dstart) <- "double"
storage.mode(newvar) <- storage.mode(etime) <- "double"
newvar <- .Call(Ctmerge, match(baseid, baseid), dstart, newvar,
match(id, baseid)[keep], etime[keep],
yinc[keep], indx[keep])
}
newdata[[argname[ii]]] <- newvar
}
tm.retain <- list(tname = topt[c("idname", "tstartname", "tstopname")],
n= nrow(newdata))
if (length(tevent))
tm.retain$tevent <- list(name = tevent, censor=tcens)
if (length(tdcvar)>0) tm.retain$tdcvar <- tdcvar
attr(newdata, "tm.retain") <- tm.retain
attr(newdata, "tcount") <- rbind(attr(data1, "tcount"), tcount)
attr(newdata, "call") <- Call
row.names(newdata) <- NULL #These are a mess; kill them off.
# Not that it works: R just assigns new row names.
class(newdata) <- c("tmerge", "data.frame")
newdata
}
summary.tmerge <- function(object, ...) {
if (!is.null(cl <- attr(object, "call"))) {
cat("Call:\n")
dput(cl)
cat("\n")
}
print(attr(object, "tcount"))
}
# This could be smarter: if you only drop variables that are not known
# to tmerge then it would be okay. But I currently like the "touch it
# and it dies" philosophy
"[.tmerge" <- function(x, ..., drop=TRUE){
class(x) <- "data.frame"
attr(x, "tm.retain") <- NULL
attr(x, "tcount") <- NULL
attr(x, "call") <- NULL
NextMethod(x)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.